Files
vision-record/analyzer.py

147 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
图片分析模块 - 调用大模型 Vision API
"""
import base64
import json
import mimetypes
import re
from pathlib import Path

import requests

from config import LLM_API_URL, LLM_API_KEY, LLM_MODEL, ANALYSIS_PROMPT
class ImageAnalyzer:
    """Analyze an image by sending it to an OpenAI-style vision chat API.

    The endpoint, API key and model name are copied from the ``config``
    module constants when an instance is created.
    """

    # Seconds to wait for the HTTP call, and the reply-length cap sent to the API.
    REQUEST_TIMEOUT = 30
    MAX_TOKENS = 500

    # MIME type used when the image type cannot be inferred from the file name.
    DEFAULT_MIME_TYPE = "image/jpeg"

    # Phrases in the model reply that are treated as "nothing happened".
    NO_EVENT_MARKERS = ("无明显事件", "没有明显")

    # One structured event: "事件类型: ... 描述: ... 置信度: ...".
    # * Both the ASCII colon and the full-width colon (U+FF1A) are accepted;
    #   the latter is written as an escape so it cannot be silently mangled.
    # * DOTALL lets the type/description span lines, while the confidence is
    #   limited to the rest of its own line so that several events in one
    #   reply are matched separately instead of being swallowed by the first.
    _EVENT_PATTERN = re.compile(
        r"事件类型\s*[:\uFF1A]\s*(.+?)\s*"
        r"描述\s*[:\uFF1A]\s*(.+?)\s*"
        r"置信度\s*[:\uFF1A][ \t]*([^\r\n]*)",
        re.DOTALL,
    )

    def __init__(self):
        """Read the API endpoint, key and model name from the config constants."""
        self.api_url = LLM_API_URL
        self.api_key = LLM_API_KEY
        self.model = LLM_MODEL

    def encode_image(self, image_path):
        """Return the content of the file at *image_path* as a base64 string.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        with open(image_path, 'rb') as f:
            return base64.b64encode(f.read()).decode('utf-8')

    @staticmethod
    def _guess_mime_type(image_path, default=DEFAULT_MIME_TYPE):
        """Guess the image MIME type from the file name, falling back to *default*."""
        guessed, _ = mimetypes.guess_type(str(image_path))
        if guessed and guessed.startswith("image/"):
            return guessed
        return default

    @staticmethod
    def _failure(message):
        """Build a failure result; ``events`` is always present (and empty)."""
        return {'success': False, 'events': [], 'error': message}

    def analyze(self, image_path):
        """Send the image to the vision API and parse the reply into events.

        This method reports problems through its return value and does not
        raise for file, network or response-format errors.

        Args:
            image_path: Path of the image file to analyze.

        Returns:
            dict: on success
                ``{'success': True, 'raw_response': str, 'events': list}``;
                on failure
                ``{'success': False, 'events': [], 'error': str}``.
        """
        # The file is read in its own step so an unreadable image is reported
        # before any request is built or sent.
        try:
            image_base64 = self.encode_image(image_path)
        except Exception as exc:  # boundary: report instead of raising
            return self._failure(str(exc))

        try:
            # Request body in the OpenAI vision chat-completions format.
            mime_type = self._guess_mime_type(image_path)
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            }
            payload = {
                "model": self.model,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": ANALYSIS_PROMPT},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{mime_type};base64,{image_base64}"
                                },
                            },
                        ],
                    }
                ],
                "max_tokens": self.MAX_TOKENS,
            }

            # Strip trailing slashes so the joined URL never contains "//".
            endpoint = f"{str(self.api_url).rstrip('/')}/chat/completions"
            response = requests.post(
                endpoint,
                headers=headers,
                json=payload,
                timeout=self.REQUEST_TIMEOUT,
            )
            if response.status_code != 200:
                return self._failure(
                    f"API 错误: {response.status_code} - {response.text}"
                )

            content = response.json()['choices'][0]['message']['content']
            # A null / non-text content cannot be parsed; say so explicitly
            # instead of surfacing a TypeError message from the parser.
            if not isinstance(content, str):
                return self._failure('API 返回内容为空')

            events = self.parse_events(content)
        except requests.Timeout:
            return self._failure('API 请求超时')
        except Exception as exc:  # boundary: report instead of raising
            return self._failure(str(exc))

        return {
            'success': True,
            'raw_response': content,
            'events': events,
        }

    def parse_events(self, content):
        """Turn the model's text reply into a list of event dicts.

        Args:
            content: Text returned by the model.

        Returns:
            list[dict]: each item has the keys ``event_type``,
            ``description`` and ``confidence``. The list is never empty:
            a reply containing a "no event" phrase yields a single
            ``'无事件'`` entry, and a reply with no recognizable structure
            yields a single ``'其他'`` entry holding the first 200
            characters of the reply.
        """
        # A "no obvious event" phrase anywhere in the reply wins over any structure.
        if any(marker in content for marker in self.NO_EVENT_MARKERS):
            return [{
                'event_type': '无事件',
                'description': '无明显事件',
                'confidence': '',
            }]

        events = [
            {
                'event_type': event_type.strip(),
                'description': description.strip(),
                'confidence': confidence.strip(),
            }
            for event_type, description, confidence
            in self._EVENT_PATTERN.findall(content)
        ]

        if not events:
            # Unstructured reply: keep a truncated copy as the description.
            events.append({
                'event_type': '其他',
                'description': content[:200],
                'confidence': '',
            })
        return events
def analyze_image(image_path):
    """Convenience wrapper: analyze one image with a fresh ``ImageAnalyzer``.

    Args:
        image_path: Path of the image file to analyze.

    Returns:
        Whatever ``ImageAnalyzer.analyze`` returns for *image_path*.
    """
    return ImageAnalyzer().analyze(image_path)
if __name__ == "__main__":
    # Manual smoke test: needs a sample photo at data/images/test.jpg
    # next to this file.
    sample_path = Path(__file__).parent / "data" / "images" / "test.jpg"
    if not sample_path.exists():
        print("没有测试图片,请先拍照")
    else:
        print(f"分析图片: {sample_path}")
        outcome = analyze_image(sample_path)
        pretty = json.dumps(outcome, ensure_ascii=False, indent=2)
        print(f"结果: {pretty}")