Files
vision-record/analyzer.py

162 lines
5.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
图片分析模块 - 调用大模型 Vision API
"""
import base64
import requests
import json
import re
from pathlib import Path
from config import config_mgr, ANALYSIS_PROMPT
class ImageAnalyzer:
    """Client for an OpenAI-style vision ``/chat/completions`` endpoint.

    The endpoint URL, API key and model name are read from ``config_mgr``
    under the keys ``vision_api_url``, ``vision_api_key`` and
    ``vision_model``.
    """

    # Timeout, in seconds, handed to ``requests.post``.
    REQUEST_TIMEOUT = 30
    # Value sent as ``max_tokens`` in the request payload.
    MAX_TOKENS = 500

    # One "事件类型 / 描述 / 置信度" record.
    # * ``[:\uff1a]`` accepts both the ASCII colon and the full-width colon
    #   (U+FF1A) that Chinese-language model output commonly uses.
    # * The last group is lazy and bounded by a lookahead for the next
    #   "事件类型" label (or end of text) so that several records in one
    #   reply are returned as separate matches instead of being swallowed
    #   into the first record's confidence field.
    _EVENT_PATTERN = re.compile(
        r"事件类型[:\uff1a]\s*(.+?)\s*"
        r"描述[:\uff1a]\s*(.+?)\s*"
        r"置信度[:\uff1a]\s*(.+?)\s*"
        r"(?=事件类型[:\uff1a]|\Z)",
        re.DOTALL,
    )

    def __init__(self):
        self._refresh_config()

    def _refresh_config(self):
        """Load the API URL, key and model name from ``config_mgr``."""
        self.api_url = config_mgr.get('vision_api_url')
        self.api_key = config_mgr.get('vision_api_key')
        self.model = config_mgr.get('vision_model')

    def encode_image(self, image_path):
        """Return the file at *image_path* as a base64-encoded ``str``."""
        with open(image_path, 'rb') as f:
            return base64.b64encode(f.read()).decode('utf-8')

    def analyze(self, image_path):
        """Send one image to the vision API and parse the reply.

        Args:
            image_path: Path of the image file to analyze.

        Returns:
            dict: On success ``{'success': True, 'raw_response': str,
            'events': list}``; on any failure
            ``{'success': False, 'error': str}``. Exceptions are not
            propagated to the caller.
        """
        try:
            # Settings may have been edited since construction; re-read them.
            self._refresh_config()

            # Validate configuration before doing any work.
            if not self.api_url:
                return {'success': False, 'error': 'Vision API URL 未配置'}
            if not self.api_key:
                return {'success': False, 'error': 'Vision API Key 未配置'}
            if not self.model:
                return {'success': False, 'error': 'Vision Model 未配置'}

            print(f"[Analyzer] Analyzing: {image_path}")
            print(f"[Analyzer] API: {self.api_url}, Model: {self.model}")

            image_base64 = self.encode_image(image_path)

            # Build the request (OpenAI Vision message format).
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            }
            payload = {
                "model": self.model,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": ANALYSIS_PROMPT
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{image_base64}"
                                }
                            }
                        ]
                    }
                ],
                "max_tokens": self.MAX_TOKENS
            }

            # Tolerate a configured base URL with a trailing slash so the
            # request path never becomes ".../v1//chat/completions".
            endpoint = f"{self.api_url.rstrip('/')}/chat/completions"
            response = requests.post(
                endpoint,
                headers=headers,
                json=payload,
                timeout=self.REQUEST_TIMEOUT
            )
            print(f"[Analyzer] Response status: {response.status_code}")

            if response.status_code != 200:
                print(f"[Analyzer] Error response: {response.text}")
                return {
                    'success': False,
                    'error': f"API 错误: {response.status_code} - {response.text}"
                }

            # Pull the assistant text out of the reply. A missing or
            # non-text field is reported explicitly rather than leaking a
            # bare KeyError/TypeError message to the caller.
            result = response.json()
            try:
                content = result['choices'][0]['message']['content']
            except (KeyError, IndexError, TypeError):
                content = None
            if not isinstance(content, str):
                return {
                    'success': False,
                    'error': f"API 响应格式异常: {response.text[:200]}"
                }

            events = self.parse_events(content)
            return {
                'success': True,
                'raw_response': content,
                'events': events
            }
        except requests.Timeout:
            return {'success': False, 'error': 'API 请求超时'}
        except Exception as e:
            # Boundary handler: callers receive an error dict, never a raise.
            return {'success': False, 'error': str(e)}

    def parse_events(self, content):
        """Extract event records from the model's text reply.

        Args:
            content: Text returned by the model.

        Returns:
            list[dict]: Each item has the keys ``event_type``,
            ``description`` and ``confidence``.

            * If the text contains "无明显事件" or "没有明显", a single
              "无事件" record is returned.
            * Otherwise every "事件类型 / 描述 / 置信度" record found is
              returned, in order of appearance.
            * If no such record is found, a single "其他" record carrying
              the first 200 characters of the text is returned.
        """
        # The model explicitly reported that nothing happened.
        if "无明显事件" in content or "没有明显" in content:
            return [{
                'event_type': '无事件',
                'description': '无明显事件',
                'confidence': ''
            }]

        events = [
            {
                'event_type': event_type.strip(),
                'description': description.strip(),
                'confidence': confidence.strip()
            }
            for event_type, description, confidence
            in self._EVENT_PATTERN.findall(content)
        ]
        if events:
            return events

        # Unstructured reply: keep a truncated copy as the description.
        return [{
            'event_type': '其他',
            'description': content[:200],
            'confidence': ''
        }]
def analyze_image(image_path):
    """Analyze a single image with a freshly constructed ``ImageAnalyzer``.

    Args:
        image_path: Path of the image file to analyze.

    Returns:
        dict: Whatever ``ImageAnalyzer.analyze`` returns for *image_path*.
    """
    return ImageAnalyzer().analyze(image_path)
if __name__ == "__main__":
    # Manual smoke test: analyze data/images/test.jpg located next to this
    # file, if it exists, and pretty-print the result.
    sample_path = Path(__file__).parent.joinpath("data", "images", "test.jpg")
    if not sample_path.exists():
        print("没有测试图片")
    else:
        print(f"分析图片: {sample_path}")
        outcome = analyze_image(sample_path)
        print(f"结果: {json.dumps(outcome, ensure_ascii=False, indent=2)}")