# vision-record/analyzer.py

"""
图片分析模块 - 调用大模型 Vision API
"""
import base64
import json
import mimetypes
import re
from pathlib import Path

import requests

from config import LLM_API_URL, LLM_API_KEY, LLM_MODEL, ANALYSIS_PROMPT


class ImageAnalyzer:
    """Analyze an image by sending it to an OpenAI-style vision chat endpoint.

    The endpoint URL, API key and model name are read from the ``config``
    module; ``ANALYSIS_PROMPT`` is sent as the text part of every request.
    """

    # MIME type used when the image type cannot be inferred from the file name.
    _DEFAULT_MIME_TYPE = "image/jpeg"

    # One structured event: "事件类型: … 描述: … 置信度: …".
    # The first two fields may span lines (DOTALL).  The confidence field is
    # bounded to the rest of its line, or up to the next event label, so that
    # consecutive events are matched separately instead of being swallowed
    # by the last group.
    _EVENT_PATTERN = re.compile(
        r"事件类型[:：]\s*(.+?)\s*描述[:：]\s*(.+?)\s*置信度[:：]\s*"
        r"([^\r\n]+?)\s*(?=事件类型[:：]|[\r\n]|$)",
        re.DOTALL,
    )

    def __init__(self, *, timeout=30, max_tokens=500):
        """Create an analyzer configured from the ``config`` module.

        Args:
            timeout: Timeout passed to ``requests.post`` (seconds).
            max_tokens: ``max_tokens`` value sent in the request payload.
        """
        self.api_url = LLM_API_URL
        self.api_key = LLM_API_KEY
        self.model = LLM_MODEL
        self.timeout = timeout
        self.max_tokens = max_tokens

    @staticmethod
    def _failure(message):
        """Build a failure result; ``events`` is always present (empty)."""
        return {'success': False, 'events': [], 'error': message}

    def _guess_mime_type(self, image_path):
        """Return the image MIME type implied by the file name, else JPEG."""
        guessed, _ = mimetypes.guess_type(str(image_path))
        if guessed and guessed.startswith("image/"):
            return guessed
        return self._DEFAULT_MIME_TYPE

    def encode_image(self, image_path):
        """Read the file at ``image_path`` and return it base64-encoded.

        Returns:
            str: Base64 text of the file's raw bytes.
        """
        with open(image_path, 'rb') as f:
            return base64.b64encode(f.read()).decode('utf-8')

    def analyze(self, image_path):
        """Send the image to the vision API and parse the reply into events.

        Args:
            image_path: Path of the image file to analyze.

        Returns:
            dict: On success ``{'success': True, 'raw_response': <model text>,
            'events': list}``; on failure ``{'success': False, 'events': [],
            'error': str}``.  Exceptions are not propagated; they are
            reported through the ``error`` field.
        """
        try:
            image_base64 = self.encode_image(image_path)
            mime_type = self._guess_mime_type(image_path)

            # Request body in the OpenAI vision chat format: one user message
            # carrying the prompt text and the image as a data URL.
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            }

            payload = {
                "model": self.model,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": ANALYSIS_PROMPT
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{mime_type};base64,{image_base64}"
                                }
                            }
                        ]
                    }
                ],
                "max_tokens": self.max_tokens
            }

            # Tolerate a configured base URL that ends with "/".
            endpoint = f"{self.api_url}".rstrip("/") + "/chat/completions"

            response = requests.post(
                endpoint,
                headers=headers,
                json=payload,
                timeout=self.timeout
            )

            if response.status_code != 200:
                return self._failure(
                    f"API 错误: {response.status_code} - {response.text}"
                )

            result = response.json()
            try:
                content = result['choices'][0]['message']['content']
            except (KeyError, IndexError, TypeError) as e:
                # A bare KeyError message such as "'choices'" is not useful
                # to the caller; say what actually went wrong.
                return self._failure(f"API 响应格式异常: {e!r}")

            events = self.parse_events(content)

            return {
                'success': True,
                'raw_response': content,
                'events': events
            }

        except requests.Timeout:
            return self._failure('API 请求超时')
        except Exception as e:
            # Boundary handler: callers receive an error dict, never a raise.
            return self._failure(str(e))

    def parse_events(self, content):
        """Extract event records from the model's text reply.

        The checks run in this order:

        1. If the text contains one of the "no obvious event" phrases, a
           single fixed "no event" record is returned.  This check runs
           before structured parsing, so it takes precedence over it.
        2. Every "事件类型 / 描述 / 置信度" group found becomes one event.
        3. Otherwise the first 200 characters of the text are returned as
           the description of a single generic event.

        Args:
            content: Model reply text; ``None`` is treated as empty text.

        Returns:
            list[dict]: Non-empty list of dicts with the keys
            ``event_type``, ``description`` and ``confidence``.
        """
        if content is None:
            content = ""

        if "无明显事件" in content or "没有明显" in content:
            return [{
                'event_type': '无事件',
                'description': '无明显事件',
                'confidence': '高'
            }]

        events = [
            {
                'event_type': event_type.strip(),
                'description': description.strip(),
                'confidence': confidence.strip()
            }
            for event_type, description, confidence
            in self._EVENT_PATTERN.findall(content)
        ]

        if not events:
            # No structured fields found: keep a truncated copy of the raw
            # text as the description.
            events.append({
                'event_type': '其他',
                'description': content[:200],
                'confidence': '中'
            })

        return events


def analyze_image(image_path):
    """Analyze one image with a freshly created ``ImageAnalyzer``.

    Shortcut for callers that do not need to keep an analyzer instance;
    returns whatever ``ImageAnalyzer.analyze`` returns for ``image_path``.
    """
    return ImageAnalyzer().analyze(image_path)


if __name__ == "__main__":
    # Manual smoke test: analyze data/images/test.jpg next to this file,
    # if that sample image exists, and print the result as JSON.
    sample = Path(__file__).parent / "data" / "images" / "test.jpg"
    if not sample.exists():
        print("没有测试图片，请先拍照")
    else:
        print(f"分析图片: {sample}")
        outcome = analyze_image(sample)
        print(f"结果: {json.dumps(outcome, ensure_ascii=False, indent=2)}")