fix: 增强错误日志，修复API调用诊断

feat: 多模态支持 - 图片数据传递给视觉模型
2026-04-14 09:28:28 +08:00 · 2026-04-14 09:21:36 +08:00
2 changed files with 53 additions and 8 deletions
--- a/main_v2.py
+++ b/main_v2.py
@@ -969,7 +969,8 @@ async def websocket_endpoint(websocket: WebSocket, user_id: str):
                            messages=history,
                            provider_config=agent_config['provider'],
                            agent_config=agent_config['agent'],
-                            enable_thinking=enable_thinking
+                            enable_thinking=enable_thinking,
+                            images=image_contents  # 传递图片数据给多模态模型
                        )
                        
                        logger.info(f"LLM响应: response长度={len(response)}, thinking长度={len(thinking_content) if thinking_content else 0}")
--- a/services/llm_service.py
+++ b/services/llm_service.py
@@ -98,11 +98,19 @@ class LLMService:
        messages: List[Dict],
        provider_config: dict,
        agent_config: dict,
-        enable_thinking: bool = True
+        enable_thinking: bool = True,
+        images: List[Dict] = None  # 图片数据列表 [{'name', 'type', 'data': base64}]
    ) -> Tuple[str, Optional[str]]:
        """
        调用AI模型进行对话
        
+        Args:
+            messages: 对话历史
+            provider_config: LLM Provider配置
+            agent_config: Agent配置
+            enable_thinking: 是否启用思考
+            images: 图片数据列表（用于多模态模型）
+        
        Returns:
            Tuple[str, Optional[str]]: (回复内容, 思考过程)
        """
@@ -123,6 +131,22 @@ class LLMService:
        if final_messages and final_messages[0]['role'] != 'system':
            final_messages.insert(0, {"role": "system", "content": system_prompt})
        
+        # 如果有图片，构建多模态消息（只修改最后一条用户消息）
+        if images and len(images) > 0:
+            # 找到最后一条用户消息
+            for i in range(len(final_messages) - 1, -1, -1):
+                if final_messages[i]['role'] == 'user':
+                    original_text = final_messages[i]['content']
+                    # 构建多模态内容
+                    multimodal_content = [{"type": "text", "text": original_text if original_text else "请描述这张图片"}]
+                    for img in images:
+                        multimodal_content.append({
+                            "type": "image_url",
+                            "image_url": {"url": img['data']}  # base64 data URL
+                        })
+                    final_messages[i]['content'] = multimodal_content
+                    break
+        
        thinking_content = None
        
        # 处理思考功能
@@ -208,7 +232,7 @@ class LLMService:
        temperature: float = 0.7
    ) -> str:
        """调用API"""
-        url = f"{api_base}/chat/completions"
+        url = f"{api_base.rstrip('/')}/chat/completions"
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
@@ -220,13 +244,33 @@ class LLMService:
            "max_tokens": max_tokens
        }
        
+        # 打印请求详情（调试）
        logger.info(f"调用LLM: url={url}, model={model}")
+        logger.info(f"消息数量: {len(messages)}, 第一条消息类型: {type(messages[0].get('content'))}")
        
-        async with httpx.AsyncClient(timeout=60.0) as client:
-            response = await client.post(url, headers=headers, json=payload)
-            response.raise_for_status()
-            data = response.json()
-            return data['choices'][0]['message']['content']
+        try:
+            async with httpx.AsyncClient(timeout=60.0) as client:
+                response = await client.post(url, headers=headers, json=payload)
+                
+                # 检查HTTP状态
+                if response.status_code != 200:
+                    logger.error(f"API返回错误: status={response.status_code}, body={response.text[:500]}")
+                    response.raise_for_status()
+                
+                data = response.json()
+                
+                # 检查响应格式
+                if 'choices' not in data or len(data['choices']) == 0:
+                    logger.error(f"API响应格式错误: {data}")
+                    raise ValueError("API响应格式错误：缺少choices")
+                
+                return data['choices'][0]['message']['content']
+        except httpx.HTTPStatusError as e:
+            logger.error(f"HTTP错误: {e.response.status_code}, {e.response.text}")
+            raise
+        except Exception as e:
+            logger.error(f"API调用异常: {type(e).__name__}: {e}")
+            raise
    
    async def chat_stream(
        self,
Author	SHA1	Message	Date
hubian	0c4cc96106	fix: 增强错误日志，修复API调用诊断	2026-04-14 09:28:28 +08:00
hubian	2dca775911	feat: 多模态支持 - 图片数据传递给视觉模型	2026-04-14 09:21:36 +08:00