feat: v2.0.0 架构重构 - 大模型池、Agent管理、渠道独立绑定、思考功能

2026-04-12 16:13:34 +08:00
parent 28d0fbc5ce
commit a0c43ba67b
10 changed files with 3597 additions and 74 deletions
--- a/services/llm_service.py
+++ b/services/llm_service.py
@@ -0,0 +1,319 @@
+"""
+LLM服务 - 大模型池管理，支持思考功能
+"""
+import httpx
+from typing import List, Dict, AsyncGenerator, Optional, Tuple
+import json
+import logging
+import re
+
+logger = logging.getLogger(__name__)
+
+
+class LLMService:
+    """大模型调用服务，支持思考功能"""
+    
+    def __init__(self):
+        self.providers_cache = {}  # 缓存Provider配置
+    
+    def load_provider(self, provider_config: dict):
+        """加载Provider配置"""
+        provider_id = provider_config.get('id')
+        self.providers_cache[provider_id] = {
+            'api_base': provider_config.get('api_base'),
+            'api_key': provider_config.get('api_key'),
+            'supports_thinking': provider_config.get('supports_thinking', False),
+            'thinking_model': provider_config.get('thinking_model'),
+            'default_model': provider_config.get('default_model'),
+            'max_tokens': provider_config.get('max_tokens', 4096),
+            'temperature': provider_config.get('temperature', 0.7)
+        }
+    
+    async def get_available_models(self, api_base: str, api_key: str) -> List[dict]:
+        """从API获取可用模型列表"""
+        if not api_base:
+            return []
+        
+        try:
+            url = f"{api_base}/models"
+            headers = {"Authorization": f"Bearer {api_key}"}
+            
+            async with httpx.AsyncClient(timeout=10.0) as client:
+                response = await client.get(url, headers=headers)
+                if response.status_code == 200:
+                    data = response.json()
+                    models = []
+                    for m in data.get('data', []):
+                        model_id = m.get('id', '')
+                        if model_id:
+                            models.append({
+                                "id": model_id,
+                                "name": m.get('name', model_id),
+                                "owned_by": m.get('owned_by', 'unknown')
+                            })
+                    return models
+        except Exception as e:
+            logger.warning(f"获取模型列表失败: {e}")
+        
+        return []
+    
+    async def test_connection(self, api_base: str, api_key: str, model: str) -> dict:
+        """测试API连接"""
+        try:
+            url = f"{api_base}/chat/completions"
+            headers = {
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json"
+            }
+            payload = {
+                "model": model,
+                "messages": [{"role": "user", "content": "测试连接"}],
+                "max_tokens": 50
+            }
+            
+            async with httpx.AsyncClient(timeout=15.0) as client:
+                response = await client.post(url, headers=headers, json=payload)
+                if response.status_code == 200:
+                    data = response.json()
+                    content = data['choices'][0]['message']['content']
+                    return {
+                        "success": True,
+                        "message": f"连接成功！模型响应: {content[:100]}",
+                        "model": model
+                    }
+                else:
+                    return {
+                        "success": False,
+                        "message": f"连接失败: HTTP {response.status_code}"
+                    }
+        except httpx.ConnectError:
+            return {"success": False, "message": f"无法连接到API地址"}
+        except httpx.TimeoutException:
+            return {"success": False, "message": "连接超时"}
+        except Exception as e:
+            return {"success": False, "message": f"连接失败: {str(e)}"}
+    
+    async def chat(
+        self,
+        messages: List[Dict],
+        provider_config: dict,
+        agent_config: dict,
+        enable_thinking: bool = True
+    ) -> Tuple[str, Optional[str]]:
+        """
+        调用AI模型进行对话
+        
+        Returns:
+            Tuple[str, Optional[str]]: (回复内容, 思考过程)
+        """
+        api_base = provider_config.get('api_base')
+        api_key = provider_config.get('api_key')
+        model = agent_config.get('model_override') or provider_config.get('default_model', 'auto')
+        supports_thinking = provider_config.get('supports_thinking', False)
+        thinking_model = provider_config.get('thinking_model')
+        
+        max_tokens = provider_config.get('max_tokens', 4096)
+        temperature = agent_config.get('temperature_override') or provider_config.get('temperature', 0.7)
+        
+        # 构建消息
+        final_messages = messages.copy()
+        
+        # 添加系统提示
+        system_prompt = agent_config.get('system_prompt', '你是一个有用的AI助手。')
+        if final_messages and final_messages[0]['role'] != 'system':
+            final_messages.insert(0, {"role": "system", "content": system_prompt})
+        
+        thinking_content = None
+        
+        # 处理思考功能
+        if enable_thinking and agent_config.get('enable_thinking', True):
+            thinking_prompt = agent_config.get('thinking_prompt')
+            
+            if supports_thinking and thinking_model:
+                # 使用专门的思考模型
+                thinking_messages = final_messages.copy()
+                if thinking_prompt:
+                    thinking_messages.append({"role": "system", "content": thinking_prompt})
+                
+                try:
+                    thinking_result = await self._call_api(
+                        api_base, api_key, thinking_model, thinking_messages,
+                        max_tokens=min(max_tokens, 1000),
+                        temperature=0.3  # 思考时降低温度
+                    )
+                    thinking_content = thinking_result
+                except Exception as e:
+                    logger.warning(f"思考模型调用失败: {e}")
+            
+            elif supports_thinking:
+                # Provider支持思考但无单独模型，尝试在回复中获取思考部分
+                pass  # 在回复解析时处理
+            
+            elif thinking_prompt:
+                # Provider不支持思考，但Agent配置了思考提示词
+                # 将思考提示词添加到系统提示
+                enhanced_system = f"{system_prompt}\n\n在回答之前，请先思考问题。思考过程请用{agent_config.get('thinking_prefix', '')}和{agent_config.get('thinking_suffix', '')}包裹，然后再给出正式回答。"
+                if thinking_prompt:
+                    enhanced_system += f"\n思考指导：{thinking_prompt}"
+                final_messages[0] = {"role": "system", "content": enhanced_system}
+        
+        # 调用主模型
+        try:
+            response = await self._call_api(
+                api_base, api_key, model, final_messages,
+                max_tokens=max_tokens,
+                temperature=temperature
+            )
+            
+            # 尝试从回复中提取思考内容
+            if enable_thinking and not supports_thinking:
+                thinking_prefix = agent_config.get('thinking_prefix', '')
+                thinking_suffix = agent_config.get('thinking_suffix', '')
+                
+                if thinking_prefix and thinking_suffix:
+                    # 提取思考部分
+                    pattern = f"{re.escape(thinking_prefix)}(.*?)?{re.escape(thinking_suffix)}"
+                    match = re.search(pattern, response, re.DOTALL)
+                    if match:
+                        thinking_content = match.group(1).strip()
+                        # 移除思考部分，只保留回复
+                        response = re.sub(pattern, '', response, flags=re.DOTALL).strip()
+            
+            return response, thinking_content
+            
+        except Exception as e:
+            logger.error(f"LLM调用失败: {e}")
+            raise
+    
+    async def _call_api(
+        self,
+        api_base: str,
+        api_key: str,
+        model: str,
+        messages: List[Dict],
+        max_tokens: int = 4096,
+        temperature: float = 0.7
+    ) -> str:
+        """调用API"""
+        url = f"{api_base}/chat/completions"
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json"
+        }
+        payload = {
+            "model": model,
+            "messages": messages,
+            "temperature": temperature,
+            "max_tokens": max_tokens
+        }
+        
+        logger.info(f"调用LLM: url={url}, model={model}")
+        
+        async with httpx.AsyncClient(timeout=60.0) as client:
+            response = await client.post(url, headers=headers, json=payload)
+            response.raise_for_status()
+            data = response.json()
+            return data['choices'][0]['message']['content']
+    
+    async def chat_stream(
+        self,
+        messages: List[Dict],
+        provider_config: dict,
+        agent_config: dict,
+        enable_thinking: bool = True
+    ) -> AsyncGenerator[dict, None]:
+        """
+        流式调用AI模型
+        
+        Yields:
+            dict: {"type": "thinking"|"content", "text": "..."}
+        """
+        api_base = provider_config.get('api_base')
+        api_key = provider_config.get('api_key')
+        model = agent_config.get('model_override') or provider_config.get('default_model', 'auto')
+        max_tokens = provider_config.get('max_tokens', 4096)
+        temperature = agent_config.get('temperature_override') or provider_config.get('temperature', 0.7)
+        
+        # 构建消息
+        final_messages = messages.copy()
+        system_prompt = agent_config.get('system_prompt', '你是一个有用的AI助手。')
+        if final_messages and final_messages[0]['role'] != 'system':
+            final_messages.insert(0, {"role": "system", "content": system_prompt})
+        
+        # 如果启用思考但模型不支持
+        if enable_thinking and agent_config.get('enable_thinking', True):
+            supports_thinking = provider_config.get('supports_thinking', False)
+            thinking_prompt = agent_config.get('thinking_prompt')
+            
+            if not supports_thinking and thinking_prompt:
+                thinking_prefix = agent_config.get('thinking_prefix', '')
+                thinking_suffix = agent_config.get('thinking_suffix', '')
+                enhanced_system = f"{system_prompt}\n\n在回答之前，请先思考问题。思考过程请用{thinking_prefix}和{thinking_suffix}包裹，然后再给出正式回答。"
+                if thinking_prompt:
+                    enhanced_system += f"\n思考指导：{thinking_prompt}"
+                final_messages[0] = {"role": "system", "content": enhanced_system}
+        
+        url = f"{api_base}/chat/completions"
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json"
+        }
+        payload = {
+            "model": model,
+            "messages": final_messages,
+            "stream": True,
+            "temperature": temperature,
+            "max_tokens": max_tokens
+        }
+        
+        thinking_prefix = agent_config.get('thinking_prefix', '')
+        thinking_suffix = agent_config.get('thinking_suffix', '')
+        in_thinking = False
+        thinking_buffer = ""
+        
+        async with httpx.AsyncClient(timeout=60.0) as client:
+            async with client.stream("POST", url, headers=headers, json=payload) as response:
+                async for line in response.aiter_lines():
+                    if line.startswith("data: "):
+                        data_str = line[6:]
+                        if data_str == "[DONE]":
+                            break
+                        try:
+                            data = json.loads(data_str)
+                            if 'choices' in data and len(data['choices']) > 0:
+                                delta = data['choices'][0].get('delta', {})
+                                if 'content' in delta:
+                                    text = delta['content']
+                                    
+                                    # 检测思考部分
+                                    if thinking_prefix and thinking_suffix:
+                                        for char in text:
+                                            if in_thinking:
+                                                thinking_buffer += char
+                                                # 检查是否结束思考
+                                                if thinking_buffer.endswith(thinking_suffix):
+                                                    thinking_content = thinking_buffer[:-len(thinking_suffix)]
+                                                    yield {"type": "thinking", "text": thinking_content}
+                                                    in_thinking = False
+                                                    thinking_buffer = ""
+                                                else:
+                                                    # 检查是否接近结束
+                                                    suffix_len = len(thinking_suffix)
+                                                    if len(thinking_buffer) >= suffix_len:
+                                                        yield {"type": "thinking", "text": thinking_buffer[-suffix_len:]}
+                                            else:
+                                                if char == thinking_prefix[0]:
+                                                    # 可能开始思考
+                                                    thinking_buffer = char
+                                                    if len(thinking_prefix) == 1:
+                                                        in_thinking = True
+                                                else:
+                                                    yield {"type": "content", "text": char}
+                                    else:
+                                        yield {"type": "content", "text": text}
+                        except json.JSONDecodeError:
+                            continue
+
+
+# 全局实例
+llm_service = LLMService()