"""
LLM service - manages the large-model (LLM) provider pool, with support for a thinking feature.
"""
import httpx
from typing import List, Dict, AsyncGenerator, Optional, Tuple
import json
import logging
import re
logger = logging.getLogger(__name__)
class LLMService:
"""大模型调用服务,支持思考功能"""
def __init__(self):
        self.providers_cache = {}  # cache of provider configurations
def load_provider(self, provider_config: dict):
"""加载Provider配置"""
provider_id = provider_config.get('id')
self.providers_cache[provider_id] = {
'api_base': provider_config.get('api_base'),
'api_key': provider_config.get('api_key'),
'supports_thinking': provider_config.get('supports_thinking', False),
'thinking_model': provider_config.get('thinking_model'),
'default_model': provider_config.get('default_model'),
'max_tokens': provider_config.get('max_tokens', 4096),
'temperature': provider_config.get('temperature', 0.7)
}
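    # Illustrative sketch of the provider_config shape consumed above; the keys mirror
    # load_provider, but the concrete values are placeholders, not taken from the project:
    #   {
    #       "id": "openai-main",
    #       "api_base": "https://api.openai.com/v1",
    #       "api_key": "sk-...",
    #       "supports_thinking": False,
    #       "thinking_model": None,
    #       "default_model": "gpt-4o-mini",
    #       "max_tokens": 4096,
    #       "temperature": 0.7
    #   }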
async def get_available_models(self, api_base: str, api_key: str) -> List[dict]:
"""从API获取可用模型列表"""
if not api_base:
return []
try:
url = f"{api_base}/models"
headers = {"Authorization": f"Bearer {api_key}"}
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(url, headers=headers)
if response.status_code == 200:
data = response.json()
models = []
for m in data.get('data', []):
model_id = m.get('id', '')
if model_id:
models.append({
"id": model_id,
"name": m.get('name', model_id),
"owned_by": m.get('owned_by', 'unknown')
})
return models
except Exception as e:
logger.warning(f"获取模型列表失败: {e}")
return []
async def test_connection(self, api_base: str, api_key: str, model: str) -> dict:
"""测试API连接"""
try:
url = f"{api_base}/chat/completions"
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
payload = {
"model": model,
"messages": [{"role": "user", "content": "测试连接"}],
"max_tokens": 50
}
async with httpx.AsyncClient(timeout=15.0) as client:
response = await client.post(url, headers=headers, json=payload)
if response.status_code == 200:
data = response.json()
content = data['choices'][0]['message']['content']
return {
"success": True,
"message": f"连接成功!模型响应: {content[:100]}",
"model": model
}
else:
return {
"success": False,
"message": f"连接失败: HTTP {response.status_code}"
}
except httpx.ConnectError:
return {"success": False, "message": f"无法连接到API地址"}
except httpx.TimeoutException:
return {"success": False, "message": "连接超时"}
except Exception as e:
return {"success": False, "message": f"连接失败: {str(e)}"}
async def chat(
self,
messages: List[Dict],
provider_config: dict,
agent_config: dict,
enable_thinking: bool = True,
        images: List[Dict] = None  # list of image payloads: [{'name', 'type', 'data': base64}]
    ) -> Tuple[str, Optional[str]]:
        """
        Call the AI model for a conversational reply.
        Args:
            messages: conversation history
            provider_config: LLM provider configuration
            agent_config: agent configuration
            enable_thinking: whether the thinking step is enabled
            images: list of image payloads (for multimodal models)
        Returns:
            Tuple[str, Optional[str]]: (reply content, thinking process)
        """
api_base = provider_config.get('api_base')
api_key = provider_config.get('api_key')
model = agent_config.get('model_override') or provider_config.get('default_model', 'auto')
supports_thinking = provider_config.get('supports_thinking', False)
thinking_model = provider_config.get('thinking_model')
max_tokens = provider_config.get('max_tokens', 4096)
temperature = agent_config.get('temperature_override') or provider_config.get('temperature', 0.7)
        # Build the message list
        final_messages = messages.copy()
        # Add the system prompt
        system_prompt = agent_config.get('system_prompt', 'You are a helpful AI assistant.')
        if final_messages and final_messages[0]['role'] != 'system':
            final_messages.insert(0, {"role": "system", "content": system_prompt})
        # If images were provided, build a multimodal message (only the last user message is modified)
        if images and len(images) > 0:
            # Find the last user message
for i in range(len(final_messages) - 1, -1, -1):
if final_messages[i]['role'] == 'user':
original_text = final_messages[i]['content']
                    # Build the multimodal content
                    multimodal_content = [{"type": "text", "text": original_text if original_text else "Please describe this image"}]
for img in images:
multimodal_content.append({
"type": "image_url",
"image_url": {"url": img['data']} # base64 data URL
})
final_messages[i]['content'] = multimodal_content
break
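        # Note: each image entry is assumed to carry a base64 data URL in img['data']
        # (e.g. "data:image/png;base64,..."), matching the OpenAI-style "image_url" content part.
        # Also note that messages.copy() above is a shallow copy, so replacing the last user
        # message's 'content' here also mutates the message dict passed in by the caller.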
thinking_content = None
        # Handle the thinking feature
if enable_thinking and agent_config.get('enable_thinking', True):
thinking_prompt = agent_config.get('thinking_prompt')
thinking_prefix = agent_config.get('thinking_prefix', '')
thinking_suffix = agent_config.get('thinking_suffix', '')
if supports_thinking and thinking_model:
                # Use the dedicated thinking model
thinking_messages = final_messages.copy()
if thinking_prompt:
thinking_messages.append({"role": "system", "content": thinking_prompt})
try:
thinking_result = await self._call_api(
api_base, api_key, thinking_model, thinking_messages,
max_tokens=min(max_tokens, 1000),
temperature=0.3
)
thinking_content = thinking_result
except Exception as e:
logger.warning(f"思考模型调用失败: {e}")
elif thinking_prompt:
                # The agent has a thinking prompt configured; append it to the system prompt
enhanced_system = f"{system_prompt}\n\n{thinking_prompt}"
final_messages[0] = {"role": "system", "content": enhanced_system}
        # Call the main model
try:
response = await self._call_api(
api_base, api_key, model, final_messages,
max_tokens=max_tokens,
temperature=temperature
)
            # Try to extract thinking content from the reply (supports the thinking mode of models such as DeepSeek R1 and GLM)
if enable_thinking and agent_config.get('enable_thinking', True):
thinking_prefix = agent_config.get('thinking_prefix', '')
thinking_suffix = agent_config.get('thinking_suffix', '')
                # If no prefix/suffix is configured, fall back to common thinking markers
                if not thinking_prefix:
                    # Try common thinking markers
common_thinking_markers = [
('<think>', '</think>'),
('【思考】', '【回答】'),
('Thought:', 'Answer:'),
('思考:', '回答:'),
]
for prefix, suffix in common_thinking_markers:
if prefix in response and suffix in response:
thinking_prefix = prefix
thinking_suffix = suffix
break
                # Extract the thinking section
if thinking_prefix and thinking_suffix and thinking_prefix in response:
try:
start_idx = response.find(thinking_prefix)
end_idx = response.find(thinking_suffix, start_idx)
if end_idx > start_idx:
thinking_content = response[start_idx + len(thinking_prefix):end_idx].strip()
                            # Remove the thinking section and keep only the reply
response = response[end_idx + len(thinking_suffix):].strip()
except Exception as e:
logger.warning(f"提取思考内容失败: {e}")
return response, thinking_content
except Exception as e:
logger.error(f"LLM调用失败: {e}")
raise
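    # Worked example of the marker-based extraction above (illustrative): for a raw reply of
    #   "<think>The user is greeting me.</think>Hello! How can I help?"
    # chat() returns ("Hello! How can I help?", "The user is greeting me.").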
async def _call_api(
self,
api_base: str,
api_key: str,
model: str,
messages: List[Dict],
max_tokens: int = 4096,
temperature: float = 0.7
) -> str:
"""调用API"""
url = f"{api_base.rstrip('/')}/chat/completions"
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
payload = {
"model": model,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens
}
        # Log request details (debugging)
        logger.info(f"Calling LLM: url={url}, model={model}")
        logger.info(f"Message count: {len(messages)}, first message content type: {type(messages[0].get('content'))}")
try:
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(url, headers=headers, json=payload)
                # Check the HTTP status
                if response.status_code != 200:
                    logger.error(f"API returned an error: status={response.status_code}, body={response.text[:500]}")
response.raise_for_status()
data = response.json()
                # Check the response format
                if 'choices' not in data or len(data['choices']) == 0:
                    logger.error(f"Unexpected API response format: {data}")
                    raise ValueError("Unexpected API response format: missing 'choices'")
return data['choices'][0]['message']['content']
except httpx.HTTPStatusError as e:
logger.error(f"HTTP错误: {e.response.status_code}, {e.response.text}")
raise
except Exception as e:
logger.error(f"API调用异常: {type(e).__name__}: {e}")
raise
async def chat_stream(
self,
messages: List[Dict],
provider_config: dict,
agent_config: dict,
enable_thinking: bool = True
) -> AsyncGenerator[dict, None]:
"""
        Streamed call to the AI model.
Yields:
dict: {"type": "thinking"|"content", "text": "..."}
"""
api_base = provider_config.get('api_base')
api_key = provider_config.get('api_key')
model = agent_config.get('model_override') or provider_config.get('default_model', 'auto')
max_tokens = provider_config.get('max_tokens', 4096)
temperature = agent_config.get('temperature_override') or provider_config.get('temperature', 0.7)
        # Build the message list
        final_messages = messages.copy()
        system_prompt = agent_config.get('system_prompt', 'You are a helpful AI assistant.')
        if final_messages and final_messages[0]['role'] != 'system':
            final_messages.insert(0, {"role": "system", "content": system_prompt})
        # If thinking is enabled but the model does not support it natively
if enable_thinking and agent_config.get('enable_thinking', True):
supports_thinking = provider_config.get('supports_thinking', False)
thinking_prompt = agent_config.get('thinking_prompt')
if not supports_thinking and thinking_prompt:
thinking_prefix = agent_config.get('thinking_prefix', '')
thinking_suffix = agent_config.get('thinking_suffix', '')
enhanced_system = f"{system_prompt}\n\n在回答之前,请先思考问题。思考过程请用{thinking_prefix}{thinking_suffix}包裹,然后再给出正式回答。"
if thinking_prompt:
enhanced_system += f"\n思考指导:{thinking_prompt}"
final_messages[0] = {"role": "system", "content": enhanced_system}
url = f"{api_base}/chat/completions"
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
payload = {
"model": model,
"messages": final_messages,
"stream": True,
"temperature": temperature,
"max_tokens": max_tokens
}
thinking_prefix = agent_config.get('thinking_prefix', '')
thinking_suffix = agent_config.get('thinking_suffix', '')
buffer = "" # 用于累积和检测思考部分
async with httpx.AsyncClient(timeout=60.0) as client:
async with client.stream("POST", url, headers=headers, json=payload) as response:
async for line in response.aiter_lines():
if line.startswith("data: "):
data_str = line[6:]
if data_str == "[DONE]":
break
try:
data = json.loads(data_str)
if 'choices' in data and len(data['choices']) > 0:
delta = data['choices'][0].get('delta', {})
if 'content' in delta:
text = delta['content']
buffer += text
                                    # Detect the thinking section (simplified logic)
                                    if thinking_prefix and thinking_suffix and thinking_prefix in buffer:
                                        # Try to parse the thinking section
                                        try:
                                            start_idx = buffer.find(thinking_prefix)
                                            if start_idx >= 0:
                                                # Found the start of the thinking section; look for its end
                                                end_idx = buffer.find(thinking_suffix, start_idx)
                                                if end_idx > start_idx:
                                                    # Thinking section is complete: emit the thinking, then the content
                                                    thinking = buffer[start_idx + len(thinking_prefix):end_idx]
                                                    yield {"type": "thinking", "text": thinking}
                                                    # Emit the content that follows the thinking section
                                                    remaining = buffer[end_idx + len(thinking_suffix):]
                                                    if remaining:
                                                        yield {"type": "content", "text": remaining}
                                                    buffer = ""
                                                else:
                                                    # Thinking section not finished yet; emit the content before it first
                                                    if start_idx > 0:
                                                        yield {"type": "content", "text": buffer[:start_idx]}
                                                    # Wait for more data to complete the thinking section
                                                    buffer = buffer[start_idx:]
                                            else:
                                                # No thinking marker found; emit the content directly
                                                yield {"type": "content", "text": text}
                                                buffer = ""
                                        except Exception:
                                            yield {"type": "content", "text": text}
                                    else:
                                        # No thinking markers configured; emit the content directly
                                        yield {"type": "content", "text": text}
                        except json.JSONDecodeError:
                            continue
        # Flush any remaining buffer
        if buffer:
            yield {"type": "content", "text": buffer}
async def chat_with_tools(
self,
messages: List[Dict],
provider_config: dict,
agent_config: dict,
tools: List[Dict] = None,
enable_thinking: bool = True,
images: List[Dict] = None
) -> Tuple[str, Optional[str], Optional[List[Dict]]]:
"""
        Chat with Function Calling support.
        Args:
            messages: conversation history
            provider_config: LLM provider configuration
            agent_config: agent configuration
            tools: list of tool definitions (OpenAI Function Calling format)
            enable_thinking: whether the thinking step is enabled
            images: list of image payloads
        Returns:
            Tuple[str, Optional[str], Optional[List[Dict]]]: (reply content, thinking process, tool-call records)
"""
api_base = provider_config.get('api_base')
api_key = provider_config.get('api_key')
model = agent_config.get('model_override') or provider_config.get('default_model', 'auto')
supports_function_calling = provider_config.get('supports_function_calling', False)
max_tokens = provider_config.get('max_tokens', 4096)
temperature = agent_config.get('temperature_override') or provider_config.get('temperature', 0.7)
        # If Function Calling is not supported, or no tools are provided, fall back to a plain chat call
if not supports_function_calling or not tools:
response, thinking = await self.chat(messages, provider_config, agent_config, enable_thinking, images)
return response, thinking, None
        # Build the message list
        final_messages = messages.copy()
        system_prompt = agent_config.get('system_prompt', 'You are a helpful AI assistant.')
        if final_messages and final_messages[0]['role'] != 'system':
            final_messages.insert(0, {"role": "system", "content": system_prompt})
        # Handle images (multimodal)
if images and len(images) > 0:
for i in range(len(final_messages) - 1, -1, -1):
if final_messages[i]['role'] == 'user':
original_text = final_messages[i]['content']
multimodal_content = [{"type": "text", "text": original_text if original_text else "请描述这张图片"}]
for img in images:
multimodal_content.append({
"type": "image_url",
"image_url": {"url": img['data']}
})
final_messages[i]['content'] = multimodal_content
break
        # First call: let the LLM decide whether to invoke tools
url = f"{api_base.rstrip('/')}/chat/completions"
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
payload = {
"model": model,
"messages": final_messages,
"temperature": temperature,
"max_tokens": max_tokens,
"tools": tools # 传入工具定义
}
logger.info(f"Function Calling调用: url={url}, model={model}, tools={len(tools)}")
tool_calls_record = [] # 记录工具调用
try:
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(url, headers=headers, json=payload)
if response.status_code != 200:
logger.error(f"API返回错误: status={response.status_code}, body={response.text[:500]}")
response.raise_for_status()
data = response.json()
if 'choices' not in data or len(data['choices']) == 0:
raise ValueError("API响应格式错误缺少choices")
message = data['choices'][0]['message']
# 检查是否有工具调用
if 'tool_calls' in message and message['tool_calls']:
logger.info(f"LLM请求调用工具: {len(message['tool_calls'])}")
# 将LLM的工具调用消息添加到历史
final_messages.append({
"role": "assistant",
"content": None,
"tool_calls": message['tool_calls']
})
                    # Record the tool calls
for tc in message['tool_calls']:
tool_calls_record.append({
"id": tc['id'],
"name": tc['function']['name'],
"arguments": json.loads(tc['function']['arguments'])
})
                    # Return the tool-call records; the caller is responsible for executing the tools
                    return None, None, tool_calls_record
                # No tool calls; return the content directly
                content = message.get('content', '')
                # Handle thinking content (if any)
                thinking_content = None
                # Thinking-content extraction logic could be added here
return content, thinking_content, None
except httpx.HTTPStatusError as e:
logger.error(f"HTTP错误: {e.response.status_code}, {e.response.text}")
raise
except Exception as e:
logger.error(f"Function Calling调用异常: {type(e).__name__}: {e}")
raise
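    # Two-phase tool-calling flow, as implemented above (caller-side summary):
    #   1. chat_with_tools(...) returns (None, None, tool_calls_record) when the model requests tools;
    #      each record looks like {"id": ..., "name": ..., "arguments": {...}}.
    #   2. The caller executes each tool and builds tool_results entries of the form
    #      {"tool_call_id": record["id"], "content": "<tool output as text>"}.
    #   3. chat_with_tool_results(...) sends those results back and returns the final reply.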
async def chat_with_tool_results(
self,
messages: List[Dict],
provider_config: dict,
agent_config: dict,
tool_results: List[Dict],
enable_thinking: bool = True
) -> Tuple[str, Optional[str]]:
"""
        Second-stage call: return the tool execution results to the LLM.
        Args:
            messages: conversation history (including tool calls and results)
            provider_config: LLM provider configuration
            agent_config: agent configuration
            tool_results: tool execution results [{"tool_call_id": "xxx", "content": "..."}]
        Returns:
            Tuple[str, Optional[str]]: (reply content, thinking process)
"""
api_base = provider_config.get('api_base')
api_key = provider_config.get('api_key')
model = agent_config.get('model_override') or provider_config.get('default_model', 'auto')
max_tokens = provider_config.get('max_tokens', 4096)
temperature = agent_config.get('temperature_override') or provider_config.get('temperature', 0.7)
        # Append the tool results to the message history
final_messages = messages.copy()
for result in tool_results:
final_messages.append({
"role": "tool",
"tool_call_id": result['tool_call_id'],
"content": result['content']
})
        # Call the LLM to generate the final reply
url = f"{api_base.rstrip('/')}/chat/completions"
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
payload = {
"model": model,
"messages": final_messages,
"temperature": temperature,
"max_tokens": max_tokens
}
logger.info(f"工具结果返回LLM: url={url}, model={model}")
try:
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(url, headers=headers, json=payload)
if response.status_code != 200:
logger.error(f"API返回错误: status={response.status_code}")
response.raise_for_status()
data = response.json()
content = data['choices'][0]['message']['content']
return content, None
except Exception as e:
logger.error(f"工具结果调用异常: {e}")
raise
# Global instance
llm_service = LLMService()
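# Minimal local smoke test (illustrative sketch only): the endpoint, API key, and model name
# below are placeholders, not values from this project; point them at a real OpenAI-compatible
# service before running.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        provider = {
            "api_base": "https://api.example.com/v1",  # placeholder OpenAI-compatible endpoint
            "api_key": "sk-placeholder",               # placeholder key
            "default_model": "gpt-4o-mini",            # placeholder model name
        }
        agent = {"system_prompt": "You are a helpful AI assistant."}
        reply, thinking = await llm_service.chat(
            [{"role": "user", "content": "Hello"}], provider, agent
        )
        print("thinking:", thinking)
        print("reply:", reply)

    asyncio.run(_demo())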