diff --git a/__pycache__/server.cpython-310.pyc b/__pycache__/server.cpython-310.pyc index bed4e99..06ec19b 100644 Binary files a/__pycache__/server.cpython-310.pyc and b/__pycache__/server.cpython-310.pyc differ diff --git a/__pycache__/tts_service.cpython-310.pyc b/__pycache__/tts_service.cpython-310.pyc new file mode 100644 index 0000000..f9b665b Binary files /dev/null and b/__pycache__/tts_service.cpython-310.pyc differ diff --git a/logs/server.log b/logs/server.log index 8e45bc0..b2a6182 100644 Binary files a/logs/server.log and b/logs/server.log differ diff --git a/main.py b/main.py index d8731e5..bc901cf 100644 --- a/main.py +++ b/main.py @@ -33,10 +33,16 @@ app.mount("/static", StaticFiles(directory="static"), name="static") @app.get("/") async def index(): - """主页""" + """主页(原版)""" return FileResponse("static/index.html") +@app.get("/tts") +async def tts_page(): + """TTS版本页面""" + return FileResponse("static/tts.html") + + if __name__ == "__main__": PORT = int(os.getenv("PORT", "19019")) SSL_KEY = os.getenv("SSL_KEY", "key.pem") diff --git a/requirements.txt b/requirements.txt index 781210c..7c99566 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ fastapi==0.110.0 uvicorn==0.27.1 python-multipart==0.0.9 -aiohttp==3.9.3 \ No newline at end of file +aiohttp==3.9.3 +edge-tts==6.1.9 \ No newline at end of file diff --git a/server.py b/server.py index 079bb8c..ac5a0db 100644 --- a/server.py +++ b/server.py @@ -11,8 +11,13 @@ from datetime import datetime import aiohttp from fastapi import FastAPI, UploadFile, File, HTTPException, Form from fastapi.middleware.cors import CORSMiddleware +from fastapi.staticfiles import StaticFiles +from fastapi.responses import FileResponse from pydantic import BaseModel +# 导入 TTS 服务 +from tts_service import tts_manager, AUDIO_DIR + # 配置 MODEL_SERVICE_URL = os.getenv("MODEL_SERVICE_URL", "http://localhost:19018") PORT = int(os.getenv("PORT", "19019")) @@ -186,6 +191,70 @@ async def 
# (diff context, server.py) tail of delete_conversation(conversation_id: str):
#     raise HTTPException(status_code=500, detail=str(e))


# ========== TTS endpoints ==========

class TTSSettings(BaseModel):
    """Request body for selecting the active TTS provider and voice."""
    provider: str = "none"
    voice: Optional[str] = None


class TTSResponse(BaseModel):
    """Response body of the synthesis endpoint."""
    # Explicit default: without it Pydantic v2 treats an Optional field as
    # required, while Pydantic v1 treats it as optional.
    audio_url: Optional[str] = None
    provider: str


@app.get("/tts/providers")
async def get_tts_providers():
    """Return the registered TTS providers, the Edge voices and the active provider."""
    return {
        "providers": tts_manager.list_providers(),
        "voices": tts_manager.get_edge_voices(),
        "current": tts_manager.current_provider,
    }


@app.post("/tts/settings")
async def set_tts_settings(settings: TTSSettings):
    """
    Select the active TTS provider and, for Edge TTS, the voice.

    The response reports the state that is actually in effect afterwards.
    The manager ignores unknown provider names and the Edge provider ignores
    unknown voices (both only log a warning), so echoing the request back
    would tell the client about a configuration that was never applied.
    """
    tts_manager.set_provider(settings.provider)

    voice = settings.voice
    # Voices only apply to Edge TTS.
    if settings.provider == "edge" and settings.voice:
        edge = tts_manager.get_provider("edge")
        # get_provider() falls back to another provider when "edge" is not
        # registered, so the voice setter may legitimately be missing.
        set_voice = getattr(edge, "set_voice", None)
        if set_voice is not None:
            set_voice(settings.voice)
            voice = getattr(edge, "voice", settings.voice)

    return {
        "provider": tts_manager.current_provider,
        "voice": voice,
    }


@app.post("/tts/synthesize")
async def synthesize_tts(text: str = Form(...), provider: Optional[str] = Form(None)):
    """
    Synthesize speech for ``text`` and return the URL of the audio file.

    ``provider`` optionally overrides the active provider for this request.
    ``audio_url`` in the response is whatever ``tts_manager.synthesize``
    returned and may be null.

    Raises:
        HTTPException: status 500 if synthesis raises unexpectedly.
    """
    try:
        audio_url = await tts_manager.synthesize(text, provider)
    except Exception as e:
        # logger.exception records the traceback as well as the message.
        logger.exception(f"TTS synthesis error: {e}")
        raise HTTPException(status_code=500, detail=str(e))

    return TTSResponse(
        audio_url=audio_url,
        provider=provider or tts_manager.current_provider,
    )


# Serve generated audio files.
app.mount("/audio", StaticFiles(directory=AUDIO_DIR), name="audio")


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=PORT)

# (diff continues) static/tts.html -- new file, index 0000000..87f0f4c, 873 lines;
# page title: 语音对话(TTS版)
+
+

🎤 语音对话(TTS版)

+
+ + 检测连接... +
+
+ + +
+

🔊 TTS语音合成设置

+
+
+
❌ 无 TTS
+
只显示文字
+
+
+
🌐 Edge TTS
+
检测中...
+
+
+
🤖 ChatTTS
+
暂未部署
+
+
+ +
+ + +
+ +
点击按钮开始录音
+ +
+ + +
+
+ + +
+
+ + +
+
开始对话吧!
+
+ +
+ +
+
# diff --git a/tts_service.py b/tts_service.py (new file, index 0000000..f2fd857)
"""
TTS (text-to-speech) synthesis module.

Provides several interchangeable TTS back ends behind a common interface,
plus a manager that selects between them.
"""

import contextlib
import os
import uuid
import logging
import asyncio
from abc import ABC, abstractmethod
from typing import Optional, Tuple
from datetime import datetime

# Configuration: directory where synthesized audio files are written.
AUDIO_DIR = os.getenv("AUDIO_DIR", "audio_cache")
os.makedirs(AUDIO_DIR, exist_ok=True)

logger = logging.getLogger(__name__)


class TTSProvider(ABC):
    """Abstract base class for a TTS back end."""

    @abstractmethod
    async def synthesize(self, text: str) -> Tuple[Optional[str], Optional[str]]:
        """
        Synthesize speech for ``text``.

        Returns:
            ``(audio file path, audio URL path)``. Both are ``None`` for a
            provider that produces no audio.
        """

    @abstractmethod
    def get_name(self) -> str:
        """Return the display name of the provider."""

    @abstractmethod
    def is_available(self) -> bool:
        """Return whether the provider can currently be used."""


class EdgeTTSProvider(TTSProvider):
    """Edge TTS provider, backed by the ``edge_tts`` package."""

    # Selectable voices: voice id -> display name.
    VOICES = {
        "zh-CN-XiaoxiaoNeural": "晓晓(女)",
        "zh-CN-YunxiNeural": "云希(男)",
        "zh-CN-YunyangNeural": "云扬(男)",
        "zh-CN-XiaochenNeural": "晓晨(女)",
        "zh-CN-XiaohanNeural": "晓涵(女)",
        "zh-CN-XiaomengNeural": "晓梦(女)",
        "zh-CN-XiaomoNeural": "晓墨(女)",
        "zh-CN-XiaoruiNeural": "晓睿(女)",
        "zh-CN-XiaoshuangNeural": "晓双(女)",
        "zh-CN-XiaoxuanNeural": "晓萱(女)",
        "zh-CN-XiaoyanNeural": "晓颜(女)",
        "zh-CN-XiaoyouNeural": "晓悠(女)",
    }

    DEFAULT_VOICE = "zh-CN-XiaoxiaoNeural"

    def __init__(self, voice: Optional[str] = None):
        self.voice = voice or self.DEFAULT_VOICE
        # Cached result of the import probe; None means "not probed yet".
        self._available = None

    async def synthesize(self, text: str) -> Tuple[str, str]:
        """
        Synthesize ``text`` with Edge TTS into a uniquely named MP3 file.

        Returns:
            ``(file path inside AUDIO_DIR, "/audio/<filename>" URL path)``.

        Raises:
            Whatever ``edge_tts`` raises. A partially written file is removed
            before the exception propagates.
        """
        import edge_tts

        # Unique file name so concurrent requests never collide.
        filename = f"{uuid.uuid4().hex}.mp3"
        filepath = os.path.join(AUDIO_DIR, filename)

        communicate = edge_tts.Communicate(text, self.voice)
        try:
            await communicate.save(filepath)
        except BaseException:
            # Failure or cancellation: do not leave a truncated file behind
            # in the audio directory. Always re-raised.
            with contextlib.suppress(OSError):
                os.remove(filepath)
            raise

        audio_url = f"/audio/{filename}"
        return filepath, audio_url

    def get_name(self) -> str:
        return "Edge TTS"

    def get_voice_name(self) -> str:
        """Return the display name of the current voice (the id if unknown)."""
        return self.VOICES.get(self.voice, self.voice)

    def is_available(self) -> bool:
        """Return whether ``edge_tts`` can be imported (probed once, then cached)."""
        if self._available is None:
            try:
                import edge_tts  # noqa: F401  (import probe only)
            except ImportError:
                logger.warning("edge-tts not installed")
                self._available = False
            else:
                self._available = True
        return self._available

    def set_voice(self, voice: str):
        """Switch to ``voice`` if it is a known voice id; otherwise keep the current one."""
        if voice in self.VOICES:
            self.voice = voice
        else:
            logger.warning("Unknown voice: %s, using default", voice)


class ChatTTSProvider(TTSProvider):
    """ChatTTS provider (local deployment; placeholder, not implemented yet)."""

    # Reserved configuration for the future ChatTTS service.
    CHATTTS_URL = os.getenv("CHATTTS_URL", "http://localhost:19020")

    def __init__(self):
        self._available = False  # not usable until a service is deployed

    async def synthesize(self, text: str) -> Tuple[str, str]:
        """
        Synthesize speech with ChatTTS.

        Raises:
            NotImplementedError: always; the back end is not implemented yet.
        """
        raise NotImplementedError("ChatTTS 尚未实现,请先部署 ChatTTS 服务")

    def get_name(self) -> str:
        return "ChatTTS"

    def is_available(self) -> bool:
        """Return the manually maintained availability flag."""
        # TODO: replace with a real health check once ChatTTS is deployed.
        return self._available

    def set_available(self, available: bool):
        """Set the availability flag (to be called after deployment)."""
        self._available = available


class NoTTSProvider(TTSProvider):
    """Null provider: TTS disabled, no audio is produced."""

    async def synthesize(self, text: str) -> Tuple[Optional[str], Optional[str]]:
        return None, None

    def get_name(self) -> str:
        return "无 TTS"

    def is_available(self) -> bool:
        return True


class TTSManager:
    """Registry of TTS providers plus the currently selected one."""

    PROVIDERS = {
        "edge": EdgeTTSProvider,
        "chattts": ChatTTSProvider,
        "none": NoTTSProvider,
    }

    def __init__(self, default_provider: str = "none"):
        self._providers = {}

        # Edge TTS is registered only when the package is importable.
        edge_provider = EdgeTTSProvider()
        if edge_provider.is_available():
            self._providers["edge"] = edge_provider

        # ChatTTS placeholder.
        self._providers["chattts"] = ChatTTSProvider()

        # TTS disabled.
        self._providers["none"] = NoTTSProvider()

        # Never let current_provider point at a name that is not registered.
        if default_provider not in self._providers:
            logger.warning(
                "Unknown default provider: %s, falling back to 'none'",
                default_provider,
            )
            default_provider = "none"
        self.current_provider = default_provider

    def get_provider(self, provider_name: Optional[str] = None) -> TTSProvider:
        """Return the named provider (default: the current one); unknown names yield the null provider."""
        name = provider_name or self.current_provider
        return self._providers.get(name, self._providers["none"])

    def set_provider(self, provider_name: str) -> bool:
        """
        Select the active provider.

        Returns:
            True if ``provider_name`` is registered and is now active, False
            if it is unknown (the current selection is left unchanged).
        """
        if provider_name in self._providers:
            self.current_provider = provider_name
            return True
        logger.warning("Unknown provider: %s", provider_name)
        return False

    def list_providers(self) -> list:
        """Return one ``{"name", "display_name", "available"}`` dict per registered provider."""
        return [
            {
                "name": name,
                "display_name": provider.get_name(),
                "available": provider.is_available(),
            }
            for name, provider in self._providers.items()
        ]

    def get_edge_voices(self) -> dict:
        """Return a copy of the Edge TTS voice table (voice id -> display name)."""
        # A copy, so callers cannot mutate the shared class-level table.
        return dict(EdgeTTSProvider.VOICES)

    async def synthesize(self, text: str, provider_name: Optional[str] = None) -> Optional[str]:
        """
        Synthesize speech and return the audio URL.

        Returns:
            The audio URL, or ``None`` when the text is blank, the provider
            is unavailable or produces no audio, or synthesis fails (the
            failure is logged, not raised).
        """
        if not text or not text.strip():
            # Nothing to speak; skip the back end entirely.
            return None

        provider = self.get_provider(provider_name)
        if not provider.is_available():
            logger.warning("Provider %s not available", provider.get_name())
            return None

        try:
            _, audio_url = await provider.synthesize(text)
        except Exception as exc:
            # Best effort by design: the caller gets None, the log gets the
            # message and traceback.
            logger.exception("TTS synthesis failed: %s", exc)
            return None
        return audio_url


# Global TTS manager instance.
tts_manager = TTSManager()