From b3cfae14a9b3c5313b17b29a8ca3620e8502bf57 Mon Sep 17 00:00:00 2001 From: hubian <908234780@qq.com> Date: Tue, 21 Apr 2026 18:19:04 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E8=AF=AD=E9=9F=B3=E4=BA=A4=E4=BA=92?= =?UTF-8?q?=E7=BD=91=E9=A1=B5=E5=88=9D=E5=A7=8B=E7=89=88=E6=9C=AC=20-=20?= =?UTF-8?q?=E5=89=8D=E7=AB=AF+=E5=90=8E=E7=AB=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 110 ++++++++++ main.py | 42 ++++ requirements.txt | 4 + server.py | 155 ++++++++++++++ start.sh | 14 ++ static/index.html | 525 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 850 insertions(+) create mode 100644 README.md create mode 100644 main.py create mode 100644 requirements.txt create mode 100644 server.py create mode 100755 start.sh create mode 100644 static/index.html diff --git a/README.md b/README.md new file mode 100644 index 0000000..9271cea --- /dev/null +++ b/README.md @@ -0,0 +1,110 @@ +# 语音交互网页 + +基于 Qwen2-Audio 的语音对话网页,点击按钮录音,AI 实时回复。 + +## 端口 + +- **服务端口**: 19019 +- **模型服务**: 19018(需要单独部署) + +## 功能 + +- 点击按钮录音(再次点击停止) +- 录音状态可视化(波形动画) +- 多轮对话支持 +- 服务状态检测 +- 清除对话功能 + +## 架构 + +``` +┌─────────────┐ ┌─────────────────┐ ┌───────────────────┐ +│ 浏览器前端 │ ──── │ 本机后端(19019) │ ──── │ 模型服务(19018) │ +│ (录音/播放) │ HTTP │ (转发代理) │ HTTP │ (Qwen2-Audio) │ +└─────────────┘ └─────────────────┘ └───────────────────┘ +``` + +## 部署 + +### 1. 安装依赖 + +```bash +pip install -r requirements.txt +``` + +### 2. 启动模型服务(另一台电脑) + +参考 `qwen-audio-server` 项目,在 GPU 机器上部署。 + +### 3. 启动网页服务 + +```bash +# 默认端口 19019 +python main.py + +# 或指定模型服务地址 +MODEL_SERVICE_URL=http://192.168.2.5:19018 python main.py +``` + +### 4. 
访问网页
+
+```
+http://localhost:19019
+```
+
+## 环境变量
+
+| 变量 | 说明 | 默认值 |
+|------|------|--------|
+| PORT | 服务端口 | 19019 |
+| MODEL_SERVICE_URL | 模型服务地址 | http://localhost:19018 |
+
+## API 接口
+
+### 语音对话
+
+```bash
+POST /api/voice/chat
+Content-Type: multipart/form-data
+
+参数:
+- audio: 音频文件
+- conversation_id: 对话ID(可选)
+
+返回:
+{
+  "reply": "AI回复文本",
+  "conversation_id": "xxx",
+  "timestamp": "..."
+}
+```
+
+### 服务状态
+
+```bash
+GET /api/status
+
+返回:
+{
+  "status": "ok",
+  "model_service": "http://...",
+  "model_online": true
+}
+```
+
+## 前端功能
+
+| 功能 | 说明 |
+|------|------|
+| 录音按钮 | 点击开始/停止录音 |
+| 波形动画 | 录音时显示动态波形 |
+| 消息列表 | 显示对话历史 |
+| 状态指示 | 模型服务连接状态 |
+| 清除对话 | 重置对话历史 |
+
+## 注意事项
+
+1. 需要浏览器支持 MediaRecorder API(Chrome/Firefox/Edge)
+2. 需要用户授权麦克风权限
+3. 模型推理较慢(约10-30秒),请耐心等待
+4. 录音格式为 WebM,模型服务会自动转换
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..647173d
--- /dev/null
+++ b/main.py
@@ -0,0 +1,51 @@
+"""
+Voice chat web page - main entry point.
+
+Serves the front-end page at "/" and "/static", and exposes the backend
+routes defined in server.py (/api/status, /api/voice/chat, ...) on the
+same port.
+"""
+
+import os
+import uvicorn
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+
+# Backend API application; its routes already carry the /api prefix
+from server import app as api_app
+
+# Main application
+app = FastAPI(title="Voice Chat Web")
+
+# CORS
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Static files
+app.mount("/static", StaticFiles(directory="static"), name="static")
+
+
+@app.get("/")
+async def index():
+    """Serve the front-end page."""
+    return FileResponse("static/index.html")
+
+
+# Mount the API app at the root, and do it last. A mount strips its prefix
+# before the sub-application routes the request, so mounting at "/api" would
+# publish the endpoints as /api/api/... and leave the documented /api/status
+# and /api/voice/chat unreachable. Registering it after "/" and "/static"
+# keeps those two paths served by the handlers above.
+app.mount("/", api_app)
+
+
+if __name__ == "__main__":
+    PORT = int(os.getenv("PORT", "19019"))
+    uvicorn.run(app, host="0.0.0.0", port=PORT)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..781210c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+fastapi==0.110.0
+uvicorn==0.27.1
+python-multipart==0.0.9
+aiohttp==3.9.3
\ No newline at end of file
diff --git a/server.py b/server.py
new file mode 100644
index 0000000..4bd9fb2
--- /dev/null
+++ b/server.py
@@ -0,0 +1,202 @@
+"""
+Voice chat web backend.
+
+Proxies browser requests to the Qwen2-Audio model service.
+"""
+
+import asyncio
+import os
+import logging
+from typing import Optional
+from datetime import datetime
+from urllib.parse import quote
+
+import aiohttp
+from fastapi import FastAPI, UploadFile, File, HTTPException, Form
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+
+# Configuration (overridable through environment variables)
+MODEL_SERVICE_URL = os.getenv("MODEL_SERVICE_URL", "http://localhost:19018")
+PORT = int(os.getenv("PORT", "19019"))
+
+# Logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = FastAPI(
+    title="Voice Chat Web",
+    description="语音交互网页后端",
+    version="1.0.0"
+)
+
+# CORS
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+class VoiceResponse(BaseModel):
+    """Reply returned to the browser for one voice turn."""
+    reply: str
+    conversation_id: str
+    timestamp: str
+
+
+class StatusResponse(BaseModel):
+    """Health report for this service and the model service behind it."""
+    status: str
+    model_service: str
+    model_online: bool
+
+
+@app.get("/")
+async def root():
+    """Return a minimal liveness payload."""
+    return {"status": "ok", "service": "voice-chat-web"}
+
+
+@app.get("/api/status", response_model=StatusResponse)
+async def get_status():
+    """
+    Report whether the model service is reachable.
+
+    The model service counts as online when a GET on its root URL answers
+    200 with a JSON body within 5 seconds. Any failure is logged and
+    reported as status="partial" / model_online=False instead of raising.
+    """
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(f"{MODEL_SERVICE_URL}/", timeout=aiohttp.ClientTimeout(total=5)) as resp:
+                if resp.status == 200:
+                    # Parsed only to check that the answer is JSON; the
+                    # content itself is not used.
+                    await resp.json()
+                    return StatusResponse(
+                        status="ok",
+                        model_service=MODEL_SERVICE_URL,
+                        model_online=True
+                    )
+    except Exception as e:
+        # Best effort: an unreachable model service is a normal answer here.
+        logger.warning(f"Model service check failed: {e}")
+
+    return StatusResponse(
+        status="partial",
+        model_service=MODEL_SERVICE_URL,
+        model_online=False
+    )
+
+
+@app.post("/api/voice/chat", response_model=VoiceResponse)
+async def voice_chat(
+    audio: UploadFile = File(..., description="音频文件"),
+    conversation_id: Optional[str] = Form(None, description="对话ID")
+):
+    """
+    Forward one recorded utterance to the model service.
+
+    Args:
+        audio: Uploaded recording, forwarded with its own filename and
+            content type ('audio.wav' / 'audio/wav' when it has none).
+        conversation_id: Optional conversation ID, forwarded only when set.
+
+    Returns:
+        VoiceResponse built from the model service's JSON answer; the
+        timestamp falls back to the local time when the answer has none.
+
+    Raises:
+        HTTPException: the model service's own status and body when it does
+            not answer 200, 504 when it does not answer within 120 seconds,
+            503 on connection errors, 500 on anything else.
+    """
+    try:
+        # Read the uploaded audio
+        audio_bytes = await audio.read()
+
+        # Forward to the model service
+        async with aiohttp.ClientSession() as session:
+            form = aiohttp.FormData()
+            form.add_field(
+                'audio',
+                audio_bytes,
+                filename=audio.filename or 'audio.wav',
+                content_type=audio.content_type or 'audio/wav'
+            )
+            if conversation_id:
+                form.add_field('conversation_id', conversation_id)
+
+            async with session.post(
+                f"{MODEL_SERVICE_URL}/api/voice/inference",
+                data=form,
+                timeout=aiohttp.ClientTimeout(total=120)  # model inference can be slow
+            ) as resp:
+                if resp.status != 200:
+                    error_text = await resp.text()
+                    logger.error(f"Model service error: {error_text}")
+                    raise HTTPException(status_code=resp.status, detail=error_text)
+
+                data = await resp.json()
+                return VoiceResponse(
+                    reply=data["reply"],
+                    conversation_id=data["conversation_id"],
+                    timestamp=data.get("timestamp", datetime.now().isoformat())
+                )
+
+    except HTTPException:
+        # Already the response we want (upstream status and body); without
+        # this clause the generic handler below would turn it into a 500.
+        raise
+    except asyncio.TimeoutError:
+        # An expired total timeout surfaces as asyncio.TimeoutError, which
+        # is not an aiohttp.ClientError.
+        logger.error("Model service timed out")
+        raise HTTPException(status_code=504, detail="模型服务响应超时")
+    except aiohttp.ClientError as e:
+        logger.error(f"Connection error: {e}")
+        raise HTTPException(status_code=503, detail="模型服务连接失败")
+    except Exception as e:
+        logger.error(f"Voice chat error: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.delete("/api/conversation/{conversation_id}")
+async def delete_conversation(conversation_id: str):
+    """
+    Delete a conversation on the model service.
+
+    Returns the model service's JSON answer. An error status from the model
+    service is passed on with its body instead of being reported as success.
+    """
+    # Escape the ID so it stays a single path segment of the upstream URL.
+    conversation_path = quote(conversation_id, safe="")
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.delete(
+                f"{MODEL_SERVICE_URL}/api/voice/conversation/{conversation_path}",
+                timeout=aiohttp.ClientTimeout(total=10)
+            ) as resp:
+                if resp.status >= 400:
+                    error_text = await resp.text()
+                    logger.error(f"Model service error: {error_text}")
+                    raise HTTPException(status_code=resp.status, detail=error_text)
+                return await resp.json()
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Delete conversation error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# Static files (front-end page)
+app.mount("/static", StaticFiles(directory="static"), name="static")
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=PORT)
\ No newline at end of file
diff
--git a/start.sh b/start.sh
new file mode 100755
index 0000000..e07a7c6
--- /dev/null
+++ b/start.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Start script for the voice chat web page
+
+# Run from the script's directory: main.py and static/ are resolved relative to it
+cd "$(dirname "$0")" || exit 1
+
+# Export both settings so main.py reads the same values that are echoed below
+export PORT="${PORT:-19019}"
+export MODEL_SERVICE_URL="${MODEL_SERVICE_URL:-http://localhost:19018}"
+
+echo "Starting Voice Chat Web on port $PORT..."
+echo "Model service: $MODEL_SERVICE_URL"
+
+python main.py
\ No newline at end of file
diff --git a/static/index.html b/static/index.html
new file mode 100644
index 0000000..fd38b9c
--- /dev/null
+++ b/static/index.html
@@ -0,0 +1,525 @@
+ + + + + + 语音对话 + + + +
+
+

🎤 语音对话

+
+ + 检测连接... +
+
+ +
+ +
点击按钮开始录音
+ +
+ +
+
开始你的第一次语音对话吧!
+
+ +
+ +
+
+ + + + \ No newline at end of file