feat: AI回复语音播放功能(Edge TTS)

This commit is contained in:
2026-04-28 17:28:07 +08:00
parent 60db170c0d
commit 7de13ffc6d
4 changed files with 252 additions and 7 deletions

View File

@@ -4,7 +4,7 @@ AI Chat App - 后台管理服务
端口: 19021 (与前端同一端口) 端口: 19021 (与前端同一端口)
""" """
from flask import Flask, jsonify, request, send_from_directory from flask import Flask, jsonify, request, send_from_directory, Response
from flask_cors import CORS from flask_cors import CORS
import os import os
import json import json
@@ -12,6 +12,8 @@ import sqlite3
from datetime import datetime from datetime import datetime
import hashlib import hashlib
import base64 import base64
import asyncio
import edge_tts
app = Flask(__name__, static_folder='../www') app = Flask(__name__, static_folder='../www')
CORS(app) CORS(app)
@@ -273,6 +275,8 @@ def init_db():
('app_description', '提供智能对话、多种智能体服务', '应用简介'), ('app_description', '提供智能对话、多种智能体服务', '应用简介'),
('privacy_policy_url', '', '隐私政策链接'), ('privacy_policy_url', '', '隐私政策链接'),
('user_agreement_url', '', '用户协议链接'), ('user_agreement_url', '', '用户协议链接'),
('tts_provider', 'edge', 'TTS方案'),
('tts_voice', 'zh-CN-XiaoxiaoNeural', 'TTS语音'),
] ]
for key, value, desc in default_configs: for key, value, desc in default_configs:
cursor.execute('INSERT INTO system_configs (key, value, description) VALUES (?, ?, ?)', (key, value, desc)) cursor.execute('INSERT INTO system_configs (key, value, description) VALUES (?, ?, ?)', (key, value, desc))
@@ -285,6 +289,8 @@ def init_db():
('app_description', '提供智能对话、多种智能体服务', '应用简介'), ('app_description', '提供智能对话、多种智能体服务', '应用简介'),
('privacy_policy_url', '', '隐私政策链接'), ('privacy_policy_url', '', '隐私政策链接'),
('user_agreement_url', '', '用户协议链接'), ('user_agreement_url', '', '用户协议链接'),
('tts_provider', 'edge', 'TTS方案'),
('tts_voice', 'zh-CN-XiaoxiaoNeural', 'TTS语音'),
] ]
for key, value, desc in default_configs: for key, value, desc in default_configs:
cursor.execute('SELECT COUNT(*) FROM system_configs WHERE key=?', (key,)) cursor.execute('SELECT COUNT(*) FROM system_configs WHERE key=?', (key,))
@@ -1386,12 +1392,62 @@ def get_frontend_config():
'description': system.get('app_description', '提供智能对话、多种智能体服务'), 'description': system.get('app_description', '提供智能对话、多种智能体服务'),
'privacyPolicyUrl': system.get('privacy_policy_url', ''), 'privacyPolicyUrl': system.get('privacy_policy_url', ''),
'userAgreementUrl': system.get('user_agreement_url', ''), 'userAgreementUrl': system.get('user_agreement_url', ''),
'ttsProvider': system.get('tts_provider', 'edge'),
'ttsVoice': system.get('tts_voice', 'zh-CN-XiaoxiaoNeural'),
} }
} }
return jsonify(config) return jsonify(config)
# ==================== TTS 语音合成 ====================
@app.route('/api/tts', methods=['POST'])
def generate_tts():
    """Synthesize speech for the posted text with Edge TTS.

    Expects a JSON body of the form ``{"text": ..., "voice": ...}``.
    ``voice`` is optional and falls back to ``zh-CN-XiaoxiaoNeural``
    (the default Chinese female voice) when missing or empty.

    Returns:
        The synthesized audio as an ``audio/mpeg`` response on success;
        a JSON ``{"error": ...}`` body with status 400 when no usable
        text was supplied, or with status 500 when synthesis fails.
    """
    # silent=True makes a missing/malformed JSON body come back as None
    # instead of raising, so it is reported through the 400 branch below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    text = payload.get('text', '')
    voice = payload.get('voice')
    if not isinstance(voice, str) or not voice:
        voice = 'zh-CN-XiaoxiaoNeural'

    # Blank or non-string text cannot be synthesized; reject it up front
    # rather than letting it surface as a 500 from the TTS call.
    if not isinstance(text, str) or not text.strip():
        return jsonify({'error': '缺少文本内容'}), 400

    async def generate_audio():
        """Collect every audio chunk of the TTS stream into one bytes object."""
        communicate = edge_tts.Communicate(text, voice)
        audio_chunks = []
        # Iterate the async stream directly: this coroutine already runs
        # inside an event loop, so the blocking stream_sync() wrapper is
        # unnecessary here.
        async for chunk in communicate.stream():
            if chunk['type'] == 'audio':
                audio_chunks.append(chunk['data'])
        # Join once at the end instead of repeated bytes concatenation.
        return b''.join(audio_chunks)

    try:
        audio_data = asyncio.run(generate_audio())
    except Exception as e:
        # Request boundary: report any synthesis failure to the client.
        return jsonify({'error': f'TTS生成失败: {str(e)}'}), 500

    return Response(audio_data, mimetype='audio/mpeg')
@app.route('/api/tts/voices', methods=['GET'])
def get_tts_voices():
    """Return the Edge TTS voices whose locale starts with ``zh-``.

    Returns:
        JSON ``{"voices": [{"name", "gender", "locale"}, ...]}`` on
        success, or JSON ``{"error": ...}`` with status 500 when the
        voice list cannot be fetched or processed.
    """
    try:
        catalogue = asyncio.run(edge_tts.list_voices())

        # Step 1: keep only the Chinese-locale entries.
        zh_entries = list(
            filter(lambda entry: entry['Locale'].startswith('zh-'), catalogue)
        )

        # Step 2: project each entry onto the fields the client consumes.
        voice_list = []
        for entry in zh_entries:
            voice_list.append({
                'name': entry['ShortName'],
                'gender': entry['Gender'],
                'locale': entry['Locale'],
            })

        return jsonify({'voices': voice_list})
    except Exception as e:
        return jsonify({'error': f'获取语音列表失败: {str(e)}'}), 500
# ==================== 启动 ==================== # ==================== 启动 ====================
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -1399,6 +1399,39 @@ async function loadSystemPage(content) {
<input type="text" class="form-input" id="adminPassword" value="${systemConfigs.admin_password?.value || ''}" placeholder="修改管理员密码"> <input type="text" class="form-input" id="adminPassword" value="${systemConfigs.admin_password?.value || ''}" placeholder="修改管理员密码">
</div> </div>
<h3 style="margin: 24px 0 16px; padding-top: 16px; border-top: 1px solid #e2e8f0;">TTS语音配置</h3>
<div class="form-group">
<label class="form-label">TTS方案</label>
<select class="form-input" id="ttsProvider">
<option value="edge" ${systemConfigs.tts_provider?.value === 'edge' ? 'selected' : ''}>Edge TTS免费</option>
</select>
<span style="color: #999; font-size: 12px;">目前仅支持 Edge TTS后续将添加更多方案</span>
</div>
<div class="form-group">
<label class="form-label">TTS语音</label>
<select class="form-input" id="ttsVoice">
<option value="zh-CN-XiaoxiaoNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-XiaoxiaoNeural' ? 'selected' : ''}>晓晓(女声)</option>
<option value="zh-CN-YunxiNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-YunxiNeural' ? 'selected' : ''}>云希(男声)</option>
<option value="zh-CN-YunjianNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-YunjianNeural' ? 'selected' : ''}>云健(男声)</option>
<option value="zh-CN-XiaoyiNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-XiaoyiNeural' ? 'selected' : ''}>晓伊(女声)</option>
<option value="zh-CN-YunfengNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-YunfengNeural' ? 'selected' : ''}>云枫(男声)</option>
<option value="zh-CN-XiaochenNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-XiaochenNeural' ? 'selected' : ''}>晓辰(女声)</option>
<option value="zh-CN-XiaohanNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-XiaohanNeural' ? 'selected' : ''}>晓涵(女声)</option>
<option value="zh-CN-XiaomengNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-XiaomengNeural' ? 'selected' : ''}>晓梦(女声)</option>
<option value="zh-CN-XiaomoNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-XiaomoNeural' ? 'selected' : ''}>晓墨(女声)</option>
<option value="zh-CN-XiaoruiNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-XiaoruiNeural' ? 'selected' : ''}>晓睿(女声)</option>
<option value="zh-CN-XiaoshuangNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-XiaoshuangNeural' ? 'selected' : ''}>晓双(女声)</option>
<option value="zh-CN-XiaoxuanNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-XiaoxuanNeural' ? 'selected' : ''}>晓萱(女声)</option>
<option value="zh-CN-XiaoyanNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-XiaoyanNeural' ? 'selected' : ''}>晓颜(女声)</option>
<option value="zh-CN-XiaoyouNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-XiaoyouNeural' ? 'selected' : ''}>晓悠(女声)</option>
<option value="zh-CN-YunyaNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-YunyaNeural' ? 'selected' : ''}>云雅(女声)</option>
<option value="zh-CN-YunyangNeural" ${systemConfigs.tts_voice?.value === 'zh-CN-YunyangNeural' ? 'selected' : ''}>云扬(男声)</option>
</select>
<span style="color: #999; font-size: 12px;">选择AI回复的朗读语音</span>
</div>
<h3 style="margin: 24px 0 16px; padding-top: 16px; border-top: 1px solid #e2e8f0;">链接配置</h3> <h3 style="margin: 24px 0 16px; padding-top: 16px; border-top: 1px solid #e2e8f0;">链接配置</h3>
<div class="form-group"> <div class="form-group">
@@ -1436,6 +1469,8 @@ async function saveSystemConfig() {
admin_password: document.getElementById('adminPassword').value, admin_password: document.getElementById('adminPassword').value,
privacy_policy_url: document.getElementById('privacyPolicyUrl').value, privacy_policy_url: document.getElementById('privacyPolicyUrl').value,
user_agreement_url: document.getElementById('userAgreementUrl').value, user_agreement_url: document.getElementById('userAgreementUrl').value,
tts_provider: document.getElementById('ttsProvider').value,
tts_voice: document.getElementById('ttsVoice').value,
}; };
await fetchAPI('/api/admin/system', 'POST', data); await fetchAPI('/api/admin/system', 'POST', data);

View File

@@ -52,6 +52,11 @@ let backendConfig = null; // 从API获取的配置
// 用户状态 // 用户状态
let currentUser = null; // 当前登录用户 { username, password, registeredAt } let currentUser = null; // 当前登录用户 { username, password, registeredAt }
// TTS 语音播放状态
let enableTTS = false; // 是否启用语音播放
let currentPlayingAudio = null; // 当前播放的音频对象
let ttsVoice = 'zh-CN-XiaoxiaoNeural'; // TTS 语音
// 每日使用统计(未登录用户) // 每日使用统计(未登录用户)
let dailyUsage = { let dailyUsage = {
date: null, // 日期 YYYY-MM-DD date: null, // 日期 YYYY-MM-DD
@@ -127,6 +132,8 @@ async function loadBackendConfig() {
// 将后台系统配置赋值到 CONFIG // 将后台系统配置赋值到 CONFIG
if (backendConfig.system) { if (backendConfig.system) {
CONFIG.system = backendConfig.system; CONFIG.system = backendConfig.system;
// 加载 TTS 配置
ttsVoice = backendConfig.system.ttsVoice || 'zh-CN-XiaoxiaoNeural';
} }
// 将后台 LLM 配置赋值到 CONFIG // 将后台 LLM 配置赋值到 CONFIG
@@ -3165,6 +3172,9 @@ function showAgentChatPage() {
<p class="agent-desc-header">${currentAgent.desc}</p> <p class="agent-desc-header">${currentAgent.desc}</p>
</div> </div>
</div> </div>
<button class="feature-btn tts-btn ${enableTTS ? 'active' : ''}" id="ttsBtn" title="语音播放">
<svg viewBox="0 0 24 24" width="20" height="20"><path fill="currentColor" d="M3 9v6h4l5 5V4L7 9H3zm13.5 3c0-1.77-1.02-3.29-2.5-4.03v8.05c1.48-.73 2.5-2.25 2.5-4.02zM14 3.23v2.06c2.89.86 5 3.54 5 6.71s-2.11 5.85-5 6.71v2.06c4.01-.91 7-4.54 7-8.77s-2.99-7.86-7-8.77z"/></svg>
</button>
</header> </header>
<div class="messages-container" id="messagesContainer"> <div class="messages-container" id="messagesContainer">
@@ -3257,6 +3267,21 @@ function showAgentChatPage() {
}); });
} }
// 绑定 TTS 开关按钮(智能体对话)
const ttsBtn = document.getElementById('ttsBtn');
if (ttsBtn) {
ttsBtn.addEventListener('click', () => {
enableTTS = !enableTTS;
ttsBtn.classList.toggle('active', enableTTS);
showToast(enableTTS ? '语音播放已开启' : '语音播放已关闭');
// 如果关闭,停止当前播放
if (!enableTTS && currentPlayingAudio) {
currentPlayingAudio.pause();
currentPlayingAudio = null;
}
});
}
// 绑定功能开关按钮事件 // 绑定功能开关按钮事件
if (thinkingBtn) { if (thinkingBtn) {
thinkingBtn.addEventListener('click', () => { thinkingBtn.addEventListener('click', () => {
@@ -3649,8 +3674,8 @@ function openConversation(id) {
<span class="logo">🤖</span> <span class="logo">🤖</span>
<h1>${escapeHtml(currentConversation.title)}</h1> <h1>${escapeHtml(currentConversation.title)}</h1>
</div> </div>
<button class="clear-btn" id="clearBtn" title="清空对话"> <button class="feature-btn tts-btn ${enableTTS ? 'active' : ''}" id="ttsBtn" title="语音播放">
<svg viewBox="0 0 24 24" width="20" height="20"><path fill="currentColor" d="M19 6.41L17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41z"/></svg> <svg viewBox="0 0 24 24" width="20" height="20"><path fill="currentColor" d="M3 9v6h4l5 5V4L7 9H3zm13.5 3c0-1.77-1.02-3.29-2.5-4.03v8.05c1.48-.73 2.5-2.25 2.5-4.02zM14 3.23v2.06c2.89.86 5 3.54 5 6.71s-2.11 5.85-5 6.71v2.06c4.01-.91 7-4.54 7-8.77s-2.99-7.86-7-8.77z"/></svg>
</button> </button>
</header> </header>
@@ -3748,8 +3773,20 @@ function openConversation(id) {
const backBtn = document.getElementById('backBtn'); const backBtn = document.getElementById('backBtn');
if (backBtn) backBtn.addEventListener('click', showConversationList); if (backBtn) backBtn.addEventListener('click', showConversationList);
const clearBtn = document.getElementById('clearBtn'); // 绑定 TTS 开关按钮
if (clearBtn) clearBtn.addEventListener('click', clearCurrentChat); const ttsBtn = document.getElementById('ttsBtn');
if (ttsBtn) {
ttsBtn.addEventListener('click', () => {
enableTTS = !enableTTS;
ttsBtn.classList.toggle('active', enableTTS);
showToast(enableTTS ? '语音播放已开启' : '语音播放已关闭');
// 如果关闭,停止当前播放
if (!enableTTS && currentPlayingAudio) {
currentPlayingAudio.pause();
currentPlayingAudio = null;
}
});
}
// 绑定功能开关按钮事件 // 绑定功能开关按钮事件
if (thinkingBtn) { if (thinkingBtn) {
@@ -4150,6 +4187,12 @@ async function streamGenerate(userMsgIndex) {
// 记录统计到 backend // 记录统计到 backend
logStatsToBackend('llm_call', currentConversation.agentId || 'chat', 1); logStatsToBackend('llm_call', currentConversation.agentId || 'chat', 1);
// 自动播放 TTS
const lastMsg = currentConversation.messages[aiMessageIndex];
if (enableTTS && lastMsg && lastMsg.content) {
autoPlayTTS(lastMsg.content);
}
// 自动总结标题第一次对话和每隔5次对话 // 自动总结标题第一次对话和每隔5次对话
const totalMessages = currentConversation.messages.length; const totalMessages = currentConversation.messages.length;
// 第一次对话(用户+AI=2条或每5次对话10条 // 第一次对话(用户+AI=2条或每5次对话10条
@@ -4647,6 +4690,8 @@ function renderMessages() {
const copyIcon = `<svg viewBox="0 0 24 24" width="16" height="16"><path fill="currentColor" d="M16 1H4c-1.1 0-2 .9-2 2v14h2V3h12V1zm3 4H8c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h11c1.1 0 2-.9 2-2V7c0-1.1-.9-2-2-2zm0 16H8V7h11v14z"/></svg>`; const copyIcon = `<svg viewBox="0 0 24 24" width="16" height="16"><path fill="currentColor" d="M16 1H4c-1.1 0-2 .9-2 2v14h2V3h12V1zm3 4H8c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h11c1.1 0 2-.9 2-2V7c0-1.1-.9-2-2-2zm0 16H8V7h11v14z"/></svg>`;
const playIcon = `<svg viewBox="0 0 24 24" width="16" height="16"><path fill="currentColor" d="M3 9v6h4l5 5V4L7 9H3zm13.5 3c0-1.77-1.02-3.29-2.5-4.03v8.05c1.48-.73 2.5-2.25 2.5-4.02zM14 3.23v2.06c2.89.86 5 3.54 5 6.71s-2.11 5.85-5 6.71v2.06c4.01-.91 7-4.54 7-8.77s-2.99-7.86-7-8.77z"/></svg>`;
const actions = isUser const actions = isUser
? `<div class="message-actions"> ? `<div class="message-actions">
<button class="action-btn copy-btn" data-index="${index}" title="复制">${copyIcon}</button> <button class="action-btn copy-btn" data-index="${index}" title="复制">${copyIcon}</button>
@@ -4655,6 +4700,7 @@ function renderMessages() {
</button> </button>
</div>` </div>`
: `<div class="message-actions"> : `<div class="message-actions">
<button class="action-btn tts-btn" data-index="${index}" title="语音播放">${playIcon}</button>
<button class="action-btn copy-btn" data-index="${index}" title="复制">${copyIcon}</button> <button class="action-btn copy-btn" data-index="${index}" title="复制">${copyIcon}</button>
<button class="action-btn regenerate-btn" data-index="${index}" title="重新生成"> <button class="action-btn regenerate-btn" data-index="${index}" title="重新生成">
<svg viewBox="0 0 24 24" width="16" height="16"><path fill="currentColor" d="M17.65 6.35C16.2 4.9 14.21 4 12 4c-4.42 0-7.99 3.58-7.99 8s3.57 8 7.99 8c3.73 0 6.84-2.55 7.73-6h-2.08c-.82 2.33-3.04 4-5.65 4-3.31 0-6-2.69-6-6s2.69-6 6-6c1.66 0 3.14.69 4.22 1.78L13 11h7V4l-2.35 2.35z"/></svg> <svg viewBox="0 0 24 24" width="16" height="16"><path fill="currentColor" d="M17.65 6.35C16.2 4.9 14.21 4 12 4c-4.42 0-7.99 3.58-7.99 8s3.57 8 7.99 8c3.73 0 6.84-2.55 7.73-6h-2.08c-.82 2.33-3.04 4-5.65 4-3.31 0-6-2.69-6-6s2.69-6 6-6c1.66 0 3.14.69 4.22 1.78L13 11h7V4l-2.35 2.35z"/></svg>
@@ -4678,6 +4724,9 @@ function renderMessages() {
}).join(''); }).join('');
// 绑定消息操作按钮事件(事件委托) // 绑定消息操作按钮事件(事件委托)
messagesDiv.querySelectorAll('.tts-btn').forEach(btn => {
btn.addEventListener('click', () => playTTS(parseInt(btn.dataset.index)));
});
messagesDiv.querySelectorAll('.copy-btn').forEach(btn => { messagesDiv.querySelectorAll('.copy-btn').forEach(btn => {
btn.addEventListener('click', () => copyMessage(parseInt(btn.dataset.index))); btn.addEventListener('click', () => copyMessage(parseInt(btn.dataset.index)));
}); });
@@ -4691,6 +4740,102 @@ function renderMessages() {
scrollToBottom(); scrollToBottom();
} }
// TTS playback
/**
 * Toggle text-to-speech playback for one assistant message.
 *
 * If audio is already playing, this call only stops it. Otherwise the
 * message text is sent to the backend `/api/tts` endpoint and the
 * returned audio is played.
 *
 * @param {number} index Index of the message in `currentConversation.messages`.
 * @returns {Promise<void>}
 */
async function playTTS(index) {
    if (!currentConversation || index < 0) return;

    const msg = currentConversation.messages[index];
    if (!msg || msg.role !== 'assistant') return;

    const text = msg.content;
    if (!text) {
        showToast('没有可播放的内容');
        return;
    }

    // Acts as a toggle: a click while something is playing just stops it.
    if (currentPlayingAudio) {
        const stopped = currentPlayingAudio;
        currentPlayingAudio = null;
        stopped.pause();
        // pause() never fires `ended`, so release the blob URL here.
        URL.revokeObjectURL(stopped.src);
        showToast('已停止播放');
        return;
    }

    let audioUrl = null;
    let audio = null;
    try {
        showToast('正在生成语音...');
        const response = await fetch('/api/tts', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text, voice: ttsVoice })
        });

        if (!response.ok) {
            // The error body may not be JSON; fall back to the generic message.
            const error = await response.json().catch(() => ({}));
            showToast(error.error || '语音生成失败');
            return;
        }

        const audioBlob = await response.blob();
        audioUrl = URL.createObjectURL(audioBlob);
        audio = new Audio(audioUrl);

        // Free the blob URL when playback finishes or the media fails, and
        // clear the global only if it still points at this audio element.
        const release = () => {
            URL.revokeObjectURL(audioUrl);
            if (currentPlayingAudio === audio) currentPlayingAudio = null;
        };
        audio.onended = release;
        audio.onerror = release;

        // Another playback may have started while the request was in
        // flight (double click, auto-play); stop it so audio never overlaps.
        if (currentPlayingAudio) {
            currentPlayingAudio.pause();
            URL.revokeObjectURL(currentPlayingAudio.src);
        }
        currentPlayingAudio = audio;

        // Await play() so a rejected promise reaches the catch block below.
        await audio.play();
        showToast('开始播放');
    } catch (e) {
        console.error('TTS播放失败:', e);
        if (audioUrl) URL.revokeObjectURL(audioUrl);
        if (audio && currentPlayingAudio === audio) currentPlayingAudio = null;
        // AbortError means playback was interrupted by a pause(), which is
        // a deliberate stop rather than a failure worth reporting.
        if (!e || e.name !== 'AbortError') {
            showToast('语音播放失败');
        }
    }
}
// Auto-play TTS (after an AI reply has finished)
/**
 * Speak `text` automatically when TTS is enabled.
 *
 * Playback starts after a short delay so the user sees the reply first.
 * Failures are only logged; no toast is shown for automatic playback.
 *
 * @param {string} text Reply text to synthesize.
 */
function autoPlayTTS(text) {
    if (!enableTTS || !text) return;

    // Short delay so the user sees the content first.
    setTimeout(async () => {
        // The user may have switched TTS off during the delay.
        if (!enableTTS) return;

        let audioUrl = null;
        let audio = null;
        try {
            const response = await fetch('/api/tts', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ text, voice: ttsVoice })
            });

            if (!response.ok) return;

            const audioBlob = await response.blob();

            // Re-check after the network round trip: do not start speaking
            // if TTS was turned off while the audio was being generated.
            if (!enableTTS) return;

            audioUrl = URL.createObjectURL(audioBlob);
            audio = new Audio(audioUrl);

            // Free the blob URL when playback finishes or the media fails, and
            // clear the global only if it still points at this audio element.
            const release = () => {
                URL.revokeObjectURL(audioUrl);
                if (currentPlayingAudio === audio) currentPlayingAudio = null;
            };
            audio.onended = release;
            audio.onerror = release;

            // Stop the previous playback; pause() never fires `ended`, so
            // its blob URL has to be released here.
            if (currentPlayingAudio) {
                currentPlayingAudio.pause();
                URL.revokeObjectURL(currentPlayingAudio.src);
            }
            currentPlayingAudio = audio;

            // Await play() so a rejected promise reaches the catch block below.
            await audio.play();
        } catch (e) {
            if (audioUrl) URL.revokeObjectURL(audioUrl);
            if (audio && currentPlayingAudio === audio) currentPlayingAudio = null;
            console.error('自动TTS播放失败:', e);
        }
    }, 500);
}
// 折叠/展开思考内容 // 折叠/展开思考内容
function toggleThinking(block) { function toggleThinking(block) {
block.classList.toggle('expanded'); block.classList.toggle('expanded');

View File

@@ -2395,7 +2395,8 @@ body {
font-size: 20px; font-size: 20px;
} }
.clear-btn { /* TTS 语音播放按钮 */
.tts-btn {
background: rgba(255,255,255,0.2); background: rgba(255,255,255,0.2);
border: none; border: none;
border-radius: 8px; border-radius: 8px;
@@ -2404,10 +2405,18 @@ body {
cursor: pointer; cursor: pointer;
} }
.clear-btn:active { .tts-btn:hover {
background: rgba(255,255,255,0.3); background: rgba(255,255,255,0.3);
} }
.tts-btn.active {
background: rgba(255,255,255,0.4);
}
.tts-btn svg {
display: block;
}
.messages-container { .messages-container {
flex: 1; flex: 1;
overflow-y: auto; overflow-y: auto;