Files
voice-chat-web/static/tts.html

1045 lines
34 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>语音对话TTS版</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
display: flex;
justify-content: center;
align-items: center;
padding: 20px;
}
.container {
max-width: 700px;
width: 100%;
background: white;
border-radius: 20px;
padding: 30px;
box-shadow: 0 20px 60px rgba(0,0,0,0.3);
}
.header {
text-align: center;
margin-bottom: 20px;
}
.header h1 {
color: #333;
font-size: 28px;
margin-bottom: 10px;
}
.status-indicator {
display: inline-flex;
align-items: center;
gap: 8px;
font-size: 14px;
color: #666;
}
.status-dot {
width: 10px;
height: 10px;
border-radius: 50%;
background: #ccc;
}
.status-dot.online {
background: #4CAF50;
animation: pulse 2s infinite;
}
.status-dot.offline {
background: #f44336;
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
/* TTS 设置区域 */
.tts-section {
background: #f5f7fa;
border-radius: 10px;
padding: 15px;
margin-bottom: 20px;
}
.tts-section h3 {
color: #333;
font-size: 16px;
margin-bottom: 12px;
}
.tts-options {
display: flex;
gap: 10px;
margin-bottom: 10px;
}
.tts-option {
flex: 1;
padding: 10px;
border: 2px solid #eee;
border-radius: 8px;
cursor: pointer;
text-align: center;
transition: all 0.2s;
}
.tts-option:hover {
border-color: #667eea;
}
.tts-option.selected {
border-color: #667eea;
background: rgba(102,126,234,0.1);
}
.tts-option.disabled {
opacity: 0.5;
cursor: not-allowed;
}
.tts-option .name {
font-weight: bold;
color: #333;
}
.tts-option .status {
font-size: 12px;
color: #999;
}
/* TTS 控制选项 */
.tts-controls {
display: flex;
gap: 15px;
margin-top: 12px;
padding-top: 12px;
border-top: 1px solid #eee;
align-items: center;
}
.auto-play-switch {
display: flex;
align-items: center;
gap: 8px;
}
/* Toggle switch: a visually hidden checkbox drives the .slider track and knob. */
.switch {
  position: relative;
  width: 44px;
  height: 22px;
}
/* The native checkbox stays in the DOM (and in the tab order) but is invisible. */
.switch input {
  opacity: 0;
  width: 0;
  height: 0;
}
/* Track */
.slider {
  position: absolute;
  cursor: pointer;
  top: 0;
  left: 0;
  right: 0;
  bottom: 0;
  background-color: #ccc;
  transition: .3s;
  border-radius: 22px;
}
/* Knob */
.slider:before {
  position: absolute;
  content: "";
  height: 18px;
  width: 18px;
  left: 2px;
  bottom: 2px;
  background-color: white;
  transition: .3s;
  border-radius: 50%;
}
input:checked + .slider {
  background-color: #667eea;
}
input:checked + .slider:before {
  transform: translateX(22px);
}
/* The checkbox itself cannot show a focus ring, so draw keyboard focus on the track. */
.switch input:focus-visible + .slider {
  outline: 2px solid #667eea;
  outline-offset: 2px;
}
/* Volume slider row: icon, range input and percentage read-out. */
.volume-control {
  display: flex;
  align-items: center;
  gap: 8px;
}
.volume-control input[type="range"] {
  width: 80px;
  height: 6px;
  border-radius: 3px;
  background: #ddd;
  outline: none;
  -webkit-appearance: none;
  appearance: none;
}
/* The default outline is removed above, so restore a visible ring for keyboard focus. */
.volume-control input[type="range"]:focus-visible {
  outline: 2px solid #667eea;
  outline-offset: 3px;
}
.volume-control input[type="range"]::-webkit-slider-thumb {
  -webkit-appearance: none;
  width: 14px;
  height: 14px;
  border-radius: 50%;
  background: #667eea;
  cursor: pointer;
}
/* Firefox styles the thumb through its own pseudo-element. */
.volume-control input[type="range"]::-moz-range-thumb {
  width: 14px;
  height: 14px;
  border: none;
  border-radius: 50%;
  background: #667eea;
  cursor: pointer;
}
.volume-value {
  font-size: 12px;
  color: #666;
  min-width: 35px;
}
.voice-select {
margin-top: 10px;
}
.voice-select select {
width: 100%;
padding: 8px;
border: 1px solid #ddd;
border-radius: 5px;
}
/* 录音区域 */
.record-section {
text-align: center;
margin-bottom: 20px;
}
.record-btn {
width: 100px;
height: 100px;
border-radius: 50%;
border: none;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
font-size: 24px;
cursor: pointer;
transition: all 0.3s ease;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
gap: 5px;
margin: 0 auto;
}
.record-btn:hover {
transform: scale(1.05);
box-shadow: 0 10px 30px rgba(102,126,234,0.4);
}
.record-btn.recording {
background: linear-gradient(135deg, #f44336 0%, #e53935 100%);
animation: recording-pulse 1s infinite;
}
@keyframes recording-pulse {
0%, 100% { transform: scale(1); }
50% { transform: scale(1.1); }
}
.record-status {
margin-top: 10px;
font-size: 14px;
color: #666;
}
.waveform {
display: flex;
justify-content: center;
gap: 3px;
margin-top: 10px;
height: 30px;
}
.wave-bar {
width: 4px;
height: 10px;
background: #667eea;
border-radius: 2px;
animation: wave 0.5s infinite ease-in-out;
}
@keyframes wave {
0%, 100% { height: 10px; }
50% { height: 25px; }
}
/* 文字输入 */
.text-section {
margin: 15px 0;
}
.text-input-wrapper {
display: flex;
gap: 10px;
}
.text-input {
flex: 1;
padding: 10px 12px;
border: 2px solid #eee;
border-radius: 8px;
font-size: 14px;
outline: none;
}
.text-input:focus {
border-color: #667eea;
}
.send-text-btn {
padding: 10px 15px;
border: none;
border-radius: 8px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
cursor: pointer;
}
/* 消息区域 */
.chat-section {
max-height: 350px;
overflow-y: auto;
border: 1px solid #eee;
border-radius: 10px;
padding: 15px;
background: #f9f9f9;
}
.message {
margin-bottom: 12px;
padding: 10px 12px;
border-radius: 10px;
max-width: 85%;
}
.message.user {
background: #667eea;
color: white;
margin-left: auto;
}
.message.assistant {
background: white;
color: #333;
border: 1px solid #eee;
}
.message .role {
font-size: 12px;
font-weight: bold;
margin-bottom: 5px;
opacity: 0.8;
}
.message .content {
font-size: 14px;
line-height: 1.5;
}
.audio-content {
display: flex;
align-items: center;
}
.play-btn {
display: inline-flex;
align-items: center;
gap: 6px;
padding: 6px 12px;
border-radius: 15px;
border: none;
background: rgba(255,255,255,0.2);
cursor: pointer;
font-size: 13px;
}
.play-btn:hover {
background: rgba(255,255,255,0.3);
}
.tts-play-btn {
background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
color: white;
margin-left: 10px;
}
.loading {
display: flex;
justify-content: center;
gap: 8px;
padding: 15px;
}
.loading-dot {
width: 8px;
height: 8px;
border-radius: 50%;
background: #667eea;
animation: loading-bounce 0.6s infinite;
}
@keyframes loading-bounce {
0%, 80%, 100% { transform: scale(0); }
40% { transform: scale(1); }
}
.actions {
margin-top: 15px;
text-align: center;
}
.clear-btn {
padding: 8px 15px;
border: 1px solid #ddd;
border-radius: 6px;
background: white;
cursor: pointer;
}
.error-message {
background: #ffebee;
color: #c62828;
padding: 10px;
border-radius: 8px;
margin-bottom: 10px;
text-align: center;
}
.hint {
font-size: 13px;
color: #999;
text-align: center;
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>🎤 语音对话TTS版</h1>
<div class="status-indicator">
<span class="status-dot" id="statusDot"></span>
<span id="statusText">检测连接...</span>
</div>
</div>
<!-- TTS settings: provider picker, voice selector, auto-play switch and volume slider -->
<div class="tts-section">
  <h3>🔊 TTS语音合成设置</h3>
  <!-- Shown by checkBrowser() on browsers known to restrict audio playback -->
  <div class="browser-tip" id="browserTip" style="display: none; background: #fff3cd; padding: 8px 12px; border-radius: 6px; margin-bottom: 10px; font-size: 13px; color: #856404;">
    ⚠️ 当前浏览器可能限制音频播放,建议使用 Chrome/Firefox或点击页面任意位置解锁播放
  </div>
  <div class="tts-options" id="ttsOptions">
    <div class="tts-option" data-provider="none">
      <div class="name">❌ 无 TTS</div>
      <div class="status">只显示文字</div>
    </div>
    <div class="tts-option" data-provider="edge">
      <div class="name">🌐 Edge TTS</div>
      <div class="status" id="edgeStatus">检测中...</div>
    </div>
    <div class="tts-option" data-provider="chattts">
      <div class="name">🤖 ChatTTS</div>
      <div class="status" id="chatttsStatus">检测中...</div>
    </div>
  </div>
  <div class="voice-select" id="voiceSelect" style="display: none;">
    <!-- "for" ties the label to the select so assistive tech announces it -->
    <label for="voiceDropdown">音色选择:</label>
    <select id="voiceDropdown">
      <option value="zh-CN-XiaoxiaoNeural">晓晓(女)</option>
      <option value="zh-CN-YunxiNeural">云希(男)</option>
      <option value="zh-CN-YunyangNeural">云扬(男)</option>
      <option value="zh-CN-XiaochenNeural">晓晨(女)</option>
      <option value="zh-CN-XiaohanNeural">晓涵(女)</option>
      <option value="zh-CN-XiaoyouNeural">晓悠(女)</option>
    </select>
  </div>
  <div class="tts-controls" id="ttsControls" style="display: none;">
    <div class="auto-play-switch">
      <label class="switch">
        <!-- The wrapping label has no text, so the name comes from the visible caption -->
        <input type="checkbox" id="autoPlaySwitch" aria-labelledby="autoPlayLabel" checked>
        <span class="slider"></span>
      </label>
      <span id="autoPlayLabel">自动播放</span>
    </div>
    <div class="volume-control">
      <span aria-hidden="true">🔊</span>
      <input type="range" id="volumeSlider" min="0.5" max="2" step="0.1" value="1.5" aria-label="音量">
      <span class="volume-value" id="volumeValue">150%</span>
    </div>
  </div>
</div>
<!-- Recording: toggle button, status line and decorative waveform -->
<div class="record-section">
  <button type="button" class="record-btn" id="recordBtn">
    <!-- The emoji is decorative; the button's name comes from the text span -->
    <span class="icon" aria-hidden="true">🎤</span>
    <span class="text">录音</span>
  </button>
  <!-- Live region so status changes are announced to screen readers -->
  <div class="record-status" id="recordStatus" aria-live="polite">点击按钮开始录音</div>
  <div class="waveform" id="waveform" style="display: none;" aria-hidden="true">
    <div class="wave-bar"></div>
    <div class="wave-bar"></div>
    <div class="wave-bar"></div>
    <div class="wave-bar"></div>
    <div class="wave-bar"></div>
    <div class="wave-bar"></div>
    <div class="wave-bar"></div>
    <div class="wave-bar"></div>
  </div>
</div>
<!-- Text input -->
<div class="text-section">
  <div class="text-input-wrapper">
    <!-- A placeholder is not an accessible label, so the field is named explicitly -->
    <input type="text" id="textInput" placeholder="输入文字消息..." class="text-input" aria-label="输入文字消息">
    <button type="button" id="sendTextBtn" class="send-text-btn">发送</button>
  </div>
</div>
<!-- Message area: role="log" lets assistive tech announce messages as they are appended -->
<div class="chat-section" id="chatSection" role="log" aria-live="polite">
  <div class="hint">开始对话吧!</div>
</div>
<div class="actions">
  <button type="button" class="clear-btn" id="clearBtn">清除对话</button>
</div>
</div>
<script>
// Base path of the backend API, relative to the page origin.
const API_URL = '/api';

// --- Page state ---
let isRecording = false;                    // true while captured samples are being buffered
let audioContext = null;                    // capture AudioContext, created on the first recording
let audioStream = null;                     // microphone MediaStream from getUserMedia
let scriptProcessor = null;                 // ScriptProcessorNode that receives the sample chunks
let recordedBuffers = [];                   // Float32Array chunks of the recording in progress
let conversationId = null;                  // id returned by the server with each chat reply
let currentTTSProvider = 'none';            // selected TTS provider
let currentVoice = 'zh-CN-XiaoxiaoNeural';  // voice id sent with the TTS settings
let autoPlay = true;                        // state of the auto-play switch
let volumeLevel = 1.5;                      // volume multiplier from the slider (1.5 = 150%)
let userInteracted = false;                 // set once the user has clicked or touched the page

// Remember the first click or touch anywhere on the page; auto-play of replies
// is gated on this flag. Each listener removes itself after firing once.
['click', 'touchstart'].forEach((eventName) => {
  document.addEventListener(eventName, () => {
    userInteracted = true;
  }, { once: true });
});
// Cached references to the page elements the script reads or updates.
const statusDot = document.querySelector('#statusDot');
const statusText = document.querySelector('#statusText');
const recordBtn = document.querySelector('#recordBtn');
const recordStatus = document.querySelector('#recordStatus');
const waveform = document.querySelector('#waveform');
const chatSection = document.querySelector('#chatSection');
const clearBtn = document.querySelector('#clearBtn');
const textInput = document.querySelector('#textInput');
const sendTextBtn = document.querySelector('#sendTextBtn');
const ttsOptions = document.querySelector('#ttsOptions');
const voiceSelect = document.querySelector('#voiceSelect');
const voiceDropdown = document.querySelector('#voiceDropdown');
const ttsControls = document.querySelector('#ttsControls');
const autoPlaySwitch = document.querySelector('#autoPlaySwitch');
const volumeSlider = document.querySelector('#volumeSlider');
const volumeValue = document.querySelector('#volumeValue');
// Page start-up: check the backend status, load the TTS provider list, show
// the browser compatibility tip when needed, then keep polling the status.
async function init() {
  const STATUS_POLL_MS = 10000; // status refresh period in milliseconds

  await checkStatus();
  await loadTTSProviders();
  // Flag special browsers (Xiaomi browser etc.)
  checkBrowser();
  // Re-check the service status periodically
  setInterval(checkStatus, STATUS_POLL_MS);
}
// Browser compatibility check: reveals the #browserTip banner when the
// user-agent string contains one of the listed browser markers.
function checkBrowser() {
  // User-agent fragments of Xiaomi, Huawei and similar browsers
  const flaggedTokens = ['miui', 'xiaomi', 'huawei', 'micromessenger', 'quark', 'ucbrowser'];
  const userAgent = navigator.userAgent.toLowerCase();
  const tipEl = document.getElementById('browserTip');

  const isFlagged = flaggedTokens.some((token) => userAgent.includes(token));
  if (isFlagged) {
    tipEl.style.display = 'block';
  }
}
// Service status check: queries `${API_URL}/status` and updates the header
// indicator. The dot is marked online only when the response reports a truthy
// `model_online`; a failed request or unparsable body is shown as a connection
// failure.
async function checkStatus() {
  let dotClass = 'status-dot offline';
  let label;
  try {
    const response = await fetch(`${API_URL}/status`);
    const payload = await response.json();
    if (payload.model_online) {
      dotClass = 'status-dot online';
      label = '模型服务已连接';
    } else {
      label = '模型服务离线';
    }
  } catch (err) {
    label = '服务连接失败';
  }
  statusDot.className = dotClass;
  statusText.textContent = label;
}
// Load the TTS providers.
//
// Fetches `${API_URL}/tts/providers`, updates the availability text and the
// `disabled` class of the Edge / ChatTTS options, then activates the provider
// the server reports as current. If that provider cannot be selected (unknown
// or unavailable), the page falls back to 'none' so that the highlighted
// option and `currentTTSProvider` never disagree. Failures are logged and
// leave the UI as it was.
async function loadTTSProviders() {
  try {
    const resp = await fetch(`${API_URL}/tts/providers`);
    // fetch() only rejects on network errors, so HTTP errors are checked explicitly.
    if (!resp.ok) {
      throw new Error(`HTTP ${resp.status}`);
    }
    const data = await resp.json();

    // Update the availability read-outs
    const providers = Array.isArray(data.providers) ? data.providers : [];
    providers.forEach((p) => {
      const statusElId = p.name === 'edge' ? 'edgeStatus' :
        p.name === 'chattts' ? 'chatttsStatus' : null;
      if (!statusElId) {
        return;
      }
      document.getElementById(statusElId).textContent = p.available ? '可用 ✓' : '不可用';
      const optionEl = ttsOptions.querySelector(`[data-provider="${p.name}"]`);
      if (optionEl) {
        optionEl.classList.toggle('disabled', !p.available);
      }
    });

    // Activate the server's current provider. selectProvider() is the only
    // place that assigns currentTTSProvider, and it ignores disabled options.
    const requested = data.current || 'none';
    selectProvider(requested);
    const active = ttsOptions.querySelector('.tts-option.selected');
    if (!active || active.dataset.provider !== requested) {
      selectProvider('none');
    }
  } catch (e) {
    console.error('加载 TTS 方案失败:', e);
  }
}
// Select a TTS provider.
//
// Highlights the option whose `data-provider` equals `provider`, records it in
// `currentTTSProvider`, shows the voice selector only for 'edge', shows the
// playback controls for every provider except 'none', and saves the settings.
// An unknown or disabled provider is ignored entirely: the existing highlight
// and state are left untouched.
function selectProvider(provider) {
  // Match on dataset instead of building a selector from the name, so an
  // unexpected provider string cannot produce an invalid selector.
  const options = Array.from(ttsOptions.querySelectorAll('.tts-option'));
  const option = options.find((el) => el.dataset.provider === String(provider));
  if (!option || option.classList.contains('disabled')) {
    return;
  }

  // Validate first, then move the highlight, so a rejected click changes nothing.
  options.forEach((el) => {
    el.classList.toggle('selected', el === option);
  });
  currentTTSProvider = option.dataset.provider;

  // Voice selection only applies to Edge TTS
  voiceSelect.style.display = currentTTSProvider === 'edge' ? 'block' : 'none';
  // Playback controls are only useful when a TTS provider is active
  ttsControls.style.display = currentTTSProvider !== 'none' ? 'flex' : 'none';

  // Save the settings
  saveTTSSettings();
}
// Save the TTS settings: POSTs the current provider and voice as JSON to
// `${API_URL}/tts/settings`. Best effort — failures are logged, never thrown.
async function saveTTSSettings() {
  try {
    const resp = await fetch(`${API_URL}/tts/settings`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        provider: currentTTSProvider,
        voice: currentVoice
      })
    });
    // fetch() resolves for 4xx/5xx responses too; report them instead of
    // silently treating the save as successful.
    if (!resp.ok) {
      console.error('保存设置失败:', `HTTP ${resp.status}`);
    }
  } catch (e) {
    console.error('保存设置失败:', e);
  }
}
// WAV encoding
/**
 * Packs mono float samples into a 16-bit PCM WAV file.
 *
 * @param {Float32Array|number[]} audioBuffer Samples; converted to 16-bit by floatTo16BitPCM.
 * @param {number} [sampleRate=16000] Sample rate written into the header.
 * @returns {Blob} Blob of type `audio/wav`: a 44-byte header followed by the samples.
 */
function createWavFile(audioBuffer, sampleRate = 16000) {
  const HEADER_BYTES = 44;
  const CHANNELS = 1;
  const BITS_PER_SAMPLE = 16;
  const sampleBytes = BITS_PER_SAMPLE / 8;
  const frameBytes = CHANNELS * sampleBytes;
  const payloadBytes = audioBuffer.length * sampleBytes;

  const wav = new ArrayBuffer(HEADER_BYTES + payloadBytes);
  const out = new DataView(wav);

  // RIFF chunk descriptor
  writeString(out, 0, 'RIFF');
  out.setUint32(4, HEADER_BYTES + payloadBytes - 8, true); // total size minus the 8-byte RIFF header
  writeString(out, 8, 'WAVE');

  // "fmt " sub-chunk
  writeString(out, 12, 'fmt ');
  out.setUint32(16, 16, true);                      // sub-chunk size
  out.setUint16(20, 1, true);                       // audio format 1 (PCM)
  out.setUint16(22, CHANNELS, true);
  out.setUint32(24, sampleRate, true);
  out.setUint32(28, sampleRate * frameBytes, true); // byte rate
  out.setUint16(32, frameBytes, true);              // block align
  out.setUint16(34, BITS_PER_SAMPLE, true);

  // "data" sub-chunk
  writeString(out, 36, 'data');
  out.setUint32(40, payloadBytes, true);
  floatTo16BitPCM(out, HEADER_BYTES, audioBuffer);

  return new Blob([wav], { type: 'audio/wav' });
}
/**
 * Writes `string` into `view` one byte per UTF-16 code unit, starting at `offset`.
 *
 * @param {DataView} view Destination view.
 * @param {number} offset Byte position of the first character.
 * @param {string} string Text to write.
 */
function writeString(view, offset, string) {
  // split('') yields UTF-16 code units, matching index-based charCodeAt access.
  string.split('').forEach((unit, index) => {
    view.setUint8(offset + index, unit.charCodeAt(0));
  });
}
/**
 * Converts float samples to little-endian signed 16-bit integers.
 *
 * Each sample is clamped to [-1, 1]; negative values are scaled by 0x8000 and
 * non-negative values by 0x7FFF before being stored two bytes apart.
 *
 * @param {DataView} view Destination view.
 * @param {number} offset Byte position of the first sample.
 * @param {Float32Array|number[]} input Samples to convert.
 */
function floatTo16BitPCM(view, offset, input) {
  const count = input.length;
  for (let index = 0; index < count; index += 1) {
    const clamped = Math.min(1, Math.max(-1, input[index]));
    const scale = clamped < 0 ? 0x8000 : 0x7FFF;
    view.setInt16(offset + 2 * index, clamped * scale, true);
  }
}
// Initialise recording.
//
// Requests the microphone, creates a 16 kHz capture AudioContext and wires a
// ScriptProcessorNode that appends each incoming chunk to `recordedBuffers`
// while `isRecording` is true.
//
// The graph is built in local variables and published to the shared
// `audioStream` / `audioContext` / `scriptProcessor` state only once every step
// has succeeded. On failure the microphone tracks are stopped and the context
// is closed, so a later attempt starts from a clean slate.
//
// Returns true on success; on failure shows an error and returns false.
async function initAudio() {
  let stream = null;
  let context = null;
  try {
    stream = await navigator.mediaDevices.getUserMedia({
      audio: { echoCancellation: true, noiseSuppression: true, sampleRate: 16000 }
    });
    context = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
    const source = context.createMediaStreamSource(stream);
    const processor = context.createScriptProcessor(4096, 1, 1);
    processor.onaudioprocess = (e) => {
      if (isRecording) {
        // slice() copies the chunk; the underlying buffer is reused between callbacks
        recordedBuffers.push(e.inputBuffer.getChannelData(0).slice());
      }
    };
    source.connect(processor);
    processor.connect(context.destination);

    audioStream = stream;
    audioContext = context;
    scriptProcessor = processor;
    return true;
  } catch (e) {
    console.error('初始化录音失败:', e);
    // Release whatever was acquired before the failure.
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
    }
    if (context && context.state !== 'closed') {
      Promise.resolve(context.close()).catch(() => {});
    }
    showError('无法访问麦克风');
    return false;
  }
}
// Start recording.
//
// Initialises the capture graph on first use, then clears the sample buffer
// and switches the button, status text and waveform to the recording state.
async function startRecording() {
  if (!audioContext) {
    // The permission prompt can stay open for a while; ignore further clicks
    // until it resolves so the capture graph is not created twice.
    if (recordBtn.dataset.initializing) {
      return;
    }
    recordBtn.dataset.initializing = '1';
    let ready = false;
    try {
      ready = await initAudio();
    } finally {
      delete recordBtn.dataset.initializing;
    }
    if (!ready) return;
  }

  // A suspended context delivers no audio callbacks. Ask it to resume without
  // awaiting, so a resume that never settles cannot block the UI update.
  if (audioContext.state === 'suspended') {
    Promise.resolve(audioContext.resume()).catch(() => {});
  }

  recordedBuffers = [];
  isRecording = true;
  recordBtn.classList.add('recording');
  recordBtn.querySelector('.icon').textContent = '⏹️';
  recordBtn.querySelector('.text').textContent = '停止';
  recordStatus.textContent = '正在录音...';
  waveform.style.display = 'flex';
}
// Stop recording.
//
// Restores the idle UI, merges the captured chunks into one buffer, encodes it
// as WAV and hands it to sendAudio(). The WAV header and the displayed
// duration use the capture context's actual sample rate, because a browser may
// not honour the 16 kHz requested when the context was created. A recording
// with no samples is reported to the user instead of being uploaded.
function stopRecording() {
  if (!isRecording) {
    return;
  }
  isRecording = false;

  recordBtn.classList.remove('recording');
  recordBtn.querySelector('.icon').textContent = '🎤';
  recordBtn.querySelector('.text').textContent = '录音';
  waveform.style.display = 'none';

  const totalLength = recordedBuffers.reduce((acc, buf) => acc + buf.length, 0);
  if (totalLength === 0) {
    // Stopped before the first chunk arrived: nothing worth sending.
    recordStatus.textContent = '点击按钮开始录音';
    showError('录音时间太短');
    return;
  }

  const mergedBuffer = new Float32Array(totalLength);
  let offset = 0;
  for (const buf of recordedBuffers) {
    mergedBuffer.set(buf, offset);
    offset += buf.length;
  }
  recordedBuffers = []; // the chunks are merged; release them

  const sampleRate = (audioContext && audioContext.sampleRate) || 16000;
  const wavBlob = createWavFile(mergedBuffer, sampleRate);
  const duration = Math.round(totalLength / sampleRate);

  recordStatus.textContent = '处理中...';
  sendAudio(wavBlob, duration);
}
// Send a recording.
//
// Uploads the WAV blob to `${API_URL}/voice/chat`, shows the user's audio
// message, optionally synthesises speech for the reply and shows the
// assistant message. HTTP errors and responses without a reply are reported
// through showError() instead of being rendered. The record status line is
// reset whether the request succeeded or failed.
//
// audioBlob: WAV recording to upload.
// duration:  length in seconds shown on the user's play button.
async function sendAudio(audioBlob, duration) {
  showLoading();
  const formData = new FormData();
  formData.append('audio', audioBlob, 'recording.wav');
  if (conversationId) formData.append('conversation_id', conversationId);
  try {
    const resp = await fetch(`${API_URL}/voice/chat`, {
      method: 'POST',
      body: formData
    });
    // fetch() resolves for 4xx/5xx responses as well.
    if (!resp.ok) {
      throw new Error(`HTTP ${resp.status}`);
    }
    const data = await resp.json();
    if (data.reply == null) {
      throw new Error('响应中缺少回复内容');
    }
    // Keep the existing id if the response does not carry one.
    if (data.conversation_id) {
      conversationId = data.conversation_id;
    }
    addMessage('user', audioBlob, duration);
    // TTS synthesis
    let ttsAudioUrl = null;
    if (currentTTSProvider !== 'none') {
      ttsAudioUrl = await synthesizeTTS(data.reply);
    }
    addMessage('assistant', data.reply, ttsAudioUrl);
  } catch (e) {
    showError('发送失败: ' + e.message);
  } finally {
    // Do not overwrite the "recording" status of a recording started meanwhile.
    if (!isRecording) {
      recordStatus.textContent = '点击按钮开始录音';
    }
  }
}
// Send a text message.
//
// Posts `text` to `${API_URL}/voice/text`, shows the user's message,
// optionally synthesises speech for the reply and shows the assistant
// message. Blank input is ignored. HTTP errors and responses without a reply
// are reported through showError() instead of being rendered.
async function sendText(text) {
  if (!text.trim()) return;
  showLoading();
  const formData = new FormData();
  formData.append('text', text);
  if (conversationId) formData.append('conversation_id', conversationId);
  try {
    const resp = await fetch(`${API_URL}/voice/text`, {
      method: 'POST',
      body: formData
    });
    // fetch() resolves for 4xx/5xx responses as well.
    if (!resp.ok) {
      throw new Error(`HTTP ${resp.status}`);
    }
    const data = await resp.json();
    if (data.reply == null) {
      throw new Error('响应中缺少回复内容');
    }
    // Keep the existing id if the response does not carry one.
    if (data.conversation_id) {
      conversationId = data.conversation_id;
    }
    addMessage('user', text);
    // Clear the box only if it still holds the text that was sent, so anything
    // typed while waiting for the reply is preserved.
    if (textInput.value === text) {
      textInput.value = '';
    }
    // TTS synthesis
    let ttsAudioUrl = null;
    if (currentTTSProvider !== 'none') {
      ttsAudioUrl = await synthesizeTTS(data.reply);
    }
    addMessage('assistant', data.reply, ttsAudioUrl);
  } catch (e) {
    showError('发送失败: ' + e.message);
  }
}
// TTS synthesis.
//
// Posts `text` to `${API_URL}/tts/synthesize` and returns the `audio_url`
// from the JSON response. Returns null when the request fails, the server
// answers with an HTTP error, or the response carries no URL, so callers can
// treat any falsy result as "no audio".
async function synthesizeTTS(text) {
  try {
    const formData = new FormData();
    formData.append('text', text);
    const resp = await fetch(`${API_URL}/tts/synthesize`, {
      method: 'POST',
      body: formData
    });
    // fetch() resolves for 4xx/5xx responses as well.
    if (!resp.ok) {
      throw new Error(`HTTP ${resp.status}`);
    }
    const data = await resp.json();
    return (data && data.audio_url) || null;
  } catch (e) {
    console.error('TTS 合成失败:', e);
    return null;
  }
}
// Add a message to the chat.
//
// role:      'user' or 'assistant' (also used as a CSS class on the bubble).
// content:   a Blob for a recorded user message, otherwise the message text.
// audioData: for a recorded user message, the duration in seconds;
//            for an assistant message, the URL of the synthesised reply (or null).
//
// The bubble is built with DOM APIs and textContent rather than innerHTML, so
// message text and audio URLs are never parsed as markup. When auto-play is
// on, the user has already interacted with the page and the assistant message
// has audio, playback starts shortly after the bubble is inserted.
function addMessage(role, content, audioData = null) {
  const hint = chatSection.querySelector('.hint');
  if (hint) hint.remove();
  const loading = chatSection.querySelector('.loading');
  if (loading) loading.remove();

  // Builds a play button: an icon span plus a caption span, wired to playAudio().
  const createPlayButton = (url, className, iconText, captionText, captionClass) => {
    const btn = document.createElement('button');
    btn.type = 'button';
    btn.className = className;
    const icon = document.createElement('span');
    icon.className = 'play-icon';
    icon.textContent = iconText;
    const caption = document.createElement('span');
    if (captionClass) caption.className = captionClass;
    caption.textContent = captionText;
    btn.appendChild(icon);
    btn.appendChild(caption);
    btn.addEventListener('click', () => playAudio(url, btn));
    return btn;
  };

  const msg = document.createElement('div');
  msg.className = `message ${role}`;

  const roleEl = document.createElement('div');
  roleEl.className = 'role';
  roleEl.textContent = role === 'assistant' ? 'AI' : '我';

  const contentEl = document.createElement('div');
  contentEl.className = 'content';

  let ttsBtn = null;
  if (role === 'user' && content instanceof Blob) {
    // Recorded message: show a play button with the duration.
    contentEl.classList.add('audio-content');
    const audioUrl = URL.createObjectURL(content);
    contentEl.appendChild(
      createPlayButton(audioUrl, 'play-btn', '▶️', `${audioData}s`, 'duration')
    );
  } else if (role === 'assistant') {
    contentEl.appendChild(document.createTextNode(String(content)));
    if (audioData) {
      ttsBtn = createPlayButton(audioData, 'play-btn tts-play-btn', '🔊', '播放回复', '');
      ttsBtn.id = `audioBtn_${Date.now()}`;
      contentEl.appendChild(ttsBtn);
    }
  } else {
    contentEl.textContent = String(content);
  }

  msg.appendChild(roleEl);
  msg.appendChild(contentEl);
  chatSection.appendChild(msg);
  chatSection.scrollTop = chatSection.scrollHeight;

  // Auto-play once the element is in the DOM. The button is referenced
  // directly; it is skipped if the chat was cleared in the meantime.
  if (ttsBtn && autoPlay && userInteracted) {
    setTimeout(() => {
      if (chatSection.contains(ttsBtn)) {
        playAudio(audioData, ttsBtn);
      }
    }, 100);
  }
}
// Audio playback

// AudioContext used only to amplify playback above 100%. Created on first use
// and kept separate from the capture context.
let playbackContext = null;

// Routes `audio` through a GainNode so it can play louder than the element's
// own maximum volume. Returns true when the gain stage was attached, false
// when Web Audio is unavailable, the URL is cross-origin, or wiring failed.
function boostPlayback(audio, url, gain) {
  const AudioContextClass = window.AudioContext || window.webkitAudioContext;
  if (!AudioContextClass) {
    return false;
  }
  try {
    // Web Audio outputs silence for cross-origin media that lacks CORS
    // approval, so only blob: and same-origin URLs are amplified.
    const target = new URL(url, window.location.href);
    if (target.protocol !== 'blob:' && target.origin !== window.location.origin) {
      return false;
    }
    if (!playbackContext) {
      playbackContext = new AudioContextClass();
    }
    if (playbackContext.state === 'suspended') {
      Promise.resolve(playbackContext.resume()).catch(() => {});
    }
    const gainNode = playbackContext.createGain();
    gainNode.gain.value = gain;
    gainNode.connect(playbackContext.destination);
    playbackContext.createMediaElementSource(audio).connect(gainNode);
    return true;
  } catch (e) {
    console.error('音量增益设置失败:', e);
    return false;
  }
}

// Plays `url` and reflects the playback state on `btn`.
//
// url: audio URL (server path, absolute URL or blob: URL).
// btn: the button that triggered playback; must contain a `.play-icon` element.
//
// The volume slider value (`volumeLevel`) is applied here. Up to 100% it is
// set on the element itself; above 100% a gain stage is used where possible,
// otherwise playback falls back to 100%. When playback ends or fails, the
// button shows the icon it had before it was first played.
function playAudio(url, btn) {
  const audio = new Audio(url);
  const icon = btn.querySelector('.play-icon');
  // Remember the idle icon once, so replaying while audio is active cannot
  // capture the "playing" icon as the idle one.
  if (!btn.dataset.idleIcon) {
    btn.dataset.idleIcon = icon.textContent;
  }

  // HTMLMediaElement.volume only accepts values in [0, 1].
  const level = Number.isFinite(volumeLevel) ? Math.max(0, volumeLevel) : 1;
  const boosted = level > 1 && boostPlayback(audio, url, level);
  audio.volume = boosted ? 1 : Math.min(1, level);

  const resetButton = () => {
    icon.textContent = btn.dataset.idleIcon;
    btn.classList.remove('playing');
  };
  audio.onplay = () => {
    icon.textContent = '🔊';
    btn.classList.add('playing');
  };
  audio.onended = resetButton;
  audio.onerror = resetButton;

  // play() returns a promise that rejects when the browser blocks playback.
  const started = audio.play();
  if (started && typeof started.catch === 'function') {
    started.catch((err) => {
      console.error('音频播放失败:', err);
      resetButton();
    });
  }
}
// Show the loading indicator: removes the start hint, appends a `.loading`
// row with three dots to the chat and scrolls it into view.
function showLoading() {
  const startHint = chatSection.querySelector('.hint');
  if (startHint) {
    startHint.remove();
  }

  const spinner = document.createElement('div');
  spinner.className = 'loading';
  for (let i = 0; i < 3; i += 1) {
    const dot = document.createElement('div');
    dot.className = 'loading-dot';
    spinner.appendChild(dot);
  }

  chatSection.appendChild(spinner);
  chatSection.scrollTop = chatSection.scrollHeight;
}
// Show an error: removes the loading indicator (if any) and appends a banner
// with `message` to the chat; the banner removes itself after three seconds.
function showError(message) {
  const spinner = chatSection.querySelector('.loading');
  if (spinner) {
    spinner.remove();
  }

  const banner = Object.assign(document.createElement('div'), {
    className: 'error-message',
    textContent: message
  });
  chatSection.appendChild(banner);

  setTimeout(() => {
    banner.remove();
  }, 3000);
}
// Clear the conversation.
//
// Resets the local chat first, so the UI always clears, then asks the server
// to delete the conversation. The server call is best effort: failures are
// logged and never leave the page in a half-cleared state.
async function clearChat() {
  const staleId = conversationId;
  conversationId = null;
  chatSection.innerHTML = '<div class="hint">开始对话吧!</div>';

  if (!staleId) {
    return;
  }
  try {
    const resp = await fetch(`${API_URL}/conversation/${encodeURIComponent(staleId)}`, {
      method: 'DELETE'
    });
    if (!resp.ok) {
      console.error('清除对话失败:', `HTTP ${resp.status}`);
    }
  } catch (e) {
    console.error('清除对话失败:', e);
  }
}
// Event wiring

// Provider picker: one delegated listener handles clicks on any option.
ttsOptions.addEventListener('click', (event) => {
  const clicked = event.target.closest('.tts-option');
  if (!clicked) {
    return;
  }
  selectProvider(clicked.dataset.provider);
});

// Voice selector: remember the choice and save it.
voiceDropdown.addEventListener('change', () => {
  currentVoice = voiceDropdown.value;
  saveTTSSettings();
});

// Auto-play switch
autoPlaySwitch.addEventListener('change', () => {
  autoPlay = autoPlaySwitch.checked;
});

// Volume slider: store the multiplier and show it as a percentage.
volumeSlider.addEventListener('input', () => {
  volumeLevel = parseFloat(volumeSlider.value);
  const percent = Math.round(volumeLevel * 100);
  volumeValue.textContent = `${percent}%`;
});

// Record button toggles between starting and stopping a recording.
recordBtn.addEventListener('click', () => {
  if (isRecording) {
    stopRecording();
  } else {
    startRecording();
  }
});

// Text message: send on button click or on Enter inside the input.
sendTextBtn.addEventListener('click', () => {
  sendText(textInput.value);
});
textInput.addEventListener('keypress', (event) => {
  if (event.key === 'Enter') {
    sendText(textInput.value);
  }
});

clearBtn.addEventListener('click', clearChat);

// Start-up
init();
</script>
</body>
</html>