Files
voice-chat-web/static/tts.html

873 lines
28 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>语音对话TTS版</title>
<style>
/* Global reset: remove default margins/padding and size every box by its border edge. */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
/* Page background; the flex rules center the single card in the viewport. */
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
display: flex;
justify-content: center;
align-items: center;
padding: 20px;
}
/* White card that holds the whole UI. */
.container {
max-width: 700px;
width: 100%;
background: white;
border-radius: 20px;
padding: 30px;
box-shadow: 0 20px 60px rgba(0,0,0,0.3);
}
/* Title block */
.header {
text-align: center;
margin-bottom: 20px;
}
.header h1 {
color: #333;
font-size: 28px;
margin-bottom: 10px;
}
/* Connection status: colored dot plus text. */
.status-indicator {
display: inline-flex;
align-items: center;
gap: 8px;
font-size: 14px;
color: #666;
}
/* Grey by default; the script swaps in .online / .offline. */
.status-dot {
width: 10px;
height: 10px;
border-radius: 50%;
background: #ccc;
}
.status-dot.online {
background: #4CAF50;
animation: pulse 2s infinite;
}
.status-dot.offline {
background: #f44336;
}
/* Opacity pulse used by the online status dot. */
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
/* TTS settings area */
.tts-section {
background: #f5f7fa;
border-radius: 10px;
padding: 15px;
margin-bottom: 20px;
}
.tts-section h3 {
color: #333;
font-size: 16px;
margin-bottom: 12px;
}
/* Row of equally sized provider cards. */
.tts-options {
display: flex;
gap: 10px;
margin-bottom: 10px;
}
.tts-option {
flex: 1;
padding: 10px;
border: 2px solid #eee;
border-radius: 8px;
cursor: pointer;
text-align: center;
transition: all 0.2s;
}
.tts-option:hover {
border-color: #667eea;
}
/* Currently chosen provider. */
.tts-option.selected {
border-color: #667eea;
background: rgba(102,126,234,0.1);
}
/* Visual state only; the script checks this class before allowing selection. */
.tts-option.disabled {
opacity: 0.5;
cursor: not-allowed;
}
.tts-option .name {
font-weight: bold;
color: #333;
}
.tts-option .status {
font-size: 12px;
color: #999;
}
/* Voice dropdown shown below the provider cards. */
.voice-select {
margin-top: 10px;
}
.voice-select select {
width: 100%;
padding: 8px;
border: 1px solid #ddd;
border-radius: 5px;
}
/* Recording area */
.record-section {
text-align: center;
margin-bottom: 20px;
}
/* Round record/stop toggle button. */
.record-btn {
width: 100px;
height: 100px;
border-radius: 50%;
border: none;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
font-size: 24px;
cursor: pointer;
transition: all 0.3s ease;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
gap: 5px;
margin: 0 auto;
}
.record-btn:hover {
transform: scale(1.05);
box-shadow: 0 10px 30px rgba(102,126,234,0.4);
}
/* Red, pulsing look applied while a recording is in progress. */
.record-btn.recording {
background: linear-gradient(135deg, #f44336 0%, #e53935 100%);
animation: recording-pulse 1s infinite;
}
@keyframes recording-pulse {
0%, 100% { transform: scale(1); }
50% { transform: scale(1.1); }
}
.record-status {
margin-top: 10px;
font-size: 14px;
color: #666;
}
/* Animated bars shown while recording; a fixed CSS animation, not driven by audio data. */
.waveform {
display: flex;
justify-content: center;
gap: 3px;
margin-top: 10px;
height: 30px;
}
.wave-bar {
width: 4px;
height: 10px;
background: #667eea;
border-radius: 2px;
animation: wave 0.5s infinite ease-in-out;
}
@keyframes wave {
0%, 100% { height: 10px; }
50% { height: 25px; }
}
/* Text input */
.text-section {
margin: 15px 0;
}
/* Input and send button laid out side by side. */
.text-input-wrapper {
display: flex;
gap: 10px;
}
.text-input {
flex: 1;
padding: 10px 12px;
border: 2px solid #eee;
border-radius: 8px;
font-size: 14px;
outline: none;
}
/* Border color change stands in for the removed default focus outline. */
.text-input:focus {
border-color: #667eea;
}
.send-text-btn {
padding: 10px 15px;
border: none;
border-radius: 8px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
cursor: pointer;
}
/* Message area */
.chat-section {
max-height: 350px;
overflow-y: auto;
border: 1px solid #eee;
border-radius: 10px;
padding: 15px;
background: #f9f9f9;
}
/* Chat bubble shared by both roles. */
.message {
margin-bottom: 12px;
padding: 10px 12px;
border-radius: 10px;
max-width: 85%;
}
/* User bubbles are pushed to the right edge. */
.message.user {
background: #667eea;
color: white;
margin-left: auto;
}
.message.assistant {
background: white;
color: #333;
border: 1px solid #eee;
}
.message .role {
font-size: 12px;
font-weight: bold;
margin-bottom: 5px;
opacity: 0.8;
}
.message .content {
font-size: 14px;
line-height: 1.5;
}
/* Content row of a recorded-audio message. */
.audio-content {
display: flex;
align-items: center;
}
/* Pill-shaped playback button inside a bubble. */
.play-btn {
display: inline-flex;
align-items: center;
gap: 6px;
padding: 6px 12px;
border-radius: 15px;
border: none;
background: rgba(255,255,255,0.2);
cursor: pointer;
font-size: 13px;
}
.play-btn:hover {
background: rgba(255,255,255,0.3);
}
/* Green variant used for the synthesized reply's play button. */
.tts-play-btn {
background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
color: white;
margin-left: 10px;
}
/* Three-dot indicator shown while a request is pending. */
.loading {
display: flex;
justify-content: center;
gap: 8px;
padding: 15px;
}
.loading-dot {
width: 8px;
height: 8px;
border-radius: 50%;
background: #667eea;
animation: loading-bounce 0.6s infinite;
}
@keyframes loading-bounce {
0%, 80%, 100% { transform: scale(0); }
40% { transform: scale(1); }
}
/* Row holding the clear-conversation button. */
.actions {
margin-top: 15px;
text-align: center;
}
.clear-btn {
padding: 8px 15px;
border: 1px solid #ddd;
border-radius: 6px;
background: white;
cursor: pointer;
}
/* Transient error banner appended to the chat area. */
.error-message {
background: #ffebee;
color: #c62828;
padding: 10px;
border-radius: 8px;
margin-bottom: 10px;
text-align: center;
}
/* Placeholder text shown while the chat is empty. */
.hint {
font-size: 13px;
color: #999;
text-align: center;
}
</style>
</head>
<body>
<div class="container">
  <div class="header">
    <h1>🎤 语音对话TTS版</h1>
    <div class="status-indicator">
      <span class="status-dot" id="statusDot"></span>
      <span id="statusText">检测连接...</span>
    </div>
  </div>
  <!-- TTS settings -->
  <!-- NOTE(review): the .tts-option cards are click-only <div>s and cannot be
       reached by keyboard. Turning them into real buttons or radio inputs needs
       matching CSS and script changes, so it is only flagged here. -->
  <div class="tts-section">
    <h3>🔊 TTS语音合成设置</h3>
    <div class="tts-options" id="ttsOptions">
      <div class="tts-option" data-provider="none">
        <div class="name">❌ 无 TTS</div>
        <div class="status">只显示文字</div>
      </div>
      <div class="tts-option" data-provider="edge">
        <div class="name">🌐 Edge TTS</div>
        <div class="status" id="edgeStatus">检测中...</div>
      </div>
      <div class="tts-option disabled" data-provider="chattts">
        <div class="name">🤖 ChatTTS</div>
        <div class="status">暂未部署</div>
      </div>
    </div>
    <!-- Visibility is toggled by the script through style.display. -->
    <div class="voice-select" id="voiceSelect" style="display: none;">
      <!-- "for" ties the label to the select so assistive tech announces it. -->
      <label for="voiceDropdown">音色选择：</label>
      <select id="voiceDropdown">
        <option value="zh-CN-XiaoxiaoNeural">晓晓（女）</option>
        <option value="zh-CN-YunxiNeural">云希（男）</option>
        <option value="zh-CN-YunyangNeural">云扬（男）</option>
        <option value="zh-CN-XiaochenNeural">晓晨（女）</option>
        <option value="zh-CN-XiaohanNeural">晓涵（女）</option>
        <option value="zh-CN-XiaoyouNeural">晓悠（女）</option>
      </select>
    </div>
  </div>
  <!-- Recording -->
  <div class="record-section">
    <!-- Explicit type: a bare <button> defaults to "submit". -->
    <button type="button" class="record-btn" id="recordBtn">
      <span class="icon">🎤</span>
      <span class="text">录音</span>
    </button>
    <!-- Live region so recording state changes are announced. -->
    <div class="record-status" id="recordStatus" aria-live="polite">点击按钮开始录音</div>
    <!-- Purely decorative animation; hidden from assistive tech. -->
    <div class="waveform" id="waveform" style="display: none;" aria-hidden="true">
      <div class="wave-bar"></div>
      <div class="wave-bar"></div>
      <div class="wave-bar"></div>
      <div class="wave-bar"></div>
      <div class="wave-bar"></div>
      <div class="wave-bar"></div>
      <div class="wave-bar"></div>
      <div class="wave-bar"></div>
    </div>
  </div>
  <!-- Text input -->
  <div class="text-section">
    <div class="text-input-wrapper">
      <!-- A placeholder is not an accessible name; aria-label supplies one
           without changing the visual layout. -->
      <input type="text" id="textInput" placeholder="输入文字消息..." class="text-input" aria-label="文字消息">
      <button type="button" id="sendTextBtn" class="send-text-btn">发送</button>
    </div>
  </div>
  <!-- Message area; role="log" lets newly appended messages be announced. -->
  <div class="chat-section" id="chatSection" role="log" aria-live="polite">
    <div class="hint">开始对话吧！</div>
  </div>
  <div class="actions">
    <button type="button" class="clear-btn" id="clearBtn">清除对话</button>
  </div>
</div>
<script>
// Base path prefixed to every backend request made by this page.
const API_URL = '/api';
// State
// True while microphone samples are being collected.
let isRecording = false;
// Audio graph objects; created on first recording by initAudio().
let audioContext = null;
let audioStream = null;
let scriptProcessor = null;
// Float32Array chunks captured during the current recording.
let recordedBuffers = [];
// Conversation id returned by the backend; null until the first reply.
let conversationId = null;
// Selected TTS provider ('none' skips synthesis) and voice name.
let currentTTSProvider = 'none';
let currentVoice = 'zh-CN-XiaoxiaoNeural';
// Elements
const statusDot = document.getElementById('statusDot');
const statusText = document.getElementById('statusText');
const recordBtn = document.getElementById('recordBtn');
const recordStatus = document.getElementById('recordStatus');
const waveform = document.getElementById('waveform');
const chatSection = document.getElementById('chatSection');
const clearBtn = document.getElementById('clearBtn');
const textInput = document.getElementById('textInput');
const sendTextBtn = document.getElementById('sendTextBtn');
const ttsOptions = document.getElementById('ttsOptions');
const voiceSelect = document.getElementById('voiceSelect');
const voiceDropdown = document.getElementById('voiceDropdown');
// Initialization
// Refreshes the connection indicator, loads the TTS provider list, and then
// keeps polling the service status on a fixed interval.
async function init() {
    const STATUS_POLL_INTERVAL_MS = 10000;

    await checkStatus();
    await loadTTSProviders();

    // Periodic status check
    setInterval(checkStatus, STATUS_POLL_INTERVAL_MS);
}
// Check service status
// Queries {API_URL}/status and updates the status dot and text. Any failure
// (network error, unparsable body) is shown as a connection failure.
async function checkStatus() {
    // Defaults describe the failure case; overwritten once a reply is parsed.
    let dotClass = 'status-dot offline';
    let label = '服务连接失败';

    try {
        const response = await fetch(`${API_URL}/status`);
        const payload = await response.json();
        if (payload.model_online) {
            dotClass = 'status-dot online';
            label = '模型服务已连接';
        } else {
            label = '模型服务离线';
        }
    } catch (e) {
        // Keep the failure defaults.
    }

    statusDot.className = dotClass;
    statusText.textContent = label;
}
// Load TTS providers
// Fetches {API_URL}/tts/providers, reflects the Edge provider's availability
// in the UI, and selects the provider the server reports as current.
// Failures are logged and leave the UI unchanged.
async function loadTTSProviders() {
    try {
        const resp = await fetch(`${API_URL}/tts/providers`);
        // fetch() resolves on HTTP error statuses, so check explicitly.
        if (!resp.ok) {
            throw new Error(`HTTP ${resp.status}`);
        }
        const data = await resp.json();
        const providers = Array.isArray(data.providers) ? data.providers : [];

        // Update status
        const edge = providers.find(p => p.name === 'edge');
        const edgeAvailable = Boolean(edge && edge.available);
        document.getElementById('edgeStatus').textContent = edgeAvailable ? '可用 ✓' : '不可用';
        // Mark the card disabled when unavailable, so an unusable provider
        // cannot be picked; previously the class was only ever removed.
        const edgeOption = ttsOptions.querySelector('[data-provider="edge"]');
        edgeOption.classList.toggle('disabled', !edgeAvailable);

        // Set current provider. Fall back to 'none' when the server reports a
        // provider that is unknown or disabled here, so the in-memory state and
        // the highlighted card never disagree.
        const requested = data.current || 'none';
        const requestedOption = Array.from(ttsOptions.querySelectorAll('.tts-option'))
            .find(el => el.dataset.provider === requested);
        const usable = Boolean(requestedOption) && !requestedOption.classList.contains('disabled');
        // selectProvider() updates currentTTSProvider and also posts the
        // settings back to the server.
        selectProvider(usable ? requested : 'none');
    } catch (e) {
        console.error('加载 TTS 方案失败:', e);
    }
}
// Select TTS provider
// Highlights the card whose data-provider matches `provider`, records it as the
// current provider, toggles the voice dropdown, and saves the settings.
// Unknown or disabled providers are ignored and the selection is left as is.
function selectProvider(provider) {
    const options = Array.from(ttsOptions.querySelectorAll('.tts-option'));
    // Compare dataset values instead of building a selector from `provider`,
    // so unusual values cannot produce an invalid selector.
    const option = options.find(el => el.dataset.provider === provider);

    // Validate before touching the UI: clicking a disabled card used to clear
    // the highlight while the previous provider stayed active.
    if (!option || option.classList.contains('disabled')) {
        return;
    }

    options.forEach(el => el.classList.toggle('selected', el === option));
    currentTTSProvider = provider;
    // Show/hide voice selection
    voiceSelect.style.display = provider === 'edge' ? 'block' : 'none';
    // Save settings
    saveTTSSettings();
}
// Save TTS settings
// Posts the current provider and voice as JSON to {API_URL}/tts/settings.
// Best-effort: failures are logged and never thrown to the caller.
async function saveTTSSettings() {
    try {
        const resp = await fetch(`${API_URL}/tts/settings`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                provider: currentTTSProvider,
                voice: currentVoice
            })
        });
        // fetch() only rejects on network errors; surface HTTP failures too.
        if (!resp.ok) {
            console.error('保存设置失败:', `HTTP ${resp.status}`);
        }
    } catch (e) {
        console.error('保存设置失败:', e);
    }
}
// WAV creation
// Wraps float samples in a 44-byte RIFF/WAVE header and returns the result as
// an 'audio/wav' Blob. Output is always mono, 16-bit PCM, little-endian.
//   audioBuffer - indexable sequence of float samples (clamped to [-1, 1])
//   sampleRate  - sample rate written into the header (default 16000)
function createWavFile(audioBuffer, sampleRate = 16000) {
    const HEADER_BYTES = 44;
    const channels = 1;
    const bitDepth = 16;
    const sampleBytes = bitDepth / 8;
    const frameBytes = channels * sampleBytes;
    const payloadBytes = audioBuffer.length * sampleBytes;
    const fileBytes = HEADER_BYTES + payloadBytes;

    const wav = new ArrayBuffer(fileBytes);
    const dv = new DataView(wav);

    // RIFF chunk descriptor
    writeString(dv, 0, 'RIFF');
    dv.setUint32(4, fileBytes - 8, true);        // size of everything after this field
    writeString(dv, 8, 'WAVE');

    // "fmt " sub-chunk
    writeString(dv, 12, 'fmt ');
    dv.setUint32(16, 16, true);                  // sub-chunk size
    dv.setUint16(20, 1, true);                   // audio format 1 = PCM
    dv.setUint16(22, channels, true);
    dv.setUint32(24, sampleRate, true);
    dv.setUint32(28, sampleRate * frameBytes, true); // byte rate
    dv.setUint16(32, frameBytes, true);          // block align
    dv.setUint16(34, bitDepth, true);

    // "data" sub-chunk
    writeString(dv, 36, 'data');
    dv.setUint32(40, payloadBytes, true);
    floatTo16BitPCM(dv, HEADER_BYTES, audioBuffer);

    return new Blob([wav], { type: 'audio/wav' });
}
// Writes the low byte of each UTF-16 code unit of `string` into `view`,
// starting at byte `offset`. Intended for ASCII tags such as 'RIFF'.
function writeString(view, offset, string) {
    // split('') yields one entry per code unit, matching index-based access.
    string.split('').forEach((unit, idx) => {
        view.setUint8(offset + idx, unit.charCodeAt(0));
    });
}
// Converts float samples to signed 16-bit little-endian PCM and writes them
// into `view` starting at byte `offset`, two bytes per sample.
function floatTo16BitPCM(view, offset, input) {
    let pos = offset;
    for (let n = 0; n < input.length; n += 1) {
        // Clamp to [-1, 1] before scaling.
        const clamped = Math.max(-1, Math.min(1, input[n]));
        // Negative samples scale by 0x8000 and positive by 0x7FFF, so that
        // -1 maps to -32768 and +1 maps to 32767.
        const scale = clamped < 0 ? 0x8000 : 0x7FFF;
        view.setInt16(pos, clamped * scale, true);
        pos += 2;
    }
}
// Initialize recording
// Requests the microphone, builds the capture graph
// (MediaStreamSource -> ScriptProcessor -> destination), and publishes the
// objects to the shared state only when every step succeeded.
// Returns true on success. On failure it releases whatever was acquired,
// shows an error, and returns false.
async function initAudio() {
    let stream = null;
    let context = null;
    try {
        stream = await navigator.mediaDevices.getUserMedia({
            audio: { echoCancellation: true, noiseSuppression: true, sampleRate: 16000 }
        });
        const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
        context = new AudioContextCtor({ sampleRate: 16000 });
        const source = context.createMediaStreamSource(stream);
        const processor = context.createScriptProcessor(4096, 1, 1);
        processor.onaudioprocess = (e) => {
            if (isRecording) {
                // Copy the samples: slice() detaches them from the event's buffer.
                recordedBuffers.push(e.inputBuffer.getChannelData(0).slice());
            }
        };
        source.connect(processor);
        // NOTE(review): presumably connected to the destination so that the
        // processor keeps receiving audioprocess events; confirm.
        processor.connect(context.destination);

        // Publish only now. Previously a failure after the context was created
        // left audioContext set, so later recordings skipped initialization
        // and captured nothing.
        audioStream = stream;
        audioContext = context;
        scriptProcessor = processor;
        return true;
    } catch (e) {
        console.error('initAudio failed:', e);
        // Release the microphone so the browser's recording indicator turns off.
        if (stream) {
            stream.getTracks().forEach(track => track.stop());
        }
        if (context && typeof context.close === 'function') {
            try {
                const closing = context.close();
                if (closing && typeof closing.catch === 'function') {
                    closing.catch(() => {});
                }
            } catch (closeError) {
                // Nothing more can be done for a context that fails to close.
            }
        }
        showError('无法访问麦克风');
        return false;
    }
}
// Start recording
// Sets up the audio graph on first use, then clears any previous samples,
// raises the recording flag, and switches the button to its "stop" look.
async function startRecording() {
    // Only await when initialization is actually needed.
    const ready = audioContext ? true : await initAudio();
    if (!ready) {
        return;
    }

    recordedBuffers = [];
    isRecording = true;

    const iconEl = recordBtn.querySelector('.icon');
    const labelEl = recordBtn.querySelector('.text');
    recordBtn.classList.add('recording');
    iconEl.textContent = '⏹️';
    labelEl.textContent = '停止';
    recordStatus.textContent = '正在录音...';
    waveform.style.display = 'flex';
}
// Stop recording
// Ends the capture, restores the button, merges the collected chunks into one
// WAV blob, and hands it to sendAudio(). Does nothing when not recording.
function stopRecording() {
    if (!isRecording) {
        return;
    }
    isRecording = false;

    // Restore the idle look of the record button.
    recordBtn.classList.remove('recording');
    recordBtn.querySelector('.icon').textContent = '🎤';
    recordBtn.querySelector('.text').textContent = '录音';
    waveform.style.display = 'none';

    const chunks = recordedBuffers;
    recordedBuffers = [];   // release the references once taken over
    const totalLength = chunks.reduce((acc, buf) => acc + buf.length, 0);

    // Nothing was captured, e.g. stop was pressed before the first audio
    // callback. Do not upload a header-only WAV.
    if (totalLength === 0) {
        recordStatus.textContent = '点击按钮开始录音';
        return;
    }

    const mergedBuffer = new Float32Array(totalLength);
    let offset = 0;
    for (const buf of chunks) {
        mergedBuffer.set(buf, offset);
        offset += buf.length;
    }

    // Use the context's real rate: a browser may not honor the requested
    // 16 kHz, and a wrong rate in the header changes playback speed and pitch.
    const sampleRate = audioContext && audioContext.sampleRate ? audioContext.sampleRate : 16000;
    const wavBlob = createWavFile(mergedBuffer, sampleRate);
    const duration = Math.round(totalLength / sampleRate);   // whole seconds

    recordStatus.textContent = '处理中...';
    sendAudio(wavBlob, duration);
}
// Send audio
// Uploads a recording to {API_URL}/voice/chat, then renders the user's audio
// bubble and the assistant's reply (with synthesized speech when a TTS
// provider is selected).
//   audioBlob - WAV blob to upload
//   duration  - recording length in seconds, shown on the play button
async function sendAudio(audioBlob, duration) {
    showLoading();
    const formData = new FormData();
    formData.append('audio', audioBlob, 'recording.wav');
    if (conversationId) formData.append('conversation_id', conversationId);
    try {
        const resp = await fetch(`${API_URL}/voice/chat`, {
            method: 'POST',
            body: formData
        });
        // fetch() resolves on HTTP errors; without this check an error
        // response would be rendered as a reply of "undefined".
        if (!resp.ok) {
            throw new Error(`HTTP ${resp.status}`);
        }
        const data = await resp.json();
        conversationId = data.conversation_id;
        addMessage('user', audioBlob, duration);
        // TTS synthesis
        let ttsAudioUrl = null;
        if (currentTTSProvider !== 'none') {
            ttsAudioUrl = await synthesizeTTS(data.reply);
        }
        addMessage('assistant', data.reply, ttsAudioUrl);
    } catch (e) {
        showError('发送失败: ' + e.message);
    } finally {
        // Reset the status line on failure too, which previously left it on
        // the "processing" text. Skip it if a new recording already started.
        if (!isRecording) {
            recordStatus.textContent = '点击按钮开始录音';
        }
    }
}
// Send text
// Posts a text message to {API_URL}/voice/text and renders the user bubble
// and the assistant's reply (with synthesized speech when a TTS provider is
// selected). Blank or whitespace-only input is ignored.
async function sendText(text) {
    if (!text.trim()) return;
    showLoading();
    const formData = new FormData();
    formData.append('text', text);
    if (conversationId) formData.append('conversation_id', conversationId);
    try {
        const resp = await fetch(`${API_URL}/voice/text`, {
            method: 'POST',
            body: formData
        });
        // fetch() resolves on HTTP errors; without this check an error
        // response would be rendered as a reply of "undefined".
        if (!resp.ok) {
            throw new Error(`HTTP ${resp.status}`);
        }
        const data = await resp.json();
        conversationId = data.conversation_id;
        addMessage('user', text);
        // Clear the box as soon as the message is accepted, and only if it
        // still holds the sent text. Clearing after TTS finished used to wipe
        // anything typed in the meantime and invited duplicate sends.
        if (textInput.value === text) {
            textInput.value = '';
        }
        // TTS synthesis
        let ttsAudioUrl = null;
        if (currentTTSProvider !== 'none') {
            ttsAudioUrl = await synthesizeTTS(data.reply);
        }
        addMessage('assistant', data.reply, ttsAudioUrl);
    } catch (e) {
        showError('发送失败: ' + e.message);
    }
}
// TTS synthesis
// Posts `text` to {API_URL}/tts/synthesize and returns the `audio_url` field
// of the JSON reply. Returns null when the request fails, the server answers
// with an error status, or no URL is present.
async function synthesizeTTS(text) {
    try {
        const formData = new FormData();
        formData.append('text', text);
        const resp = await fetch(`${API_URL}/tts/synthesize`, {
            method: 'POST',
            body: formData
        });
        // fetch() resolves on HTTP error statuses, so check explicitly.
        if (!resp.ok) {
            throw new Error(`HTTP ${resp.status}`);
        }
        const data = await resp.json();
        // Normalize a missing field to null so callers get a single "no audio" value.
        return data.audio_url || null;
    } catch (e) {
        console.error('TTS 合成失败:', e);
        return null;
    }
}
// Add message
// Appends a chat bubble to the chat area and scrolls it into view. Also
// removes the empty-chat hint and one pending loading indicator, if present.
//   role      - 'user' or 'assistant'
//   content   - message text, or a Blob for a recorded user message
//   audioData - for a Blob message: the duration shown on the play button;
//               for an assistant message: URL of the synthesized audio, or null
//
// All text is inserted as text nodes and handlers are bound with
// addEventListener. The earlier innerHTML templates let markup in the user's
// input or the model's reply run as HTML in the page.
function addMessage(role, content, audioData = null) {
    // Small element factory: tag + class + optional text.
    const el = (tag, className, text) => {
        const node = document.createElement(tag);
        if (className) node.className = className;
        if (text !== undefined) node.textContent = text;
        return node;
    };
    // Builds a play button with an icon span and a label span.
    const playButton = (className, url, iconText, labelClass, labelText) => {
        const btn = el('button', className);
        btn.type = 'button';
        btn.appendChild(el('span', 'play-icon', iconText));
        btn.appendChild(el('span', labelClass, labelText));
        btn.addEventListener('click', () => playAudio(url, btn));
        return btn;
    };

    const hint = chatSection.querySelector('.hint');
    if (hint) hint.remove();
    const loading = chatSection.querySelector('.loading');
    if (loading) loading.remove();

    const msg = el('div', `message ${role}`);

    if (role === 'user' && content instanceof Blob) {
        // Recorded message: play button for the local recording.
        const audioUrl = URL.createObjectURL(content);
        const body = el('div', 'content audio-content');
        body.appendChild(playButton('play-btn', audioUrl, '▶️', 'duration', `${audioData}s`));
        msg.appendChild(el('div', 'role', '我'));
        msg.appendChild(body);
    } else if (role === 'assistant') {
        // Reply text, followed by a TTS play button when audio is available.
        const body = el('div', 'content', `${content}`);
        if (audioData) {
            body.appendChild(playButton('play-btn tts-play-btn', audioData, '🔊', '', '播放回复'));
        }
        msg.appendChild(el('div', 'role', 'AI'));
        msg.appendChild(body);
    } else {
        // Plain text message from the user.
        msg.appendChild(el('div', 'role', '我'));
        msg.appendChild(el('div', 'content', `${content}`));
    }

    chatSection.appendChild(msg);
    chatSection.scrollTop = chatSection.scrollHeight;
}
// Play audio
// Plays `url` through a new Audio element and mirrors the playback state on
// `btn` (its .play-icon text and the 'playing' class).
function playAudio(url, btn) {
    const audio = new Audio(url);
    const icon = btn.querySelector('.play-icon');
    // Icon shown when idle: speaker for URLs under /audio, play arrow otherwise.
    const idleIcon = url.startsWith('/audio') ? '🔊' : '▶️';
    const resetButton = () => {
        icon.textContent = idleIcon;
        btn.classList.remove('playing');
    };

    audio.onplay = () => {
        icon.textContent = '🔊';
        btn.classList.add('playing');
    };
    audio.onended = resetButton;
    // A failed load never fires 'ended'; reset the button here as well.
    audio.onerror = resetButton;

    // play() returns a promise in current browsers; it rejects when playback
    // is blocked or the source is unsupported. Handle it so it does not
    // surface as an unhandled rejection.
    const started = audio.play();
    if (started && typeof started.catch === 'function') {
        started.catch((err) => {
            console.error('音频播放失败:', err);
            resetButton();
        });
    }
}
// Show loading
// Removes the empty-chat hint if present, appends a three-dot loading
// indicator to the chat area, and scrolls to the bottom.
function showLoading() {
    const placeholder = chatSection.querySelector('.hint');
    if (placeholder) {
        placeholder.remove();
    }

    const spinner = document.createElement('div');
    spinner.className = 'loading';
    for (let dot = 0; dot < 3; dot += 1) {
        const dotEl = document.createElement('div');
        dotEl.className = 'loading-dot';
        spinner.appendChild(dotEl);
    }

    chatSection.appendChild(spinner);
    chatSection.scrollTop = chatSection.scrollHeight;
}
// Show error
// Removes one pending loading indicator if present, then appends an error
// banner to the chat area that removes itself after three seconds.
function showError(message) {
    const ERROR_VISIBLE_MS = 3000;

    const spinner = chatSection.querySelector('.loading');
    if (spinner) {
        spinner.remove();
    }

    const banner = Object.assign(document.createElement('div'), {
        className: 'error-message',
        textContent: message
    });
    chatSection.appendChild(banner);
    setTimeout(() => banner.remove(), ERROR_VISIBLE_MS);
}
// Clear conversation
// Resets the chat area to the empty-chat hint and forgets the conversation id,
// then asks the backend to delete the conversation. The local reset always
// happens; a failed DELETE is only logged. It used to reject unhandled and
// leave the chat on screen.
async function clearChat() {
    const staleId = conversationId;
    conversationId = null;
    chatSection.innerHTML = '<div class="hint">开始对话吧！</div>';

    if (!staleId) {
        return;
    }
    try {
        const resp = await fetch(`${API_URL}/conversation/${encodeURIComponent(staleId)}`, { method: 'DELETE' });
        if (!resp.ok) {
            console.error('清除对话失败:', `HTTP ${resp.status}`);
        }
    } catch (e) {
        console.error('清除对话失败:', e);
    }
}
// Event bindings
// Provider cards: one delegated listener on the container.
ttsOptions.addEventListener('click', (event) => {
    const card = event.target.closest('.tts-option');
    if (!card) {
        return;
    }
    selectProvider(card.dataset.provider);
});

// Voice dropdown: remember the choice and save it.
voiceDropdown.addEventListener('change', () => {
    currentVoice = voiceDropdown.value;
    saveTTSSettings();
});

// Record button toggles between starting and stopping.
recordBtn.addEventListener('click', () => {
    if (isRecording) {
        stopRecording();
    } else {
        startRecording();
    }
});

// Text message: send on button click or on Enter in the input.
sendTextBtn.addEventListener('click', () => {
    sendText(textInput.value);
});
textInput.addEventListener('keypress', (event) => {
    if (event.key !== 'Enter') {
        return;
    }
    sendText(textInput.value);
});

clearBtn.addEventListener('click', clearChat);

// Startup
init();
</script>
</body>
</html>