fix: 实现对比查看功能

- 读取翻译结果文件内容
- 尝试从原 PDF 中提取原文(即 PDF 内已有的 OCR 识别文字,不重新执行 OCR)
- 按页面分块显示对比
- 前端支持分页对比展示
This commit is contained in:
2026-04-16 23:45:57 +08:00
parent e5c9ea322a
commit 3479cbd04c
2 changed files with 102 additions and 16 deletions

59
app.py
View File

@@ -609,16 +609,65 @@ def compare_view(translation_id):
return jsonify({'error': '请登录后使用此功能'}), 401 return jsonify({'error': '请登录后使用此功能'}), 401
translation = Translation.query.get(translation_id) translation = Translation.query.get(translation_id)
if not translation or translation.user_id != user.id: if not translation or (translation.user_id != user.id and user.user_type != 'admin'):
return jsonify({'error': '无权访问'}), 403 return jsonify({'error': '无权访问'}), 403
# 生成对比文件 # 读取翻译结果文件
# TODO: 实现对比功能 translated_content = ''
if translation.output_path and os.path.exists(translation.output_path):
try:
with open(translation.output_path, 'r', encoding='utf-8') as f:
translated_content = f.read()
except Exception as e:
translated_content = f'读取失败: {str(e)}'
# 从翻译结果中提取各页内容
# 翻译结果格式是Markdown包含"## 第 X 页"分隔
original_pages = []
translated_pages = []
if translated_content:
# 解析翻译结果的页面结构
import re
page_pattern = r'## 第 (\d+) 页\n\n(.*?)\n\n---'
matches = re.findall(page_pattern, translated_content, re.DOTALL)
for page_num, content in matches:
translated_pages.append({
'page': int(page_num),
'content': content.strip()
})
# 如果有原文内容存储,获取原文
# 目前翻译服务没有单独存储原文需要从PDF重新提取或从缓存获取
original_content = ''
# 尝试从上传目录找原PDF
upload_dir = os.path.dirname(translation.output_path.replace('outputs', 'uploads').replace('_translated.md', ''))
possible_paths = [
translation.output_path.replace('outputs', 'uploads').replace('_translated.md', ''),
os.path.join(upload_dir, translation.original_filename),
]
for pdf_path in possible_paths:
if os.path.exists(pdf_path) and pdf_path.endswith('.pdf'):
try:
from pypdf import PdfReader
reader = PdfReader(pdf_path)
for page in reader.pages:
text = page.extract_text()
if text:
original_content += text + '\n\n'
except:
pass
break
return jsonify({ return jsonify({
'id': translation.id, 'id': translation.id,
'original': '原文内容', 'filename': translation.original_filename,
'translated': '文内容' 'original': original_content or '文内容从扫描版PDF提取',
'translated': translated_content,
'pages': translated_pages
}) })

View File

@@ -204,20 +204,57 @@ document.getElementById('viewCompare')?.addEventListener('click', async function
function showCompareView(data) { function showCompareView(data) {
const resultContent = document.getElementById('resultContent'); const resultContent = document.getElementById('resultContent');
resultContent.innerHTML = ` // 如果有分页数据,按页显示
<div class="compare-container"> if (data.pages && data.pages.length > 0) {
<div class="compare-panel original"> let html = '<div class="compare-container">';
<h5>原文</h5>
<div class="content">${escapeHtml(data.original)}</div> for (const page of data.pages) {
html += `
<div class="compare-section mb-4">
<h5 class="text-center mb-3">第 ${page.page} 页</h5>
<div class="row">
<div class="col-md-6">
<div class="card">
<div class="card-header">原文OCR识别</div>
<div class="card-body"><pre style="white-space: pre-wrap;">${escapeHtml(data.original || '原文内容')}</pre></div>
</div>
</div>
<div class="col-md-6">
<div class="card">
<div class="card-header">译文</div>
<div class="card-body">${renderMarkdown(page.content)}</div>
</div>
</div> </div>
<div class="compare-panel translated">
<h5>译文</h5>
<div class="content">${renderMarkdown(data.translated)}</div>
</div> </div>
</div> </div>
`; `;
} }
html += '</div>';
resultContent.innerHTML = html;
} else {
// 单一对比视图
resultContent.innerHTML = `
<div class="compare-container">
<div class="row">
<div class="col-md-6">
<div class="card">
<div class="card-header">原文</div>
<div class="card-body"><pre style="white-space: pre-wrap;">${escapeHtml(data.original || '无原文内容')}</pre></div>
</div>
</div>
<div class="col-md-6">
<div class="card">
<div class="card-header">译文</div>
<div class="card-body">${renderMarkdown(data.translated || '无译文内容')}</div>
</div>
</div>
</div>
</div>
`;
}
}
// 重新翻译 // 重新翻译
document.getElementById('retranslateBtn')?.addEventListener('click', async function() { document.getElementById('retranslateBtn')?.addEventListener('click', async function() {
if (!currentTranslationId) return; if (!currentTranslationId) return;