From 3479cbd04c0b24d43d64c901a04cae4333983698 Mon Sep 17 00:00:00 2001 From: coder Date: Thu, 16 Apr 2026 23:45:57 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E5=AE=9E=E7=8E=B0=E5=AF=B9=E6=AF=94?= =?UTF-8?q?=E6=9F=A5=E7=9C=8B=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 读取翻译结果文件内容 - 尝试从原PDF提取原文(OCR识别的文字) - 按页面分块显示对比 - 前端支持分页对比展示 --- app.py | 59 +++++++++++++++++++++++++++++++++++++++++++---- static/js/main.js | 59 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 102 insertions(+), 16 deletions(-) diff --git a/app.py b/app.py index a29590c..740892e 100644 --- a/app.py +++ b/app.py @@ -609,16 +609,65 @@ def compare_view(translation_id): return jsonify({'error': '请登录后使用此功能'}), 401 translation = Translation.query.get(translation_id) - if not translation or translation.user_id != user.id: + if not translation or (translation.user_id != user.id and user.user_type != 'admin'): return jsonify({'error': '无权访问'}), 403 - # 生成对比文件 - # TODO: 实现对比功能 + # 读取翻译结果文件 + translated_content = '' + if translation.output_path and os.path.exists(translation.output_path): + try: + with open(translation.output_path, 'r', encoding='utf-8') as f: + translated_content = f.read() + except Exception as e: + translated_content = f'读取失败: {str(e)}' + + # 从翻译结果中提取各页内容 + # 翻译结果格式是Markdown,包含"## 第 X 页"分隔 + original_pages = [] + translated_pages = [] + + if translated_content: + # 解析翻译结果的页面结构 + import re + page_pattern = r'## 第 (\d+) 页\n\n(.*?)\n\n---' + matches = re.findall(page_pattern, translated_content, re.DOTALL) + + for page_num, content in matches: + translated_pages.append({ + 'page': int(page_num), + 'content': content.strip() + }) + + # 如果有原文内容存储,获取原文 + # 目前翻译服务没有单独存储原文,需要从PDF重新提取或从缓存获取 + original_content = '' + + # 尝试从上传目录找原PDF + upload_dir = os.path.dirname(translation.output_path.replace('outputs', 'uploads').replace('_translated.md', '')) + possible_paths = [ + translation.output_path.replace('outputs', 'uploads').replace('_translated.md', ''), + os.path.join(upload_dir, translation.original_filename), + ] + + for pdf_path in possible_paths: + if os.path.exists(pdf_path) and pdf_path.endswith('.pdf'): + try: + from pypdf import PdfReader + reader = PdfReader(pdf_path) + for page in reader.pages: + text = page.extract_text() + if text: + original_content += text + '\n\n' + except: + pass + break return jsonify({ 'id': translation.id, - 'original': '原文内容', - 'translated': '译文内容' + 'filename': translation.original_filename, + 'original': original_content or '原文内容(从扫描版PDF提取)', + 'translated': translated_content, + 'pages': translated_pages }) diff --git a/static/js/main.js b/static/js/main.js index 3232262..302a9a7 100644 --- a/static/js/main.js +++ b/static/js/main.js @@ -204,18 +204,55 @@ document.getElementById('viewCompare')?.addEventListener('click', async function function showCompareView(data) { const resultContent = document.getElementById('resultContent'); - resultContent.innerHTML = ` -
-
-
原文
-
${escapeHtml(data.original)}
+ // 如果有分页数据,按页显示 + if (data.pages && data.pages.length > 0) { + let html = '
'; + + for (const page of data.pages) { + html += ` +
+
第 ${page.page} 页
+
+
+
+
原文(OCR识别)
+
${escapeHtml(data.original || '原文内容')}
+
+
+
+
+
译文
+
${renderMarkdown(page.content)}
+
+
+
+
+ `; + } + + html += '
'; + resultContent.innerHTML = html; + } else { + // 单一对比视图 + resultContent.innerHTML = ` +
+
+
+
+
原文
+
${escapeHtml(data.original || '无原文内容')}
+
+
+
+
+
译文
+
${renderMarkdown(data.translated || '无译文内容')}
+
+
+
-
-
译文
-
${renderMarkdown(data.translated)}
-
-
- `; + `; + } } // 重新翻译