8 Commits

Author SHA1 Message Date
128341e98a fix: 修复滚动同步位置计算+添加调试日志 2026-04-17 10:28:21 +08:00
f9c7caae26 feat: PDF对比按页同步滚动+译文按页分隔显示 2026-04-17 10:12:51 +08:00
5157561e83 feat: PDF对比用PDF.js渲染+滚动同步 2026-04-17 10:05:47 +08:00
30527a5ce6 feat: PDF对比用iframe显示原PDF文件(支持扫描版PDF) 2026-04-17 09:53:41 +08:00
2e428b2500 feat: 添加修复upload_path脚本 2026-04-17 09:43:18 +08:00
9569898f33 fix: PDF对比功能无法显示原文 - uploads和outputs使用同一UUID,添加upload_path字段 2026-04-17 09:32:21 +08:00
3479cbd04c fix: 实现对比查看功能
- 读取翻译结果文件内容
- 尝试从原PDF提取原文(OCR识别的文字)
- 按页面分块显示对比
- 前端支持分页对比展示
2026-04-16 23:45:57 +08:00
e5c9ea322a fix: 上传成功后按钮状态改为翻译中
上传PDF成功后立即更新按钮文字为翻译中...而不是保持上传中...
2026-04-16 23:33:40 +08:00
8 changed files with 651 additions and 27 deletions

96
app.py
View File

@@ -374,16 +374,17 @@ def upload_pdf():
output_path = cache_path
else:
# 需要翻译
# 保存上传文件
upload_dir = os.path.join(UPLOAD_DIR, str(uuid.uuid4()))
# 保存上传文件 - 使用同一个UUID确保uploads和outputs目录关联
session_uuid = str(uuid.uuid4())
upload_dir = os.path.join(UPLOAD_DIR, session_uuid)
os.makedirs(upload_dir, exist_ok=True)
upload_path = os.path.join(upload_dir, filename)
with open(upload_path, 'wb') as f:
f.write(file_content)
# 创建输出路径
output_dir = os.path.join(OUTPUT_DIR, str(uuid.uuid4()))
# 创建输出路径 - 使用相同的UUID
output_dir = os.path.join(OUTPUT_DIR, session_uuid)
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, f"{filename}_translated.md")
@@ -400,6 +401,7 @@ def upload_pdf():
translate_params=json.dumps({'instruction': instruction}) if instruction else None,
status='processing' if not from_cache else 'completed',
progress=0 if not from_cache else 100,
upload_path=upload_path if not from_cache else None, # 保存上传路径
output_path=output_path,
from_cache=from_cache
)
@@ -609,19 +611,95 @@ def compare_view(translation_id):
return jsonify({'error': '请登录后使用此功能'}), 401
translation = Translation.query.get(translation_id)
if not translation or translation.user_id != user.id:
if not translation or (translation.user_id != user.id and user.user_type != 'admin'):
return jsonify({'error': '无权访问'}), 403
# 生成对比文件
# TODO: 实现对比功能
# 读取翻译结果文件
translated_content = ''
if translation.output_path and os.path.exists(translation.output_path):
try:
with open(translation.output_path, 'r', encoding='utf-8') as f:
translated_content = f.read()
except Exception as e:
translated_content = f'读取失败: {str(e)}'
# 从翻译结果中提取各页内容
# 翻译结果格式是Markdown包含"## 第 X 页"分隔
original_pages = []
translated_pages = []
if translated_content:
# 解析翻译结果的页面结构
import re
page_pattern = r'## 第 (\d+) 页\n\n(.*?)\n\n---'
matches = re.findall(page_pattern, translated_content, re.DOTALL)
for page_num, content in matches:
translated_pages.append({
'page': int(page_num),
'content': content.strip()
})
# 如果有原文内容存储,获取原文
original_content = ''
# 优先从数据库存储的upload_path获取原PDF
possible_paths = []
if translation.upload_path:
possible_paths.append(translation.upload_path)
# 备用方案:尝试从路径推断(兼容旧数据)
upload_dir = os.path.dirname(translation.output_path.replace('outputs', 'uploads').replace('_translated.md', '')) if translation.output_path else ''
if upload_dir:
possible_paths.append(
translation.output_path.replace('outputs', 'uploads').replace('_translated.md', '') if translation.output_path else ''
)
possible_paths.append(os.path.join(upload_dir, translation.original_filename))
for pdf_path in possible_paths:
if pdf_path and os.path.exists(pdf_path) and pdf_path.endswith('.pdf'):
try:
from pypdf import PdfReader
reader = PdfReader(pdf_path)
for page in reader.pages:
text = page.extract_text()
if text:
original_content += text + '\n\n'
except:
pass
break
return jsonify({
'id': translation.id,
'original': '原文内容',
'translated': '译文内容'
'filename': translation.original_filename,
'original': original_content or '',
'original_pdf_url': f'/api/original-pdf/{translation.id}' if translation.upload_path else None,
'translated': translated_content,
'pages': translated_pages
})
@app.route('/api/original-pdf/<int:translation_id>')
def get_original_pdf(translation_id):
    """Serve the originally uploaded PDF of a translation inline.

    Responds with 401 when nobody is logged in, 403 when the translation
    does not exist or the caller is neither its owner nor an admin, and
    404 when no upload path is recorded or the file is missing on disk.
    """
    current = get_current_user()
    if not current:
        return jsonify({'error': '请登录后使用此功能'}), 401

    record = Translation.query.get(translation_id)
    # A missing record is reported as "forbidden" as well, so the two
    # cases are indistinguishable to the caller.
    if not record:
        return jsonify({'error': '无权访问'}), 403
    may_access = record.user_id == current.id or current.user_type == 'admin'
    if not may_access:
        return jsonify({'error': '无权访问'}), 403

    pdf_path = record.upload_path
    if not pdf_path or not os.path.exists(pdf_path):
        return jsonify({'error': '原PDF文件不存在'}), 404

    # as_attachment=False: the file is sent for inline display, not download.
    return send_file(
        pdf_path,
        mimetype='application/pdf',
        as_attachment=False,
        download_name=record.original_filename,
    )
# ==================== 路由: 用户系统 ====================
@app.route('/login', methods=['GET', 'POST'])
def login():

187
logs/app.log Normal file
View File

@@ -0,0 +1,187 @@
* Serving Flask app 'app'
* Debug mode: on
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on all addresses (0.0.0.0)
* Running on http://127.0.0.1:19000
* Running on http://192.168.2.17:19000
Press CTRL+C to quit
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
127.0.0.1 - - [16/Apr/2026 21:50:38] "GET /api/config HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:08] "GET /admin/translations HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:08] "GET /static/img/favicon.svg HTTP/1.1" 304 -
/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py:53: LegacyAPIWarning: The Query.get() method is considered legacy as of the 1.x series of SQLAlchemy and becomes a legacy construct in 2.0. The method is now available as Session.get() (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
return User.query.get(user_id)
192.168.2.10 - - [16/Apr/2026 21:58:13] "GET / HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:13] "GET /static/css/style.css HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 21:58:13] "GET /static/js/main.js HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 21:58:13] "GET /api/user/info HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:13] "GET /static/img/favicon.svg HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 21:58:16] "GET /history HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:16] "GET /static/css/style.css HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 21:58:18] "GET / HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:18] "GET /static/css/style.css HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 21:58:18] "GET /static/js/main.js HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 21:58:18] "GET /api/user/info HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:26] "POST /api/upload HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:26] "GET /api/task/17baff33-894b-4e56-8975-cc2e6359cd66 HTTP/1.1" 200 -
/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py:481: LegacyAPIWarning: The Query.get() method is considered legacy as of the 1.x series of SQLAlchemy and becomes a legacy construct in 2.0. The method is now available as Session.get() (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
translation = Translation.query.get(translation_id)
192.168.2.10 - - [16/Apr/2026 21:58:26] "GET /api/status/4 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:28] "GET /api/task/17baff33-894b-4e56-8975-cc2e6359cd66 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:28] "GET /api/status/4 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:30] "GET /api/task/17baff33-894b-4e56-8975-cc2e6359cd66 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:30] "GET /api/status/4 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:32] "GET /api/task/17baff33-894b-4e56-8975-cc2e6359cd66 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:32] "GET /api/status/4 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:34] "GET /api/task/17baff33-894b-4e56-8975-cc2e6359cd66 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:34] "GET /api/status/4 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:37] "GET /api/task/17baff33-894b-4e56-8975-cc2e6359cd66 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 21:58:37] "GET /api/status/4 HTTP/1.1" 200 -
/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py:519: LegacyAPIWarning: The Query.get() method is considered legacy as of the 1.x series of SQLAlchemy and becomes a legacy construct in 2.0. The method is now available as Session.get() (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
translation = Translation.query.get(translation_id)
192.168.2.10 - - [16/Apr/2026 21:58:37] "GET /api/result/4 HTTP/1.1" 200 -
127.0.0.1 - - [16/Apr/2026 22:00:01] "GET /api/health HTTP/1.1" 404 -
127.0.0.1 - - [16/Apr/2026 22:00:33] "POST /api/upload HTTP/1.1" 200 -
127.0.0.1 - - [16/Apr/2026 22:00:38] "GET /api/status/5 HTTP/1.1" 200 -
127.0.0.1 - - [16/Apr/2026 22:01:15] "GET /api/status/5 HTTP/1.1" 200 -
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
[翻译任务] 开始翻译,使用配置: https://open.bigmodel.cn/api/paas/v4
[翻译任务] 开始翻译,使用配置: https://open.bigmodel.cn/api/paas/v4
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
127.0.0.1 - - [16/Apr/2026 22:20:01] "GET /api/health HTTP/1.1" 404 -
/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py:53: LegacyAPIWarning: The Query.get() method is considered legacy as of the 1.x series of SQLAlchemy and becomes a legacy construct in 2.0. The method is now available as Session.get() (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
return User.query.get(user_id)
192.168.2.10 - - [16/Apr/2026 22:24:27] "GET / HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:27] "GET /static/css/style.css HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 22:24:27] "GET /static/js/main.js HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 22:24:27] "GET /api/user/info HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:27] "GET /static/img/favicon.svg HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 22:24:31] "GET /admin/translations HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:31] "GET /static/img/favicon.svg HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 22:24:33] "POST /admin/translation/5/toggle-share HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:33] "GET /admin/translations HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:33] "GET /static/img/favicon.svg HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 22:24:34] "POST /admin/translation/4/toggle-share HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:34] "GET /admin/translations HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:34] "GET /static/img/favicon.svg HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 22:24:42] "GET /admin/translation/5 HTTP/1.1" 200 -
/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py:519: LegacyAPIWarning: The Query.get() method is considered legacy as of the 1.x series of SQLAlchemy and becomes a legacy construct in 2.0. The method is now available as Session.get() (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
translation = Translation.query.get(translation_id)
192.168.2.10 - - [16/Apr/2026 22:24:42] "GET /api/result/5 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:45] "GET /admin/translations HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:46] "GET /admin/translation/4 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:47] "GET /api/result/4 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:48] "GET /admin/translations HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:52] "GET / HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:52] "GET /static/css/style.css HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 22:24:52] "GET /static/js/main.js HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 22:24:52] "GET /static/img/favicon.svg HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 22:24:52] "GET /api/user/info HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:54] "GET / HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:54] "GET /static/css/style.css HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 22:24:54] "GET /static/js/main.js HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 22:24:54] "GET /api/user/info HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:24:54] "GET /static/img/favicon.svg HTTP/1.1" 304 -
192.168.2.10 - - [16/Apr/2026 22:25:00] "POST /api/upload HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:25:00] "GET /api/task/ec82bf8c-70b3-474a-b94f-64598c81f7fa HTTP/1.1" 200 -
/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py:481: LegacyAPIWarning: The Query.get() method is considered legacy as of the 1.x series of SQLAlchemy and becomes a legacy construct in 2.0. The method is now available as Session.get() (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
translation = Translation.query.get(translation_id)
192.168.2.10 - - [16/Apr/2026 22:25:00] "GET /api/status/6 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:25:02] "GET /api/task/ec82bf8c-70b3-474a-b94f-64598c81f7fa HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:25:02] "GET /api/status/6 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:25:05] "GET /api/task/ec82bf8c-70b3-474a-b94f-64598c81f7fa HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:25:05] "GET /api/status/6 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:25:07] "GET /api/task/ec82bf8c-70b3-474a-b94f-64598c81f7fa HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:25:07] "GET /api/status/6 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:25:09] "GET /api/task/ec82bf8c-70b3-474a-b94f-64598c81f7fa HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:25:09] "GET /api/status/6 HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:25:11] "GET /api/task/ec82bf8c-70b3-474a-b94f-64598c81f7fa HTTP/1.1" 200 -
192.168.2.10 - - [16/Apr/2026 22:25:11] "GET /api/status/6 HTTP/1.1" 200 -
127.0.0.1 - - [16/Apr/2026 22:40:01] "GET /api/health HTTP/1.1" 404 -
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
[翻译任务] 开始翻译,使用配置: https://open.bigmodel.cn/api/paas/v4
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
* Restarting with stat
* Debugger is active!
* Debugger PIN: 154-698-244
127.0.0.1 - - [16/Apr/2026 23:00:01] "GET /api/health HTTP/1.1" 404 -
127.0.0.1 - - [16/Apr/2026 23:03:32] "GET / HTTP/1.1" 200 -

View File

@@ -169,6 +169,7 @@ class Translation(db.Model):
error_message = db.Column(db.Text, nullable=True)
# 输出
upload_path = db.Column(db.String(255), nullable=True) # 原始PDF文件路径
output_path = db.Column(db.String(255), nullable=True) # 翻译结果文件路径
# 时间戳

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python3
"""
修复旧数据的 upload_path 字段
通过 file_hash 匹配 uploads 目录中的 PDF 文件
"""
import sqlite3
import os
import hashlib
DB_PATH = 'instance/pdf_translate.db'
UPLOADS_DIR = 'uploads'
def compute_file_hash(filepath, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *filepath*.

    The file is read in fixed-size chunks so that large PDFs are hashed
    without loading the whole file into memory at once.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Number of bytes read per iteration.

    Returns:
        The MD5 digest of the file contents as a hex string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    hasher = hashlib.md5()
    with open(filepath, 'rb') as f:
        # iter() with a sentinel stops as soon as read() returns b'' (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
def _index_pdfs_by_hash(pdf_paths):
    """Map the MD5 digest of each readable file in *pdf_paths* to its path."""
    hash_to_path = {}
    for pdf_path in pdf_paths:
        try:
            pdf_hash = compute_file_hash(pdf_path)
        except OSError as e:
            # Best effort: an unreadable file is reported and skipped.
            print(f"处理 {pdf_path} 失败: {e}")
            continue
        # Keep the first path seen for a digest, matching walk order.
        hash_to_path.setdefault(pdf_hash, pdf_path)
    return hash_to_path


def main():
    """Back-fill ``upload_path`` for translation rows where it is NULL.

    Every ``.pdf`` file under ``UPLOADS_DIR`` is hashed once; each row whose
    ``file_hash`` equals one of those digests gets that file's path written
    to ``upload_path``. Progress is printed to stdout and each successful
    update is committed immediately.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        # Fetch every translation row that still lacks an upload path.
        cursor.execute('SELECT id, file_hash, original_filename, output_path FROM translations WHERE upload_path IS NULL')
        records = cursor.fetchall()

        print(f"找到 {len(records)} 条需要修复的记录")

        if not records:
            print("无需修复")
            return

        # Collect all uploaded PDF files.
        pdf_files = [
            os.path.join(root, name)
            for root, _dirs, files in os.walk(UPLOADS_DIR)
            for name in files
            if name.endswith('.pdf')
        ]

        print(f"找到 {len(pdf_files)} 个PDF文件")

        # Hash each file once up front instead of once per record.
        hash_to_path = _index_pdfs_by_hash(pdf_files)

        fixed_count = 0
        for record_id, file_hash, _filename, _output_path in records:
            pdf_path = hash_to_path.get(file_hash)
            if pdf_path is None:
                continue
            print(f"ID {record_id}: 找到匹配 {pdf_path}")
            try:
                cursor.execute('UPDATE translations SET upload_path = ? WHERE id = ?', (pdf_path, record_id))
                conn.commit()
            except sqlite3.Error as e:
                # Best effort: report the failed row and carry on.
                print(f"处理 {pdf_path} 失败: {e}")
                continue
            fixed_count += 1
    finally:
        # Release the connection even if a query or the directory walk fails.
        conn.close()

    print(f"修复完成,共修复 {fixed_count} 条记录")


if __name__ == '__main__':
    main()

View File

@@ -75,12 +75,21 @@ body {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
height: 70vh;
}
.compare-panel {
padding: 15px;
border-radius: 5px;
background-color: #f8f9fa;
overflow-y: auto;
height: 100%;
}
.compare-panel h5 {
margin-bottom: 10px;
padding-bottom: 5px;
border-bottom: 1px solid #ddd;
}
.compare-panel.original {
@@ -91,6 +100,48 @@ body {
border-left: 4px solid var(--success-color);
}
.compare-panel iframe {
width: 100%;
height: calc(100% - 40px);
border: none;
}
#pdfContainer {
height: calc(100% - 40px);
overflow-y: auto;
}
#pdfPages {
padding: 10px;
}
.pdf-page-canvas {
border: 1px solid #ddd;
border-radius: 3px;
background: white;
}
.translated-page {
padding-bottom: 30px;
border-bottom: 2px dashed #ddd;
margin-bottom: 20px;
}
.translated-page:last-child {
border-bottom: none;
}
.page-header {
font-size: 0.9em;
padding: 5px 10px;
background: #f0f0f0;
border-radius: 3px;
}
.page-content {
padding: 10px;
}
/* 会员卡片 */
.pricing-card {
transition: transform 0.3s ease;

View File

@@ -70,6 +70,9 @@ document.getElementById('uploadForm').addEventListener('submit', async function(
currentTranslationId = result.translation_id;
currentTaskId = result.task_id;
// 更新按钮状态为翻译中
btnText.textContent = '翻译中...';
// 如果使用缓存,直接显示结果
if (result.from_cache) {
document.getElementById('cacheNotice').style.display = 'block';
@@ -201,20 +204,57 @@ document.getElementById('viewCompare')?.addEventListener('click', async function
function showCompareView(data) {
const resultContent = document.getElementById('resultContent');
resultContent.innerHTML = `
<div class="compare-container">
<div class="compare-panel original">
<h5>原文</h5>
<div class="content">${escapeHtml(data.original)}</div>
// 如果有分页数据,按页显示
if (data.pages && data.pages.length > 0) {
let html = '<div class="compare-container">';
for (const page of data.pages) {
html += `
<div class="compare-section mb-4">
<h5 class="text-center mb-3">第 ${page.page} 页</h5>
<div class="row">
<div class="col-md-6">
<div class="card">
<div class="card-header">原文OCR识别</div>
<div class="card-body"><pre style="white-space: pre-wrap;">${escapeHtml(data.original || '原文内容')}</pre></div>
</div>
</div>
<div class="col-md-6">
<div class="card">
<div class="card-header">译文</div>
<div class="card-body">${renderMarkdown(page.content)}</div>
</div>
</div>
<div class="compare-panel translated">
<h5>译文</h5>
<div class="content">${renderMarkdown(data.translated)}</div>
</div>
</div>
`;
}
html += '</div>';
resultContent.innerHTML = html;
} else {
// 单一对比视图
resultContent.innerHTML = `
<div class="compare-container">
<div class="row">
<div class="col-md-6">
<div class="card">
<div class="card-header">原文</div>
<div class="card-body"><pre style="white-space: pre-wrap;">${escapeHtml(data.original || '无原文内容')}</pre></div>
</div>
</div>
<div class="col-md-6">
<div class="card">
<div class="card-header">译文</div>
<div class="card-body">${renderMarkdown(data.translated || '无译文内容')}</div>
</div>
</div>
</div>
</div>
`;
}
}
// 重新翻译
document.getElementById('retranslateBtn')?.addEventListener('click', async function() {
if (!currentTranslationId) return;

View File

@@ -7,6 +7,7 @@
<link rel="icon" href="/static/img/favicon.svg" type="image/svg+xml">
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<link href="/static/css/style.css" rel="stylesheet">
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
</head>
<body>
<nav class="navbar navbar-expand-lg navbar-dark bg-primary">
@@ -76,6 +77,11 @@
}
// 切换对比视图
let syncScrollEnabled = true;
let pdfDoc = null;
let pagePositions = []; // PDF各页位置
let translatedPagePositions = []; // 译文各页位置
document.getElementById('toggleCompare').addEventListener('click', async function() {
showCompare = !showCompare;
@@ -84,27 +90,220 @@
const response = await fetch(`/api/compare/${translationId}`);
const result = await response.json();
// 原文面板如果有PDF URL用PDF.js渲染
let originalHtml = '';
if (result.original_pdf_url) {
originalHtml = '<div id="pdfPages"></div>';
} else if (result.original && result.original.length > 0) {
originalHtml = `<div class="original-text" style="white-space:pre-wrap;font-family:monospace;">${escapeHtml(result.original)}</div>`;
} else {
originalHtml = '<div class="text-muted">原文内容未找到可能PDF已被删除</div>';
}
// 译文按页渲染
let translatedHtml = renderTranslatedByPage(result.translated);
document.getElementById('resultContent').innerHTML = `
<div class="compare-container">
<div class="compare-panel original">
<h5>原文</h5>
<div>${escapeHtml(result.original)}</div>
<div class="compare-panel original" id="originalPanel">
<h5>原文 PDF <small class="text-muted">(共${result.pages.length}页)</small></h5>
<div id="pdfContainer">${originalHtml}</div>
</div>
<div class="compare-panel translated">
<h5>译文</h5>
<div>${renderMarkdown(result.translated)}</div>
<div class="compare-panel translated" id="translatedPanel">
<h5>译文 <small class="text-muted">(共${result.pages.length}页)</small></h5>
<div class="translated-content">${translatedHtml}</div>
</div>
</div>
<div class="text-center mt-2">
<small class="text-muted">💡 滚动任意一侧,另一侧自动同步到对应页</small>
</div>
`;
// 先计算译文位置(因为已经渲染好了)
setTimeout(calculateTranslatedPositions, 100);
// 如果有PDF用PDF.js渲染完成后计算位置并启用同步
if (result.original_pdf_url) {
renderPDF(result.original_pdf_url).then(() => {
setTimeout(() => {
calculatePdfPositions();
enableSyncScroll();
}, 200);
});
} else {
setTimeout(enableSyncScroll, 300);
}
} catch (error) {
alert('加载对比失败');
alert('加载对比失败: ' + error.message);
}
} else {
loadResult();
}
});
// Render the translated Markdown as one HTML block per page.
//
// `content` is the full translated Markdown. It is split on "---" rules that
// are surrounded by blank lines; each chunk is expected to carry a
// "## 第 N 页" heading that supplies its page number (the chunk's position is
// used when the heading is missing). Returns an HTML string with one
// `.translated-page` element per chunk, tagged with `data-page`, or a muted
// placeholder message when `content` is empty.
function renderTranslatedByPage(content) {
    if (!content) return '<div class="text-muted">译文加载失败</div>';

    // Split into pages on the "---" separator.
    const parts = content.split(/\n\n---\n\n/);
    let html = '';

    parts.forEach((part, index) => {
        // A separator at the very end of the document leaves a blank chunk;
        // rendering it would add a phantom empty page, so skip it.
        if (!part.trim()) return;

        // Extract the page number from the heading, falling back to position.
        const pageNumMatch = part.match(/## 第 (\d+) 页\n\n/);
        const pageNum = pageNumMatch ? pageNumMatch[1] : (index + 1);

        // Strip the page heading, then a leading "# ..." title line and a
        // leading "> ..." quote line if present.
        const contentOnly = part
            .replace(/## 第 \d+ 页\n\n/, '')
            .replace(/^# .*\n\n/, '')
            .replace(/^> .*\n\n/, '');

        html += `
            <div class="translated-page" data-page="${pageNum}">
                <div class="page-header text-center text-muted mb-3">— 第 ${pageNum} 页 —</div>
                <div class="page-content">${renderMarkdown(contentOnly)}</div>
            </div>
        `;
    });

    return html;
}
// Load the PDF at `url` with PDF.js and draw every page into #pdfPages.
//
// For each page a "— 第 N 页 —" label is appended first, followed by a canvas
// holding the rendered page. Both elements carry `data-page`. The loaded
// document is also stored in the file-level `pdfDoc` variable. Resolves with
// the loaded document once all pages have been rendered.
async function renderPDF(url) {
    pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';

    const loadedDoc = await pdfjsLib.getDocument(url).promise;
    pdfDoc = loadedDoc;

    const pagesRoot = document.getElementById('pdfPages');
    pagesRoot.innerHTML = '';

    // Pages are rendered one after another, in order.
    for (let pageNo = 1; pageNo <= loadedDoc.numPages; pageNo += 1) {
        // The label goes in before the page is fetched and rendered.
        const label = document.createElement('div');
        label.className = 'page-header text-center text-muted mb-2';
        label.textContent = `— 第 ${pageNo} 页 —`;
        label.dataset.page = pageNo;
        pagesRoot.appendChild(label);

        const pdfPage = await loadedDoc.getPage(pageNo);

        const canvas = document.createElement('canvas');
        canvas.className = 'pdf-page-canvas';
        Object.assign(canvas.style, {
            width: '100%',
            display: 'block',
            marginBottom: '20px',
        });
        canvas.dataset.page = pageNo;

        const drawingContext = canvas.getContext('2d');
        const viewport = pdfPage.getViewport({ scale: 1.5 });
        canvas.height = viewport.height;
        canvas.width = viewport.width;

        await pdfPage.render({ canvasContext: drawingContext, viewport }).promise;

        // The canvas is attached only after its page has finished rendering.
        pagesRoot.appendChild(canvas);
    }

    return loadedDoc;
}
// Record, for every `.translated-page` element, its vertical offset inside
// #translatedPanel's scrollable content. The result replaces the file-level
// `translatedPagePositions` array with `{ page, top }` entries.
function calculateTranslatedPositions() {
    translatedPagePositions = [];
    const panel = document.getElementById('translatedPanel');

    for (const pageEl of document.querySelectorAll('.translated-page')) {
        const pageNo = parseInt(pageEl.dataset.page);

        // Offset within the scrolled content =
        //   element top - panel top (both viewport-relative) + panel scrollTop.
        const elementTop = pageEl.getBoundingClientRect().top;
        const panelTop = panel.getBoundingClientRect().top;
        const offset = elementTop - panelTop + panel.scrollTop;

        translatedPagePositions.push({ page: pageNo, top: offset });
    }

    console.log('译文页位置:', translatedPagePositions);
}
// Record, for every page label inside #pdfPages, its vertical offset inside
// #originalPanel's scrollable content. The result replaces the file-level
// `pagePositions` array with `{ page, top }` entries.
function calculatePdfPositions() {
    pagePositions = [];
    const panel = document.getElementById('originalPanel');

    for (const labelEl of document.querySelectorAll('#pdfPages .page-header')) {
        const pageNo = parseInt(labelEl.dataset.page);

        // Offset within the scrolled content =
        //   element top - panel top (both viewport-relative) + panel scrollTop.
        const elementTop = labelEl.getBoundingClientRect().top;
        const panelTop = panel.getBoundingClientRect().top;
        const offset = elementTop - panelTop + panel.scrollTop;

        pagePositions.push({ page: pageNo, top: offset });
    }

    console.log('PDF页位置:', pagePositions);
}
// Return the page number shown at `scrollTop`.
//
// `positions` is a list of `{ page, top }` entries. The last entry whose top
// lies at most 30px below `scrollTop` wins; when no entry qualifies (or the
// list is empty) page 1 is returned.
function findCurrentPage(scrollTop, positions) {
    let idx = positions.length;
    while (idx-- > 0) {
        const entry = positions[idx];
        // 30px tolerance: a page counts as current slightly before its top
        // reaches the upper edge of the panel.
        if (entry.top - 30 <= scrollTop) {
            return entry.page;
        }
    }
    return 1;
}
// Scroll `panel` so that page `pageNum` is at the top.
//
// `positions` is a list of `{ page, top }` entries. The panel's scrollTop is
// set 10px above the recorded top of the matching entry; nothing happens
// when no entry has that page number.
function scrollToPage(panel, positions, pageNum) {
    const target = positions.find((entry) => entry.page === pageNum);
    if (!target) {
        return;
    }
    panel.scrollTop = target.top - 10;
}
// Wire up page-level scroll synchronisation between #originalPanel and
// #translatedPanel.
//
// Does nothing (beyond logging) when either panel is missing or either of
// the file-level position tables (`pagePositions`, `translatedPagePositions`)
// is empty. Otherwise, scrolling one panel moves the other to the same page.
function enableSyncScroll() {
    const originalPanel = document.getElementById('originalPanel');
    const translatedPanel = document.getElementById('translatedPanel');

    const ready = originalPanel
        && translatedPanel
        && pagePositions.length > 0
        && translatedPagePositions.length > 0;
    if (!ready) {
        console.log('滚动同步未启用: 缺少元素或位置数据');
        return;
    }

    console.log('滚动同步已启用');

    // Make `follower` track `leader`. The position tables are passed as
    // getters so each scroll event reads the current file-level arrays.
    const follow = (leader, follower, leaderPositions, followerPositions) => {
        leader.addEventListener('scroll', () => {
            if (!syncScrollEnabled) return;

            const currentPage = findCurrentPage(leader.scrollTop, leaderPositions());

            // Suspend syncing for 100ms so the programmatic scroll of the
            // follower does not bounce back to the leader.
            syncScrollEnabled = false;
            scrollToPage(follower, followerPositions(), currentPage);
            setTimeout(() => syncScrollEnabled = true, 100);
        });
    };

    follow(originalPanel, translatedPanel, () => pagePositions, () => translatedPagePositions);
    follow(translatedPanel, originalPanel, () => translatedPagePositions, () => pagePositions);
}
// 重新翻译
document.getElementById('retranslateBtn')?.addEventListener('click', async function() {
const instruction = document.getElementById('retranslateInstruction').value;

1
test_simple.txt Normal file
View File

@@ -0,0 +1 @@
This is a test PDF file. Hello World. This is a simple test document.