Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 9569898f33 | |||
| 3479cbd04c | |||
| e5c9ea322a | |||
| 17a442b144 | |||
| e524938276 |
75
app.py
75
app.py
@@ -374,16 +374,17 @@ def upload_pdf():
|
|||||||
output_path = cache_path
|
output_path = cache_path
|
||||||
else:
|
else:
|
||||||
# 需要翻译
|
# 需要翻译
|
||||||
# 保存上传文件
|
# 保存上传文件 - 使用同一个UUID确保uploads和outputs目录关联
|
||||||
upload_dir = os.path.join(UPLOAD_DIR, str(uuid.uuid4()))
|
session_uuid = str(uuid.uuid4())
|
||||||
|
upload_dir = os.path.join(UPLOAD_DIR, session_uuid)
|
||||||
os.makedirs(upload_dir, exist_ok=True)
|
os.makedirs(upload_dir, exist_ok=True)
|
||||||
upload_path = os.path.join(upload_dir, filename)
|
upload_path = os.path.join(upload_dir, filename)
|
||||||
|
|
||||||
with open(upload_path, 'wb') as f:
|
with open(upload_path, 'wb') as f:
|
||||||
f.write(file_content)
|
f.write(file_content)
|
||||||
|
|
||||||
# 创建输出路径
|
# 创建输出路径 - 使用相同的UUID
|
||||||
output_dir = os.path.join(OUTPUT_DIR, str(uuid.uuid4()))
|
output_dir = os.path.join(OUTPUT_DIR, session_uuid)
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
output_path = os.path.join(output_dir, f"{filename}_translated.md")
|
output_path = os.path.join(output_dir, f"{filename}_translated.md")
|
||||||
|
|
||||||
@@ -400,6 +401,7 @@ def upload_pdf():
|
|||||||
translate_params=json.dumps({'instruction': instruction}) if instruction else None,
|
translate_params=json.dumps({'instruction': instruction}) if instruction else None,
|
||||||
status='processing' if not from_cache else 'completed',
|
status='processing' if not from_cache else 'completed',
|
||||||
progress=0 if not from_cache else 100,
|
progress=0 if not from_cache else 100,
|
||||||
|
upload_path=upload_path if not from_cache else None, # 保存上传路径
|
||||||
output_path=output_path,
|
output_path=output_path,
|
||||||
from_cache=from_cache
|
from_cache=from_cache
|
||||||
)
|
)
|
||||||
@@ -609,16 +611,71 @@ def compare_view(translation_id):
|
|||||||
return jsonify({'error': '请登录后使用此功能'}), 401
|
return jsonify({'error': '请登录后使用此功能'}), 401
|
||||||
|
|
||||||
translation = Translation.query.get(translation_id)
|
translation = Translation.query.get(translation_id)
|
||||||
if not translation or translation.user_id != user.id:
|
if not translation or (translation.user_id != user.id and user.user_type != 'admin'):
|
||||||
return jsonify({'error': '无权访问'}), 403
|
return jsonify({'error': '无权访问'}), 403
|
||||||
|
|
||||||
# 生成对比文件
|
# 读取翻译结果文件
|
||||||
# TODO: 实现对比功能
|
translated_content = ''
|
||||||
|
if translation.output_path and os.path.exists(translation.output_path):
|
||||||
|
try:
|
||||||
|
with open(translation.output_path, 'r', encoding='utf-8') as f:
|
||||||
|
translated_content = f.read()
|
||||||
|
except Exception as e:
|
||||||
|
translated_content = f'读取失败: {str(e)}'
|
||||||
|
|
||||||
|
# 从翻译结果中提取各页内容
|
||||||
|
# 翻译结果格式是Markdown,包含"## 第 X 页"分隔
|
||||||
|
original_pages = []
|
||||||
|
translated_pages = []
|
||||||
|
|
||||||
|
if translated_content:
|
||||||
|
# 解析翻译结果的页面结构
|
||||||
|
import re
|
||||||
|
page_pattern = r'## 第 (\d+) 页\n\n(.*?)\n\n---'
|
||||||
|
matches = re.findall(page_pattern, translated_content, re.DOTALL)
|
||||||
|
|
||||||
|
for page_num, content in matches:
|
||||||
|
translated_pages.append({
|
||||||
|
'page': int(page_num),
|
||||||
|
'content': content.strip()
|
||||||
|
})
|
||||||
|
|
||||||
|
# 如果有原文内容存储,获取原文
|
||||||
|
original_content = ''
|
||||||
|
|
||||||
|
# 优先从数据库存储的upload_path获取原PDF
|
||||||
|
possible_paths = []
|
||||||
|
|
||||||
|
if translation.upload_path:
|
||||||
|
possible_paths.append(translation.upload_path)
|
||||||
|
|
||||||
|
# 备用方案:尝试从路径推断(兼容旧数据)
|
||||||
|
upload_dir = os.path.dirname(translation.output_path.replace('outputs', 'uploads').replace('_translated.md', '')) if translation.output_path else ''
|
||||||
|
if upload_dir:
|
||||||
|
possible_paths.append(
|
||||||
|
translation.output_path.replace('outputs', 'uploads').replace('_translated.md', '') if translation.output_path else ''
|
||||||
|
)
|
||||||
|
possible_paths.append(os.path.join(upload_dir, translation.original_filename))
|
||||||
|
|
||||||
|
for pdf_path in possible_paths:
|
||||||
|
if pdf_path and os.path.exists(pdf_path) and pdf_path.endswith('.pdf'):
|
||||||
|
try:
|
||||||
|
from pypdf import PdfReader
|
||||||
|
reader = PdfReader(pdf_path)
|
||||||
|
for page in reader.pages:
|
||||||
|
text = page.extract_text()
|
||||||
|
if text:
|
||||||
|
original_content += text + '\n\n'
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
break
|
||||||
|
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'id': translation.id,
|
'id': translation.id,
|
||||||
'original': '原文内容',
|
'filename': translation.original_filename,
|
||||||
'translated': '译文内容'
|
'original': original_content or '原文内容未找到(可能PDF已被删除或为扫描版)',
|
||||||
|
'translated': translated_content,
|
||||||
|
'pages': translated_pages
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
187
logs/app.log
Normal file
187
logs/app.log
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
* Serving Flask app 'app'
|
||||||
|
* Debug mode: on
|
||||||
|
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
|
||||||
|
* Running on all addresses (0.0.0.0)
|
||||||
|
* Running on http://127.0.0.1:19000
|
||||||
|
* Running on http://192.168.2.17:19000
|
||||||
|
Press CTRL+C to quit
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/pdf_translate.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
127.0.0.1 - - [16/Apr/2026 21:50:38] "GET /api/config HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:08] "GET /admin/translations HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:08] "GET /static/img/favicon.svg HTTP/1.1" 304 -
|
||||||
|
/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py:53: LegacyAPIWarning: The Query.get() method is considered legacy as of the 1.x series of SQLAlchemy and becomes a legacy construct in 2.0. The method is now available as Session.get() (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
|
||||||
|
return User.query.get(user_id)
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:13] "GET / HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:13] "GET /static/css/style.css HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:13] "GET /static/js/main.js HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:13] "GET /api/user/info HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:13] "GET /static/img/favicon.svg HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:16] "GET /history HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:16] "GET /static/css/style.css HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:18] "GET / HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:18] "GET /static/css/style.css HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:18] "GET /static/js/main.js HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:18] "GET /api/user/info HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:26] "POST /api/upload HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:26] "GET /api/task/17baff33-894b-4e56-8975-cc2e6359cd66 HTTP/1.1" 200 -
|
||||||
|
/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py:481: LegacyAPIWarning: The Query.get() method is considered legacy as of the 1.x series of SQLAlchemy and becomes a legacy construct in 2.0. The method is now available as Session.get() (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
|
||||||
|
translation = Translation.query.get(translation_id)
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:26] "GET /api/status/4 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:28] "GET /api/task/17baff33-894b-4e56-8975-cc2e6359cd66 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:28] "GET /api/status/4 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:30] "GET /api/task/17baff33-894b-4e56-8975-cc2e6359cd66 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:30] "GET /api/status/4 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:32] "GET /api/task/17baff33-894b-4e56-8975-cc2e6359cd66 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:32] "GET /api/status/4 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:34] "GET /api/task/17baff33-894b-4e56-8975-cc2e6359cd66 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:34] "GET /api/status/4 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:37] "GET /api/task/17baff33-894b-4e56-8975-cc2e6359cd66 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:37] "GET /api/status/4 HTTP/1.1" 200 -
|
||||||
|
/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py:519: LegacyAPIWarning: The Query.get() method is considered legacy as of the 1.x series of SQLAlchemy and becomes a legacy construct in 2.0. The method is now available as Session.get() (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
|
||||||
|
translation = Translation.query.get(translation_id)
|
||||||
|
192.168.2.10 - - [16/Apr/2026 21:58:37] "GET /api/result/4 HTTP/1.1" 200 -
|
||||||
|
127.0.0.1 - - [16/Apr/2026 22:00:01] "GET /api/health HTTP/1.1" 404 -
|
||||||
|
127.0.0.1 - - [16/Apr/2026 22:00:33] "POST /api/upload HTTP/1.1" 200 -
|
||||||
|
127.0.0.1 - - [16/Apr/2026 22:00:38] "GET /api/status/5 HTTP/1.1" 200 -
|
||||||
|
127.0.0.1 - - [16/Apr/2026 22:01:15] "GET /api/status/5 HTTP/1.1" 200 -
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
|
||||||
|
[翻译任务] 开始翻译,使用配置: https://open.bigmodel.cn/api/paas/v4
|
||||||
|
[翻译任务] 开始翻译,使用配置: https://open.bigmodel.cn/api/paas/v4
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
127.0.0.1 - - [16/Apr/2026 22:20:01] "GET /api/health HTTP/1.1" 404 -
|
||||||
|
/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py:53: LegacyAPIWarning: The Query.get() method is considered legacy as of the 1.x series of SQLAlchemy and becomes a legacy construct in 2.0. The method is now available as Session.get() (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
|
||||||
|
return User.query.get(user_id)
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:27] "GET / HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:27] "GET /static/css/style.css HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:27] "GET /static/js/main.js HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:27] "GET /api/user/info HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:27] "GET /static/img/favicon.svg HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:31] "GET /admin/translations HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:31] "GET /static/img/favicon.svg HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:33] "POST /admin/translation/5/toggle-share HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:33] "GET /admin/translations HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:33] "GET /static/img/favicon.svg HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:34] "POST /admin/translation/4/toggle-share HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:34] "GET /admin/translations HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:34] "GET /static/img/favicon.svg HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:42] "GET /admin/translation/5 HTTP/1.1" 200 -
|
||||||
|
/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py:519: LegacyAPIWarning: The Query.get() method is considered legacy as of the 1.x series of SQLAlchemy and becomes a legacy construct in 2.0. The method is now available as Session.get() (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
|
||||||
|
translation = Translation.query.get(translation_id)
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:42] "GET /api/result/5 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:45] "GET /admin/translations HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:46] "GET /admin/translation/4 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:47] "GET /api/result/4 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:48] "GET /admin/translations HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:52] "GET / HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:52] "GET /static/css/style.css HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:52] "GET /static/js/main.js HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:52] "GET /static/img/favicon.svg HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:52] "GET /api/user/info HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:54] "GET / HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:54] "GET /static/css/style.css HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:54] "GET /static/js/main.js HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:54] "GET /api/user/info HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:24:54] "GET /static/img/favicon.svg HTTP/1.1" 304 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:25:00] "POST /api/upload HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:25:00] "GET /api/task/ec82bf8c-70b3-474a-b94f-64598c81f7fa HTTP/1.1" 200 -
|
||||||
|
/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/app.py:481: LegacyAPIWarning: The Query.get() method is considered legacy as of the 1.x series of SQLAlchemy and becomes a legacy construct in 2.0. The method is now available as Session.get() (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
|
||||||
|
translation = Translation.query.get(translation_id)
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:25:00] "GET /api/status/6 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:25:02] "GET /api/task/ec82bf8c-70b3-474a-b94f-64598c81f7fa HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:25:02] "GET /api/status/6 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:25:05] "GET /api/task/ec82bf8c-70b3-474a-b94f-64598c81f7fa HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:25:05] "GET /api/status/6 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:25:07] "GET /api/task/ec82bf8c-70b3-474a-b94f-64598c81f7fa HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:25:07] "GET /api/status/6 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:25:09] "GET /api/task/ec82bf8c-70b3-474a-b94f-64598c81f7fa HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:25:09] "GET /api/status/6 HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:25:11] "GET /api/task/ec82bf8c-70b3-474a-b94f-64598c81f7fa HTTP/1.1" 200 -
|
||||||
|
192.168.2.10 - - [16/Apr/2026 22:25:11] "GET /api/status/6 HTTP/1.1" 200 -
|
||||||
|
127.0.0.1 - - [16/Apr/2026 22:40:01] "GET /api/health HTTP/1.1" 404 -
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
|
||||||
|
[翻译任务] 开始翻译,使用配置: https://open.bigmodel.cn/api/paas/v4
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
* Detected change in '/home/xian/.openclaw/workspace-coder/works/pdf-translate-web-v2/services.py', reloading
|
||||||
|
* Restarting with stat
|
||||||
|
* Debugger is active!
|
||||||
|
* Debugger PIN: 154-698-244
|
||||||
|
127.0.0.1 - - [16/Apr/2026 23:00:01] "GET /api/health HTTP/1.1" 404 -
|
||||||
|
127.0.0.1 - - [16/Apr/2026 23:03:32] "GET / HTTP/1.1" 200 -
|
||||||
@@ -169,6 +169,7 @@ class Translation(db.Model):
|
|||||||
error_message = db.Column(db.Text, nullable=True)
|
error_message = db.Column(db.Text, nullable=True)
|
||||||
|
|
||||||
# 输出
|
# 输出
|
||||||
|
upload_path = db.Column(db.String(255), nullable=True) # 原始PDF文件路径
|
||||||
output_path = db.Column(db.String(255), nullable=True) # 翻译结果文件路径
|
output_path = db.Column(db.String(255), nullable=True) # 翻译结果文件路径
|
||||||
|
|
||||||
# 时间戳
|
# 时间戳
|
||||||
|
|||||||
154
services.py
154
services.py
@@ -7,10 +7,20 @@ import json
|
|||||||
import time
|
import time
|
||||||
import hashlib
|
import hashlib
|
||||||
import threading
|
import threading
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from pypdf import PdfReader
|
from pypdf import PdfReader
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
# pdf2image 用于将PDF转为图像
|
||||||
|
try:
|
||||||
|
from pdf2image import convert_from_path
|
||||||
|
PDF_TO_IMAGE_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
PDF_TO_IMAGE_AVAILABLE = False
|
||||||
|
|
||||||
# ==================== LLM客户端 ====================
|
# ==================== LLM客户端 ====================
|
||||||
class TranslationService:
|
class TranslationService:
|
||||||
@@ -99,6 +109,115 @@ class TranslationService:
|
|||||||
text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', text)
|
text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', text)
|
||||||
return text.strip()
|
return text.strip()
|
||||||
|
|
||||||
|
def is_vision_model(self):
|
||||||
|
"""检查是否是视觉模型"""
|
||||||
|
model = self.llm_config.get('model', '')
|
||||||
|
# 常见视觉模型名称
|
||||||
|
vision_models = ['vision', 'vlm', 'glm-4v', 'glm-4.6v', 'gpt-4-vision', 'gpt-4o', 'qwen-vl', 'claude-3']
|
||||||
|
return any(v in model.lower() for v in vision_models)
|
||||||
|
|
||||||
|
def pdf_to_images(self, pdf_path, max_pages=None):
|
||||||
|
"""将PDF页面转换为图像"""
|
||||||
|
if not PDF_TO_IMAGE_AVAILABLE:
|
||||||
|
return None, "pdf2image未安装,无法处理扫描版PDF。请安装: pip install pdf2image"
|
||||||
|
|
||||||
|
try:
|
||||||
|
# 获取PDF页数
|
||||||
|
reader = PdfReader(pdf_path)
|
||||||
|
total_pages = len(reader.pages)
|
||||||
|
|
||||||
|
if max_pages:
|
||||||
|
pages_to_convert = min(max_pages, total_pages)
|
||||||
|
else:
|
||||||
|
pages_to_convert = total_pages
|
||||||
|
|
||||||
|
# 转换PDF为图像
|
||||||
|
images = convert_from_path(
|
||||||
|
pdf_path,
|
||||||
|
first_page=1,
|
||||||
|
last_page=pages_to_convert,
|
||||||
|
dpi=200, # 适当的DPI
|
||||||
|
fmt='jpeg'
|
||||||
|
)
|
||||||
|
|
||||||
|
return images, None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return None, f"PDF转图像失败: {str(e)}"
|
||||||
|
|
||||||
|
def extract_text_from_image(self, image):
|
||||||
|
"""使用视觉模型从图像中提取文字"""
|
||||||
|
if not self.is_vision_model():
|
||||||
|
return None, "当前模型不是视觉模型,无法识别图像文字"
|
||||||
|
|
||||||
|
try:
|
||||||
|
# 将图像转为base64
|
||||||
|
buffered = io.BytesIO()
|
||||||
|
image.save(buffered, format="JPEG")
|
||||||
|
img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
|
||||||
|
|
||||||
|
# 构建多模态请求
|
||||||
|
response = self.client.chat.completions.create(
|
||||||
|
model=self.llm_config['model'],
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "请识别并提取这张图片中的所有文字内容。只输出提取的文字,不要添加任何解释或说明。保持原有的段落和格式。"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {
|
||||||
|
"url": f"data:image/jpeg;base64,{img_base64}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
max_tokens=self.llm_config['max_tokens'],
|
||||||
|
temperature=0.1,
|
||||||
|
timeout=self.llm_config['timeout'],
|
||||||
|
)
|
||||||
|
|
||||||
|
content = response.choices[0].message.content
|
||||||
|
return content.strip() if content else '', None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return '', f"视觉模型识别失败: {str(e)}"
|
||||||
|
|
||||||
|
def extract_text_from_scanned_pdf(self, pdf_path, progress_callback=None):
|
||||||
|
"""从扫描版PDF提取文字(使用视觉模型OCR)"""
|
||||||
|
images, error = self.pdf_to_images(pdf_path)
|
||||||
|
|
||||||
|
if error:
|
||||||
|
return [], error
|
||||||
|
|
||||||
|
pages_text = []
|
||||||
|
total = len(images)
|
||||||
|
|
||||||
|
for i, image in enumerate(images):
|
||||||
|
if progress_callback:
|
||||||
|
progress_callback(int((i+1)/total*50), total, f"OCR识别第{i+1}页...")
|
||||||
|
|
||||||
|
text, err = self.extract_text_from_image(image)
|
||||||
|
|
||||||
|
if err:
|
||||||
|
pages_text.append({
|
||||||
|
'page': i + 1,
|
||||||
|
'text': '',
|
||||||
|
'error': err
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
pages_text.append({
|
||||||
|
'page': i + 1,
|
||||||
|
'text': text or '',
|
||||||
|
'error': None
|
||||||
|
})
|
||||||
|
|
||||||
|
return pages_text, None
|
||||||
|
|
||||||
def chunk_text(self, text, max_size=2000):
|
def chunk_text(self, text, max_size=2000):
|
||||||
"""分块"""
|
"""分块"""
|
||||||
paragraphs = text.split('\n\n')
|
paragraphs = text.split('\n\n')
|
||||||
@@ -131,11 +250,38 @@ class TranslationService:
|
|||||||
Returns:
|
Returns:
|
||||||
翻译统计信息
|
翻译统计信息
|
||||||
"""
|
"""
|
||||||
|
# 先尝试常规提取
|
||||||
pages = self.extract_pdf_text(pdf_path)
|
pages = self.extract_pdf_text(pdf_path)
|
||||||
total_pages = len(pages)
|
total_pages = len(pages)
|
||||||
|
total_text = sum(len(p['text']) for p in pages)
|
||||||
|
|
||||||
|
# 如果无法提取文本,尝试使用视觉模型OCR
|
||||||
|
if total_pages == 0 or total_text < 10:
|
||||||
|
if self.is_vision_model() and PDF_TO_IMAGE_AVAILABLE:
|
||||||
|
if progress_callback:
|
||||||
|
progress_callback(0, 0, "检测到扫描版PDF,使用视觉模型OCR...")
|
||||||
|
|
||||||
|
pages, error = self.extract_text_from_scanned_pdf(pdf_path, progress_callback)
|
||||||
|
|
||||||
|
if error:
|
||||||
|
raise ValueError(error)
|
||||||
|
|
||||||
|
total_pages = len(pages)
|
||||||
|
total_text = sum(len(p['text']) for p in pages)
|
||||||
|
|
||||||
|
if total_text < 10:
|
||||||
|
raise ValueError("视觉模型OCR未能提取到有效文字内容")
|
||||||
|
|
||||||
|
if progress_callback:
|
||||||
|
progress_callback(50, total_pages, "OCR完成,开始翻译...")
|
||||||
|
else:
|
||||||
|
error_msg = "PDF无法提取文本内容。可能原因:\n1. PDF是扫描版(图像形式)\n2. 当前大模型不是视觉模型,无法识别图像文字\n\n如需处理扫描版PDF,请配置视觉大模型(如 glm-4.6v、gpt-4-vision)"
|
||||||
|
if progress_callback:
|
||||||
|
progress_callback(0, 0, error_msg)
|
||||||
|
raise ValueError(error_msg)
|
||||||
|
|
||||||
if progress_callback:
|
if progress_callback:
|
||||||
progress_callback(0, total_pages, "开始翻译...")
|
progress_callback(50, total_pages, "开始翻译...")
|
||||||
|
|
||||||
translated_pages = []
|
translated_pages = []
|
||||||
total_chunks = 0
|
total_chunks = 0
|
||||||
@@ -150,8 +296,10 @@ class TranslationService:
|
|||||||
translated_chunks.append(translated)
|
translated_chunks.append(translated)
|
||||||
|
|
||||||
if progress_callback:
|
if progress_callback:
|
||||||
progress = int((i + 1) / len(chunks) * 100 / total_pages)
|
# OCR占50%,翻译占50%
|
||||||
progress_callback(progress, total_pages, f"翻译第{page_data['page']}页")
|
page_progress = (i + 1) / len(chunks)
|
||||||
|
overall_progress = 50 + int(page_progress * 50 / total_pages)
|
||||||
|
progress_callback(overall_progress, total_pages, f"翻译第{page_data['page']}页")
|
||||||
|
|
||||||
translated_pages.append({
|
translated_pages.append({
|
||||||
'page': page_data['page'],
|
'page': page_data['page'],
|
||||||
|
|||||||
@@ -70,6 +70,9 @@ document.getElementById('uploadForm').addEventListener('submit', async function(
|
|||||||
currentTranslationId = result.translation_id;
|
currentTranslationId = result.translation_id;
|
||||||
currentTaskId = result.task_id;
|
currentTaskId = result.task_id;
|
||||||
|
|
||||||
|
// 更新按钮状态为翻译中
|
||||||
|
btnText.textContent = '翻译中...';
|
||||||
|
|
||||||
// 如果使用缓存,直接显示结果
|
// 如果使用缓存,直接显示结果
|
||||||
if (result.from_cache) {
|
if (result.from_cache) {
|
||||||
document.getElementById('cacheNotice').style.display = 'block';
|
document.getElementById('cacheNotice').style.display = 'block';
|
||||||
@@ -201,18 +204,55 @@ document.getElementById('viewCompare')?.addEventListener('click', async function
|
|||||||
function showCompareView(data) {
|
function showCompareView(data) {
|
||||||
const resultContent = document.getElementById('resultContent');
|
const resultContent = document.getElementById('resultContent');
|
||||||
|
|
||||||
resultContent.innerHTML = `
|
// 如果有分页数据,按页显示
|
||||||
<div class="compare-container">
|
if (data.pages && data.pages.length > 0) {
|
||||||
<div class="compare-panel original">
|
let html = '<div class="compare-container">';
|
||||||
<h5>原文</h5>
|
|
||||||
<div class="content">${escapeHtml(data.original)}</div>
|
for (const page of data.pages) {
|
||||||
|
html += `
|
||||||
|
<div class="compare-section mb-4">
|
||||||
|
<h5 class="text-center mb-3">第 ${page.page} 页</h5>
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-md-6">
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-header">原文(OCR识别)</div>
|
||||||
|
<div class="card-body"><pre style="white-space: pre-wrap;">${escapeHtml(data.original || '原文内容')}</pre></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-6">
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-header">译文</div>
|
||||||
|
<div class="card-body">${renderMarkdown(page.content)}</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
|
||||||
|
html += '</div>';
|
||||||
|
resultContent.innerHTML = html;
|
||||||
|
} else {
|
||||||
|
// 单一对比视图
|
||||||
|
resultContent.innerHTML = `
|
||||||
|
<div class="compare-container">
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-md-6">
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-header">原文</div>
|
||||||
|
<div class="card-body"><pre style="white-space: pre-wrap;">${escapeHtml(data.original || '无原文内容')}</pre></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-6">
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-header">译文</div>
|
||||||
|
<div class="card-body">${renderMarkdown(data.translated || '无译文内容')}</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="compare-panel translated">
|
`;
|
||||||
<h5>译文</h5>
|
}
|
||||||
<div class="content">${renderMarkdown(data.translated)}</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
`;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 重新翻译
|
// 重新翻译
|
||||||
|
|||||||
1
test_simple.txt
Normal file
1
test_simple.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
This is a test PDF file. Hello World. This is a simple test document.
|
||||||
Reference in New Issue
Block a user