feat: 新增 pdf_translate 命令行工具
- 支持命令: translate/list/status/download/config - 支持用户认证: --user --password 参数 - 翻译进度实时显示,结果可在网页查看 - 新增 /api/config 和 /api/translations 接口 - 修复异步翻译任务的配置获取逻辑
This commit is contained in:
36
app.py
36
app.py
@@ -445,6 +445,36 @@ def upload_pdf():
|
||||
})
|
||||
|
||||
|
||||
@app.route('/api/config')
def api_config():
    """Return the system configuration as JSON.

    The response carries ``site_name``, ``max_file_size`` and
    ``cache_expire_days`` from the site configuration, plus the LLM
    configuration under ``llm_config``.

    This route performs no authentication check, so entries of the LLM
    configuration whose key looks like a credential are left out of the
    response.
    """
    from admin import get_llm_config, get_site_config

    # Read the site configuration once so that all three fields come from
    # the same snapshot instead of three separate lookups.
    site_config = get_site_config()

    llm_config = get_llm_config()
    if isinstance(llm_config, dict):
        # NOTE(review): the schema returned by get_llm_config() is not
        # visible here; filtering by key suffix is a conservative guess at
        # which entries are secrets - confirm against admin.py.
        secret_suffixes = ('key', 'secret', 'password', 'token')
        llm_config = {
            name: value
            for name, value in llm_config.items()
            if not str(name).lower().endswith(secret_suffixes)
        }

    return jsonify({
        'site_name': site_config.get('site_name'),
        'max_file_size': site_config.get('max_file_size'),
        'cache_expire_days': site_config.get('cache_expire_days'),
        'llm_config': llm_config,
    })
|
||||
|
||||
|
||||
@app.route('/api/translations')
def api_translations_list():
    """Return the current user's translation records as JSON.

    A resolved user gets up to 20 of their own records ordered by
    ``created_at`` descending; when no user is resolved the list is empty.
    """
    current = get_current_user()

    # Guests keep this empty list.
    records = []
    if current:
        query = Translation.query.filter_by(user_id=current.id)
        query = query.order_by(Translation.created_at.desc())
        records = query.limit(20).all()

    payload = [record.to_dict() for record in records]
    return jsonify({'translations': payload})
|
||||
|
||||
|
||||
@app.route('/api/status/<int:translation_id>')
|
||||
def translation_status(translation_id):
|
||||
"""获取翻译状态"""
|
||||
@@ -462,8 +492,12 @@ def translation_status(translation_id):
|
||||
'id': translation.id,
|
||||
'status': translation.status,
|
||||
'progress': translation.progress,
|
||||
'filename': translation.original_filename,
|
||||
'pages': translation.page_count,
|
||||
'from_cache': translation.from_cache,
|
||||
'error': translation.error_message
|
||||
'error': translation.error_message,
|
||||
'created_at': translation.created_at.isoformat() if translation.created_at else None,
|
||||
'completed_at': translation.completed_at.isoformat() if translation.completed_at else None,
|
||||
})
|
||||
|
||||
|
||||
|
||||
307
pdf_translate.py
Executable file
307
pdf_translate.py
Executable file
@@ -0,0 +1,307 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
PDF翻译命令行工具
|
||||
|
||||
使用方法:
|
||||
pdf_translate [--user <username>] [--password <password>] translate <pdf_file> [--instruction "翻译要求"]
pdf_translate [--user <username>] [--password <password>] list
pdf_translate [--user <username>] [--password <password>] status <translation_id>
pdf_translate [--user <username>] [--password <password>] download <translation_id> <output_file>
|
||||
pdf_translate config
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
from pathlib import Path
|
||||
|
||||
# API 基础地址
|
||||
API_BASE = "http://localhost:19000"
|
||||
|
||||
def login(username, password, api_base=API_BASE):
    """Log in and return the session cookies.

    Posts the credentials as JSON to ``{api_base}/api/login``. Returns the
    response cookies when the reply reports ``success``. Otherwise prints
    the reason and returns ``None``; the same happens when the request or
    the JSON decoding raises.
    """
    endpoint = f"{api_base}/api/login"
    credentials = {"username": username, "password": password}

    try:
        reply = requests.post(endpoint, json=credentials, timeout=10)
        body = reply.json()

        # Guard clause: report a rejected login first.
        if not body.get('success'):
            print(f"❌ 登录失败: {body.get('error', '未知错误')}")
            return None

        print(f"✅ 登录成功: {username}")
        return reply.cookies
    except Exception as e:
        print(f"❌ 登录请求失败: {e}")
        return None
|
||||
|
||||
def get_user_info(cookies, api_base=API_BASE):
    """Fetch the profile of the logged-in user.

    Sends the session cookies to ``{api_base}/api/user/info`` and returns
    the ``user`` entry of the JSON reply. This is a best-effort lookup:
    any request or decoding error yields ``None`` instead of raising.
    """
    url = f"{api_base}/api/user/info"
    try:
        response = requests.get(url, cookies=cookies, timeout=10)
        return response.json().get('user')
    except Exception:
        # `except Exception` rather than a bare `except:` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit still propagate.
        return None
|
||||
|
||||
def translate_pdf(pdf_file, instruction=None, cookies=None, api_base=API_BASE):
    """Upload a PDF for translation and wait for the result.

    The file is posted to ``{api_base}/api/upload`` together with the
    optional ``instruction``. Unless the server reports the result came
    from cache, the translation status is polled afterwards.

    Returns the translation id from the server reply, or ``None`` when the
    file does not exist, the server answers with a non-200 status, or the
    request raises.
    """
    if not os.path.exists(pdf_file):
        print(f"❌ 文件不存在: {pdf_file}")
        return None

    upload_url = f"{api_base}/api/upload"
    print(f"📤 上传文件: {pdf_file}")

    # The instruction is only sent when one was given.
    form_fields = {'instruction': instruction} if instruction else {}

    try:
        with open(pdf_file, 'rb') as pdf_handle:
            reply = requests.post(
                upload_url,
                files={'file': pdf_handle},
                data=form_fields,
                cookies=cookies,
                timeout=60,
            )
        payload = reply.json()

        if reply.status_code != 200:
            print(f"❌ 上传失败: {payload.get('error', '未知错误')}")
            return None

        translation_id = payload.get('translation_id')
        job_id = payload.get('task_id')
        cached = payload.get('from_cache', False)

        print(f"✅ 上传成功!")
        print(f" 翻译ID: {translation_id}")
        print(f" 任务ID: {job_id}")
        print(f" 使用缓存: {'是' if cached else '否'}")

        # A cached result is already complete; otherwise wait for the job.
        if not cached:
            print(f"\n🔄 等待翻译完成...")
            poll_translation_status(translation_id, cookies, api_base)

        return translation_id
    except Exception as e:
        print(f"❌ 上传请求失败: {e}")
        return None
|
||||
|
||||
def poll_translation_status(translation_id, cookies=None, api_base=API_BASE,
                            max_wait=300, interval=5):
    """Poll the status endpoint until the translation finishes.

    Args:
        translation_id: Id used in ``{api_base}/api/status/<id>``.
        cookies: Optional session cookies sent with every request.
        api_base: Base URL of the service.
        max_wait: Maximum wall-clock time to wait, in seconds.
        interval: Pause between two polls, in seconds.

    Returns:
        ``True`` when the status becomes ``completed``; ``False`` when it
        becomes ``failed`` or the deadline passes first.
    """
    import time

    url = f"{api_base}/api/status/{translation_id}"

    # Use a wall-clock deadline: counting only the sleeps would ignore the
    # time spent inside slow or timed-out requests and let the loop run far
    # longer than max_wait.
    deadline = time.monotonic() + max_wait

    while time.monotonic() < deadline:
        try:
            response = requests.get(url, cookies=cookies, timeout=10)
            result = response.json()

            status = result.get('status', 'unknown')
            progress = result.get('progress', 0)

            print(f" 状态: {status}, 进度: {progress}%")

            if status == 'completed':
                print(f"✅ 翻译完成!")
                return True
            if status == 'failed':
                print(f"❌ 翻译失败: {result.get('error', '未知错误')}")
                return False
        except Exception as e:
            # A failed poll is not fatal; report it and try again.
            print(f"⚠️ 状态查询失败: {e}")

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining > 0:
            time.sleep(min(interval, remaining))

    print(f"⚠️ 等待超时,请在网页查看结果")
    return False
|
||||
|
||||
def list_translations(cookies=None, api_base=API_BASE):
    """Print the translation records returned by ``/api/translations``.

    Prints one table row per record (id, file name, status, progress,
    creation time), or a notice when the list is empty. Request and
    decoding errors are reported on stdout; nothing is returned.
    """
    url = f"{api_base}/api/translations"

    try:
        response = requests.get(url, cookies=cookies, timeout=10)
        result = response.json()

        translations = result.get('translations', [])

        if not translations:
            print("暂无翻译记录")
            return

        print(f"\n📋 翻译记录 (共{len(translations)}条):\n")
        print("ID | 文件名 | 状态 | 进度 | 时间")
        print("-" * 60)

        for t in translations:
            # Fields may be present but null in the JSON; fall back to a
            # placeholder so one incomplete record cannot abort the whole
            # listing with a TypeError from slicing or formatting None.
            id_str = str(t.get('id', '?'))[:4]
            filename = str(t.get('filename') or '?')[:15]
            status = str(t.get('status') or '?')
            progress = t.get('progress') or 0
            created = t.get('created_at') or '?'
            if created != '?':
                # Keep "MM-DD HH:MM"; an ISO timestamp carries a 'T'
                # between date and time, which is shown as a space.
                created = str(created)[5:16].replace('T', ' ')

            print(f"{id_str:4} | {filename:15} | {status:8} | {progress:3}% | {created}")

    except Exception as e:
        print(f"❌ 获取列表失败: {e}")
|
||||
|
||||
def get_translation_status(translation_id, cookies=None, api_base=API_BASE):
    """Print a one-off status report for a translation.

    Queries ``{api_base}/api/status/<translation_id>`` and prints status,
    progress, file name, page count and cache flag, followed by the error
    text when the reply carries one. Failures are reported on stdout.
    """
    endpoint = f"{api_base}/api/status/{translation_id}"

    try:
        info = requests.get(endpoint, cookies=cookies, timeout=10).json()

        print(f"\n📊 翻译状态 (ID: {translation_id}):\n")
        print(f" 状态: {info.get('status', '未知')}")
        print(f" 进度: {info.get('progress', 0)}%")
        print(f" 文件: {info.get('filename', '未知')}")
        print(f" 页数: {info.get('pages', '?')}")

        cached_label = '是' if info.get('from_cache') else '否'
        print(f" 缓存: {cached_label}")

        # The error line only appears when the server supplied a message.
        failure = info.get('error')
        if failure:
            print(f" 错误: {failure}")
    except Exception as e:
        print(f"❌ 获取状态失败: {e}")
|
||||
|
||||
def download_translation(translation_id, output_file, cookies=None, api_base=API_BASE):
    """Download a translation result into ``output_file``.

    Fetches ``{api_base}/api/download/<translation_id>`` and writes the
    response body to disk in binary mode. Returns ``True`` on success and
    ``False`` when the server answers with a non-200 status or when the
    request or the file write raises.
    """
    endpoint = f"{api_base}/api/download/{translation_id}"

    try:
        reply = requests.get(endpoint, cookies=cookies, timeout=30)

        if reply.status_code == 200:
            with open(output_file, 'wb') as target:
                target.write(reply.content)
            print(f"✅ 下载成功: {output_file}")
            return True

        print(f"❌ 下载失败: {reply.status_code}")
        return False
    except Exception as e:
        print(f"❌ 下载失败: {e}")
        return False
|
||||
|
||||
def show_config(api_base=API_BASE):
    """Print the configuration reported by ``{api_base}/api/config``.

    Shows the site name, maximum file size, cache lifetime and the LLM
    endpoint and model. Request or decoding failures are reported on
    stdout; nothing is returned.
    """
    endpoint = f"{api_base}/api/config"

    try:
        settings = requests.get(endpoint, timeout=10).json()

        print(f"\n⚙️ 当前配置:\n")
        print(f" 网站名称: {settings.get('site_name', '未知')}")
        print(f" 最大文件: {settings.get('max_file_size', '?')}MB")
        print(f" 缓存天数: {settings.get('cache_expire_days', '?')}天")

        # Both LLM rows share one layout, so print them from a small table.
        llm_settings = settings.get('llm_config', {})
        for label, key in (("大模型API", 'api_base'), ("模型", 'model')):
            print(f" {label}: {llm_settings.get(key, '未设置')}")
    except Exception as e:
        print(f"❌ 获取配置失败: {e}")
|
||||
|
||||
def main():
    """Parse the command line and run the requested sub-command.

    ``--user``, ``--password`` and ``--api`` are accepted both before and
    after the sub-command name, so the documented form
    ``pdf_translate translate test.pdf --user admin --password admin123``
    works as well as ``pdf_translate --user admin ... translate test.pdf``.

    Returns:
        ``0`` on success (also when only the help text is shown), ``1``
        when login, ``translate`` or ``download`` fails. ``list``,
        ``status`` and ``config`` do not report an outcome and therefore
        always count as success.
    """
    # Options shared with every sub-command. argparse hands everything after
    # the sub-command name to the sub-parser, so options that exist only on
    # the top-level parser would be rejected there as "unrecognized
    # arguments". SUPPRESS keeps an option that was not given after the
    # sub-command from overwriting a value given before it.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument('--user', '-u', default=argparse.SUPPRESS, help='用户名')
    common.add_argument('--password', '-p', default=argparse.SUPPRESS, help='密码')
    common.add_argument('--api', default=argparse.SUPPRESS,
                        help=f'API地址 (默认: {API_BASE})')

    parser = argparse.ArgumentParser(
        description="PDF翻译命令行工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
pdf_translate translate test.pdf --user admin --password admin123
pdf_translate translate test.pdf --instruction "保持专业术语原文"
pdf_translate list --user admin --password admin123
pdf_translate status 1
pdf_translate download 1 result.md
pdf_translate config
"""
    )

    parser.add_argument('--user', '-u', help='用户名')
    parser.add_argument('--password', '-p', help='密码')
    parser.add_argument('--api', default=API_BASE, help=f'API地址 (默认: {API_BASE})')

    subparsers = parser.add_subparsers(dest='command', help='命令')

    # translate command
    trans_parser = subparsers.add_parser('translate', parents=[common], help='翻译PDF文件')
    trans_parser.add_argument('file', help='PDF文件路径')
    trans_parser.add_argument('--instruction', '-i', help='翻译要求')

    # list command
    subparsers.add_parser('list', parents=[common], help='列出翻译记录')

    # status command
    status_parser = subparsers.add_parser('status', parents=[common], help='查看翻译状态')
    status_parser.add_argument('id', type=int, help='翻译ID')

    # download command
    download_parser = subparsers.add_parser('download', parents=[common], help='下载翻译结果')
    download_parser.add_argument('id', type=int, help='翻译ID')
    download_parser.add_argument('output', help='输出文件路径')

    # config command
    subparsers.add_parser('config', parents=[common], help='显示当前配置')

    args = parser.parse_args()

    # API address, possibly overridden by the user.
    api_base_arg = args.api

    if not args.command:
        parser.print_help()
        return 0

    # Log in when both user name and password were supplied.
    cookies = None
    if args.user and args.password:
        cookies = login(args.user, args.password, api_base_arg)
        if not cookies:
            print("请检查用户名和密码")
            return 1

        user = get_user_info(cookies, api_base_arg)
        if user:
            print(f"👤 用户: {user.get('username')} ({user.get('user_type')})")
    elif args.user or args.password:
        # Only one half of the credentials was given: say so instead of
        # silently continuing as a guest.
        print("⚠️ 需同时提供 --user 和 --password 才会登录,本次以访客身份继续")

    # Run the command.
    if args.command == 'translate':
        translation_id = translate_pdf(args.file, args.instruction, cookies, api_base_arg)
        if not translation_id:
            return 1
        print(f"\n💡 在网页查看结果: {api_base_arg}/history")

    elif args.command == 'list':
        list_translations(cookies, api_base_arg)

    elif args.command == 'status':
        get_translation_status(args.id, cookies, api_base_arg)

    elif args.command == 'download':
        if not download_translation(args.id, args.output, cookies, api_base_arg):
            return 1

    elif args.command == 'config':
        show_config(api_base_arg)

    return 0


if __name__ == '__main__':
    # Propagate main()'s status so shell scripts can detect failures.
    sys.exit(main())
|
||||
@@ -279,12 +279,14 @@ class TranslationTask:
|
||||
with app.app_context():
|
||||
from admin import get_llm_config
|
||||
llm_config = get_llm_config()
|
||||
config['LLM_CONFIG'] = llm_config
|
||||
config = {'LLM_CONFIG': llm_config}
|
||||
|
||||
service = TranslationService(config)
|
||||
task['status'] = 'processing'
|
||||
task['started_at'] = datetime.now().isoformat()
|
||||
|
||||
print(f"[翻译任务] 开始翻译,使用配置: {config.get('LLM_CONFIG', {}).get('api_base', '未知')}")
|
||||
|
||||
# 更新数据库状态为 processing
|
||||
if app and translation_id:
|
||||
with app.app_context():
|
||||
|
||||
Reference in New Issue
Block a user