Files
pdf-translate-web/pdf_translate.py
coder ed3d8e095e feat: 新增 pdf_translate 命令行工具
- 支持命令: translate/list/status/download/config
- 支持用户认证: --user --password 参数
- 翻译进度实时显示,结果可在网页查看
- 新增 /api/config 和 /api/translations 接口
- 修复异步翻译任务的配置获取逻辑
2026-04-16 21:50:24 +08:00

307 lines
10 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Command-line client for the PDF translation web service.

Usage (the connection options --user, --password and --api are defined on the
top-level parser, so they are given before the sub-command):
    pdf_translate [--user <username>] [--password <password>] [--api <url>] translate <pdf_file> [--instruction "translation requirements"]
    pdf_translate [--user <username>] [--password <password>] [--api <url>] list
    pdf_translate [--user <username>] [--password <password>] [--api <url>] status <translation_id>
    pdf_translate [--user <username>] [--password <password>] [--api <url>] download <translation_id> <output_file>
    pdf_translate [--api <url>] config
"""
import argparse
import sys
import os
import json
import requests
from pathlib import Path
# Default base URL of the translation service; every endpoint URL in this
# file is built as f"{api_base}/api/...".
API_BASE = "http://localhost:19000"
def login(username, password, api_base=API_BASE):
    """Log in to the service and return the session cookies.

    Posts the credentials as JSON to ``{api_base}/api/login``.

    Args:
        username: Account name sent in the ``username`` field.
        password: Password sent in the ``password`` field.
        api_base: Base URL of the translation service.

    Returns:
        The cookies of the HTTP response when the JSON reply has a truthy
        ``success`` field; ``None`` when the reply reports failure or the
        request/decoding raises.
    """
    endpoint = f"{api_base}/api/login"
    credentials = {"username": username, "password": password}
    try:
        reply = requests.post(endpoint, json=credentials, timeout=10)
        payload = reply.json()
        # Guard clause: report the server-side reason and bail out early.
        if not payload.get('success'):
            print(f"❌ 登录失败: {payload.get('error', '未知错误')}")
            return None
        print(f"✅ 登录成功: {username}")
        return reply.cookies
    except Exception as exc:
        print(f"❌ 登录请求失败: {exc}")
        return None
def get_user_info(cookies, api_base=API_BASE):
    """Fetch the profile of the user that owns the given session.

    Sends a GET request to ``{api_base}/api/user/info``.

    Args:
        cookies: Session cookies to send with the request.
        api_base: Base URL of the translation service.

    Returns:
        The value of the ``user`` field of the JSON reply (``None`` if the
        field is absent), or ``None`` when the request or decoding fails.
    """
    url = f"{api_base}/api/user/info"
    try:
        response = requests.get(url, cookies=cookies, timeout=10)
        return response.json().get('user')
    except Exception:
        # Best-effort lookup: any request/decoding error just means "no user
        # info". Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so Ctrl-C still stops the program.
        return None
def translate_pdf(pdf_file, instruction=None, cookies=None, api_base=API_BASE):
    """Upload a PDF for translation and wait for the job to finish.

    The file is posted as multipart form data to ``{api_base}/api/upload``.
    Unless the reply says the result came from the cache, the function then
    blocks in ``poll_translation_status`` until the job completes, fails or
    the polling times out.

    Args:
        pdf_file: Path of the PDF file to upload.
        instruction: Optional translation instruction sent as the
            ``instruction`` form field; omitted from the form when falsy.
        cookies: Session cookies to send with the requests, if any.
        api_base: Base URL of the translation service.

    Returns:
        The ``translation_id`` from the upload reply. The outcome of the
        polling is not reflected in the return value. ``None`` is returned
        when the file does not exist, the upload is rejected (non-200
        status), or the request raises.
    """
    if not os.path.exists(pdf_file):
        print(f"❌ 文件不存在: {pdf_file}")
        return None
    url = f"{api_base}/api/upload"
    print(f"📤 上传文件: {pdf_file}")
    try:
        data = {}
        if instruction:
            data['instruction'] = instruction
        # Keep the file open only for the upload itself, not during polling.
        with open(pdf_file, 'rb') as f:
            response = requests.post(url, files={'file': f}, data=data,
                                     cookies=cookies, timeout=60)
        result = response.json()
        if response.status_code != 200:
            print(f"❌ 上传失败: {result.get('error', '未知错误')}")
            return None
        translation_id = result.get('translation_id')
        task_id = result.get('task_id')
        from_cache = result.get('from_cache', False)
        print(f"✅ 上传成功!")
        print(f" 翻译ID: {translation_id}")
        print(f" 任务ID: {task_id}")
        # Both branches used to be empty strings, so the line carried no
        # information; print an explicit yes/no instead.
        print(f" 使用缓存: {'是' if from_cache else '否'}")
        # A cached result is already complete; otherwise wait for the job.
        if not from_cache:
            print(f"\n🔄 等待翻译完成...")
            poll_translation_status(translation_id, cookies, api_base)
        return translation_id
    except Exception as e:
        print(f"❌ 上传请求失败: {e}")
        return None
def poll_translation_status(translation_id, cookies=None, api_base=API_BASE,
                            max_wait=300, interval=5):
    """Poll the status endpoint until the translation finishes or time runs out.

    Repeatedly GETs ``{api_base}/api/status/{translation_id}`` and prints the
    reported status and progress. A failed query is reported and retried on
    the next round.

    Args:
        translation_id: Identifier of the translation to watch.
        cookies: Session cookies to send with the requests, if any.
        api_base: Base URL of the translation service.
        max_wait: Overall time budget in seconds (default 300, i.e. 5 minutes).
        interval: Pause between two polls in seconds (default 5).

    Returns:
        ``True`` when the status becomes ``'completed'``; ``False`` when it
        becomes ``'failed'`` or the time budget is exhausted.
    """
    import time

    url = f"{api_base}/api/status/{translation_id}"
    # Measure real elapsed time: counting only the sleeps would let slow or
    # timing-out requests (up to 10 s each) stretch the wait far past max_wait.
    deadline = time.monotonic() + max_wait
    while time.monotonic() < deadline:
        try:
            response = requests.get(url, cookies=cookies, timeout=10)
            result = response.json()
            status = result.get('status', 'unknown')
            progress = result.get('progress', 0)
            print(f" 状态: {status}, 进度: {progress}%")
            if status == 'completed':
                print(f"✅ 翻译完成!")
                return True
            if status == 'failed':
                print(f"❌ 翻译失败: {result.get('error', '未知错误')}")
                return False
        except Exception as e:
            print(f"⚠️ 状态查询失败: {e}")
        # Pause before the next poll, but never sleep past the deadline.
        time.sleep(max(0, min(interval, deadline - time.monotonic())))
    print(f"⚠️ 等待超时,请在网页查看结果")
    return False
def list_translations(cookies=None, api_base=API_BASE):
    """Print a table of the translation records returned by the service.

    Sends a GET request to ``{api_base}/api/translations`` and prints one row
    per entry of the ``translations`` list in the JSON reply.

    Args:
        cookies: Session cookies to send with the request, if any.
        api_base: Base URL of the translation service.

    Returns:
        ``None``. Errors are reported on stdout rather than raised.
    """
    url = f"{api_base}/api/translations"
    try:
        response = requests.get(url, cookies=cookies, timeout=10)
        result = response.json()
        translations = result.get('translations', [])
        if not translations:
            print("暂无翻译记录")
            return
        print(f"\n📋 翻译记录 (共{len(translations)}条):\n")
        print("ID | 文件名 | 状态 | 进度 | 时间")
        print("-" * 60)
        for t in translations:
            # Show the ID in full: it is what the status/download commands
            # take as input, so truncating it would make it unusable.
            id_str = str(t.get('id', '?'))
            # ``or`` also covers keys that are present but null, which would
            # otherwise raise on slicing/formatting and abort the whole list.
            filename = str(t.get('filename') or '?')[:15]
            status = str(t.get('status') or '?')
            progress = t.get('progress') or 0
            created = t.get('created_at') or '?'
            if created != '?':
                # Keep characters 5..15 of the timestamp (month-day hour:minute
                # for a "YYYY-MM-DD HH:MM:SS"-style value).
                created = str(created)[5:16]
            print(f"{id_str:4} | {filename:15} | {status:8} | {progress:3}% | {created}")
    except Exception as e:
        print(f"❌ 获取列表失败: {e}")
def get_translation_status(translation_id, cookies=None, api_base=API_BASE):
    """Print a one-off status report for a single translation.

    Sends a GET request to ``{api_base}/api/status/{translation_id}`` and
    prints the ``status``, ``progress``, ``filename``, ``pages``,
    ``from_cache`` and (when present) ``error`` fields of the JSON reply.

    Args:
        translation_id: Identifier of the translation to inspect.
        cookies: Session cookies to send with the request, if any.
        api_base: Base URL of the translation service.

    Returns:
        ``None``. Errors are reported on stdout rather than raised.
    """
    url = f"{api_base}/api/status/{translation_id}"
    try:
        response = requests.get(url, cookies=cookies, timeout=10)
        result = response.json()
        print(f"\n📊 翻译状态 (ID: {translation_id}):\n")
        print(f" 状态: {result.get('status', '未知')}")
        print(f" 进度: {result.get('progress', 0)}%")
        print(f" 文件: {result.get('filename', '未知')}")
        print(f" 页数: {result.get('pages', '?')}")
        # Both branches used to be empty strings, so the line carried no
        # information; print an explicit yes/no instead.
        print(f" 缓存: {'是' if result.get('from_cache') else '否'}")
        if result.get('error'):
            print(f" 错误: {result.get('error')}")
    except Exception as e:
        print(f"❌ 获取状态失败: {e}")
def download_translation(translation_id, output_file, cookies=None, api_base=API_BASE):
    """Download a finished translation and save it to a local file.

    Sends a GET request to ``{api_base}/api/download/{translation_id}`` and
    writes the response body to ``output_file`` in binary mode.

    Args:
        translation_id: Identifier of the translation to download.
        output_file: Path of the file to write the response body to.
        cookies: Session cookies to send with the request, if any.
        api_base: Base URL of the translation service.

    Returns:
        ``True`` when the server answered with status 200 and the file was
        written; ``False`` on any other status or when an exception occurs.
    """
    endpoint = f"{api_base}/api/download/{translation_id}"
    try:
        reply = requests.get(endpoint, cookies=cookies, timeout=30)
        if reply.status_code == 200:
            with open(output_file, 'wb') as sink:
                sink.write(reply.content)
            print(f"✅ 下载成功: {output_file}")
            return True
        # Any non-200 answer is reported with its bare status code.
        print(f"❌ 下载失败: {reply.status_code}")
        return False
    except Exception as err:
        print(f"❌ 下载失败: {err}")
        return False
def show_config(api_base=API_BASE):
    """Print the configuration reported by the service.

    Sends a GET request (without cookies) to ``{api_base}/api/config`` and
    prints the ``site_name``, ``max_file_size`` and ``cache_expire_days``
    fields plus the ``api_base`` and ``model`` entries of ``llm_config``.

    Args:
        api_base: Base URL of the translation service.

    Returns:
        ``None``. Errors are reported on stdout rather than raised.
    """
    endpoint = f"{api_base}/api/config"
    try:
        settings = requests.get(endpoint, timeout=10).json()
        print(f"\n⚙️ 当前配置:\n")
        print(f" 网站名称: {settings.get('site_name', '未知')}")
        print(f" 最大文件: {settings.get('max_file_size', '?')}MB")
        print(f" 缓存天数: {settings.get('cache_expire_days', '?')}")
        # The model settings live in a nested mapping; fall back to an empty
        # one when the key is missing so the lookups below use their defaults.
        model_settings = settings.get('llm_config', {})
        print(f" 大模型API: {model_settings.get('api_base', '未设置')}")
        print(f" 模型: {model_settings.get('model', '未设置')}")
    except Exception as err:
        print(f"❌ 获取配置失败: {err}")
def _add_connection_options(target, inherited=False):
    """Register ``--user``, ``--password`` and ``--api`` on the parser *target*.

    With ``inherited=True`` (used for sub-parsers) the defaults are
    suppressed, so an option that is omitted after the sub-command does not
    overwrite a value that was given before it.
    """
    if inherited:
        credential_kwargs = {'default': argparse.SUPPRESS}
        api_default = argparse.SUPPRESS
    else:
        credential_kwargs = {}
        api_default = API_BASE
    target.add_argument('--user', '-u', help='用户名', **credential_kwargs)
    target.add_argument('--password', '-p', help='密码', **credential_kwargs)
    target.add_argument('--api', default=api_default, help=f'API地址 (默认: {API_BASE})')


def _build_parser():
    """Build the argument parser with all sub-commands."""
    parser = argparse.ArgumentParser(
        description="PDF翻译命令行工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
pdf_translate translate test.pdf --user admin --password admin123
pdf_translate translate test.pdf --instruction "保持专业术语原文"
pdf_translate list --user admin --password admin123
pdf_translate status 1
pdf_translate download 1 result.md
pdf_translate config
"""
    )
    _add_connection_options(parser)
    subparsers = parser.add_subparsers(dest='command', help='命令')

    # translate command
    trans_parser = subparsers.add_parser('translate', help='翻译PDF文件')
    trans_parser.add_argument('file', help='PDF文件路径')
    trans_parser.add_argument('--instruction', '-i', help='翻译要求')
    # list command
    list_parser = subparsers.add_parser('list', help='列出翻译记录')
    # status command
    status_parser = subparsers.add_parser('status', help='查看翻译状态')
    status_parser.add_argument('id', type=int, help='翻译ID')
    # download command
    download_parser = subparsers.add_parser('download', help='下载翻译结果')
    download_parser.add_argument('id', type=int, help='翻译ID')
    download_parser.add_argument('output', help='输出文件路径')
    # config command
    config_parser = subparsers.add_parser('config', help='显示当前配置')

    # The examples in the epilog place --user/--password after the
    # sub-command. Options registered only on the top-level parser are
    # rejected there as unrecognized, so every sub-parser accepts them too.
    for sub in (trans_parser, list_parser, status_parser,
                download_parser, config_parser):
        _add_connection_options(sub, inherited=True)
    return parser


def main():
    """Parse the command line, log in if credentials are given, run the command.

    Returns:
        The process exit status: ``0`` on success (also when only the help
        text is printed), ``1`` when the login, the upload or the download
        fails. The list/status/config helpers only print their errors and
        return nothing, so those commands always yield ``0``.
    """
    parser = _build_parser()
    args = parser.parse_args()
    # Service address, overridable with --api.
    api_base_arg = args.api
    if not args.command:
        parser.print_help()
        return 0

    # Log in only when both a user name and a password were supplied.
    cookies = None
    if args.user and args.password:
        cookies = login(args.user, args.password, api_base_arg)
        if not cookies:
            print("请检查用户名和密码")
            return 1
        user = get_user_info(cookies, api_base_arg)
        if user:
            print(f"👤 用户: {user.get('username')} ({user.get('user_type')})")

    # Dispatch to the selected command.
    if args.command == 'translate':
        translation_id = translate_pdf(args.file, args.instruction, cookies, api_base_arg)
        if not translation_id:
            return 1
        print(f"\n💡 在网页查看结果: {api_base_arg}/history")
    elif args.command == 'list':
        list_translations(cookies, api_base_arg)
    elif args.command == 'status':
        get_translation_status(args.id, cookies, api_base_arg)
    elif args.command == 'download':
        if not download_translation(args.id, args.output, cookies, api_base_arg):
            return 1
    elif args.command == 'config':
        show_config(api_base_arg)
    return 0


if __name__ == '__main__':
    # Propagate the status so shells and scripts can detect failures.
    sys.exit(main())