pdf-translate-web/pdf_translate.py

#!/usr/bin/env python3
"""
PDF翻译命令行工具

使用方法:
    pdf_translate translate <pdf_file> [--instruction "翻译要求"] [--user <username>] [--password <password>]
    pdf_translate list [--user <username>] [--password <password>]
    pdf_translate status <translation_id> [--user <username>] [--password <password>]
    pdf_translate download <translation_id> <output_file> [--user <username>] [--password <password>]
    pdf_translate config
"""

import argparse
import sys
import os
import json
import requests
from pathlib import Path

# Base URL of the translation service API; every endpoint path is appended to it
API_BASE = "http://localhost:19000"

def login(username, password, api_base=API_BASE):
    """Log in to the service and hand back the session cookies.

    The credentials are posted as JSON to ``/api/login``. The outcome is
    reported on stdout.

    Args:
        username: Account name to log in with.
        password: Password of that account.
        api_base: Base URL of the API server.

    Returns:
        The cookies of the login response when the JSON reply has a truthy
        ``success`` field; ``None`` when the login is rejected or the
        request/decoding fails.
    """
    endpoint = f"{api_base}/api/login"
    credentials = {"username": username, "password": password}

    try:
        reply = requests.post(endpoint, json=credentials, timeout=10)
        payload = reply.json()

        # Guard clause: bail out on a rejected login first.
        if not payload.get('success'):
            print(f"❌ 登录失败: {payload.get('error', '未知错误')}")
            return None

        print(f"✅ 登录成功: {username}")
        return reply.cookies
    except Exception as exc:
        print(f"❌ 登录请求失败: {exc}")
        return None

def get_user_info(cookies, api_base=API_BASE):
    """Fetch the profile of the user the session cookies belong to.

    Sends a GET request to ``/api/user/info``.

    Args:
        cookies: Session cookies to send with the request.
        api_base: Base URL of the API server.

    Returns:
        The value of the ``user`` field of the JSON reply, or ``None`` when
        the field is absent or the request/decoding fails.
    """
    url = f"{api_base}/api/user/info"
    try:
        response = requests.get(url, cookies=cookies, timeout=10)
        result = response.json()
        return result.get('user')
    except Exception:
        # Best-effort lookup: any request or decoding error just means "no
        # user info". Catching Exception rather than using a bare ``except``
        # lets KeyboardInterrupt and SystemExit propagate.
        return None

def translate_pdf(pdf_file, instruction=None, cookies=None, api_base=API_BASE):
    """Upload a PDF for translation and wait for the job to finish.

    The file is posted as multipart form data to ``/api/upload``. Unless the
    server reports that the result came from its cache, the translation
    status is then polled via ``poll_translation_status``. Progress is
    reported on stdout.

    Args:
        pdf_file: Path of the PDF file to upload.
        instruction: Optional free-text translation instruction sent along
            with the file.
        cookies: Session cookies to send with the request, if any.
        api_base: Base URL of the API server.

    Returns:
        The translation id reported by the server, or ``None`` when the file
        does not exist, the upload is rejected, the reply carries no
        translation id, or the request fails. The id is returned even if the
        subsequent polling ends in failure or timeout.
    """
    if not os.path.exists(pdf_file):
        print(f"❌ 文件不存在: {pdf_file}")
        return None

    url = f"{api_base}/api/upload"

    print(f"📤 上传文件: {pdf_file}")

    try:
        data = {}
        if instruction:
            data['instruction'] = instruction

        # The file only needs to stay open while the request is being sent.
        with open(pdf_file, 'rb') as f:
            response = requests.post(url, files={'file': f}, data=data, cookies=cookies, timeout=60)

        if response.status_code != 200:
            # Error replies are not guaranteed to be JSON (e.g. an HTML error
            # page); fall back to the HTTP status instead of surfacing a
            # JSON decoding error.
            try:
                error = response.json().get('error', '未知错误')
            except (ValueError, AttributeError):
                error = f"HTTP {response.status_code}"
            print(f"❌ 上传失败: {error}")
            return None

        result = response.json()

        translation_id = result.get('translation_id')
        if translation_id is None:
            # Without an id there is nothing to poll; treat it as a failed
            # upload rather than polling /api/status/None.
            print(f"❌ 上传失败: {result.get('error', '未知错误')}")
            return None

        task_id = result.get('task_id')
        from_cache = result.get('from_cache', False)

        print(f"✅ 上传成功!")
        print(f"   翻译ID: {translation_id}")
        print(f"   任务ID: {task_id}")
        print(f"   使用缓存: {'是' if from_cache else '否'}")

        # Wait for the translation to finish unless it was served from cache
        if not from_cache:
            print(f"\n🔄 等待翻译完成...")
            poll_translation_status(translation_id, cookies, api_base)

        return translation_id

    except Exception as e:
        print(f"❌ 上传请求失败: {e}")
        return None

def poll_translation_status(translation_id, cookies=None, api_base=API_BASE,
                            max_wait=300, interval=5):
    """Poll the status endpoint until the translation finishes or time runs out.

    Queries ``/api/status/<translation_id>`` repeatedly and prints the
    reported status and progress after each query. A failed query is reported
    and retried on the next round.

    Args:
        translation_id: Id of the translation to watch.
        cookies: Session cookies to send with each request, if any.
        api_base: Base URL of the API server.
        max_wait: Upper bound, in seconds, on the total time spent polling
            (requests and pauses together). Defaults to five minutes.
        interval: Pause, in seconds, between two queries.

    Returns:
        ``True`` when the status becomes ``completed``; ``False`` when it
        becomes ``failed`` or the deadline passes first.
    """
    import time

    url = f"{api_base}/api/status/{translation_id}"

    # Measure against a monotonic deadline so that time spent inside slow or
    # timed-out requests counts towards max_wait, not only the pauses.
    deadline = time.monotonic() + max_wait

    while True:
        try:
            response = requests.get(url, cookies=cookies, timeout=10)
            result = response.json()

            status = result.get('status', 'unknown')
            progress = result.get('progress', 0)

            print(f"   状态: {status}, 进度: {progress}%")

            if status == 'completed':
                print(f"✅ 翻译完成!")
                return True
            elif status == 'failed':
                print(f"❌ 翻译失败: {result.get('error', '未知错误')}")
                return False

        except Exception as e:
            print(f"⚠️ 状态查询失败: {e}")

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline (or for a negative duration).
        time.sleep(max(0, min(interval, remaining)))

    print(f"⚠️ 等待超时，请在网页查看结果")
    return False

def _format_translation_row(t):
    """Render one translation record (a dict) as a row of the listing table."""
    record_id = t.get('id')
    # The id is shown in full: a truncated id could not be used with the
    # status/download commands.
    id_str = '?' if record_id is None else str(record_id)
    # ``or`` also covers fields that are present but null in the JSON reply.
    filename = str(t.get('filename') or '?')[:15]
    status = str(t.get('status') or '?')
    progress = t.get('progress') or 0
    created = t.get('created_at') or '?'
    if isinstance(created, str) and created != '?':
        # NOTE(review): assumes an ISO-like timestamp string, so that this
        # slice is "MM-DD hh:mm" — confirm against the API.
        created = created[5:16]

    # Width 5 for the id lines the first separator up with the header row.
    return f"{id_str:5} | {filename:15} | {status:8} | {progress:3}% | {created}"


def list_translations(cookies=None, api_base=API_BASE):
    """Print a table of the translation records returned by the server.

    Sends a GET request to ``/api/translations`` and prints one row per
    entry of the ``translations`` field of the JSON reply, or a notice when
    there are none. Failures are reported on stdout.

    Args:
        cookies: Session cookies to send with the request, if any.
        api_base: Base URL of the API server.

    Returns:
        None.
    """
    url = f"{api_base}/api/translations"

    try:
        response = requests.get(url, cookies=cookies, timeout=10)
        result = response.json()

        translations = result.get('translations', [])

        if not translations:
            print("暂无翻译记录")
            return

        print(f"\n📋 翻译记录 (共{len(translations)}条):\n")
        print("ID    | 文件名           | 状态      | 进度 | 时间")
        print("-" * 60)

        for t in translations:
            print(_format_translation_row(t))

    except Exception as e:
        print(f"❌ 获取列表失败: {e}")

def get_translation_status(translation_id, cookies=None, api_base=API_BASE):
    """Print the current status report of one translation.

    Sends a GET request to ``/api/status/<translation_id>`` and prints the
    status, progress, file name, page count and cache flag from the JSON
    reply, followed by the error text when one is present. Failures are
    reported on stdout.

    Args:
        translation_id: Id of the translation to look up.
        cookies: Session cookies to send with the request, if any.
        api_base: Base URL of the API server.

    Returns:
        None.
    """
    endpoint = f"{api_base}/api/status/{translation_id}"

    try:
        info = requests.get(endpoint, cookies=cookies, timeout=10).json()

        print(f"\n📊 翻译状态 (ID: {translation_id}):\n")

        report = (
            f"   状态: {info.get('status', '未知')}",
            f"   进度: {info.get('progress', 0)}%",
            f"   文件: {info.get('filename', '未知')}",
            f"   页数: {info.get('pages', '?')}",
            f"   缓存: {'是' if info.get('from_cache') else '否'}",
        )
        for line in report:
            print(line)

        # The error line is shown only when the reply carries a non-empty one.
        failure = info.get('error')
        if failure:
            print(f"   错误: {failure}")

    except Exception as exc:
        print(f"❌ 获取状态失败: {exc}")

def download_translation(translation_id, output_file, cookies=None, api_base=API_BASE):
    """Download a translation result and save it to a local file.

    Sends a GET request to ``/api/download/<translation_id>`` and, on an
    HTTP 200 reply, writes the response body to ``output_file`` in binary
    mode. The outcome is reported on stdout.

    Args:
        translation_id: Id of the translation to download.
        output_file: Path of the file to write the result to.
        cookies: Session cookies to send with the request, if any.
        api_base: Base URL of the API server.

    Returns:
        ``True`` when the file was written; ``False`` on a non-200 reply or
        when the request or the write fails.
    """
    endpoint = f"{api_base}/api/download/{translation_id}"

    try:
        reply = requests.get(endpoint, cookies=cookies, timeout=30)

        if reply.status_code == 200:
            with open(output_file, 'wb') as out:
                out.write(reply.content)
            print(f"✅ 下载成功: {output_file}")
            return True

        # Any other status is reported by its numeric code.
        print(f"❌ 下载失败: {reply.status_code}")
        return False

    except Exception as exc:
        print(f"❌ 下载失败: {exc}")
        return False

def show_config(api_base=API_BASE):
    """Print the configuration reported by the server.

    Sends a GET request to ``/api/config`` and prints the site name, maximum
    file size, cache lifetime and, from the ``llm_config`` field, the LLM API
    address and model name. Failures are reported on stdout.

    Args:
        api_base: Base URL of the API server.

    Returns:
        None.
    """
    endpoint = f"{api_base}/api/config"

    try:
        settings = requests.get(endpoint, timeout=10).json()

        print(f"\n⚙️ 当前配置:\n")

        general_lines = (
            f"   网站名称: {settings.get('site_name', '未知')}",
            f"   最大文件: {settings.get('max_file_size', '?')}MB",
            f"   缓存天数: {settings.get('cache_expire_days', '?')}天",
        )
        for line in general_lines:
            print(line)

        # LLM settings live in their own nested section of the reply.
        model_settings = settings.get('llm_config', {})
        model_lines = (
            f"   大模型API: {model_settings.get('api_base', '未设置')}",
            f"   模型: {model_settings.get('model', '未设置')}",
        )
        for line in model_lines:
            print(line)

    except Exception as exc:
        print(f"❌ 获取配置失败: {exc}")

def _add_connection_options(parser, suppress_defaults=False):
    """Register the --user, --password and --api options on *parser*.

    With ``suppress_defaults`` set, an option that is not given on the
    command line leaves no attribute behind, so a sub-parser using it never
    overwrites a value already parsed by the top-level parser.
    """
    if suppress_defaults:
        user_default = password_default = api_default = argparse.SUPPRESS
    else:
        user_default = password_default = None
        api_default = API_BASE

    parser.add_argument('--user', '-u', default=user_default, help='用户名')
    parser.add_argument('--password', '-p', default=password_default, help='密码')
    parser.add_argument('--api', default=api_default, help=f'API地址 (默认: {API_BASE})')


def main():
    """Parse the command line and run the requested sub-command.

    Sub-commands: ``translate``, ``list``, ``status``, ``download`` and
    ``config``. When both ``--user`` and ``--password`` are given, a login is
    performed first and its cookies are passed to the command; if the login
    fails, nothing else is run. Without a sub-command the help text is
    printed.

    The connection options (``--user``, ``--password``, ``--api``) are
    accepted both before and after the sub-command name, so the invocations
    shown in the help epilog work as written.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser(
        description="PDF翻译命令行工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
  pdf_translate translate test.pdf --user admin --password admin123
  pdf_translate translate test.pdf --instruction "保持专业术语原文"
  pdf_translate list --user admin --password admin123
  pdf_translate status 1
  pdf_translate download 1 result.md
  pdf_translate config
        """
    )

    _add_connection_options(parser)

    subparsers = parser.add_subparsers(dest='command', help='命令')

    # translate command
    trans_parser = subparsers.add_parser('translate', help='翻译PDF文件')
    trans_parser.add_argument('file', help='PDF文件路径')
    trans_parser.add_argument('--instruction', '-i', help='翻译要求')

    # list command
    list_parser = subparsers.add_parser('list', help='列出翻译记录')

    # status command
    status_parser = subparsers.add_parser('status', help='查看翻译状态')
    status_parser.add_argument('id', type=int, help='翻译ID')

    # download command
    download_parser = subparsers.add_parser('download', help='下载翻译结果')
    download_parser.add_argument('id', type=int, help='翻译ID')
    download_parser.add_argument('output', help='输出文件路径')

    # config command
    config_parser = subparsers.add_parser('config', help='显示当前配置')

    # argparse hands everything after the sub-command name to the sub-parser,
    # which would reject options known only to the top-level parser. Register
    # them on every sub-parser as well so they may follow the sub-command.
    for sub_parser in (trans_parser, list_parser, status_parser,
                       download_parser, config_parser):
        _add_connection_options(sub_parser, suppress_defaults=True)

    args = parser.parse_args()

    # API address to use (the default unless overridden with --api)
    api_base_arg = args.api

    if not args.command:
        parser.print_help()
        return

    # Log in when both a user name and a password were supplied
    cookies = None
    if args.user and args.password:
        cookies = login(args.user, args.password, api_base_arg)
        if not cookies:
            print("请检查用户名和密码")
            return

        user = get_user_info(cookies, api_base_arg)
        if user:
            print(f"👤 用户: {user.get('username')} ({user.get('user_type')})")

    # Run the selected command
    if args.command == 'translate':
        translation_id = translate_pdf(args.file, args.instruction, cookies, api_base_arg)
        if translation_id:
            print(f"\n💡 在网页查看结果: {api_base_arg}/history")

    elif args.command == 'list':
        list_translations(cookies, api_base_arg)

    elif args.command == 'status':
        get_translation_status(args.id, cookies, api_base_arg)

    elif args.command == 'download':
        download_translation(args.id, args.output, cookies, api_base_arg)

    elif args.command == 'config':
        show_config(api_base_arg)

# Run the CLI only when this file is executed as a script, not when imported
if __name__ == '__main__':
    main()