From e524938276ff7c6f6b9d21c723f312270bf7e6b5 Mon Sep 17 00:00:00 2001 From: coder Date: Thu, 16 Apr 2026 22:06:03 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E6=B7=BB=E5=8A=A0PDF=E6=96=87=E6=9C=AC?= =?UTF-8?q?=E6=8F=90=E5=8F=96=E6=A3=80=E6=B5=8B=EF=BC=8C=E6=8F=90=E7=A4=BA?= =?UTF-8?q?=E6=89=AB=E6=8F=8F=E7=89=88PDF=E6=97=A0=E6=B3=95=E7=BF=BB?= =?UTF-8?q?=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 检查PDF是否能提取文本内容 - 如果是扫描版PDF(图像形式),抛出明确错误提示 - 需要OCR处理的PDF会提示用户使用文本版PDF --- services.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/services.py b/services.py index 1f68ce0..376d6b8 100644 --- a/services.py +++ b/services.py @@ -134,6 +134,14 @@ class TranslationService: pages = self.extract_pdf_text(pdf_path) total_pages = len(pages) + # 检查是否有可翻译内容 + total_text = sum(len(p['text']) for p in pages) + if total_pages == 0 or total_text < 10: + error_msg = "PDF无法提取文本内容。可能原因:\n1. PDF是扫描版(图像形式),需要OCR处理\n2. PDF为空或加密\n请使用包含可提取文本的PDF文件。" + if progress_callback: + progress_callback(0, 0, error_msg) + raise ValueError(error_msg) + if progress_callback: progress_callback(0, total_pages, "开始翻译...")