From e524938276ff7c6f6b9d21c723f312270bf7e6b5 Mon Sep 17 00:00:00 2001
From: coder <coder@tphai.com>
Date: Thu, 16 Apr 2026 22:06:03 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E6=B7=BB=E5=8A=A0PDF=E6=96=87=E6=9C=AC?=
 =?UTF-8?q?=E6=8F=90=E5=8F=96=E6=A3=80=E6=B5=8B=EF=BC=8C=E6=8F=90=E7=A4=BA?=
 =?UTF-8?q?=E6=89=AB=E6=8F=8F=E7=89=88PDF=E6=97=A0=E6=B3=95=E7=BF=BB?=
 =?UTF-8?q?=E8=AF=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 检查PDF是否能提取文本内容
- 如果PDF没有页面或提取到的文本总长度少于10个字符（例如扫描版/图像形式的PDF），先通过progress_callback上报错误信息，再抛出带有明确提示的ValueError
- 需要OCR处理的PDF会提示用户使用文本版PDF
---
 services.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/services.py b/services.py
index 1f68ce0..376d6b8 100644
--- a/services.py
+++ b/services.py
@@ -134,6 +134,14 @@ class TranslationService:
         pages = self.extract_pdf_text(pdf_path)
         total_pages = len(pages)
         
+        # 检查是否有可翻译内容
+        total_text = sum(len(p['text']) for p in pages)
+        if total_pages == 0 or total_text < 10:
+            error_msg = "PDF无法提取文本内容。可能原因：\n1. PDF是扫描版（图像形式），需要OCR处理\n2. PDF为空或加密\n请使用包含可提取文本的PDF文件。"
+            if progress_callback:
+                progress_callback(0, 0, error_msg)
+            raise ValueError(error_msg)
+        
         if progress_callback:
             progress_callback(0, total_pages, "开始翻译...")