fix: 添加PDF文本提取检测，提示扫描版PDF无法翻译

- 检查PDF是否能提取文本内容
- 如果是扫描版PDF（图像形式），抛出明确错误提示
- 需要OCR处理的PDF会提示用户使用文本版PDF
2026-04-16 22:06:03 +08:00
parent ed3d8e095e
commit e524938276
1 changed files with 8 additions and 0 deletions
--- a/services.py
+++ b/services.py
@@ -134,6 +134,14 @@ class TranslationService:
        pages = self.extract_pdf_text(pdf_path)
        total_pages = len(pages)
        
+        # 检查是否有可翻译内容
+        total_text = sum(len(p['text']) for p in pages)
+        if total_pages == 0 or total_text < 10:
+            error_msg = "PDF无法提取文本内容。可能原因：\n1. PDF是扫描版（图像形式），需要OCR处理\n2. PDF为空或加密\n请使用包含可提取文本的PDF文件。"
+            if progress_callback:
+                progress_callback(0, 0, error_msg)
+            raise ValueError(error_msg)
+        
        if progress_callback:
            progress_callback(0, total_pages, "开始翻译...")