Fix: 修复无扩展名文件无法读取的问题

- 增加多种编码尝试
- 支持无扩展名文件识别
- 增强文件读取容错性
2026-04-08 00:10:58 +08:00
parent cdaadef10c
commit 3c862314c7
4 changed files with 26559 additions and 0 deletions
--- a/services.py
+++ b/services.py
@@ -272,6 +272,16 @@ class DocumentIndexer:
        """读取文档内容"""
        ext = os.path.splitext(filepath)[1].lower()
        
+        # 尝试读取文本文件（包括没有扩展名的）
+        try:
+            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+                content = f.read()
+                if content.strip():  # 如果能读取到内容
+                    return content
+        except:
+            pass
+        
+        # 按扩展名处理特定格式
        if ext in ['.txt', '.md', '.json', '.html']:
            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                return f.read()
@@ -295,6 +305,19 @@ class DocumentIndexer:
            except:
                pass
        
+        # 最后尝试以二进制方式读取并解码
+        try:
+            with open(filepath, 'rb') as f:
+                content = f.read()
+                # 尝试多种编码
+                for encoding in ['utf-8', 'gbk', 'gb2312', 'latin-1']:
+                    try:
+                        return content.decode(encoding)
+                    except:
+                        continue
+        except:
+            pass
+        
        return None
    
    def _split_content(self, content):