Files
llm-index-rag/config.py
coder cdaadef10c V1.0.0: 基于索引的知识检索系统
核心功能:
- 文档索引:使用LLM分析提取关键词/摘要/主题/实体
- 查询处理:LLM分析查询意图并扩展关键词
- BM25检索:基于倒排索引的相关性排序
- RAG问答:检索增强生成

技术栈:
- Flask + SQLAlchemy
- OpenAI API兼容LLM
- BM25算法

特点: 不依赖向量模型和向量库
2026-04-07 23:48:06 +08:00

65 lines
2.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
LLM Index RAG configuration file.
An index- and search-based RAG system; it does not use a vector (embedding) model.
"""
import os  # standard library; needed for the environment overrides below

# ==================== Application settings ====================
APP_NAME = "LLM Index RAG"
APP_VERSION = "1.0.0"

# Application secret key. Taken from the RAG_SECRET_KEY environment variable
# when that is set to a non-empty value; otherwise the historical literal is
# used so existing setups keep working unchanged.
# NOTE(review): the fallback literal is committed to the repository and must
# be considered public. Presumably this value signs Flask sessions -- confirm,
# and set RAG_SECRET_KEY to a random value in any real deployment.
SECRET_KEY = os.environ.get("RAG_SECRET_KEY") or "llm-index-rag-secret-key"

# ==================== LLM settings ====================
# Connection and generation settings for the LLM endpoint. The three
# deployment-specific entries (endpoint URL, API key, model name) can be
# overridden through environment variables; an unset or empty variable falls
# back to the original hard-coded value.
# NOTE(review): the fallback API key is a credential stored in source control.
# Rotate it and supply the real key through RAG_LLM_API_KEY instead.
LLM_CONFIG = {
    "api_base": os.environ.get("RAG_LLM_API_BASE") or "http://192.168.2.5:1234/v1",
    "api_key": os.environ.get("RAG_LLM_API_KEY") or "sk-lm-fuP5tGU8:Hi7YU87jHyDP6Ay8Tl2j",
    "model": os.environ.get("RAG_LLM_MODEL") or "qwen/qwen3.5-35b-a3b",
    "max_tokens": 4000,
    "temperature": 0.3,
    "timeout": 120,  # presumably seconds -- confirm against the client code
}
# ==================== Document settings ====================
# Directory names for, in order: stored documents, stored indexes, log files.
DOCUMENT_DIR: str = "documents"
INDEX_DIR: str = "indexes"
LOG_DIR: str = "logs"

# File extensions listed as supported; each entry includes its leading dot.
SUPPORTED_FORMATS: list = [
    ".txt",
    ".md",
    ".pdf",
    ".docx",
    ".html",
    ".json",
]
# Document processing settings.
DOC_CONFIG: dict = {
    # Size of each document chunk.
    "chunk_size": 2000,
    # Overlap between consecutive chunks.
    "chunk_overlap": 200,
    # Maximum number of keywords per document chunk.
    "max_keywords": 20,
    # Maximum length of a summary.
    "max_summary_length": 500,
}
# ==================== Index settings ====================
INDEX_CONFIG: dict = {
    # --- BM25 parameters ---
    # Term-frequency saturation parameter.
    "bm25_k1": 1.5,
    # Document-length normalization parameter.
    "bm25_b": 0.75,

    # --- Retrieval settings ---
    # Maximum number of results returned.
    "max_results": 20,
    # Minimum relevance score.
    "min_score": 0.1,

    # --- Keyword weights ---
    # Weight for title keywords.
    "title_weight": 3.0,
    # Weight for explicit keywords.
    "keyword_weight": 2.0,
    # Weight for content keywords.
    "content_weight": 1.0,
    # Weight for summary keywords.
    "summary_weight": 1.5,
}
# ==================== Query settings ====================
QUERY_CONFIG: dict = {
    # Maximum number of terms added by query expansion.
    "max_expansion_terms": 5,
    # Whether query expansion is enabled.
    "use_query_expansion": True,
    # Whether reranking is used.
    "use_rerank": True,
}
# ==================== Database settings ====================
# Database connection URL pointing at a SQLite file named llm_index_rag.db.
# Presumably consumed by SQLAlchemy -- confirm against the code that reads it.
DATABASE_URL: str = "sqlite:///llm_index_rag.db"

# ==================== API settings ====================
# Host and port for the API.
# NOTE(review): "0.0.0.0" is the wildcard address; if this is used as the bind
# host, the service is reachable on every network interface -- confirm that
# this is intended.
API_HOST: str = "0.0.0.0"
API_PORT: int = 19001