核心功能:
- 文档索引:使用LLM分析提取关键词/摘要/主题/实体
- 查询处理:LLM分析查询意图并扩展关键词
- BM25检索:基于倒排索引的相关性排序
- RAG问答:检索增强生成

技术栈:
- Flask + SQLAlchemy
- OpenAI API兼容LLM
- BM25算法

特点: 不依赖向量模型和向量库
65 lines
2.1 KiB
Python
65 lines
2.1 KiB
Python
"""LLM Index RAG configuration file.

Index-and-search based RAG system (no vector model is used).
"""

# ==================== Application settings ====================
APP_NAME = "LLM Index RAG"
APP_VERSION = "1.0.0"
# NOTE(review): this secret is hard-coded and committed to source control.
# Presumably it is used as the web framework's session-signing key -- confirm,
# and supply a random value from the environment for any real deployment.
SECRET_KEY = "llm-index-rag-secret-key"

# ==================== LLM settings ====================
# Endpoint, credentials and generation parameters for the LLM backend.
# NOTE(review): "api_key" is hard-coded and committed to source control, and
# "api_base" points at a private LAN address; both presumably need to be
# overridden per deployment -- confirm with the code that reads this dict.
LLM_CONFIG = {
    "api_base": "http://192.168.2.5:1234/v1",
    "api_key": "sk-lm-fuP5tGU8:Hi7YU87jHyDP6Ay8Tl2j",
    "model": "qwen/qwen3.5-35b-a3b",
    "max_tokens": 4000,
    "temperature": 0.3,
    "timeout": 120,  # presumably seconds -- TODO confirm against the client code
}

# ==================== Document settings ====================
DOCUMENT_DIR = "documents"  # document storage directory
INDEX_DIR = "indexes"  # index storage directory
LOG_DIR = "logs"  # log directory

# File extensions listed as supported (each entry includes the leading dot).
SUPPORTED_FORMATS = ['.txt', '.md', '.pdf', '.docx', '.html', '.json']

# Document processing settings.
# NOTE(review): the unit of the size/length values (characters vs. tokens) is
# not visible in this file -- confirm against the chunking code.
DOC_CONFIG = {
    "chunk_size": 2000,  # document chunk size
    "chunk_overlap": 200,  # overlap between consecutive chunks
    "max_keywords": 20,  # maximum number of keywords per document chunk
    "max_summary_length": 500,  # maximum summary length
}

# ==================== Index settings ====================
INDEX_CONFIG = {
    # BM25 parameters
    "bm25_k1": 1.5,  # term-frequency saturation parameter
    "bm25_b": 0.75,  # document-length normalization parameter

    # Retrieval settings
    "max_results": 20,  # maximum number of results returned
    "min_score": 0.1,  # minimum relevance score

    # Keyword weights
    "title_weight": 3.0,  # weight of title keywords
    "keyword_weight": 2.0,  # weight of explicit keywords
    "content_weight": 1.0,  # weight of content keywords
    "summary_weight": 1.5,  # weight of summary keywords
}

# ==================== Query settings ====================
QUERY_CONFIG = {
    "max_expansion_terms": 5,  # maximum number of query-expansion terms
    "use_query_expansion": True,  # whether query expansion is enabled
    "use_rerank": True,  # whether reranking is used
}

# ==================== Database settings ====================
# SQLite URL with a relative file path (three slashes after the scheme).
DATABASE_URL = "sqlite:///llm_index_rag.db"

# ==================== API settings ====================
# NOTE(review): presumably the bind address for the HTTP server; if so,
# "0.0.0.0" exposes it on every network interface -- confirm this is intended.
API_HOST = "0.0.0.0"
API_PORT = 19001