diff --git a/.gitignore b/.gitignore index d1fba09..845f5ca 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,7 @@ instance/ *.db __pycache__/ *.pyc -.envdocuments/ +.env +documents/ indexes/ +user_config.json \ No newline at end of file diff --git a/app.py b/app.py index a94eb09..45a74af 100644 --- a/app.py +++ b/app.py @@ -13,6 +13,38 @@ from config import * from models import db, Document, DocumentChunk, InvertedIndex, QueryLog, IndexStats from services import DocumentIndexer, SearchEngine, RAGGenerator +# ==================== 配置文件路径 ==================== +CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'config.py') +USER_CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'user_config.json') + + +def load_user_config(): + """加载用户配置""" + if os.path.exists(USER_CONFIG_FILE): + with open(USER_CONFIG_FILE, 'r', encoding='utf-8') as f: + return json.load(f) + return {} + + +def save_user_config(config_type, config_data): + """保存用户配置""" + user_config = load_user_config() + user_config[config_type] = config_data + with open(USER_CONFIG_FILE, 'w', encoding='utf-8') as f: + json.dump(user_config, f, ensure_ascii=False, indent=2) + + +def get_effective_config(): + """获取有效配置(用户配置覆盖默认配置)""" + user_config = load_user_config() + + return { + 'llm': {**LLM_CONFIG, **user_config.get('llm', {})}, + 'index': {**INDEX_CONFIG, **user_config.get('index', {})}, + 'doc': {**DOC_CONFIG, **user_config.get('doc', {})} + } + + # ==================== 创建应用 ==================== app = Flask(__name__) app.config['SECRET_KEY'] = SECRET_KEY @@ -66,6 +98,13 @@ def search_page(): return render_template('search.html') +@app.route('/settings') +def settings_page(): + """设置页""" + config = get_effective_config() + return render_template('settings.html', config=config) + + # ==================== API路由 ==================== # === 文档管理 === @@ -315,6 +354,70 @@ def api_log_feedback(log_id): return jsonify({'success': True}) +# === 配置管理 === + +@app.route('/api/config', methods=['GET']) +def api_get_config(): + """获取当前配置""" + return jsonify(get_effective_config()) + + +@app.route('/api/config/llm', methods=['POST']) +def api_save_llm_config(): + """保存LLM配置""" + data = request.json + save_user_config('llm', data) + return jsonify({'success': True}) + + +@app.route('/api/config/index', methods=['POST']) +def api_save_index_config(): + """保存索引配置""" + data = request.json + save_user_config('index', data) + return jsonify({'success': True}) + + +@app.route('/api/config/doc', methods=['POST']) +def api_save_doc_config(): + """保存文档配置""" + data = request.json + save_user_config('doc', data) + return jsonify({'success': True}) + + +@app.route('/api/config/test', methods=['POST']) +def api_test_config(): + """测试LLM连接""" + config = get_effective_config() + llm_config = config['llm'] + + try: + from openai import OpenAI + client = OpenAI( + api_key=llm_config['api_key'], + base_url=llm_config['api_base'], + ) + + # 发送简单测试请求 + response = client.chat.completions.create( + model=llm_config['model'], + messages=[{"role": "user", "content": "Hello"}], + max_tokens=10 + ) + + return jsonify({ + 'success': True, + 'model': llm_config['model'], + 'response': response.choices[0].message.content + }) + except Exception as e: + return jsonify({ + 'success': False, + 'error': str(e) + }) + + # ==================== 启动 ==================== if __name__ == '__main__': init_app() diff --git a/services.py b/services.py index dbbc039..159e4e6 100644 --- a/services.py +++ b/services.py @@ -16,17 +16,53 @@ from config import LLM_CONFIG, DOC_CONFIG, INDEX_CONFIG from models import db, Document, DocumentChunk, InvertedIndex, IndexStats +def get_llm_config(): + """获取有效的LLM配置(支持动态更新)""" + user_config_file = os.path.join(os.path.dirname(__file__), 'user_config.json') + if os.path.exists(user_config_file): + with open(user_config_file, 'r', encoding='utf-8') as f: + user_config = json.load(f) + return {**LLM_CONFIG, **user_config.get('llm', {})} + return LLM_CONFIG + + +def get_doc_config(): + """获取有效的文档配置""" + user_config_file = os.path.join(os.path.dirname(__file__), 'user_config.json') + if os.path.exists(user_config_file): + with open(user_config_file, 'r', encoding='utf-8') as f: + user_config = json.load(f) + return {**DOC_CONFIG, **user_config.get('doc', {})} + return DOC_CONFIG + + +def get_index_config(): + """获取有效的索引配置""" + user_config_file = os.path.join(os.path.dirname(__file__), 'user_config.json') + if os.path.exists(user_config_file): + with open(user_config_file, 'r', encoding='utf-8') as f: + user_config = json.load(f) + return {**INDEX_CONFIG, **user_config.get('index', {})} + return INDEX_CONFIG + + class LLMService: """LLM服务封装""" def __init__(self): - self.client = OpenAI( - api_key=LLM_CONFIG['api_key'], - base_url=LLM_CONFIG['api_base'], + pass # 不再在初始化时设置配置 + + def _get_client(self): + """获取LLM客户端""" + config = get_llm_config() + return OpenAI( + api_key=config['api_key'], + base_url=config['api_base'], ) - self.model = LLM_CONFIG['model'] - self.max_tokens = LLM_CONFIG['max_tokens'] - self.temperature = LLM_CONFIG['temperature'] + + def _get_config(self): + """获取当前配置""" + return get_llm_config() def analyze_document(self, content, title=None): """ @@ -60,8 +96,10 @@ class LLMService: 只返回JSON,不要其他内容。""" try: - response = self.client.chat.completions.create( - model=self.model, + config = self._get_config() + client = self._get_client() + response = client.chat.completions.create( + model=config['model'], messages=[{"role": "user", "content": prompt}], max_tokens=1000, temperature=0.3, @@ -106,8 +144,10 @@ class LLMService: 只返回JSON。""" try: - response = self.client.chat.completions.create( - model=self.model, + config = self._get_config() + client = self._get_client() + response = client.chat.completions.create( + model=config['model'], messages=[{"role": "user", "content": prompt}], max_tokens=500, temperature=0.3, @@ -150,8 +190,10 @@ class LLMService: 只返回JSON。""" try: - response = self.client.chat.completions.create( - model=self.model, + config = self._get_config() + client = self._get_client() + response = client.chat.completions.create( + model=config['model'], messages=[{"role": "user", "content": prompt}], max_tokens=500, temperature=0.3, @@ -177,8 +219,11 @@ class DocumentIndexer: def __init__(self): self.llm = LLMService() - self.chunk_size = DOC_CONFIG['chunk_size'] - self.chunk_overlap = DOC_CONFIG['chunk_overlap'] + + def _get_chunk_config(self): + """获取分块配置""" + config = get_doc_config() + return config['chunk_size'], config['chunk_overlap'] def index_document(self, doc_id): """ @@ -330,6 +375,7 @@ class DocumentIndexer: Returns: list: 内容块列表 """ + chunk_size, _ = self._get_chunk_config() chunks = [] # 按段落分割 @@ -337,7 +383,7 @@ class DocumentIndexer: current_chunk = "" for para in paragraphs: - if len(current_chunk) + len(para) < self.chunk_size: + if len(current_chunk) + len(para) < chunk_size: current_chunk += para + '\n\n' else: if current_chunk.strip(): @@ -347,7 +393,7 @@ class DocumentIndexer: if current_chunk.strip(): chunks.append(current_chunk.strip()) - return chunks if chunks else [content[:self.chunk_size]] + return chunks if chunks else [content[:chunk_size]] def _compute_term_freq(self, content): """计算词频""" @@ -424,8 +470,11 @@ class SearchEngine: def __init__(self): self.llm = LLMService() - self.k1 = INDEX_CONFIG['bm25_k1'] - self.b = INDEX_CONFIG['bm25_b'] + + def _get_bm25_params(self): + """获取BM25参数""" + config = get_index_config() + return config['bm25_k1'], config['bm25_b'] def search(self, query, top_k=10): """ @@ -542,6 +591,7 @@ class SearchEngine: continue # BM25计算 + k1, b = self._get_bm25_params() score = 0 doc_len = doc.word_count or 1000 @@ -557,8 +607,8 @@ class SearchEngine: tf = data['terms'].get(term, 0) # BM25公式 - tf_component = (tf * (self.k1 + 1)) / ( - tf + self.k1 * (1 - self.b + self.b * doc_len / avg_doc_len) + tf_component = (tf * (k1 + 1)) / ( + tf + k1 * (1 - b + b * doc_len / avg_doc_len) ) score += idf * tf_component @@ -653,13 +703,14 @@ class RAGGenerator: 请给出准确、简洁的回答,并标注信息来源。""" try: + llm_config = get_llm_config() client = OpenAI( - api_key=LLM_CONFIG['api_key'], - base_url=LLM_CONFIG['api_base'], + api_key=llm_config['api_key'], + base_url=llm_config['api_base'], ) response = client.chat.completions.create( - model=LLM_CONFIG['model'], + model=llm_config['model'], messages=[{"role": "user", "content": prompt}], max_tokens=1000, temperature=0.5, diff --git a/static/js/main.js b/static/js/main.js new file mode 100644 index 0000000..2e9efdb --- /dev/null +++ b/static/js/main.js @@ -0,0 +1,247 @@ +/** + * LLM Index RAG - 前端交互脚本 + */ + +// 搜索表单处理 +document.getElementById('searchForm')?.addEventListener('submit', async function(e) { + e.preventDefault(); + + const query = document.getElementById('queryInput').value.trim(); + const mode = document.querySelector('input[name="mode"]:checked').value; + + if (!query) { + alert('请输入查询内容'); + return; + } + + // 显示加载状态 + const resultsSection = document.getElementById('resultsSection'); + const ragSection = document.getElementById('ragSection'); + const resultsContainer = document.getElementById('resultsContainer'); + const ragAnswer = document.getElementById('ragAnswer'); + const ragSources = document.getElementById('ragSources'); + const resultCount = document.getElementById('resultCount'); + + resultsSection.style.display = 'none'; + ragSection.style.display = 'none'; + resultsContainer.innerHTML = '
正在检索...
${r.summary || r.content?.substring(0, 200) + '...' || ''}
+未找到相关结果
+请尝试其他关键词,或先上传并索引文档
+正在生成回答...
${s.content?.substring(0, 150) + '...' || ''}
+无参考来源
'; + } + } + } catch (err) { + resultsContainer.innerHTML = `