""" LLM Index RAG 主应用 """ import os import json from datetime import datetime from flask import Flask, request, jsonify, render_template, send_file from flask_sqlalchemy import SQLAlchemy from werkzeug.utils import secure_filename from config import * from models import db, Document, DocumentChunk, InvertedIndex, QueryLog, IndexStats from services import DocumentIndexer, SearchEngine, RAGGenerator # ==================== 配置文件路径 ==================== CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'config.py') USER_CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'user_config.json') def load_user_config(): """加载用户配置""" if os.path.exists(USER_CONFIG_FILE): with open(USER_CONFIG_FILE, 'r', encoding='utf-8') as f: return json.load(f) return {} def save_user_config(config_type, config_data): """保存用户配置""" user_config = load_user_config() user_config[config_type] = config_data with open(USER_CONFIG_FILE, 'w', encoding='utf-8') as f: json.dump(user_config, f, ensure_ascii=False, indent=2) def get_effective_config(): """获取有效配置(用户配置覆盖默认配置)""" user_config = load_user_config() return { 'llm': {**LLM_CONFIG, **user_config.get('llm', {})}, 'index': {**INDEX_CONFIG, **user_config.get('index', {})}, 'doc': {**DOC_CONFIG, **user_config.get('doc', {})} } # ==================== 创建应用 ==================== app = Flask(__name__) app.config['SECRET_KEY'] = SECRET_KEY app.config['SQLALCHEMY_DATABASE_URI'] = DATABASE_URL app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 # 50MB # 初始化数据库 db.init_app(app) # 初始化服务 indexer = DocumentIndexer() search_engine = SearchEngine() rag_generator = RAGGenerator() # ==================== 初始化函数 ==================== def init_app(): """初始化应用""" # 创建目录 for dir_name in [DOCUMENT_DIR, INDEX_DIR, LOG_DIR]: if not os.path.exists(dir_name): os.makedirs(dir_name) # 创建数据库表 with app.app_context(): db.create_all() # 初始化统计 IndexStats.get_stats() # ==================== 页面路由 ==================== 
def _json_body():
    """Return the request's JSON object, or an empty dict when there is none.

    A missing body, malformed JSON, or a JSON value that is not an object all
    yield ``{}`` so that views can call ``.get`` without checking for ``None``.
    """
    payload = request.get_json(silent=True)
    return payload if isinstance(payload, dict) else {}


def _positive_int(value):
    """Return ``value`` converted to a positive int, or ``None`` if it is not one."""
    try:
        number = int(value)
    except (TypeError, ValueError):
        return None
    return number if number > 0 else None


@app.route('/')
def index():
    """Render the home page with the current index statistics."""
    stats = IndexStats.get_stats()
    return render_template('index.html', stats=stats)


@app.route('/documents')
def documents():
    """Render the document list page (20 per page, newest first)."""
    page = request.args.get('page', 1, type=int)
    docs = Document.query.order_by(Document.created_at.desc()).paginate(page=page, per_page=20)
    return render_template('documents.html', docs=docs)


@app.route('/search')
def search_page():
    """Render the search page."""
    return render_template('search.html')


@app.route('/settings')
def settings_page():
    """Render the settings page with the effective configuration."""
    config = get_effective_config()
    return render_template('settings.html', config=config)


# ==================== API routes ====================

# === Document management ===

@app.route('/api/documents', methods=['GET'])
def api_list_documents():
    """List documents as JSON.

    Query parameters:
        page: 1-based page number (default 1).
        status: optional exact-match filter on ``Document.status``.
    """
    page = request.args.get('page', 1, type=int)
    status = request.args.get('status', '')

    query = Document.query
    if status:
        query = query.filter_by(status=status)

    docs = query.order_by(Document.created_at.desc()).paginate(page=page, per_page=20)

    return jsonify({
        'documents': [d.to_dict() for d in docs.items],
        'total': docs.total,
        'pages': docs.pages,
        'current_page': docs.page
    })


@app.route('/api/documents', methods=['POST'])
def api_upload_document():
    """Accept an uploaded file, store it in ``DOCUMENT_DIR`` and record it.

    Form fields:
        file: the uploaded file (required).
        title: optional title; defaults to the stored file name.
        source: optional source string; defaults to ``''``.

    Returns:
        JSON ``{'success': True, 'document': ...}``, or a 400 response with an
        ``error`` message when no file was sent or its extension is not in
        ``SUPPORTED_FORMATS``.
    """
    if 'file' not in request.files:
        return jsonify({'error': '未上传文件'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': '未选择文件'}), 400

    # Validate the type using the extension of the name the client sent
    ext = os.path.splitext(file.filename)[1].lower()
    if ext not in SUPPORTED_FORMATS:
        return jsonify({'error': f'不支持的文件格式: {ext}'}), 400

    # secure_filename() drops non-ASCII characters, so a name such as
    # "文档.pdf" comes back as "pdf" (extension lost) and a fully non-ASCII
    # name comes back empty. Fall back to a generic stem in those cases so the
    # stored file always keeps the validated extension.
    stem, stored_ext = os.path.splitext(secure_filename(file.filename))
    if not stem or stored_ext.lower() != ext:
        stem, stored_ext = 'upload', ext

    filename = f"{stem}{stored_ext}"
    filepath = os.path.join(DOCUMENT_DIR, filename)

    # Avoid clobbering an existing file with the same name
    if os.path.exists(filepath):
        filename = f"{stem}_{datetime.now().strftime('%Y%m%d%H%M%S')}{stored_ext}"
        filepath = os.path.join(DOCUMENT_DIR, filename)

    file.save(filepath)

    # Create the document record. file_type is always the lower-cased
    # extension, regardless of whether the rename branch above ran.
    doc = Document(
        filename=filename,
        filepath=filepath,
        file_type=ext,
        file_size=os.path.getsize(filepath),
        title=request.form.get('title', filename),
        source=request.form.get('source', ''),
    )
    db.session.add(doc)
    db.session.commit()

    return jsonify({
        'success': True,
        'document': doc.to_dict()
    })


# NOTE(review): the URL variable was missing from this rule (and from the
# delete / index / feedback rules below), so the view could never receive its
# argument. An integer id is assumed — confirm against the model's primary key.
@app.route('/api/documents/<int:doc_id>', methods=['GET'])
def api_get_document(doc_id):
    """Return one document and its chunks as JSON; 404 if it does not exist."""
    doc = Document.query.get_or_404(doc_id)
    chunks = DocumentChunk.query.filter_by(document_id=doc_id).all()

    return jsonify({
        'document': doc.to_dict(),
        'chunks': [c.to_dict() for c in chunks]
    })


@app.route('/api/documents/<int:doc_id>', methods=['DELETE'])
def api_delete_document(doc_id):
    """Delete a document's file and database record; 404 if it does not exist."""
    doc = Document.query.get_or_404(doc_id)

    # Remove the stored file
    if os.path.exists(doc.filepath):
        os.remove(doc.filepath)

    # Remove the database record
    # NOTE(review): the original comment says chunks are cascade-deleted;
    # that depends on the relationship defined in models — confirm.
    db.session.delete(doc)
    db.session.commit()

    # Refresh the statistics
    IndexStats.get_stats().update_stats()

    return jsonify({'success': True})


# === Index management ===

@app.route('/api/index/<int:doc_id>', methods=['POST'])
def api_index_document(doc_id):
    """Index a single document; responds 500 when the indexer reports failure."""
    success = indexer.index_document(doc_id)

    if success:
        return jsonify({'success': True, 'message': '索引完成'})
    else:
        return jsonify({'error': '索引失败'}), 500


@app.route('/api/index/batch', methods=['POST'])
def api_batch_index():
    """Index every document whose status is ``'pending'``.

    Returns:
        JSON with ``success``, ``failed`` and ``total`` counters.
    """
    pending_docs = Document.query.filter_by(status='pending').all()

    results = {'success': 0, 'failed': 0, 'total': len(pending_docs)}

    for doc in pending_docs:
        if indexer.index_document(doc.id):
            results['success'] += 1
        else:
            results['failed'] += 1

    return jsonify(results)


@app.route('/api/index/rebuild', methods=['POST'])
def api_rebuild_index():
    """Drop all index data, mark every document pending, then re-index."""
    # Clear the old index
    InvertedIndex.query.delete()
    DocumentChunk.query.delete()

    docs = Document.query.all()
    for doc in docs:
        doc.status = 'pending'
        doc.indexed_at = None

    db.session.commit()

    # Re-index everything that is now pending
    return api_batch_index()


@app.route('/api/stats', methods=['GET'])
def api_get_stats():
    """Return the index statistics as JSON."""
    stats = IndexStats.get_stats()
    return jsonify({
        'total_documents': stats.total_documents,
        'total_chunks': stats.total_chunks,
        'total_terms': stats.total_terms,
        'total_words': stats.total_words,
        'last_indexed_at': stats.last_indexed_at.isoformat() if stats.last_indexed_at else None
    })


# === Search ===

@app.route('/api/search', methods=['POST'])
def api_search():
    """Search documents.

    JSON body:
        query: non-empty search string (required).
        top_k: positive number of results to return (default 10).

    Returns:
        JSON with ``query``, ``results`` and ``total``, or a 400 response when
        ``query`` is empty or ``top_k`` is not a positive integer.
    """
    data = _json_body()
    query = data.get('query', '')
    top_k = _positive_int(data.get('top_k', 10))

    if not isinstance(query, str) or not query.strip():
        return jsonify({'error': '查询不能为空'}), 400
    if top_k is None:
        return jsonify({'error': 'top_k 必须为正整数'}), 400

    results = search_engine.search(query, top_k)

    return jsonify({
        'query': query,
        'results': results,
        'total': len(results)
    })


@app.route('/api/search/suggestions', methods=['GET'])
def api_search_suggestions():
    """Return up to 10 indexed terms starting with ``prefix`` (autocomplete).

    Prefixes shorter than two characters yield an empty suggestion list.
    """
    prefix = request.args.get('prefix', '')

    if len(prefix) < 2:
        return jsonify({'suggestions': []})

    # Escape LIKE metacharacters so a user-typed "%" or "_" is matched
    # literally instead of acting as a wildcard.
    escaped = prefix.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')

    # Look up matching terms in the inverted index, most frequent first
    terms = InvertedIndex.query.filter(
        InvertedIndex.term.ilike(f'{escaped}%', escape='\\')
    ).order_by(InvertedIndex.total_freq.desc()).limit(10).all()

    return jsonify({
        'suggestions': [t.term for t in terms]
    })


# === RAG ===

@app.route('/api/rag/answer', methods=['POST'])
def api_rag_answer():
    """Answer a question with the RAG generator.

    JSON body:
        query: non-empty question string (required).
        top_k: positive number of chunks to retrieve (default 5).

    Returns:
        The generator's result as JSON, or a 400 response when ``query`` is
        empty or ``top_k`` is not a positive integer.
    """
    data = _json_body()
    query = data.get('query', '')
    top_k = _positive_int(data.get('top_k', 5))

    if not isinstance(query, str) or not query.strip():
        return jsonify({'error': '查询不能为空'}), 400
    if top_k is None:
        return jsonify({'error': 'top_k 必须为正整数'}), 400

    result = rag_generator.answer(query, top_k)

    return jsonify(result)


# === Query logs ===

@app.route('/api/logs', methods=['GET'])
def api_get_logs():
    """Return the query logs as JSON (50 per page, newest first)."""
    page = request.args.get('page', 1, type=int)
    logs = QueryLog.query.order_by(QueryLog.created_at.desc()).paginate(page=page, per_page=50)

    return jsonify({
        'logs': [l.to_dict() for l in logs.items],
        'total': logs.total,
        'pages': logs.pages
    })


@app.route('/api/logs/<int:log_id>/feedback', methods=['POST'])
def api_log_feedback(log_id):
    """Store the ``rating`` and ``feedback`` fields on a query log entry."""
    log = QueryLog.query.get_or_404(log_id)
    data = _json_body()

    log.rating = data.get('rating')
    log.feedback = data.get('feedback')
    db.session.commit()

    return jsonify({'success': True})


# === Configuration management ===

@app.route('/api/config', methods=['GET'])
def api_get_config():
    """Return the effective configuration as JSON."""
    # NOTE(review): the 'llm' section is returned as-is; api_test_config reads
    # an 'api_key' from it, so the key is presumably exposed here — confirm
    # whether it should be masked.
    return jsonify(get_effective_config())


@app.route('/api/config/llm', methods=['POST'])
def api_save_llm_config():
    """Save the LLM configuration section; the body must be a JSON object."""
    data = request.get_json(silent=True)
    # Storing a non-object here would break the {**defaults, **overrides}
    # merge in get_effective_config, so reject it up front.
    if not isinstance(data, dict):
        return jsonify({'error': '配置数据必须为JSON对象'}), 400
    save_user_config('llm', data)
    return jsonify({'success': True})
def _store_config_section(section):
    """Validate the JSON body and store it as the given user-config section.

    Returns the Flask response: 400 when the body is missing or is not a JSON
    object, ``{'success': True}`` otherwise.
    """
    data = request.get_json(silent=True)
    # get_effective_config merges each section with {**defaults, **overrides};
    # anything other than an object (None, list, string) would break that
    # merge, so refuse to persist it.
    if not isinstance(data, dict):
        return jsonify({'error': '配置数据必须为JSON对象'}), 400
    save_user_config(section, data)
    return jsonify({'success': True})


@app.route('/api/config/index', methods=['POST'])
def api_save_index_config():
    """Save the index configuration section; the body must be a JSON object."""
    return _store_config_section('index')


@app.route('/api/config/doc', methods=['POST'])
def api_save_doc_config():
    """Save the document configuration section; the body must be a JSON object."""
    return _store_config_section('doc')


@app.route('/api/config/test', methods=['POST'])
def api_test_config():
    """Test the LLM connection using the effective ``llm`` configuration.

    Sends a single short chat completion request.

    Returns:
        JSON ``{'success': True, 'model': ..., 'response': ...}`` on success,
        or ``{'success': False, 'error': ...}`` when anything raises. Both
        cases are returned with the default 200 status.
    """
    config = get_effective_config()
    llm_config = config['llm']

    try:
        # Imported lazily so the app can start without the openai package.
        from openai import OpenAI

        client = OpenAI(
            api_key=llm_config['api_key'],
            base_url=llm_config['api_base'],
        )

        # Send a minimal test request
        response = client.chat.completions.create(
            model=llm_config['model'],
            messages=[{"role": "user", "content": "Hello"}],
            max_tokens=10
        )

        return jsonify({
            'success': True,
            'model': llm_config['model'],
            'response': response.choices[0].message.content
        })
    except Exception as e:
        # Deliberately broad: this endpoint reports any failure (missing
        # config key, import error, network/API error) back to the caller.
        return jsonify({
            'success': False,
            'error': str(e)
        })


# ==================== Entry point ====================
if __name__ == '__main__':
    init_app()
    # NOTE(review): debug=True enables the interactive Werkzeug debugger,
    # which allows code execution from the browser. Confirm API_HOST is not
    # publicly reachable, or turn debug off outside development.
    app.run(host=API_HOST, port=API_PORT, debug=True)