新增: - 系统设置页面 (/settings) - 支持动态配置LLM、索引、文档处理参数 - 配置API - 保存配置、测试LLM连接 - 前端JS交互文件 - 搜索、文档管理功能 修复: - 首页搜索框无法正常工作的问题(缺少main.js) - 服务支持动态读取配置(无需重启生效) 改进: - LLM/索引/文档配置支持热更新 - 添加测试LLM连接功能
424 lines
11 KiB
Python
424 lines
11 KiB
Python
"""
LLM Index RAG main application.
"""
|
||
|
||
import os
|
||
import json
|
||
from datetime import datetime
|
||
from flask import Flask, request, jsonify, render_template, send_file
|
||
from flask_sqlalchemy import SQLAlchemy
|
||
from werkzeug.utils import secure_filename
|
||
|
||
from config import *
|
||
from models import db, Document, DocumentChunk, InvertedIndex, QueryLog, IndexStats
|
||
from services import DocumentIndexer, SearchEngine, RAGGenerator
|
||
|
||
# ==================== 配置文件路径 ====================
|
||
# Directory that contains this module; both paths below are resolved against it.
_APP_DIR = os.path.dirname(__file__)
# Path to config.py next to this module.
CONFIG_FILE = os.path.join(_APP_DIR, 'config.py')
# JSON file holding the user's configuration overrides.
USER_CONFIG_FILE = os.path.join(_APP_DIR, 'user_config.json')
|
||
|
||
|
||
def load_user_config():
    """Load the user's configuration overrides from ``USER_CONFIG_FILE``.

    Returns:
        dict: The parsed JSON object. An empty dict is returned when the file
        does not exist, cannot be read, is not valid JSON, or does not contain
        a JSON object at the top level.
    """
    import logging

    logger = logging.getLogger(__name__)

    # Open directly instead of checking for existence first: this avoids a
    # race between the check and the open.
    try:
        with open(USER_CONFIG_FILE, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
    except FileNotFoundError:
        # Nothing has been saved yet.
        return {}
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        # A damaged override file must not take down every caller, so fall
        # back to "no overrides" and leave a trace in the log.
        logger.warning("Ignoring unreadable user config %s: %s", USER_CONFIG_FILE, exc)
        return {}

    if not isinstance(loaded, dict):
        # Callers use dict methods on the result.
        logger.warning("Ignoring user config %s: top-level value is not an object", USER_CONFIG_FILE)
        return {}
    return loaded
|
||
|
||
|
||
def save_user_config(config_type, config_data):
    """Persist one section of the user's configuration overrides.

    The existing overrides are loaded, the ``config_type`` section is replaced
    by ``config_data``, and the result is written back to ``USER_CONFIG_FILE``.

    Args:
        config_type: Section key to replace (the visible callers pass
            ``'llm'``, ``'index'`` or ``'doc'``).
        config_data: JSON-serializable value stored under that key.

    Raises:
        TypeError: If the merged configuration is not JSON-serializable.
        OSError: If the file cannot be written.
    """
    import tempfile

    user_config = load_user_config()
    user_config[config_type] = config_data

    # Serialize before touching the disk so that a serialization error cannot
    # leave a truncated file behind.
    serialized = json.dumps(user_config, ensure_ascii=False, indent=2)

    # Write to a temporary file in the same directory and swap it in with
    # os.replace, so readers never see a half-written file.
    # Note: mkstemp creates the file readable/writable by the owner only.
    target_dir = os.path.dirname(USER_CONFIG_FILE) or '.'
    fd, tmp_path = tempfile.mkstemp(dir=target_dir, suffix='.tmp')
    try:
        with os.fdopen(fd, 'w', encoding='utf-8') as f:
            f.write(serialized)
        os.replace(tmp_path, USER_CONFIG_FILE)
    except BaseException:
        # Best-effort cleanup of the temporary file, then re-raise.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
|
||
|
||
|
||
def get_effective_config():
    """Return the effective configuration: defaults overlaid with user overrides.

    Returns:
        dict: A dict with the keys ``'llm'``, ``'index'`` and ``'doc'``. Each
        value is a new dict built from the matching default
        (``LLM_CONFIG``, ``INDEX_CONFIG``, ``DOC_CONFIG``) with the user's
        saved values for that section taking precedence.
    """
    user_config = load_user_config()
    if not isinstance(user_config, dict):
        user_config = {}

    defaults = {
        'llm': LLM_CONFIG,
        'index': INDEX_CONFIG,
        'doc': DOC_CONFIG,
    }

    effective = {}
    for section, base in defaults.items():
        override = user_config.get(section)
        # A section saved as null or any non-object value cannot be merged
        # with ``**``; treat it as "no overrides" instead of raising.
        if not isinstance(override, dict):
            override = {}
        effective[section] = {**base, **override}
    return effective
|
||
|
||
|
||
# ==================== 创建应用 ====================
|
||
app = Flask(__name__)
app.config.update(
    SECRET_KEY=SECRET_KEY,
    SQLALCHEMY_DATABASE_URI=DATABASE_URL,
    SQLALCHEMY_TRACK_MODIFICATIONS=False,
    MAX_CONTENT_LENGTH=50 * 1024 * 1024,  # 50MB
)

# Initialize the database binding for this app.
db.init_app(app)

# Module-level service instances used by the API routes below.
indexer = DocumentIndexer()
search_engine = SearchEngine()
rag_generator = RAGGenerator()
|
||
|
||
|
||
# ==================== 初始化函数 ====================
|
||
def init_app():
    """Prepare the application before serving requests.

    Creates the document, index and log directories if they are missing, then
    creates the database tables and calls ``IndexStats.get_stats()`` inside an
    application context.

    Raises:
        OSError: If a directory cannot be created (including when the path
            exists but is not a directory).
    """
    # exist_ok=True replaces the separate existence check, which could race
    # with another process creating the same directory.
    for dir_name in (DOCUMENT_DIR, INDEX_DIR, LOG_DIR):
        os.makedirs(dir_name, exist_ok=True)

    with app.app_context():
        # Create the database tables.
        db.create_all()
        # Initialize the statistics (presumably creates the stats row on
        # first use; confirm in models.IndexStats).
        IndexStats.get_stats()
|
||
|
||
|
||
# ==================== 页面路由 ====================
|
||
@app.route('/')
def index():
    """Render the home page, passing the index statistics to the template."""
    return render_template('index.html', stats=IndexStats.get_stats())
|
||
|
||
|
||
@app.route('/documents')
def documents():
    """Render the document list page.

    Reads the ``page`` query parameter (default 1) and shows 20 documents per
    page, newest first.
    """
    page_number = request.args.get('page', 1, type=int)
    newest_first = Document.query.order_by(Document.created_at.desc())
    pagination = newest_first.paginate(page=page_number, per_page=20)
    return render_template('documents.html', docs=pagination)
|
||
|
||
|
||
@app.route('/search')
def search_page():
    """Render the search page."""
    template_name = 'search.html'
    return render_template(template_name)
|
||
|
||
|
||
@app.route('/settings')
def settings_page():
    """Render the settings page with the effective configuration."""
    return render_template('settings.html', config=get_effective_config())
|
||
|
||
|
||
# ==================== API路由 ====================
|
||
|
||
# === 文档管理 ===
|
||
|
||
@app.route('/api/documents', methods=['GET'])
def api_list_documents():
    """Return one page of documents as JSON.

    Query parameters:
        page: Page number, default 1.
        status: Optional status value; when non-empty only documents with
            that status are listed.

    The response carries ``documents``, ``total``, ``pages`` and
    ``current_page``; 20 documents per page, newest first.
    """
    page_number = request.args.get('page', 1, type=int)
    status_filter = request.args.get('status', '')

    if status_filter:
        doc_query = Document.query.filter_by(status=status_filter)
    else:
        doc_query = Document.query

    pagination = doc_query.order_by(Document.created_at.desc()).paginate(
        page=page_number, per_page=20
    )

    payload = {
        'documents': [item.to_dict() for item in pagination.items],
        'total': pagination.total,
        'pages': pagination.pages,
        'current_page': pagination.page,
    }
    return jsonify(payload)
|
||
|
||
|
||
def _resolve_upload_filename(raw_name, ext):
    """Return a sanitized file name that is not yet used inside ``DOCUMENT_DIR``.

    Args:
        raw_name: File name as sent by the client.
        ext: Already validated, lower-cased extension including the dot.
    """
    filename = secure_filename(raw_name)

    # secure_filename() drops non-ASCII characters, so a name such as
    # "文档.pdf" collapses to "pdf" (or to an empty string). When the
    # sanitized name no longer carries a stem plus the validated extension,
    # fall back to a generic stem so the stored file keeps its extension.
    if len(filename) <= len(ext) or not filename.lower().endswith(ext):
        filename = f"document{ext}"

    if not os.path.exists(os.path.join(DOCUMENT_DIR, filename)):
        return filename

    # Name collision: append a timestamp, then a counter for uploads that
    # land within the same second.
    base, suffix = os.path.splitext(filename)
    stamp = datetime.now().strftime('%Y%m%d%H%M%S')
    candidate = f"{base}_{stamp}{suffix}"
    counter = 1
    while os.path.exists(os.path.join(DOCUMENT_DIR, candidate)):
        candidate = f"{base}_{stamp}_{counter}{suffix}"
        counter += 1
    return candidate


@app.route('/api/documents', methods=['POST'])
def api_upload_document():
    """Upload a document and create its database record.

    Expects a multipart form with a ``file`` part and optional ``title`` and
    ``source`` fields.

    Returns:
        JSON ``{'success': True, 'document': ...}`` on success, or a JSON
        error with status 400 when no file was sent, no file was selected, or
        the extension is not in ``SUPPORTED_FORMATS``.
    """
    if 'file' not in request.files:
        return jsonify({'error': '未上传文件'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': '未选择文件'}), 400

    # Validate the file type using the extension of the client-supplied name.
    ext = os.path.splitext(file.filename)[1].lower()
    if ext not in SUPPORTED_FORMATS:
        return jsonify({'error': f'不支持的文件格式: {ext}'}), 400

    # Store the file under a sanitized, unique name. ``ext`` is deliberately
    # left untouched so that ``file_type`` always records the validated value.
    filename = _resolve_upload_filename(file.filename, ext)
    filepath = os.path.join(DOCUMENT_DIR, filename)
    file.save(filepath)

    # Create the document record.
    doc = Document(
        filename=filename,
        filepath=filepath,
        file_type=ext,
        file_size=os.path.getsize(filepath),
        title=request.form.get('title', filename),
        source=request.form.get('source', ''),
    )
    db.session.add(doc)
    db.session.commit()

    return jsonify({
        'success': True,
        'document': doc.to_dict()
    })
|
||
|
||
|
||
@app.route('/api/documents/<int:doc_id>', methods=['GET'])
def api_get_document(doc_id):
    """Return a document and its chunks as JSON; 404 if the id is unknown."""
    document = Document.query.get_or_404(doc_id)
    chunk_rows = DocumentChunk.query.filter_by(document_id=doc_id).all()

    payload = {
        'document': document.to_dict(),
        'chunks': [chunk.to_dict() for chunk in chunk_rows],
    }
    return jsonify(payload)
|
||
|
||
|
||
@app.route('/api/documents/<int:doc_id>', methods=['DELETE'])
def api_delete_document(doc_id):
    """Delete a document's file and database record; 404 if the id is unknown."""
    document = Document.query.get_or_404(doc_id)

    # Remove the stored file when it is still on disk.
    stored_path = document.filepath
    if os.path.exists(stored_path):
        os.remove(stored_path)

    # Remove the database record (the original author notes that chunks are
    # deleted by cascade; confirm in the model definition).
    db.session.delete(document)
    db.session.commit()

    # Refresh the statistics.
    stats = IndexStats.get_stats()
    stats.update_stats()

    return jsonify({'success': True})
|
||
|
||
|
||
# === 索引管理 ===
|
||
|
||
@app.route('/api/index/<int:doc_id>', methods=['POST'])
def api_index_document(doc_id):
    """Index a single document.

    Returns a success message when ``indexer.index_document`` reports
    success, otherwise a JSON error with status 500.
    """
    if not indexer.index_document(doc_id):
        return jsonify({'error': '索引失败'}), 500
    return jsonify({'success': True, 'message': '索引完成'})
|
||
|
||
|
||
@app.route('/api/index/batch', methods=['POST'])
def api_batch_index():
    """Index every document whose status is ``'pending'``.

    Returns JSON counters: ``success``, ``failed`` and ``total``.
    """
    pending = Document.query.filter_by(status='pending').all()
    tally = {'success': 0, 'failed': 0, 'total': len(pending)}

    for document in pending:
        outcome = 'success' if indexer.index_document(document.id) else 'failed'
        tally[outcome] += 1

    return jsonify(tally)
|
||
|
||
|
||
@app.route('/api/index/rebuild', methods=['POST'])
def api_rebuild_index():
    """Rebuild the whole index.

    Deletes all inverted-index rows and chunks, resets every document to
    ``'pending'``, commits, and then returns the result of the batch indexing
    endpoint.
    """
    # Drop the old index data.
    InvertedIndex.query.delete()
    DocumentChunk.query.delete()

    # Mark every document as not yet indexed.
    for document in Document.query.all():
        document.status = 'pending'
        document.indexed_at = None

    db.session.commit()

    # Re-index everything that is now pending.
    return api_batch_index()
|
||
|
||
|
||
@app.route('/api/stats', methods=['GET'])
def api_get_stats():
    """Return the index statistics as JSON.

    ``last_indexed_at`` is an ISO-8601 string, or ``null`` when unset.
    """
    stats = IndexStats.get_stats()

    if stats.last_indexed_at:
        last_indexed = stats.last_indexed_at.isoformat()
    else:
        last_indexed = None

    payload = {
        'total_documents': stats.total_documents,
        'total_chunks': stats.total_chunks,
        'total_terms': stats.total_terms,
        'total_words': stats.total_words,
        'last_indexed_at': last_indexed,
    }
    return jsonify(payload)
|
||
|
||
|
||
# === 搜索 ===
|
||
|
||
@app.route('/api/search', methods=['POST'])
def api_search():
    """Search the indexed documents.

    Expects a JSON object with ``query`` (non-empty string) and optional
    ``top_k`` (positive integer, default 10).

    Returns:
        JSON with ``query``, ``results`` and ``total``, or a JSON error with
        status 400 when the query is missing/blank or ``top_k`` is invalid.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # body; a non-object body (null, list, ...) is treated as empty so that
    # the ``.get`` calls below cannot fail.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    query = data.get('query', '')
    if not isinstance(query, str) or not query.strip():
        return jsonify({'error': '查询不能为空'}), 400

    try:
        top_k = int(data.get('top_k', 10))
    except (TypeError, ValueError):
        top_k = 0
    if top_k < 1:
        return jsonify({'error': 'top_k 必须为正整数'}), 400

    results = search_engine.search(query, top_k)

    return jsonify({
        'query': query,
        'results': results,
        'total': len(results)
    })
|
||
|
||
|
||
@app.route('/api/search/suggestions', methods=['GET'])
def api_search_suggestions():
    """Return up to 10 autocomplete suggestions for the ``prefix`` query parameter.

    Prefixes shorter than two characters yield an empty list. Matching is a
    case-insensitive prefix match against the inverted-index terms, ordered by
    descending total frequency.
    """
    prefix = request.args.get('prefix', '')

    if len(prefix) < 2:
        return jsonify({'suggestions': []})

    # Escape LIKE metacharacters so that "%" and "_" typed by the user are
    # matched literally instead of acting as wildcards.
    escaped = (
        prefix.replace('\\', '\\\\')
        .replace('%', '\\%')
        .replace('_', '\\_')
    )

    # Look up matching terms in the inverted index.
    terms = InvertedIndex.query.filter(
        InvertedIndex.term.ilike(f'{escaped}%', escape='\\')
    ).order_by(InvertedIndex.total_freq.desc()).limit(10).all()

    return jsonify({
        'suggestions': [t.term for t in terms]
    })
|
||
|
||
|
||
# === RAG ===
|
||
|
||
@app.route('/api/rag/answer', methods=['POST'])
def api_rag_answer():
    """Answer a question with the RAG generator.

    Expects a JSON object with ``query`` (non-empty string) and optional
    ``top_k`` (positive integer, default 5).

    Returns:
        The JSON-encoded result of ``rag_generator.answer``, or a JSON error
        with status 400 when the query is missing/blank or ``top_k`` is
        invalid.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # body; a non-object body (null, list, ...) is treated as empty so that
    # the ``.get`` calls below cannot fail.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    query = data.get('query', '')
    if not isinstance(query, str) or not query.strip():
        return jsonify({'error': '查询不能为空'}), 400

    try:
        top_k = int(data.get('top_k', 5))
    except (TypeError, ValueError):
        top_k = 0
    if top_k < 1:
        return jsonify({'error': 'top_k 必须为正整数'}), 400

    result = rag_generator.answer(query, top_k)

    return jsonify(result)
|
||
|
||
|
||
# === 查询日志 ===
|
||
|
||
@app.route('/api/logs', methods=['GET'])
def api_get_logs():
    """Return one page of query logs as JSON (50 per page, newest first).

    Reads the ``page`` query parameter (default 1); the response carries
    ``logs``, ``total`` and ``pages``.
    """
    page_number = request.args.get('page', 1, type=int)
    newest_first = QueryLog.query.order_by(QueryLog.created_at.desc())
    pagination = newest_first.paginate(page=page_number, per_page=50)

    payload = {
        'logs': [entry.to_dict() for entry in pagination.items],
        'total': pagination.total,
        'pages': pagination.pages,
    }
    return jsonify(payload)
|
||
|
||
|
||
@app.route('/api/logs/<int:log_id>/feedback', methods=['POST'])
def api_log_feedback(log_id):
    """Store user feedback on a query log entry.

    Expects a JSON object with optional ``rating`` and ``feedback`` keys;
    missing keys are stored as ``None``.

    Returns:
        JSON ``{'success': True}``; 404 if the log id is unknown; a JSON
        error with status 400 when the body is not a JSON object.
    """
    log = QueryLog.query.get_or_404(log_id)

    # silent=True yields None instead of raising on a missing or malformed
    # body; reject anything that is not an object before calling ``.get``.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': '请求体必须是JSON对象'}), 400

    log.rating = data.get('rating')
    log.feedback = data.get('feedback')

    db.session.commit()

    return jsonify({'success': True})
|
||
|
||
|
||
# === 配置管理 ===
|
||
|
||
@app.route('/api/config', methods=['GET'])
def api_get_config():
    """Return the effective configuration as JSON.

    NOTE(review): the payload is the unfiltered result of
    ``get_effective_config()``, so it includes every key of the LLM section.
    """
    effective = get_effective_config()
    return jsonify(effective)
|
||
|
||
|
||
@app.route('/api/config/llm', methods=['POST'])
def api_save_llm_config():
    """Save the LLM configuration section.

    Expects a JSON object; it replaces the stored ``'llm'`` section.

    Returns:
        JSON ``{'success': True}``, or a JSON error with status 400 when the
        body is not a JSON object.
    """
    # A null or non-object body must not be persisted: the stored section is
    # later merged into the defaults as a dict.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'success': False, 'error': '配置必须是JSON对象'}), 400

    save_user_config('llm', data)
    return jsonify({'success': True})
|
||
|
||
|
||
@app.route('/api/config/index', methods=['POST'])
def api_save_index_config():
    """Save the index configuration section.

    Expects a JSON object; it replaces the stored ``'index'`` section.

    Returns:
        JSON ``{'success': True}``, or a JSON error with status 400 when the
        body is not a JSON object.
    """
    # A null or non-object body must not be persisted: the stored section is
    # later merged into the defaults as a dict.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'success': False, 'error': '配置必须是JSON对象'}), 400

    save_user_config('index', data)
    return jsonify({'success': True})
|
||
|
||
|
||
@app.route('/api/config/doc', methods=['POST'])
def api_save_doc_config():
    """Save the document-processing configuration section.

    Expects a JSON object; it replaces the stored ``'doc'`` section.

    Returns:
        JSON ``{'success': True}``, or a JSON error with status 400 when the
        body is not a JSON object.
    """
    # A null or non-object body must not be persisted: the stored section is
    # later merged into the defaults as a dict.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'success': False, 'error': '配置必须是JSON对象'}), 400

    save_user_config('doc', data)
    return jsonify({'success': True})
|
||
|
||
|
||
@app.route('/api/config/test', methods=['POST'])
def api_test_config():
    """Test the LLM connection using the effective LLM configuration.

    Sends a minimal chat completion request. The response is always JSON with
    a ``success`` flag: on success it also carries ``model`` and ``response``;
    on any exception it carries ``error`` with the exception text.
    """
    llm_config = get_effective_config()['llm']

    try:
        # Imported here so a missing package is reported as a failed test.
        from openai import OpenAI

        client = OpenAI(
            api_key=llm_config['api_key'],
            base_url=llm_config['api_base'],
        )

        # Send a small probe request.
        probe_messages = [{"role": "user", "content": "Hello"}]
        completion = client.chat.completions.create(
            model=llm_config['model'],
            messages=probe_messages,
            max_tokens=10
        )

        outcome = {
            'success': True,
            'model': llm_config['model'],
            'response': completion.choices[0].message.content,
        }
        return jsonify(outcome)
    except Exception as e:
        failure = {'success': False, 'error': str(e)}
        return jsonify(failure)
|
||
|
||
|
||
# ==================== 启动 ====================
|
||
if __name__ == '__main__':
    init_app()
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute code. Keep it off unless it is
    # explicitly requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(host=API_HOST, port=API_PORT, debug=debug_enabled)