5 Commits
v1.0.0 ... main

Author SHA1 Message Date
ab6469b9dd feat: 添加索引模式开关 - 支持简单索引模式(不使用LLM)
新增配置:
- USE_LLM_INDEX = False (默认关闭LLM增强索引)

简单索引模式:
- 不调用LLM,速度快
- 使用词频统计提取关键词
- 适合快速建立索引

LLM增强模式(USE_LLM_INDEX=True):
- 使用LLM分析文档,提取摘要、关键词、实体
- 索引质量更高,但速度慢、需要LLM服务
2026-04-09 17:28:37 +08:00
bdbfa2a176 fix: 修复文档读取编码问题 - 支持GBK/GB2312等中文编码
问题: 上传的GBK编码文档显示乱码
解决: 自动检测并尝试多种编码(utf-8, gbk, gb2312, gb18030, big5等)
2026-04-09 17:13:02 +08:00
8baecc520a fix: 修复文档列表显示问题和添加文档详情页面
修复:
- Document.to_dict() 添加 file_type 和 file_size 字段
- 文档列表正确显示类型和大小

新增:
- /documents/<id> 文档详情页面
- document_detail.html 模板(显示文档信息、分块内容、关键词等)
2026-04-09 17:02:17 +08:00
d54a105e55 fix: 修复搜索API报错 - 添加缺失的QueryLog导入 2026-04-09 16:52:54 +08:00
8c7a99d83f feat: 添加配置管理功能和修复搜索问题
新增:
- 系统设置页面 (/settings) - 支持动态配置LLM、索引、文档处理参数
- 配置API - 保存配置、测试LLM连接
- 前端JS交互文件 - 搜索、文档管理功能

修复:
- 首页搜索框无法正常工作的问题(缺少main.js)
- 服务支持动态读取配置(无需重启生效)

改进:
- LLM/索引/文档配置支持热更新
- 添加测试LLM连接功能
2026-04-09 12:54:31 +08:00
9 changed files with 1028 additions and 51 deletions

4
.gitignore vendored
View File

@@ -2,5 +2,7 @@ instance/
*.db
__pycache__/
*.pyc
.envdocuments/
.env
documents/
indexes/
user_config.json

113
app.py
View File

@@ -13,6 +13,38 @@ from config import *
from models import db, Document, DocumentChunk, InvertedIndex, QueryLog, IndexStats
from services import DocumentIndexer, SearchEngine, RAGGenerator
# ==================== Config file paths ====================
# Path of config.py, resolved next to this module.
CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'config.py')
# Path of the JSON file that stores user overrides; it is read by
# load_user_config() and written by save_user_config().
USER_CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'user_config.json')
def load_user_config():
    """Load the user configuration overrides from ``USER_CONFIG_FILE``.

    Returns:
        dict: The parsed overrides. An empty dict is returned when the file
        does not exist, cannot be read, is not valid JSON, or does not hold
        a JSON object at the top level.
    """
    import logging

    try:
        with open(USER_CONFIG_FILE, 'r', encoding='utf-8') as f:
            user_config = json.load(f)
    except FileNotFoundError:
        # No overrides saved yet: the defaults apply unchanged.
        return {}
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        # A damaged override file must not take down every page that reads
        # the configuration, so fall back to "no overrides" and report it.
        logging.getLogger(__name__).warning(
            "Ignoring unreadable user config %s: %s", USER_CONFIG_FILE, exc
        )
        return {}

    # Callers index the result by section name, so anything but a dict is unusable.
    return user_config if isinstance(user_config, dict) else {}
def save_user_config(config_type, config_data):
    """Persist one section of the user configuration to ``USER_CONFIG_FILE``.

    The existing overrides are loaded, the given section is replaced, and the
    whole document is written back as pretty-printed UTF-8 JSON.

    Args:
        config_type: Section key to store the data under (the routes in this
            file use ``'llm'``, ``'index'`` and ``'doc'``).
        config_data: JSON-serializable value for that section.
    """
    user_config = load_user_config()
    user_config[config_type] = config_data

    # Write to a sibling temp file and rename it over the target, so a crash
    # or serialization error mid-write never leaves truncated JSON behind.
    tmp_path = USER_CONFIG_FILE + '.tmp'
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(user_config, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, USER_CONFIG_FILE)
    finally:
        # After a successful replace the temp file is gone; this only cleans
        # up when the write or the rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def get_effective_config():
    """Build the effective configuration: defaults overlaid with user overrides.

    Returns:
        dict: A mapping with the keys ``'llm'``, ``'index'`` and ``'doc'``;
        each value is the matching default dict with the user's section
        merged on top.
    """
    overrides = load_user_config()
    defaults_by_section = (
        ('llm', LLM_CONFIG),
        ('index', INDEX_CONFIG),
        ('doc', DOC_CONFIG),
    )
    return {
        section: {**defaults, **overrides.get(section, {})}
        for section, defaults in defaults_by_section
    }
# ==================== 创建应用 ====================
app = Flask(__name__)
app.config['SECRET_KEY'] = SECRET_KEY
@@ -66,6 +98,13 @@ def search_page():
return render_template('search.html')
@app.route('/settings')
def settings_page():
    """Render the settings page, passing the effective configuration as ``config``."""
    return render_template('settings.html', config=get_effective_config())
# ==================== API路由 ====================
# === 文档管理 ===
@@ -138,7 +177,7 @@ def api_upload_document():
@app.route('/api/documents/<int:doc_id>', methods=['GET'])
def api_get_document(doc_id):
"""获取文档详情"""
"""获取文档详情API"""
doc = Document.query.get_or_404(doc_id)
chunks = DocumentChunk.query.filter_by(document_id=doc_id).all()
@@ -149,6 +188,14 @@ def api_get_document(doc_id):
})
@app.route('/documents/<int:doc_id>')
def document_detail_page(doc_id):
    """Render the detail page for one document.

    Looks the document up by primary key (responding 404 when it is missing)
    and passes it to the template together with its chunks ordered by
    ``chunk_index``.
    """
    document = Document.query.get_or_404(doc_id)
    ordered_chunks = (
        DocumentChunk.query
        .filter_by(document_id=doc_id)
        .order_by(DocumentChunk.chunk_index)
        .all()
    )
    return render_template('document_detail.html', doc=document, chunks=ordered_chunks)
@app.route('/api/documents/<int:doc_id>', methods=['DELETE'])
def api_delete_document(doc_id):
"""删除文档"""
@@ -315,6 +362,70 @@ def api_log_feedback(log_id):
return jsonify({'success': True})
# === 配置管理 ===
@app.route('/api/config', methods=['GET'])
def api_get_config():
    """Return the effective configuration (defaults plus user overrides) as JSON.

    NOTE(review): the payload contains the ``llm`` section verbatim, so any
    API key stored there is returned to the caller - confirm this is intended.
    """
    effective = get_effective_config()
    return jsonify(effective)
@app.route('/api/config/llm', methods=['POST'])
def api_save_llm_config():
    """Save the LLM section of the user configuration.

    Expects a JSON object in the request body.

    Returns:
        ``{'success': True}`` once the section is stored, or HTTP 400 with
        ``{'success': False, 'error': ...}`` when the body is not a JSON object.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # A null/list/scalar section would be persisted and then break the
        # ``{**defaults, **section}`` merge every time the config is read.
        return jsonify({'success': False, 'error': '请求体必须是JSON对象'}), 400
    save_user_config('llm', data)
    return jsonify({'success': True})
@app.route('/api/config/index', methods=['POST'])
def api_save_index_config():
    """Save the index section of the user configuration.

    Expects a JSON object in the request body.

    Returns:
        ``{'success': True}`` once the section is stored, or HTTP 400 with
        ``{'success': False, 'error': ...}`` when the body is not a JSON object.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # A null/list/scalar section would be persisted and then break the
        # ``{**defaults, **section}`` merge every time the config is read.
        return jsonify({'success': False, 'error': '请求体必须是JSON对象'}), 400
    save_user_config('index', data)
    return jsonify({'success': True})
@app.route('/api/config/doc', methods=['POST'])
def api_save_doc_config():
    """Save the document-processing section of the user configuration.

    Expects a JSON object in the request body.

    Returns:
        ``{'success': True}`` once the section is stored, or HTTP 400 with
        ``{'success': False, 'error': ...}`` when the body is not a JSON object.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # A null/list/scalar section would be persisted and then break the
        # ``{**defaults, **section}`` merge every time the config is read.
        return jsonify({'success': False, 'error': '请求体必须是JSON对象'}), 400
    save_user_config('doc', data)
    return jsonify({'success': True})
@app.route('/api/config/test', methods=['POST'])
def api_test_config():
    """Test the LLM connection using the effective LLM configuration.

    Sends a one-message chat completion ("Hello", at most 10 tokens) to the
    configured endpoint.

    Returns:
        JSON with ``success``, ``model`` and ``response`` when the call
        succeeds, or ``success: False`` plus the exception text as ``error``
        when anything inside the attempt raises.
    """
    llm_settings = get_effective_config()['llm']

    try:
        from openai import OpenAI

        llm_client = OpenAI(
            api_key=llm_settings['api_key'],
            base_url=llm_settings['api_base'],
        )
        # Minimal request: only proves that the endpoint, key and model work.
        probe_messages = [{"role": "user", "content": "Hello"}]
        reply = llm_client.chat.completions.create(
            model=llm_settings['model'],
            messages=probe_messages,
            max_tokens=10
        )
        reply_text = reply.choices[0].message.content
        return jsonify({
            'success': True,
            'model': llm_settings['model'],
            'response': reply_text
        })
    except Exception as exc:
        failure = {'success': False, 'error': str(exc)}
        return jsonify(failure)
# ==================== 启动 ====================
if __name__ == '__main__':
init_app()

View File

@@ -33,6 +33,12 @@ DOC_CONFIG = {
"max_summary_length": 500, # 摘要最大长度
}
# ==================== Index mode ====================
# USE_LLM_INDEX: whether to build the index with LLM enhancement.
# True  - analyze documents with the LLM to extract keywords, summaries, etc.;
#         requires an LLM service and is slow.
# False - use simple tokenization and term-frequency statistics; fast, no LLM needed.
USE_LLM_INDEX = False # LLM-enhanced indexing is off by default
# ==================== 索引配置 ====================
INDEX_CONFIG = {
# BM25参数

View File

@@ -92,6 +92,8 @@ class Document(db.Model):
return {
'id': self.id,
'filename': self.filename,
'file_type': self.file_type,
'file_size': self.file_size,
'title': self.title,
'status': self.status,
'summary': self.summary,

View File

@@ -12,21 +12,57 @@ from collections import Counter
from openai import OpenAI
from flask import current_app
from config import LLM_CONFIG, DOC_CONFIG, INDEX_CONFIG
from models import db, Document, DocumentChunk, InvertedIndex, IndexStats
from config import LLM_CONFIG, DOC_CONFIG, INDEX_CONFIG, USE_LLM_INDEX
from models import db, Document, DocumentChunk, InvertedIndex, IndexStats, QueryLog
def _load_user_section(defaults, section):
    """Return a copy of *defaults* overlaid with one section of user_config.json.

    Args:
        defaults: The default config dict for the section (never mutated).
        section: Top-level key to read from ``user_config.json``.

    Returns:
        dict: A new dict on every call. The defaults are returned unchanged
        (as a copy) when the file is missing, unreadable, not valid JSON, or
        the section is absent or not a JSON object.
    """
    user_config_file = os.path.join(os.path.dirname(__file__), 'user_config.json')
    try:
        with open(user_config_file, 'r', encoding='utf-8') as f:
            user_config = json.load(f)
    except FileNotFoundError:
        # No overrides saved yet.
        return dict(defaults)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError is a ValueError subclass. A damaged override
        # file should degrade to the defaults, not break indexing and search.
        print(f" ✗ 读取用户配置失败: {e}")
        return dict(defaults)

    overrides = user_config.get(section, {}) if isinstance(user_config, dict) else {}
    if not isinstance(overrides, dict):
        overrides = {}
    return {**defaults, **overrides}


def get_llm_config():
    """Return the effective LLM config; the file is re-read on every call."""
    return _load_user_section(LLM_CONFIG, 'llm')


def get_doc_config():
    """Return the effective document-processing config; re-read on every call."""
    return _load_user_section(DOC_CONFIG, 'doc')


def get_index_config():
    """Return the effective index config; re-read on every call."""
    return _load_user_section(INDEX_CONFIG, 'index')
class LLMService:
"""LLM服务封装"""
def __init__(self):
self.client = OpenAI(
api_key=LLM_CONFIG['api_key'],
base_url=LLM_CONFIG['api_base'],
pass # 不再在初始化时设置配置
def _get_client(self):
"""获取LLM客户端"""
config = get_llm_config()
return OpenAI(
api_key=config['api_key'],
base_url=config['api_base'],
)
self.model = LLM_CONFIG['model']
self.max_tokens = LLM_CONFIG['max_tokens']
self.temperature = LLM_CONFIG['temperature']
def _get_config(self):
"""获取当前配置"""
return get_llm_config()
def analyze_document(self, content, title=None):
"""
@@ -60,8 +96,10 @@ class LLMService:
只返回JSON不要其他内容。"""
try:
response = self.client.chat.completions.create(
model=self.model,
config = self._get_config()
client = self._get_client()
response = client.chat.completions.create(
model=config['model'],
messages=[{"role": "user", "content": prompt}],
max_tokens=1000,
temperature=0.3,
@@ -106,8 +144,10 @@ class LLMService:
只返回JSON。"""
try:
response = self.client.chat.completions.create(
model=self.model,
config = self._get_config()
client = self._get_client()
response = client.chat.completions.create(
model=config['model'],
messages=[{"role": "user", "content": prompt}],
max_tokens=500,
temperature=0.3,
@@ -150,8 +190,10 @@ class LLMService:
只返回JSON。"""
try:
response = self.client.chat.completions.create(
model=self.model,
config = self._get_config()
client = self._get_client()
response = client.chat.completions.create(
model=config['model'],
messages=[{"role": "user", "content": prompt}],
max_tokens=500,
temperature=0.3,
@@ -177,8 +219,11 @@ class DocumentIndexer:
def __init__(self):
self.llm = LLMService()
self.chunk_size = DOC_CONFIG['chunk_size']
self.chunk_overlap = DOC_CONFIG['chunk_overlap']
def _get_chunk_config(self):
    """Return ``(chunk_size, chunk_overlap)`` read from the document config."""
    doc_config = get_doc_config()
    chunk_size = doc_config['chunk_size']
    chunk_overlap = doc_config['chunk_overlap']
    return chunk_size, chunk_overlap
def index_document(self, doc_id):
"""
@@ -207,15 +252,25 @@ class DocumentIndexer:
doc.content = content
doc.word_count = len(content)
# 使用LLM分析整个文档
print(f" 正在分析文档: {doc.filename}")
analysis = self.llm.analyze_document(content, doc.title)
# 检查是否使用LLM增强索引
use_llm = USE_LLM_INDEX
doc.summary = analysis.get('summary', '')
doc.set_keywords(analysis.get('keywords', []))
doc.set_topics(analysis.get('topics', []))
doc.category = analysis.get('category', '')
doc.set_entities(analysis.get('entities', {}))
if use_llm:
# 使用LLM分析整个文档
print(f" 正在使用LLM分析文档: {doc.filename}")
analysis = self.llm.analyze_document(content, doc.title)
doc.summary = analysis.get('summary', '')
doc.set_keywords(analysis.get('keywords', []))
doc.set_topics(analysis.get('topics', []))
doc.category = analysis.get('category', '')
doc.set_entities(analysis.get('entities', {}))
else:
# 简单模式不使用LLM
print(f" 正在索引文档(简单模式): {doc.filename}")
# 从内容中提取简单关键词
simple_keywords = self._extract_simple_keywords(content)
doc.set_keywords(simple_keywords[:20])
# 分块处理
chunks = self._split_content(content)
@@ -234,10 +289,14 @@ class DocumentIndexer:
end_char=len(chunk_content)
)
# LLM分析分块
chunk_analysis = self.llm.analyze_chunk(chunk_content)
chunk.summary = chunk_analysis.get('summary', '')
chunk.set_keywords(chunk_analysis.get('keywords', []))
if use_llm:
# LLM分析分块
chunk_analysis = self.llm.analyze_chunk(chunk_content)
chunk.summary = chunk_analysis.get('summary', '')
chunk.set_keywords(chunk_analysis.get('keywords', []))
else:
# 简单模式:从分块提取关键词
chunk.set_keywords(self._extract_simple_keywords(chunk_content)[:10])
# 计算词频
term_freq = self._compute_term_freq(chunk_content)
@@ -268,23 +327,70 @@ class DocumentIndexer:
print(f" ✗ 索引失败: {e}")
return False
def _extract_simple_keywords(self, content):
"""简单提取关键词不使用LLM"""
import re
from collections import Counter
# 提取中文词组2-4字
chinese_words = re.findall(r'[\u4e00-\u9fff]{2,4}', content)
# 过滤常见无意义词
stopwords = {'', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '',
'一个', '一些', '这种', '那种', '什么', '怎么', '如何', '为什么'}
# 统计词频
word_freq = Counter(w for w in chinese_words if w not in stopwords)
# 返回高频词
return [w for w, _ in word_freq.most_common(30)]
def _read_document(self, filepath):
"""读取文档内容"""
ext = os.path.splitext(filepath)[1].lower()
# 尝试读取文本文件(包括没有扩展名的)
try:
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
if content.strip(): # 如果能读取到内容
return content
except:
pass
# 尝试多种编码读取文本文件
encodings = ['utf-8', 'gbk', 'gb2312', 'gb18030', 'big5', 'utf-16', 'latin-1']
# 按扩展名处理特定格式
if ext in ['.txt', '.md', '.json', '.html']:
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
return f.read()
def try_read_with_encoding(encodings_list):
for enc in encodings_list:
try:
with open(filepath, 'r', encoding=enc) as f:
content = f.read()
# 检查是否有有效的中文字符
if content.strip() and len(content) > 0:
# 简单验证:检查是否有乱码
# 如果内容看起来合理,返回它
return content, enc
except (UnicodeDecodeError, UnicodeError):
continue
return None, None
# 按扩展名处理
if ext in ['.txt', '.md', '.json', '.html', '']:
# 先尝试常见编码
content, used_enc = try_read_with_encoding(encodings)
if content:
print(f" 使用编码 {used_enc} 读取文件")
return content
# 如果都失败,尝试二进制读取后解码
try:
with open(filepath, 'rb') as f:
raw = f.read()
# 尝试chardet检测编码
try:
import chardet
detected = chardet.detect(raw)
if detected['encoding']:
return raw.decode(detected['encoding'])
except:
pass
# 最后尝试
return raw.decode('utf-8', errors='replace')
except:
return ""
elif ext == '.pdf':
try:
@@ -330,6 +436,7 @@ class DocumentIndexer:
Returns:
list: 内容块列表
"""
chunk_size, _ = self._get_chunk_config()
chunks = []
# 按段落分割
@@ -337,7 +444,7 @@ class DocumentIndexer:
current_chunk = ""
for para in paragraphs:
if len(current_chunk) + len(para) < self.chunk_size:
if len(current_chunk) + len(para) < chunk_size:
current_chunk += para + '\n\n'
else:
if current_chunk.strip():
@@ -347,7 +454,7 @@ class DocumentIndexer:
if current_chunk.strip():
chunks.append(current_chunk.strip())
return chunks if chunks else [content[:self.chunk_size]]
return chunks if chunks else [content[:chunk_size]]
def _compute_term_freq(self, content):
"""计算词频"""
@@ -424,8 +531,11 @@ class SearchEngine:
def __init__(self):
self.llm = LLMService()
self.k1 = INDEX_CONFIG['bm25_k1']
self.b = INDEX_CONFIG['bm25_b']
def _get_bm25_params(self):
    """Return the BM25 parameters ``(k1, b)`` read from the index config."""
    index_config = get_index_config()
    k1 = index_config['bm25_k1']
    b = index_config['bm25_b']
    return k1, b
def search(self, query, top_k=10):
"""
@@ -542,6 +652,7 @@ class SearchEngine:
continue
# BM25计算
k1, b = self._get_bm25_params()
score = 0
doc_len = doc.word_count or 1000
@@ -557,8 +668,8 @@ class SearchEngine:
tf = data['terms'].get(term, 0)
# BM25公式
tf_component = (tf * (self.k1 + 1)) / (
tf + self.k1 * (1 - self.b + self.b * doc_len / avg_doc_len)
tf_component = (tf * (k1 + 1)) / (
tf + k1 * (1 - b + b * doc_len / avg_doc_len)
)
score += idf * tf_component
@@ -653,13 +764,14 @@ class RAGGenerator:
请给出准确、简洁的回答,并标注信息来源。"""
try:
llm_config = get_llm_config()
client = OpenAI(
api_key=LLM_CONFIG['api_key'],
base_url=LLM_CONFIG['api_base'],
api_key=llm_config['api_key'],
base_url=llm_config['api_base'],
)
response = client.chat.completions.create(
model=LLM_CONFIG['model'],
model=llm_config['model'],
messages=[{"role": "user", "content": prompt}],
max_tokens=1000,
temperature=0.5,

247
static/js/main.js Normal file
View File

@@ -0,0 +1,247 @@
/**
* LLM Index RAG - 前端交互脚本
*/
// Search form: run a document search or a RAG question and render the outcome.
document.getElementById('searchForm')?.addEventListener('submit', async function(e) {
    e.preventDefault();

    // Escape text before interpolating it into innerHTML. Titles, summaries,
    // snippets, answers and error messages all come from the server.
    const escapeHtml = (value) => String(value ?? '').replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'
    }[ch]));
    // Leading part of `text` followed by an ellipsis, or '' when there is no
    // text (the previous `text?.substring(...) + '...'` rendered "undefined...").
    const excerpt = (text, limit) => (text ? String(text).substring(0, limit) + '...' : '');
    // Relevance as a percentage; '-' when the server sent no numeric score.
    const scoreLabel = (score) => {
        const numeric = Number(score);
        return Number.isFinite(numeric) ? (numeric * 100).toFixed(1) + '%' : '-';
    };

    const query = document.getElementById('queryInput').value.trim();
    // Fall back to plain search instead of throwing when no mode radio is checked.
    const mode = document.querySelector('input[name="mode"]:checked')?.value ?? 'search';

    if (!query) {
        alert('请输入查询内容');
        return;
    }

    const resultsSection = document.getElementById('resultsSection');
    const ragSection = document.getElementById('ragSection');
    const resultsContainer = document.getElementById('resultsContainer');
    const ragAnswer = document.getElementById('ragAnswer');
    const ragSources = document.getElementById('ragSources');
    const resultCount = document.getElementById('resultCount');

    // Show the loading state.
    resultsSection.style.display = 'none';
    ragSection.style.display = 'none';
    resultsContainer.innerHTML = '<div class="text-center py-4"><div class="spinner-border text-primary"></div><p class="mt-2">正在检索...</p></div>';
    resultsSection.style.display = 'block';

    try {
        if (mode === 'search') {
            // Document search mode.
            const response = await fetch('/api/search', {
                method: 'POST',
                headers: {'Content-Type': 'application/json'},
                body: JSON.stringify({query: query, top_k: 10})
            });
            const data = await response.json();

            if (data.error) {
                resultsContainer.innerHTML = `<div class="alert alert-danger">${escapeHtml(data.error)}</div>`;
                return;
            }

            resultCount.textContent = data.total;

            if (data.results && data.results.length > 0) {
                resultsContainer.innerHTML = data.results.map(r => `
                    <div class="result-item">
                        <h6 class="mb-1">
                            <a href="/documents" class="text-decoration-none">${escapeHtml(r.title || r.document_title || '文档')}</a>
                        </h6>
                        <p class="text-muted small mb-1">${escapeHtml(r.summary || excerpt(r.content, 200))}</p>
                        <div class="d-flex gap-2 align-items-center">
                            <span class="source-tag">${escapeHtml(r.source || '本地文档')}</span>
                            <span class="badge bg-primary">${scoreLabel(r.score)}</span>
                        </div>
                    </div>
                `).join('');
            } else {
                resultsContainer.innerHTML = `
                    <div class="text-center py-4 text-muted">
                        <i class="bi bi-search display-4"></i>
                        <p class="mt-2">未找到相关结果</p>
                        <p class="small">请尝试其他关键词,或先上传并索引文档</p>
                    </div>
                `;
            }
        } else {
            // RAG question-answering mode.
            resultsContainer.innerHTML = '<div class="text-center py-4"><div class="spinner-border text-primary"></div><p class="mt-2">正在生成回答...</p></div>';

            const response = await fetch('/api/rag/answer', {
                method: 'POST',
                headers: {'Content-Type': 'application/json'},
                body: JSON.stringify({query: query, top_k: 5})
            });
            const data = await response.json();

            if (data.error) {
                resultsContainer.innerHTML = `<div class="alert alert-danger">${escapeHtml(data.error)}</div>`;
                return;
            }

            resultsSection.style.display = 'none';
            ragSection.style.display = 'block';

            // Show the answer.
            // NOTE(review): the answer is escaped as plain text; if the server
            // ever returns pre-rendered HTML, sanitize it instead of escaping.
            ragAnswer.innerHTML = `
                <div class="d-flex align-items-start gap-3">
                    <div class="bg-primary text-white rounded-circle p-2">
                        <i class="bi bi-robot"></i>
                    </div>
                    <div class="flex-grow-1">
                        <div class="markdown-content">${escapeHtml(data.answer || '抱歉,无法生成回答。')}</div>
                    </div>
                </div>
            `;

            // Show the sources.
            if (data.sources && data.sources.length > 0) {
                ragSources.innerHTML = data.sources.map(s => `
                    <div class="result-item">
                        <h6 class="mb-1">${escapeHtml(s.title || s.document_title || '参考文档')}</h6>
                        <p class="text-muted small mb-0">${escapeHtml(excerpt(s.content, 150))}</p>
                    </div>
                `).join('');
            } else {
                ragSources.innerHTML = '<p class="text-muted">无参考来源</p>';
            }
        }
    } catch (err) {
        resultsContainer.innerHTML = `<div class="alert alert-danger">请求失败: ${escapeHtml(err.message)}</div>`;
    }
});
// Document upload: POST the form as multipart data and reload on success.
document.getElementById('uploadForm')?.addEventListener('submit', async function(e) {
    e.preventDefault();

    const payload = new FormData(this);
    const picker = document.getElementById('fileInput');
    const hasFile = picker.files.length > 0;
    if (!hasFile) {
        alert('请选择文件');
        return;
    }

    // Lock the button and show a spinner while the request is in flight.
    const button = document.getElementById('uploadBtn');
    button.disabled = true;
    button.innerHTML = '<span class="spinner-border spinner-border-sm"></span> 上传中...';

    const reportFailure = (reason) => alert('上传失败: ' + reason);

    try {
        const reply = await fetch('/api/documents', {method: 'POST', body: payload});
        const result = await reply.json();

        if (!result.success) {
            reportFailure(result.error || '未知错误');
        } else {
            alert('上传成功!');
            location.reload();
        }
    } catch (failure) {
        reportFailure(failure.message);
    } finally {
        // Restore the button whatever the outcome.
        button.disabled = false;
        button.innerHTML = '<i class="bi bi-upload"></i> 上传';
    }
});
// Index a single document after the user confirms; reloads the page on success.
async function indexDocument(docId) {
    const proceed = confirm('确定要索引此文档吗?这可能需要一些时间。');
    if (!proceed) return;

    const reportFailure = (reason) => alert('索引失败: ' + reason);

    try {
        const reply = await fetch(`/api/index/${docId}`, {method: 'POST'});
        const result = await reply.json();

        if (!result.success) {
            reportFailure(result.error || '未知错误');
        } else {
            alert('索引完成!');
            location.reload();
        }
    } catch (failure) {
        reportFailure(failure.message);
    }
}
// Delete a single document after the user confirms; reloads the page on success.
async function deleteDocument(docId) {
    const proceed = confirm('确定要删除此文档吗?此操作不可恢复。');
    if (!proceed) return;

    const reportFailure = (reason) => alert('删除失败: ' + reason);

    try {
        const reply = await fetch(`/api/documents/${docId}`, {method: 'DELETE'});
        const result = await reply.json();

        if (!result.success) {
            reportFailure(result.error || '未知错误');
        } else {
            alert('删除成功!');
            location.reload();
        }
    } catch (failure) {
        reportFailure(failure.message);
    }
}
// Index every pending document after the user confirms, then report the counts.
async function batchIndex() {
    if (!confirm('确定要索引所有待处理文档吗?')) return;

    try {
        const response = await fetch('/api/index/batch', {method: 'POST'});
        const data = await response.json();

        // An error response carries no counts; report it as a failure rather
        // than as "done" with undefined numbers.
        if (!response.ok || data.error) {
            alert('批量索引失败: ' + (data.error || `HTTP ${response.status}`));
            return;
        }

        alert(`索引完成!成功: ${data.success ?? 0}, 失败: ${data.failed ?? 0}`);
        location.reload();
    } catch (err) {
        alert('批量索引失败: ' + err.message);
    }
}
// Rebuild the whole index after the user confirms, then report the counts.
async function rebuildIndex() {
    if (!confirm('重建索引将清除所有现有索引,确定继续吗?')) return;

    try {
        const response = await fetch('/api/index/rebuild', {method: 'POST'});
        const data = await response.json();

        // An error response carries no counts; report it as a failure rather
        // than as "done" with undefined numbers.
        if (!response.ok || data.error) {
            alert('重建索引失败: ' + (data.error || `HTTP ${response.status}`));
            return;
        }

        alert(`重建完成!成功: ${data.success ?? 0}, 失败: ${data.failed ?? 0}`);
        location.reload();
    } catch (err) {
        alert('重建索引失败: ' + err.message);
    }
}
// Fetch /api/stats and write the four counters into the page.
async function loadStats() {
    try {
        const reply = await fetch('/api/stats');
        const stats = await reply.json();

        // [element id, text to display], applied in this order.
        const counters = [
            ['statDocs', stats.total_documents || 0],
            ['statChunks', stats.total_chunks || 0],
            ['statTerms', stats.total_terms || 0],
            ['statWords', (stats.total_words || 0).toLocaleString()],
        ];
        for (const [elementId, text] of counters) {
            document.getElementById(elementId).textContent = text;
        }
    } catch (failure) {
        console.error('加载统计失败:', failure);
    }
}
// On page load: if the stats widgets are present, load them now and refresh periodically.
document.addEventListener('DOMContentLoaded', () => {
    const hasStatsWidgets = document.getElementById('statDocs');
    if (!hasStatsWidgets) return;

    loadStats();
    const REFRESH_INTERVAL_MS = 30000; // refresh every 30 seconds
    setInterval(loadStats, REFRESH_INTERVAL_MS);
});

View File

@@ -0,0 +1,212 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{ doc.title or doc.filename }} - 文档详情</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.10.0/font/bootstrap-icons.css" rel="stylesheet">
<style>
body { background-color: #f8f9fa; }
.chunk-card { border-left: 4px solid #667eea; }
</style>
</head>
<body>
<nav class="navbar navbar-expand-lg navbar-dark bg-dark">
<div class="container">
<a class="navbar-brand" href="/"><i class="bi bi-search"></i> LLM Index RAG</a>
<div class="navbar-nav ms-auto">
<a class="nav-link" href="/">首页</a>
<a class="nav-link" href="/documents">文档管理</a>
<a class="nav-link" href="/search">知识检索</a>
<a class="nav-link" href="/settings">系统设置</a>
</div>
</div>
</nav>
<div class="container py-4">
<nav aria-label="breadcrumb" class="mb-3">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/documents">文档管理</a></li>
<li class="breadcrumb-item active">{{ doc.title or doc.filename }}</li>
</ol>
</nav>
<!-- 文档信息 -->
<div class="card mb-4">
<div class="card-header d-flex justify-content-between align-items-center">
<h5 class="mb-0"><i class="bi bi-file-earmark-text"></i> 文档信息</h5>
<div>
{% if doc.status == 'indexed' %}
<span class="badge bg-success">已索引</span>
{% elif doc.status == 'processing' %}
<span class="badge bg-warning">处理中</span>
{% elif doc.status == 'failed' %}
<span class="badge bg-danger">失败</span>
{% else %}
<span class="badge bg-secondary">待索引</span>
{% endif %}
</div>
</div>
<div class="card-body">
<div class="row">
<div class="col-md-6">
<table class="table table-sm">
<tr>
<th width="120">文件名</th>
<td>{{ doc.filename }}</td>
</tr>
<tr>
<th>标题</th>
<td>{{ doc.title or '-' }}</td>
</tr>
<tr>
<th>文件类型</th>
<td>{{ doc.file_type }}</td>
</tr>
<tr>
<th>文件大小</th>
<td>{{ (doc.file_size / 1024)|round(1) }} KB</td>
</tr>
</table>
</div>
<div class="col-md-6">
<table class="table table-sm">
<tr>
<th width="120">分块数</th>
<td>{{ doc.chunk_count }}</td>
</tr>
<tr>
<th>字数</th>
<td>{{ doc.word_count|default(0) }}</td>
</tr>
<tr>
<th>上传时间</th>
<td>{{ doc.created_at.strftime('%Y-%m-%d %H:%M') if doc.created_at else '-' }}</td>
</tr>
<tr>
<th>索引时间</th>
<td>{{ doc.indexed_at.strftime('%Y-%m-%d %H:%M') if doc.indexed_at else '-' }}</td>
</tr>
</table>
</div>
</div>
{% if doc.summary %}
<div class="mt-3">
<h6><i class="bi bi-card-text"></i> 文档摘要</h6>
<p class="text-muted">{{ doc.summary }}</p>
</div>
{% endif %}
{% if doc.keywords %}
<div class="mt-3">
<h6><i class="bi bi-tags"></i> 关键词</h6>
<div>
{% for kw in doc.get_keywords() %}
<span class="badge bg-primary me-1">{{ kw }}</span>
{% endfor %}
</div>
</div>
{% endif %}
{% if doc.category %}
<div class="mt-3">
<h6><i class="bi bi-folder"></i> 分类</h6>
<span class="badge bg-info">{{ doc.category }}</span>
</div>
{% endif %}
</div>
</div>
<!-- 文档分块 -->
<div class="card">
<div class="card-header">
<h5 class="mb-0"><i class="bi bi-puzzle"></i> 文档分块 ({{ chunks|length }} 个)</h5>
</div>
<div class="card-body">
{% if chunks %}
<div class="accordion" id="chunksAccordion">
{% for chunk in chunks %}
<div class="accordion-item">
<h2 class="accordion-header">
<button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#chunk{{ loop.index }}">
<strong>分块 {{ loop.index }}</strong>
<span class="ms-3 text-muted small">
{{ chunk.content[:50] }}...
</span>
</button>
</h2>
<div id="chunk{{ loop.index }}" class="accordion-collapse collapse" data-bs-parent="#chunksAccordion">
<div class="accordion-body">
{% if chunk.summary %}
<div class="alert alert-info small">
<strong>摘要:</strong> {{ chunk.summary }}
</div>
{% endif %}
{% if chunk.keywords %}
<div class="mb-2">
{% for kw in chunk.get_keywords() %}
<span class="badge bg-secondary me-1">{{ kw }}</span>
{% endfor %}
</div>
{% endif %}
<pre class="bg-light p-3 rounded" style="white-space: pre-wrap; word-wrap: break-word;">{{ chunk.content }}</pre>
</div>
</div>
</div>
{% endfor %}
</div>
{% else %}
<p class="text-muted text-center py-4">暂无分块数据,请先索引文档</p>
{% endif %}
</div>
</div>
<div class="mt-4">
<a href="/documents" class="btn btn-secondary">
<i class="bi bi-arrow-left"></i> 返回列表
</a>
{% if doc.status == 'pending' %}
<button class="btn btn-primary" onclick="indexDoc({{ doc.id }})">
<i class="bi bi-arrow-repeat"></i> 索引文档
</button>
{% endif %}
<button class="btn btn-danger" onclick="deleteDoc({{ doc.id }})">
<i class="bi bi-trash"></i> 删除文档
</button>
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
<script>
// Index this document after the user confirms; reloads the page on success.
function indexDoc(id) {
    if (!confirm('确定要索引此文档吗?')) return;
    fetch(`/api/index/${id}`, { method: 'POST' })
        .then(r => r.json())
        .then(data => {
            alert(data.success ? '索引完成' : '索引失败: ' + (data.error || '未知错误'));
            if (data.success) location.reload();
        })
        // Network failures and non-JSON responses used to be silent
        // unhandled rejections; tell the user instead.
        .catch(err => alert('索引失败: ' + err.message));
}
// Delete this document after the user confirms; returns to the list on success.
function deleteDoc(id) {
    if (!confirm('确定要删除此文档吗?此操作不可恢复!')) return;
    fetch(`/api/documents/${id}`, { method: 'DELETE' })
        .then(r => r.json())
        .then(data => {
            if (data.success) {
                alert('删除成功');
                window.location.href = '/documents';
            } else {
                // Include the server's reason when it sends one.
                alert('删除失败: ' + (data.error || '未知错误'));
            }
        })
        // Network failures and non-JSON responses used to be silent
        // unhandled rejections; tell the user instead.
        .catch(err => alert('删除失败: ' + err.message));
}
</script>
</body>
</html>

View File

@@ -28,6 +28,7 @@
<a class="nav-link" href="/">首页</a>
<a class="nav-link" href="/documents">文档管理</a>
<a class="nav-link" href="/search">知识检索</a>
<a class="nav-link" href="/settings">系统设置</a>
</div>
</div>
</nav>

284
templates/settings.html Normal file
View File

@@ -0,0 +1,284 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>系统设置 - LLM Index RAG</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.10.0/font/bootstrap-icons.css" rel="stylesheet">
<style>
body { background-color: #f8f9fa; }
.config-card { border-radius: 10px; border: none; box-shadow: 0 2px 10px rgba(0,0,0,0.05); }
</style>
</head>
<body>
<!-- 导航栏 -->
<nav class="navbar navbar-expand-lg navbar-dark bg-dark">
<div class="container">
<a class="navbar-brand" href="/">
<i class="bi bi-search"></i> LLM Index RAG
</a>
<div class="navbar-nav ms-auto">
<a class="nav-link" href="/">首页</a>
<a class="nav-link" href="/documents">文档管理</a>
<a class="nav-link" href="/search">知识检索</a>
<a class="nav-link active" href="/settings">系统设置</a>
</div>
</div>
</nav>
<div class="container py-4">
<h4 class="mb-4"><i class="bi bi-gear"></i> 系统设置</h4>
<!-- LLM配置 -->
<div class="card config-card mb-4">
<div class="card-header bg-white">
<h5 class="mb-0"><i class="bi bi-cpu text-primary"></i> 大模型配置</h5>
</div>
<div class="card-body">
<form id="llmConfigForm">
<div class="row">
<div class="col-md-6 mb-3">
<label class="form-label">API地址</label>
<input type="text" class="form-control" id="apiBase"
value="{{ config.llm.api_base }}" placeholder="http://localhost:1234/v1">
<small class="text-muted">LLM API的基础URL</small>
</div>
<div class="col-md-6 mb-3">
<label class="form-label">API Key</label>
<input type="text" class="form-control" id="apiKey"
value="{{ config.llm.api_key }}" placeholder="sk-xxx">
<small class="text-muted">API密钥</small>
</div>
</div>
<div class="row">
<div class="col-md-6 mb-3">
<label class="form-label">模型名称</label>
<input type="text" class="form-control" id="model"
value="{{ config.llm.model }}" placeholder="qwen/qwen3.5-35b-a3b">
</div>
<div class="col-md-3 mb-3">
<label class="form-label">Max Tokens</label>
<input type="number" class="form-control" id="maxTokens"
value="{{ config.llm.max_tokens }}">
</div>
<div class="col-md-3 mb-3">
<label class="form-label">Temperature</label>
<input type="number" class="form-control" id="temperature"
value="{{ config.llm.temperature }}" step="0.1" min="0" max="2">
</div>
</div>
<div class="mb-3">
<label class="form-label">超时时间(秒)</label>
<input type="number" class="form-control" id="timeout"
value="{{ config.llm.timeout }}">
</div>
<div class="d-flex gap-2">
<button type="submit" class="btn btn-primary">
<i class="bi bi-save"></i> 保存配置
</button>
<button type="button" class="btn btn-outline-secondary" onclick="testConnection()">
<i class="bi bi-plug"></i> 测试连接
</button>
</div>
</form>
<div id="testResult" class="mt-3"></div>
</div>
</div>
<!-- 索引配置 -->
<div class="card config-card mb-4">
<div class="card-header bg-white">
<h5 class="mb-0"><i class="bi bi-list-columns text-success"></i> 索引配置</h5>
</div>
<div class="card-body">
<form id="indexConfigForm">
<div class="row">
<div class="col-md-4 mb-3">
<label class="form-label">BM25 K1参数</label>
<input type="number" class="form-control" id="bm25K1"
value="{{ config.index.bm25_k1 }}" step="0.1">
<small class="text-muted">词频饱和参数推荐1.2-2.0</small>
</div>
<div class="col-md-4 mb-3">
<label class="form-label">BM25 B参数</label>
<input type="number" class="form-control" id="bm25B"
value="{{ config.index.bm25_b }}" step="0.05">
<small class="text-muted">文档长度归一化推荐0.75</small>
</div>
<div class="col-md-4 mb-3">
<label class="form-label">最大返回结果</label>
<input type="number" class="form-control" id="maxResults"
value="{{ config.index.max_results }}">
</div>
</div>
<div class="row">
<div class="col-md-4 mb-3">
<label class="form-label">标题权重</label>
<input type="number" class="form-control" id="titleWeight"
value="{{ config.index.title_weight }}" step="0.5">
</div>
<div class="col-md-4 mb-3">
<label class="form-label">关键词权重</label>
<input type="number" class="form-control" id="keywordWeight"
value="{{ config.index.keyword_weight }}" step="0.5">
</div>
<div class="col-md-4 mb-3">
<label class="form-label">内容权重</label>
<input type="number" class="form-control" id="contentWeight"
value="{{ config.index.content_weight }}" step="0.5">
</div>
</div>
<button type="submit" class="btn btn-success">
<i class="bi bi-save"></i> 保存配置
</button>
</form>
</div>
</div>
<!-- 文档配置 -->
<div class="card config-card">
<div class="card-header bg-white">
<h5 class="mb-0"><i class="bi bi-file-earmark-text text-warning"></i> 文档处理配置</h5>
</div>
<div class="card-body">
<form id="docConfigForm">
<div class="row">
<div class="col-md-4 mb-3">
<label class="form-label">分块大小</label>
<input type="number" class="form-control" id="chunkSize"
value="{{ config.doc.chunk_size }}">
<small class="text-muted">字符数</small>
</div>
<div class="col-md-4 mb-3">
<label class="form-label">分块重叠</label>
<input type="number" class="form-control" id="chunkOverlap"
value="{{ config.doc.chunk_overlap }}">
</div>
<div class="col-md-4 mb-3">
<label class="form-label">最大关键词数</label>
<input type="number" class="form-control" id="maxKeywords"
value="{{ config.doc.max_keywords }}">
</div>
</div>
<button type="submit" class="btn btn-warning">
<i class="bi bi-save"></i> 保存配置
</button>
</form>
</div>
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
<script>
// Save the LLM configuration.
document.getElementById('llmConfigForm')?.addEventListener('submit', async function(e) {
    e.preventDefault();
    const config = {
        api_base: document.getElementById('apiBase').value,
        api_key: document.getElementById('apiKey').value,
        model: document.getElementById('model').value,
        max_tokens: parseInt(document.getElementById('maxTokens').value, 10),
        temperature: parseFloat(document.getElementById('temperature').value),
        timeout: parseInt(document.getElementById('timeout').value, 10)
    };

    // An empty or malformed numeric input parses to NaN, which JSON.stringify
    // sends as null; refuse to submit rather than store a null setting.
    const numericKeys = ['max_tokens', 'temperature', 'timeout'];
    if (numericKeys.some(key => Number.isNaN(config[key]))) {
        alert('保存失败: 请输入有效的数值');
        return;
    }

    try {
        const res = await fetch('/api/config/llm', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify(config)
        });
        const data = await res.json();
        if (data.success) {
            alert('LLM配置已保存');
        } else {
            alert('保存失败: ' + (data.error || '未知错误'));
        }
    } catch (err) {
        alert('保存失败: ' + err.message);
    }
});
// Test the LLM connection through /api/config/test and show the outcome.
async function testConnection() {
    const resultDiv = document.getElementById('testResult');

    // Render a Bootstrap alert. The message goes in as a text node, so error
    // text returned by the server cannot inject markup into the page.
    const showResult = (alertClass, iconClass, message) => {
        const box = document.createElement('div');
        box.className = 'alert ' + alertClass;
        const icon = document.createElement('i');
        icon.className = 'bi ' + iconClass;
        box.appendChild(icon);
        box.appendChild(document.createTextNode(' ' + message));
        resultDiv.innerHTML = '';
        resultDiv.appendChild(box);
    };

    resultDiv.innerHTML = '<div class="alert alert-info"><span class="spinner-border spinner-border-sm"></span> 测试中...</div>';

    try {
        const res = await fetch('/api/config/test', {method: 'POST'});
        const data = await res.json();
        if (data.success) {
            showResult('alert-success', 'bi-check-circle', '连接成功!模型: ' + data.model);
        } else {
            showResult('alert-danger', 'bi-x-circle', '连接失败: ' + (data.error || '未知错误'));
        }
    } catch (err) {
        showResult('alert-danger', 'bi-x-circle', '连接失败: ' + err.message);
    }
}
// Save the index configuration.
document.getElementById('indexConfigForm')?.addEventListener('submit', async function(e) {
    e.preventDefault();
    const config = {
        bm25_k1: parseFloat(document.getElementById('bm25K1').value),
        bm25_b: parseFloat(document.getElementById('bm25B').value),
        max_results: parseInt(document.getElementById('maxResults').value, 10),
        title_weight: parseFloat(document.getElementById('titleWeight').value),
        keyword_weight: parseFloat(document.getElementById('keywordWeight').value),
        content_weight: parseFloat(document.getElementById('contentWeight').value)
    };

    // Every field here is numeric. An empty or malformed input parses to NaN,
    // which JSON.stringify sends as null; refuse to submit rather than store it.
    if (Object.values(config).some(value => Number.isNaN(value))) {
        alert('保存失败: 请输入有效的数值');
        return;
    }

    try {
        const res = await fetch('/api/config/index', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify(config)
        });
        const data = await res.json();
        if (data.success) {
            alert('索引配置已保存!');
        } else {
            alert('保存失败: ' + (data.error || '未知错误'));
        }
    } catch (err) {
        alert('保存失败: ' + err.message);
    }
});
// Save the document-processing configuration.
document.getElementById('docConfigForm')?.addEventListener('submit', async function(e) {
    e.preventDefault();
    const config = {
        chunk_size: parseInt(document.getElementById('chunkSize').value, 10),
        chunk_overlap: parseInt(document.getElementById('chunkOverlap').value, 10),
        max_keywords: parseInt(document.getElementById('maxKeywords').value, 10)
    };

    // Every field here is numeric. An empty or malformed input parses to NaN,
    // which JSON.stringify sends as null; refuse to submit rather than store it.
    if (Object.values(config).some(value => Number.isNaN(value))) {
        alert('保存失败: 请输入有效的数值');
        return;
    }

    try {
        const res = await fetch('/api/config/doc', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify(config)
        });
        const data = await res.json();
        if (data.success) {
            alert('文档配置已保存!');
        } else {
            alert('保存失败: ' + (data.error || '未知错误'));
        }
    } catch (err) {
        alert('保存失败: ' + err.message);
    }
});
</script>
</body>
</html>