feat: 支持多图上传和智能解析产品参数
- 新增 /api/parse-images API 预览解析结果
- 智能添加支持多张图片上传和粘贴
- 支持一次解析出多个产品参数
- 所有类别(模型/GPU/CPU/动态分类)都支持图片解析
- 添加 vision_model 配置支持视觉模型
This commit is contained in:
301
app.py
301
app.py
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
ParamHub - 参数百科
|
||||
AI大模型与硬件参数速查平台
|
||||
v1.4.0 - 新增图片上传功能
|
||||
v1.5.0 - 支持多图上传和智能解析产品参数
|
||||
"""
|
||||
|
||||
from flask import Flask, render_template, jsonify, request
|
||||
@@ -46,6 +46,7 @@ LLM_CONFIG = {
|
||||
'base_url': 'http://192.168.2.17:19007/v1',
|
||||
'api_key': 'xxxx',
|
||||
'model': 'auto',
|
||||
'vision_model': 'gpt-4-vision-preview', # 视觉模型(解析图片)
|
||||
}
|
||||
|
||||
# 默认网站配置
|
||||
@@ -81,9 +82,10 @@ def save_data(file_path, data):
|
||||
|
||||
# ============ 大模型智能解析 ============
|
||||
|
||||
def parse_with_llm(text, category_type):
|
||||
def parse_with_llm(text, category_type, images=None):
|
||||
"""
|
||||
使用大模型解析文本,提取结构化数据
|
||||
使用大模型解析文本/图片,提取结构化数据
|
||||
支持多张图片输入,可能解析出多个产品
|
||||
"""
|
||||
|
||||
# 根据类型定义字段模板
|
||||
@@ -139,7 +141,62 @@ def parse_with_llm(text, category_type):
|
||||
|
||||
fields = field_templates.get(category_type, field_templates['dynamic'])
|
||||
|
||||
prompt = f"""请解析以下文本,提取结构化数据。
|
||||
# 构建消息内容
|
||||
content_parts = []
|
||||
|
||||
# 如果有图片,添加图片内容
|
||||
if images and len(images) > 0:
|
||||
content_parts.append({
|
||||
"type": "text",
|
||||
"text": f"""请分析图片中的产品参数信息,提取结构化数据。
|
||||
|
||||
需要提取的字段:
|
||||
{json.dumps(fields, ensure_ascii=False, indent=2)}
|
||||
|
||||
重要要求:
|
||||
1. 图片中可能包含1个或多个产品,请识别所有产品
|
||||
2. 如果是多张图片,请综合分析所有图片内容
|
||||
3. 数字字段只返回数字,不带单位
|
||||
4. 如果某字段没有提及,返回null
|
||||
5. 返回格式:如果识别到多个产品,返回数组 [{"name": ...}, {"name": ...}]; 如果只有一个产品,返回单个对象 {"name": ...}
|
||||
6. 只返回JSON数据,不要其他内容"""
|
||||
})
|
||||
|
||||
# 添加每张图片(支持URL或base64)
|
||||
for img in images:
|
||||
if isinstance(img, str):
|
||||
if img.startswith('http'):
|
||||
# URL图片
|
||||
content_parts.append({
|
||||
"type": "image_url",
|
||||
"image_url": {"url": img}
|
||||
})
|
||||
elif img.startswith('data:'):
|
||||
# base64图片
|
||||
content_parts.append({
|
||||
"type": "image_url",
|
||||
"image_url": {"url": img}
|
||||
})
|
||||
else:
|
||||
# 本地路径,读取并转为base64
|
||||
try:
|
||||
img_path = IMAGES_DIR / img.replace('/static/uploads/', '')
|
||||
if img_path.exists():
|
||||
with open(img_path, 'rb') as f:
|
||||
img_data = base64.b64encode(f.read()).decode()
|
||||
ext = img_path.suffix.lower()
|
||||
mime_type = f'image/{ext if ext != "jpg" else "jpeg"}'
|
||||
content_parts.append({
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:{mime_type};base64,{img_data}"}
|
||||
})
|
||||
except Exception as e:
|
||||
print(f"读取图片失败: {e}")
|
||||
else:
|
||||
# 纯文本解析
|
||||
content_parts.append({
|
||||
"type": "text",
|
||||
"text": f"""请解析以下文本,提取结构化数据。
|
||||
|
||||
文本内容:
|
||||
{text}
|
||||
@@ -154,8 +211,12 @@ def parse_with_llm(text, category_type):
|
||||
4. 返回JSON格式,不要包含任何其他内容
|
||||
|
||||
请直接返回JSON数据:"""
|
||||
|
||||
})
|
||||
|
||||
try:
|
||||
# 使用视觉模型解析
|
||||
model = LLM_CONFIG.get('vision_model', 'gpt-4-vision-preview') if images else LLM_CONFIG['model']
|
||||
|
||||
response = requests.post(
|
||||
f"{LLM_CONFIG['base_url']}/chat/completions",
|
||||
headers={
|
||||
@@ -163,15 +224,15 @@ def parse_with_llm(text, category_type):
|
||||
"Authorization": f"Bearer {LLM_CONFIG['api_key']}"
|
||||
},
|
||||
json={
|
||||
"model": LLM_CONFIG['model'],
|
||||
"model": model,
|
||||
"messages": [
|
||||
{"role": "system", "content": "你是一个数据提取助手,负责从文本中提取结构化数据。只返回JSON,不要其他内容。"},
|
||||
{"role": "user", "content": prompt}
|
||||
{"role": "system", "content": "你是一个产品参数提取助手,负责从文本和图片中提取结构化的产品参数数据。只返回JSON,不要其他内容。如果图片中包含多个产品,返回数组。"},
|
||||
{"role": "user", "content": content_parts}
|
||||
],
|
||||
"max_tokens": 1000,
|
||||
"max_tokens": 2000,
|
||||
"temperature": 0.1
|
||||
},
|
||||
timeout=30
|
||||
timeout=60
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
@@ -186,28 +247,38 @@ def parse_with_llm(text, category_type):
|
||||
# 解析JSON
|
||||
parsed = json.loads(content)
|
||||
|
||||
# 清理null值
|
||||
cleaned = {}
|
||||
for k, v in parsed.items():
|
||||
if v is not None and v != '' and v != 'null':
|
||||
# 尝试转换数字
|
||||
if isinstance(v, str):
|
||||
try:
|
||||
if '.' in v:
|
||||
cleaned[k] = float(v)
|
||||
else:
|
||||
cleaned[k] = int(v)
|
||||
except:
|
||||
cleaned[k] = v
|
||||
else:
|
||||
cleaned[k] = v
|
||||
# 处理结果(可能是数组或单个对象)
|
||||
results = []
|
||||
if isinstance(parsed, list):
|
||||
results = parsed
|
||||
else:
|
||||
results = [parsed]
|
||||
|
||||
return cleaned
|
||||
# 清理每个结果的null值
|
||||
cleaned_results = []
|
||||
for item in results:
|
||||
cleaned = {}
|
||||
for k, v in item.items():
|
||||
if v is not None and v != '' and v != 'null':
|
||||
# 尝试转换数字
|
||||
if isinstance(v, str):
|
||||
try:
|
||||
if '.' in v:
|
||||
cleaned[k] = float(v)
|
||||
else:
|
||||
cleaned[k] = int(v)
|
||||
except:
|
||||
cleaned[k] = v
|
||||
else:
|
||||
cleaned[k] = v
|
||||
cleaned_results.append(cleaned)
|
||||
|
||||
return cleaned_results
|
||||
except Exception as e:
|
||||
print(f"LLM解析失败: {e}")
|
||||
|
||||
# 降级处理:返回基本结构
|
||||
return {'name': text[:50], 'description': text}
|
||||
return [{'name': text[:50] if text else '未命名产品', 'description': text}]
|
||||
|
||||
# ============ 页面路由 ============
|
||||
|
||||
@@ -393,109 +464,171 @@ def api_toggle_model_visible(model_id):
|
||||
|
||||
return jsonify({'success': True, 'visible': model['visible']})
|
||||
|
||||
# ============ 图片解析API(预览) ============
|
||||
|
||||
@app.route('/api/parse-images', methods=['POST'])
def api_parse_images():
    """Parse product parameters from images (preview mode, nothing is saved).

    Reads ``text``, ``images`` and ``category_type`` (default ``'dynamic'``)
    from the JSON request body, forwards them to ``parse_with_llm`` and
    returns the parsed products together with the echoed text and images.
    Several images may be supplied and several products may come back.

    Returns a 400 response with an ``error`` message when both text and
    images are empty, or when no image is supplied. A missing, malformed or
    non-object JSON body is treated as an empty request and therefore also
    ends in a 400 instead of an unhandled AttributeError.
    """
    # silent=True turns a missing/invalid JSON body into None instead of an
    # exception; anything that is not a JSON object counts as "no fields".
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    # `or` also normalises explicit JSON nulls to the empty defaults.
    text = data.get('text') or ''
    images = data.get('images') or []
    category_type = data.get('category_type', 'dynamic')

    if not text and not images:
        return jsonify({'error': '文本或图片不能都为空'}), 400

    # This endpoint is image-only: text alone is not enough.
    if not images:
        return jsonify({'error': '请上传至少一张图片'}), 400

    # Ask the LLM to extract the products.
    parsed_list = parse_with_llm(text, category_type, images)

    return jsonify({
        'success': True,
        'count': len(parsed_list),
        'products': parsed_list,
        'raw_text': text,
        'images': images,
    })
|
||||
|
||||
# ============ 智能添加API ============
|
||||
|
||||
@app.route('/api/models/smart-add', methods=['POST'])
def api_smart_add_model():
    """Smart-add models from pasted text and/or images.

    The text and images from the JSON request body are handed to
    ``parse_with_llm`` with the ``'model'`` category. Every product it
    returns is stamped with server-side bookkeeping fields, appended to the
    data stored in ``MODELS_FILE`` and echoed back, so one request may add
    several products.

    Returns a 400 response when neither text nor images are supplied; a
    missing, malformed or non-object JSON body is treated the same way
    instead of raising an unhandled AttributeError.
    """
    # silent=True turns a missing/invalid JSON body into None instead of an
    # exception; anything that is not a JSON object counts as "no fields".
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    # `or` also normalises explicit JSON nulls to the empty defaults.
    text = data.get('text') or ''
    images = data.get('images') or []

    if not text and not images:
        return jsonify({'error': '文本或图片不能都为空'}), 400

    # LLM parsing (multi-image aware); the result is a list of products.
    parsed_list = parse_with_llm(text, 'model', images)

    models = load_data(MODELS_FILE)
    results = []
    for parsed in parsed_list:
        # Fill in the required bookkeeping fields.
        parsed['id'] = uuid.uuid4().hex[:12]
        parsed['created_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        parsed['visible'] = True
        parsed['raw_text'] = text  # keep the original text
        parsed['images'] = images  # keep the submitted images
        parsed.setdefault('publish_date', '')
        parsed['views'] = 0  # popularity counter starts at zero
        parsed['is_pinned'] = False

        models.append(parsed)
        results.append(parsed)

    # Persist once after all products have been collected.
    save_data(MODELS_FILE, models)

    # Return the list of products that were added.
    return jsonify({'success': True, 'count': len(results), 'products': results})
|
||||
|
||||
@app.route('/api/gpus/smart-add', methods=['POST'])
def api_smart_add_gpu():
    """Smart-add GPUs from pasted text and/or images.

    The text and images from the JSON request body are handed to
    ``parse_with_llm`` with the ``'gpu'`` category. Every product it returns
    is stamped with server-side bookkeeping fields, appended to the data
    stored in ``GPUS_FILE`` and echoed back.

    Returns a 400 response when neither text nor images are supplied; a
    missing, malformed or non-object JSON body is treated the same way
    instead of raising an unhandled AttributeError.
    """
    # silent=True turns a missing/invalid JSON body into None instead of an
    # exception; anything that is not a JSON object counts as "no fields".
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    # `or` also normalises explicit JSON nulls to the empty defaults.
    text = data.get('text') or ''
    images = data.get('images') or []

    if not text and not images:
        return jsonify({'error': '文本或图片不能都为空'}), 400

    # The parser returns a list because one request may describe several GPUs.
    parsed_list = parse_with_llm(text, 'gpu', images)

    gpus = load_data(GPUS_FILE)
    results = []
    for parsed in parsed_list:
        parsed['id'] = uuid.uuid4().hex[:12]
        parsed['created_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        parsed['visible'] = True
        parsed['raw_text'] = text
        parsed['images'] = images
        parsed.setdefault('publish_date', '')
        parsed['views'] = 0
        parsed['is_pinned'] = False

        gpus.append(parsed)
        results.append(parsed)

    # Persist once after all products have been collected.
    save_data(GPUS_FILE, gpus)

    return jsonify({'success': True, 'count': len(results), 'products': results})
|
||||
|
||||
@app.route('/api/cpus/smart-add', methods=['POST'])
def api_smart_add_cpu():
    """Smart-add CPUs from pasted text and/or images.

    The text and images from the JSON request body are handed to
    ``parse_with_llm`` with the ``'cpu'`` category. Every product it returns
    is stamped with server-side bookkeeping fields, appended to the data
    stored in ``CPUS_FILE`` and echoed back.

    Returns a 400 response when neither text nor images are supplied; a
    missing, malformed or non-object JSON body is treated the same way
    instead of raising an unhandled AttributeError.
    """
    # silent=True turns a missing/invalid JSON body into None instead of an
    # exception; anything that is not a JSON object counts as "no fields".
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    # `or` also normalises explicit JSON nulls to the empty defaults.
    text = data.get('text') or ''
    images = data.get('images') or []

    if not text and not images:
        return jsonify({'error': '文本或图片不能都为空'}), 400

    # The parser returns a list because one request may describe several CPUs.
    parsed_list = parse_with_llm(text, 'cpu', images)

    cpus = load_data(CPUS_FILE)
    results = []
    for parsed in parsed_list:
        parsed['id'] = uuid.uuid4().hex[:12]
        parsed['created_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        parsed['visible'] = True
        parsed['raw_text'] = text
        parsed['images'] = images
        parsed.setdefault('publish_date', '')
        parsed['views'] = 0
        parsed['is_pinned'] = False

        cpus.append(parsed)
        results.append(parsed)

    # Persist once after all products have been collected.
    save_data(CPUS_FILE, cpus)

    return jsonify({'success': True, 'count': len(results), 'products': results})
|
||||
|
||||
@app.route('/api/items/<category_id>/smart-add', methods=['POST'])
def api_smart_add_item(category_id):
    """Smart-add items of a dynamic category from pasted text and/or images.

    The text and images from the JSON request body are handed to
    ``parse_with_llm`` with the ``'dynamic'`` category. Every product it
    returns is tagged with ``category_id``, stamped with server-side
    bookkeeping fields, appended to ``items_<category_id>.json`` under
    ``DATA_DIR`` and echoed back.

    Args:
        category_id: Identifier of the dynamic category taken from the URL.

    Returns a 400 response when neither text nor images are supplied; a
    missing, malformed or non-object JSON body is treated the same way
    instead of raising an unhandled AttributeError.
    """
    # silent=True turns a missing/invalid JSON body into None instead of an
    # exception; anything that is not a JSON object counts as "no fields".
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    # `or` also normalises explicit JSON nulls to the empty defaults.
    text = data.get('text') or ''
    images = data.get('images') or []

    if not text and not images:
        return jsonify({'error': '文本或图片不能都为空'}), 400

    # The parser returns a list because one request may describe several items.
    parsed_list = parse_with_llm(text, 'dynamic', images)

    items_file = DATA_DIR / f'items_{category_id}.json'
    items = load_data(items_file)
    results = []
    for parsed in parsed_list:
        parsed['id'] = uuid.uuid4().hex[:12]
        parsed['category_id'] = category_id
        parsed['created_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        parsed['visible'] = True
        parsed['raw_text'] = text
        parsed['images'] = images
        parsed.setdefault('publish_date', '')
        parsed['views'] = 0
        parsed['is_pinned'] = False

        items.append(parsed)
        results.append(parsed)

    # Persist once after all products have been collected.
    save_data(items_file, items)

    return jsonify({'success': True, 'count': len(results), 'products': results})
|
||||
|
||||
# ============ GPU API ============
|
||||
|
||||
|
||||
Reference in New Issue
Block a user