diff --git a/app.py b/app.py index 62048fb..653dc84 100644 --- a/app.py +++ b/app.py @@ -1,7 +1,7 @@ """ ParamHub - 参数百科 AI大模型与硬件参数速查平台 -v1.4.0 - 新增图片上传功能 +v1.5.0 - 支持多图上传和智能解析产品参数 """ from flask import Flask, render_template, jsonify, request @@ -46,6 +46,7 @@ LLM_CONFIG = { 'base_url': 'http://192.168.2.17:19007/v1', 'api_key': 'xxxx', 'model': 'auto', + 'vision_model': 'gpt-4-vision-preview', # 视觉模型(解析图片) } # 默认网站配置 @@ -81,9 +82,10 @@ def save_data(file_path, data): # ============ 大模型智能解析 ============ -def parse_with_llm(text, category_type): +def parse_with_llm(text, category_type, images=None): """ - 使用大模型解析文本,提取结构化数据 + 使用大模型解析文本/图片,提取结构化数据 + 支持多张图片输入,可能解析出多个产品 """ # 根据类型定义字段模板 @@ -139,7 +141,62 @@ def parse_with_llm(text, category_type): fields = field_templates.get(category_type, field_templates['dynamic']) - prompt = f"""请解析以下文本,提取结构化数据。 + # 构建消息内容 + content_parts = [] + + # 如果有图片,添加图片内容 + if images and len(images) > 0: + content_parts.append({ + "type": "text", + "text": f"""请分析图片中的产品参数信息,提取结构化数据。 + +需要提取的字段: +{json.dumps(fields, ensure_ascii=False, indent=2)} + +重要要求: +1. 图片中可能包含1个或多个产品,请识别所有产品 +2. 如果是多张图片,请综合分析所有图片内容 +3. 数字字段只返回数字,不带单位 +4. 如果某字段没有提及,返回null +5. 返回格式:如果识别到多个产品,返回数组 [{"name": ...}, {"name": ...}]; 如果只有一个产品,返回单个对象 {"name": ...} +6. 只返回JSON数据,不要其他内容""" + }) + + # 添加每张图片(支持URL或base64) + for img in images: + if isinstance(img, str): + if img.startswith('http'): + # URL图片 + content_parts.append({ + "type": "image_url", + "image_url": {"url": img} + }) + elif img.startswith('data:'): + # base64图片 + content_parts.append({ + "type": "image_url", + "image_url": {"url": img} + }) + else: + # 本地路径,读取并转为base64 + try: + img_path = IMAGES_DIR / img.replace('/static/uploads/', '') + if img_path.exists(): + with open(img_path, 'rb') as f: + img_data = base64.b64encode(f.read()).decode() + ext = img_path.suffix.lower() + mime_type = f'image/{ext if ext != "jpg" else "jpeg"}' + content_parts.append({ + "type": "image_url", + "image_url": {"url": f"data:{mime_type};base64,{img_data}"} + }) + except Exception as e: + print(f"读取图片失败: {e}") + else: + # 纯文本解析 + content_parts.append({ + "type": "text", + "text": f"""请解析以下文本,提取结构化数据。 文本内容: {text} @@ -154,8 +211,12 @@ def parse_with_llm(text, category_type): 4. 返回JSON格式,不要包含任何其他内容 请直接返回JSON数据:""" - + }) + try: + # 使用视觉模型解析 + model = LLM_CONFIG.get('vision_model', 'gpt-4-vision-preview') if images else LLM_CONFIG['model'] + response = requests.post( f"{LLM_CONFIG['base_url']}/chat/completions", headers={ @@ -163,15 +224,15 @@ def parse_with_llm(text, category_type): "Authorization": f"Bearer {LLM_CONFIG['api_key']}" }, json={ - "model": LLM_CONFIG['model'], + "model": model, "messages": [ - {"role": "system", "content": "你是一个数据提取助手,负责从文本中提取结构化数据。只返回JSON,不要其他内容。"}, - {"role": "user", "content": prompt} + {"role": "system", "content": "你是一个产品参数提取助手,负责从文本和图片中提取结构化的产品参数数据。只返回JSON,不要其他内容。如果图片中包含多个产品,返回数组。"}, + {"role": "user", "content": content_parts} ], - "max_tokens": 1000, + "max_tokens": 2000, "temperature": 0.1 }, - timeout=30 + timeout=60 ) if response.status_code == 200: @@ -186,28 +247,38 @@ def parse_with_llm(text, category_type): # 解析JSON parsed = json.loads(content) - # 清理null值 - cleaned = {} - for k, v in parsed.items(): - if v is not None and v != '' and v != 'null': - # 尝试转换数字 - if isinstance(v, str): - try: - if '.' in v: - cleaned[k] = float(v) - else: - cleaned[k] = int(v) - except: - cleaned[k] = v - else: - cleaned[k] = v + # 处理结果(可能是数组或单个对象) + results = [] + if isinstance(parsed, list): + results = parsed + else: + results = [parsed] - return cleaned + # 清理每个结果的null值 + cleaned_results = [] + for item in results: + cleaned = {} + for k, v in item.items(): + if v is not None and v != '' and v != 'null': + # 尝试转换数字 + if isinstance(v, str): + try: + if '.' in v: + cleaned[k] = float(v) + else: + cleaned[k] = int(v) + except: + cleaned[k] = v + else: + cleaned[k] = v + cleaned_results.append(cleaned) + + return cleaned_results except Exception as e: print(f"LLM解析失败: {e}") # 降级处理:返回基本结构 - return {'name': text[:50], 'description': text} + return [{'name': text[:50] if text else '未命名产品', 'description': text}] # ============ 页面路由 ============ @@ -393,109 +464,171 @@ def api_toggle_model_visible(model_id): return jsonify({'success': True, 'visible': model['visible']}) +# ============ 图片解析API(预览) ============ + +@app.route('/api/parse-images', methods=['POST']) +def api_parse_images(): + """ + 解析图片中的产品参数(预览模式,不保存) + 支持多张图片,可能返回多个产品 + """ + data = request.get_json() + text = data.get('text', '') + images = data.get('images', []) + category_type = data.get('category_type', 'dynamic') + + if not text and not images: + return jsonify({'error': '文本或图片不能都为空'}), 400 + + if not images: + return jsonify({'error': '请上传至少一张图片'}), 400 + + # 调用大模型解析 + parsed_list = parse_with_llm(text, category_type, images) + + return jsonify({ + 'success': True, + 'count': len(parsed_list), + 'products': parsed_list, + 'raw_text': text, + 'images': images + }) + # ============ 智能添加API ============ @app.route('/api/models/smart-add', methods=['POST']) def api_smart_add_model(): - """智能添加模型(粘贴文本解析)""" + """智能添加模型(支持文本和多图解析,可能添加多个产品)""" data = request.get_json() text = data.get('text', '') + images = data.get('images', []) - if not text: - return jsonify({'error': '文本不能为空'}), 400 + if not text and not images: + return jsonify({'error': '文本或图片不能都为空'}), 400 - # 大模型解析 - parsed = parse_with_llm(text, 'model') + # 大模型解析(支持多图) + parsed_list = parse_with_llm(text, 'model', images) - # 补充必要字段 - parsed['id'] = uuid.uuid4().hex[:12] - parsed['created_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - parsed['visible'] = True - parsed['raw_text'] = text # 保存原始文本 - parsed['publish_date'] = parsed.get('publish_date', '') # 发布日期 - parsed['views'] = 0 # 热度初始化为0 - parsed['is_pinned'] = False # 置顶初始化为False - - # 保存 + # 处理多个产品 + results = [] models = load_data(MODELS_FILE) - models.append(parsed) + + for parsed in parsed_list: + # 补充必要字段 + parsed['id'] = uuid.uuid4().hex[:12] + parsed['created_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + parsed['visible'] = True + parsed['raw_text'] = text # 保存原始文本 + parsed['images'] = images # 保存图片 + parsed['publish_date'] = parsed.get('publish_date', '') + parsed['views'] = 0 + parsed['is_pinned'] = False + + models.append(parsed) + results.append(parsed) + save_data(MODELS_FILE, models) - return jsonify(parsed) + # 返回添加的产品列表 + return jsonify({'success': True, 'count': len(results), 'products': results}) @app.route('/api/gpus/smart-add', methods=['POST']) def api_smart_add_gpu(): - """智能添加GPU""" + """智能添加GPU(支持文本和多图解析)""" data = request.get_json() text = data.get('text', '') + images = data.get('images', []) - if not text: - return jsonify({'error': '文本不能为空'}), 400 + if not text and not images: + return jsonify({'error': '文本或图片不能都为空'}), 400 - parsed = parse_with_llm(text, 'gpu') - parsed['id'] = uuid.uuid4().hex[:12] - parsed['created_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - parsed['visible'] = True - parsed['raw_text'] = text - parsed['publish_date'] = parsed.get('publish_date', '') - parsed['views'] = 0 - parsed['is_pinned'] = False + parsed_list = parse_with_llm(text, 'gpu', images) + results = [] gpus = load_data(GPUS_FILE) - gpus.append(parsed) + + for parsed in parsed_list: + parsed['id'] = uuid.uuid4().hex[:12] + parsed['created_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + parsed['visible'] = True + parsed['raw_text'] = text + parsed['images'] = images + parsed['publish_date'] = parsed.get('publish_date', '') + parsed['views'] = 0 + parsed['is_pinned'] = False + + gpus.append(parsed) + results.append(parsed) + save_data(GPUS_FILE, gpus) - return jsonify(parsed) + return jsonify({'success': True, 'count': len(results), 'products': results}) @app.route('/api/cpus/smart-add', methods=['POST']) def api_smart_add_cpu(): - """智能添加CPU""" + """智能添加CPU(支持文本和多图解析)""" data = request.get_json() text = data.get('text', '') + images = data.get('images', []) - if not text: - return jsonify({'error': '文本不能为空'}), 400 + if not text and not images: + return jsonify({'error': '文本或图片不能都为空'}), 400 - parsed = parse_with_llm(text, 'cpu') - parsed['id'] = uuid.uuid4().hex[:12] - parsed['created_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - parsed['visible'] = True - parsed['raw_text'] = text - parsed['publish_date'] = parsed.get('publish_date', '') - parsed['views'] = 0 - parsed['is_pinned'] = False + parsed_list = parse_with_llm(text, 'cpu', images) + results = [] cpus = load_data(CPUS_FILE) - cpus.append(parsed) + + for parsed in parsed_list: + parsed['id'] = uuid.uuid4().hex[:12] + parsed['created_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + parsed['visible'] = True + parsed['raw_text'] = text + parsed['images'] = images + parsed['publish_date'] = parsed.get('publish_date', '') + parsed['views'] = 0 + parsed['is_pinned'] = False + + cpus.append(parsed) + results.append(parsed) + save_data(CPUS_FILE, cpus) - return jsonify(parsed) + return jsonify({'success': True, 'count': len(results), 'products': results}) @app.route('/api/items//smart-add', methods=['POST']) def api_smart_add_item(category_id): - """智能添加动态分类数据""" + """智能添加动态分类数据(支持文本和多图解析)""" data = request.get_json() text = data.get('text', '') + images = data.get('images', []) - if not text: - return jsonify({'error': '文本不能为空'}), 400 + if not text and not images: + return jsonify({'error': '文本或图片不能都为空'}), 400 - parsed = parse_with_llm(text, 'dynamic') - parsed['id'] = uuid.uuid4().hex[:12] - parsed['category_id'] = category_id - parsed['created_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - parsed['visible'] = True - parsed['raw_text'] = text - parsed['publish_date'] = parsed.get('publish_date', '') - parsed['views'] = 0 - parsed['is_pinned'] = False + parsed_list = parse_with_llm(text, 'dynamic', images) + results = [] items_file = DATA_DIR / f'items_{category_id}.json' items = load_data(items_file) - items.append(parsed) + + for parsed in parsed_list: + parsed['id'] = uuid.uuid4().hex[:12] + parsed['category_id'] = category_id + parsed['created_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + parsed['visible'] = True + parsed['raw_text'] = text + parsed['images'] = images + parsed['publish_date'] = parsed.get('publish_date', '') + parsed['views'] = 0 + parsed['is_pinned'] = False + + items.append(parsed) + results.append(parsed) + save_data(items_file, items) - return jsonify(parsed) + return jsonify({'success': True, 'count': len(results), 'products': results}) # ============ GPU API ============ diff --git a/templates/admin.html b/templates/admin.html index 82aa9d2..aa9d513 100644 --- a/templates/admin.html +++ b/templates/admin.html @@ -277,25 +277,52 @@