param-hub-python/data/knowledge.json

[
  {"id": "k001", "title": "什么是参数量？", "category": "ai-models", "icon": "ri-calculator-line", "content": "参数量（Parameters）是衡量大模型规模的指标，表示模型中权重参数的数量。例如 GPT-3 有 175B 参数，即约1750亿个参数。", "detail": "参数量决定了模型的容量和表达能力。一般来说，参数量越大，模型能力越强，但也需要更多计算资源。\n\n常见规模分类：\n- 小模型：<1B (适合边缘设备)\n- 中模型：1B-10B (消费级GPU可运行)\n- 大模型：10B-100B (需要多GPU)\n- 超大模型：>100B (需要数据中心)", "order": 1},
  {"id": "k002", "title": "什么是上下文长度？", "category": "ai-models", "icon": "ri-text-wrap", "content": "上下文长度（Context Length）是模型能处理的输入文本最大长度，以token为单位计量。更长的上下文意味着模型可以理解更长的文档或对话历史。", "detail": "常见长度：\n- 4K：传统长度，适合简单对话\n- 32K：中等长度，适合长文档\n- 128K：超长上下文，如GPT-4 Turbo\n- 200K：超长上下文，如Claude 3", "order": 2},
  {"id": "k003", "title": "什么是量化？", "category": "ai-models", "icon": "ri-scales-3-line", "content": "量化（Quantization）是将模型参数从高精度转换为低精度，减少显存占用和计算量。如FP16→INT8→INT4，精度损失可控，资源节省显著。", "detail": "量化效果：\n- FP32→FP16: 显存减半，精度基本不变\n- FP16→INT8: 显存再减半，精度略降\n- INT8→INT4: 显存再减半，需特殊技术\n\n推荐工具：llama.cpp、GPTQ、AWQ等", "order": 3},
  {"id": "k004", "title": "什么是MMLU？", "category": "ai-models", "icon": "ri-bar-chart-box-line", "content": "MMLU（Massive Multitask Language Understanding）是评估大模型综合能力的标准测试集，覆盖57个学科领域。", "detail": "分数参考：\n- 60-70%：入门级，如Llama 2 70B\n- 70-80%：中等水平，如GPT-3.5\n- 80-90%：优秀水平，如GPT-4、Claude 3 Opus", "order": 4},
  {"id": "k005", "title": "如何计算显存需求？", "category": "gpus", "icon": "ri-memory-line", "content": "模型显存需求 ≈ 参数量 × 每参数字节数 × 1.3（含KV Cache开销）", "detail": "计算公式：\n- FP32: 参数量 × 4字节 × 1.3\n- FP16: 参数量 × 2字节 × 1.3\n- INT8: 参数量 × 1字节 × 1.3\n- INT4: 参数量 × 0.5字节 × 1.3\n\n例如：7B模型FP16加载需要约 7 × 2 × 1.3 ≈ 18GB显存（参数量以十亿为单位时，结果单位为GB）", "order": 1},
  {"id": "k006", "title": "GPU架构演进", "category": "gpus", "icon": "ri-history-line", "content": "NVIDIA GPU架构从Fermi到Hopper，每一代都有显著提升。了解架构有助于选择合适的GPU。", "detail": "主要架构：\n- Volta (2017): V100, 引入Tensor Core\n- Turing (2018): RTX 20系列, RT Core\n- Ampere (2020): A100, RTX 30系列\n- Hopper (2022): H100, FP8支持\n- Ada Lovelace (2022): RTX 40系列, L40S", "order": 2},
  {"id": "k007", "title": "CPU核心数选择", "category": "cpus", "icon": "ri-database-2-line", "content": "CPU核心数的选择取决于应用场景。更多核心适合并行任务，但单核性能也很重要。", "detail": "场景推荐：\n- 办公/日常：4-6核足够\n- 开发/编译：8-16核\n- 服务器/虚拟化：16-64核\n- 高性能计算：64核以上\n\n注意：AI训练主要依赖GPU，CPU主要用于数据预处理", "order": 1}
]