llm-library/data/papers.json

{
  "arch": {
    "id": "arch",
    "name": "模型架构设计",
    "icon": "🏗️",
    "desc": "Transformer 变体、注意力机制、MoE、位置编码、SSM 等",
    "color": "arch",
    "areas": [
      {
        "id": "transformer",
        "name": "Transformer 核心架构",
        "mainline": [
          {
            "title": "Attention Is All You Need",
            "authors": "Vaswani et al.",
            "year": 2017,
            "venue": "NeurIPS",
            "tags": [
              "起点"
            ],
            "arxiv": "1706.03762"
          },
          {
            "title": "GPT-2 / GPT-3: Language Models are Few-Shot Learners",
            "authors": "Radford et al. / Brown et al.",
            "year": 2020,
            "venue": "NeurIPS",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2005.14165"
          },
          {
            "title": "LLaMA: Open and Efficient Foundation Language Models",
            "authors": "Touvron et al.",
            "year": 2023,
            "venue": "Meta AI",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2302.13971"
          },
          {
            "title": "Llama 3 / Llama 4 (Scout/Maverick/Behemoth, MoE, 10M context)",
            "authors": "Meta AI",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2601.11659"
          },
          {
            "title": "DeepSeek-V3 / V3.2: MoE + MLA + MTP + Sparse Attention",
            "authors": "DeepSeek-AI",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2412.19437"
          },
          {
            "title": "DeepSeek-V4: 1.6T MoE + CSA+HCA Hybrid Attention + 1M Context",
            "authors": "DeepSeek-AI",
            "year": 2026,
            "venue": "DeepSeek",
            "tags": [
              "前沿"
            ],
            "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
          },
          {
            "title": "Kimi K2: Open Agentic Intelligence (1T MoE, 128K ctx, Muon optimizer)",
            "authors": "Moonshot AI",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2507.20534"
          },
          {
            "title": "Kimi K2.5: Visual Agentic Intelligence (native multimodal, Agent Swarm)",
            "authors": "Moonshot AI",
            "year": 2026,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2602.02276"
          }
        ],
        "branches": [
          {
            "title": "T5: Exploring the Limits of Transfer Learning",
            "authors": "Raffel et al.",
            "year": 2020,
            "venue": "JMLR",
            "tags": [
              "支线"
            ],
            "arxiv": "1910.10683"
          },
          {
            "title": "PaLM / PaLM-2: Pathways Language Model",
            "authors": "Chowdhery et al.",
            "year": 2022,
            "venue": "JMLR",
            "tags": [
              "支线"
            ],
            "arxiv": "2204.02311"
          },
          {
            "title": "Gemma 2 / Gemma 3 系列",
            "authors": "Google DeepMind",
            "year": 2024,
            "venue": "Google",
            "tags": [
              "支线"
            ],
            "arxiv": "2408.00118"
          }
        ],
        "forward": []
      },
      {
        "id": "attention",
        "name": "注意力机制演进",
        "mainline": [
          {
            "title": "Multi-Head Attention (MHA)",
            "authors": "Vaswani et al.",
            "year": 2017,
            "venue": "NeurIPS",
            "tags": [
              "起点"
            ],
            "arxiv": "1706.03762"
          },
          {
            "title": "MQA: Fast Transformer Decoding — One Write-Head Is All You Need",
            "authors": "Shazeer",
            "year": 2019,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "1911.02150"
          },
          {
            "title": "GQA: Training Generalized Multi-Query Transformer Models",
            "authors": "Ainslie et al.",
            "year": 2023,
            "venue": "EMNLP",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2305.13245"
          },
          {
            "title": "MLA: Multi-head Latent Attention (DeepSeek-V2/V3)",
            "authors": "DeepSeek-AI",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2405.04434"
          },
          {
            "title": "DeepSeek-V4: CSA+HCA Hybrid Attention (KV Cache → 10% of V3.2)",
            "authors": "DeepSeek-AI",
            "year": 2026,
            "venue": "DeepSeek",
            "tags": [
              "前沿"
            ],
            "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
          },
          {
            "title": "FlashAttention-3 / Sparse Attention 工程化",
            "authors": "Dao et al. / DeepSeek",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2407.08608"
          },
          {
            "title": "TEST",
            "authors": "Test",
            "year": 2026,
            "venue": "Test",
            "tags": [
              "test"
            ]
          }
        ],
        "branches": [
          {
            "title": "FlashAttention-1 / FlashAttention-2: IO-Aware Exact Attention",
            "authors": "Dao et al.",
            "year": 2022,
            "venue": "NeurIPS",
            "tags": [
              "支线"
            ],
            "arxiv": "2205.14135"
          },
          {
            "title": "Differential Transformer (DIFF Transformer)",
            "authors": "Ye et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2410.05258"
          }
        ],
        "forward": [
          {
            "title": "Engram: Conditional Memory via Scalable Lookup (N-gram O(1), 分离记忆与推理)",
            "authors": "DeepSeek-AI",
            "year": 2026,
            "venue": "arXiv",
            "tags": [
              "前瞻"
            ],
            "arxiv": "2601.07372"
          }
        ]
      },
      {
        "id": "moe",
        "name": "MoE 混合专家",
        "mainline": [
          {
            "title": "Sparsely-Gated MoE Layer: Outrageously Large Neural Networks",
            "authors": "Shazeer et al.",
            "year": 2017,
            "venue": "ICLR",
            "tags": [
              "起点"
            ],
            "arxiv": "1701.06538"
          },
          {
            "title": "Switch Transformers: Scaling to Trillion Parameter Models",
            "authors": "Fedus et al.",
            "year": 2022,
            "venue": "JMLR",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2101.03961"
          },
          {
            "title": "Mixtral of Experts (8x7B, 开放 MoE)",
            "authors": "Jiang et al.",
            "year": 2024,
            "venue": "Mistral AI",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2401.04088"
          },
          {
            "title": "DeepSeekMoE: Fine-Grained Expert Segmentation + Shared Experts",
            "authors": "DeepSeek-AI",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2401.06066"
          },
          {
            "title": "DeepSeek-V4: 1.6T参数 MoE + 领域专家独立训练后合并 + On-Policy Distillation",
            "authors": "DeepSeek-AI",
            "year": 2026,
            "venue": "DeepSeek",
            "tags": [
              "前沿"
            ],
            "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
          }
        ],
        "branches": [
          {
            "title": "GLaM: Efficient Scaling with Mixture-of-Experts",
            "authors": "Du et al.",
            "year": 2022,
            "venue": "ICML",
            "tags": [
              "支线"
            ],
            "arxiv": "2112.06905"
          },
          {
            "title": "DeepSeek-V3 Multi-Token Prediction (MTP)",
            "authors": "DeepSeek-AI",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2412.19437"
          },
          {
            "title": "Muon: Muon is Scalable for LLM Training (Kimi 核心优化器)",
            "authors": "Jordan et al.",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2502.16982"
          }
        ],
        "forward": []
      },
      {
        "id": "posenc",
        "name": "位置编码",
        "mainline": [
          {
            "title": "Sinusoidal Positional Encoding",
            "authors": "Vaswani et al.",
            "year": 2017,
            "venue": "NeurIPS",
            "tags": [
              "起点"
            ],
            "arxiv": "1706.03762"
          },
          {
            "title": "RoPE: Rotary Position Embedding",
            "authors": "Su et al.",
            "year": 2021,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2104.09864"
          },
          {
            "title": "ALiBi: Train Short, Test Long",
            "authors": "Press et al.",
            "year": 2022,
            "venue": "ICLR",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2108.12409"
          },
          {
            "title": "YaRN: Efficient Context Window Extension of LLMs",
            "authors": "Peng et al.",
            "year": 2023,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2309.00071"
          },
          {
            "title": "NoPE + 10M Token Context (Llama 4 Scout)",
            "authors": "Kazemnejad et al. / Meta",
            "year": 2025,
            "venue": "ICML / Meta",
            "tags": [
              "前沿"
            ],
            "arxiv": "2305.19466"
          }
        ],
        "branches": [
          {
            "title": "ReRoPE / SelfExtend: Training-Free Length Extension",
            "authors": "Su et al. / Jin et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2401.01325"
          }
        ],
        "forward": []
      },
      {
        "id": "norm",
        "name": "归一化与激活函数",
        "mainline": [
          {
            "title": "Layer Normalization",
            "authors": "Ba et al.",
            "year": 2016,
            "venue": "arXiv",
            "tags": [
              "起点"
            ],
            "arxiv": "1607.06450"
          },
          {
            "title": "RMSNorm: Root Mean Square Layer Normalization",
            "authors": "Zhang & Sennrich",
            "year": 2019,
            "venue": "NeurIPS",
            "tags": [
              "关键节点"
            ],
            "arxiv": "1910.07467"
          },
          {
            "title": "DeepNorm: DeepNet — Scaling Transformers to 1,000 Layers",
            "authors": "Wang et al.",
            "year": 2022,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2203.00555"
          },
          {
            "title": "SwiGLU / GLU Variants (成为行业标配)",
            "authors": "Shazeer et al.",
            "year": 2020,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2002.05202"
          },
          {
            "title": "mHC: Manifold-Constrained Hyper-Connections (DeepSeek-V4 基础)",
            "authors": "DeepSeek-AI",
            "year": 2026,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2512.24880"
          }
        ],
        "branches": [],
        "forward": []
      },
      {
        "id": "ssm",
        "name": "新兴架构 (SSM / Linear Attention)",
        "mainline": [
          {
            "title": "S4: Efficiently Modeling Long Sequences with Structured State Spaces",
            "authors": "Gu et al.",
            "year": 2022,
            "venue": "ICLR",
            "tags": [
              "起点"
            ],
            "arxiv": "2111.00396"
          },
          {
            "title": "Mamba: Linear-Time Sequence Modeling with Selective State Spaces",
            "authors": "Gu & Dao",
            "year": 2023,
            "venue": "COLM",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2312.00752"
          },
          {
            "title": "RWKV: Reinventing RNNs for the Transformer Era",
            "authors": "Peng et al.",
            "year": 2023,
            "venue": "EMNLP",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2305.13048"
          },
          {
            "title": "Mamba-2: Transformers are SSMs",
            "authors": "Dao & Gu",
            "year": 2024,
            "venue": "ICML",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2405.21060"
          },
          {
            "title": "Kimi Linear (K2.5 混合架构: Transformer + Linear Attention)",
            "authors": "Moonshot AI",
            "year": 2026,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2602.02276"
          },
          {
            "title": "Titans: Learning to Memorize at Test Time (Neural Memory)",
            "authors": "Behrouz et al.",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2501.00663"
          }
        ],
        "branches": [
          {
            "title": "RetNet: Retentive Network — A Successor to Transformer",
            "authors": "Sun et al.",
            "year": 2023,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2307.08621"
          },
          {
            "title": "Hymba: Hybrid Mamba-Transformer",
            "authors": "NVIDIA / Meta",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2411.13676"
          }
        ],
        "forward": [
          {
            "title": "Titans: Learning to Memorize at Test Time (神经记忆, 超越 Transformer)",
            "authors": "Behrouz et al.",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前瞻"
            ],
            "arxiv": "2501.00663"
          },
          {
            "title": "MatMul-free Language Modeling (消除矩阵乘法, 类脑计算)",
            "authors": "Zhu et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "前瞻"
            ],
            "arxiv": "2406.02528"
          }
        ]
      }
    ]
  },
  "multi": {
    "id": "multi",
    "name": "多模态",
    "icon": "🖼️",
    "desc": "视觉-语言、音频、视频、Omni 统一模态",
    "color": "multi",
    "areas": [
      {
        "id": "vision",
        "name": "视觉-语言模型",
        "mainline": [
          {
            "title": "CLIP: Learning Transferable Visual Models (对比学习奠基)",
            "authors": "Radford et al.",
            "year": 2021,
            "venue": "ICML",
            "tags": [
              "起点"
            ],
            "arxiv": "2103.00020"
          },
          {
            "title": "BLIP-2: Bootstrapping Language-Image Pre-training (Q-Former)",
            "authors": "Li et al.",
            "year": 2023,
            "venue": "ICML",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2301.12597"
          },
          {
            "title": "LLaVA / LLaVA-1.5: Visual Instruction Tuning",
            "authors": "Liu et al.",
            "year": 2023,
            "venue": "NeurIPS",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2304.08485"
          },
          {
            "title": "GPT-4o System Card: 原生多模态 (文本/图像/音频端到端)",
            "authors": "OpenAI",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2410.21276"
          },
          {
            "title": "Qwen3-VL / InternVL3 / Kimi K2.5 Visual: 开源 SOTA 视觉-语言",
            "authors": "Alibaba / Shanghai AI Lab / Moonshot",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2602.02276"
          }
        ],
        "branches": [
          {
            "title": "SigLIP: Sigmoid Loss for Language Image Pre-training",
            "authors": "Zhai et al.",
            "year": 2023,
            "venue": "ICCV",
            "tags": [
              "支线"
            ],
            "arxiv": "2303.15343"
          },
          {
            "title": "Flamingo: Visual Language Model for Few-Shot Learning",
            "authors": "Alayrac et al.",
            "year": 2022,
            "venue": "NeurIPS",
            "tags": [
              "支线"
            ],
            "arxiv": "2204.14198"
          }
        ],
        "forward": []
      },
      {
        "id": "omni",
        "name": "Omni 统一多模态",
        "mainline": [
          {
            "title": "Gemini 1.0 Technical Report: 原生多模态 (文本/图像/音频/视频)",
            "authors": "Google DeepMind",
            "year": 2023,
            "venue": "arXiv",
            "tags": [
              "起点"
            ],
            "arxiv": "2312.11805"
          },
          {
            "title": "GPT-4o System Card: 端到端 omni 多模态",
            "authors": "OpenAI",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2410.21276"
          },
          {
            "title": "Chameleon: Mixed-Modal Early-Fusion Foundation Models",
            "authors": "Team Chameleon (Meta)",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2405.09818"
          },
          {
            "title": "Qwen3.5-Omni Technical Report: 全模态 (文本/图像/音频/视频/语音)",
            "authors": "Alibaba Qwen",
            "year": 2026,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2604.15804"
          }
        ],
        "branches": [
          {
            "title": "EMU3: Next-Token Prediction is All You Need (Meta 统一多模态)",
            "authors": "Wang et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2409.18869"
          }
        ],
        "forward": []
      },
      {
        "id": "audio",
        "name": "音频/语音模型",
        "mainline": [
          {
            "title": "Whisper / Whisper-large-v3: Robust Speech Recognition",
            "authors": "Radford et al.",
            "year": 2023,
            "venue": "ICML",
            "tags": [
              "起点"
            ],
            "arxiv": "2212.04356"
          },
          {
            "title": "Qwen-Audio / Qwen2-Audio: 通用音频理解",
            "authors": "Alibaba",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2311.07919"
          },
          {
            "title": "CosyVoice 2: Scalable Streaming Speech Synthesis",
            "authors": "Du et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2412.10117"
          }
        ],
        "branches": [
          {
            "title": "Moshi: Real-time Speech Dialogue (Kyutai)",
            "authors": "Défossez et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2410.00037"
          }
        ],
        "forward": []
      },
      {
        "id": "video",
        "name": "视频理解",
        "mainline": [
          {
            "title": "Video-LLaMA / VideoChat: 视频-语言对话",
            "authors": "Zhang et al.",
            "year": 2023,
            "venue": "EMNLP",
            "tags": [
              "起点"
            ],
            "arxiv": "2306.02858"
          },
          {
            "title": "Gemini 1.5 Pro: 1M token 长上下文视频理解",
            "authors": "Google DeepMind",
            "year": 2024,
            "venue": "Google",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2403.05530"
          },
          {
            "title": "LLaVA-OneVision / Qwen3-VL 视频能力",
            "authors": "Liu et al. / Qwen",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2408.03326"
          }
        ],
        "branches": [],
        "forward": []
      }
    ]
  },
  "data": {
    "id": "data",
    "name": "数据工程",
    "icon": "📊",
    "desc": "采集清洗、数据配比、合成数据、质量筛选",
    "color": "data",
    "areas": [
      {
        "id": "dedup",
        "name": "数据清洗与去重",
        "mainline": [
          {
            "title": "Deduplicating Training Data Makes Language Models Better",
            "authors": "Lee et al.",
            "year": 2022,
            "venue": "ACL",
            "tags": [
              "起点"
            ],
            "arxiv": "2107.06499"
          },
          {
            "title": "The Pile: An 800GB Dataset of Diverse Text",
            "authors": "Gao et al.",
            "year": 2020,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2101.00027"
          },
          {
            "title": "CCNet / RefinedWeb / FineWeb: 大规模高质量 Web 数据",
            "authors": "Penedo et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2306.01116"
          },
          {
            "title": "DCLM: DataComp-LM — 数据筛选标准化基准",
            "authors": "Li et al.",
            "year": 2024,
            "venue": "NeurIPS",
            "tags": [
              "前沿"
            ],
            "arxiv": "2406.11794"
          },
          {
            "title": "Dolma / OLMo / datatrove: 全开放数据处理管线",
            "authors": "Soldaini et al.",
            "year": 2024,
            "venue": "ACL",
            "tags": [
              "前沿"
            ],
            "arxiv": "2402.00159"
          }
        ],
        "branches": [
          {
            "title": "C4 / mC4: Colossal Clean Crawled Corpus (T5)",
            "authors": "Raffel et al.",
            "year": 2020,
            "venue": "JMLR",
            "tags": [
              "支线"
            ],
            "arxiv": "1910.10683"
          }
        ],
        "forward": []
      },
      {
        "id": "mixing",
        "name": "数据配比与课程学习",
        "mainline": [
          {
            "title": "DoReMi: Optimizing Data Mixtures Speeds Up LM Pretraining",
            "authors": "Xie et al.",
            "year": 2023,
            "venue": "NeurIPS",
            "tags": [
              "起点"
            ],
            "arxiv": "2305.10429"
          },
          {
            "title": "DOGE: Domain-General Reweighting for LLM Pretraining",
            "authors": "Fan et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2405.13063"
          }
        ],
        "branches": [
          {
            "title": "Scaling Data-Constrained Language Models (多轮重复训练)",
            "authors": "Muennighoff et al.",
            "year": 2023,
            "venue": "NeurIPS",
            "tags": [
              "支线"
            ],
            "arxiv": "2305.16264"
          }
        ],
        "forward": []
      },
      {
        "id": "synthesis",
        "name": "合成数据生成",
        "mainline": [
          {
            "title": "Self-Instruct: Aligning LM with Self-Generated Instructions",
            "authors": "Wang et al.",
            "year": 2023,
            "venue": "ACL",
            "tags": [
              "起点"
            ],
            "arxiv": "2212.10560"
          },
          {
            "title": "Evol-Instruct (WizardLM) / Orca: 渐进式指令演化",
            "authors": "Xu et al.",
            "year": 2023,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2304.12244"
          },
          {
            "title": "Magpie: Alignment Data Synthesis from Scratch",
            "authors": "Xu et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2406.06859"
          },
          {
            "title": "Phi-4: 合成数据驱动的推理训练 + 代码合成",
            "authors": "Microsoft Research",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2412.08905"
          },
          {
            "title": "DeepSeek-R1 冷启动数据合成 (长推理链)",
            "authors": "DeepSeek-AI",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2501.12948"
          }
        ],
        "branches": [
          {
            "title": "Alpaca: A Strong, Replicable Instruction-Following Model (Stanford CRFM)",
            "authors": "Taori et al.",
            "year": 2023,
            "venue": "Stanford",
            "tags": [
              "支线"
            ],
            "arxiv": "2303.08774"
          }
        ],
        "forward": []
      }
    ]
  },
  "pretrain": {
    "id": "pretrain",
    "name": "预训练",
    "icon": "🔥",
    "desc": "训练目标、分布式并行、训练稳定性、Scaling Law、长上下文",
    "color": "pretrain",
    "areas": [
      {
        "id": "scaling",
        "name": "Scaling Law 与计算优化",
        "mainline": [
          {
            "title": "Scaling Laws for Neural Language Models (Kaplan)",
            "authors": "Kaplan et al.",
            "year": 2020,
            "venue": "arXiv",
            "tags": [
              "起点"
            ],
            "arxiv": "2001.08361"
          },
          {
            "title": "Chinchilla: Training Compute-Optimal Large Language Models",
            "authors": "Hoffmann et al.",
            "year": 2022,
            "venue": "NeurIPS",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2203.15556"
          },
          {
            "title": "Scaling Data-Constrained Language Models (重复训练 scaling)",
            "authors": "Muennighoff et al.",
            "year": 2023,
            "venue": "NeurIPS",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2305.16264"
          },
          {
            "title": "Qwen3 / DeepSeek-V3 实践: 超 Chinchilla ~15×~60× tokens",
            "authors": "Alibaba / DeepSeek",
            "year": 2025,
            "venue": "Industry",
            "tags": [
              "前沿"
            ],
            "arxiv": "2503.20630"
          },
          {
            "title": "Inference-Aware Scaling Laws (部署成本纳入 scaling)",
            "authors": "Sardana & Frankle",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2401.00448"
          }
        ],
        "branches": [],
        "forward": []
      },
      {
        "id": "distributed",
        "name": "分布式训练系统",
        "mainline": [
          {
            "title": "Megatron-LM: Training Multi-Billion Parameter Models (TP/PP)",
            "authors": "Shoeybi et al.",
            "year": 2020,
            "venue": "arXiv",
            "tags": [
              "起点"
            ],
            "arxiv": "1909.08053"
          },
          {
            "title": "ZeRO / ZeRO++ / ZeRO-3: Memory Optimizations (DeepSpeed)",
            "authors": "Rajbhandari et al.",
            "year": 2020,
            "venue": "SC",
            "tags": [
              "关键节点"
            ],
            "arxiv": "1910.02054"
          },
          {
            "title": "3D 并行 + 序列并行 + 专家并行 (DeepSeek-V3 部署)",
            "authors": "DeepSeek-AI",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2412.19437"
          }
        ],
        "branches": [
          {
            "title": "GPipe / PipeDream: Pipeline Parallelism 基础",
            "authors": "Huang et al.",
            "year": 2019,
            "venue": "NeurIPS",
            "tags": [
              "支线"
            ],
            "arxiv": "1811.06965"
          },
          {
            "title": "Ring Attention / Striped Attention: 长序列分布式训练",
            "authors": "Liu et al. / Brandon et al.",
            "year": 2023,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2310.01889"
          }
        ],
        "forward": []
      },
      {
        "id": "stability",
        "name": "训练稳定性与精度",
        "mainline": [
          {
            "title": "Mixed Precision Training (FP16 → 标准)",
            "authors": "Micikevicius et al.",
            "year": 2018,
            "venue": "ICLR",
            "tags": [
              "起点"
            ],
            "arxiv": "1710.03740"
          },
          {
            "title": "FP8 Training for LLMs (H100/B200 原生支持)",
            "authors": "Micikevicius et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2310.18313"
          }
        ],
        "branches": [
          {
            "title": "GLM / BLOOM: 千卡训练稳定性工程经验",
            "authors": "Zeng et al. / BigScience",
            "year": 2023,
            "venue": "KDD",
            "tags": [
              "支线"
            ],
            "arxiv": "2210.02414"
          }
        ],
        "forward": []
      }
    ]
  },
  "post": {
    "id": "post",
    "name": "后训练",
    "icon": "🎯",
    "desc": "SFT、RLHF/DPO/GRPO、推理增强、安全对齐",
    "color": "post",
    "areas": [
      {
        "id": "sft",
        "name": "SFT 监督微调",
        "mainline": [
          {
            "title": "Finetuned Language Models Are Zero-Shot Learners (FLAN)",
            "authors": "Chung et al.",
            "year": 2022,
            "venue": "ICLR",
            "tags": [
              "起点"
            ],
            "arxiv": "2109.01652"
          },
          {
            "title": "LIMA: Less Is More for Alignment (1,000 高质量样本)",
            "authors": "Zhou et al.",
            "year": 2023,
            "venue": "NeurIPS",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2305.11206"
          },
          {
            "title": "Tulu 3 / Orca 3: 系统化后训练管线",
            "authors": "AllenAI / MSR",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2411.15124"
          }
        ],
        "branches": [
          {
            "title": "LoRA: Low-Rank Adaptation of LLMs",
            "authors": "Hu et al.",
            "year": 2022,
            "venue": "ICLR",
            "tags": [
              "支线"
            ],
            "arxiv": "2106.09685"
          },
          {
            "title": "QLoRA: Efficient Finetuning of Quantized LLMs",
            "authors": "Dettmers et al.",
            "year": 2023,
            "venue": "NeurIPS",
            "tags": [
              "支线"
            ],
            "arxiv": "2305.14314"
          }
        ],
        "forward": []
      },
      {
        "id": "alignment",
        "name": "偏好对齐 (RLHF → DPO → GRPO)",
        "mainline": [
          {
            "title": "InstructGPT: Training Language Models to Follow Instructions (RLHF + PPO)",
            "authors": "Ouyang et al.",
            "year": 2022,
            "venue": "NeurIPS",
            "tags": [
              "起点"
            ],
            "arxiv": "2203.02155"
          },
          {
            "title": "Constitutional AI: Harmlessness from AI Feedback",
            "authors": "Bai et al.",
            "year": 2022,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2212.08073"
          },
          {
            "title": "DPO: Direct Preference Optimization",
            "authors": "Rafailov et al.",
            "year": 2023,
            "venue": "NeurIPS",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2305.18290"
          },
          {
            "title": "DeepSeek-R1 / GRPO: 纯 RL 驱动推理涌现 (无人工标注)",
            "authors": "DeepSeek-AI",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2501.12948"
          },
          {
            "title": "SimPO / ORPO / KTO: 参考模型不可知的对齐方法",
            "authors": "Meng et al.",
            "year": 2024,
            "venue": "ICML",
            "tags": [
              "前沿"
            ],
            "arxiv": "2405.14734"
          }
        ],
        "branches": [
          {
            "title": "RLAIF: Constitutional AI — RL from AI Feedback",
            "authors": "Bai et al.",
            "year": 2022,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2212.08073"
          }
        ],
        "forward": []
      },
      {
        "id": "reasoning",
        "name": "推理增强 (Reasoning)",
        "mainline": [
          {
            "title": "Chain-of-Thought Prompting Elicits Reasoning in LLMs",
            "authors": "Wei et al.",
            "year": 2022,
            "venue": "NeurIPS",
            "tags": [
              "起点"
            ],
            "arxiv": "2201.11903"
          },
          {
            "title": "Self-Consistency Improves Chain of Thought Reasoning",
            "authors": "Wang et al.",
            "year": 2023,
            "venue": "ICLR",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2203.11171"
          },
          {
            "title": "STaR: Self-Taught Reasoner / ReST (自学习推理链)",
            "authors": "Zelikman et al.",
            "year": 2022,
            "venue": "NeurIPS",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2203.14465"
          },
          {
            "title": "OpenAI o1 System Card / o3 System Card (推理时 Scaling)",
            "authors": "OpenAI",
            "year": 2024,
            "venue": "OpenAI",
            "tags": [
              "前沿"
            ],
            "arxiv": "2412.16720"
          },
          {
            "title": "DeepSeek-R1: 开源推理模型 (RL + Cold-Start SFT)",
            "authors": "DeepSeek-AI",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2501.12948"
          },
          {
            "title": "Kimi K2-Thinking (扩展推理, 256K上下文)",
            "authors": "Moonshot AI",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2507.20534"
          }
        ],
        "branches": [
          {
            "title": "Tree of Thoughts: Deliberate Problem Solving with LLMs",
            "authors": "Yao et al.",
            "year": 2023,
            "venue": "NeurIPS",
            "tags": [
              "支线"
            ],
            "arxiv": "2305.10601"
          },
          {
            "title": "PRM/ORM: Let's Verify Step by Step (过程/结果奖励模型)",
            "authors": "Lightman et al.",
            "year": 2024,
            "venue": "ICLR",
            "tags": [
              "支线"
            ],
            "arxiv": "2305.20050"
          }
        ],
        "forward": [
          {
            "title": "Quiet-STaR: Language Models Can Teach Themselves to Think Before Speaking",
            "authors": "Zelikman et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "前瞻"
            ],
            "arxiv": "2403.09629"
          }
        ]
      }
    ]
  },
  "compress": {
    "id": "compress",
    "name": "模型压缩",
    "icon": "📦",
    "desc": "量化、剪枝、蒸馏、KV Cache 压缩",
    "color": "compress",
    "areas": [
      {
        "id": "quant",
        "name": "量化 (Quantization)",
        "mainline": [
          {
            "title": "GPTQ: Accurate Post-Training Quantization for GPT",
            "authors": "Frantar et al.",
            "year": 2023,
            "venue": "ICLR",
            "tags": [
              "起点"
            ],
            "arxiv": "2210.17323"
          },
          {
            "title": "AWQ: Activation-aware Weight Quantization",
            "authors": "Lin et al.",
            "year": 2024,
            "venue": "MLSys",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2306.00978"
          },
          {
            "title": "SmoothQuant: Accurate and Efficient Post-Training Quantization",
            "authors": "Xiao et al.",
            "year": 2024,
            "venue": "ICML",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2211.10438"
          },
          {
            "title": "QuaRot / SpinQuant: Outlier-Free 4-bit Quantization",
            "authors": "Ashkboos et al.",
            "year": 2025,
            "venue": "ICLR",
            "tags": [
              "前沿"
            ],
            "arxiv": "2404.00456"
          },
          {
            "title": "DeepSeek-V4 FP4 QAT: 4-bit 量化感知训练 (99.7% recall)",
            "authors": "DeepSeek-AI",
            "year": 2026,
            "venue": "DeepSeek",
            "tags": [
              "前沿"
            ],
            "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
          }
        ],
        "branches": [
          {
            "title": "FP8 / FP4 推理 (NVIDIA B200 原生支持)",
            "authors": "Micikevicius et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2310.18313"
          }
        ],
        "forward": [
          {
            "title": "BitNet b1.58: 1.58-bit LLM (三值量化 {−1,0,1})",
            "authors": "Wang et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "前瞻"
            ],
            "arxiv": "2402.17764"
          }
        ]
      },
      {
        "id": "prune",
        "name": "剪枝 (Pruning)",
        "mainline": [
          {
            "title": "SparseGPT: Massive Language Models Can Be Accurately Pruned in One-Shot",
            "authors": "Frantar & Alistarh",
            "year": 2023,
            "venue": "ICML",
            "tags": [
              "起点"
            ],
            "arxiv": "2301.00774"
          },
          {
            "title": "Wanda: A Simple and Effective Pruning Approach for LLMs",
            "authors": "Sun et al.",
            "year": 2024,
            "venue": "ICLR",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2306.11695"
          },
          {
            "title": "SliceGPT: Compress LLMs by Deleting Rows/Columns",
            "authors": "Ashkboos et al.",
            "year": 2024,
            "venue": "NeurIPS",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2401.15024"
          },
          {
            "title": "LLM-Pruner / ShortGPT: 结构化剪枝 + 层剪枝",
            "authors": "Ma et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2305.11627"
          }
        ],
        "branches": [],
        "forward": []
      },
      {
        "id": "distill",
        "name": "知识蒸馏",
        "mainline": [
          {
            "title": "Distilling the Knowledge in a Neural Network",
            "authors": "Hinton et al.",
            "year": 2015,
            "venue": "arXiv",
            "tags": [
              "起点"
            ],
            "arxiv": "1503.02531"
          },
          {
            "title": "Orca: Progressive Learning from Complex Explanation Traces",
            "authors": "Mukherjee et al.",
            "year": 2023,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2306.02707"
          },
          {
            "title": "DeepSeek-R1 蒸馏 (R1 → Qwen/Llama 小模型)",
            "authors": "DeepSeek-AI",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2501.12948"
          },
          {
            "title": "DeepSeek-V4 On-Policy Distillation: 10 Teacher Models → 1 Student",
            "authors": "DeepSeek-AI",
            "year": 2026,
            "venue": "DeepSeek",
            "tags": [
              "前沿"
            ],
            "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
          }
        ],
        "branches": [],
        "forward": []
      },
      {
        "id": "kvcache",
        "name": "KV Cache 压缩",
        "mainline": [
          {
            "title": "StreamingLLM: Efficient Streaming Language Models with Attention Sinks",
            "authors": "Xiao et al.",
            "year": 2024,
            "venue": "ICLR",
            "tags": [
              "起点"
            ],
            "arxiv": "2309.17453"
          },
          {
            "title": "H2O: Heavy-Hitter Oracle for Efficient KV Cache Eviction",
            "authors": "Zhang et al.",
            "year": 2023,
            "venue": "NeurIPS",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2306.14048"
          },
          {
            "title": "FastGen / KIVI / CacheGen: 自适应 KV 压缩与量化",
            "authors": "Ge et al.",
            "year": 2024,
            "venue": "ICLR / SIGCOMM",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2310.07240"
          },
          {
            "title": "DeepSeek-V4 CSA+HCA: KV Cache 降至 V3.2 的 10%, FLOPs 降至 27%",
            "authors": "DeepSeek-AI",
            "year": 2026,
            "venue": "DeepSeek",
            "tags": [
              "前沿"
            ],
            "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
          },
          {
            "title": "GQA/MQA: 从架构层面减少 KV 头 (Llama3/DeepSeek 标配)",
            "authors": "Ainslie et al.",
            "year": 2023,
            "venue": "EMNLP",
            "tags": [
              "前沿"
            ],
            "arxiv": "2305.13245"
          }
        ],
        "branches": [],
        "forward": []
      }
    ]
  },
  "deploy": {
    "id": "deploy",
    "name": "部署推理",
    "icon": "🚀",
    "desc": "推理引擎、加速技术、服务化、边缘部署、成本优化",
    "color": "deploy",
    "areas": [
      {
        "id": "engine",
        "name": "推理引擎与框架",
        "mainline": [
          {
            "title": "vLLM: Easy, Fast, and Cheap LLM Serving with PagedAttention",
            "authors": "Kwon et al.",
            "year": 2023,
            "venue": "SOSP",
            "tags": [
              "起点"
            ],
            "arxiv": "2309.06180"
          },
          {
            "title": "SGLang: Efficient LLM Programming and Serving",
            "authors": "Zheng et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2312.07104"
          }
        ],
        "branches": [
          {
            "title": "MII / DeepSpeed-FastGen: Dynamic SplitFuse",
            "authors": "Microsoft",
            "year": 2024,
            "venue": "Industry",
            "tags": [
              "支线"
            ],
            "arxiv": "2401.08671"
          }
        ],
        "forward": []
      },
      {
        "id": "accel",
        "name": "推理加速技术",
        "mainline": [
          {
            "title": "Speculative Decoding: Fast Inference from Transformers",
            "authors": "Leviathan et al. / Chen et al.",
            "year": 2023,
            "venue": "ICML",
            "tags": [
              "起点"
            ],
            "arxiv": "2211.17192"
          },
          {
            "title": "Medusa: Simple LLM Inference Acceleration with Multiple Heads",
            "authors": "Cai et al.",
            "year": 2024,
            "venue": "ICML",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2401.10774"
          },
          {
            "title": "EAGLE / EAGLE-2: 推测解码框架 (无需 draft model)",
            "authors": "Li et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2401.15077"
          },
          {
            "title": "Continuous Batching + Prefill-Decode Disaggregation",
            "authors": "Patel et al. / Yu et al.",
            "year": 2024,
            "venue": "OSDI",
            "tags": [
              "前沿"
            ],
            "arxiv": "2401.08671"
          },
          {
            "title": "DeepSeek-V3 Multi-Token Prediction (MTP) + V4 Flash 推理",
            "authors": "DeepSeek-AI",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2412.19437"
          }
        ],
        "branches": [
          {
            "title": "FlashDecoding / FlashInfer: GPU 算子级加速",
            "authors": "Dao et al. / Ye et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2310.12049"
          },
          {
            "title": "KV Cache Offloading (GPU ↔ CPU 动态迁移)",
            "authors": "Sheng et al.",
            "year": 2023,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2303.06865"
          }
        ],
        "forward": []
      }
    ]
  },
  "agent": {
    "id": "agent",
    "name": "Agent & 应用",
    "icon": "🤖",
    "desc": "Tool Use、RAG、Multi-Agent、MCP/A2A、Computer Use",
    "color": "agent",
    "areas": [
      {
        "id": "react",
        "name": "Agent 核心 (ReAct / Tool Use / Computer Use)",
        "mainline": [
          {
            "title": "ReAct: Synergizing Reasoning and Acting in Language Models",
            "authors": "Yao et al.",
            "year": 2023,
            "venue": "ICLR",
            "tags": [
              "起点"
            ],
            "arxiv": "2210.03629"
          },
          {
            "title": "Toolformer: Language Models Can Teach Themselves to Use Tools",
            "authors": "Schick et al.",
            "year": 2023,
            "venue": "NeurIPS",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2302.04761"
          },
          {
            "title": "SWE-Agent: Agent-Computer Interfaces for Software Engineering",
            "authors": "Yang et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2405.15793"
          },
          {
            "title": "SEAgent: Self-Evolving Computer Use Agent (GRPO 自主学习)",
            "authors": "Sun et al.",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2508.04700"
          }
        ],
        "branches": [
          {
            "title": "Voyager: Open-Ended Embodied Agent with LLMs (Minecraft)",
            "authors": "Wang et al.",
            "year": 2023,
            "venue": "NeurIPS",
            "tags": [
              "支线"
            ],
            "arxiv": "2305.16291"
          },
          {
            "title": "Claude Opus 4 & Sonnet 4 System Card (Agent 安全评估)",
            "authors": "Anthropic",
            "year": 2025,
            "venue": "Anthropic",
            "tags": [
              "支线"
            ],
            "pdf": "https://www-cdn.anthropic.com/6be99a52cb68eb70eb9572b4cafad13df32ed995.pdf"
          }
        ],
        "forward": []
      },
      {
        "id": "rag",
        "name": "RAG 检索增强生成",
        "mainline": [
          {
            "title": "RAG: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks",
            "authors": "Lewis et al.",
            "year": 2020,
            "venue": "NeurIPS",
            "tags": [
              "起点"
            ],
            "arxiv": "2005.11401"
          },
          {
            "title": "Self-RAG: Learning to Retrieve, Generate, and Critique",
            "authors": "Asai et al.",
            "year": 2024,
            "venue": "ICLR",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2310.11511"
          },
          {
            "title": "GraphRAG: From Local to Global — Graph-based RAG",
            "authors": "Edge et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2404.16130"
          },
          {
            "title": "Agentic RAG / Corrective RAG / Adaptive RAG",
            "authors": "Yan et al. / Asai et al.",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2401.15884"
          }
        ],
        "branches": [
          {
            "title": "HyDE: Precise Zero-Shot Dense Retrieval without Relevant Labels",
            "authors": "Gao et al.",
            "year": 2023,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2212.10496"
          },
          {
            "title": "ColBERT / ColPali: Late-Interaction / Visual RAG",
            "authors": "Khattab et al.",
            "year": 2022,
            "venue": "SIGIR",
            "tags": [
              "支线"
            ],
            "arxiv": "2112.01488"
          }
        ],
        "forward": []
      },
      {
        "id": "multiagent",
        "name": "Multi-Agent & 协议",
        "mainline": [
          {
            "title": "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
            "authors": "Wu et al.",
            "year": 2023,
            "venue": "arXiv",
            "tags": [
              "起点"
            ],
            "arxiv": "2308.08155"
          },
          {
            "title": "MetaGPT / ChatDev / CrewAI: 多 Agent 协作框架",
            "authors": "Hong et al. / Qian et al.",
            "year": 2024,
            "venue": "ACL",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2308.00352"
          },
          {
            "title": "Kimi K2.5 Agent Swarm: 100 子 Agent / 1,500 工具调用",
            "authors": "Moonshot AI",
            "year": 2026,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2602.02276"
          }
        ],
        "branches": [
          {
            "title": "Gorilla: Large Language Model Connected with Massive APIs",
            "authors": "Patil et al.",
            "year": 2023,
            "venue": "ICML",
            "tags": [
              "支线"
            ],
            "arxiv": "2305.15334"
          }
        ],
        "forward": []
      }
    ]
  },
  "eval": {
    "id": "eval",
    "name": "评估体系",
    "icon": "📏",
    "desc": "通用、推理、编码、Agent、安全评估",
    "color": "eval",
    "areas": [
      {
        "id": "general",
        "name": "通用能力评估",
        "mainline": [
          {
            "title": "MMLU: Measuring Massive Multitask Language Understanding",
            "authors": "Hendrycks et al.",
            "year": 2021,
            "venue": "ICLR",
            "tags": [
              "起点"
            ],
            "arxiv": "2009.03300"
          },
          {
            "title": "BIG-bench: Beyond the Imitation Game (204 tasks)",
            "authors": "Srivastava et al.",
            "year": 2023,
            "venue": "TMLR",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2206.04615"
          },
          {
            "title": "MMLU-Pro: A More Robust and Challenging Benchmark",
            "authors": "Wang et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2406.01574"
          },
          {
            "title": "LiveBench: A Challenging, Contamination-Free LLM Benchmark",
            "authors": "White et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2406.19314"
          },
          {
            "title": "Humanity's Last Exam (HLE): 人类知识极限测试",
            "authors": "Phan et al.",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2501.14249"
          }
        ],
        "branches": [
          {
            "title": "C-Eval / CMMLU: 中文评估",
            "authors": "Huang et al.",
            "year": 2023,
            "venue": "arXiv",
            "tags": [
              "支线"
            ],
            "arxiv": "2305.08322"
          }
        ],
        "forward": []
      },
      {
        "id": "reasoning",
        "name": "推理 & 数学评估",
        "mainline": [
          {
            "title": "GSM8K / MATH: 数学推理基础",
            "authors": "Cobbe et al. / Hendrycks et al.",
            "year": 2021,
            "venue": "NeurIPS",
            "tags": [
              "起点"
            ],
            "arxiv": "2103.03874"
          },
          {
            "title": "GPQA: A Graduate-Level Google-Proof Q&A Benchmark",
            "authors": "Rein et al.",
            "year": 2023,
            "venue": "NeurIPS",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2311.12022"
          },
          {
            "title": "BBH: Challenging BIG-Bench Tasks (BIG-Bench Hard)",
            "authors": "Suzgun et al.",
            "year": 2023,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2210.09261"
          },
          {
            "title": "HLE (Humanity's Last Exam): 3000问题极限测试",
            "authors": "Phan et al.",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2501.14249"
          }
        ],
        "branches": [],
        "forward": []
      },
      {
        "id": "code",
        "name": "编码评估",
        "mainline": [
          {
            "title": "HumanEval / MBPP: 函数级代码生成",
            "authors": "Chen et al. / Austin et al.",
            "year": 2021,
            "venue": "NeurIPS",
            "tags": [
              "起点"
            ],
            "arxiv": "2107.03374"
          },
          {
            "title": "SWE-bench: Can Language Models Resolve Real-World GitHub Issues?",
            "authors": "Jimenez et al.",
            "year": 2024,
            "venue": "ICLR",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2310.06770"
          },
          {
            "title": "LiveCodeBench: Holistic and Contamination-Free Coding",
            "authors": "Jain et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2408.07935"
          },
          {
            "title": "BigCodeBench: Benchmarking Code Generation with Diverse Tasks",
            "authors": "Zhuo et al.",
            "year": 2025,
            "venue": "arXiv",
            "tags": [
              "前沿"
            ],
            "arxiv": "2406.15877"
          }
        ],
        "branches": [],
        "forward": []
      },
      {
        "id": "agent_eval",
        "name": "Agent 评估",
        "mainline": [
          {
            "title": "BFCL: Berkeley Function Calling Leaderboard",
            "authors": "Yan et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "起点"
            ],
            "arxiv": "2402.16053"
          },
          {
            "title": "τ-bench: Agent Tool Use & Task Completion",
            "authors": "Yao et al.",
            "year": 2024,
            "venue": "arXiv",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2406.12045"
          },
          {
            "title": "WebArena / VisualWebArena: 真实 Web 任务 Agent 评估",
            "authors": "Zhou et al.",
            "year": 2024,
            "venue": "ICLR",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2307.13854"
          },
          {
            "title": "GAIA: A Benchmark for General AI Assistants",
            "authors": "Mialon et al.",
            "year": 2023,
            "venue": "NeurIPS",
            "tags": [
              "关键节点"
            ],
            "arxiv": "2311.12983"
          }
        ],
        "branches": [],
        "forward": []
      }
    ]
  }
}