Files
llm-library/data/papers.json

2256 lines
62 KiB
JSON
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"arch": {
"id": "arch",
"name": "模型架构设计",
"icon": "🏗️",
"desc": "Transformer 变体、注意力机制、MoE、位置编码、SSM 等",
"color": "arch",
"areas": [
{
"id": "transformer",
"name": "Transformer 核心架构",
"mainline": [
{
"title": "Attention Is All You Need",
"authors": "Vaswani et al.",
"year": 2017,
"venue": "NeurIPS",
"tags": [
"起点"
],
"arxiv": "1706.03762"
},
{
"title": "GPT-2 / GPT-3: Language Models are Few-Shot Learners",
"authors": "Radford et al. / Brown et al.",
"year": 2020,
"venue": "NeurIPS",
"tags": [
"关键节点"
],
"arxiv": "2005.14165"
},
{
"title": "LLaMA: Open and Efficient Foundation Language Models",
"authors": "Touvron et al.",
"year": 2023,
"venue": "Meta AI",
"tags": [
"关键节点"
],
"arxiv": "2302.13971"
},
{
"title": "DeepSeek-V3 / V3.2: MoE + MLA + MTP + Sparse Attention",
"authors": "DeepSeek-AI",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2412.19437"
},
{
"title": "Kimi K2: Open Agentic Intelligence (1T MoE, 128K ctx, Muon optimizer)",
"authors": "Moonshot AI",
"year": 2025,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2507.20534"
},
{
"title": "Llama 3 / Llama 3.1: The Llama 3 Herd of Models (405B dense, multilingual)",
"authors": "Meta AI",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2407.21783"
},
{
"title": "DeepSeek-V4: 1.6T MoE + CSA+HCA Hybrid Attention + 1M Context",
"authors": "DeepSeek-AI",
"year": 2026,
"venue": "DeepSeek",
"tags": [
"前沿"
],
"pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
},
{
"title": "Kimi K2.5: Visual Agentic Intelligence (native multimodal, Agent Swarm)",
"authors": "Moonshot AI",
"year": 2026,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2602.02276"
}
],
"branches": [
{
"title": "T5: Exploring the Limits of Transfer Learning",
"authors": "Raffel et al.",
"year": 2020,
"venue": "JMLR",
"tags": [
"支线"
],
"arxiv": "1910.10683"
},
{
"title": "PaLM / PaLM-2: Pathways Language Model",
"authors": "Chowdhery et al.",
"year": 2022,
"venue": "JMLR",
"tags": [
"支线"
],
"arxiv": "2204.02311"
},
{
"title": "Gemma 2 / Gemma 3 系列",
"authors": "Google DeepMind",
"year": 2024,
"venue": "Google",
"tags": [
"支线"
],
"arxiv": "2408.00118"
}
],
"forward": []
},
{
"id": "attention",
"name": "注意力机制演进",
"mainline": [
{
"title": "Multi-Head Attention (MHA)",
"authors": "Vaswani et al.",
"year": 2017,
"venue": "NeurIPS",
"tags": [
"起点"
],
"arxiv": "1706.03762"
},
{
"title": "MQA: Fast Transformer Decoding — One Write-Head Is All You Need",
"authors": "Shazeer",
"year": 2019,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "1911.02150"
},
{
"title": "GQA: Training Generalized Multi-Query Transformer Models",
"authors": "Ainslie et al.",
"year": 2023,
"venue": "EMNLP",
"tags": [
"关键节点"
],
"arxiv": "2305.13245"
},
{
"title": "MLA: Multi-head Latent Attention (DeepSeek-V2/V3)",
"authors": "DeepSeek-AI",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2405.04434"
},
{
"title": "FlashAttention-3 / Sparse Attention 工程化",
"authors": "Dao et al. / DeepSeek",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2407.08608"
},
{
"title": "MiniMax-01: Lightning Attention + MoE (456B, 45.9B active)",
"authors": "MiniMax",
"year": 2025,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2501.08313"
},
{
"title": "DeepSeek-V4: CSA+HCA Hybrid Attention (KV Cache → 10% of V3.2)",
"authors": "DeepSeek-AI",
"year": 2026,
"venue": "DeepSeek",
"tags": [
"前沿"
],
"pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
}
],
"branches": [
{
"title": "FlashAttention-1 / FlashAttention-2: IO-Aware Exact Attention",
"authors": "Dao et al.",
"year": 2022,
"venue": "NeurIPS",
"tags": [
"支线"
],
"arxiv": "2205.14135"
},
{
"title": "Differential Transformer (DIFF Transformer)",
"authors": "Ye et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2410.05258"
}
],
"forward": [
{
"title": "Engram: Conditional Memory via Scalable Lookup (N-gram O(1), 分离记忆与推理)",
"authors": "DeepSeek-AI",
"year": 2026,
"venue": "arXiv",
"tags": [
"前瞻"
],
"arxiv": "2601.07372"
}
]
},
{
"id": "moe",
"name": "MoE 混合专家",
"mainline": [
{
"title": "Sparsely-Gated MoE Layer: Outrageously Large Neural Networks",
"authors": "Shazeer et al.",
"year": 2017,
"venue": "ICLR",
"tags": [
"起点"
],
"arxiv": "1701.06538"
},
{
"title": "Switch Transformers: Scaling to Trillion Parameter Models",
"authors": "Fedus et al.",
"year": 2022,
"venue": "JMLR",
"tags": [
"关键节点"
],
"arxiv": "2101.03961"
},
{
"title": "DeepSeekMoE: Fine-Grained Expert Segmentation + Shared Experts",
"authors": "DeepSeek-AI",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2401.06066"
},
{
"title": "Mixtral of Experts (8x7B, 开放 MoE)",
"authors": "Jiang et al.",
"year": 2024,
"venue": "Mistral AI",
"tags": [
"关键节点"
],
"arxiv": "2401.04088"
},
{
"title": "DeepSeek-V4: 1.6T参数 MoE + 领域专家独立训练后合并 + On-Policy Distillation",
"authors": "DeepSeek-AI",
"year": 2026,
"venue": "DeepSeek",
"tags": [
"前沿"
],
"pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
}
],
"branches": [
{
"title": "GLaM: Efficient Scaling with Mixture-of-Experts",
"authors": "Du et al.",
"year": 2022,
"venue": "ICML",
"tags": [
"支线"
],
"arxiv": "2112.06905"
},
{
"title": "DeepSeek-V3 Multi-Token Prediction (MTP)",
"authors": "DeepSeek-AI",
"year": 2024,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2412.19437"
},
{
"title": "Muon: Muon is Scalable for LLM Training (Kimi 核心优化器)",
"authors": "Liu et al.",
"year": 2025,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2502.16982"
}
],
"forward": []
},
{
"id": "posenc",
"name": "位置编码",
"mainline": [
{
"title": "Sinusoidal Positional Encoding",
"authors": "Vaswani et al.",
"year": 2017,
"venue": "NeurIPS",
"tags": [
"起点"
],
"arxiv": "1706.03762"
},
{
"title": "RoPE: Rotary Position Embedding",
"authors": "Su et al.",
"year": 2021,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2104.09864"
},
{
"title": "ALiBi: Train Short, Test Long",
"authors": "Press et al.",
"year": 2022,
"venue": "ICLR",
"tags": [
"关键节点"
],
"arxiv": "2108.12409"
},
{
"title": "YaRN: Efficient Context Window Extension of LLMs",
"authors": "Peng et al.",
"year": 2023,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2309.00071"
},
{
"title": "NoPE + 10M Token Context (Llama 4 Scout)",
"authors": "Kazemnejad et al. / Meta",
"year": 2025,
"venue": "NeurIPS / Meta",
"tags": [
"前沿"
],
"arxiv": "2305.19466"
}
],
"branches": [
{
"title": "ReRoPE / SelfExtend: Training-Free Length Extension",
"authors": "Su et al. / Jin et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2401.01325"
}
],
"forward": []
},
{
"id": "norm",
"name": "归一化与激活函数",
"mainline": [
{
"title": "Layer Normalization",
"authors": "Ba et al.",
"year": 2016,
"venue": "arXiv",
"tags": [
"起点"
],
"arxiv": "1607.06450"
},
{
"title": "RMSNorm: Root Mean Square Layer Normalization",
"authors": "Zhang & Sennrich",
"year": 2019,
"venue": "NeurIPS",
"tags": [
"关键节点"
],
"arxiv": "1910.07467"
},
{
"title": "SwiGLU / GLU Variants (成为行业标配)",
"authors": "Shazeer",
"year": 2020,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2002.05202"
},
{
"title": "DeepNorm: DeepNet — Scaling Transformers to 1,000 Layers",
"authors": "Wang et al.",
"year": 2022,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2203.00555"
},
{
"title": "mHC: Manifold-Constrained Hyper-Connections (DeepSeek-V4 基础)",
"authors": "DeepSeek-AI",
"year": 2026,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2512.24880"
}
],
"branches": [],
"forward": []
},
{
"id": "ssm",
"name": "新兴架构 (SSM / Linear Attention)",
"mainline": [
{
"title": "S4: Efficiently Modeling Long Sequences with Structured State Spaces",
"authors": "Gu et al.",
"year": 2022,
"venue": "ICLR",
"tags": [
"起点"
],
"arxiv": "2111.00396"
},
{
"title": "Mamba: Linear-Time Sequence Modeling with Selective State Spaces",
"authors": "Gu & Dao",
"year": 2023,
"venue": "COLM",
"tags": [
"关键节点"
],
"arxiv": "2312.00752"
},
{
"title": "RWKV: Reinventing RNNs for the Transformer Era",
"authors": "Peng et al.",
"year": 2023,
"venue": "EMNLP",
"tags": [
"关键节点"
],
"arxiv": "2305.13048"
},
{
"title": "Mamba-2: Transformers are SSMs",
"authors": "Dao & Gu",
"year": 2024,
"venue": "ICML",
"tags": [
"关键节点"
],
"arxiv": "2405.21060"
},
{
"title": "Titans: Learning to Memorize at Test Time (Neural Memory)",
"authors": "Behrouz et al.",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2501.00663"
},
{
"title": "Kimi Linear (K2.5 混合架构: Transformer + Linear Attention)",
"authors": "Moonshot AI",
"year": 2026,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2602.02276"
}
],
"branches": [
{
"title": "RetNet: Retentive Network — A Successor to Transformer",
"authors": "Sun et al.",
"year": 2023,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2307.08621"
},
{
"title": "xLSTM: Extended Long Short-Term Memory (sLSTM + mLSTM, matrix memory, linear attention revival)",
"authors": "Beck et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2405.04517"
},
{
"title": "Hymba: Hybrid Mamba-Transformer",
"authors": "Dong et al. / NVIDIA",
"year": 2025,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2411.13676"
}
],
"forward": [
{
"title": "Byte Latent Transformer: Patches Scale Better Than Tokens (tokenization-free, entropy-based patching)",
"authors": "Pagnoni et al. / Meta FAIR",
"year": 2024,
"venue": "arXiv",
"tags": [
"前瞻"
],
"arxiv": "2412.09871"
},
{
"title": "Large Concept Models: Language Modeling in a Sentence Representation Space (SONAR, 200 languages)",
"authors": "Barrault et al. / Meta FAIR",
"year": 2024,
"venue": "arXiv",
"tags": [
"前瞻"
],
"arxiv": "2412.08821"
},
{
"title": "MatMul-free Language Modeling (消除矩阵乘法, 类脑计算)",
"authors": "Zhu et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"前瞻"
],
"arxiv": "2406.02528"
},
{
"title": "LLaDA: Large Language Diffusion with mAsking (diffusion-based LLM, 8B, non-autoregressive generation)",
"authors": "Nie et al.",
"year": 2025,
"venue": "arXiv",
"tags": [
"前瞻"
],
"arxiv": "2502.09992"
},
{
"title": "Titans: Learning to Memorize at Test Time (神经记忆, 超越 Transformer)",
"authors": "Behrouz et al.",
"year": 2025,
"venue": "arXiv",
"tags": [
"前瞻"
],
"arxiv": "2501.00663"
}
]
}
]
},
"multi": {
"id": "multi",
"name": "多模态",
"icon": "🖼️",
"desc": "视觉-语言、音频、视频、Omni 统一模态",
"color": "multi",
"areas": [
{
"id": "vision",
"name": "视觉-语言模型",
"mainline": [
{
"title": "CLIP: Learning Transferable Visual Models (对比学习奠基)",
"authors": "Radford et al.",
"year": 2021,
"venue": "ICML",
"tags": [
"起点"
],
"arxiv": "2103.00020"
},
{
"title": "BLIP-2: Bootstrapping Language-Image Pre-training (Q-Former)",
"authors": "Li et al.",
"year": 2023,
"venue": "ICML",
"tags": [
"关键节点"
],
"arxiv": "2301.12597"
},
{
"title": "LLaVA / LLaVA-1.5: Visual Instruction Tuning",
"authors": "Liu et al.",
"year": 2023,
"venue": "NeurIPS",
"tags": [
"关键节点"
],
"arxiv": "2304.08485"
},
{
"title": "GPT-4o System Card: 原生多模态 (文本/图像/音频端到端)",
"authors": "OpenAI",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2410.21276"
},
{
"title": "Qwen3-VL / InternVL3 / Kimi K2.5 Visual: 开源 SOTA 视觉-语言",
"authors": "Alibaba / Shanghai AI Lab / Moonshot",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2602.02276"
},
{
"title": "GPT-4V / GPT-4o / GPT-5.4: Multimodal Frontier (vision + speech + text native, real-time voice)",
"authors": "OpenAI",
"year": 2026,
"venue": "OpenAI",
"tags": [
"关键节点"
],
"arxiv": ""
}
],
"branches": [
{
"title": "Flamingo: Visual Language Model for Few-Shot Learning",
"authors": "Alayrac et al.",
"year": 2022,
"venue": "NeurIPS",
"tags": [
"支线"
],
"arxiv": "2204.14198"
},
{
"title": "SigLIP: Sigmoid Loss for Language Image Pre-training",
"authors": "Zhai et al.",
"year": 2023,
"venue": "ICCV",
"tags": [
"支线"
],
"arxiv": "2303.15343"
}
],
"forward": []
},
{
"id": "omni",
"name": "Omni 统一多模态",
"mainline": [
{
"title": "Gemini 1.0 Technical Report: 原生多模态 (文本/图像/音频/视频)",
"authors": "Google DeepMind",
"year": 2023,
"venue": "arXiv",
"tags": [
"起点"
],
"arxiv": "2312.11805"
},
{
"title": "Chameleon: Mixed-Modal Early-Fusion Foundation Models",
"authors": "Team Chameleon (Meta)",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2405.09818"
},
{
"title": "GPT-4o System Card: 端到端 omni 多模态",
"authors": "OpenAI",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2410.21276"
},
{
"title": "Qwen3.5-Omni Technical Report: 全模态 (文本/图像/音频/视频/语音)",
"authors": "Alibaba Qwen",
"year": 2026,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2604.15804"
}
],
"branches": [
{
"title": "EMU3: Next-Token Prediction is All You Need (BAAI 统一多模态)",
"authors": "Wang et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2409.18869"
}
],
"forward": []
},
{
"id": "audio",
"name": "音频/语音模型",
"mainline": [
{
"title": "Whisper / Whisper-large-v3: Robust Speech Recognition",
"authors": "Radford et al.",
"year": 2023,
"venue": "ICML",
"tags": [
"起点"
],
"arxiv": "2212.04356"
},
{
"title": "CosyVoice 2: Scalable Streaming Speech Synthesis",
"authors": "Du et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2412.10117"
},
{
"title": "Qwen-Audio / Qwen2-Audio: 通用音频理解",
"authors": "Alibaba",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2311.07919"
}
],
"branches": [
{
"title": "Moshi: Real-time Speech Dialogue (Kyutai)",
"authors": "Défossez et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2410.00037"
}
],
"forward": []
},
{
"id": "video",
"name": "视频理解",
"mainline": [
{
"title": "Video-LLaMA / VideoChat: 视频-语言对话",
"authors": "Zhang et al.",
"year": 2023,
"venue": "EMNLP",
"tags": [
"起点"
],
"arxiv": "2306.02858"
},
{
"title": "Gemini 1.5 Pro: 1M token 长上下文视频理解",
"authors": "Google DeepMind",
"year": 2024,
"venue": "Google",
"tags": [
"关键节点"
],
"arxiv": "2403.05530"
},
{
"title": "LLaVA-OneVision / Qwen3-VL 视频能力",
"authors": "Li et al. / Qwen",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2408.03326"
}
],
"branches": [],
"forward": []
}
]
},
"data": {
"id": "data",
"name": "数据工程",
"icon": "📊",
"desc": "采集清洗、数据配比、合成数据、质量筛选",
"color": "data",
"areas": [
{
"id": "dedup",
"name": "数据清洗与去重",
"mainline": [
{
"title": "The Pile: An 800GB Dataset of Diverse Text",
"authors": "Gao et al.",
"year": 2020,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2101.00027"
},
{
"title": "Deduplicating Training Data Makes Language Models Better",
"authors": "Lee et al.",
"year": 2022,
"venue": "ACL",
"tags": [
"起点"
],
"arxiv": "2107.06499"
},
{
"title": "CCNet / RefinedWeb / FineWeb: 大规模高质量 Web 数据",
"authors": "Penedo et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2306.01116"
},
{
"title": "DCLM: DataComp-LM — 数据筛选标准化基准",
"authors": "Li et al.",
"year": 2024,
"venue": "NeurIPS",
"tags": [
"前沿"
],
"arxiv": "2406.11794"
},
{
"title": "Dolma / OLMo / datatrove: 全开放数据处理管线",
"authors": "Soldaini et al.",
"year": 2024,
"venue": "ACL",
"tags": [
"前沿"
],
"arxiv": "2402.00159"
}
],
"branches": [
{
"title": "C4 / mC4: Colossal Clean Crawled Corpus (T5)",
"authors": "Raffel et al.",
"year": 2020,
"venue": "JMLR",
"tags": [
"支线"
],
"arxiv": "1910.10683"
}
],
"forward": []
},
{
"id": "mixing",
"name": "数据配比与课程学习",
"mainline": [
{
"title": "DoReMi: Optimizing Data Mixtures Speeds Up LM Pretraining",
"authors": "Xie et al.",
"year": 2023,
"venue": "NeurIPS",
"tags": [
"起点"
],
"arxiv": "2305.10429"
},
{
"title": "DoGE: Domain Reweighting with Generalization Estimation",
"authors": "Fan et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2405.13063"
}
],
"branches": [
{
"title": "Scaling Data-Constrained Language Models (多轮重复训练)",
"authors": "Muennighoff et al.",
"year": 2023,
"venue": "NeurIPS",
"tags": [
"支线"
],
"arxiv": "2305.16264"
}
],
"forward": []
},
{
"id": "synthesis",
"name": "合成数据生成",
"mainline": [
{
"title": "Evol-Instruct (WizardLM) / Orca: 渐进式指令演化",
"authors": "Xu et al.",
"year": 2023,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2304.12244"
},
{
"title": "Self-Instruct: Aligning LM with Self-Generated Instructions",
"authors": "Wang et al.",
"year": 2023,
"venue": "ACL",
"tags": [
"起点"
],
"arxiv": "2212.10560"
},
{
"title": "Magpie: Alignment Data Synthesis from Scratch",
"authors": "Xu et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2406.08464"
},
{
"title": "DeepSeek-R1 冷启动数据合成 (长推理链)",
"authors": "DeepSeek-AI",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2501.12948"
},
{
"title": "Phi-4: 合成数据驱动的推理训练 + 代码合成",
"authors": "Microsoft Research",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2412.08905"
}
],
"branches": [
{
"title": "Alpaca: A Strong, Replicable Instruction-Following Model (Stanford CRFM)",
"authors": "Taori et al.",
"year": 2023,
"venue": "Stanford",
"tags": [
"支线"
],
"arxiv": ""
}
],
"forward": []
}
]
},
"pretrain": {
"id": "pretrain",
"name": "预训练",
"icon": "🔥",
"desc": "训练目标、分布式并行、训练稳定性、Scaling Law、长上下文",
"color": "pretrain",
"areas": [
{
"id": "scaling",
"name": "Scaling Law 与计算优化",
"mainline": [
{
"title": "Scaling Laws for Neural Language Models (Kaplan)",
"authors": "Kaplan et al.",
"year": 2020,
"venue": "arXiv",
"tags": [
"起点"
],
"arxiv": "2001.08361"
},
{
"title": "Chinchilla: Training Compute-Optimal Large Language Models",
"authors": "Hoffmann et al.",
"year": 2022,
"venue": "NeurIPS",
"tags": [
"关键节点"
],
"arxiv": "2203.15556"
},
{
"title": "Scaling Data-Constrained Language Models (重复训练 scaling)",
"authors": "Muennighoff et al.",
"year": 2023,
"venue": "NeurIPS",
"tags": [
"关键节点"
],
"arxiv": "2305.16264"
},
{
"title": "Inference-Aware Scaling Laws (部署成本纳入 scaling)",
"authors": "Sardana & Frankle",
"year": 2024,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2401.00448"
},
{
"title": "Qwen3 / DeepSeek-V3 实践: 超 Chinchilla ~15×~60× tokens",
"authors": "Alibaba / DeepSeek",
"year": 2025,
"venue": "Industry",
"tags": [
"前沿"
],
"arxiv": "2503.20630"
}
],
"branches": [],
"forward": []
},
{
"id": "distributed",
"name": "分布式训练系统",
"mainline": [
{
"title": "Megatron-LM: Training Multi-Billion Parameter Models (TP/PP)",
"authors": "Shoeybi et al.",
"year": 2020,
"venue": "arXiv",
"tags": [
"起点"
],
"arxiv": "1909.08053"
},
{
"title": "ZeRO / ZeRO++ / ZeRO-3: Memory Optimizations (DeepSpeed)",
"authors": "Rajbhandari et al.",
"year": 2020,
"venue": "SC",
"tags": [
"关键节点"
],
"arxiv": "1910.02054"
},
{
"title": "3D 并行 + 序列并行 + 专家并行 (DeepSeek-V3 部署)",
"authors": "DeepSeek-AI",
"year": 2024,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2412.19437"
}
],
"branches": [
{
"title": "GPipe / PipeDream: Pipeline Parallelism 基础",
"authors": "Huang et al.",
"year": 2019,
"venue": "NeurIPS",
"tags": [
"支线"
],
"arxiv": "1811.06965"
},
{
"title": "Ring Attention / Striped Attention: 长序列分布式训练",
"authors": "Liu et al. / Brandon et al.",
"year": 2023,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2310.01889"
}
],
"forward": []
},
{
"id": "stability",
"name": "训练稳定性与精度",
"mainline": [
{
"title": "Mixed Precision Training (FP16 → 标准)",
"authors": "Micikevicius et al.",
"year": 2018,
"venue": "ICLR",
"tags": [
"起点"
],
"arxiv": "1710.03740"
},
{
"title": "FP8 Training for LLMs (H100/B200 原生支持)",
"authors": "Peng et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2310.18313"
}
],
"branches": [
{
"title": "GLM / BLOOM: 千卡训练稳定性工程经验",
"authors": "Zeng et al. / BigScience",
"year": 2023,
"venue": "ICLR",
"tags": [
"支线"
],
"arxiv": "2210.02414"
}
],
"forward": []
}
]
},
"post": {
"id": "post",
"name": "后训练",
"icon": "🎯",
"desc": "SFT、RLHF/DPO/GRPO、推理增强、安全对齐",
"color": "post",
"areas": [
{
"id": "sft",
"name": "SFT 监督微调",
"mainline": [
{
"title": "Finetuned Language Models Are Zero-Shot Learners (FLAN)",
"authors": "Wei et al.",
"year": 2022,
"venue": "ICLR",
"tags": [
"起点"
],
"arxiv": "2109.01652"
},
{
"title": "LIMA: Less Is More for Alignment (1,000 高质量样本)",
"authors": "Zhou et al.",
"year": 2023,
"venue": "NeurIPS",
"tags": [
"关键节点"
],
"arxiv": "2305.11206"
},
{
"title": "Tulu 3 / Orca 3: 系统化后训练管线",
"authors": "AllenAI / MSR",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2411.15124"
}
],
"branches": [
{
"title": "LoRA: Low-Rank Adaptation of LLMs",
"authors": "Hu et al.",
"year": 2022,
"venue": "ICLR",
"tags": [
"支线"
],
"arxiv": "2106.09685"
},
{
"title": "QLoRA: Efficient Finetuning of Quantized LLMs",
"authors": "Dettmers et al.",
"year": 2023,
"venue": "NeurIPS",
"tags": [
"支线"
],
"arxiv": "2305.14314"
}
],
"forward": []
},
{
"id": "alignment",
"name": "偏好对齐 (RLHF → DPO → GRPO)",
"mainline": [
{
"title": "Constitutional AI: Harmlessness from AI Feedback",
"authors": "Bai et al.",
"year": 2022,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2212.08073"
},
{
"title": "InstructGPT: Training Language Models to Follow Instructions (RLHF + PPO)",
"authors": "Ouyang et al.",
"year": 2022,
"venue": "NeurIPS",
"tags": [
"起点"
],
"arxiv": "2203.02155"
},
{
"title": "DPO: Direct Preference Optimization",
"authors": "Rafailov et al.",
"year": 2023,
"venue": "NeurIPS",
"tags": [
"关键节点"
],
"arxiv": "2305.18290"
},
{
"title": "SimPO / ORPO / KTO: 参考模型不可知的对齐方法",
"authors": "Meng et al.",
"year": 2024,
"venue": "NeurIPS",
"tags": [
"前沿"
],
"arxiv": "2405.14734"
},
{
"title": "DeepSeek-R1 / GRPO: 纯 RL 驱动推理涌现 (无人工标注)",
"authors": "DeepSeek-AI",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2501.12948"
}
],
"branches": [
{
"title": "RLAIF: Constitutional AI — RL from AI Feedback",
"authors": "Bai et al.",
"year": 2022,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2212.08073"
}
],
"forward": []
},
{
"id": "reasoning",
"name": "推理增强 (Reasoning)",
"mainline": [
{
"title": "Chain-of-Thought Prompting Elicits Reasoning in LLMs",
"authors": "Wei et al.",
"year": 2022,
"venue": "NeurIPS",
"tags": [
"起点"
],
"arxiv": "2201.11903"
},
{
"title": "STaR: Self-Taught Reasoner / ReST (自学习推理链)",
"authors": "Zelikman et al.",
"year": 2022,
"venue": "NeurIPS",
"tags": [
"关键节点"
],
"arxiv": "2203.14465"
},
{
"title": "Self-Consistency Improves Chain of Thought Reasoning",
"authors": "Wang et al.",
"year": 2023,
"venue": "ICLR",
"tags": [
"关键节点"
],
"arxiv": "2203.11171"
},
{
"title": "OpenAI o1 System Card / o3 System Card (推理时 Scaling)",
"authors": "OpenAI",
"year": 2024,
"venue": "OpenAI",
"tags": [
"前沿"
],
"arxiv": "2412.16720"
},
{
"title": "DeepSeek-R1: 开源推理模型 (RL + Cold-Start SFT)",
"authors": "DeepSeek-AI",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2501.12948"
},
{
"title": "Kimi K2-Thinking (扩展推理, 256K上下文)",
"authors": "Moonshot AI",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2507.20534"
},
{
"title": "MiniMax-M1: CISPO (Clipped IS-weight Policy Optimization) RL for reasoning emergence",
"authors": "MiniMax",
"year": 2025,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2506.13585"
}
],
"branches": [
{
"title": "Tree of Thoughts: Deliberate Problem Solving with LLMs",
"authors": "Yao et al.",
"year": 2023,
"venue": "NeurIPS",
"tags": [
"支线"
],
"arxiv": "2305.10601"
},
{
"title": "PRM/ORM: Let's Verify Step by Step (过程/结果奖励模型)",
"authors": "Lightman et al.",
"year": 2024,
"venue": "ICLR",
"tags": [
"支线"
],
"arxiv": "2305.20050"
}
],
"forward": [
{
"title": "Coconut: Training LLMs to Reason in a Continuous Latent Space (摆脱文字链, 潜在空间连续推理)",
"authors": "Hao et al. / Meta FAIR",
"year": 2024,
"venue": "arXiv",
"tags": [
"前瞻"
],
"arxiv": "2412.06769"
},
{
"title": "Quiet-STaR: Language Models Can Teach Themselves to Think Before Speaking",
"authors": "Zelikman et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"前瞻"
],
"arxiv": "2403.09629"
}
]
}
]
},
"compress": {
"id": "compress",
"name": "模型压缩",
"icon": "📦",
"desc": "量化、剪枝、蒸馏、KV Cache 压缩",
"color": "compress",
"areas": [
{
"id": "quant",
"name": "量化 (Quantization)",
"mainline": [
{
"title": "GPTQ: Accurate Post-Training Quantization for GPT",
"authors": "Frantar et al.",
"year": 2023,
"venue": "ICLR",
"tags": [
"起点"
],
"arxiv": "2210.17323"
},
{
"title": "AWQ: Activation-aware Weight Quantization",
"authors": "Lin et al.",
"year": 2024,
"venue": "MLSys",
"tags": [
"关键节点"
],
"arxiv": "2306.00978"
},
{
"title": "SmoothQuant: Accurate and Efficient Post-Training Quantization",
"authors": "Xiao et al.",
"year": 2023,
"venue": "ICML",
"tags": [
"关键节点"
],
"arxiv": "2211.10438"
},
{
"title": "QuaRot / SpinQuant: Outlier-Free 4-bit Quantization",
"authors": "Ashkboos et al.",
"year": 2025,
"venue": "ICLR",
"tags": [
"前沿"
],
"arxiv": "2404.00456"
},
{
"title": "DeepSeek-V4 FP4 QAT: 4-bit 量化感知训练 (99.7% recall)",
"authors": "DeepSeek-AI",
"year": 2026,
"venue": "DeepSeek",
"tags": [
"前沿"
],
"pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
}
],
"branches": [
{
"title": "FP8 / FP4 推理 (NVIDIA B200 原生支持)",
"authors": "Micikevicius et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2310.18313"
}
],
"forward": [
{
"title": "BitNet b1.58: 1.58-bit LLM (三值量化 {-1,0,1})",
"authors": "Ma et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"前瞻"
],
"arxiv": "2402.17764"
}
]
},
{
"id": "prune",
"name": "剪枝 (Pruning)",
"mainline": [
{
"title": "SparseGPT: Massive Language Models Can Be Accurately Pruned in One-Shot",
"authors": "Frantar & Alistarh",
"year": 2023,
"venue": "ICML",
"tags": [
"起点"
],
"arxiv": "2301.00774"
},
{
"title": "LLM-Pruner / ShortGPT: 结构化剪枝 + 层剪枝",
"authors": "Ma et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2305.11627"
},
{
"title": "SliceGPT: Compress LLMs by Deleting Rows/Columns",
"authors": "Ashkboos et al.",
"year": 2024,
"venue": "ICLR",
"tags": [
"关键节点"
],
"arxiv": "2401.15024"
},
{
"title": "Wanda: A Simple and Effective Pruning Approach for LLMs",
"authors": "Sun et al.",
"year": 2024,
"venue": "ICLR",
"tags": [
"关键节点"
],
"arxiv": "2306.11695"
}
],
"branches": [],
"forward": []
},
{
"id": "distill",
"name": "知识蒸馏",
"mainline": [
{
"title": "Distilling the Knowledge in a Neural Network",
"authors": "Hinton et al.",
"year": 2015,
"venue": "arXiv",
"tags": [
"起点"
],
"arxiv": "1503.02531"
},
{
"title": "Orca: Progressive Learning from Complex Explanation Traces",
"authors": "Mukherjee et al.",
"year": 2023,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2306.02707"
},
{
"title": "DeepSeek-R1 蒸馏 (R1 → Qwen/Llama 小模型)",
"authors": "DeepSeek-AI",
"year": 2025,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2501.12948"
},
{
"title": "DeepSeek-V4 On-Policy Distillation: 10 Teacher Models → 1 Student",
"authors": "DeepSeek-AI",
"year": 2026,
"venue": "DeepSeek",
"tags": [
"前沿"
],
"pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
}
],
"branches": [],
"forward": []
},
{
"id": "kvcache",
"name": "KV Cache 压缩",
"mainline": [
{
"title": "GQA/MQA: 从架构层面减少 KV 头 (Llama3/DeepSeek 标配)",
"authors": "Ainslie et al.",
"year": 2023,
"venue": "EMNLP",
"tags": [
"前沿"
],
"arxiv": "2305.13245"
},
{
"title": "H2O: Heavy-Hitter Oracle for Efficient KV Cache Eviction",
"authors": "Zhang et al.",
"year": 2023,
"venue": "NeurIPS",
"tags": [
"关键节点"
],
"arxiv": "2306.14048"
},
{
"title": "FastGen / KIVI / CacheGen: 自适应 KV 压缩与量化",
"authors": "Ge et al.",
"year": 2024,
"venue": "ICLR / SIGCOMM",
"tags": [
"关键节点"
],
"arxiv": "2310.01801"
},
{
"title": "StreamingLLM: Efficient Streaming Language Models with Attention Sinks",
"authors": "Xiao et al.",
"year": 2024,
"venue": "ICLR",
"tags": [
"起点"
],
"arxiv": "2309.17453"
},
{
"title": "DeepSeek-V4 CSA+HCA: KV Cache 降至 V3.2 的 10%, FLOPs 降至 27%",
"authors": "DeepSeek-AI",
"year": 2026,
"venue": "DeepSeek",
"tags": [
"前沿"
],
"pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
}
],
"branches": [],
"forward": []
}
]
},
"deploy": {
"id": "deploy",
"name": "部署推理",
"icon": "🚀",
"desc": "推理引擎、加速技术、服务化、边缘部署、成本优化",
"color": "deploy",
"areas": [
{
"id": "engine",
"name": "推理引擎与框架",
"mainline": [
{
"title": "vLLM: Easy, Fast, and Cheap LLM Serving with PagedAttention",
"authors": "Kwon et al.",
"year": 2023,
"venue": "SOSP",
"tags": [
"起点"
],
"arxiv": "2309.06180"
},
{
"title": "SGLang: Efficient LLM Programming and Serving",
"authors": "Zheng et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2312.07104"
}
],
"branches": [
{
"title": "MII / DeepSpeed-FastGen: Dynamic SplitFuse",
"authors": "Microsoft",
"year": 2024,
"venue": "Industry",
"tags": [
"支线"
],
"arxiv": "2401.08671"
}
],
"forward": []
},
{
"id": "accel",
"name": "推理加速技术",
"mainline": [
{
"title": "Speculative Decoding: Fast Inference from Transformers",
"authors": "Leviathan et al. / Chen et al.",
"year": 2023,
"venue": "ICML",
"tags": [
"起点"
],
"arxiv": "2211.17192"
},
{
"title": "Continuous Batching + Prefill-Decode Disaggregation",
"authors": "Patel et al. / Yu et al.",
"year": 2024,
"venue": "OSDI",
"tags": [
"前沿"
],
"arxiv": "2401.08671"
},
{
"title": "EAGLE / EAGLE-2: 推测解码框架 (无需 draft model)",
"authors": "Li et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2401.15077"
},
{
"title": "Medusa: Simple LLM Inference Acceleration with Multiple Heads",
"authors": "Cai et al.",
"year": 2024,
"venue": "ICML",
"tags": [
"关键节点"
],
"arxiv": "2401.10774"
},
{
"title": "DeepSeek-V3 Multi-Token Prediction (MTP) + V4 Flash 推理",
"authors": "DeepSeek-AI",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2412.19437"
}
],
"branches": [
{
"title": "KV Cache Offloading (GPU ↔ CPU 动态迁移)",
"authors": "Sheng et al.",
"year": 2023,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2303.06865"
},
{
"title": "FlashDecoding / FlashInfer: GPU 算子级加速",
"authors": "Dao et al. / Ye et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2310.12049"
}
],
"forward": []
}
]
},
"agent": {
"id": "agent",
"name": "Agent & 应用",
"icon": "🤖",
"desc": "Tool Use、RAG、Multi-Agent、MCP/A2A、Computer Use",
"color": "agent",
"areas": [
{
"id": "react",
"name": "Agent 核心 (ReAct / Tool Use / Computer Use)",
"mainline": [
{
"title": "Generative Agents: Interactive Simulacra of Human Behavior (Stanford AI Town, memory-stream architecture, 25 agents)",
"authors": "Park et al. / Stanford",
"year": 2023,
"venue": "UIST 2023",
"tags": [
"起点"
],
"arxiv": "2304.03442"
},
{
"title": "ReAct: Synergizing Reasoning and Acting in Language Models",
"authors": "Yao et al.",
"year": 2023,
"venue": "ICLR",
"tags": [
"起点"
],
"arxiv": "2210.03629"
},
{
"title": "Toolformer: Language Models Can Teach Themselves to Use Tools",
"authors": "Schick et al.",
"year": 2023,
"venue": "NeurIPS",
"tags": [
"关键节点"
],
"arxiv": "2302.04761"
},
{
"title": "SWE-Agent: Agent-Computer Interfaces for Software Engineering",
"authors": "Yang et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2405.15793"
},
{
"title": "SEAgent: Self-Evolving Computer Use Agent (GRPO 自主学习)",
"authors": "Sun et al.",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2508.04700"
}
],
"branches": [
{
"title": "Voyager: Open-Ended Embodied Agent with LLMs (Minecraft)",
"authors": "Wang et al.",
"year": 2023,
"venue": "NeurIPS",
"tags": [
"支线"
],
"arxiv": "2305.16291"
},
{
"title": "Claude Opus 4 & Sonnet 4 System Card (Agent 安全评估)",
"authors": "Anthropic",
"year": 2025,
"venue": "Anthropic",
"tags": [
"支线"
],
"pdf": "https://www-cdn.anthropic.com/6be99a52cb68eb70eb9572b4cafad13df32ed995.pdf"
}
],
"forward": []
},
{
"id": "rag",
"name": "RAG 检索增强生成",
"mainline": [
{
"title": "RAG: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks",
"authors": "Lewis et al.",
"year": 2020,
"venue": "NeurIPS",
"tags": [
"起点"
],
"arxiv": "2005.11401"
},
{
"title": "GraphRAG: From Local to Global — Graph-based RAG",
"authors": "Edge et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2404.16130"
},
{
"title": "Self-RAG: Learning to Retrieve, Generate, and Critique",
"authors": "Asai et al.",
"year": 2024,
"venue": "ICLR",
"tags": [
"关键节点"
],
"arxiv": "2310.11511"
},
{
"title": "Agentic RAG / Corrective RAG / Adaptive RAG",
"authors": "Yan et al. / Asai et al.",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2401.15884"
}
],
"branches": [
{
"title": "ColBERT / ColPali: Late-Interaction / Visual RAG",
"authors": "Khattab et al.",
"year": 2022,
"venue": "SIGIR",
"tags": [
"支线"
],
"arxiv": "2112.01488"
},
{
"title": "HyDE: Precise Zero-Shot Dense Retrieval without Relevant Labels",
"authors": "Gao et al.",
"year": 2023,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2212.10496"
}
],
"forward": []
},
{
"id": "multiagent",
"name": "Multi-Agent & 协议",
"mainline": [
{
"title": "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
"authors": "Wu et al.",
"year": 2023,
"venue": "arXiv",
"tags": [
"起点"
],
"arxiv": "2308.08155"
},
{
"title": "MetaGPT / ChatDev / CrewAI: 多 Agent 协作框架",
"authors": "Hong et al. / Qian et al.",
"year": 2024,
"venue": "ACL",
"tags": [
"关键节点"
],
"arxiv": "2308.00352"
},
{
"title": "Kimi K2.5 Agent Swarm: 100 子 Agent / 1,500 工具调用",
"authors": "Moonshot AI",
"year": 2026,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2602.02276"
}
],
"branches": [
{
"title": "Gorilla: Large Language Model Connected with Massive APIs",
"authors": "Patil et al.",
"year": 2023,
"venue": "ICML",
"tags": [
"支线"
],
"arxiv": "2305.15334"
}
],
"forward": []
}
]
},
"eval": {
"id": "eval",
"name": "评估体系",
"icon": "📏",
"desc": "通用、推理、编码、Agent、安全评估",
"color": "eval",
"areas": [
{
"id": "general",
"name": "通用能力评估",
"mainline": [
{
"title": "MMLU: Measuring Massive Multitask Language Understanding",
"authors": "Hendrycks et al.",
"year": 2021,
"venue": "ICLR",
"tags": [
"起点"
],
"arxiv": "2009.03300"
},
{
"title": "BIG-bench: Beyond the Imitation Game (204 tasks)",
"authors": "Srivastava et al.",
"year": 2023,
"venue": "TMLR",
"tags": [
"关键节点"
],
"arxiv": "2206.04615"
},
{
"title": "LiveBench: A Challenging, Contamination-Free LLM Benchmark",
"authors": "White et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2406.19314"
},
{
"title": "MMLU-Pro: A More Robust and Challenging Benchmark",
"authors": "Wang et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2406.01574"
},
{
"title": "Humanity's Last Exam (HLE): 人类知识极限测试",
"authors": "Phan et al.",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2501.14249"
}
],
"branches": [
{
"title": "C-Eval / CMMLU: 中文评估",
"authors": "Huang et al.",
"year": 2023,
"venue": "arXiv",
"tags": [
"支线"
],
"arxiv": "2305.08322"
}
],
"forward": []
},
{
"id": "reasoning",
"name": "推理 & 数学评估",
"mainline": [
{
"title": "GSM8K / MATH: 数学推理基础",
"authors": "Cobbe et al. / Hendrycks et al.",
"year": 2021,
"venue": "NeurIPS",
"tags": [
"起点"
],
"arxiv": "2103.03874"
},
{
"title": "BBH: Challenging BIG-Bench Tasks (BIG-Bench Hard)",
"authors": "Suzgun et al.",
"year": 2023,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2210.09261"
},
{
"title": "GPQA: A Graduate-Level Google-Proof Q&A Benchmark",
"authors": "Rein et al.",
"year": 2023,
"venue": "NeurIPS",
"tags": [
"关键节点"
],
"arxiv": "2311.12022"
},
{
"title": "HLE (Humanity's Last Exam): 3000问题极限测试",
"authors": "Phan et al.",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2501.14249"
}
],
"branches": [],
"forward": []
},
{
"id": "code",
"name": "编码评估",
"mainline": [
{
"title": "HumanEval / MBPP: 函数级代码生成",
"authors": "Chen et al. / Austin et al.",
"year": 2021,
"venue": "NeurIPS",
"tags": [
"起点"
],
"arxiv": "2107.03374"
},
{
"title": "LiveCodeBench: Holistic and Contamination-Free Coding",
"authors": "Jain et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2408.07935"
},
{
"title": "SWE-bench: Can Language Models Resolve Real-World GitHub Issues?",
"authors": "Jimenez et al.",
"year": 2024,
"venue": "ICLR",
"tags": [
"关键节点"
],
"arxiv": "2310.06770"
},
{
"title": "BigCodeBench: Benchmarking Code Generation with Diverse Tasks",
"authors": "Zhuo et al.",
"year": 2025,
"venue": "arXiv",
"tags": [
"前沿"
],
"arxiv": "2406.15877"
}
],
"branches": [],
"forward": []
},
{
"id": "agent_eval",
"name": "Agent 评估",
"mainline": [
{
"title": "GAIA: A Benchmark for General AI Assistants",
"authors": "Mialon et al.",
"year": 2023,
"venue": "NeurIPS",
"tags": [
"关键节点"
],
"arxiv": "2311.12983"
},
{
"title": "BFCL: Berkeley Function Calling Leaderboard",
"authors": "Yan et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"起点"
],
"arxiv": "2402.16053"
},
{
"title": "WebArena / VisualWebArena: 真实 Web 任务 Agent 评估",
"authors": "Zhou et al.",
"year": 2024,
"venue": "ICLR",
"tags": [
"关键节点"
],
"arxiv": "2307.13854"
},
{
"title": "τ-bench: Agent Tool Use & Task Completion",
"authors": "Yao et al.",
"year": 2024,
"venue": "arXiv",
"tags": [
"关键节点"
],
"arxiv": "2406.12045"
}
],
"branches": [],
"forward": []
}
]
}
}