From 63a2aedaf25fd792a53fba6d3b3b18d771186a7c Mon Sep 17 00:00:00 2001
From: LaoWang <257199637@qq.com>
Date: Tue, 2 Jun 2026 12:57:09 +0000
Subject: [PATCH] =?UTF-8?q?fix:=20=E6=89=80=E6=9C=89=E8=AE=BA=E6=96=87?=
 =?UTF-8?q?=E6=8C=89=E5=B9=B4=E4=BB=BD=E6=8E=92=E5=BA=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/papers.json | 638 +++++++++++++++++++++++------------------------
 1 file changed, 319 insertions(+), 319 deletions(-)

diff --git a/data/papers.json b/data/papers.json
index f79a4dc..120ce05 100644
--- a/data/papers.json
+++ b/data/papers.json
@@ -40,16 +40,6 @@
             ],
             "arxiv": "2302.13971"
           },
-          {
-            "title": "Llama 3 / Llama 4 (Scout/Maverick/Behemoth, MoE, 10M context)",
-            "authors": "Meta AI",
-            "year": 2025,
-            "venue": "arXiv",
-            "tags": [
-              "关键节点"
-            ],
-            "arxiv": "2601.11659"
-          },
           {
             "title": "DeepSeek-V3 / V3.2: MoE + MLA + MTP + Sparse Attention",
             "authors": "DeepSeek-AI",
@@ -60,16 +50,6 @@
             ],
             "arxiv": "2412.19437"
           },
-          {
-            "title": "DeepSeek-V4: 1.6T MoE + CSA+HCA Hybrid Attention + 1M Context",
-            "authors": "DeepSeek-AI",
-            "year": 2026,
-            "venue": "DeepSeek",
-            "tags": [
-              "前沿"
-            ],
-            "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
-          },
           {
             "title": "Kimi K2: Open Agentic Intelligence (1T MoE, 128K ctx, Muon optimizer)",
             "authors": "Moonshot AI",
@@ -80,6 +60,26 @@
             ],
             "arxiv": "2507.20534"
           },
+          {
+            "title": "Llama 3 / Llama 4 (Scout/Maverick/Behemoth, MoE, 10M context)",
+            "authors": "Meta AI",
+            "year": 2025,
+            "venue": "arXiv",
+            "tags": [
+              "关键节点"
+            ],
+            "arxiv": "2601.11659"
+          },
+          {
+            "title": "DeepSeek-V4: 1.6T MoE + CSA+HCA Hybrid Attention + 1M Context",
+            "authors": "DeepSeek-AI",
+            "year": 2026,
+            "venue": "DeepSeek",
+            "tags": [
+              "前沿"
+            ],
+            "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
+          },
           {
             "title": "Kimi K2.5: Visual Agentic Intelligence (native multimodal, Agent Swarm)",
             "authors": "Moonshot AI",
@@ -169,16 +169,6 @@
             ],
             "arxiv": "2405.04434"
           },
-          {
-            "title": "DeepSeek-V4: CSA+HCA Hybrid Attention (KV Cache → 10% of V3.2)",
-            "authors": "DeepSeek-AI",
-            "year": 2026,
-            "venue": "DeepSeek",
-            "tags": [
-              "前沿"
-            ],
-            "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
-          },
           {
             "title": "FlashAttention-3 / Sparse Attention 工程化",
             "authors": "Dao et al. / DeepSeek",
@@ -198,6 +188,16 @@
               "关键节点"
             ],
             "arxiv": "2501.08313"
+          },
+          {
+            "title": "DeepSeek-V4: CSA+HCA Hybrid Attention (KV Cache → 10% of V3.2)",
+            "authors": "DeepSeek-AI",
+            "year": 2026,
+            "venue": "DeepSeek",
+            "tags": [
+              "前沿"
+            ],
+            "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
           }
         ],
         "branches": [
@@ -259,16 +259,6 @@
             ],
             "arxiv": "2101.03961"
           },
-          {
-            "title": "Mixtral of Experts (8x7B, 开放 MoE)",
-            "authors": "Jiang et al.",
-            "year": 2024,
-            "venue": "Mistral AI",
-            "tags": [
-              "关键节点"
-            ],
-            "arxiv": "2401.04088"
-          },
           {
             "title": "DeepSeekMoE: Fine-Grained Expert Segmentation + Shared Experts",
             "authors": "DeepSeek-AI",
@@ -279,6 +269,16 @@
             ],
             "arxiv": "2401.06066"
           },
+          {
+            "title": "Mixtral of Experts (8x7B, 开放 MoE)",
+            "authors": "Jiang et al.",
+            "year": 2024,
+            "venue": "Mistral AI",
+            "tags": [
+              "关键节点"
+            ],
+            "arxiv": "2401.04088"
+          },
           {
             "title": "DeepSeek-V4: 1.6T参数 MoE + 领域专家独立训练后合并 + On-Policy Distillation",
             "authors": "DeepSeek-AI",
@@ -417,16 +417,6 @@
             ],
             "arxiv": "1910.07467"
           },
-          {
-            "title": "DeepNorm: DeepNet — Scaling Transformers to 1,000 Layers",
-            "authors": "Wang et al.",
-            "year": 2022,
-            "venue": "arXiv",
-            "tags": [
-              "关键节点"
-            ],
-            "arxiv": "2203.00555"
-          },
           {
             "title": "SwiGLU / GLU Variants (成为行业标配)",
             "authors": "Shazeer et al.",
@@ -437,6 +427,16 @@
             ],
             "arxiv": "2002.05202"
           },
+          {
+            "title": "DeepNorm: DeepNet — Scaling Transformers to 1,000 Layers",
+            "authors": "Wang et al.",
+            "year": 2022,
+            "venue": "arXiv",
+            "tags": [
+              "关键节点"
+            ],
+            "arxiv": "2203.00555"
+          },
           {
             "title": "mHC: Manifold-Constrained Hyper-Connections (DeepSeek-V4 基础)",
             "authors": "DeepSeek-AI",
@@ -495,16 +495,6 @@
             ],
             "arxiv": "2405.21060"
           },
-          {
-            "title": "Kimi Linear (K2.5 混合架构: Transformer + Linear Attention)",
-            "authors": "Moonshot AI",
-            "year": 2026,
-            "venue": "arXiv",
-            "tags": [
-              "前沿"
-            ],
-            "arxiv": "2602.02276"
-          },
           {
             "title": "Titans: Learning to Memorize at Test Time (Neural Memory)",
             "authors": "Behrouz et al.",
@@ -514,6 +504,16 @@
               "前沿"
             ],
             "arxiv": "2501.00663"
+          },
+          {
+            "title": "Kimi Linear (K2.5 混合架构: Transformer + Linear Attention)",
+            "authors": "Moonshot AI",
+            "year": 2026,
+            "venue": "arXiv",
+            "tags": [
+              "前沿"
+            ],
+            "arxiv": "2602.02276"
           }
         ],
         "branches": [
@@ -527,16 +527,6 @@
             ],
             "arxiv": "2307.08621"
           },
-          {
-            "title": "Hymba: Hybrid Mamba-Transformer",
-            "authors": "NVIDIA / Meta",
-            "year": 2025,
-            "venue": "arXiv",
-            "tags": [
-              "支线"
-            ],
-            "arxiv": "2411.13676"
-          },
           {
             "title": "xLSTM: Extended Long Short-Term Memory (sLSTM + mLSTM, matrix memory, linear attention revival)",
             "authors": "Beck et al.",
@@ -546,29 +536,19 @@
               "支线"
             ],
             "arxiv": "2405.04517"
-          }
-        ],
-        "forward": [
+          },
           {
-            "title": "Titans: Learning to Memorize at Test Time (神经记忆, 超越 Transformer)",
-            "authors": "Behrouz et al.",
+            "title": "Hymba: Hybrid Mamba-Transformer",
+            "authors": "NVIDIA / Meta",
             "year": 2025,
             "venue": "arXiv",
             "tags": [
-              "前瞻"
+              "支线"
             ],
-            "arxiv": "2501.00663"
-          },
-          {
-            "title": "MatMul-free Language Modeling (消除矩阵乘法, 类脑计算)",
-            "authors": "Zhu et al.",
-            "year": 2024,
-            "venue": "arXiv",
-            "tags": [
-              "前瞻"
-            ],
-            "arxiv": "2406.02528"
-          },
+            "arxiv": "2411.13676"
+          }
+        ],
+        "forward": [
           {
             "title": "Byte Latent Transformer: Patches Scale Better Than Tokens (tokenization-free, entropy-based patching)",
             "authors": "Pagnoni et al. / Meta FAIR",
@@ -589,6 +569,16 @@
             ],
             "arxiv": "2412.08821"
           },
+          {
+            "title": "MatMul-free Language Modeling (消除矩阵乘法, 类脑计算)",
+            "authors": "Zhu et al.",
+            "year": 2024,
+            "venue": "arXiv",
+            "tags": [
+              "前瞻"
+            ],
+            "arxiv": "2406.02528"
+          },
           {
             "title": "LLaDA: Large Language Diffusion with mAsking (diffusion-based LLM, 8B, non-autoregressive generation)",
             "authors": "Nie et al.",
@@ -598,6 +588,16 @@
               "前瞻"
             ],
             "arxiv": "2502.09992"
+          },
+          {
+            "title": "Titans: Learning to Memorize at Test Time (神经记忆, 超越 Transformer)",
+            "authors": "Behrouz et al.",
+            "year": 2025,
+            "venue": "arXiv",
+            "tags": [
+              "前瞻"
+            ],
+            "arxiv": "2501.00663"
           }
         ]
       }
@@ -676,16 +676,6 @@
           }
         ],
         "branches": [
-          {
-            "title": "SigLIP: Sigmoid Loss for Language Image Pre-training",
-            "authors": "Zhai et al.",
-            "year": 2023,
-            "venue": "ICCV",
-            "tags": [
-              "支线"
-            ],
-            "arxiv": "2303.15343"
-          },
           {
             "title": "Flamingo: Visual Language Model for Few-Shot Learning",
             "authors": "Alayrac et al.",
@@ -695,6 +685,16 @@
               "支线"
             ],
             "arxiv": "2204.14198"
+          },
+          {
+            "title": "SigLIP: Sigmoid Loss for Language Image Pre-training",
+            "authors": "Zhai et al.",
+            "year": 2023,
+            "venue": "ICCV",
+            "tags": [
+              "支线"
+            ],
+            "arxiv": "2303.15343"
           }
         ],
         "forward": []
@@ -713,16 +713,6 @@
             ],
             "arxiv": "2312.11805"
           },
-          {
-            "title": "GPT-4o System Card: 端到端 omni 多模态",
-            "authors": "OpenAI",
-            "year": 2024,
-            "venue": "arXiv",
-            "tags": [
-              "关键节点"
-            ],
-            "arxiv": "2410.21276"
-          },
           {
             "title": "Chameleon: Mixed-Modal Early-Fusion Foundation Models",
             "authors": "Team Chameleon (Meta)",
@@ -733,6 +723,16 @@
             ],
             "arxiv": "2405.09818"
           },
+          {
+            "title": "GPT-4o System Card: 端到端 omni 多模态",
+            "authors": "OpenAI",
+            "year": 2024,
+            "venue": "arXiv",
+            "tags": [
+              "关键节点"
+            ],
+            "arxiv": "2410.21276"
+          },
           {
             "title": "Qwen3.5-Omni Technical Report: 全模态 (文本/图像/音频/视频/语音)",
             "authors": "Alibaba Qwen",
@@ -772,16 +772,6 @@
             ],
             "arxiv": "2212.04356"
           },
-          {
-            "title": "Qwen-Audio / Qwen2-Audio: 通用音频理解",
-            "authors": "Alibaba",
-            "year": 2024,
-            "venue": "arXiv",
-            "tags": [
-              "关键节点"
-            ],
-            "arxiv": "2311.07919"
-          },
           {
             "title": "CosyVoice 2: Scalable Streaming Speech Synthesis",
             "authors": "Du et al.",
@@ -791,6 +781,16 @@
               "前沿"
             ],
             "arxiv": "2412.10117"
+          },
+          {
+            "title": "Qwen-Audio / Qwen2-Audio: 通用音频理解",
+            "authors": "Alibaba",
+            "year": 2024,
+            "venue": "arXiv",
+            "tags": [
+              "关键节点"
+            ],
+            "arxiv": "2311.07919"
           }
         ],
         "branches": [
@@ -858,16 +858,6 @@
         "id": "dedup",
         "name": "数据清洗与去重",
         "mainline": [
-          {
-            "title": "Deduplicating Training Data Makes Language Models Better",
-            "authors": "Lee et al.",
-            "year": 2022,
-            "venue": "ACL",
-            "tags": [
-              "起点"
-            ],
-            "arxiv": "2107.06499"
-          },
           {
             "title": "The Pile: An 800GB Dataset of Diverse Text",
             "authors": "Gao et al.",
@@ -878,6 +868,16 @@
             ],
             "arxiv": "2101.00027"
           },
+          {
+            "title": "Deduplicating Training Data Makes Language Models Better",
+            "authors": "Lee et al.",
+            "year": 2022,
+            "venue": "ACL",
+            "tags": [
+              "起点"
+            ],
+            "arxiv": "2107.06499"
+          },
           {
             "title": "CCNet / RefinedWeb / FineWeb: 大规模高质量 Web 数据",
             "authors": "Penedo et al.",
@@ -966,16 +966,6 @@
         "id": "synthesis",
         "name": "合成数据生成",
         "mainline": [
-          {
-            "title": "Self-Instruct: Aligning LM with Self-Generated Instructions",
-            "authors": "Wang et al.",
-            "year": 2023,
-            "venue": "ACL",
-            "tags": [
-              "起点"
-            ],
-            "arxiv": "2212.10560"
-          },
           {
             "title": "Evol-Instruct (WizardLM) / Orca: 渐进式指令演化",
             "authors": "Xu et al.",
@@ -986,6 +976,16 @@
             ],
             "arxiv": "2304.12244"
           },
+          {
+            "title": "Self-Instruct: Aligning LM with Self-Generated Instructions",
+            "authors": "Wang et al.",
+            "year": 2023,
+            "venue": "ACL",
+            "tags": [
+              "起点"
+            ],
+            "arxiv": "2212.10560"
+          },
           {
             "title": "Magpie: Alignment Data Synthesis from Scratch",
             "authors": "Xu et al.",
@@ -996,16 +996,6 @@
             ],
             "arxiv": "2406.06859"
           },
-          {
-            "title": "Phi-4: 合成数据驱动的推理训练 + 代码合成",
-            "authors": "Microsoft Research",
-            "year": 2025,
-            "venue": "arXiv",
-            "tags": [
-              "前沿"
-            ],
-            "arxiv": "2412.08905"
-          },
           {
             "title": "DeepSeek-R1 冷启动数据合成 (长推理链)",
             "authors": "DeepSeek-AI",
@@ -1015,6 +1005,16 @@
               "前沿"
             ],
             "arxiv": "2501.12948"
+          },
+          {
+            "title": "Phi-4: 合成数据驱动的推理训练 + 代码合成",
+            "authors": "Microsoft Research",
+            "year": 2025,
+            "venue": "arXiv",
+            "tags": [
+              "前沿"
+            ],
+            "arxiv": "2412.08905"
           }
         ],
         "branches": [
@@ -1074,16 +1074,6 @@
             ],
             "arxiv": "2305.16264"
           },
-          {
-            "title": "Qwen3 / DeepSeek-V3 实践: 超 Chinchilla ~15×~60× tokens",
-            "authors": "Alibaba / DeepSeek",
-            "year": 2025,
-            "venue": "Industry",
-            "tags": [
-              "前沿"
-            ],
-            "arxiv": "2503.20630"
-          },
           {
             "title": "Inference-Aware Scaling Laws (部署成本纳入 scaling)",
             "authors": "Sardana & Frankle",
@@ -1093,6 +1083,16 @@
               "前沿"
             ],
             "arxiv": "2401.00448"
+          },
+          {
+            "title": "Qwen3 / DeepSeek-V3 实践: 超 Chinchilla ~15×~60× tokens",
+            "authors": "Alibaba / DeepSeek",
+            "year": 2025,
+            "venue": "Industry",
+            "tags": [
+              "前沿"
+            ],
+            "arxiv": "2503.20630"
           }
         ],
         "branches": [],
@@ -1268,16 +1268,6 @@
         "id": "alignment",
         "name": "偏好对齐 (RLHF → DPO → GRPO)",
         "mainline": [
-          {
-            "title": "InstructGPT: Training Language Models to Follow Instructions (RLHF + PPO)",
-            "authors": "Ouyang et al.",
-            "year": 2022,
-            "venue": "NeurIPS",
-            "tags": [
-              "起点"
-            ],
-            "arxiv": "2203.02155"
-          },
           {
             "title": "Constitutional AI: Harmlessness from AI Feedback",
             "authors": "Bai et al.",
@@ -1288,6 +1278,16 @@
             ],
             "arxiv": "2212.08073"
           },
+          {
+            "title": "InstructGPT: Training Language Models to Follow Instructions (RLHF + PPO)",
+            "authors": "Ouyang et al.",
+            "year": 2022,
+            "venue": "NeurIPS",
+            "tags": [
+              "起点"
+            ],
+            "arxiv": "2203.02155"
+          },
           {
             "title": "DPO: Direct Preference Optimization",
             "authors": "Rafailov et al.",
@@ -1298,16 +1298,6 @@
             ],
             "arxiv": "2305.18290"
           },
-          {
-            "title": "DeepSeek-R1 / GRPO: 纯 RL 驱动推理涌现 (无人工标注)",
-            "authors": "DeepSeek-AI",
-            "year": 2025,
-            "venue": "arXiv",
-            "tags": [
-              "前沿"
-            ],
-            "arxiv": "2501.12948"
-          },
           {
             "title": "SimPO / ORPO / KTO: 参考模型不可知的对齐方法",
             "authors": "Meng et al.",
@@ -1317,6 +1307,16 @@
               "前沿"
             ],
             "arxiv": "2405.14734"
+          },
+          {
+            "title": "DeepSeek-R1 / GRPO: 纯 RL 驱动推理涌现 (无人工标注)",
+            "authors": "DeepSeek-AI",
+            "year": 2025,
+            "venue": "arXiv",
+            "tags": [
+              "前沿"
+            ],
+            "arxiv": "2501.12948"
           }
         ],
         "branches": [
@@ -1347,16 +1347,6 @@
             ],
             "arxiv": "2201.11903"
           },
-          {
-            "title": "Self-Consistency Improves Chain of Thought Reasoning",
-            "authors": "Wang et al.",
-            "year": 2023,
-            "venue": "ICLR",
-            "tags": [
-              "关键节点"
-            ],
-            "arxiv": "2203.11171"
-          },
           {
             "title": "STaR: Self-Taught Reasoner / ReST (自学习推理链)",
             "authors": "Zelikman et al.",
@@ -1367,6 +1357,16 @@
             ],
             "arxiv": "2203.14465"
           },
+          {
+            "title": "Self-Consistency Improves Chain of Thought Reasoning",
+            "authors": "Wang et al.",
+            "year": 2023,
+            "venue": "ICLR",
+            "tags": [
+              "关键节点"
+            ],
+            "arxiv": "2203.11171"
+          },
           {
             "title": "OpenAI o1 System Card / o3 System Card (推理时 Scaling)",
             "authors": "OpenAI",
@@ -1431,16 +1431,6 @@
           }
         ],
         "forward": [
-          {
-            "title": "Quiet-STaR: Language Models Can Teach Themselves to Think Before Speaking",
-            "authors": "Zelikman et al.",
-            "year": 2024,
-            "venue": "arXiv",
-            "tags": [
-              "前瞻"
-            ],
-            "arxiv": "2403.09629"
-          },
           {
             "title": "Coconut: Training LLMs to Reason in a Continuous Latent Space (摆脱文字链, 潜在空间连续推理)",
             "authors": "Hao et al. / Meta FAIR",
@@ -1450,6 +1440,16 @@
               "前瞻"
             ],
             "arxiv": "2412.06769"
+          },
+          {
+            "title": "Quiet-STaR: Language Models Can Teach Themselves to Think Before Speaking",
+            "authors": "Zelikman et al.",
+            "year": 2024,
+            "venue": "arXiv",
+            "tags": [
+              "前瞻"
+            ],
+            "arxiv": "2403.09629"
           }
         ]
       }
@@ -1557,14 +1557,14 @@
             "arxiv": "2301.00774"
           },
           {
-            "title": "Wanda: A Simple and Effective Pruning Approach for LLMs",
-            "authors": "Sun et al.",
+            "title": "LLM-Pruner / ShortGPT: 结构化剪枝 + 层剪枝",
+            "authors": "Ma et al.",
             "year": 2024,
-            "venue": "ICLR",
+            "venue": "arXiv",
             "tags": [
-              "关键节点"
+              "前沿"
             ],
-            "arxiv": "2306.11695"
+            "arxiv": "2305.11627"
           },
           {
             "title": "SliceGPT: Compress LLMs by Deleting Rows/Columns",
@@ -1577,14 +1577,14 @@
             "arxiv": "2401.15024"
           },
           {
-            "title": "LLM-Pruner / ShortGPT: 结构化剪枝 + 层剪枝",
-            "authors": "Ma et al.",
+            "title": "Wanda: A Simple and Effective Pruning Approach for LLMs",
+            "authors": "Sun et al.",
             "year": 2024,
-            "venue": "arXiv",
+            "venue": "ICLR",
             "tags": [
-              "前沿"
+              "关键节点"
             ],
-            "arxiv": "2305.11627"
+            "arxiv": "2306.11695"
           }
         ],
         "branches": [],
@@ -1643,14 +1643,14 @@
         "name": "KV Cache 压缩",
         "mainline": [
           {
-            "title": "StreamingLLM: Efficient Streaming Language Models with Attention Sinks",
-            "authors": "Xiao et al.",
-            "year": 2024,
-            "venue": "ICLR",
+            "title": "GQA/MQA: 从架构层面减少 KV 头 (Llama3/DeepSeek 标配)",
+            "authors": "Ainslie et al.",
+            "year": 2023,
+            "venue": "EMNLP",
             "tags": [
-              "起点"
+              "前沿"
             ],
-            "arxiv": "2309.17453"
+            "arxiv": "2305.13245"
           },
           {
             "title": "H2O: Heavy-Hitter Oracle for Efficient KV Cache Eviction",
@@ -1672,6 +1672,16 @@
             ],
             "arxiv": "2310.07240"
           },
+          {
+            "title": "StreamingLLM: Efficient Streaming Language Models with Attention Sinks",
+            "authors": "Xiao et al.",
+            "year": 2024,
+            "venue": "ICLR",
+            "tags": [
+              "起点"
+            ],
+            "arxiv": "2309.17453"
+          },
           {
             "title": "DeepSeek-V4 CSA+HCA: KV Cache 降至 V3.2 的 10%, FLOPs 降至 27%",
             "authors": "DeepSeek-AI",
@@ -1681,16 +1691,6 @@
               "前沿"
             ],
             "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf"
-          },
-          {
-            "title": "GQA/MQA: 从架构层面减少 KV 头 (Llama3/DeepSeek 标配)",
-            "authors": "Ainslie et al.",
-            "year": 2023,
-            "venue": "EMNLP",
-            "tags": [
-              "前沿"
-            ],
-            "arxiv": "2305.13245"
           }
         ],
         "branches": [],
@@ -1759,14 +1759,14 @@
             "arxiv": "2211.17192"
           },
           {
-            "title": "Medusa: Simple LLM Inference Acceleration with Multiple Heads",
-            "authors": "Cai et al.",
+            "title": "Continuous Batching + Prefill-Decode Disaggregation",
+            "authors": "Patel et al. / Yu et al.",
             "year": 2024,
-            "venue": "ICML",
+            "venue": "OSDI",
             "tags": [
-              "关键节点"
+              "前沿"
             ],
-            "arxiv": "2401.10774"
+            "arxiv": "2401.08671"
           },
           {
             "title": "EAGLE / EAGLE-2: 推测解码框架 (无需 draft model)",
@@ -1779,14 +1779,14 @@
             "arxiv": "2401.15077"
           },
           {
-            "title": "Continuous Batching + Prefill-Decode Disaggregation",
-            "authors": "Patel et al. / Yu et al.",
+            "title": "Medusa: Simple LLM Inference Acceleration with Multiple Heads",
+            "authors": "Cai et al.",
             "year": 2024,
-            "venue": "OSDI",
+            "venue": "ICML",
             "tags": [
-              "前沿"
+              "关键节点"
             ],
-            "arxiv": "2401.08671"
+            "arxiv": "2401.10774"
           },
           {
             "title": "DeepSeek-V3 Multi-Token Prediction (MTP) + V4 Flash 推理",
@@ -1800,16 +1800,6 @@
           }
         ],
         "branches": [
-          {
-            "title": "FlashDecoding / FlashInfer: GPU 算子级加速",
-            "authors": "Dao et al. / Ye et al.",
-            "year": 2024,
-            "venue": "arXiv",
-            "tags": [
-              "支线"
-            ],
-            "arxiv": "2310.12049"
-          },
           {
             "title": "KV Cache Offloading (GPU ↔ CPU 动态迁移)",
             "authors": "Sheng et al.",
@@ -1819,6 +1809,16 @@
               "支线"
             ],
             "arxiv": "2303.06865"
+          },
+          {
+            "title": "FlashDecoding / FlashInfer: GPU 算子级加速",
+            "authors": "Dao et al. / Ye et al.",
+            "year": 2024,
+            "venue": "arXiv",
+            "tags": [
+              "支线"
+            ],
+            "arxiv": "2310.12049"
           }
         ],
         "forward": []
@@ -1836,6 +1836,16 @@
         "id": "react",
         "name": "Agent 核心 (ReAct / Tool Use / Computer Use)",
         "mainline": [
+          {
+            "title": "Generative Agents: Interactive Simulacra of Human Behavior (Stanford AI Town, memory-stream architecture, 25 agents)",
+            "authors": "Park et al. / Stanford",
+            "year": 2023,
+            "venue": "UIST 2023",
+            "tags": [
+              "起点"
+            ],
+            "arxiv": "2304.03442"
+          },
           {
             "title": "ReAct: Synergizing Reasoning and Acting in Language Models",
             "authors": "Yao et al.",
@@ -1875,16 +1885,6 @@
               "前沿"
             ],
             "arxiv": "2508.04700"
-          },
-          {
-            "title": "Generative Agents: Interactive Simulacra of Human Behavior (Stanford AI Town, memory-stream architecture, 25 agents)",
-            "authors": "Park et al. / Stanford",
-            "year": 2023,
-            "venue": "UIST 2023",
-            "tags": [
-              "起点"
-            ],
-            "arxiv": "2304.03442"
           }
         ],
         "branches": [
@@ -1925,16 +1925,6 @@
             ],
             "arxiv": "2005.11401"
           },
-          {
-            "title": "Self-RAG: Learning to Retrieve, Generate, and Critique",
-            "authors": "Asai et al.",
-            "year": 2024,
-            "venue": "ICLR",
-            "tags": [
-              "关键节点"
-            ],
-            "arxiv": "2310.11511"
-          },
           {
             "title": "GraphRAG: From Local to Global — Graph-based RAG",
             "authors": "Edge et al.",
@@ -1945,6 +1935,16 @@
             ],
             "arxiv": "2404.16130"
           },
+          {
+            "title": "Self-RAG: Learning to Retrieve, Generate, and Critique",
+            "authors": "Asai et al.",
+            "year": 2024,
+            "venue": "ICLR",
+            "tags": [
+              "关键节点"
+            ],
+            "arxiv": "2310.11511"
+          },
           {
             "title": "Agentic RAG / Corrective RAG / Adaptive RAG",
             "authors": "Yan et al. / Asai et al.",
@@ -1957,16 +1957,6 @@
           }
         ],
         "branches": [
-          {
-            "title": "HyDE: Precise Zero-Shot Dense Retrieval without Relevant Labels",
-            "authors": "Gao et al.",
-            "year": 2023,
-            "venue": "arXiv",
-            "tags": [
-              "支线"
-            ],
-            "arxiv": "2212.10496"
-          },
           {
             "title": "ColBERT / ColPali: Late-Interaction / Visual RAG",
             "authors": "Khattab et al.",
@@ -1976,6 +1966,16 @@
               "支线"
             ],
             "arxiv": "2112.01488"
+          },
+          {
+            "title": "HyDE: Precise Zero-Shot Dense Retrieval without Relevant Labels",
+            "authors": "Gao et al.",
+            "year": 2023,
+            "venue": "arXiv",
+            "tags": [
+              "支线"
+            ],
+            "arxiv": "2212.10496"
           }
         ],
         "forward": []
@@ -2062,16 +2062,6 @@
             ],
             "arxiv": "2206.04615"
           },
-          {
-            "title": "MMLU-Pro: A More Robust and Challenging Benchmark",
-            "authors": "Wang et al.",
-            "year": 2024,
-            "venue": "arXiv",
-            "tags": [
-              "关键节点"
-            ],
-            "arxiv": "2406.01574"
-          },
           {
             "title": "LiveBench: A Challenging, Contamination-Free LLM Benchmark",
             "authors": "White et al.",
@@ -2082,6 +2072,16 @@
             ],
             "arxiv": "2406.19314"
           },
+          {
+            "title": "MMLU-Pro: A More Robust and Challenging Benchmark",
+            "authors": "Wang et al.",
+            "year": 2024,
+            "venue": "arXiv",
+            "tags": [
+              "关键节点"
+            ],
+            "arxiv": "2406.01574"
+          },
           {
             "title": "Humanity's Last Exam (HLE): 人类知识极限测试",
             "authors": "Phan et al.",
@@ -2121,16 +2121,6 @@
             ],
             "arxiv": "2103.03874"
           },
-          {
-            "title": "GPQA: A Graduate-Level Google-Proof Q&A Benchmark",
-            "authors": "Rein et al.",
-            "year": 2023,
-            "venue": "NeurIPS",
-            "tags": [
-              "关键节点"
-            ],
-            "arxiv": "2311.12022"
-          },
           {
             "title": "BBH: Challenging BIG-Bench Tasks (BIG-Bench Hard)",
             "authors": "Suzgun et al.",
@@ -2141,6 +2131,16 @@
             ],
             "arxiv": "2210.09261"
           },
+          {
+            "title": "GPQA: A Graduate-Level Google-Proof Q&A Benchmark",
+            "authors": "Rein et al.",
+            "year": 2023,
+            "venue": "NeurIPS",
+            "tags": [
+              "关键节点"
+            ],
+            "arxiv": "2311.12022"
+          },
           {
             "title": "HLE (Humanity's Last Exam): 3000问题极限测试",
             "authors": "Phan et al.",
@@ -2169,16 +2169,6 @@
             ],
             "arxiv": "2107.03374"
           },
-          {
-            "title": "SWE-bench: Can Language Models Resolve Real-World GitHub Issues?",
-            "authors": "Jimenez et al.",
-            "year": 2024,
-            "venue": "ICLR",
-            "tags": [
-              "关键节点"
-            ],
-            "arxiv": "2310.06770"
-          },
           {
             "title": "LiveCodeBench: Holistic and Contamination-Free Coding",
             "authors": "Jain et al.",
@@ -2189,6 +2179,16 @@
             ],
             "arxiv": "2408.07935"
           },
+          {
+            "title": "SWE-bench: Can Language Models Resolve Real-World GitHub Issues?",
+            "authors": "Jimenez et al.",
+            "year": 2024,
+            "venue": "ICLR",
+            "tags": [
+              "关键节点"
+            ],
+            "arxiv": "2310.06770"
+          },
           {
             "title": "BigCodeBench: Benchmarking Code Generation with Diverse Tasks",
             "authors": "Zhuo et al.",
@@ -2207,6 +2207,16 @@
         "id": "agent_eval",
         "name": "Agent 评估",
         "mainline": [
+          {
+            "title": "GAIA: A Benchmark for General AI Assistants",
+            "authors": "Mialon et al.",
+            "year": 2023,
+            "venue": "NeurIPS",
+            "tags": [
+              "关键节点"
+            ],
+            "arxiv": "2311.12983"
+          },
           {
             "title": "BFCL: Berkeley Function Calling Leaderboard",
             "authors": "Yan et al.",
@@ -2217,16 +2227,6 @@
             ],
             "arxiv": "2402.16053"
           },
-          {
-            "title": "τ-bench: Agent Tool Use & Task Completion",
-            "authors": "Yao et al.",
-            "year": 2024,
-            "venue": "arXiv",
-            "tags": [
-              "关键节点"
-            ],
-            "arxiv": "2406.12045"
-          },
           {
             "title": "WebArena / VisualWebArena: 真实 Web 任务 Agent 评估",
             "authors": "Zhou et al.",
@@ -2238,14 +2238,14 @@
             "arxiv": "2307.13854"
           },
           {
-            "title": "GAIA: A Benchmark for General AI Assistants",
-            "authors": "Mialon et al.",
-            "year": 2023,
-            "venue": "NeurIPS",
+            "title": "τ-bench: Agent Tool Use & Task Completion",
+            "authors": "Yao et al.",
+            "year": 2024,
+            "venue": "arXiv",
             "tags": [
               "关键节点"
             ],
-            "arxiv": "2311.12983"
+            "arxiv": "2406.12045"
           }
         ],
         "branches": [],