From 63a2aedaf25fd792a53fba6d3b3b18d771186a7c Mon Sep 17 00:00:00 2001 From: LaoWang <257199637@qq.com> Date: Tue, 2 Jun 2026 12:57:09 +0000 Subject: [PATCH] =?UTF-8?q?fix:=20=E6=89=80=E6=9C=89=E8=AE=BA=E6=96=87?= =?UTF-8?q?=E6=8C=89=E5=B9=B4=E4=BB=BD=E6=8E=92=E5=BA=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data/papers.json | 638 +++++++++++++++++++++++------------------------ 1 file changed, 319 insertions(+), 319 deletions(-) diff --git a/data/papers.json b/data/papers.json index f79a4dc..120ce05 100644 --- a/data/papers.json +++ b/data/papers.json @@ -40,16 +40,6 @@ ], "arxiv": "2302.13971" }, - { - "title": "Llama 3 / Llama 4 (Scout/Maverick/Behemoth, MoE, 10M context)", - "authors": "Meta AI", - "year": 2025, - "venue": "arXiv", - "tags": [ - "关键节点" - ], - "arxiv": "2601.11659" - }, { "title": "DeepSeek-V3 / V3.2: MoE + MLA + MTP + Sparse Attention", "authors": "DeepSeek-AI", @@ -60,16 +50,6 @@ ], "arxiv": "2412.19437" }, - { - "title": "DeepSeek-V4: 1.6T MoE + CSA+HCA Hybrid Attention + 1M Context", - "authors": "DeepSeek-AI", - "year": 2026, - "venue": "DeepSeek", - "tags": [ - "前沿" - ], - "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf" - }, { "title": "Kimi K2: Open Agentic Intelligence (1T MoE, 128K ctx, Muon optimizer)", "authors": "Moonshot AI", @@ -80,6 +60,26 @@ ], "arxiv": "2507.20534" }, + { + "title": "Llama 3 / Llama 4 (Scout/Maverick/Behemoth, MoE, 10M context)", + "authors": "Meta AI", + "year": 2025, + "venue": "arXiv", + "tags": [ + "关键节点" + ], + "arxiv": "2601.11659" + }, + { + "title": "DeepSeek-V4: 1.6T MoE + CSA+HCA Hybrid Attention + 1M Context", + "authors": "DeepSeek-AI", + "year": 2026, + "venue": "DeepSeek", + "tags": [ + "前沿" + ], + "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf" + }, { "title": "Kimi K2.5: Visual Agentic Intelligence (native multimodal, Agent Swarm)", "authors": "Moonshot AI", @@ -169,16 +169,6 @@ ], "arxiv": "2405.04434" }, - { - "title": "DeepSeek-V4: CSA+HCA Hybrid Attention (KV Cache → 10% of V3.2)", - "authors": "DeepSeek-AI", - "year": 2026, - "venue": "DeepSeek", - "tags": [ - "前沿" - ], - "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf" - }, { "title": "FlashAttention-3 / Sparse Attention 工程化", "authors": "Dao et al. / DeepSeek", @@ -198,6 +188,16 @@ "关键节点" ], "arxiv": "2501.08313" + }, + { + "title": "DeepSeek-V4: CSA+HCA Hybrid Attention (KV Cache → 10% of V3.2)", + "authors": "DeepSeek-AI", + "year": 2026, + "venue": "DeepSeek", + "tags": [ + "前沿" + ], + "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf" } ], "branches": [ @@ -259,16 +259,6 @@ ], "arxiv": "2101.03961" }, - { - "title": "Mixtral of Experts (8x7B, 开放 MoE)", - "authors": "Jiang et al.", - "year": 2024, - "venue": "Mistral AI", - "tags": [ - "关键节点" - ], - "arxiv": "2401.04088" - }, { "title": "DeepSeekMoE: Fine-Grained Expert Segmentation + Shared Experts", "authors": "DeepSeek-AI", @@ -279,6 +269,16 @@ ], "arxiv": "2401.06066" }, + { + "title": "Mixtral of Experts (8x7B, 开放 MoE)", + "authors": "Jiang et al.", + "year": 2024, + "venue": "Mistral AI", + "tags": [ + "关键节点" + ], + "arxiv": "2401.04088" + }, { "title": "DeepSeek-V4: 1.6T参数 MoE + 领域专家独立训练后合并 + On-Policy Distillation", "authors": "DeepSeek-AI", @@ -417,16 +417,6 @@ ], "arxiv": "1910.07467" }, - { - "title": "DeepNorm: DeepNet — Scaling Transformers to 1,000 Layers", - "authors": "Wang et al.", - "year": 2022, - "venue": "arXiv", - "tags": [ - "关键节点" - ], - "arxiv": "2203.00555" - }, { "title": "SwiGLU / GLU Variants (成为行业标配)", "authors": "Shazeer et al.", @@ -437,6 +427,16 @@ ], "arxiv": "2002.05202" }, + { + "title": "DeepNorm: DeepNet — Scaling Transformers to 1,000 Layers", + "authors": "Wang et al.", + "year": 2022, + "venue": "arXiv", + "tags": [ + "关键节点" + ], + "arxiv": "2203.00555" + }, { "title": "mHC: Manifold-Constrained Hyper-Connections (DeepSeek-V4 基础)", "authors": "DeepSeek-AI", @@ -495,16 +495,6 @@ ], "arxiv": "2405.21060" }, - { - "title": "Kimi Linear (K2.5 混合架构: Transformer + Linear Attention)", - "authors": "Moonshot AI", - "year": 2026, - "venue": "arXiv", - "tags": [ - "前沿" - ], - "arxiv": "2602.02276" - }, { "title": "Titans: Learning to Memorize at Test Time (Neural Memory)", "authors": "Behrouz et al.", @@ -514,6 +504,16 @@ "前沿" ], "arxiv": "2501.00663" + }, + { + "title": "Kimi Linear (K2.5 混合架构: Transformer + Linear Attention)", + "authors": "Moonshot AI", + "year": 2026, + "venue": "arXiv", + "tags": [ + "前沿" + ], + "arxiv": "2602.02276" } ], "branches": [ @@ -527,16 +527,6 @@ ], "arxiv": "2307.08621" }, - { - "title": "Hymba: Hybrid Mamba-Transformer", - "authors": "NVIDIA / Meta", - "year": 2025, - "venue": "arXiv", - "tags": [ - "支线" - ], - "arxiv": "2411.13676" - }, { "title": "xLSTM: Extended Long Short-Term Memory (sLSTM + mLSTM, matrix memory, linear attention revival)", "authors": "Beck et al.", @@ -546,29 +536,19 @@ "支线" ], "arxiv": "2405.04517" - } - ], - "forward": [ + }, { - "title": "Titans: Learning to Memorize at Test Time (神经记忆, 超越 Transformer)", - "authors": "Behrouz et al.", + "title": "Hymba: Hybrid Mamba-Transformer", + "authors": "NVIDIA / Meta", "year": 2025, "venue": "arXiv", "tags": [ - "前瞻" + "支线" ], - "arxiv": "2501.00663" - }, - { - "title": "MatMul-free Language Modeling (消除矩阵乘法, 类脑计算)", - "authors": "Zhu et al.", - "year": 2024, - "venue": "arXiv", - "tags": [ - "前瞻" - ], - "arxiv": "2406.02528" - }, + "arxiv": "2411.13676" + } + ], + "forward": [ { "title": "Byte Latent Transformer: Patches Scale Better Than Tokens (tokenization-free, entropy-based patching)", "authors": "Pagnoni et al. / Meta FAIR", @@ -589,6 +569,16 @@ ], "arxiv": "2412.08821" }, + { + "title": "MatMul-free Language Modeling (消除矩阵乘法, 类脑计算)", + "authors": "Zhu et al.", + "year": 2024, + "venue": "arXiv", + "tags": [ + "前瞻" + ], + "arxiv": "2406.02528" + }, { "title": "LLaDA: Large Language Diffusion with mAsking (diffusion-based LLM, 8B, non-autoregressive generation)", "authors": "Nie et al.", @@ -598,6 +588,16 @@ "前瞻" ], "arxiv": "2502.09992" + }, + { + "title": "Titans: Learning to Memorize at Test Time (神经记忆, 超越 Transformer)", + "authors": "Behrouz et al.", + "year": 2025, + "venue": "arXiv", + "tags": [ + "前瞻" + ], + "arxiv": "2501.00663" } ] } @@ -676,16 +676,6 @@ } ], "branches": [ - { - "title": "SigLIP: Sigmoid Loss for Language Image Pre-training", - "authors": "Zhai et al.", - "year": 2023, - "venue": "ICCV", - "tags": [ - "支线" - ], - "arxiv": "2303.15343" - }, { "title": "Flamingo: Visual Language Model for Few-Shot Learning", "authors": "Alayrac et al.", @@ -695,6 +685,16 @@ "支线" ], "arxiv": "2204.14198" + }, + { + "title": "SigLIP: Sigmoid Loss for Language Image Pre-training", + "authors": "Zhai et al.", + "year": 2023, + "venue": "ICCV", + "tags": [ + "支线" + ], + "arxiv": "2303.15343" } ], "forward": [] @@ -713,16 +713,6 @@ ], "arxiv": "2312.11805" }, - { - "title": "GPT-4o System Card: 端到端 omni 多模态", - "authors": "OpenAI", - "year": 2024, - "venue": "arXiv", - "tags": [ - "关键节点" - ], - "arxiv": "2410.21276" - }, { "title": "Chameleon: Mixed-Modal Early-Fusion Foundation Models", "authors": "Team Chameleon (Meta)", @@ -733,6 +723,16 @@ ], "arxiv": "2405.09818" }, + { + "title": "GPT-4o System Card: 端到端 omni 多模态", + "authors": "OpenAI", + "year": 2024, + "venue": "arXiv", + "tags": [ + "关键节点" + ], + "arxiv": "2410.21276" + }, { "title": "Qwen3.5-Omni Technical Report: 全模态 (文本/图像/音频/视频/语音)", "authors": "Alibaba Qwen", @@ -772,16 +772,6 @@ ], "arxiv": "2212.04356" }, - { - "title": "Qwen-Audio / Qwen2-Audio: 通用音频理解", - "authors": "Alibaba", - "year": 2024, - "venue": "arXiv", - "tags": [ - "关键节点" - ], - "arxiv": "2311.07919" - }, { "title": "CosyVoice 2: Scalable Streaming Speech Synthesis", "authors": "Du et al.", @@ -791,6 +781,16 @@ "前沿" ], "arxiv": "2412.10117" + }, + { + "title": "Qwen-Audio / Qwen2-Audio: 通用音频理解", + "authors": "Alibaba", + "year": 2024, + "venue": "arXiv", + "tags": [ + "关键节点" + ], + "arxiv": "2311.07919" } ], "branches": [ @@ -858,16 +858,6 @@ "id": "dedup", "name": "数据清洗与去重", "mainline": [ - { - "title": "Deduplicating Training Data Makes Language Models Better", - "authors": "Lee et al.", - "year": 2022, - "venue": "ACL", - "tags": [ - "起点" - ], - "arxiv": "2107.06499" - }, { "title": "The Pile: An 800GB Dataset of Diverse Text", "authors": "Gao et al.", @@ -878,6 +868,16 @@ ], "arxiv": "2101.00027" }, + { + "title": "Deduplicating Training Data Makes Language Models Better", + "authors": "Lee et al.", + "year": 2022, + "venue": "ACL", + "tags": [ + "起点" + ], + "arxiv": "2107.06499" + }, { "title": "CCNet / RefinedWeb / FineWeb: 大规模高质量 Web 数据", "authors": "Penedo et al.", @@ -966,16 +966,6 @@ "id": "synthesis", "name": "合成数据生成", "mainline": [ - { - "title": "Self-Instruct: Aligning LM with Self-Generated Instructions", - "authors": "Wang et al.", - "year": 2023, - "venue": "ACL", - "tags": [ - "起点" - ], - "arxiv": "2212.10560" - }, { "title": "Evol-Instruct (WizardLM) / Orca: 渐进式指令演化", "authors": "Xu et al.", @@ -986,6 +976,16 @@ ], "arxiv": "2304.12244" }, + { + "title": "Self-Instruct: Aligning LM with Self-Generated Instructions", + "authors": "Wang et al.", + "year": 2023, + "venue": "ACL", + "tags": [ + "起点" + ], + "arxiv": "2212.10560" + }, { "title": "Magpie: Alignment Data Synthesis from Scratch", "authors": "Xu et al.", @@ -996,16 +996,6 @@ ], "arxiv": "2406.06859" }, - { - "title": "Phi-4: 合成数据驱动的推理训练 + 代码合成", - "authors": "Microsoft Research", - "year": 2025, - "venue": "arXiv", - "tags": [ - "前沿" - ], - "arxiv": "2412.08905" - }, { "title": "DeepSeek-R1 冷启动数据合成 (长推理链)", "authors": "DeepSeek-AI", @@ -1015,6 +1005,16 @@ "前沿" ], "arxiv": "2501.12948" + }, + { + "title": "Phi-4: 合成数据驱动的推理训练 + 代码合成", + "authors": "Microsoft Research", + "year": 2025, + "venue": "arXiv", + "tags": [ + "前沿" + ], + "arxiv": "2412.08905" } ], "branches": [ @@ -1074,16 +1074,6 @@ ], "arxiv": "2305.16264" }, - { - "title": "Qwen3 / DeepSeek-V3 实践: 超 Chinchilla ~15×~60× tokens", - "authors": "Alibaba / DeepSeek", - "year": 2025, - "venue": "Industry", - "tags": [ - "前沿" - ], - "arxiv": "2503.20630" - }, { "title": "Inference-Aware Scaling Laws (部署成本纳入 scaling)", "authors": "Sardana & Frankle", @@ -1093,6 +1083,16 @@ "前沿" ], "arxiv": "2401.00448" + }, + { + "title": "Qwen3 / DeepSeek-V3 实践: 超 Chinchilla ~15×~60× tokens", + "authors": "Alibaba / DeepSeek", + "year": 2025, + "venue": "Industry", + "tags": [ + "前沿" + ], + "arxiv": "2503.20630" } ], "branches": [], @@ -1268,16 +1268,6 @@ "id": "alignment", "name": "偏好对齐 (RLHF → DPO → GRPO)", "mainline": [ - { - "title": "InstructGPT: Training Language Models to Follow Instructions (RLHF + PPO)", - "authors": "Ouyang et al.", - "year": 2022, - "venue": "NeurIPS", - "tags": [ - "起点" - ], - "arxiv": "2203.02155" - }, { "title": "Constitutional AI: Harmlessness from AI Feedback", "authors": "Bai et al.", @@ -1288,6 +1278,16 @@ ], "arxiv": "2212.08073" }, + { + "title": "InstructGPT: Training Language Models to Follow Instructions (RLHF + PPO)", + "authors": "Ouyang et al.", + "year": 2022, + "venue": "NeurIPS", + "tags": [ + "起点" + ], + "arxiv": "2203.02155" + }, { "title": "DPO: Direct Preference Optimization", "authors": "Rafailov et al.", @@ -1298,16 +1298,6 @@ ], "arxiv": "2305.18290" }, - { - "title": "DeepSeek-R1 / GRPO: 纯 RL 驱动推理涌现 (无人工标注)", - "authors": "DeepSeek-AI", - "year": 2025, - "venue": "arXiv", - "tags": [ - "前沿" - ], - "arxiv": "2501.12948" - }, { "title": "SimPO / ORPO / KTO: 参考模型不可知的对齐方法", "authors": "Meng et al.", @@ -1317,6 +1307,16 @@ "前沿" ], "arxiv": "2405.14734" + }, + { + "title": "DeepSeek-R1 / GRPO: 纯 RL 驱动推理涌现 (无人工标注)", + "authors": "DeepSeek-AI", + "year": 2025, + "venue": "arXiv", + "tags": [ + "前沿" + ], + "arxiv": "2501.12948" } ], "branches": [ @@ -1347,16 +1347,6 @@ ], "arxiv": "2201.11903" }, - { - "title": "Self-Consistency Improves Chain of Thought Reasoning", - "authors": "Wang et al.", - "year": 2023, - "venue": "ICLR", - "tags": [ - "关键节点" - ], - "arxiv": "2203.11171" - }, { "title": "STaR: Self-Taught Reasoner / ReST (自学习推理链)", "authors": "Zelikman et al.", @@ -1367,6 +1357,16 @@ ], "arxiv": "2203.14465" }, + { + "title": "Self-Consistency Improves Chain of Thought Reasoning", + "authors": "Wang et al.", + "year": 2023, + "venue": "ICLR", + "tags": [ + "关键节点" + ], + "arxiv": "2203.11171" + }, { "title": "OpenAI o1 System Card / o3 System Card (推理时 Scaling)", "authors": "OpenAI", @@ -1431,16 +1431,6 @@ } ], "forward": [ - { - "title": "Quiet-STaR: Language Models Can Teach Themselves to Think Before Speaking", - "authors": "Zelikman et al.", - "year": 2024, - "venue": "arXiv", - "tags": [ - "前瞻" - ], - "arxiv": "2403.09629" - }, { "title": "Coconut: Training LLMs to Reason in a Continuous Latent Space (摆脱文字链, 潜在空间连续推理)", "authors": "Hao et al. / Meta FAIR", @@ -1450,6 +1440,16 @@ "前瞻" ], "arxiv": "2412.06769" + }, + { + "title": "Quiet-STaR: Language Models Can Teach Themselves to Think Before Speaking", + "authors": "Zelikman et al.", + "year": 2024, + "venue": "arXiv", + "tags": [ + "前瞻" + ], + "arxiv": "2403.09629" } ] } @@ -1557,14 +1557,14 @@ "arxiv": "2301.00774" }, { - "title": "Wanda: A Simple and Effective Pruning Approach for LLMs", - "authors": "Sun et al.", + "title": "LLM-Pruner / ShortGPT: 结构化剪枝 + 层剪枝", + "authors": "Ma et al.", "year": 2024, - "venue": "ICLR", + "venue": "arXiv", "tags": [ - "关键节点" + "前沿" ], - "arxiv": "2306.11695" + "arxiv": "2305.11627" }, { "title": "SliceGPT: Compress LLMs by Deleting Rows/Columns", @@ -1577,14 +1577,14 @@ "arxiv": "2401.15024" }, { - "title": "LLM-Pruner / ShortGPT: 结构化剪枝 + 层剪枝", - "authors": "Ma et al.", + "title": "Wanda: A Simple and Effective Pruning Approach for LLMs", + "authors": "Sun et al.", "year": 2024, - "venue": "arXiv", + "venue": "ICLR", "tags": [ - "前沿" + "关键节点" ], - "arxiv": "2305.11627" + "arxiv": "2306.11695" } ], "branches": [], @@ -1643,14 +1643,14 @@ "name": "KV Cache 压缩", "mainline": [ { - "title": "StreamingLLM: Efficient Streaming Language Models with Attention Sinks", - "authors": "Xiao et al.", - "year": 2024, - "venue": "ICLR", + "title": "GQA/MQA: 从架构层面减少 KV 头 (Llama3/DeepSeek 标配)", + "authors": "Ainslie et al.", + "year": 2023, + "venue": "EMNLP", "tags": [ - "起点" + "前沿" ], - "arxiv": "2309.17453" + "arxiv": "2305.13245" }, { "title": "H2O: Heavy-Hitter Oracle for Efficient KV Cache Eviction", @@ -1672,6 +1672,16 @@ ], "arxiv": "2310.07240" }, + { + "title": "StreamingLLM: Efficient Streaming Language Models with Attention Sinks", + "authors": "Xiao et al.", + "year": 2024, + "venue": "ICLR", + "tags": [ + "起点" + ], + "arxiv": "2309.17453" + }, { "title": "DeepSeek-V4 CSA+HCA: KV Cache 降至 V3.2 的 10%, FLOPs 降至 27%", "authors": "DeepSeek-AI", @@ -1681,16 +1691,6 @@ "前沿" ], "pdf": "https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/resolve/main/DeepSeek_V4.pdf" - }, - { - "title": "GQA/MQA: 从架构层面减少 KV 头 (Llama3/DeepSeek 标配)", - "authors": "Ainslie et al.", - "year": 2023, - "venue": "EMNLP", - "tags": [ - "前沿" - ], - "arxiv": "2305.13245" } ], "branches": [], @@ -1759,14 +1759,14 @@ "arxiv": "2211.17192" }, { - "title": "Medusa: Simple LLM Inference Acceleration with Multiple Heads", - "authors": "Cai et al.", + "title": "Continuous Batching + Prefill-Decode Disaggregation", + "authors": "Patel et al. / Yu et al.", "year": 2024, - "venue": "ICML", + "venue": "OSDI", "tags": [ - "关键节点" + "前沿" ], - "arxiv": "2401.10774" + "arxiv": "2401.08671" }, { "title": "EAGLE / EAGLE-2: 推测解码框架 (无需 draft model)", @@ -1779,14 +1779,14 @@ "arxiv": "2401.15077" }, { - "title": "Continuous Batching + Prefill-Decode Disaggregation", - "authors": "Patel et al. / Yu et al.", + "title": "Medusa: Simple LLM Inference Acceleration with Multiple Heads", + "authors": "Cai et al.", "year": 2024, - "venue": "OSDI", + "venue": "ICML", "tags": [ - "前沿" + "关键节点" ], - "arxiv": "2401.08671" + "arxiv": "2401.10774" }, { "title": "DeepSeek-V3 Multi-Token Prediction (MTP) + V4 Flash 推理", @@ -1800,16 +1800,6 @@ } ], "branches": [ - { - "title": "FlashDecoding / FlashInfer: GPU 算子级加速", - "authors": "Dao et al. / Ye et al.", - "year": 2024, - "venue": "arXiv", - "tags": [ - "支线" - ], - "arxiv": "2310.12049" - }, { "title": "KV Cache Offloading (GPU ↔ CPU 动态迁移)", "authors": "Sheng et al.", @@ -1819,6 +1809,16 @@ "支线" ], "arxiv": "2303.06865" + }, + { + "title": "FlashDecoding / FlashInfer: GPU 算子级加速", + "authors": "Dao et al. / Ye et al.", + "year": 2024, + "venue": "arXiv", + "tags": [ + "支线" + ], + "arxiv": "2310.12049" } ], "forward": [] @@ -1836,6 +1836,16 @@ "id": "react", "name": "Agent 核心 (ReAct / Tool Use / Computer Use)", "mainline": [ + { + "title": "Generative Agents: Interactive Simulacra of Human Behavior (Stanford AI Town, memory-stream architecture, 25 agents)", + "authors": "Park et al. / Stanford", + "year": 2023, + "venue": "UIST 2023", + "tags": [ + "起点" + ], + "arxiv": "2304.03442" + }, { "title": "ReAct: Synergizing Reasoning and Acting in Language Models", "authors": "Yao et al.", @@ -1875,16 +1885,6 @@ "前沿" ], "arxiv": "2508.04700" - }, - { - "title": "Generative Agents: Interactive Simulacra of Human Behavior (Stanford AI Town, memory-stream architecture, 25 agents)", - "authors": "Park et al. / Stanford", - "year": 2023, - "venue": "UIST 2023", - "tags": [ - "起点" - ], - "arxiv": "2304.03442" } ], "branches": [ @@ -1925,16 +1925,6 @@ ], "arxiv": "2005.11401" }, - { - "title": "Self-RAG: Learning to Retrieve, Generate, and Critique", - "authors": "Asai et al.", - "year": 2024, - "venue": "ICLR", - "tags": [ - "关键节点" - ], - "arxiv": "2310.11511" - }, { "title": "GraphRAG: From Local to Global — Graph-based RAG", "authors": "Edge et al.", @@ -1945,6 +1935,16 @@ ], "arxiv": "2404.16130" }, + { + "title": "Self-RAG: Learning to Retrieve, Generate, and Critique", + "authors": "Asai et al.", + "year": 2024, + "venue": "ICLR", + "tags": [ + "关键节点" + ], + "arxiv": "2310.11511" + }, { "title": "Agentic RAG / Corrective RAG / Adaptive RAG", "authors": "Yan et al. / Asai et al.", @@ -1957,16 +1957,6 @@ } ], "branches": [ - { - "title": "HyDE: Precise Zero-Shot Dense Retrieval without Relevant Labels", - "authors": "Gao et al.", - "year": 2023, - "venue": "arXiv", - "tags": [ - "支线" - ], - "arxiv": "2212.10496" - }, { "title": "ColBERT / ColPali: Late-Interaction / Visual RAG", "authors": "Khattab et al.", @@ -1976,6 +1966,16 @@ "支线" ], "arxiv": "2112.01488" + }, + { + "title": "HyDE: Precise Zero-Shot Dense Retrieval without Relevant Labels", + "authors": "Gao et al.", + "year": 2023, + "venue": "arXiv", + "tags": [ + "支线" + ], + "arxiv": "2212.10496" } ], "forward": [] @@ -2062,16 +2062,6 @@ ], "arxiv": "2206.04615" }, - { - "title": "MMLU-Pro: A More Robust and Challenging Benchmark", - "authors": "Wang et al.", - "year": 2024, - "venue": "arXiv", - "tags": [ - "关键节点" - ], - "arxiv": "2406.01574" - }, { "title": "LiveBench: A Challenging, Contamination-Free LLM Benchmark", "authors": "White et al.", @@ -2082,6 +2072,16 @@ ], "arxiv": "2406.19314" }, + { + "title": "MMLU-Pro: A More Robust and Challenging Benchmark", + "authors": "Wang et al.", + "year": 2024, + "venue": "arXiv", + "tags": [ + "关键节点" + ], + "arxiv": "2406.01574" + }, { "title": "Humanity's Last Exam (HLE): 人类知识极限测试", "authors": "Phan et al.", @@ -2121,16 +2121,6 @@ ], "arxiv": "2103.03874" }, - { - "title": "GPQA: A Graduate-Level Google-Proof Q&A Benchmark", - "authors": "Rein et al.", - "year": 2023, - "venue": "NeurIPS", - "tags": [ - "关键节点" - ], - "arxiv": "2311.12022" - }, { "title": "BBH: Challenging BIG-Bench Tasks (BIG-Bench Hard)", "authors": "Suzgun et al.", @@ -2141,6 +2131,16 @@ ], "arxiv": "2210.09261" }, + { + "title": "GPQA: A Graduate-Level Google-Proof Q&A Benchmark", + "authors": "Rein et al.", + "year": 2023, + "venue": "NeurIPS", + "tags": [ + "关键节点" + ], + "arxiv": "2311.12022" + }, { "title": "HLE (Humanity's Last Exam): 3000问题极限测试", "authors": "Phan et al.", @@ -2169,16 +2169,6 @@ ], "arxiv": "2107.03374" }, - { - "title": "SWE-bench: Can Language Models Resolve Real-World GitHub Issues?", - "authors": "Jimenez et al.", - "year": 2024, - "venue": "ICLR", - "tags": [ - "关键节点" - ], - "arxiv": "2310.06770" - }, { "title": "LiveCodeBench: Holistic and Contamination-Free Coding", "authors": "Jain et al.", @@ -2189,6 +2179,16 @@ ], "arxiv": "2408.07935" }, + { + "title": "SWE-bench: Can Language Models Resolve Real-World GitHub Issues?", + "authors": "Jimenez et al.", + "year": 2024, + "venue": "ICLR", + "tags": [ + "关键节点" + ], + "arxiv": "2310.06770" + }, { "title": "BigCodeBench: Benchmarking Code Generation with Diverse Tasks", "authors": "Zhuo et al.", @@ -2207,6 +2207,16 @@ "id": "agent_eval", "name": "Agent 评估", "mainline": [ + { + "title": "GAIA: A Benchmark for General AI Assistants", + "authors": "Mialon et al.", + "year": 2023, + "venue": "NeurIPS", + "tags": [ + "关键节点" + ], + "arxiv": "2311.12983" + }, { "title": "BFCL: Berkeley Function Calling Leaderboard", "authors": "Yan et al.", @@ -2217,16 +2227,6 @@ ], "arxiv": "2402.16053" }, - { - "title": "τ-bench: Agent Tool Use & Task Completion", - "authors": "Yao et al.", - "year": 2024, - "venue": "arXiv", - "tags": [ - "关键节点" - ], - "arxiv": "2406.12045" - }, { "title": "WebArena / VisualWebArena: 真实 Web 任务 Agent 评估", "authors": "Zhou et al.", @@ -2238,14 +2238,14 @@ "arxiv": "2307.13854" }, { - "title": "GAIA: A Benchmark for General AI Assistants", - "authors": "Mialon et al.", - "year": 2023, - "venue": "NeurIPS", + "title": "τ-bench: Agent Tool Use & Task Completion", + "authors": "Yao et al.", + "year": 2024, + "venue": "arXiv", "tags": [ "关键节点" ], - "arxiv": "2311.12983" + "arxiv": "2406.12045" } ], "branches": [],