feat: 新增 9 篇关键缺位论文 (189篇)
架构 5 篇: - MiniMax-01: Lightning Attention + MoE (2501.08313) - Byte Latent Transformer: tokenization-free (2412.09871) - Large Concept Models: 句子级推理 (2412.08821) - xLSTM: LSTM复兴, matrix memory (2405.04517) - LLaDA: Diffusion LLM, 非自回归 (2502.09992) 后训练 2 篇: - MiniMax-M1: CISPO RL 推理涌现 (2506.13585) - Coconut: 潜在空间连续推理 (2412.06769) Agent 1 篇: - Generative Agents: Stanford AI Town (2304.03442) 多模态 1 篇: - GPT-4V/4o/5.4: 视觉-语音原生多模态
This commit is contained in:
@@ -188,6 +188,16 @@
|
|||||||
"前沿"
|
"前沿"
|
||||||
],
|
],
|
||||||
"arxiv": "2407.08608"
|
"arxiv": "2407.08608"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "MiniMax-01: Lightning Attention + MoE (456B, 45.9B active)",
|
||||||
|
"authors": "MiniMax",
|
||||||
|
"year": 2025,
|
||||||
|
"venue": "arXiv",
|
||||||
|
"tags": [
|
||||||
|
"关键节点"
|
||||||
|
],
|
||||||
|
"arxiv": "2501.08313"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"branches": [
|
"branches": [
|
||||||
@@ -526,6 +536,16 @@
|
|||||||
"支线"
|
"支线"
|
||||||
],
|
],
|
||||||
"arxiv": "2411.13676"
|
"arxiv": "2411.13676"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "xLSTM: Extended Long Short-Term Memory (sLSTM + mLSTM, matrix memory, linear attention revival)",
|
||||||
|
"authors": "Beck et al.",
|
||||||
|
"year": 2024,
|
||||||
|
"venue": "arXiv",
|
||||||
|
"tags": [
|
||||||
|
"支线"
|
||||||
|
],
|
||||||
|
"arxiv": "2405.04517"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"forward": [
|
"forward": [
|
||||||
@@ -548,6 +568,36 @@
|
|||||||
"前瞻"
|
"前瞻"
|
||||||
],
|
],
|
||||||
"arxiv": "2406.02528"
|
"arxiv": "2406.02528"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Byte Latent Transformer: Patches Scale Better Than Tokens (tokenization-free, entropy-based patching)",
|
||||||
|
"authors": "Pagnoni et al. / Meta FAIR",
|
||||||
|
"year": 2024,
|
||||||
|
"venue": "arXiv",
|
||||||
|
"tags": [
|
||||||
|
"前瞻"
|
||||||
|
],
|
||||||
|
"arxiv": "2412.09871"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Large Concept Models: Language Modeling in a Sentence Representation Space (SONAR, 200 languages)",
|
||||||
|
"authors": "Baranchuk et al. / Meta FAIR",
|
||||||
|
"year": 2024,
|
||||||
|
"venue": "arXiv",
|
||||||
|
"tags": [
|
||||||
|
"前瞻"
|
||||||
|
],
|
||||||
|
"arxiv": "2412.08821"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "LLaDA: Large Language Diffusion with mAsking (diffusion-based LLM, 8B, non-autoregressive generation)",
|
||||||
|
"authors": "Nie et al.",
|
||||||
|
"year": 2025,
|
||||||
|
"venue": "arXiv",
|
||||||
|
"tags": [
|
||||||
|
"前瞻"
|
||||||
|
],
|
||||||
|
"arxiv": "2502.09992"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -613,6 +663,16 @@
|
|||||||
"前沿"
|
"前沿"
|
||||||
],
|
],
|
||||||
"arxiv": "2602.02276"
|
"arxiv": "2602.02276"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "GPT-4V / GPT-4o / GPT-5.4: Multimodal Frontier (vision + speech + text native, real-time voice)",
|
||||||
|
"authors": "OpenAI",
|
||||||
|
"year": 2026,
|
||||||
|
"venue": "OpenAI",
|
||||||
|
"tags": [
|
||||||
|
"关键节点"
|
||||||
|
],
|
||||||
|
"arxiv": ""
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"branches": [
|
"branches": [
|
||||||
@@ -1336,6 +1396,16 @@
|
|||||||
"前沿"
|
"前沿"
|
||||||
],
|
],
|
||||||
"arxiv": "2507.20534"
|
"arxiv": "2507.20534"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "MiniMax-M1: CISPO (Curriculum-Informed Synthetic Planning Optimization) RL for reasoning emergence",
|
||||||
|
"authors": "MiniMax",
|
||||||
|
"year": 2025,
|
||||||
|
"venue": "arXiv",
|
||||||
|
"tags": [
|
||||||
|
"关键节点"
|
||||||
|
],
|
||||||
|
"arxiv": "2506.13585"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"branches": [
|
"branches": [
|
||||||
@@ -1370,6 +1440,16 @@
|
|||||||
"前瞻"
|
"前瞻"
|
||||||
],
|
],
|
||||||
"arxiv": "2403.09629"
|
"arxiv": "2403.09629"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Coconut: Training LLMs to Reason in a Continuous Latent Space (摆脱文字链, 潜在空间连续推理)",
|
||||||
|
"authors": "Hao et al. / Meta FAIR",
|
||||||
|
"year": 2024,
|
||||||
|
"venue": "NeurIPS 2024",
|
||||||
|
"tags": [
|
||||||
|
"前瞻"
|
||||||
|
],
|
||||||
|
"arxiv": "2412.06769"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -1795,6 +1875,16 @@
|
|||||||
"前沿"
|
"前沿"
|
||||||
],
|
],
|
||||||
"arxiv": "2508.04700"
|
"arxiv": "2508.04700"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Generative Agents: Interactive Simulacra of Human Behavior (Stanford AI Town, memory-stream architecture, 25 agents)",
|
||||||
|
"authors": "Park et al. / Stanford",
|
||||||
|
"year": 2023,
|
||||||
|
"venue": "UIST 2023",
|
||||||
|
"tags": [
|
||||||
|
"起点"
|
||||||
|
],
|
||||||
|
"arxiv": "2304.03442"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"branches": [
|
"branches": [
|
||||||
|
|||||||
Reference in New Issue
Block a user