fix: 翻译 API 支持 arXiv + HF 两种论文源
This commit is contained in:
@@ -489,23 +489,42 @@ import subprocess, threading
|
||||
_translate_lock = threading.Lock()
|
||||
_translating = set()
|
||||
|
||||
@app.post("/api/translate/{arxiv_id}")
|
||||
async def trigger_translation(arxiv_id: str):
|
||||
"""Trigger pdf2zh translation for a paper (DeepSeek V4 Flash)"""
|
||||
pdf_path = PAPERS_DIR / "arxiv" / f"{arxiv_id}.pdf"
|
||||
if not pdf_path.exists():
|
||||
raise HTTPException(status_code=404, detail="PDF not found")
|
||||
@app.get("/api/translate/status")
|
||||
def translation_status():
|
||||
return {"translating": list(_translating)}
|
||||
|
||||
@app.post("/api/translate/{paper_id}")
|
||||
async def trigger_translation(paper_id: str):
|
||||
"""Trigger pdf2zh translation for a paper (DeepSeek V4 Flash).
|
||||
paper_id can be arxiv ID (e.g. 1706.03762) or a HF filename."""
|
||||
# Find the PDF
|
||||
pdf_path = None
|
||||
# Try arxiv
|
||||
candidate = PAPERS_DIR / "arxiv" / f"{paper_id}.pdf"
|
||||
if candidate.exists():
|
||||
pdf_path = candidate
|
||||
else:
|
||||
# Try HF papers directory
|
||||
hf_dir = PAPERS_DIR / "hf"
|
||||
if hf_dir.exists():
|
||||
for f in hf_dir.glob("*.pdf"):
|
||||
if paper_id in f.stem:
|
||||
pdf_path = f
|
||||
break
|
||||
|
||||
out_path = TRANSLATED_DIR / f"{arxiv_id}.pdf"
|
||||
if not pdf_path:
|
||||
raise HTTPException(status_code=404, detail=f"PDF not found for {paper_id}")
|
||||
|
||||
out_path = TRANSLATED_DIR / f"{paper_id}.pdf"
|
||||
if out_path.exists():
|
||||
return {"arxiv_id": arxiv_id, "status": "already_translated"}
|
||||
return {"paper_id": paper_id, "status": "already_translated"}
|
||||
|
||||
if arxiv_id in _translating:
|
||||
return {"arxiv_id": arxiv_id, "status": "in_progress"}
|
||||
if paper_id in _translating:
|
||||
return {"paper_id": paper_id, "status": "in_progress"}
|
||||
|
||||
def do_translate():
|
||||
try:
|
||||
_translating.add(arxiv_id)
|
||||
_translating.add(paper_id)
|
||||
from pdf2zh.doclayout import OnnxModel
|
||||
from pdf2zh.high_level import translate
|
||||
model = OnnxModel.from_pretrained()
|
||||
@@ -514,22 +533,22 @@ async def trigger_translation(arxiv_id: str):
|
||||
lang_in='en', lang_out='zh',
|
||||
service='deepseek', thread=4, model=model,
|
||||
)
|
||||
mono = TRANSLATED_DIR / f"{arxiv_id}-mono.pdf"
|
||||
dual = TRANSLATED_DIR / f"{arxiv_id}-dual.pdf"
|
||||
mono = TRANSLATED_DIR / f"{paper_id}-mono.pdf"
|
||||
dual = TRANSLATED_DIR / f"{paper_id}-dual.pdf"
|
||||
if mono.exists():
|
||||
if out_path.exists():
|
||||
out_path.unlink()
|
||||
mono.rename(out_path)
|
||||
if dual.exists():
|
||||
dual.unlink()
|
||||
log.info(f"Translated: {arxiv_id}")
|
||||
log.info(f"Translated: {paper_id}")
|
||||
except Exception as e:
|
||||
log.error(f"Translation failed for {arxiv_id}: {e}")
|
||||
log.error(f"Translation failed for {paper_id}: {e}")
|
||||
finally:
|
||||
_translating.discard(arxiv_id)
|
||||
_translating.discard(paper_id)
|
||||
|
||||
ThreadPoolExecutor(max_workers=1).submit(do_translate)
|
||||
return {"arxiv_id": arxiv_id, "status": "started"}
|
||||
return {"paper_id": paper_id, "status": "started"}
|
||||
|
||||
@app.get("/api/translate/status")
|
||||
def translation_status():
|
||||
|
||||
Reference in New Issue
Block a user