feat: pdf2zh + DeepSeek V4 Flash 翻译集成
This commit is contained in:
@@ -482,6 +482,59 @@ def serve_translated(arxiv_id: str):
|
||||
return FileResponse(fp, media_type="application/pdf",
|
||||
headers={"Content-Disposition": "inline"})
|
||||
|
||||
# ─── Routes: Trigger translation ───────────────────────
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import subprocess, threading
|
||||
|
||||
# Guards every check-and-modify of `_translating`, which is touched both by
# request handlers and by the background translation worker thread.
_translate_lock = threading.Lock()
# arXiv IDs whose translation is queued or currently running.
_translating = set()
# One shared single-worker pool so translations run one at a time. Building a
# new executor per request would make `max_workers=1` bound nothing.
_translate_executor = ThreadPoolExecutor(max_workers=1)


def _run_translation(arxiv_id, pdf_path, out_path):
    """Translate one PDF with pdf2zh and publish the result at *out_path*.

    Runs on the background worker thread. The caller must already have added
    *arxiv_id* to `_translating`; this function always removes it again when
    it finishes, whether the translation succeeded or not.

    Args:
        arxiv_id: Identifier of the paper, used to locate pdf2zh's output files.
        pdf_path: Path of the source PDF to translate.
        out_path: Final location of the translated (monolingual) PDF.
    """
    try:
        # Imported lazily so pdf2zh is only loaded when a translation is
        # actually requested.
        from pdf2zh.doclayout import OnnxModel
        from pdf2zh.high_level import translate

        model = OnnxModel.from_pretrained()
        translate(
            [str(pdf_path)],
            output=str(TRANSLATED_DIR),
            lang_in="en",
            lang_out="zh",
            service="deepseek",
            thread=4,
            model=model,
        )

        # Output names this code expects from pdf2zh: "<id>-mono.pdf" (target
        # language only) and "<id>-dual.pdf" (bilingual).
        mono = TRANSLATED_DIR / f"{arxiv_id}-mono.pdf"
        dual = TRANSLATED_DIR / f"{arxiv_id}-dual.pdf"

        # Only the monolingual file is kept; drop the bilingual one.
        if dual.exists():
            dual.unlink()

        if not mono.exists():
            # Do not report success when nothing was produced.
            log.error(f"Translation failed for {arxiv_id}: no output file produced")
            return

        # Path.replace overwrites an existing target, so no separate unlink
        # step is needed.
        mono.replace(out_path)
        log.info(f"Translated: {arxiv_id}")
    except Exception as e:
        # Top-level boundary of the worker thread: log instead of letting the
        # exception vanish inside the discarded Future.
        log.error(f"Translation failed for {arxiv_id}: {e}")
    finally:
        with _translate_lock:
            _translating.discard(arxiv_id)


@app.post("/api/translate/{arxiv_id}")
async def trigger_translation(arxiv_id: str):
    """Trigger pdf2zh translation for a paper (DeepSeek V4 Flash).

    The translation itself runs on a background thread; this handler only
    validates the request, registers the job and returns immediately.

    Args:
        arxiv_id: Identifier of the paper; the source PDF is looked up at
            ``PAPERS_DIR / "arxiv" / f"{arxiv_id}.pdf"``.

    Returns:
        A dict with ``arxiv_id`` and a ``status`` of ``"already_translated"``,
        ``"in_progress"`` or ``"started"``.

    Raises:
        HTTPException: 404 if the source PDF does not exist.
    """
    pdf_path = PAPERS_DIR / "arxiv" / f"{arxiv_id}.pdf"
    if not pdf_path.exists():
        raise HTTPException(status_code=404, detail="PDF not found")

    out_path = TRANSLATED_DIR / f"{arxiv_id}.pdf"
    if out_path.exists():
        return {"arxiv_id": arxiv_id, "status": "already_translated"}

    # Register the job before handing it to the worker. Registering inside the
    # worker would leave a window in which a second request for the same paper
    # is not seen as in progress and starts a duplicate translation.
    with _translate_lock:
        if arxiv_id in _translating:
            return {"arxiv_id": arxiv_id, "status": "in_progress"}
        _translating.add(arxiv_id)

    try:
        _translate_executor.submit(_run_translation, arxiv_id, pdf_path, out_path)
    except Exception:
        # The job never reached the worker, so its `finally` will not run;
        # undo the registration here so the paper can be retried.
        with _translate_lock:
            _translating.discard(arxiv_id)
        raise

    return {"arxiv_id": arxiv_id, "status": "started"}
|
||||
|
||||
@app.get("/api/translate/status")
def translation_status():
    """Report the arXiv IDs currently recorded in `_translating`.

    Returns:
        A dict with a single key, ``"translating"``, holding a list snapshot
        of the module-level `_translating` set.
    """
    in_flight = [*_translating]
    return {"translating": in_flight}
|
||||
|
||||
# ─── Health ─────────────────────────────────────────────
|
||||
@app.get("/api/health")
|
||||
def health():
|
||||
|
||||
Reference in New Issue
Block a user