From 7f16e5decffd45d91ff73ec73122dd4ed800263c Mon Sep 17 00:00:00 2001 From: LaoWang <257199637@qq.com> Date: Tue, 2 Jun 2026 12:30:38 +0000 Subject: [PATCH] =?UTF-8?q?fix:=20=E7=BF=BB=E8=AF=91=20API=20=E6=94=AF?= =?UTF-8?q?=E6=8C=81=20arXiv=20+=20HF=20=E4=B8=A4=E7=A7=8D=E8=AE=BA?= =?UTF-8?q?=E6=96=87=E6=BA=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 7 ++----- api/server.py | 53 ++++++++++++++++++++++++++++++++--------------- static/app.js | 4 +++- static/index.html | 4 +++- 4 files changed, 44 insertions(+), 24 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8e7d53a..f819031 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,9 +2,7 @@ FROM python:3.11-slim RUN apt-get update && apt-get install -y --no-install-recommends \ poppler-utils \ - libgl1-mesa-glx \ - libglib2.0-0 \ - libsm6 libxext6 libxrender-dev libgomp1 \ + libgomp1 \ && rm -rf /var/lib/apt/lists/* WORKDIR /app @@ -18,8 +16,7 @@ RUN pip install --no-cache-dir -r requirements.txt COPY . . -# Pre-download the layout model -RUN python3 -c "from pdf2zh.doclayout import OnnxModel; OnnxModel.from_pretrained()" +LABEL org.opencontainers.image.description="LLM 论文图书馆 + pdf2zh 翻译引擎" VOLUME ["/app/papers", "/app/data"] EXPOSE 8000 diff --git a/api/server.py b/api/server.py index 3b4932e..5ff3729 100644 --- a/api/server.py +++ b/api/server.py @@ -489,23 +489,42 @@ import subprocess, threading _translate_lock = threading.Lock() _translating = set() -@app.post("/api/translate/{arxiv_id}") -async def trigger_translation(arxiv_id: str): - """Trigger pdf2zh translation for a paper (DeepSeek V4 Flash)""" - pdf_path = PAPERS_DIR / "arxiv" / f"{arxiv_id}.pdf" - if not pdf_path.exists(): - raise HTTPException(status_code=404, detail="PDF not found") +@app.get("/api/translate/status") +def translation_status(): + return {"translating": list(_translating)} + +@app.post("/api/translate/{paper_id}") +async def trigger_translation(paper_id: str): + """Trigger pdf2zh translation for a paper (DeepSeek V4 Flash). + paper_id can be arxiv ID (e.g. 1706.03762) or a HF filename.""" + # Find the PDF + pdf_path = None + # Try arxiv + candidate = PAPERS_DIR / "arxiv" / f"{paper_id}.pdf" + if candidate.exists(): + pdf_path = candidate + else: + # Try HF papers directory + hf_dir = PAPERS_DIR / "hf" + if hf_dir.exists(): + for f in hf_dir.glob("*.pdf"): + if paper_id in f.stem: + pdf_path = f + break - out_path = TRANSLATED_DIR / f"{arxiv_id}.pdf" + if not pdf_path: + raise HTTPException(status_code=404, detail=f"PDF not found for {paper_id}") + + out_path = TRANSLATED_DIR / f"{paper_id}.pdf" if out_path.exists(): - return {"arxiv_id": arxiv_id, "status": "already_translated"} + return {"paper_id": paper_id, "status": "already_translated"} - if arxiv_id in _translating: - return {"arxiv_id": arxiv_id, "status": "in_progress"} + if paper_id in _translating: + return {"paper_id": paper_id, "status": "in_progress"} def do_translate(): try: - _translating.add(arxiv_id) + _translating.add(paper_id) from pdf2zh.doclayout import OnnxModel from pdf2zh.high_level import translate model = OnnxModel.from_pretrained() @@ -514,22 +533,22 @@ async def trigger_translation(arxiv_id: str): lang_in='en', lang_out='zh', service='deepseek', thread=4, model=model, ) - mono = TRANSLATED_DIR / f"{arxiv_id}-mono.pdf" - dual = TRANSLATED_DIR / f"{arxiv_id}-dual.pdf" + mono = TRANSLATED_DIR / f"{paper_id}-mono.pdf" + dual = TRANSLATED_DIR / f"{paper_id}-dual.pdf" if mono.exists(): if out_path.exists(): out_path.unlink() mono.rename(out_path) if dual.exists(): dual.unlink() - log.info(f"Translated: {arxiv_id}") + log.info(f"Translated: {paper_id}") except Exception as e: - log.error(f"Translation failed for {arxiv_id}: {e}") + log.error(f"Translation failed for {paper_id}: {e}") finally: - _translating.discard(arxiv_id) + _translating.discard(paper_id) ThreadPoolExecutor(max_workers=1).submit(do_translate) - return {"arxiv_id": arxiv_id, "status": "started"} + return {"paper_id": paper_id, "status": "started"} @app.get("/api/translate/status") def translation_status(): diff --git a/static/app.js b/static/app.js index bff1666..9e38427 100644 --- a/static/app.js +++ b/static/app.js @@ -151,7 +151,9 @@ function renderPaper(p) { const links = []; if (pdfUrl) links.push(``); else if (p.arxiv) links.push(`📋 arXiv`); - if (p.arxiv) links.push(`📖 译文`); + // Show translation button for ALL papers with arxiv or pdf + const paperId = p.arxiv || (p.pdf ? p.pdf.split('/').pop().replace('.pdf','') : null); + if (paperId) links.push(`📖 译文`); return `
${p.year||'—'}
${p.title}
${p.authors||''}${p.venue?`${p.venue}`:''}${tags}
diff --git a/static/index.html b/static/index.html index b687345..1cfc950 100644 --- a/static/index.html +++ b/static/index.html @@ -183,7 +183,9 @@ function renderPaper(p) { const links = []; if (pdfUrl) links.push(``); else if (p.arxiv) links.push(`📋 arXiv`); - if (p.arxiv) links.push(`📖 译文`); + // Show translation button for ALL papers with arxiv or pdf + const paperId = p.arxiv || (p.pdf ? p.pdf.split('/').pop().replace('.pdf','') : null); + if (paperId) links.push(`📖 译文`); return `
${p.year||'—'}
${p.title}
${p.authors||''}${p.venue?`${p.venue}`:''}${tags}