feat: pdf2zh + DeepSeek V4 Flash 翻译集成

This commit is contained in:
2026-06-02 12:14:59 +00:00
parent beae7102b8
commit 06724077c0
6 changed files with 143 additions and 2 deletions

View File

@@ -1,16 +1,30 @@
FROM python:3.11-slim

# System packages, installed in a single layer together with the apt index
# refresh and cleanup so that no stale package lists end up in the image.
#   - poppler-utils: PDF command-line utilities.
#   - libgl1, libglib2.0-0, libsm6, libxext6, libxrender-dev, libgomp1:
#     shared libraries (OpenGL, GLib, X11, OpenMP); presumably needed by the
#     imaging / inference dependencies pulled in by pdf2zh -- confirm.
# libgl1 replaces libgl1-mesa-glx: the latter was a transitional package that
# is no longer shipped by Debian 12 (bookworm) and newer, so requesting it
# makes `apt-get install` fail on current python:3.11-slim images.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        poppler-utils \
    && rm -rf /var/lib/apt/lists/*
# All following relative paths (COPY destinations, the runtime working
# directory) resolve against /app.
WORKDIR /app

# Install PyTorch from the CPU-only wheel index first, then pdf2zh from the
# default index. They stay in separate RUN steps because only the torch
# install uses the alternate --index-url.
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir pdf2zh

# Copy only the dependency manifest before installing, so this layer is
# reused until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download the layout model at build time. This runs after every pip
# install but BEFORE the application source is copied, so editing source
# files does not invalidate this layer and trigger a fresh model download.
RUN python3 -c "from pdf2zh.doclayout import OnnxModel; OnnxModel.from_pretrained()"

# Application source goes last: it is the most frequently changing input.
COPY . .
# Mount points for data that lives outside the image.
VOLUME ["/app/papers", "/app/data"]

# EXPOSE is documentation only and describes the default PORT below; if PORT
# is overridden at run time, publish the overridden port instead.
EXPOSE 8000

# Runtime defaults; each can be overridden with `docker run -e NAME=value`.
ENV PORT=8000 \
    LOG_LEVEL=info \
    DEEPSEEK_MODEL=deepseek-chat

# A shell is needed to expand ${PORT} and ${LOG_LEVEL} (exec-form CMD does no
# variable substitution). `exec` replaces that shell with the uvicorn process
# so the server runs as PID 1 and receives SIGTERM from `docker stop`
# directly. The expansions are quoted so a value is always passed as a single
# argument.
CMD ["sh", "-c", "exec python3 -m uvicorn api.server:app --host 0.0.0.0 --port \"${PORT}\" --log-level \"${LOG_LEVEL}\""]