# Spaces: Paused
# ────────────────
# Stage 1: Builder
# ────────────────
# Builder stage: compiles/installs the Python dependencies into a
# self-contained virtual environment at /opt/venv. Only that directory is
# meant to be copied out of this stage; the compilers installed here stay
# behind.
FROM python:3.10-slim AS builder

# Toolchain and headers needed to build wheels from source.
# --no-install-recommends keeps optional extras out of the layer, and the apt
# package lists are removed in the same RUN so they never persist in a layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        g++ \
        gcc \
        libmupdf-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Create the virtual environment FIRST and put it at the front of PATH, so
# that every following `pip` call resolves to /opt/venv/bin/pip.
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Upgrade the packaging tools inside the venv. Running this before the venv
# exists would only upgrade the base interpreter's site-packages and leave
# the venv with its bundled pip and no wheel.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# Copy only the requirements manifest so this layer is rebuilt only when the
# dependency list changes.
WORKDIR /build
COPY requirements.txt .

# Install Python packages into the venv (no pip download cache in the layer).
RUN pip install --no-cache-dir -r requirements.txt
# ────────────────
# Stage 2: Production
# ────────────────
# Production stage base image.
FROM python:3.10-slim

# Runtime OS packages: PDF tooling (poppler, mupdf), Tesseract OCR with the
# English and Persian (fas) language data, ffmpeg, the X11/GTK/GL shared
# libraries, plus curl/wget/sqlite3 utilities.
# The list is sorted alphabetically, one package per line, for clean diffs.
# --no-install-recommends installs only hard dependencies; the apt caches are
# cleaned in the same RUN so they do not persist in the layer.
# NOTE(review): if any tool relied on a package that was previously pulled in
# only as a "Recommends", add it to this list explicitly.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        libasound2 \
        libatk1.0-0 \
        libcairo-gobject2 \
        libffi-dev \
        libgbm1 \
        libgdk-pixbuf-2.0-0 \
        libgl1-mesa-dri \
        libglib2.0-0 \
        libgomp1 \
        libgtk-3-0 \
        libmupdf-dev \
        libopencv-dev \
        libpangocairo-1.0-0 \
        libsm6 \
        libssl-dev \
        libxext6 \
        libxrandr2 \
        libxrender-dev \
        libxss1 \
        libxtst6 \
        mupdf-tools \
        poppler-utils \
        sqlite3 \
        tesseract-ocr \
        tesseract-ocr-eng \
        tesseract-ocr-fas \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Create the non-root runtime account (UID/GID 1000). --create-home is passed
# explicitly because a --system account gets no home directory otherwise, and
# libraries that write under ~ need one to exist.
RUN groupadd --gid 1000 appuser \
    && useradd --system --create-home --uid 1000 --gid appuser appuser

# Copy virtual environment from builder
COPY --from=builder /opt/venv /opt/venv

# Runtime environment. These are declared BEFORE the model pre-download below
# so the download lands in the same cache directory (/app/cache) that the
# application reads at runtime; otherwise the weights would be written to the
# build user's default cache and never be found again.
ENV PATH="/opt/venv/bin:$PATH" \
    PYTHONPATH=/app \
    TRANSFORMERS_CACHE=/app/cache \
    HF_HOME=/app/cache \
    LOG_LEVEL=INFO \
    ENVIRONMENT=production \
    PYTHONUNBUFFERED=1

# Set working directory. WORKDIR creates /app owned by root, so hand it (and
# the cache directory) to appuser; without this the non-root process cannot
# create or write /app/cache.
WORKDIR /app
RUN mkdir -p /app/cache && chown -R appuser:appuser /app

# Switch to non-root user. Everything below runs, and writes files, as appuser.
USER appuser

# Pre-download TrOCR model to speed up startup. This is best-effort: a failed
# download does not fail the build, the model is fetched at first use instead.
# It runs before the source COPY so that editing application code does not
# invalidate this large layer.
RUN python -c "from transformers import TrOCRProcessor, VisionEncoderDecoderModel; \
TrOCRProcessor.from_pretrained('microsoft/trocr-base-printed'); \
VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-printed')" || echo "Model download failed, will download at runtime"

# Copy all files (owned by the runtime user). Add a .dockerignore to keep
# .git, local caches and secrets out of the build context.
COPY --chown=appuser:appuser . .
# Expose port (documentation only; the port must still be published at run time)
EXPOSE 7860

# Healthcheck: probe the HTTP port with curl, which is installed in this image.
# --fail makes HTTP error statuses (>= 400) count as unhealthy, and --max-time
# bounds the request below the 30s HEALTHCHECK timeout so a wedged server is
# reported instead of hanging the probe.
HEALTHCHECK --interval=45s --timeout=30s --start-period=180s --retries=10 \
    CMD curl --fail --silent --show-error --max-time 25 --output /dev/null http://localhost:7860/ || exit 1

# Start Gradio app (not FastAPI). Exec form keeps python as PID 1 so it
# receives SIGTERM from `docker stop` directly.
CMD ["python", "app.py"]