# ────────────────
# Stage 1: Builder
# ────────────────
# Builds all Python dependencies into a self-contained virtual environment
# (/opt/venv) so that the compiler toolchain never reaches the final image.
FROM python:3.10-slim AS builder

# Toolchain and headers required only while compiling wheels.
RUN apt-get update && apt-get install -y \
    build-essential \
    gcc \
    g++ \
    pkg-config \
    libmupdf-dev \
    && rm -rf /var/lib/apt/lists/*

# Create the virtual environment first and put it at the front of PATH, so
# every later `pip`/`python` call operates on the venv rather than on the
# system interpreter.
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Upgrade the packaging tools *inside* the venv (this is the pip that installs
# requirements.txt below).
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# Copy only requirements.txt so this layer is rebuilt solely when the
# dependency list changes.
WORKDIR /build
COPY requirements.txt .

# Install Python packages into /opt/venv.
RUN pip install --no-cache-dir -r requirements.txt

# ────────────────
# Stage 2: Production
# ────────────────
FROM python:3.10-slim

# Runtime system libraries and command-line tools.
RUN apt-get update && apt-get install -y \
    poppler-utils \
    tesseract-ocr \
    tesseract-ocr-eng \
    tesseract-ocr-fas \
    mupdf-tools \
    libmupdf-dev \
    ffmpeg \
    libopencv-dev \
    libgl1-mesa-dri \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    libgbm1 \
    libxss1 \
    libgtk-3-0 \
    libxtst6 \
    libxrandr2 \
    libasound2 \
    libpangocairo-1.0-0 \
    libatk1.0-0 \
    libcairo-gobject2 \
    libgdk-pixbuf-2.0-0 \
    libffi-dev \
    libssl-dev \
    curl \
    wget \
    sqlite3 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Create the non-root user. `-m` is required because `-r` (system account)
# does not create a home directory on its own, and libraries that write
# under ~ would otherwise fail.
RUN groupadd -g 1000 appuser \
    && useradd -r -m -u 1000 -g appuser appuser

# Copy the prebuilt virtual environment from the builder stage.
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Environment variables. These are declared BEFORE the model pre-download so
# that the download populates the same cache directory the app reads at
# runtime.
# GRADIO_SERVER_NAME makes a bare `launch()` listen on all interfaces so the
# exposed port is reachable from outside the container.
# NOTE(review): app.py is not visible here; an explicit server_name passed in
# app.py would take precedence over this variable - confirm.
ENV PYTHONPATH=/app \
    TRANSFORMERS_CACHE=/app/cache \
    HF_HOME=/app/cache \
    LOG_LEVEL=INFO \
    ENVIRONMENT=production \
    PYTHONUNBUFFERED=1 \
    GRADIO_SERVER_NAME=0.0.0.0

# Set working directory.
WORKDIR /app

# WORKDIR creates /app owned by root; hand it (and the cache directory) to
# appuser so models can be written there at build time and at runtime.
RUN mkdir -p /app/cache && chown -R appuser:appuser /app

# Switch to the non-root user before anything is written to the cache, so the
# cached files are owned by the account that runs the app.
USER appuser

# Pre-download the TrOCR model to speed up startup. This is best-effort: a
# failed download does not fail the build, the model is then fetched on first
# use. It runs before the application COPY so source changes do not
# invalidate this (large) layer.
RUN python -c "from transformers import TrOCRProcessor, VisionEncoderDecoderModel; \
    TrOCRProcessor.from_pretrained('microsoft/trocr-base-printed'); \
    VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-printed')" \
    || echo "Model download failed, will download at runtime" >&2

# Copy the application files, owned by the runtime user.
COPY --chown=appuser:appuser . .

# Expose port.
EXPOSE 7860

# Healthcheck: fail on connection errors, on timeouts, and on HTTP 4xx/5xx
# responses (raise_for_status), not only when the socket is unreachable.
HEALTHCHECK --interval=45s --timeout=30s --start-period=180s --retries=10 \
    CMD python -c "import requests; requests.get('http://localhost:7860', timeout=10).raise_for_status()" || exit 1

# Start Gradio app (not FastAPI).
CMD ["python", "app.py"]