# syntax=docker/dockerfile:1
# The syntax directive pins the Dockerfile frontend; this file relies on
# BuildKit-only features (RUN --mount=type=cache).

# Stage 1: Source of the pinned uv binary
FROM ghcr.io/astral-sh/uv:0.2.12 AS uv

# Stage 2: Main application image
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04

# Build-time only: keeps apt non-interactive during the build without baking
# the variable into the runtime environment of the final image.
ARG DEBIAN_FRONTEND=noninteractive

# Copy uv from first stage
COPY --from=uv /uv /uv

# Install Python, pip, venv, and system dependencies.
# update + install + list cleanup stay in one layer so the apt index is never
# stale and never persisted in the image.
RUN apt-get update && \
    apt-get install -y --fix-missing --no-install-recommends \
        build-essential \
        ffmpeg \
        git \
        python3-pip \
        python3.11 \
        python3.11-venv \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Create virtual environment with uv.
# The interpreter is selected explicitly: python3-pip also brings in the
# distro's default python3, and without --python uv uses whichever interpreter
# it discovers first, which is not guaranteed to be the 3.11 installed above.
RUN --mount=type=cache,target=/root/.cache/uv \
    /uv venv --python /usr/bin/python3.11 /opt/venv

# Make the virtual environment the default Python for every later step
ENV VIRTUAL_ENV=/opt/venv \
    PATH="/opt/venv/bin:$PATH"

# Create user and set permissions (required for HF Spaces).
# /app and the uv cache directory are created here, while still root, so their
# ownership does not depend on how the builder creates WORKDIR after USER.
RUN useradd -m -u 1000 user && \
    chown -R user /opt/venv && \
    mkdir -p /app/.uv-cache && \
    chown -R user:user /app

# Switch to user context
USER user
WORKDIR /app

# Set home to user's home directory and other envs.
# UV_CACHE_DIR must match the directory created in the root step above.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    HF_HOME=/home/user/.cache/huggingface \
    UV_CACHE_DIR=/app/.uv-cache

# Copy requirements first so the dependency layers are reused until
# requirements.txt changes
COPY --chown=user requirements.txt .

# Install Python packages with uv caching.
# No --no-cache-dir here: that flag disables the cache the mount provides.
# The cache mount is not part of the image, so it adds nothing to image size.
RUN --mount=type=cache,target=$UV_CACHE_DIR,uid=1000,gid=1000 \
    /uv pip install -r requirements.txt

# Install build dependencies for flash-attn (it is installed below with
# --no-build-isolation, so its build tools must already be in the venv)
RUN --mount=type=cache,target=$UV_CACHE_DIR,uid=1000,gid=1000 \
    /uv pip install -U setuptools wheel ninja packaging

# Install flash-attn
RUN --mount=type=cache,target=$UV_CACHE_DIR,uid=1000,gid=1000 \
    /uv pip install flash-attn --no-build-isolation
    #/uv pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl

# Copy application code
COPY --chown=user . .

# Expose FastRTC port (matches HF Spaces default)
EXPOSE 7860

# Start the application using uvicorn (FastAPI)
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]