# syntax=docker/dockerfile:1
# PyTorch + CUDA 12.1 + cuDNN 8 (matches the L4 GPU)
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime

# -- System packages: minimal, but everything we really need --
# git-lfs: large model files; portaudio19-dev + ffmpeg: audio I/O for TTS.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ffmpeg \
        git-lfs \
        portaudio19-dev && \
    rm -rf /var/lib/apt/lists/*

# Non-root user (Spaces recommendation); all pip installs below land in
# /home/user/.local, which is put on PATH here.
RUN useradd -m -u 1000 user
USER user
WORKDIR /app
ENV PATH="/home/user/.local/bin:$PATH"

# Keep the HF model cache inside /app; disable lm-format-enforcer in vLLM.
ENV HF_HOME=/app/.cache
ENV VLLM_USE_LM_FORMAT_ENFORCER=0

# GPU-friendly Torch upgrade (enable if desired)
#RUN pip install --no-cache-dir \
#    torch==2.3.1+cu121 torchaudio==2.3.1+cu121 \
#    --index-url https://download.pytorch.org/whl/cu121

# Pinned core deps installed first so vLLM resolves against them.
RUN pip install --no-cache-dir "transformers==4.40.2" "lm-format-enforcer==0.9.8"
# The specifier MUST be quoted: an unquoted `vllm>=0.9.0` is parsed by the
# shell as a redirection (`pip install vllm` with stdout sent to a file named
# `=0.9.0`), silently dropping the version constraint.
RUN pip install --no-cache-dir "vllm>=0.9.0"

# Python dependencies — copy only the manifest here so these layers stay
# cached until requirements.txt itself changes.
COPY --chown=user requirements.txt .
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
RUN pip install --no-cache-dir "realtimetts[system]>=0.5.5"
# FlashInfer is optional; fall through when no wheel exists for this platform.
RUN pip install --no-cache-dir flashinfer-cu121-preview || echo "FlashInfer not available – continuing without."

# Deliver only *this* engine requirement, but without the resolver:
# RUN pip install --no-cache-dir pyttsx3==2.90 --no-deps
# optional, to reduce warning spam
# RUN pip install --no-cache-dir azure-cognitiveservices-speech==1.33.0 --no-deps \
#     tqdm==4.66.1 --no-deps

# Orpheus/SNAC code + server — copied LAST so editing application source does
# not invalidate the dependency layers above (requirements.txt is simply
# overwritten with the identical file; final image contents are unchanged).
COPY --chown=user . /app

EXPOSE 7860

# ───── Environment ───────────────────────────────────────
ENV ORPHEUS_MODEL=SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1
ENV MODEL_ID="SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1"
ENV ORPHEUS_API_URL=http://127.0.0.1:1234

# ───── Entrypoint ────────────────────────────────────────
# Shell form is intentional: we need ${MODEL_ID} expansion and `&` to
# background the vLLM OpenAI server next to the uvicorn front-end.
# NOTE(review): two processes in one container with no init/supervisor — if
# the backgrounded vLLM server dies, the container keeps running; consider
# tini or a supervisor if that matters in production.
CMD bash -c "\
    python -m vllm.entrypoints.openai.api_server \
      --model ${MODEL_ID} \
      --port 1234 \
      --dtype bfloat16 \
      --gpu-memory-utilization 0.85 \
      --max-model-len 8192 & \
    uvicorn app:app --host 0.0.0.0 --port 7860"