# syntax=docker/dockerfile:1
# PyTorch + CUDA 12.1 + cuDNN 8 (matches the L4 GPU)
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime

# -- System packages: minimal, but everything we really need --
# git-lfs: large model files; portaudio19-dev + ffmpeg: audio I/O for TTS.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ffmpeg \
        git-lfs \
        portaudio19-dev && \
    rm -rf /var/lib/apt/lists/*

# Non-root user (Spaces recommendation); all pip installs below land in
# /home/user/.local, which is put on PATH here.
RUN useradd -m -u 1000 user
USER user
WORKDIR /app
ENV PATH="/home/user/.local/bin:$PATH"

# Keep the HF model cache inside /app; disable lm-format-enforcer in vLLM.
ENV HF_HOME=/app/.cache
ENV VLLM_USE_LM_FORMAT_ENFORCER=0

# GPU-friendly Torch upgrade (enable if desired)
#RUN pip install --no-cache-dir \
#    torch==2.3.1+cu121 torchaudio==2.3.1+cu121 \
#    --index-url https://download.pytorch.org/whl/cu121

# Pinned core deps installed first so vLLM resolves against them.
RUN pip install --no-cache-dir "transformers==4.40.2" "lm-format-enforcer==0.9.8"
# The specifier MUST be quoted: an unquoted `vllm>=0.9.0` is parsed by the
# shell as a redirection (`pip install vllm` with stdout sent to a file named
# `=0.9.0`), silently dropping the version constraint.
RUN pip install --no-cache-dir "vllm>=0.9.0"

# Python dependencies — copy only the manifest here so these layers stay
# cached until requirements.txt itself changes.
COPY --chown=user requirements.txt .
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
RUN pip install --no-cache-dir "realtimetts[system]>=0.5.5"
# FlashInfer is optional; fall through when no wheel exists for this platform.
RUN pip install --no-cache-dir flashinfer-cu121-preview || echo "FlashInfer not available – continuing without."

# Deliver only *this* engine requirement, but without the resolver:
# RUN pip install --no-cache-dir pyttsx3==2.90 --no-deps
# optional, to reduce warning spam
# RUN pip install --no-cache-dir azure-cognitiveservices-speech==1.33.0 --no-deps \
#     tqdm==4.66.1 --no-deps

# Orpheus/SNAC code + server — copied LAST so editing application source does
# not invalidate the dependency layers above (requirements.txt is simply
# overwritten with the identical file; final image contents are unchanged).
COPY --chown=user . /app

EXPOSE 7860

# ───── Environment ───────────────────────────────────────
ENV ORPHEUS_MODEL=SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1
ENV MODEL_ID="SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1"
ENV ORPHEUS_API_URL=http://127.0.0.1:1234

# ───── Entrypoint ────────────────────────────────────────
# Shell form is intentional: we need ${MODEL_ID} expansion and `&` to
# background the vLLM OpenAI server next to the uvicorn front-end.
# NOTE(review): two processes in one container with no init/supervisor — if
# the backgrounded vLLM server dies, the container keeps running; consider
# tini or a supervisor if that matters in production.
CMD bash -c "\
    python -m vllm.entrypoints.openai.api_server \
      --model ${MODEL_ID} \
      --port 1234 \
      --dtype bfloat16 \
      --gpu-memory-utilization 0.85 \
      --max-model-len 8192 & \
    uvicorn app:app --host 0.0.0.0 --port 7860"