# Docker Compose definition for a single vLLM OpenAI-compatible server that
# serves the Qwen3-Embedding-0.6B model from a locally mounted model directory.
services:
  Qwen3-Embedding-0.6B:
    container_name: Qwen3-Embedding-0.6B
    # Quoted on purpose: a bare `no` is a boolean under YAML 1.1, while Compose
    # expects the string "no" (never restart the container automatically).
    restart: "no"
    # Alternative image tags, kept for reference:
    # image: dengcao/vllm-openai:v0.9.2-dev  # built from the latest vLLM development version; tested and working, safe to use
    # image: dengcao/vllm-openai:v0.9.2rc2
    image: dengcao/vllm-openai:v0.9.2
    # Use the host's IPC namespace.
    # NOTE(review): presumably needed for vLLM shared-memory usage - confirm.
    ipc: host
    volumes:
      # Host ./models (relative to this file) is mounted at /models, which is
      # where the --model path below points.
      - ./models:/models
    # Arguments passed to the image's entrypoint.
    command:
      - "--model"
      - "/models/Qwen3-Embedding-0.6B"
      - "--served-model-name"
      - "Qwen3-Embedding-0.6B"
      - "--gpu-memory-utilization"
      - "0.90"
    ports:
      # host:container - quoted so the mapping is always read as a string.
      - "8007:8000"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]