# Spaces: Running on L40S
# Start the vLLM OpenAI-compatible API server (module
# vllm.entrypoints.openai.api_server) for the moonshotai/Kimi-Dev-72B model.
# The model is exposed to clients under the name "kimi-dev" and the server
# binds to localhost:8080.
#
# Each continuation backslash must be the very last character on its line;
# anything after it (even a space) breaks the command into separate pieces.
#
# NOTE(review): as written this command runs in the foreground, so the
# readiness-poll settings defined after it are not reached until the server
# exits. If the script is meant to wait for the server to come up, this
# probably needs a trailing `&` -- confirm against the rest of the script.
python -m vllm.entrypoints.openai.api_server \
  --model moonshotai/Kimi-Dev-72B \
  --tensor-parallel-size 4 \
  --max-num-seqs 8 \
  --max-model-len 131072 \
  --gpu-memory-utilization 0.9 \
  --host localhost \
  --served-model-name kimi-dev \
  --port 8080
# Settings for polling the API server until it is ready.
# SERVICE_URL points at the model-listing endpoint on the same host/port the
# server is started with (localhost:8080).
SERVICE_URL="http://localhost:8080/v1/models"
TIMEOUT=500   # maximum number of seconds to wait
INTERVAL=5    # seconds between checks
ELAPSED=0     # seconds waited so far; starts at zero