# Kimi-Dev-72B / serve_vllm.sh
# Author: miaoyibo — repo: vllm — commit: 56b9716 (423 bytes)
# NOTE(review): the lines above were scraped web-page chrome ("raw",
# "history blame"); commented out so the file is valid shell.
#!/bin/bash
# Launch a vLLM OpenAI-compatible API server for Kimi-Dev-72B, then set up
# the parameters used to poll the /v1/models endpoint until the server is
# ready (the polling loop itself follows below this chunk).
set -euo pipefail

# Start the server in the BACKGROUND. The original foreground invocation
# blocked here, so the readiness-check setup below could never run while
# the server was alive — the poll loop was unreachable until server exit.
python -m vllm.entrypoints.openai.api_server \
  --model moonshotai/Kimi-Dev-72B \
  --tensor-parallel-size 4 \
  --max-num-seqs 8 \
  --max-model-len 131072 \
  --gpu-memory-utilization 0.9 \
  --host localhost \
  --served-model-name kimi-dev \
  --port 8080 &
VLLM_PID=$!   # PID of the background server, for later wait/kill

# Endpoint polled to detect server readiness.
SERVICE_URL="http://localhost:8080/v1/models"
TIMEOUT=500   # maximum seconds to wait for the server to come up
INTERVAL=5    # seconds between readiness probes
ELAPSED=0     # seconds waited so far, advanced by the poll loop