File size: 847 Bytes
ddfa14d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

# NOTE: replace the empty string below with the path to your own model.
export BASE_MODEL_PATH=''
export BASE_PORT=8231
# Print the effective settings. printf with quoted expansions shows the values
# verbatim: no word splitting or globbing on a path containing spaces or
# wildcard characters, and no misparsing of values that look like echo options.
printf '%s\n' "$BASE_MODEL_PATH"
printf '%s\n' "$BASE_PORT"

# Show whatever currently has the port open (informational output only).
lsof -i ":${BASE_PORT}"

# Terminate those processes before launching the server.
# `lsof -t` prints bare PIDs, one per line; when nothing has the port open the
# list is empty, and running `kill -9` with no operands would only print a
# usage error, so the kill is skipped in that case.
stale_pids=$(lsof -t -i:"${BASE_PORT}")
if [ -n "$stale_pids" ]; then
  # Intentionally unquoted: the newline-separated PID list must be split into
  # one argument per PID.
  # shellcheck disable=SC2086
  kill -9 $stale_pids
fi

# Refuse to launch while the model path is still unset or empty. Unquoted, an
# empty value would simply vanish from the command line and the server would
# be started without the intended model argument.
: "${BASE_MODEL_PATH:?set BASE_MODEL_PATH to your model path before launching}"

# Start the vLLM OpenAI-compatible server in the foreground on GPUs 0-7.
# The script continues past this command only after the server exits.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 vllm serve "$BASE_MODEL_PATH" \
  --host 0.0.0.0 \
  --port "$BASE_PORT" \
  --max-model-len 12000 \
  --tensor-parallel-size 8 \
  --api-key pku \
  --trust-remote-code \
  --dtype auto \
  --enforce-eager \
  --swap-space 1 \
  --limit-mm-per-prompt "image=6"

# NOTE: --limit-mm-per-prompt should be set explicitly (here: image=6).


# Report the port again once the foreground server command has returned.
printf 'Base Port: %s\n' "$BASE_PORT"

# Show whatever still has the port open (informational output only).
lsof -i ":${BASE_PORT}"

# Terminate any leftover processes on the port.
# `lsof -t` prints bare PIDs, one per line; when nothing has the port open the
# list is empty, and running `kill -9` with no operands would only print a
# usage error, so the kill is skipped in that case.
leftover_pids=$(lsof -t -i:"${BASE_PORT}")
if [ -n "$leftover_pids" ]; then
  # Intentionally unquoted: the newline-separated PID list must be split into
  # one argument per PID.
  # shellcheck disable=SC2086
  kill -9 $leftover_pids
fi

# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 vllm serve /aifs4su/yaodong/spring_r1_model/QVQ-72B-Preview --enable-reasoning --reasoning-parser deepseek_r1 --host 0.0.0.0 --port 8009 --max-model-len 12000  --tensor-parallel-size 8 --api-key jiayi