Spaces:
Running
on
L40S
Running
on
L40S
miaoyibo
committed on
Commit
·
3425837
1
Parent(s):
eca802f
- serve_vllm.sh +2 -2
serve_vllm.sh
CHANGED
@@ -3,8 +3,8 @@
|
|
3 |
python -m vllm.entrypoints.openai.api_server \
|
4 |
--model moonshotai/Kimi-Dev-72B \
|
5 |
--tensor-parallel-size 4 \
|
6 |
-
--max-num-seqs
|
7 |
-
--max-model-len
|
8 |
--gpu-memory-utilization 0.9 \
|
9 |
--host localhost \
|
10 |
--served-model-name kimi-dev \
|
|
|
3 |
python -m vllm.entrypoints.openai.api_server \
|
4 |
--model moonshotai/Kimi-Dev-72B \
|
5 |
--tensor-parallel-size 4 \
|
6 |
+
--max-num-seqs 4 \
|
7 |
+
--max-model-len 65536 \
|
8 |
--gpu-memory-utilization 0.9 \
|
9 |
--host localhost \
|
10 |
--served-model-name kimi-dev \
|