Spaces: Running on L40S
miaoyibo committed
Commit · 56b9716
1 Parent(s): 5ce5804
vllm
Browse files:
- app.py +6 -0
- start.sh → serve_vllm.sh +1 -27
app.py
CHANGED
@@ -340,6 +340,12 @@ def main(args: argparse.Namespace):
     )
 
 if __name__ == "__main__":
+    print("start serving vllm")
+    script_path = os.path.join(os.path.dirname(__file__), "serve_vllm.sh")
+    subprocess.Popen(["bash", script_path])
+    time.sleep(300)
+    print("finished loading vllm")
+
     args = parse_args()
     print(args)
     main(args)
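Note: the added block launches serve_vllm.sh in the background and then waits a fixed 300 seconds before calling main(). The wait loop removed from start.sh (below) instead polled the /v1/models endpoint until the model appeared. A minimal Python sketch of that readiness check, which could stand in for the fixed sleep, follows; the helper name wait_for_vllm and the stdlib-only HTTP client are illustrative, while the URL, timeout, and "moonshotai" match are taken from the shell script.

import time
import urllib.request

def wait_for_vllm(url="http://localhost:8080/v1/models", timeout=500, interval=5):
    """Poll the vLLM model-list endpoint until the served model appears,
    mirroring the wait loop removed from start.sh."""
    elapsed = 0
    while True:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if "moonshotai" in resp.read().decode():
                    print("vLLM service is up")
                    return
        except OSError:
            pass  # server not listening yet; keep polling
        if elapsed >= timeout:
            raise TimeoutError(f"vLLM did not start within {timeout}s")
        time.sleep(interval)
        elapsed += interval

Calling wait_for_vllm() in place of time.sleep(300) would return as soon as the server is ready instead of always blocking for five minutes.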
start.sh → serve_vllm.sh
RENAMED
@@ -11,32 +11,6 @@ python -m vllm.entrypoints.openai.api_server \
     --port 8080
 
 SERVICE_URL="http://localhost:8080/v1/models"
-TIMEOUT=
+TIMEOUT=500 # maximum wait in seconds
 INTERVAL=5 # polling interval in seconds
 ELAPSED=0
-
-echo "[*] Waiting for the vLLM service to start, up to ${TIMEOUT}s ..."
-
-while true; do
-    # Query the model-list endpoint and check whether the expected model is listed
-    if curl -s "$SERVICE_URL" | grep -q "moonshotai"; then
-        echo "✅ vLLM service started successfully!"
-        break
-    fi
-
-    if [ $ELAPSED -ge $TIMEOUT ]; then
-        echo "❌ Timed out; the vLLM service did not start."
-        exit 1
-    fi
-
-    echo "⏳ Service not ready yet, retrying in ${INTERVAL}s..."
-    sleep $INTERVAL
-    ELAPSED=$((ELAPSED + INTERVAL))
-done
-
-# Commands to run after a successful deployment go here
-echo "[*] Running follow-up steps..."
-
-# e.g. start the frontend service, run test scripts, etc.
-# ./start_frontend.sh
-python app.py
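Note: the script started by this commit is vLLM's OpenAI-compatible API server on port 8080, so client code in the Space can reach it over plain HTTP. A minimal usage sketch, assuming no --api-key was configured and reading the model id from /v1/models rather than hardcoding it (the commit only shows that the id contains "moonshotai"):

import json
import urllib.request

BASE = "http://localhost:8080/v1"

# Discover the served model id from the same endpoint the wait loop polled.
with urllib.request.urlopen(f"{BASE}/models") as resp:
    model_id = json.load(resp)["data"][0]["id"]

# One chat completion against the OpenAI-compatible vLLM server.
payload = {
    "model": model_id,
    "messages": [{"role": "user", "content": "Hello"}],
}
req = urllib.request.Request(
    f"{BASE}/chat/completions",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)["choices"][0]["message"]["content"])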