Spaces:
Runtime error
Runtime error
VenkateshRoshan
committed on
Commit
·
cef9c21
1
Parent(s):
c95a175
health check updated
Browse files- app.py +99 -20
- dockerfile +1 -1
app.py
CHANGED
@@ -7,6 +7,96 @@ import os
|
|
7 |
import tarfile
|
8 |
from typing import List, Tuple
|
9 |
import boto3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
class CustomerSupportBot:
|
12 |
def __init__(self, model_path="models/customer_support_gpt"):
|
@@ -90,6 +180,11 @@ class CustomerSupportBot:
|
|
90 |
|
91 |
def create_chat_interface():
|
92 |
bot = CustomerSupportBot(model_path="/app/models")
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
# Function to run initial query
|
95 |
def initial_query():
|
@@ -188,29 +283,13 @@ def create_chat_interface():
|
|
188 |
|
189 |
return interface
|
190 |
|
191 |
-
from flask import Flask, jsonify
|
192 |
-
import threading
|
193 |
-
|
194 |
-
app = Flask(__name__)
|
195 |
-
|
196 |
-
# Health check endpoint for SageMaker
|
197 |
-
@app.route("/ping", methods=["GET"])
|
198 |
-
def ping():
|
199 |
-
return jsonify({"status": "ok"}), 200
|
200 |
-
|
201 |
-
# Start Flask app on a separate thread
|
202 |
-
def run_flask():
|
203 |
-
app.run(host="0.0.0.0", port=8081) # Different port for Flask
|
204 |
-
|
205 |
-
threading.Thread(target=run_flask).start()
|
206 |
-
|
207 |
if __name__ == "__main__":
|
208 |
demo = create_chat_interface()
|
209 |
-
print("Starting
|
210 |
demo.launch(
|
211 |
share=False,
|
212 |
-
server_name="0.0.0.0",
|
213 |
-
server_port=
|
214 |
debug=True,
|
215 |
-
inline=False
|
216 |
)
|
|
|
7 |
import tarfile
|
8 |
from typing import List, Tuple
|
9 |
import boto3
|
10 |
+
from flask import Flask, Response, jsonify, request
|
11 |
+
import threading
|
12 |
+
import psutil
|
13 |
+
import logging
|
14 |
+
from waitress import serve
|
15 |
+
|
16 |
+
# Set up logging
|
17 |
+
logging.basicConfig(level=logging.INFO)
|
18 |
+
logger = logging.getLogger(__name__)
|
19 |
+
|
20 |
+
class HealthCheckServer:
    """Lightweight Flask app exposing SageMaker-style /ping and /invocations
    endpoints, intended to run on port 8080 alongside the Gradio UI.

    The routes are registered in ``__init__`` as closures so they can reach
    ``self`` without Flask method-view boilerplate.
    """

    def __init__(self, bot=None):
        # json is needed for the /invocations payloads but is not imported at
        # module level in this file, so bring it into scope here.
        import json

        self.app = Flask(__name__)
        # bot may be None; /invocations guards against that below.
        self.bot = bot

        # Health check endpoint (SageMaker polls GET /ping; an empty 200 body
        # is sufficient — only the status code matters).
        @self.app.route("/ping", methods=["GET"])
        def ping():
            try:
                healthy = self.check_system_health()
                if healthy:
                    logger.info("Health check passed")
                    return Response(response='\n', status=200, mimetype='application/json')
                else:
                    logger.error("Health check failed")
                    return Response(response='\n', status=500, mimetype='application/json')
            except Exception as e:
                logger.error(f"Health check error: {str(e)}")
                return Response(response='\n', status=500, mimetype='application/json')

        # Inference endpoint (SageMaker POSTs JSON: {"message": "..."}).
        @self.app.route("/invocations", methods=["POST"])
        def invocations():
            try:
                if not request.is_json:
                    return Response(response='This predictor only supports JSON data',
                                    status=415,
                                    mimetype='text/plain')

                data = request.get_json()
                message = data.get('message', '')

                if not message:
                    return Response(response=json.dumps({"error": "No message provided"}),
                                    status=400,
                                    mimetype='application/json')

                # Fix: the constructor allows bot=None; fail with an explicit
                # 503 instead of an AttributeError-driven 500.
                if self.bot is None:
                    return Response(response=json.dumps({"error": "Model not loaded"}),
                                    status=503,
                                    mimetype='application/json')

                response = self.bot.generate_response(message)
                return Response(response=json.dumps({"response": response}),
                                status=200,
                                mimetype='application/json')

            except Exception as e:
                logger.error(f"Inference error: {str(e)}")
                return Response(response=json.dumps({"error": str(e)}),
                                status=500,
                                mimetype='application/json')

    def check_system_health(self):
        """Check if system and model are healthy.

        Returns:
            bool: True when the model is loaded and memory/CPU usage are
            below the failure thresholds (90% / 95%); False otherwise.
        """
        try:
            # Check if model is loaded (only meaningful when a bot was given).
            if self.bot and not hasattr(self.bot, 'model'):
                logger.error("Model not loaded")
                return False

            # Check memory usage.
            mem = psutil.virtual_memory()
            if mem.percent > 90:
                logger.error(f"High memory usage: {mem.percent}%")
                return False

            # Check CPU usage. Fix: sample once — the original called
            # psutil.cpu_percent() twice, so the logged value could differ
            # from the one that was tested.
            cpu = psutil.cpu_percent()
            if cpu > 95:
                logger.error(f"High CPU usage: {cpu}%")
                return False

            # Log current resource usage on the healthy path.
            logger.info(f"System health: Memory {mem.percent}%, CPU {cpu}%")
            return True

        except Exception as e:
            logger.error(f"Health check error: {str(e)}")
            return False

    def run(self):
        """Run the health check server (blocking; waitress on 0.0.0.0:8080)."""
        logger.info("Starting health check server on port 8080...")
        serve(self.app, host='0.0.0.0', port=8080)
|
100 |
|
101 |
class CustomerSupportBot:
|
102 |
def __init__(self, model_path="models/customer_support_gpt"):
|
|
|
180 |
|
181 |
def create_chat_interface():
|
182 |
bot = CustomerSupportBot(model_path="/app/models")
|
183 |
+
|
184 |
+
# Start health check server
|
185 |
+
health_server = HealthCheckServer(bot)
|
186 |
+
health_thread = threading.Thread(target=health_server.run, daemon=True)
|
187 |
+
health_thread.start()
|
188 |
|
189 |
# Function to run initial query
|
190 |
def initial_query():
|
|
|
283 |
|
284 |
return interface
|
285 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
if __name__ == "__main__":
    # Build the Gradio UI (this also starts the health-check server thread).
    demo = create_chat_interface()
    print("Starting Gradio server...")
    # Serve the UI on all interfaces; port 7860 is the conventional Gradio
    # port (8080 is taken by the health-check server).
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,  # Changed to 7860 for Gradio
        "debug": True,
        "inline": False,
    }
    demo.launch(**launch_options)
|
dockerfile
CHANGED
@@ -75,7 +75,7 @@ RUN chmod +x serve.sh
|
|
75 |
|
76 |
# Expose the Gradio port
|
77 |
EXPOSE 8080
|
78 |
-
EXPOSE
|
79 |
|
80 |
# Set entry point to the serve script
|
81 |
ENTRYPOINT ["./serve.sh"]
|
|
|
75 |
|
76 |
# Expose the Gradio port
|
77 |
EXPOSE 8080
|
78 |
+
EXPOSE 7860
|
79 |
|
80 |
# Set entry point to the serve script
|
81 |
ENTRYPOINT ["./serve.sh"]
|