VenkateshRoshan committed
Commit cef9c21 · Parent: c95a175

health check updated

Files changed (2):
  1. app.py +99 -20
  2. dockerfile +1 -1
app.py CHANGED
@@ -7,6 +7,96 @@ import os
 import tarfile
 from typing import List, Tuple
 import boto3
+from flask import Flask, Response, jsonify, request
+import threading
+import psutil
+import logging
+from waitress import serve
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class HealthCheckServer:
+    def __init__(self, bot=None):
+        self.app = Flask(__name__)
+        self.bot = bot
+
+        # Health check endpoint
+        @self.app.route("/ping", methods=["GET"])
+        def ping():
+            try:
+                # Check system health
+                healthy = self.check_system_health()
+                if healthy:
+                    logger.info("Health check passed")
+                    return Response(response='\n', status=200, mimetype='application/json')
+                else:
+                    logger.error("Health check failed")
+                    return Response(response='\n', status=500, mimetype='application/json')
+            except Exception as e:
+                logger.error(f"Health check error: {str(e)}")
+                return Response(response='\n', status=500, mimetype='application/json')
+
+        # Inference endpoint
+        @self.app.route("/invocations", methods=["POST"])
+        def invocations():
+            try:
+                if not request.is_json:
+                    return Response(response='This predictor only supports JSON data',
+                                    status=415,
+                                    mimetype='text/plain')
+
+                data = request.get_json()
+                message = data.get('message', '')
+
+                if not message:
+                    return Response(response=json.dumps({"error": "No message provided"}),
+                                    status=400,
+                                    mimetype='application/json')
+
+                response = self.bot.generate_response(message)
+                return Response(response=json.dumps({"response": response}),
+                                status=200,
+                                mimetype='application/json')
+
+            except Exception as e:
+                logger.error(f"Inference error: {str(e)}")
+                return Response(response=json.dumps({"error": str(e)}),
+                                status=500,
+                                mimetype='application/json')
+
+    def check_system_health(self):
+        """Check if system and model are healthy"""
+        try:
+            # Check if model is loaded
+            if self.bot and not hasattr(self.bot, 'model'):
+                logger.error("Model not loaded")
+                return False
+
+            # Check memory usage
+            mem = psutil.virtual_memory()
+            if mem.percent > 90:
+                logger.error(f"High memory usage: {mem.percent}%")
+                return False
+
+            # Check CPU usage
+            if psutil.cpu_percent() > 95:
+                logger.error(f"High CPU usage: {psutil.cpu_percent()}%")
+                return False
+
+            # Log current resource usage
+            logger.info(f"System health: Memory {mem.percent}%, CPU {psutil.cpu_percent()}%")
+            return True
+
+        except Exception as e:
+            logger.error(f"Health check error: {str(e)}")
+            return False
+
+    def run(self):
+        """Run the health check server"""
+        logger.info("Starting health check server on port 8080...")
+        serve(self.app, host='0.0.0.0', port=8080)
 
 class CustomerSupportBot:
     def __init__(self, model_path="models/customer_support_gpt"):
@@ -90,6 +180,11 @@ class CustomerSupportBot:
 
 def create_chat_interface():
     bot = CustomerSupportBot(model_path="/app/models")
+
+    # Start health check server
+    health_server = HealthCheckServer(bot)
+    health_thread = threading.Thread(target=health_server.run, daemon=True)
+    health_thread.start()
 
     # Function to run initial query
     def initial_query():
@@ -188,29 +283,13 @@ def create_chat_interface():
 
     return interface
 
-from flask import Flask, jsonify
-import threading
-
-app = Flask(__name__)
-
-# Health check endpoint for SageMaker
-@app.route("/ping", methods=["GET"])
-def ping():
-    return jsonify({"status": "ok"}), 200
-
-# Start Flask app on a separate thread
-def run_flask():
-    app.run(host="0.0.0.0", port=8081)  # Different port for Flask
-
-threading.Thread(target=run_flask).start()
-
 if __name__ == "__main__":
     demo = create_chat_interface()
-    print("Starting Gradient server...")
+    print("Starting Gradio server...")
     demo.launch(
         share=False,
-        server_name="0.0.0.0",  # Makes the server accessible from other machines
-        server_port=8080,  # Specify the port - updated from 7860 to 8080
+        server_name="0.0.0.0",
+        server_port=7860,  # Changed to 7860 for Gradio
         debug=True,
-        inline=False#, server_port=6006
+        inline=False
     )
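The new /ping and /invocations routes follow the SageMaker container convention: GET /ping for liveness, POST /invocations for inference. Note that invocations() calls json.dumps, so json must already be imported earlier in app.py; the hunk starts at line 7, so those earlier imports are not shown in this diff. Below is a minimal sketch of how the two endpoints could be exercised, assuming the waitress server is reachable at localhost:8080 (the port hard-coded in run()) and using a made-up test message:

import requests

BASE = "http://localhost:8080"  # assumption: health server port published locally

# Liveness: ping() returns 200 with an empty JSON body when check_system_health() passes.
r = requests.get(f"{BASE}/ping", timeout=5)
print("ping:", r.status_code)

# Inference: invocations() expects a JSON body with a "message" key
# and returns {"response": ...} on success.
r = requests.post(f"{BASE}/invocations",
                  json={"message": "How do I reset my password?"},  # hypothetical query
                  timeout=30)
print("invocations:", r.status_code, r.json())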
dockerfile CHANGED
@@ -75,7 +75,7 @@ RUN chmod +x serve.sh
 
 # Expose the Gradio port
 EXPOSE 8080
-EXPOSE 8081
+EXPOSE 7860
 
 # Set entry point to the serve script
 ENTRYPOINT ["./serve.sh"]
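After this change the container exposes 8080 for the waitress health/inference server and 7860 for the Gradio UI (previously the Flask health check sat on 8081). When running the image locally, both ports would presumably need to be published, e.g. docker run -p 8080:8080 -p 7860:7860 <image>, with the image name substituted as appropriate.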