Frankie-walsh4 committed on
Commit
8c02af0
·
1 Parent(s): ec89f95
Files changed (1) hide show
  1. app.py +97 -26
app.py CHANGED
@@ -2,12 +2,29 @@ import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import os
4
  import time
 
 
5
  from huggingface_hub.errors import HfHubHTTPError
6
 
7
  """
8
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
9
  """
10
- client = InferenceClient("Trinoid/Data_Management_Mistral")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  def respond(
13
  message,
@@ -29,44 +46,98 @@ def respond(
29
 
30
  response = ""
31
 
 
 
 
32
  # Try to initialize the model with retries
33
  max_retries = 3
34
  retry_count = 0
35
 
 
 
 
36
  while retry_count < max_retries:
37
  try:
38
- print(f"Attempt {retry_count + 1}/{max_retries} to call the model...")
39
- for message in client.chat_completion(
40
- messages,
41
- max_tokens=max_tokens,
42
- stream=True,
43
- temperature=temperature,
44
- top_p=top_p,
45
- ):
46
- token = message.choices[0].delta.content
47
- if token:
48
- response += token
49
- yield response
50
- # If we got here, we were successful
51
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  except HfHubHTTPError as e:
54
  retry_count += 1
55
  error_message = str(e)
56
  print(f"Error: {error_message}")
57
 
58
- if "504 Server Error: Gateway Timeout" in error_message and retry_count < max_retries:
59
- wait_time = 10 # seconds
60
- print(f"Model timed out. Waiting {wait_time} seconds before retry {retry_count}/{max_retries}...")
61
- yield f"⌛ Model is warming up, please wait... (Attempt {retry_count}/{max_retries})"
62
- time.sleep(wait_time)
63
- else:
64
- print("All retries failed or different error occurred.")
65
- if "504 Server Error" in error_message:
66
- yield "❌ The model timed out after multiple attempts. Your model might still be loading or the server is busy. Try again in a few minutes."
67
  else:
68
- yield f"❌ An error occurred: {error_message}"
69
- break
 
 
 
 
 
 
 
 
 
 
 
70
 
71
 
72
  """
 
2
  from huggingface_hub import InferenceClient
3
  import os
4
  import time
5
+ import json
6
+ import requests
7
  from huggingface_hub.errors import HfHubHTTPError
8
 
9
  """
10
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
11
  """
12
+ # Get token from environment (even though we might not need it)
13
+ HF_TOKEN = os.environ.get("HF_TOKEN")
14
+ print(f"HF_TOKEN is {'available' if HF_TOKEN else 'not available'}")
15
+
16
+ # Try direct client with and without token
17
+ if HF_TOKEN:
18
+ client = InferenceClient("Trinoid/Data_Management_Mistral", token=HF_TOKEN)
19
+ print("Created client with token")
20
+ else:
21
+ client = InferenceClient("Trinoid/Data_Management_Mistral")
22
+ print("Created client without token")
23
+
24
+ # Alternative API endpoint setup
25
+ API_URL = "https://api-inference.huggingface.co/models/Trinoid/Data_Management_Mistral"
26
+ headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
27
+
28
 
29
  def respond(
30
  message,
 
46
 
47
  response = ""
48
 
49
+ # Debug the messages being sent
50
+ print(f"Sending messages: {json.dumps(messages, indent=2)}")
51
+
52
  # Try to initialize the model with retries
53
  max_retries = 3
54
  retry_count = 0
55
 
56
+ # Try both methods: InferenceClient and direct API call
57
+ use_direct_api = False
58
+
59
  while retry_count < max_retries:
60
  try:
61
+ print(f"Attempt {retry_count + 1}/{max_retries} using {'direct API' if use_direct_api else 'InferenceClient'}...")
62
+
63
+ if not use_direct_api:
64
+ # Method 1: Using InferenceClient
65
+ for message in client.chat_completion(
66
+ messages,
67
+ max_tokens=max_tokens,
68
+ stream=True,
69
+ temperature=temperature,
70
+ top_p=top_p,
71
+ ):
72
+ token = message.choices[0].delta.content
73
+ if token:
74
+ response += token
75
+ yield response
76
+ # If we got here, we were successful
77
+ break
78
+ else:
79
+ # Method 2: Direct API call
80
+ payload = {
81
+ "inputs": messages,
82
+ "parameters": {
83
+ "max_new_tokens": max_tokens,
84
+ "temperature": temperature,
85
+ "top_p": top_p,
86
+ },
87
+ "stream": False,
88
+ }
89
+
90
+ print(f"Making direct API call to {API_URL}")
91
+ api_response = requests.post(API_URL, headers=headers, json=payload)
92
+ print(f"API response status: {api_response.status_code}")
93
+
94
+ if api_response.status_code == 200:
95
+ result = api_response.json()
96
+ print(f"API response: {json.dumps(result, indent=2)}")
97
+ if isinstance(result, list) and len(result) > 0 and "generated_text" in result[0]:
98
+ response = result[0]["generated_text"]
99
+ yield response
100
+ break
101
+ else:
102
+ print(f"Unexpected API response format: {result}")
103
+ retry_count += 1
104
+ else:
105
+ print(f"API error: {api_response.text}")
106
+ if api_response.status_code == 504 and retry_count < max_retries - 1:
107
+ retry_count += 1
108
+ yield f"⌛ Model is warming up, please wait... (Attempt {retry_count}/{max_retries})"
109
+ time.sleep(10)
110
+ else:
111
+ yield f"❌ API error: {api_response.status_code} - {api_response.text}"
112
+ break
113
 
114
  except HfHubHTTPError as e:
115
  retry_count += 1
116
  error_message = str(e)
117
  print(f"Error: {error_message}")
118
 
119
+ if "504 Server Error: Gateway Timeout" in error_message:
120
+ if retry_count < max_retries - 1:
121
+ wait_time = 10 # seconds
122
+ print(f"Model timed out. Waiting {wait_time} seconds before retry {retry_count}/{max_retries}...")
123
+ yield f"⌛ Model is warming up, please wait... (Attempt {retry_count}/{max_retries})"
124
+ time.sleep(wait_time)
125
+ # Try direct API on next attempt
126
+ use_direct_api = True
 
127
  else:
128
+ print("All retries failed.")
129
+ yield "❌ The model timed out after multiple attempts. Try again in a few minutes."
130
+ break
131
+ else:
132
+ print(f"Non-timeout error: {error_message}")
133
+ yield f"❌ An error occurred: {error_message}"
134
+ # Try direct API on next attempt
135
+ use_direct_api = True
136
+
137
+ except Exception as e:
138
+ print(f"Unexpected error: {str(e)}")
139
+ yield f"❌ Unexpected error: {str(e)}"
140
+ break
141
 
142
 
143
  """