Frankie-walsh4 committed on
Commit
9074c4e
·
1 Parent(s): a7af00e

Update app to use environment variables for token

Browse files
Files changed (1) hide show
  1. app.py +49 -21
app.py CHANGED
@@ -1,21 +1,21 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import os
4
- from dotenv import load_dotenv
5
-
6
- # Load environment variables from .env file
7
- load_dotenv()
8
 
9
  """
10
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
11
  """
12
- # You need to provide an API token to access your private model
13
- # Get your token from: https://huggingface.co/settings/tokens
14
- HF_TOKEN = os.environ.get("HF_TOKEN") # You should set this as an environment variable
 
 
15
  if not HF_TOKEN:
16
  print("Warning: No Hugging Face token found in environment variables.")
17
- print("Please set your HF_TOKEN environment variable or add it directly in the code.")
18
- print("Get your token from: https://huggingface.co/settings/tokens")
19
  else:
20
  print("HF_TOKEN found in environment variables!")
21
 
@@ -44,18 +44,45 @@ def respond(
44
  messages.append({"role": "user", "content": message})
45
 
46
  response = ""
47
-
48
- for message in client.chat_completion(
49
- messages,
50
- max_tokens=max_tokens,
51
- stream=True,
52
- temperature=temperature,
53
- top_p=top_p,
54
- ):
55
- token = message.choices[0].delta.content
56
-
57
- response += token
58
- yield response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
 
61
  """
@@ -75,6 +102,7 @@ demo = gr.ChatInterface(
75
  label="Top-p (nucleus sampling)",
76
  ),
77
  ],
 
78
  )
79
 
80
 
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import os
4
+ import time
5
+ from huggingface_hub.errors import HfHubHTTPError
 
 
6
 
7
  """
8
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
9
  """
10
+ # Get the token from environment variables
11
+ # For Hugging Face Spaces, add your token as a secret named HF_TOKEN
12
+ # https://huggingface.co/docs/hub/spaces-overview#managing-secrets
13
+ HF_TOKEN = os.environ.get("HF_TOKEN")
14
+
15
  if not HF_TOKEN:
16
  print("Warning: No Hugging Face token found in environment variables.")
17
+ print("For Hugging Face Spaces: Add your token as a secret named HF_TOKEN in the Settings tab.")
18
+ print("See: https://huggingface.co/docs/hub/spaces-overview#managing-secrets")
19
  else:
20
  print("HF_TOKEN found in environment variables!")
21
 
 
44
  messages.append({"role": "user", "content": message})
45
 
46
  response = ""
47
+
48
+ # Try to initialize the model with retries
49
+ max_retries = 3
50
+ retry_count = 0
51
+
52
+ while retry_count < max_retries:
53
+ try:
54
+ print(f"Attempt {retry_count + 1}/{max_retries} to call the model...")
55
+ for message in client.chat_completion(
56
+ messages,
57
+ max_tokens=max_tokens,
58
+ stream=True,
59
+ temperature=temperature,
60
+ top_p=top_p,
61
+ ):
62
+ token = message.choices[0].delta.content
63
+ if token:
64
+ response += token
65
+ yield response
66
+ # If we got here, we were successful
67
+ break
68
+
69
+ except HfHubHTTPError as e:
70
+ retry_count += 1
71
+ error_message = str(e)
72
+ print(f"Error: {error_message}")
73
+
74
+ if "504 Server Error: Gateway Timeout" in error_message and retry_count < max_retries:
75
+ wait_time = 10 # seconds
76
+ print(f"Model timed out. Waiting {wait_time} seconds before retry {retry_count}/{max_retries}...")
77
+ yield f"⌛ Model is warming up, please wait... (Attempt {retry_count}/{max_retries})"
78
+ time.sleep(wait_time)
79
+ else:
80
+ print("All retries failed or different error occurred.")
81
+ if "504 Server Error" in error_message:
82
+ yield "❌ The model timed out after multiple attempts. Your model might still be loading or the server is busy. Try again in a few minutes."
83
+ else:
84
+ yield f"❌ An error occurred: {error_message}"
85
+ break
86
 
87
 
88
  """
 
102
  label="Top-p (nucleus sampling)",
103
  ),
104
  ],
105
+ description="This interface uses your fine-tuned Mistral model for Microsoft 365 data management. The first request may take some time as the model loads."
106
  )
107
 
108