userlollolol1 committed on
Commit
3b5fe4f
·
verified ·
1 Parent(s): 7ba8373

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -36
app.py CHANGED
@@ -1,65 +1,85 @@
1
  import os
 
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
 
5
- """
6
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
- """
8
- hf_token = os.getenv("apikey")
9
- client = InferenceClient(token=hf_token, model="HuggingFaceH4/zephyr-7b-beta")
10
 
11
- def respond(
12
- message,
13
- history: list[tuple[str, str]],
14
- system_message,
15
- max_tokens,
16
- temperature,
17
- top_p,
18
- ):
19
- messages = [{"role": "system", "content": system_message}]
20
 
21
- for val in history:
22
- if val[0]:
23
- messages.append({"role": "user", "content": val[0]})
24
- if val[1]:
25
- messages.append({"role": "assistant", "content": val[1]})
 
 
 
 
 
 
 
26
 
27
- messages.append({"role": "user", "content": message})
 
 
 
 
 
 
 
 
28
 
 
29
  response = ""
 
30
 
31
- for message in client.chat_completion(
 
32
  messages,
33
  max_tokens=max_tokens,
34
  stream=True,
35
  temperature=temperature,
36
  top_p=top_p,
37
  ):
38
- token = message.choices[0].delta.content
39
-
40
  response += token
41
  yield response
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- """
45
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
46
- """
47
  demo = gr.ChatInterface(
48
  respond,
49
  additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
- ),
60
  ],
61
  )
62
 
63
-
64
  if __name__ == "__main__":
65
- demo.launch(share=True)
 
1
  import os
2
+ import requests
3
  import gradio as gr
4
  from huggingface_hub import InferenceClient
5
 
6
+ # API keys from environment
7
+ hf_token = os.getenv("apikey") # Hugging Face token
8
+ tavily_token = os.getenv("tavily") # Tavily key
 
 
9
 
10
+ # 30B model (chat-tuned)
11
+ client = InferenceClient(token=hf_token, model="NousResearch/Nous-Hermes-2-Yi-34B")
 
 
 
 
 
 
 
12
 
13
+ # Web search fallback
14
+ def search_tavily(query):
15
+ try:
16
+ res = requests.post("https://api.tavily.com/search", json={
17
+ "api_key": tavily_token,
18
+ "query": query,
19
+ "include_answer": True,
20
+ "search_depth": "basic"
21
+ })
22
+ return res.json().get("answer", "")
23
+ except:
24
+ return ""
25
 
26
+ # Main chat logic
27
+ def respond(message, history: list[tuple[str, str]], max_tokens, temperature, top_p):
28
+ messages = []
29
+
30
+ for user, assistant in history:
31
+ if user:
32
+ messages.append({"role": "user", "content": user})
33
+ if assistant:
34
+ messages.append({"role": "assistant", "content": assistant})
35
 
36
+ messages.append({"role": "user", "content": message})
37
  response = ""
38
+ fallback_needed = False
39
 
40
+ # Attempt answer
41
+ for msg in client.chat_completion(
42
  messages,
43
  max_tokens=max_tokens,
44
  stream=True,
45
  temperature=temperature,
46
  top_p=top_p,
47
  ):
48
+ token = msg.choices[0].delta.content
 
49
  response += token
50
  yield response
51
 
52
+ # Check if it's unsure
53
+ triggers = ["i don't know", "no information", "i cannot", "as an ai", "unsure"]
54
+ if any(trigger in response.lower() for trigger in triggers):
55
+ fallback_needed = True
56
+
57
+ # Retry with web search if needed
58
+ if fallback_needed:
59
+ web_snippet = search_tavily(message)
60
+ if web_snippet:
61
+ prompt = f"User asked: {message}\nHere’s info from the web: {web_snippet}\nAnswer:"
62
+ response = ""
63
+ for msg in client.chat_completion(
64
+ [{"role": "user", "content": prompt}],
65
+ max_tokens=max_tokens,
66
+ stream=True,
67
+ temperature=temperature,
68
+ top_p=top_p,
69
+ ):
70
+ token = msg.choices[0].delta.content
71
+ response += token
72
+ yield response
73
 
74
+ # Gradio interface without system message
 
 
75
  demo = gr.ChatInterface(
76
  respond,
77
  additional_inputs=[
 
78
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
79
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
80
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
 
 
 
 
 
 
81
  ],
82
  )
83
 
 
84
  if __name__ == "__main__":
85
+ demo.launch()