MisterAI committed (verified)
Commit 0389297 · 1 parent: b0e8f8e

Upload app.py.H2O_GGUF

Files changed (1):
  app.py.H2O_GGUF (+274 −0)
app.py.H2O_GGUF ADDED
@@ -0,0 +1,274 @@
#app.py.chatbot
#app.py Modif04
#https://www.freddyboulton.com/blog/llama-cpp-python
import gradio as gr
from llama_cpp import Llama

# Load the local GGUF model with llama.cpp
llm = Llama(
    model_path="/home/user/app/h2o-danube3-500m-chat-Q4_K_M.gguf",
    verbose=True
)

def predict(message, history):
    # Earlier attempts at seeding the conversation, kept for reference:
    # messages = [{"role": "system", "content": "You are a helpful assistant."}]
    # messages = [{"role": "assistant", "content": "You are a helpful assistant."}]
    # messages = [{"role": "assistant", "content": "Hello, how can I help you?"}]
    messages = []
    # Rebuild the chat history in OpenAI-style message format
    for user_message, bot_message in history:
        if user_message:
            messages.append({"role": "user", "content": user_message})
        if bot_message:
            messages.append({"role": "assistant", "content": bot_message})
    messages.append({"role": "user", "content": message})

    # Stream the completion, yielding the growing response for live display
    response = ""
    for chunk in llm.create_chat_completion(
        stream=True,
        messages=messages,
    ):
        part = chunk["choices"][0]["delta"].get("content", None)
        if part:
            response += part
            yield response

demo = gr.ChatInterface(predict)

demo.launch()

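# Sketch of an alternative (an assumption, not part of this commit): instead of
# relying on a model path baked into the image, the GGUF could be fetched from
# the Hub at startup. The repo_id and filename below are guesses inferred from
# the repository names and paths referenced later in this file.
#from huggingface_hub import hf_hub_download
#
#model_file = hf_hub_download(
#    repo_id="MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf",
#    filename="h2o-danube3-500m-chat-Q4_K_M.gguf",
#)
#llm = Llama(model_path=model_file, verbose=True)
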
##app.py Modif03
#import gradio as gr
#from huggingface_hub import create_inference_endpoint, InferenceClient
#from transformers import AutoModelForCausalLM, AutoTokenizer
#
##model_name = "MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf"
##model = AutoModelForCausalLM.from_pretrained(model_name)
##tokenizer = AutoTokenizer.from_pretrained(model_name)
#
##client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
##client = InferenceClient("MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf")
##client = InferenceClient("/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf")
#
## Create a local Inference endpoint instance
#endpoint = create_inference_endpoint(
#    "Local-Endpoint-MisterAI-H2O",
#    repository="MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf",
##    model_path="/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf",
#    framework="pytorch",
#    task="text-generation",
#    accelerator="cpu",
#    vendor="local",
#    region="local",
#    type="unprotected",
#    instance_size="small",
#    instance_type="local",
#    URL="http://0.0.0.0:6789"
#)
#
#print(f"Endpoint created at URL: {endpoint.url}")
#
#client = endpoint.client
#
#
#def respond(
#    message,
#    history: list[tuple[str, str]],
#    system_message,
#    max_tokens,
#    temperature,
#    top_p,
#):
#    messages = [{"role": "system", "content": system_message}]
#
#    for val in history:
#        if val[0]:
#            messages.append({"role": "user", "content": val[0]})
#        if val[1]:
#            messages.append({"role": "assistant", "content": val[1]})
#
#    messages.append({"role": "user", "content": message})
#
#    response = ""
#
#    for message in client.chat_completion(
#        messages,
#        max_tokens=max_tokens,
#        stream=True,
#        temperature=temperature,
#        top_p=top_p,
#    ):
#        token = message.choices[0].delta.content
#
#        response += token
#        yield response
#
#demo = gr.ChatInterface(
#    respond,
#    additional_inputs=[
#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#        gr.Slider(
#            minimum=0.1,
#            maximum=1.0,
#            value=0.95,
#            step=0.05,
#            label="Top-p (nucleus sampling)",
#        ),
#    ],
#)
#
#
#if __name__ == "__main__":
#    demo.launch()
#
#
#
#
##app.py Modif01
#import gradio as gr
#from huggingface_hub import Inference, InferenceClient
#from transformers import AutoModelForCausalLM, AutoTokenizer
#
##model_name = "MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf"
##model = AutoModelForCausalLM.from_pretrained(model_name)
##tokenizer = AutoTokenizer.from_pretrained(model_name)
#
##client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
##client = InferenceClient("MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf")
##client = InferenceClient("/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf")
#
## Create a local Inference instance
#inference = Inference(
#    model_path="/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf",
#    device="cpu",   # Use the CPU for inference
#    token=None,     # No token needed for this instance
#)
#
#client = inference
#
#
#def respond(
#    message,
#    history: list[tuple[str, str]],
#    system_message,
#    max_tokens,
#    temperature,
#    top_p,
#):
#    messages = [{"role": "system", "content": system_message}]
#
#    for val in history:
#        if val[0]:
#            messages.append({"role": "user", "content": val[0]})
#        if val[1]:
#            messages.append({"role": "assistant", "content": val[1]})
#
#    messages.append({"role": "user", "content": message})
#
#    response = ""
#
#    for message in client.chat_completion(
#        messages,
#        max_tokens=max_tokens,
#        stream=True,
#        temperature=temperature,
#        top_p=top_p,
#    ):
#        token = message.choices[0].delta.content
#
#        response += token
#        yield response
#
#demo = gr.ChatInterface(
#    respond,
#    additional_inputs=[
#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#        gr.Slider(
#            minimum=0.1,
#            maximum=1.0,
#            value=0.95,
#            step=0.05,
#            label="Top-p (nucleus sampling)",
#        ),
#    ],
#)
#
#
#if __name__ == "__main__":
#    demo.launch()
#
#
#
#
#
##app.py ORIGINAL
#import gradio as gr
#from huggingface_hub import InferenceClient
#
#"""
#For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
#"""
#client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
#
#
#def respond(
#    message,
#    history: list[tuple[str, str]],
#    system_message,
#    max_tokens,
#    temperature,
#    top_p,
#):
#    messages = [{"role": "system", "content": system_message}]
#
#    for val in history:
#        if val[0]:
#            messages.append({"role": "user", "content": val[0]})
#        if val[1]:
#            messages.append({"role": "assistant", "content": val[1]})
#
#    messages.append({"role": "user", "content": message})
#
#    response = ""
#
#    for message in client.chat_completion(
#        messages,
#        max_tokens=max_tokens,
#        stream=True,
#        temperature=temperature,
#        top_p=top_p,
#    ):
#        token = message.choices[0].delta.content
#
#        response += token
#        yield response
#
#"""
#For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
#"""
#demo = gr.ChatInterface(
#    respond,
#    additional_inputs=[
#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#        gr.Slider(
#            minimum=0.1,
#            maximum=1.0,
#            value=0.95,
#            step=0.05,
#            label="Top-p (nucleus sampling)",
#        ),
#    ],
#)
#
#
#if __name__ == "__main__":
#    demo.launch()
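
A possible next step, sketched here under stated assumptions rather than taken from the commit: the system-message and sampling sliders from the archived versions above could be wired into the active llama.cpp path. `max_tokens`, `temperature`, and `top_p` are real `create_chat_completion` parameters in llama-cpp-python; everything else mirrors the code already in this file.

import gradio as gr
from llama_cpp import Llama

llm = Llama(
    model_path="/home/user/app/h2o-danube3-500m-chat-Q4_K_M.gguf",
    verbose=True,
)

def predict(message, history, system_message, max_tokens, temperature, top_p):
    # Seed with the system prompt, then replay the chat history
    messages = [{"role": "system", "content": system_message}]
    for user_message, bot_message in history:
        if user_message:
            messages.append({"role": "user", "content": user_message})
        if bot_message:
            messages.append({"role": "assistant", "content": bot_message})
    messages.append({"role": "user", "content": message})

    # Stream tokens, forwarding the slider values to llama.cpp
    response = ""
    for chunk in llm.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    ):
        part = chunk["choices"][0]["delta"].get("content")
        if part:
            response += part
            yield response

demo = gr.ChatInterface(
    predict,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()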