Spaces: Running on Zero
Update chatbot.py
chatbot.py  CHANGED  +18 -19
@@ -214,24 +214,7 @@ def qwen_inference(user_prompt, chat_history):
         ]
     })
 
-    text = processor.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-    image_inputs, video_inputs = process_vision_info(messages)
-    inputs = processor(
-        text=[text],
-        images=image_inputs,
-        videos=video_inputs,
-        padding=True,
-        return_tensors="pt",
-    ).to("cuda")
-
-    streamer = TextIteratorStreamer(
-        processor, skip_prompt=True, **{"skip_special_tokens": True}
-    )
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
-
-    return generation_kwargs
+    return messages
 
 # Initialize inference clients for different models
 client_mistral = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
@@ -242,7 +225,23 @@ client_mistral_nemo = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407")
 @spaces.GPU(duration=60, queue=False)
 def model_inference( user_prompt, chat_history):
     if user_prompt["files"]:
-        generation_kwargs = qwen_inference(user_prompt, chat_history)
+        messages = qwen_inference(user_prompt, chat_history)
+        text = processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        image_inputs, video_inputs = process_vision_info(messages)
+        inputs = processor(
+            text=[text],
+            images=image_inputs,
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt",
+        ).to("cuda")
+
+        streamer = TextIteratorStreamer(
+            processor, skip_prompt=True, **{"skip_special_tokens": True}
+        )
+        generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
 
         thread = Thread(target=model.generate, kwargs=generation_kwargs)
         thread.start()
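With this change, qwen_inference only assembles the chat messages list and returns it; applying the chat template, running process_vision_info, and setting up the streamer now happen inside model_inference. For reference, a minimal sketch of the message structure that processor.apply_chat_template and qwen_vl_utils.process_vision_info consume (the image path and prompt text here are placeholders, not values from this commit):

    # Illustrative message list in the Qwen-VL chat format; the image path
    # and prompt are placeholders, not taken from this Space.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": "/tmp/example.png"},
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]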
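One detail worth noting in the added block: processor(...) returns a mapping of tensors, so dict(inputs, streamer=streamer, max_new_tokens=2048) merges the model inputs with the generation options into a single kwargs dict for model.generate. An equivalent, more explicit spelling of the same merge:

    # Same behavior as the dict(...) call in the diff above.
    generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": 2048}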
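The hunk ends at thread.start(), so the diff does not show how the stream is consumed. Since TextIteratorStreamer is iterable and yields decoded text chunks as model.generate produces them, the rest of model_inference presumably drains it along these lines (a sketch under that assumption, not code from this commit):

    # Assumed continuation after thread.start(): read chunks from the streamer
    # on the main thread while generation runs in the background thread.
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer  # hypothetical: emit the accumulated reply to the chat UI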