KingNish committed
Commit 297327b
1 Parent(s): 2f30731

Update chatbot.py

Files changed (1)
  1. chatbot.py +18 -19
chatbot.py CHANGED
@@ -214,24 +214,7 @@ def qwen_inference(user_prompt, chat_history):
             ]
         })
 
-    text = processor.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-    image_inputs, video_inputs = process_vision_info(messages)
-    inputs = processor(
-        text=[text],
-        images=image_inputs,
-        videos=video_inputs,
-        padding=True,
-        return_tensors="pt",
-    ).to("cuda")
-
-    streamer = TextIteratorStreamer(
-        processor, skip_prompt=True, **{"skip_special_tokens": True}
-    )
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
-
-    return generation_kwargs
+    return messages
 
 # Initialize inference clients for different models
 client_mistral = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
@@ -242,7 +225,23 @@ client_mistral_nemo = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407")
 @spaces.GPU(duration=60, queue=False)
 def model_inference( user_prompt, chat_history):
     if user_prompt["files"]:
-        generation_kwargs = qwen_inference(user_prompt, chat_history)
+        messages = qwen_inference(user_prompt, chat_history)
+        text = processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        image_inputs, video_inputs = process_vision_info(messages)
+        inputs = processor(
+            text=[text],
+            images=image_inputs,
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt",
+        ).to("cuda")
+
+        streamer = TextIteratorStreamer(
+            processor, skip_prompt=True, **{"skip_special_tokens": True}
+        )
+        generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
 
         thread = Thread(target=model.generate, kwargs=generation_kwargs)
         thread.start()
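
The net effect of the diff: qwen_inference is reduced to a pure message builder, and every CUDA-touching step (chat-template tokenization, the .to("cuda") move, the streamer, and generation_kwargs) now runs inside model_inference, which carries the @spaces.GPU decorator. On ZeroGPU Spaces a GPU is only attached while a @spaces.GPU-decorated function executes, so this is likely the motivation for the move. Below is a minimal sketch of the resulting pattern; the MODEL_ID, the model/processor loading lines, the message-building body, and the final yield loop are illustrative assumptions (the diff cuts off after thread.start()), while the middle mirrors the committed code.

from threading import Thread

import spaces
import torch
from qwen_vl_utils import process_vision_info
from transformers import (
    AutoProcessor,
    Qwen2VLForConditionalGeneration,
    TextIteratorStreamer,
)

# Assumption: chatbot.py loads a Qwen2-VL checkpoint along these lines.
# On ZeroGPU, moving the model to "cuda" at startup is the documented pattern.
MODEL_ID = "Qwen/Qwen2-VL-7B-Instruct"
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID, torch_dtype=torch.float16
).to("cuda")
processor = AutoProcessor.from_pretrained(MODEL_ID)

def qwen_inference(user_prompt, chat_history):
    # CPU-only after this commit: just assemble the chat-template
    # messages list; no tensors, no CUDA calls.
    messages = []
    # ... append chat_history turns and user_prompt["files"] entries ...
    return messages

@spaces.GPU(duration=60, queue=False)
def model_inference(user_prompt, chat_history):
    # All CUDA work happens inside the @spaces.GPU context.
    messages = qwen_inference(user_prompt, chat_history)
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to("cuda")

    # Stream tokens from a background generate() thread.
    streamer = TextIteratorStreamer(
        processor, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
    Thread(target=model.generate, kwargs=generation_kwargs).start()

    # Hypothetical continuation: drain the streamer and yield partial text.
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer

TextIteratorStreamer accepts the processor here because Qwen2-VL's processor forwards decode() to its tokenizer; the committed code passes skip_special_tokens via **{"skip_special_tokens": True}, which is equivalent to the plain keyword argument used in the sketch.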