Spaces:
Runtime error
Runtime error
Fixed Gradio error on completed stream
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import gradio as gr
|
|
| 2 |
from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
|
| 3 |
import torch, transformers
|
| 4 |
from threading import Thread
|
|
|
|
| 5 |
|
| 6 |
#Load the model
|
| 7 |
model_id = 'mobiuslabsgmbh/Llama-2-7b-chat-hf_1bitgs8_hqq'
|
|
@@ -53,9 +54,11 @@ def chat(message, history):
|
|
| 53 |
t, stream = chat_processor(chat=message)
|
| 54 |
response = ""
|
| 55 |
for character in stream:
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
| 59 |
t.join()
|
| 60 |
torch.cuda.empty_cache()
|
| 61 |
|
|
|
|
| 2 |
from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
|
| 3 |
import torch, transformers
|
| 4 |
from threading import Thread
|
| 5 |
+
import time
|
| 6 |
|
| 7 |
#Load the model
|
| 8 |
model_id = 'mobiuslabsgmbh/Llama-2-7b-chat-hf_1bitgs8_hqq'
|
|
|
|
| 54 |
t, stream = chat_processor(chat=message)
|
| 55 |
response = ""
|
| 56 |
for character in stream:
|
| 57 |
+
if character is not None:
|
| 58 |
+
response += character
|
| 59 |
+
# print(character)
|
| 60 |
+
yield response
|
| 61 |
+
time.sleep(0.1)
|
| 62 |
t.join()
|
| 63 |
torch.cuda.empty_cache()
|
| 64 |
|