shisa-qwen2-7B-test

Sleeping

keitokei1994 committed on Jun 16, 2024

Commit

19af97e

verified ·

1 Parent(s): 610597f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,16 +11,16 @@ from threading import Thread
 import spaces
 import time
-token = os.environ["HF_TOKEN"]
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
 )
 model = AutoModelForCausalLM.from_pretrained(
-    "NousResearch/Hermes-2-Pro-Llama-3-8B", quantization_config=quantization_config, token=token
 )
-tok = AutoTokenizer.from_pretrained("NousResearch/Hermes-2-Pro-Llama-3-8B", token=token)
 terminators = [
     tok.eos_token_id,
     tok.convert_tokens_to_ids("<|eot_id|>")
@@ -37,7 +37,7 @@ else:
 # Dispatch Errors
-@spaces.GPU(duration=150)
 def chat(message, history, temperature,do_sample, max_tokens):
     chat = []
     for item in history:
@@ -97,6 +97,6 @@ demo = gr.ChatInterface(
     ],
     stop_btn="Stop Generation",
     title="Chat With LLMs",
-    description="Now Running [NousResearch/Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B) in 4bit"
 )
 demo.launch()

 import spaces
 import time
+#token = os.environ["HF_TOKEN"]
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
 )
 model = AutoModelForCausalLM.from_pretrained(
+    "shisa-ai/shisa-v1-qwen2-7b", quantization_config=quantization_config, token=token
 )
+tok = AutoTokenizer.from_pretrained("shisa-ai/shisa-v1-qwen2-7b", token=token)
 terminators = [
     tok.eos_token_id,
     tok.convert_tokens_to_ids("<|eot_id|>")
 # Dispatch Errors
+@spaces.GPU(duration=120)
 def chat(message, history, temperature,do_sample, max_tokens):
     chat = []
     for item in history:
     ],
     stop_btn="Stop Generation",
     title="Chat With LLMs",
+    description="Now Running [shisa-ai/shisa-v1-qwen2-7b](https://huggingface.co/shisa-ai/shisa-v1-qwen2-7b) in 4bit"
 )
 demo.launch()