Update test.py
Browse files
test.py
CHANGED
|
@@ -3,19 +3,15 @@ from transformers import AutoTokenizer
|
|
| 3 |
import torch
|
| 4 |
|
| 5 |
model_id = "LnL-AI/Yi-6B-gptq-4bit"
|
| 6 |
-
|
| 7 |
device = torch.device("cuda:0")
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 12 |
|
| 13 |
prompt = "I am in Shanghai, preparing to visit the natural history museum. Can you tell me the best way to"
|
|
|
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
res = model_q.generate(**inp, num_beams=1, min_new_tokens=1, max_new_tokens=512)
|
| 18 |
|
| 19 |
predicted_text = tokenizer.decode(res[0])
|
| 20 |
-
|
| 21 |
print(predicted_text)
|
|
|
|
| 3 |
import torch
|
| 4 |
|
| 5 |
# Checkpoint identifier shared by the model and tokenizer loaders, and the
# GPU that both the weights and the encoded prompt are placed on.
model_id = "LnL-AI/Yi-6B-gptq-4bit"
device = torch.device("cuda:0")

# Load the checkpoint through AutoGPTQ's quantized loader (safetensors
# weights) onto `device`, then the tokenizer stored under the same id.
model = AutoGPTQForCausalLM.from_quantized(
    model_id,
    use_safetensors=True,
    device=device,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

prompt = "I am in Shanghai, preparing to visit the natural history museum. Can you tell me the best way to"

# Encode the prompt as PyTorch tensors and move them to the model's device
# so generate() receives inputs on the same GPU as the weights.
encoded_prompt = tokenizer(prompt, return_tensors="pt")
inputs = encoded_prompt.to(device)

# Decoding limits: a single beam, at least 1 and at most 512 new tokens.
generation_kwargs = {
    "num_beams": 1,
    "min_new_tokens": 1,
    "max_new_tokens": 512,
}
res = model.generate(**inputs, **generation_kwargs)

# Only one prompt was submitted, so decode the entry at index 0 and print it.
first_sequence = res[0]
predicted_text = tokenizer.decode(first_sequence)
print(predicted_text)
|