Update model.py
model.py
CHANGED
@@ -5,12 +5,16 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
 
+print(f"Is CUDA available: {torch.cuda.is_available()}")
+print(f"{torch.cuda.current_device()}")
+print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
+
 model_id = "TheBloke/Chronos-Beluga-v2-13B-GPTQ"
 tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
 model = AutoGPTQForCausalLM.from_quantized(model_id,
                                            use_safetensors=True,
                                            trust_remote_code=False,
-                                           device="cuda
+                                           device="cuda",
                                            use_triton=False,
                                            quantize_config=None)
 
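
The file imports TextIteratorStreamer, but the hunk above does not show it in use. Below is a minimal sketch, not part of this commit, of how streaming generation might be wired up with the model and tokenizer loaded above; the prompt, max_new_tokens, and thread handling are illustrative assumptions.

# Illustrative streaming-generation sketch; assumes the `model` and
# `tokenizer` objects created above and a working CUDA device.
from threading import Thread

from transformers import TextIteratorStreamer

prompt = "Once upon a time"  # hypothetical prompt, not from the commit
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# skip_prompt=True so the streamer yields only newly generated text
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks, so run it in a background thread and consume the
# streamer on the main thread as tokens arrive
thread = Thread(target=model.generate,
                kwargs=dict(**inputs, streamer=streamer, max_new_tokens=128))
thread.start()

for new_text in streamer:
    print(new_text, end="", flush=True)
thread.join()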