kajdun
/

iubaris-13b-v3_GPTQ

Text Generation

4-bit precision

Model card Files Files and versions

kajdun committed on Aug 21, 2023

Commit

b0866b2

·

1 Parent(s): b374e13

Update handler.py

Files changed (1) hide show

handler.py +1 -1

handler.py CHANGED Viewed

@@ -9,7 +9,7 @@ device = 0 if torch.cuda.is_available() else -1
 class EndpointHandler():
     def __init__(self, path=""):
         # load the optimized model
-        model = AutoGPTQForCausalLM.from_quantized(path, use_safetensors=True, low_cpu_mem_usage=True, device_map="auto") #file_name="model-quantized.onnx")
         tokenizer = AutoTokenizer.from_pretrained(path)
         # or you can also use pipeline
         self.pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)

 class EndpointHandler():
     def __init__(self, path=""):
         # load the optimized model
+        model = AutoGPTQForCausalLM.from_quantized(path, use_safetensors=True) #file_name="model-quantized.onnx")
         tokenizer = AutoTokenizer.from_pretrained(path)
         # or you can also use pipeline
         self.pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)