kajdun committed on
Commit
b0866b2
·
1 Parent(s): b374e13

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +1 -1
handler.py CHANGED
@@ -9,7 +9,7 @@ device = 0 if torch.cuda.is_available() else -1
9
  class EndpointHandler():
10
  def __init__(self, path=""):
11
  # load the optimized model
12
- model = AutoGPTQForCausalLM.from_quantized(path, use_safetensors=True, low_cpu_mem_usage=True, device_map="auto") #file_name="model-quantized.onnx")
13
  tokenizer = AutoTokenizer.from_pretrained(path)
14
  # or you can also use pipeline
15
  self.pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
 
9
  class EndpointHandler():
10
  def __init__(self, path=""):
11
  # load the optimized model
12
+ model = AutoGPTQForCausalLM.from_quantized(path, use_safetensors=True) #file_name="model-quantized.onnx")
13
  tokenizer = AutoTokenizer.from_pretrained(path)
14
  # or you can also use pipeline
15
  self.pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)