Update handler.py
Browse files
- handler.py +1 -1
handler.py
CHANGED
|
@@ -9,7 +9,7 @@ device = 0 if torch.cuda.is_available() else -1
|
|
| 9 |
class EndpointHandler():
|
| 10 |
def __init__(self, path=""):
|
| 11 |
# load the optimized model
|
| 12 |
-
model = AutoGPTQForCausalLM.from_quantized(path, use_safetensors=True
|
| 13 |
tokenizer = AutoTokenizer.from_pretrained(path)
|
| 14 |
# or you can also use pipeline
|
| 15 |
self.pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
|
|
|
|
| 9 |
class EndpointHandler():
|
| 10 |
def __init__(self, path=""):
|
| 11 |
# load the optimized model
|
| 12 |
+
model = AutoGPTQForCausalLM.from_quantized(path, use_safetensors=True) #file_name="model-quantized.onnx")
|
| 13 |
tokenizer = AutoTokenizer.from_pretrained(path)
|
| 14 |
# or you can also use pipeline
|
| 15 |
self.pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
|