Update app.py
Browse files
app.py
CHANGED
@@ -60,7 +60,7 @@ model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
|
|
60 |
|
61 |
# Load the model with the inferred device map
|
62 |
# model = load_checkpoint_and_dispatch(model, model_id, device_map=device_map, no_split_module_classes=["GPTJBlock"])
|
63 |
-
model.half()
|
64 |
|
65 |
|
66 |
|
@@ -73,7 +73,7 @@ def respond(
|
|
73 |
temperature,
|
74 |
top_p,
|
75 |
):
|
76 |
-
|
77 |
messages = [{"role": "user", "content": "Hello, how are you?"}]
|
78 |
input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
|
79 |
## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
|
|
|
60 |
|
61 |
# Load the model with the inferred device map
|
62 |
# model = load_checkpoint_and_dispatch(model, model_id, device_map=device_map, no_split_module_classes=["GPTJBlock"])
|
63 |
+
# model.half()
|
64 |
|
65 |
|
66 |
|
|
|
73 |
temperature,
|
74 |
top_p,
|
75 |
):
|
76 |
+
model= model.to('cuda')
|
77 |
messages = [{"role": "user", "content": "Hello, how are you?"}]
|
78 |
input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
|
79 |
## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
|