"""Load the TinyLlama chat base model and attach a fine-tuned LoRA adapter.

Module-level side effects: downloads weights from the Hugging Face Hub and
binds `tokenizer` and `model` (in eval mode) for use by the rest of the app.
"""
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# Hub identifiers: the frozen base model and the LoRA adapter trained on top of it.
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
LORA_REPO = "sahil239/chatbot-v2"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    # fp16 halves memory on GPU; fall back to fp32 on CPU, where fp16 kernels
    # are poorly supported.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",  # let accelerate place layers across available devices
)

# Wrap the base model with the LoRA weights; the adapted model replaces `model`.
model = PeftModel.from_pretrained(model, LORA_REPO)
model.eval()  # inference mode: disables dropout for deterministic generation