"""Load the TinyLlama chat base model and attach a fine-tuned LoRA adapter.

Module-level side effects: downloads weights from the Hugging Face Hub on
first run and leaves `tokenizer` and `model` (in eval mode) ready for
inference.
"""

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face repo IDs: the frozen base model and the LoRA adapter weights.
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
LORA_REPO = "sahil239/chatbot-v2"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# fp16 halves memory when a GPU is available; fall back to fp32 on CPU,
# where fp16 is poorly supported. device_map="auto" lets accelerate place
# the weights (GPU/CPU) automatically.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)

# Wrap the frozen base model with the LoRA adapter weights.
model = PeftModel.from_pretrained(model, LORA_REPO)
model.eval()  # disable dropout etc. — inference only