"""Load the TinyLlama chat base model and apply a fine-tuned LoRA adapter.

Exposes module-level ``tokenizer`` and ``model`` (a ``PeftModel`` wrapping the
base causal LM, in eval mode) for downstream inference code.
"""
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
LORA_REPO = "sahil239/chatbot-v2"

# Tokenizer comes from the base model; the LoRA repo stores only adapter weights.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    # fp16 on GPU for memory/speed; fp32 on CPU, where fp16 inference is poorly supported.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    # Let accelerate place layers on the available device(s) automatically.
    device_map="auto",
    trust_remote_code=True,
)

# Attach the LoRA adapter weights on top of the frozen base model.
model = PeftModel.from_pretrained(model, LORA_REPO)
model.eval()  # inference mode: disables dropout and similar training-only behavior