# merge_with_autopeft.py
import os

import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# LORA_DIR is your *adapter* checkpoint dir produced by training
LORA_DIR = "llama-3.2-3b-finetuned"
OUT_DIR = "merged-fp16"
DTYPE = torch.float16

print("Loading LoRA with AutoPeft (this reads base_model_name_or_path from the adapter config)...")
model = AutoPeftModelForCausalLM.from_pretrained(
    LORA_DIR,
    torch_dtype=DTYPE,
    device_map="cpu",
)

print("Merging and unloading adapters...")
model = model.merge_and_unload()  # <- this *actually* bakes the LoRA deltas into the base weights

os.makedirs(OUT_DIR, exist_ok=True)

print("Saving merged model...")
model.save_pretrained(OUT_DIR, safe_serialization=True)

# Works because the tokenizer saved alongside the adapter is the same as the base model's
tok = AutoTokenizer.from_pretrained(LORA_DIR, use_fast=False)
tok.save_pretrained(OUT_DIR)

print("✅ Done")
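
# --- Optional sanity check (a minimal sketch, not part of the script above) ---
# Assumption: the merged checkpoint in OUT_DIR reloads with plain Transformers and
# needs no PEFT wrapper. This only reloads and inspects the weights; skip it if you
# don't want to hold a second copy of the model in memory.
from transformers import AutoModelForCausalLM

merged = AutoModelForCausalLM.from_pretrained(OUT_DIR, torch_dtype=DTYPE, device_map="cpu")
print(f"Reloaded merged model with {sum(p.numel() for p in merged.parameters()):,} parameters")
# A properly merged checkpoint has no LoRA modules left, so no parameter name should contain "lora_".
assert not any("lora_" in name for name, _ in merged.named_parameters()), "LoRA weights were not merged"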