# merge_with_autopeft.py
import os

import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# LORA_DIR is your *adapter* checkpoint dir produced by training
LORA_DIR = "llama-3.2-3b-finetuned"
OUT_DIR = "merged-fp16"
DTYPE = torch.float16

print("Loading LoRA with AutoPeft (this reads base_model_name_or_path from the adapter config)...")
model = AutoPeftModelForCausalLM.from_pretrained(
    LORA_DIR,
    torch_dtype=DTYPE,
    device_map="cpu",
)

print("Merging and unloading adapters...")
model = model.merge_and_unload()  # <- this *actually* bakes the LoRA deltas into the base weights

os.makedirs(OUT_DIR, exist_ok=True)

print("Saving merged model...")
model.save_pretrained(OUT_DIR, safe_serialization=True)

# Works because the tokenizer saved alongside the adapter is the same as the base model's
tok = AutoTokenizer.from_pretrained(LORA_DIR, use_fast=False)
tok.save_pretrained(OUT_DIR)

print("✅ Done")
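
# --- Optional sanity check (a minimal sketch, not part of the script above) ---
# Assumption: the merged checkpoint in OUT_DIR reloads with plain Transformers and
# needs no PEFT wrapper. This only reloads and inspects the weights; skip it if you
# don't want to hold a second copy of the model in memory.
from transformers import AutoModelForCausalLM

merged = AutoModelForCausalLM.from_pretrained(OUT_DIR, torch_dtype=DTYPE, device_map="cpu")
print(f"Reloaded merged model with {sum(p.numel() for p in merged.parameters()):,} parameters")
# A properly merged checkpoint has no LoRA modules left, so no parameter name should contain "lora_".
assert not any("lora_" in name for name, _ in merged.named_parameters()), "LoRA weights were not merged"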