# math-lora / test_ft.py
import argparse
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
import transformers
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
from transformers.trainer_callback import TrainerCallback
import os
import random
import subprocess
from huggingface_hub import HfApi, hf_hub_download
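
# Write a SLURM batch script that evaluates `model_path` together with the LoRA
# adapter `hub_model_id` on MMLU (5-shot) via lm-eval-harness; returns the script filename.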
def generate_mmlu_slurm(model_path, hub_model_id, output_dir, num_gpus=1):
model_short_name = model_path.split('/')[-1]
filename = f"run_mmlu_{model_short_name}.sbatch"
port = random.randint(10000, 65535)
content = f"""#!/bin/bash
#SBATCH --nodes=1
#SBATCH --gpus-per-node={num_gpus}
#SBATCH --time=24:00:00
#SBATCH --job-name={port}_mmlu_{model_short_name}
#SBATCH --mail-user=[email protected]
#SBATCH --mail-type=ALL
source /opt/rh/devtoolset-10/enable
source /data/davis_brown/miniconda3/bin/activate
conda init
conda activate quip
CUDA_VISIBLE_DEVICES=0 lm_eval \\
--model hf \\
--model_args pretrained={model_path},parallelize=True,peft={hub_model_id} \\
--tasks mmlu \\
--device cuda:0 \\
--batch_size 8 \\
--output_path={output_dir}/{hub_model_id} \\
--num_fewshot 5
"""
with open(filename, 'w') as f:
f.write(content)
print(f"Generated MMLU evaluation SLURM script: {filename}")
return filename
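
# Submit the generated MMLU evaluation script to SLURM via sbatch;
# submission failures are reported but do not raise.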
def launch_mmlu_evaluation(model_path, hub_model_id, output_dir):
slurm_script = generate_mmlu_slurm(model_path, hub_model_id, output_dir)
try:
subprocess.run(["sbatch", slurm_script], check=True)
print(f"Submitted MMLU evaluation job: {slurm_script}")
except subprocess.CalledProcessError as e:
print(f"Failed to submit MMLU evaluation job: {e}")
# Custom callback to push to Hub
class PushToHubCallback(TrainerCallback):
def __init__(self, trainer, push_frequency):
self.trainer = trainer
self.push_frequency = push_frequency
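    # Every `push_frequency` optimizer steps, save a checkpoint and push it to the Hub.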
def on_step_end(self, args, state, control, **kwargs):
if state.global_step % self.push_frequency == 0:
self.trainer.save_model()
self.trainer.push_to_hub(
commit_message=f"Training in progress - Step {state.global_step}"
)
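
# Fine-tune the base model with a LoRA adapter on open-web-math, pushing checkpoints
# to the Hub, and optionally launch an MMLU evaluation job afterwards.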
def main(args):
if args.only_mmlu:
launch_mmlu_evaluation(args.model_id, args.hub_model_id, args.output_dir)
return
model_id = args.model_id
output_dir = args.output_dir
hub_model_id = args.hub_model_id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto", low_cpu_mem_usage=True)
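    # LoRA config: rank == alpha with rsLoRA scaling, applied to all attention and MLP
    # projection matrices of the (frozen) base model.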
    target_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'down_proj', 'up_proj']  # 'lm_head' excluded
config = LoraConfig(
r=args.lora_rank,
lora_alpha=args.lora_rank,
target_modules=target_modules,
lora_dropout=0.05,
bias="none",
task_type="CAUSAL_LM",
use_rslora=True
)
model = get_peft_model(model, config)
model.print_trainable_parameters()
model.enable_input_require_grads()
# data = load_dataset("togethercomputer/RedPajama-Data-1T-Sample")
data = load_dataset("open-web-math/open-web-math")
max_seq_length = args.max_seq_length
tokenizer.pad_token = tokenizer.eos_token
tokenizer.model_max_length = max_seq_length
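    # Tokenize each document to a fixed-length, padded block so batch shapes stay static.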
def preprocess_function(examples):
return tokenizer(examples["text"], truncation=True, max_length=max_seq_length, padding="max_length")
processed_dataset = data["train"].map(preprocess_function, batched=True)
torch.cuda.empty_cache()
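    # Trainer setup: gradient checkpointing + bf16 + bitsandbytes 8-bit AdamW to keep
    # memory low, with checkpoints pushed to `hub_model_id` on the Hub.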
trainer = transformers.Trainer(
model=model,
train_dataset=processed_dataset,
args=TrainingArguments(
per_device_train_batch_size=args.batch_size,
gradient_accumulation_steps=args.gradient_accumulation_steps,
gradient_checkpointing=True,
warmup_steps=200,
max_steps=args.max_steps,
learning_rate=2e-4,
bf16=True,
logging_steps=25,
output_dir=output_dir,
optim="adamw_bnb_8bit",
logging_first_step=True,
push_to_hub=True,
hub_model_id=hub_model_id,
),
data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
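    # KV caching is incompatible with gradient checkpointing, so disable it during training.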
model.config.use_cache = False
push_frequency = 100
    trainer.add_callback(PushToHubCallback(trainer, push_frequency))
trainer.train()
    push_result = trainer.push_to_hub(commit_message="Training complete")
    print(f"Training complete. Pushed to Hub: {push_result}")
# MMLU Evaluation
if args.run_mmlu:
launch_mmlu_evaluation(model_id, hub_model_id, output_dir)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Fine-tune a language model and/or run MMLU evaluation")
parser.add_argument("--model_id", type=str, default="ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16",
help="Model ID to fine-tune or evaluate")
parser.add_argument("--max_seq_length", type=int, default=2048, help="Maximum sequence length")
parser.add_argument("--output_dir", type=str, required=True, help="Output directory for checkpoints and results")
parser.add_argument("--hub_model_id", type=str,
default="davisrbr/ISTA-DASLab-Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16-hf-100000_r8_cont",
help="Hub model ID for pushing or LoRA weights")
parser.add_argument("--batch_size", type=int, default=1, help="Per-device batch size")
parser.add_argument("--gradient_accumulation_steps", type=int, default=8, help="Gradient accumulation steps")
parser.add_argument("--max_steps", type=int, default=50000, help="Maximum number of training steps")
parser.add_argument("--run_mmlu", action="store_true", help="Run MMLU evaluation after training")
parser.add_argument("--lora_rank", type=int, default=8, help="Rank of LoRA adaptation")
parser.add_argument("--only_mmlu", action="store_true", help="Only run MMLU evaluation without training")
parser.add_argument("--launch_slurm", action="store_true", help="Launch the entire script as a SLURM job")
parser.add_argument("--num_gpus", type=int, default=4, help="Number of GPUs to use for training")
parser.add_argument("--commit_hash", type=str, help="Specific commit hash to evaluate (for MMLU only)")
args = parser.parse_args()
main(args)
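
# Example invocations (illustrative; the output directory and Hub IDs below are placeholders):
#   python test_ft.py --output_dir ./outputs --hub_model_id <user>/<lora-repo> --run_mmlu
#   python test_ft.py --output_dir ./outputs --hub_model_id <user>/<lora-repo> --only_mmlu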