{
  "stage": "sft",
  "do_train": true,
  "model_name_or_path": "unsloth/llama-3-8b-Instruct-bnb-4bit",
  "dataset": "sql_create_context_v4",
  "template": "llama3",
  "finetuning_type": "lora",
  "lora_target": "all",
  "output_dir": "llama3_lora",
  "per_device_train_batch_size": 2,
  "gradient_accumulation_steps": 4,
  "lr_scheduler_type": "cosine",
  "logging_steps": 10,
  "warmup_ratio": 0.1,
  "save_steps": 1000,
  "learning_rate": 5e-05,
  "num_train_epochs": 100,
  "max_samples": 1000,
  "max_grad_norm": 1.0,
  "quantization_bit": 4,
  "loraplus_lr_ratio": 16.0,
  "fp16": true
}