|
{ |
|
"best_global_step": 3000, |
|
"best_metric": 2.238426446914673, |
|
"best_model_checkpoint": "./gpt2-alpaca-lora/checkpoint-3000", |
|
"epoch": 1.9430051813471503, |
|
"eval_steps": 250, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16191709844559585, |
|
"eval_loss": 2.402658224105835, |
|
"eval_mean_token_accuracy": 0.5123065428499796, |
|
"eval_num_tokens": 703050.0, |
|
"eval_runtime": 4.6408, |
|
"eval_samples_per_second": 560.46, |
|
"eval_steps_per_second": 35.123, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3238341968911917, |
|
"grad_norm": 0.5626281499862671, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5788, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3238341968911917, |
|
"eval_loss": 2.35490083694458, |
|
"eval_mean_token_accuracy": 0.5189222602025132, |
|
"eval_num_tokens": 1405018.0, |
|
"eval_runtime": 4.6775, |
|
"eval_samples_per_second": 556.069, |
|
"eval_steps_per_second": 34.848, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.48575129533678757, |
|
"eval_loss": 2.327059030532837, |
|
"eval_mean_token_accuracy": 0.5225200927330672, |
|
"eval_num_tokens": 2104755.0, |
|
"eval_runtime": 4.6238, |
|
"eval_samples_per_second": 562.53, |
|
"eval_steps_per_second": 35.253, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6476683937823834, |
|
"grad_norm": 0.4609125554561615, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4241, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6476683937823834, |
|
"eval_loss": 2.3085591793060303, |
|
"eval_mean_token_accuracy": 0.5245303420201402, |
|
"eval_num_tokens": 2811160.0, |
|
"eval_runtime": 4.5658, |
|
"eval_samples_per_second": 569.667, |
|
"eval_steps_per_second": 35.7, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8095854922279793, |
|
"eval_loss": 2.2929277420043945, |
|
"eval_mean_token_accuracy": 0.5260607554503014, |
|
"eval_num_tokens": 3506263.0, |
|
"eval_runtime": 4.6703, |
|
"eval_samples_per_second": 556.922, |
|
"eval_steps_per_second": 34.901, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9715025906735751, |
|
"grad_norm": 0.5748232007026672, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3911, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9715025906735751, |
|
"eval_loss": 2.281684398651123, |
|
"eval_mean_token_accuracy": 0.5284550193628651, |
|
"eval_num_tokens": 4209663.0, |
|
"eval_runtime": 4.5679, |
|
"eval_samples_per_second": 569.406, |
|
"eval_steps_per_second": 35.684, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.133419689119171, |
|
"eval_loss": 2.2740871906280518, |
|
"eval_mean_token_accuracy": 0.5288037231729075, |
|
"eval_num_tokens": 4904955.0, |
|
"eval_runtime": 4.6289, |
|
"eval_samples_per_second": 561.909, |
|
"eval_steps_per_second": 35.214, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.2953367875647668, |
|
"grad_norm": 0.5409476161003113, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3581, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.2953367875647668, |
|
"eval_loss": 2.2654471397399902, |
|
"eval_mean_token_accuracy": 0.5307357484943296, |
|
"eval_num_tokens": 5609264.0, |
|
"eval_runtime": 4.7388, |
|
"eval_samples_per_second": 548.87, |
|
"eval_steps_per_second": 34.397, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4572538860103628, |
|
"eval_loss": 2.2559146881103516, |
|
"eval_mean_token_accuracy": 0.5315202057727275, |
|
"eval_num_tokens": 6300985.0, |
|
"eval_runtime": 4.6198, |
|
"eval_samples_per_second": 563.008, |
|
"eval_steps_per_second": 35.283, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.6191709844559585, |
|
"grad_norm": 0.6490165591239929, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3444, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.6191709844559585, |
|
"eval_loss": 2.2506017684936523, |
|
"eval_mean_token_accuracy": 0.5325605070298435, |
|
"eval_num_tokens": 6998999.0, |
|
"eval_runtime": 4.6688, |
|
"eval_samples_per_second": 557.099, |
|
"eval_steps_per_second": 34.912, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.7810880829015545, |
|
"eval_loss": 2.2434535026550293, |
|
"eval_mean_token_accuracy": 0.533348783393579, |
|
"eval_num_tokens": 7714495.0, |
|
"eval_runtime": 4.628, |
|
"eval_samples_per_second": 562.011, |
|
"eval_steps_per_second": 35.22, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.9430051813471503, |
|
"grad_norm": 0.5797879099845886, |
|
"learning_rate": 0.0002, |
|
"loss": 2.33, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.9430051813471503, |
|
"eval_loss": 2.238426446914673, |
|
"eval_mean_token_accuracy": 0.5334653168733866, |
|
"eval_num_tokens": 8410972.0, |
|
"eval_runtime": 4.621, |
|
"eval_samples_per_second": 562.868, |
|
"eval_steps_per_second": 35.274, |
|
"step": 3000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5564002921906176.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|