{ "best_global_step": 3000, "best_metric": 2.238426446914673, "best_model_checkpoint": "./gpt2-alpaca-lora/checkpoint-3000", "epoch": 1.9430051813471503, "eval_steps": 250, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16191709844559585, "eval_loss": 2.402658224105835, "eval_mean_token_accuracy": 0.5123065428499796, "eval_num_tokens": 703050.0, "eval_runtime": 4.6408, "eval_samples_per_second": 560.46, "eval_steps_per_second": 35.123, "step": 250 }, { "epoch": 0.3238341968911917, "grad_norm": 0.5626281499862671, "learning_rate": 0.0002, "loss": 2.5788, "step": 500 }, { "epoch": 0.3238341968911917, "eval_loss": 2.35490083694458, "eval_mean_token_accuracy": 0.5189222602025132, "eval_num_tokens": 1405018.0, "eval_runtime": 4.6775, "eval_samples_per_second": 556.069, "eval_steps_per_second": 34.848, "step": 500 }, { "epoch": 0.48575129533678757, "eval_loss": 2.327059030532837, "eval_mean_token_accuracy": 0.5225200927330672, "eval_num_tokens": 2104755.0, "eval_runtime": 4.6238, "eval_samples_per_second": 562.53, "eval_steps_per_second": 35.253, "step": 750 }, { "epoch": 0.6476683937823834, "grad_norm": 0.4609125554561615, "learning_rate": 0.0002, "loss": 2.4241, "step": 1000 }, { "epoch": 0.6476683937823834, "eval_loss": 2.3085591793060303, "eval_mean_token_accuracy": 0.5245303420201402, "eval_num_tokens": 2811160.0, "eval_runtime": 4.5658, "eval_samples_per_second": 569.667, "eval_steps_per_second": 35.7, "step": 1000 }, { "epoch": 0.8095854922279793, "eval_loss": 2.2929277420043945, "eval_mean_token_accuracy": 0.5260607554503014, "eval_num_tokens": 3506263.0, "eval_runtime": 4.6703, "eval_samples_per_second": 556.922, "eval_steps_per_second": 34.901, "step": 1250 }, { "epoch": 0.9715025906735751, "grad_norm": 0.5748232007026672, "learning_rate": 0.0002, "loss": 2.3911, "step": 1500 }, { "epoch": 0.9715025906735751, "eval_loss": 2.281684398651123, "eval_mean_token_accuracy": 0.5284550193628651, "eval_num_tokens": 4209663.0, "eval_runtime": 4.5679, "eval_samples_per_second": 569.406, "eval_steps_per_second": 35.684, "step": 1500 }, { "epoch": 1.133419689119171, "eval_loss": 2.2740871906280518, "eval_mean_token_accuracy": 0.5288037231729075, "eval_num_tokens": 4904955.0, "eval_runtime": 4.6289, "eval_samples_per_second": 561.909, "eval_steps_per_second": 35.214, "step": 1750 }, { "epoch": 1.2953367875647668, "grad_norm": 0.5409476161003113, "learning_rate": 0.0002, "loss": 2.3581, "step": 2000 }, { "epoch": 1.2953367875647668, "eval_loss": 2.2654471397399902, "eval_mean_token_accuracy": 0.5307357484943296, "eval_num_tokens": 5609264.0, "eval_runtime": 4.7388, "eval_samples_per_second": 548.87, "eval_steps_per_second": 34.397, "step": 2000 }, { "epoch": 1.4572538860103628, "eval_loss": 2.2559146881103516, "eval_mean_token_accuracy": 0.5315202057727275, "eval_num_tokens": 6300985.0, "eval_runtime": 4.6198, "eval_samples_per_second": 563.008, "eval_steps_per_second": 35.283, "step": 2250 }, { "epoch": 1.6191709844559585, "grad_norm": 0.6490165591239929, "learning_rate": 0.0002, "loss": 2.3444, "step": 2500 }, { "epoch": 1.6191709844559585, "eval_loss": 2.2506017684936523, "eval_mean_token_accuracy": 0.5325605070298435, "eval_num_tokens": 6998999.0, "eval_runtime": 4.6688, "eval_samples_per_second": 557.099, "eval_steps_per_second": 34.912, "step": 2500 }, { "epoch": 1.7810880829015545, "eval_loss": 2.2434535026550293, "eval_mean_token_accuracy": 0.533348783393579, "eval_num_tokens": 7714495.0, "eval_runtime": 4.628, "eval_samples_per_second": 562.011, "eval_steps_per_second": 35.22, "step": 2750 }, { "epoch": 1.9430051813471503, "grad_norm": 0.5797879099845886, "learning_rate": 0.0002, "loss": 2.33, "step": 3000 }, { "epoch": 1.9430051813471503, "eval_loss": 2.238426446914673, "eval_mean_token_accuracy": 0.5334653168733866, "eval_num_tokens": 8410972.0, "eval_runtime": 4.621, "eval_samples_per_second": 562.868, "eval_steps_per_second": 35.274, "step": 3000 } ], "logging_steps": 500, "max_steps": 3000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5564002921906176.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }