{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.01255739120197779, "eval_steps": 5, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006278695600988894, "grad_norm": 1.6499921083450317, "learning_rate": 1e-05, "loss": 27.2109, "step": 1 }, { "epoch": 0.0006278695600988894, "eval_loss": 1.5100669860839844, "eval_runtime": 233.4391, "eval_samples_per_second": 11.493, "eval_steps_per_second": 5.749, "step": 1 }, { "epoch": 0.0012557391201977789, "grad_norm": 1.3669140338897705, "learning_rate": 2e-05, "loss": 22.2695, "step": 2 }, { "epoch": 0.0018836086802966683, "grad_norm": 1.511452078819275, "learning_rate": 3e-05, "loss": 24.6953, "step": 3 }, { "epoch": 0.0025114782403955578, "grad_norm": 1.1043322086334229, "learning_rate": 4e-05, "loss": 23.2188, "step": 4 }, { "epoch": 0.0031393478004944474, "grad_norm": 1.18587327003479, "learning_rate": 5e-05, "loss": 22.7891, "step": 5 }, { "epoch": 0.0031393478004944474, "eval_loss": 1.508397102355957, "eval_runtime": 85.0417, "eval_samples_per_second": 31.549, "eval_steps_per_second": 15.781, "step": 5 }, { "epoch": 0.0037672173605933367, "grad_norm": 2.3235645294189453, "learning_rate": 6e-05, "loss": 25.4453, "step": 6 }, { "epoch": 0.004395086920692226, "grad_norm": 1.4490516185760498, "learning_rate": 7e-05, "loss": 22.5156, "step": 7 }, { "epoch": 0.0050229564807911155, "grad_norm": 1.4218536615371704, "learning_rate": 8e-05, "loss": 25.3516, "step": 8 }, { "epoch": 0.005650826040890005, "grad_norm": 1.5750902891159058, "learning_rate": 9e-05, "loss": 23.1602, "step": 9 }, { "epoch": 0.006278695600988895, "grad_norm": 1.523149847984314, "learning_rate": 0.0001, "loss": 25.4883, "step": 10 }, { "epoch": 0.006278695600988895, "eval_loss": 1.4983664751052856, "eval_runtime": 84.6571, "eval_samples_per_second": 31.693, "eval_steps_per_second": 15.852, "step": 10 }, { "epoch": 0.006906565161087784, "grad_norm": 1.5122368335723877, "learning_rate": 9.755282581475769e-05, "loss": 23.3281, "step": 11 }, { "epoch": 0.007534434721186673, "grad_norm": 1.6233738660812378, "learning_rate": 9.045084971874738e-05, "loss": 23.0508, "step": 12 }, { "epoch": 0.008162304281285563, "grad_norm": 1.552497386932373, "learning_rate": 7.938926261462366e-05, "loss": 24.582, "step": 13 }, { "epoch": 0.008790173841384452, "grad_norm": 2.016003370285034, "learning_rate": 6.545084971874738e-05, "loss": 22.6602, "step": 14 }, { "epoch": 0.009418043401483342, "grad_norm": 1.8063987493515015, "learning_rate": 5e-05, "loss": 25.3066, "step": 15 }, { "epoch": 0.009418043401483342, "eval_loss": 1.4756321907043457, "eval_runtime": 83.3991, "eval_samples_per_second": 32.171, "eval_steps_per_second": 16.091, "step": 15 }, { "epoch": 0.010045912961582231, "grad_norm": 1.5516988039016724, "learning_rate": 3.4549150281252636e-05, "loss": 22.9258, "step": 16 }, { "epoch": 0.01067378252168112, "grad_norm": 3.2057738304138184, "learning_rate": 2.061073738537635e-05, "loss": 23.6406, "step": 17 }, { "epoch": 0.01130165208178001, "grad_norm": 3.0766243934631348, "learning_rate": 9.549150281252633e-06, "loss": 24.7305, "step": 18 }, { "epoch": 0.0119295216418789, "grad_norm": 1.9255625009536743, "learning_rate": 2.4471741852423237e-06, "loss": 21.9883, "step": 19 }, { "epoch": 0.01255739120197779, "grad_norm": 1.7242449522018433, "learning_rate": 0.0, "loss": 21.7461, "step": 20 }, { "epoch": 0.01255739120197779, "eval_loss": 1.4681618213653564, "eval_runtime": 84.5809, "eval_samples_per_second": 31.721, "eval_steps_per_second": 15.866, "step": 20 } ], "logging_steps": 1, "max_steps": 20, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3756522253320192.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }