{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.87956643918105, "eval_steps": 3110, "global_step": 31100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.987956643918105, "grad_norm": 0.008763309568166733, "learning_rate": 9.002572347266882e-06, "loss": 2.156, "step": 3110 }, { "epoch": 9.987956643918105, "eval_accuracy": 0.04915439834529748, "eval_loss": 2.9957287311553955, "eval_runtime": 36.4037, "eval_samples_per_second": 225.774, "eval_steps_per_second": 11.29, "step": 3110 }, { "epoch": 19.97591328783621, "grad_norm": 1.9511775970458984, "learning_rate": 8.002572347266881e-06, "loss": 2.1411, "step": 6220 }, { "epoch": 19.97591328783621, "eval_accuracy": 0.05371699720160603, "eval_loss": 3.0536019802093506, "eval_runtime": 36.6549, "eval_samples_per_second": 224.227, "eval_steps_per_second": 11.213, "step": 6220 }, { "epoch": 29.963869931754317, "grad_norm": 2.7666614055633545, "learning_rate": 7.002572347266882e-06, "loss": 1.9888, "step": 9330 }, { "epoch": 29.963869931754317, "eval_accuracy": 0.05665733868678266, "eval_loss": 3.3323957920074463, "eval_runtime": 36.5927, "eval_samples_per_second": 224.607, "eval_steps_per_second": 11.232, "step": 9330 }, { "epoch": 39.95182657567242, "grad_norm": 3.709224224090576, "learning_rate": 6.002572347266882e-06, "loss": 1.8759, "step": 12440 }, { "epoch": 39.95182657567242, "eval_accuracy": 0.05706290302956564, "eval_loss": 3.6092300415039062, "eval_runtime": 36.6501, "eval_samples_per_second": 224.256, "eval_steps_per_second": 11.214, "step": 12440 }, { "epoch": 49.939783219590524, "grad_norm": 3.455683708190918, "learning_rate": 5.002893890675241e-06, "loss": 1.8129, "step": 15550 }, { "epoch": 49.939783219590524, "eval_accuracy": 0.05752524638033824, "eval_loss": 3.809110641479492, "eval_runtime": 36.6247, "eval_samples_per_second": 224.411, "eval_steps_per_second": 11.222, "step": 15550 }, { "epoch": 59.927739863508634, "grad_norm": 3.957350254058838, "learning_rate": 4.003536977491962e-06, "loss": 1.7708, "step": 18660 }, { "epoch": 59.927739863508634, "eval_accuracy": 0.05777264062943586, "eval_loss": 3.9897844791412354, "eval_runtime": 36.4616, "eval_samples_per_second": 225.415, "eval_steps_per_second": 11.272, "step": 18660 }, { "epoch": 69.91569650742673, "grad_norm": 4.713634014129639, "learning_rate": 3.003858520900322e-06, "loss": 1.7413, "step": 21770 }, { "epoch": 69.91569650742673, "eval_accuracy": 0.057914588149409904, "eval_loss": 4.2735114097595215, "eval_runtime": 36.4969, "eval_samples_per_second": 225.197, "eval_steps_per_second": 11.261, "step": 21770 }, { "epoch": 79.90365315134484, "grad_norm": 3.9962944984436035, "learning_rate": 2.0045016077170422e-06, "loss": 1.7172, "step": 24880 }, { "epoch": 79.90365315134484, "eval_accuracy": 0.0580362574522448, "eval_loss": 4.343382835388184, "eval_runtime": 36.6256, "eval_samples_per_second": 224.406, "eval_steps_per_second": 11.222, "step": 24880 }, { "epoch": 89.89160979526295, "grad_norm": 2.9578592777252197, "learning_rate": 1.0048231511254019e-06, "loss": 1.7056, "step": 27990 }, { "epoch": 89.89160979526295, "eval_accuracy": 0.058063295075097, "eval_loss": 4.511988162994385, "eval_runtime": 36.4648, "eval_samples_per_second": 225.395, "eval_steps_per_second": 11.271, "step": 27990 }, { "epoch": 99.87956643918105, "grad_norm": 3.108112335205078, "learning_rate": 5.466237942122187e-09, "loss": 1.7018, "step": 31100 }, { "epoch": 99.87956643918105, "eval_accuracy": 0.05819442754593016, "eval_loss": 4.51352071762085, "eval_runtime": 36.5052, "eval_samples_per_second": 225.146, "eval_steps_per_second": 11.259, "step": 31100 }, { "epoch": 99.87956643918105, "step": 31100, "total_flos": 1.4711495868588088e+18, "train_loss": 1.861133707420619, "train_runtime": 58944.542, "train_samples_per_second": 105.616, "train_steps_per_second": 0.528 } ], "logging_steps": 3110, "max_steps": 31100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4711495868588088e+18, "train_batch_size": 25, "trial_name": null, "trial_params": null }