{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 500, "global_step": 18390, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8156606851549756, "grad_norm": 2.7835583686828613, "learning_rate": 0.00019968889417401253, "loss": 0.6438, "step": 1000 }, { "epoch": 1.631321370309951, "grad_norm": 0.3261709213256836, "learning_rate": 0.00019676585418772425, "loss": 0.2388, "step": 2000 }, { "epoch": 2.4469820554649266, "grad_norm": 2.9635205268859863, "learning_rate": 0.00019084912501825553, "loss": 0.2032, "step": 3000 }, { "epoch": 3.262642740619902, "grad_norm": 2.162414312362671, "learning_rate": 0.00018212175520336934, "loss": 0.163, "step": 4000 }, { "epoch": 4.078303425774878, "grad_norm": 3.4721012115478516, "learning_rate": 0.00017085374734710157, "loss": 0.1519, "step": 5000 }, { "epoch": 4.893964110929853, "grad_norm": 1.8501887321472168, "learning_rate": 0.0001573937049265616, "loss": 0.13, "step": 6000 }, { "epoch": 5.709624796084829, "grad_norm": 3.3048102855682373, "learning_rate": 0.00014215804738782126, "loss": 0.1186, "step": 7000 }, { "epoch": 6.525285481239804, "grad_norm": 2.223738193511963, "learning_rate": 0.00012561812718836913, "loss": 0.1046, "step": 8000 }, { "epoch": 7.3409461663947795, "grad_norm": 0.2893391251564026, "learning_rate": 0.00010828564735203954, "loss": 0.0957, "step": 9000 }, { "epoch": 8.156606851549755, "grad_norm": 0.24986231327056885, "learning_rate": 9.069683068014265e-05, "loss": 0.0912, "step": 10000 }, { "epoch": 8.97226753670473, "grad_norm": 0.6728571653366089, "learning_rate": 7.339583038310173e-05, "loss": 0.08, "step": 11000 }, { "epoch": 9.787928221859707, "grad_norm": 0.3318624794483185, "learning_rate": 5.69178953654216e-05, "loss": 0.0722, "step": 12000 }, { "epoch": 10.603588907014682, "grad_norm": 0.1652187556028366, "learning_rate": 4.177281098721372e-05, "loss": 0.07, "step": 13000 }, { "epoch": 11.419249592169658, "grad_norm": 0.13997943699359894, "learning_rate": 2.8429127602959905e-05, "loss": 0.0635, "step": 14000 }, { "epoch": 12.234910277324634, "grad_norm": 0.17078348994255066, "learning_rate": 1.729966480637476e-05, "loss": 0.0607, "step": 15000 }, { "epoch": 13.05057096247961, "grad_norm": 0.10830472409725189, "learning_rate": 8.728739843127509e-06, "loss": 0.0599, "step": 16000 }, { "epoch": 13.866231647634583, "grad_norm": 0.13888326287269592, "learning_rate": 2.9815153118197825e-06, "loss": 0.0566, "step": 17000 }, { "epoch": 14.681892332789559, "grad_norm": 0.138445645570755, "learning_rate": 2.3579570823278885e-07, "loss": 0.0556, "step": 18000 } ], "logging_steps": 1000, "max_steps": 18390, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 6000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.5627004461372e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }