{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.83948635634029, "eval_steps": 3110, "global_step": 31100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.983948635634029, "grad_norm": 4.509789943695068, "learning_rate": 9.001286173633441e-06, "loss": 2.5113, "step": 3110 }, { "epoch": 9.983948635634029, "eval_accuracy": 0.15658839274850955, "eval_loss": 2.851839542388916, "eval_runtime": 30.9634, "eval_samples_per_second": 265.442, "eval_steps_per_second": 13.274, "step": 3110 }, { "epoch": 19.967897271268058, "grad_norm": 3.6269993782043457, "learning_rate": 8.001607717041802e-06, "loss": 2.3003, "step": 6220 }, { "epoch": 19.967897271268058, "eval_accuracy": 0.14758486433872733, "eval_loss": 3.0242345333099365, "eval_runtime": 30.8398, "eval_samples_per_second": 266.506, "eval_steps_per_second": 13.327, "step": 6220 }, { "epoch": 29.951845906902086, "grad_norm": 4.074563980102539, "learning_rate": 7.0022508038585215e-06, "loss": 2.2124, "step": 9330 }, { "epoch": 29.951845906902086, "eval_accuracy": 0.14040637547146856, "eval_loss": 3.124624729156494, "eval_runtime": 30.966, "eval_samples_per_second": 265.42, "eval_steps_per_second": 13.273, "step": 9330 }, { "epoch": 39.935794542536115, "grad_norm": 4.5159077644348145, "learning_rate": 6.002572347266882e-06, "loss": 2.1682, "step": 12440 }, { "epoch": 39.935794542536115, "eval_accuracy": 0.13709088696921767, "eval_loss": 3.18056058883667, "eval_runtime": 30.9323, "eval_samples_per_second": 265.709, "eval_steps_per_second": 13.287, "step": 12440 }, { "epoch": 49.919743178170144, "grad_norm": 4.013918399810791, "learning_rate": 5.003215434083602e-06, "loss": 2.1384, "step": 15550 }, { "epoch": 49.919743178170144, "eval_accuracy": 0.1342255748874559, "eval_loss": 3.2024312019348145, "eval_runtime": 30.8117, "eval_samples_per_second": 266.749, "eval_steps_per_second": 13.339, "step": 15550 }, { "epoch": 59.90369181380417, "grad_norm": 3.466132640838623, "learning_rate": 4.003536977491962e-06, "loss": 2.1246, "step": 18660 }, { "epoch": 59.90369181380417, "eval_accuracy": 0.1322545321815306, "eval_loss": 3.2168166637420654, "eval_runtime": 30.7446, "eval_samples_per_second": 267.331, "eval_steps_per_second": 13.368, "step": 18660 }, { "epoch": 69.88764044943821, "grad_norm": 2.7771756649017334, "learning_rate": 3.004180064308682e-06, "loss": 2.1121, "step": 21770 }, { "epoch": 69.88764044943821, "eval_accuracy": 0.13129855908782787, "eval_loss": 3.2208762168884277, "eval_runtime": 30.8368, "eval_samples_per_second": 266.532, "eval_steps_per_second": 13.328, "step": 21770 }, { "epoch": 79.87158908507223, "grad_norm": 2.471256732940674, "learning_rate": 2.0045016077170422e-06, "loss": 2.0987, "step": 24880 }, { "epoch": 79.87158908507223, "eval_accuracy": 0.1307032485703857, "eval_loss": 3.213690996170044, "eval_runtime": 30.7206, "eval_samples_per_second": 267.54, "eval_steps_per_second": 13.379, "step": 24880 }, { "epoch": 89.85553772070627, "grad_norm": 2.8352723121643066, "learning_rate": 1.0048231511254019e-06, "loss": 2.0986, "step": 27990 }, { "epoch": 89.85553772070627, "eval_accuracy": 0.1300915223533547, "eval_loss": 3.2373690605163574, "eval_runtime": 30.864, "eval_samples_per_second": 266.297, "eval_steps_per_second": 13.316, "step": 27990 }, { "epoch": 99.83948635634029, "grad_norm": 2.9546291828155518, "learning_rate": 5.144694533762058e-09, "loss": 2.0962, "step": 31100 }, { "epoch": 99.83948635634029, "eval_accuracy": 0.12952913979802896, "eval_loss": 3.238151788711548, "eval_runtime": 31.0746, "eval_samples_per_second": 264.492, "eval_steps_per_second": 13.226, "step": 31100 }, { "epoch": 99.83948635634029, "step": 31100, "total_flos": 9.751242948802248e+17, "train_loss": 2.186087721061093, "train_runtime": 48974.7066, "train_samples_per_second": 127.117, "train_steps_per_second": 0.635 } ], "logging_steps": 3110, "max_steps": 31100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.751242948802248e+17, "train_batch_size": 50, "trial_name": null, "trial_params": null }