sharkMeow's picture
End of training
94f78be verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 99.83948635634029,
"eval_steps": 3110,
"global_step": 31100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 9.983948635634029,
"grad_norm": 4.509789943695068,
"learning_rate": 9.001286173633441e-06,
"loss": 2.5113,
"step": 3110
},
{
"epoch": 9.983948635634029,
"eval_accuracy": 0.15658839274850955,
"eval_loss": 2.851839542388916,
"eval_runtime": 30.9634,
"eval_samples_per_second": 265.442,
"eval_steps_per_second": 13.274,
"step": 3110
},
{
"epoch": 19.967897271268058,
"grad_norm": 3.6269993782043457,
"learning_rate": 8.001607717041802e-06,
"loss": 2.3003,
"step": 6220
},
{
"epoch": 19.967897271268058,
"eval_accuracy": 0.14758486433872733,
"eval_loss": 3.0242345333099365,
"eval_runtime": 30.8398,
"eval_samples_per_second": 266.506,
"eval_steps_per_second": 13.327,
"step": 6220
},
{
"epoch": 29.951845906902086,
"grad_norm": 4.074563980102539,
"learning_rate": 7.0022508038585215e-06,
"loss": 2.2124,
"step": 9330
},
{
"epoch": 29.951845906902086,
"eval_accuracy": 0.14040637547146856,
"eval_loss": 3.124624729156494,
"eval_runtime": 30.966,
"eval_samples_per_second": 265.42,
"eval_steps_per_second": 13.273,
"step": 9330
},
{
"epoch": 39.935794542536115,
"grad_norm": 4.5159077644348145,
"learning_rate": 6.002572347266882e-06,
"loss": 2.1682,
"step": 12440
},
{
"epoch": 39.935794542536115,
"eval_accuracy": 0.13709088696921767,
"eval_loss": 3.18056058883667,
"eval_runtime": 30.9323,
"eval_samples_per_second": 265.709,
"eval_steps_per_second": 13.287,
"step": 12440
},
{
"epoch": 49.919743178170144,
"grad_norm": 4.013918399810791,
"learning_rate": 5.003215434083602e-06,
"loss": 2.1384,
"step": 15550
},
{
"epoch": 49.919743178170144,
"eval_accuracy": 0.1342255748874559,
"eval_loss": 3.2024312019348145,
"eval_runtime": 30.8117,
"eval_samples_per_second": 266.749,
"eval_steps_per_second": 13.339,
"step": 15550
},
{
"epoch": 59.90369181380417,
"grad_norm": 3.466132640838623,
"learning_rate": 4.003536977491962e-06,
"loss": 2.1246,
"step": 18660
},
{
"epoch": 59.90369181380417,
"eval_accuracy": 0.1322545321815306,
"eval_loss": 3.2168166637420654,
"eval_runtime": 30.7446,
"eval_samples_per_second": 267.331,
"eval_steps_per_second": 13.368,
"step": 18660
},
{
"epoch": 69.88764044943821,
"grad_norm": 2.7771756649017334,
"learning_rate": 3.004180064308682e-06,
"loss": 2.1121,
"step": 21770
},
{
"epoch": 69.88764044943821,
"eval_accuracy": 0.13129855908782787,
"eval_loss": 3.2208762168884277,
"eval_runtime": 30.8368,
"eval_samples_per_second": 266.532,
"eval_steps_per_second": 13.328,
"step": 21770
},
{
"epoch": 79.87158908507223,
"grad_norm": 2.471256732940674,
"learning_rate": 2.0045016077170422e-06,
"loss": 2.0987,
"step": 24880
},
{
"epoch": 79.87158908507223,
"eval_accuracy": 0.1307032485703857,
"eval_loss": 3.213690996170044,
"eval_runtime": 30.7206,
"eval_samples_per_second": 267.54,
"eval_steps_per_second": 13.379,
"step": 24880
},
{
"epoch": 89.85553772070627,
"grad_norm": 2.8352723121643066,
"learning_rate": 1.0048231511254019e-06,
"loss": 2.0986,
"step": 27990
},
{
"epoch": 89.85553772070627,
"eval_accuracy": 0.1300915223533547,
"eval_loss": 3.2373690605163574,
"eval_runtime": 30.864,
"eval_samples_per_second": 266.297,
"eval_steps_per_second": 13.316,
"step": 27990
},
{
"epoch": 99.83948635634029,
"grad_norm": 2.9546291828155518,
"learning_rate": 5.144694533762058e-09,
"loss": 2.0962,
"step": 31100
},
{
"epoch": 99.83948635634029,
"eval_accuracy": 0.12952913979802896,
"eval_loss": 3.238151788711548,
"eval_runtime": 31.0746,
"eval_samples_per_second": 264.492,
"eval_steps_per_second": 13.226,
"step": 31100
},
{
"epoch": 99.83948635634029,
"step": 31100,
"total_flos": 9.751242948802248e+17,
"train_loss": 2.186087721061093,
"train_runtime": 48974.7066,
"train_samples_per_second": 127.117,
"train_steps_per_second": 0.635
}
],
"logging_steps": 3110,
"max_steps": 31100,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.751242948802248e+17,
"train_batch_size": 50,
"trial_name": null,
"trial_params": null
}