fusion_None_sep_SEP_describe_gpt / trainer_state.json
sharkMeow's picture
End of training
856f86a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 59.653179190751445,
"eval_steps": 774,
"global_step": 7740,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 5.965317919075145,
"grad_norm": 1.8564964532852173,
"learning_rate": 9.002583979328166e-06,
"loss": 2.6831,
"step": 774
},
{
"epoch": 5.965317919075145,
"eval_accuracy": 0.20230847285641806,
"eval_loss": 2.644841432571411,
"eval_runtime": 24.73,
"eval_samples_per_second": 318.803,
"eval_steps_per_second": 15.973,
"step": 774
},
{
"epoch": 11.93063583815029,
"grad_norm": 2.6696064472198486,
"learning_rate": 8.002583979328166e-06,
"loss": 2.5187,
"step": 1548
},
{
"epoch": 11.93063583815029,
"eval_accuracy": 0.20776255707762556,
"eval_loss": 2.6594879627227783,
"eval_runtime": 24.8781,
"eval_samples_per_second": 316.905,
"eval_steps_per_second": 15.877,
"step": 1548
},
{
"epoch": 17.895953757225435,
"grad_norm": 3.204340696334839,
"learning_rate": 7.003875968992248e-06,
"loss": 2.4385,
"step": 2322
},
{
"epoch": 17.895953757225435,
"eval_accuracy": 0.20416878065279892,
"eval_loss": 2.7390198707580566,
"eval_runtime": 24.8705,
"eval_samples_per_second": 317.002,
"eval_steps_per_second": 15.882,
"step": 2322
},
{
"epoch": 23.86127167630058,
"grad_norm": 2.2868332862854004,
"learning_rate": 6.003875968992249e-06,
"loss": 2.3938,
"step": 3096
},
{
"epoch": 23.86127167630058,
"eval_accuracy": 0.20230847285641806,
"eval_loss": 2.7900900840759277,
"eval_runtime": 25.0841,
"eval_samples_per_second": 314.303,
"eval_steps_per_second": 15.747,
"step": 3096
},
{
"epoch": 29.826589595375722,
"grad_norm": 2.375437021255493,
"learning_rate": 5.003875968992249e-06,
"loss": 2.3615,
"step": 3870
},
{
"epoch": 29.826589595375722,
"eval_accuracy": 0.19949264332825978,
"eval_loss": 2.8409342765808105,
"eval_runtime": 24.829,
"eval_samples_per_second": 317.532,
"eval_steps_per_second": 15.909,
"step": 3870
},
{
"epoch": 35.79190751445087,
"grad_norm": 2.595209836959839,
"learning_rate": 4.005167958656331e-06,
"loss": 2.3383,
"step": 4644
},
{
"epoch": 35.79190751445087,
"eval_accuracy": 0.19636817182479283,
"eval_loss": 2.9096667766571045,
"eval_runtime": 24.7239,
"eval_samples_per_second": 318.882,
"eval_steps_per_second": 15.976,
"step": 4644
},
{
"epoch": 41.75722543352601,
"grad_norm": 3.1603972911834717,
"learning_rate": 3.0051679586563307e-06,
"loss": 2.32,
"step": 5418
},
{
"epoch": 41.75722543352601,
"eval_accuracy": 0.19426324563310865,
"eval_loss": 2.9305648803710938,
"eval_runtime": 24.8147,
"eval_samples_per_second": 317.715,
"eval_steps_per_second": 15.918,
"step": 5418
},
{
"epoch": 47.72254335260116,
"grad_norm": 2.607807159423828,
"learning_rate": 2.005167958656331e-06,
"loss": 2.3179,
"step": 6192
},
{
"epoch": 47.72254335260116,
"eval_accuracy": 0.1923357432775241,
"eval_loss": 2.9449644088745117,
"eval_runtime": 25.2589,
"eval_samples_per_second": 312.128,
"eval_steps_per_second": 15.638,
"step": 6192
},
{
"epoch": 53.6878612716763,
"grad_norm": 3.093654155731201,
"learning_rate": 1.0064599483204135e-06,
"loss": 2.3027,
"step": 6966
},
{
"epoch": 53.6878612716763,
"eval_accuracy": 0.19090704098314448,
"eval_loss": 2.933745861053467,
"eval_runtime": 25.4979,
"eval_samples_per_second": 309.201,
"eval_steps_per_second": 15.491,
"step": 6966
},
{
"epoch": 59.653179190751445,
"grad_norm": 2.4531562328338623,
"learning_rate": 6.4599483204134375e-09,
"loss": 2.3015,
"step": 7740
},
{
"epoch": 59.653179190751445,
"eval_accuracy": 0.1897640791476408,
"eval_loss": 2.943028450012207,
"eval_runtime": 26.7233,
"eval_samples_per_second": 295.024,
"eval_steps_per_second": 14.781,
"step": 7740
},
{
"epoch": 59.653179190751445,
"step": 7740,
"total_flos": 9.747443743335875e+17,
"train_loss": 2.3976070906764777,
"train_runtime": 26120.7286,
"train_samples_per_second": 142.985,
"train_steps_per_second": 0.296
}
],
"logging_steps": 774,
"max_steps": 7740,
"num_input_tokens_seen": 0,
"num_train_epochs": 60,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.747443743335875e+17,
"train_batch_size": 60,
"trial_name": null,
"trial_params": null
}