|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 59.653179190751445, |
|
"eval_steps": 774, |
|
"global_step": 7740, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 5.965317919075145, |
|
"grad_norm": 1.8564964532852173, |
|
"learning_rate": 9.002583979328166e-06, |
|
"loss": 2.6831, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 5.965317919075145, |
|
"eval_accuracy": 0.20230847285641806, |
|
"eval_loss": 2.644841432571411, |
|
"eval_runtime": 24.73, |
|
"eval_samples_per_second": 318.803, |
|
"eval_steps_per_second": 15.973, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 11.93063583815029, |
|
"grad_norm": 2.6696064472198486, |
|
"learning_rate": 8.002583979328166e-06, |
|
"loss": 2.5187, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 11.93063583815029, |
|
"eval_accuracy": 0.20776255707762556, |
|
"eval_loss": 2.6594879627227783, |
|
"eval_runtime": 24.8781, |
|
"eval_samples_per_second": 316.905, |
|
"eval_steps_per_second": 15.877, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 17.895953757225435, |
|
"grad_norm": 3.204340696334839, |
|
"learning_rate": 7.003875968992248e-06, |
|
"loss": 2.4385, |
|
"step": 2322 |
|
}, |
|
{ |
|
"epoch": 17.895953757225435, |
|
"eval_accuracy": 0.20416878065279892, |
|
"eval_loss": 2.7390198707580566, |
|
"eval_runtime": 24.8705, |
|
"eval_samples_per_second": 317.002, |
|
"eval_steps_per_second": 15.882, |
|
"step": 2322 |
|
}, |
|
{ |
|
"epoch": 23.86127167630058, |
|
"grad_norm": 2.2868332862854004, |
|
"learning_rate": 6.003875968992249e-06, |
|
"loss": 2.3938, |
|
"step": 3096 |
|
}, |
|
{ |
|
"epoch": 23.86127167630058, |
|
"eval_accuracy": 0.20230847285641806, |
|
"eval_loss": 2.7900900840759277, |
|
"eval_runtime": 25.0841, |
|
"eval_samples_per_second": 314.303, |
|
"eval_steps_per_second": 15.747, |
|
"step": 3096 |
|
}, |
|
{ |
|
"epoch": 29.826589595375722, |
|
"grad_norm": 2.375437021255493, |
|
"learning_rate": 5.003875968992249e-06, |
|
"loss": 2.3615, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 29.826589595375722, |
|
"eval_accuracy": 0.19949264332825978, |
|
"eval_loss": 2.8409342765808105, |
|
"eval_runtime": 24.829, |
|
"eval_samples_per_second": 317.532, |
|
"eval_steps_per_second": 15.909, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 35.79190751445087, |
|
"grad_norm": 2.595209836959839, |
|
"learning_rate": 4.005167958656331e-06, |
|
"loss": 2.3383, |
|
"step": 4644 |
|
}, |
|
{ |
|
"epoch": 35.79190751445087, |
|
"eval_accuracy": 0.19636817182479283, |
|
"eval_loss": 2.9096667766571045, |
|
"eval_runtime": 24.7239, |
|
"eval_samples_per_second": 318.882, |
|
"eval_steps_per_second": 15.976, |
|
"step": 4644 |
|
}, |
|
{ |
|
"epoch": 41.75722543352601, |
|
"grad_norm": 3.1603972911834717, |
|
"learning_rate": 3.0051679586563307e-06, |
|
"loss": 2.32, |
|
"step": 5418 |
|
}, |
|
{ |
|
"epoch": 41.75722543352601, |
|
"eval_accuracy": 0.19426324563310865, |
|
"eval_loss": 2.9305648803710938, |
|
"eval_runtime": 24.8147, |
|
"eval_samples_per_second": 317.715, |
|
"eval_steps_per_second": 15.918, |
|
"step": 5418 |
|
}, |
|
{ |
|
"epoch": 47.72254335260116, |
|
"grad_norm": 2.607807159423828, |
|
"learning_rate": 2.005167958656331e-06, |
|
"loss": 2.3179, |
|
"step": 6192 |
|
}, |
|
{ |
|
"epoch": 47.72254335260116, |
|
"eval_accuracy": 0.1923357432775241, |
|
"eval_loss": 2.9449644088745117, |
|
"eval_runtime": 25.2589, |
|
"eval_samples_per_second": 312.128, |
|
"eval_steps_per_second": 15.638, |
|
"step": 6192 |
|
}, |
|
{ |
|
"epoch": 53.6878612716763, |
|
"grad_norm": 3.093654155731201, |
|
"learning_rate": 1.0064599483204135e-06, |
|
"loss": 2.3027, |
|
"step": 6966 |
|
}, |
|
{ |
|
"epoch": 53.6878612716763, |
|
"eval_accuracy": 0.19090704098314448, |
|
"eval_loss": 2.933745861053467, |
|
"eval_runtime": 25.4979, |
|
"eval_samples_per_second": 309.201, |
|
"eval_steps_per_second": 15.491, |
|
"step": 6966 |
|
}, |
|
{ |
|
"epoch": 59.653179190751445, |
|
"grad_norm": 2.4531562328338623, |
|
"learning_rate": 6.4599483204134375e-09, |
|
"loss": 2.3015, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 59.653179190751445, |
|
"eval_accuracy": 0.1897640791476408, |
|
"eval_loss": 2.943028450012207, |
|
"eval_runtime": 26.7233, |
|
"eval_samples_per_second": 295.024, |
|
"eval_steps_per_second": 14.781, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 59.653179190751445, |
|
"step": 7740, |
|
"total_flos": 9.747443743335875e+17, |
|
"train_loss": 2.3976070906764777, |
|
"train_runtime": 26120.7286, |
|
"train_samples_per_second": 142.985, |
|
"train_steps_per_second": 0.296 |
|
} |
|
], |
|
"logging_steps": 774, |
|
"max_steps": 7740, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 60, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.747443743335875e+17, |
|
"train_batch_size": 60, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|