|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.7321835340058575, |
|
"global_step": 13500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.981921394222078e-05, |
|
"loss": 0.1383, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.963842788444155e-05, |
|
"loss": 0.1388, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.945764182666233e-05, |
|
"loss": 0.1394, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.92768557688831e-05, |
|
"loss": 0.1395, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.909606971110389e-05, |
|
"loss": 0.1401, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.891528365332466e-05, |
|
"loss": 0.1402, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.873449759554544e-05, |
|
"loss": 0.1404, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.855371153776621e-05, |
|
"loss": 0.1404, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.837292547998698e-05, |
|
"loss": 0.1404, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_bleu": 58.9732, |
|
"eval_gen_len": 127.0, |
|
"eval_loss": 0.16096045076847076, |
|
"eval_max_gen_len": 127.0, |
|
"eval_runtime": 5905.2845, |
|
"eval_samples_per_second": 2.54, |
|
"eval_steps_per_second": 0.053, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.819213942220776e-05, |
|
"loss": 0.1407, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.801135336442853e-05, |
|
"loss": 0.1408, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.78305673066493e-05, |
|
"loss": 0.1414, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.764978124887008e-05, |
|
"loss": 0.1412, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.746899519109087e-05, |
|
"loss": 0.1412, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.728820913331164e-05, |
|
"loss": 0.1414, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.710742307553242e-05, |
|
"loss": 0.1414, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.69266370177532e-05, |
|
"loss": 0.1417, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.674585095997398e-05, |
|
"loss": 0.1412, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_bleu": 58.854, |
|
"eval_gen_len": 126.9273, |
|
"eval_loss": 0.1599324494600296, |
|
"eval_max_gen_len": 127.0, |
|
"eval_runtime": 5913.6366, |
|
"eval_samples_per_second": 2.537, |
|
"eval_steps_per_second": 0.053, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.656506490219475e-05, |
|
"loss": 0.1414, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.638427884441553e-05, |
|
"loss": 0.1414, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.62034927866363e-05, |
|
"loss": 0.1416, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.602270672885707e-05, |
|
"loss": 0.1417, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.584192067107786e-05, |
|
"loss": 0.1413, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.566113461329863e-05, |
|
"loss": 0.1414, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.548034855551941e-05, |
|
"loss": 0.1419, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.529956249774018e-05, |
|
"loss": 0.1416, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.511877643996096e-05, |
|
"loss": 0.1415, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_bleu": 58.7355, |
|
"eval_gen_len": 127.0, |
|
"eval_loss": 0.15812213718891144, |
|
"eval_max_gen_len": 127.0, |
|
"eval_runtime": 5912.1982, |
|
"eval_samples_per_second": 2.537, |
|
"eval_steps_per_second": 0.053, |
|
"step": 13500 |
|
} |
|
], |
|
"max_steps": 276570, |
|
"num_train_epochs": 15, |
|
"total_flos": 1.402764131303424e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|