|
{ |
|
"best_metric": 0.34489724040031433, |
|
"best_model_checkpoint": "./checkpoint-my/checkpoint-1500", |
|
"epoch": 7.56789812129618, |
|
"eval_steps": 500, |
|
"global_step": 7500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7375328083989503e-05, |
|
"loss": 4.7871, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_bleu": 26.4661, |
|
"eval_gen_len": 31.9891, |
|
"eval_loss": 1.3954318761825562, |
|
"eval_runtime": 189.1048, |
|
"eval_samples_per_second": 5.352, |
|
"eval_steps_per_second": 1.338, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.4750656167979002e-05, |
|
"loss": 0.3945, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_bleu": 27.1091, |
|
"eval_gen_len": 32.2134, |
|
"eval_loss": 0.34945473074913025, |
|
"eval_runtime": 187.8434, |
|
"eval_samples_per_second": 5.387, |
|
"eval_steps_per_second": 1.347, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.2125984251968505e-05, |
|
"loss": 0.145, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_bleu": 27.3119, |
|
"eval_gen_len": 32.2885, |
|
"eval_loss": 0.34489724040031433, |
|
"eval_runtime": 188.4135, |
|
"eval_samples_per_second": 5.371, |
|
"eval_steps_per_second": 1.343, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.650043744531934e-05, |
|
"loss": 0.3623, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_bleu": 3.0191, |
|
"eval_gen_len": 46.4427, |
|
"eval_loss": 0.7403104901313782, |
|
"eval_runtime": 245.2236, |
|
"eval_samples_per_second": 4.127, |
|
"eval_steps_per_second": 1.032, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.562554680664917e-05, |
|
"loss": 0.3224, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_bleu": 3.0664, |
|
"eval_gen_len": 46.9269, |
|
"eval_loss": 0.7314952611923218, |
|
"eval_runtime": 247.2109, |
|
"eval_samples_per_second": 4.094, |
|
"eval_steps_per_second": 1.023, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 1.4750656167979002e-05, |
|
"loss": 0.3066, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_bleu": 2.9075, |
|
"eval_gen_len": 47.915, |
|
"eval_loss": 0.726075291633606, |
|
"eval_runtime": 280.6593, |
|
"eval_samples_per_second": 3.606, |
|
"eval_steps_per_second": 0.901, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 1.3875765529308838e-05, |
|
"loss": 0.2952, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"eval_bleu": 2.9214, |
|
"eval_gen_len": 47.5, |
|
"eval_loss": 0.7223652005195618, |
|
"eval_runtime": 268.7931, |
|
"eval_samples_per_second": 3.765, |
|
"eval_steps_per_second": 0.941, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 1.3000874890638671e-05, |
|
"loss": 0.2859, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"eval_bleu": 3.0026, |
|
"eval_gen_len": 47.8577, |
|
"eval_loss": 0.7183992266654968, |
|
"eval_runtime": 271.7722, |
|
"eval_samples_per_second": 3.724, |
|
"eval_steps_per_second": 0.931, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 1.3846153846153847e-05, |
|
"loss": 0.1257, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_bleu": 27.2567, |
|
"eval_gen_len": 32.3172, |
|
"eval_loss": 0.3456554710865021, |
|
"eval_runtime": 190.3961, |
|
"eval_samples_per_second": 5.315, |
|
"eval_steps_per_second": 1.329, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 1.3162393162393164e-05, |
|
"loss": 0.1218, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_bleu": 27.4171, |
|
"eval_gen_len": 32.2915, |
|
"eval_loss": 0.3452778160572052, |
|
"eval_runtime": 193.9304, |
|
"eval_samples_per_second": 5.218, |
|
"eval_steps_per_second": 1.305, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 1.247863247863248e-05, |
|
"loss": 0.1189, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_bleu": 27.4615, |
|
"eval_gen_len": 32.2204, |
|
"eval_loss": 0.34533679485321045, |
|
"eval_runtime": 186.2569, |
|
"eval_samples_per_second": 5.433, |
|
"eval_steps_per_second": 1.358, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 1.1794871794871796e-05, |
|
"loss": 0.1166, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"eval_bleu": 27.6321, |
|
"eval_gen_len": 32.2816, |
|
"eval_loss": 0.34552034735679626, |
|
"eval_runtime": 185.1476, |
|
"eval_samples_per_second": 5.466, |
|
"eval_steps_per_second": 1.366, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 1.125462495795493e-05, |
|
"loss": 0.1171, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_bleu": 27.3986, |
|
"eval_gen_len": 32.4397, |
|
"eval_loss": 0.34709280729293823, |
|
"eval_runtime": 204.1804, |
|
"eval_samples_per_second": 4.956, |
|
"eval_steps_per_second": 1.239, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.0581903800874538e-05, |
|
"loss": 0.1157, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"eval_bleu": 27.5083, |
|
"eval_gen_len": 32.2816, |
|
"eval_loss": 0.34769660234451294, |
|
"eval_runtime": 209.4596, |
|
"eval_samples_per_second": 4.831, |
|
"eval_steps_per_second": 1.208, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 9.909182643794148e-06, |
|
"loss": 0.1121, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"eval_bleu": 27.4437, |
|
"eval_gen_len": 32.2194, |
|
"eval_loss": 0.34789395332336426, |
|
"eval_runtime": 209.9641, |
|
"eval_samples_per_second": 4.82, |
|
"eval_steps_per_second": 1.205, |
|
"step": 7500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 14865, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"total_flos": 5.2005742820563354e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|