mt5-finetune-coqe / trainer_state.json
duyvu8373's picture
Upload 12 files
eab1eff verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 29.12621359223301,
"eval_steps": 500,
"global_step": 6000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_bleu": 46.9286,
"eval_gen_len": 17.6388,
"eval_loss": 0.2930362820625305,
"eval_meteor": 0.6401,
"eval_runtime": 21.9119,
"eval_samples_per_second": 24.005,
"eval_steps_per_second": 1.232,
"step": 206
},
{
"epoch": 2.0,
"eval_bleu": 55.0709,
"eval_gen_len": 17.481,
"eval_loss": 0.17119793593883514,
"eval_meteor": 0.7436,
"eval_runtime": 17.1858,
"eval_samples_per_second": 30.607,
"eval_steps_per_second": 1.571,
"step": 412
},
{
"epoch": 2.43,
"learning_rate": 0.0002757281553398058,
"loss": 1.4069,
"step": 500
},
{
"epoch": 3.0,
"eval_bleu": 56.5722,
"eval_gen_len": 17.635,
"eval_loss": 0.12680290639400482,
"eval_meteor": 0.7412,
"eval_runtime": 17.5013,
"eval_samples_per_second": 30.055,
"eval_steps_per_second": 1.543,
"step": 618
},
{
"epoch": 4.0,
"eval_bleu": 59.7915,
"eval_gen_len": 17.5513,
"eval_loss": 0.10222692042589188,
"eval_meteor": 0.7776,
"eval_runtime": 17.1596,
"eval_samples_per_second": 30.653,
"eval_steps_per_second": 1.573,
"step": 824
},
{
"epoch": 4.85,
"learning_rate": 0.00025145631067961165,
"loss": 0.2339,
"step": 1000
},
{
"epoch": 5.0,
"eval_bleu": 60.1489,
"eval_gen_len": 17.6255,
"eval_loss": 0.09315221011638641,
"eval_meteor": 0.7764,
"eval_runtime": 17.1827,
"eval_samples_per_second": 30.612,
"eval_steps_per_second": 1.571,
"step": 1030
},
{
"epoch": 6.0,
"eval_bleu": 60.441,
"eval_gen_len": 17.6749,
"eval_loss": 0.08468244969844818,
"eval_meteor": 0.7696,
"eval_runtime": 17.5475,
"eval_samples_per_second": 29.976,
"eval_steps_per_second": 1.539,
"step": 1236
},
{
"epoch": 7.0,
"eval_bleu": 61.5099,
"eval_gen_len": 17.6027,
"eval_loss": 0.07199732959270477,
"eval_meteor": 0.7875,
"eval_runtime": 17.5698,
"eval_samples_per_second": 29.938,
"eval_steps_per_second": 1.537,
"step": 1442
},
{
"epoch": 7.28,
"learning_rate": 0.00022718446601941746,
"loss": 0.1661,
"step": 1500
},
{
"epoch": 8.0,
"eval_bleu": 61.471,
"eval_gen_len": 17.6901,
"eval_loss": 0.07024037092924118,
"eval_meteor": 0.7778,
"eval_runtime": 17.3131,
"eval_samples_per_second": 30.382,
"eval_steps_per_second": 1.56,
"step": 1648
},
{
"epoch": 9.0,
"eval_bleu": 62.8426,
"eval_gen_len": 17.6274,
"eval_loss": 0.05566093325614929,
"eval_meteor": 0.7954,
"eval_runtime": 18.4096,
"eval_samples_per_second": 28.572,
"eval_steps_per_second": 1.467,
"step": 1854
},
{
"epoch": 9.71,
"learning_rate": 0.00020291262135922327,
"loss": 0.1289,
"step": 2000
},
{
"epoch": 10.0,
"eval_bleu": 62.5415,
"eval_gen_len": 17.6825,
"eval_loss": 0.05298692733049393,
"eval_meteor": 0.7883,
"eval_runtime": 17.3063,
"eval_samples_per_second": 30.394,
"eval_steps_per_second": 1.56,
"step": 2060
},
{
"epoch": 11.0,
"eval_bleu": 62.8065,
"eval_gen_len": 17.711,
"eval_loss": 0.049979548901319504,
"eval_meteor": 0.7881,
"eval_runtime": 17.359,
"eval_samples_per_second": 30.301,
"eval_steps_per_second": 1.555,
"step": 2266
},
{
"epoch": 12.0,
"eval_bleu": 62.6083,
"eval_gen_len": 17.7167,
"eval_loss": 0.04917723312973976,
"eval_meteor": 0.7826,
"eval_runtime": 17.3203,
"eval_samples_per_second": 30.369,
"eval_steps_per_second": 1.559,
"step": 2472
},
{
"epoch": 12.14,
"learning_rate": 0.00017864077669902913,
"loss": 0.1034,
"step": 2500
},
{
"epoch": 13.0,
"eval_bleu": 63.5012,
"eval_gen_len": 17.6844,
"eval_loss": 0.042322106659412384,
"eval_meteor": 0.7943,
"eval_runtime": 17.3469,
"eval_samples_per_second": 30.322,
"eval_steps_per_second": 1.556,
"step": 2678
},
{
"epoch": 14.0,
"eval_bleu": 62.9858,
"eval_gen_len": 17.7091,
"eval_loss": 0.04112717881798744,
"eval_meteor": 0.7876,
"eval_runtime": 17.2586,
"eval_samples_per_second": 30.478,
"eval_steps_per_second": 1.564,
"step": 2884
},
{
"epoch": 14.56,
"learning_rate": 0.00015436893203883494,
"loss": 0.0862,
"step": 3000
},
{
"epoch": 15.0,
"eval_bleu": 64.4301,
"eval_gen_len": 17.6806,
"eval_loss": 0.03431503847241402,
"eval_meteor": 0.8036,
"eval_runtime": 17.1867,
"eval_samples_per_second": 30.605,
"eval_steps_per_second": 1.571,
"step": 3090
},
{
"epoch": 16.0,
"eval_bleu": 65.3347,
"eval_gen_len": 17.654,
"eval_loss": 0.031859882175922394,
"eval_meteor": 0.8153,
"eval_runtime": 17.2343,
"eval_samples_per_second": 30.52,
"eval_steps_per_second": 1.567,
"step": 3296
},
{
"epoch": 16.99,
"learning_rate": 0.00013009708737864078,
"loss": 0.073,
"step": 3500
},
{
"epoch": 17.0,
"eval_bleu": 62.9474,
"eval_gen_len": 17.7433,
"eval_loss": 0.03299812600016594,
"eval_meteor": 0.7817,
"eval_runtime": 17.342,
"eval_samples_per_second": 30.331,
"eval_steps_per_second": 1.557,
"step": 3502
},
{
"epoch": 18.0,
"eval_bleu": 66.3794,
"eval_gen_len": 17.6217,
"eval_loss": 0.02293582074344158,
"eval_meteor": 0.8285,
"eval_runtime": 17.2657,
"eval_samples_per_second": 30.465,
"eval_steps_per_second": 1.564,
"step": 3708
},
{
"epoch": 19.0,
"eval_bleu": 64.1881,
"eval_gen_len": 17.711,
"eval_loss": 0.02707734704017639,
"eval_meteor": 0.7974,
"eval_runtime": 17.8576,
"eval_samples_per_second": 29.455,
"eval_steps_per_second": 1.512,
"step": 3914
},
{
"epoch": 19.42,
"learning_rate": 0.00010582524271844659,
"loss": 0.0609,
"step": 4000
},
{
"epoch": 20.0,
"eval_bleu": 65.4997,
"eval_gen_len": 17.6616,
"eval_loss": 0.022358747199177742,
"eval_meteor": 0.8156,
"eval_runtime": 17.5005,
"eval_samples_per_second": 30.056,
"eval_steps_per_second": 1.543,
"step": 4120
},
{
"epoch": 21.0,
"eval_bleu": 64.8991,
"eval_gen_len": 17.6863,
"eval_loss": 0.022536493837833405,
"eval_meteor": 0.8061,
"eval_runtime": 17.556,
"eval_samples_per_second": 29.961,
"eval_steps_per_second": 1.538,
"step": 4326
},
{
"epoch": 21.84,
"learning_rate": 8.155339805825241e-05,
"loss": 0.0538,
"step": 4500
},
{
"epoch": 22.0,
"eval_bleu": 66.3962,
"eval_gen_len": 17.6407,
"eval_loss": 0.018802011385560036,
"eval_meteor": 0.8251,
"eval_runtime": 17.5884,
"eval_samples_per_second": 29.906,
"eval_steps_per_second": 1.535,
"step": 4532
},
{
"epoch": 23.0,
"eval_bleu": 65.0479,
"eval_gen_len": 17.6863,
"eval_loss": 0.02161051519215107,
"eval_meteor": 0.8069,
"eval_runtime": 17.501,
"eval_samples_per_second": 30.055,
"eval_steps_per_second": 1.543,
"step": 4738
},
{
"epoch": 24.0,
"eval_bleu": 66.0078,
"eval_gen_len": 17.6559,
"eval_loss": 0.01826297491788864,
"eval_meteor": 0.8205,
"eval_runtime": 17.773,
"eval_samples_per_second": 29.595,
"eval_steps_per_second": 1.519,
"step": 4944
},
{
"epoch": 24.27,
"learning_rate": 5.728155339805825e-05,
"loss": 0.0476,
"step": 5000
},
{
"epoch": 25.0,
"eval_bleu": 65.1378,
"eval_gen_len": 17.6863,
"eval_loss": 0.021101167425513268,
"eval_meteor": 0.8087,
"eval_runtime": 17.5024,
"eval_samples_per_second": 30.053,
"eval_steps_per_second": 1.543,
"step": 5150
},
{
"epoch": 26.0,
"eval_bleu": 65.5523,
"eval_gen_len": 17.6749,
"eval_loss": 0.018090983852744102,
"eval_meteor": 0.8134,
"eval_runtime": 17.7903,
"eval_samples_per_second": 29.567,
"eval_steps_per_second": 1.518,
"step": 5356
},
{
"epoch": 26.7,
"learning_rate": 3.3009708737864073e-05,
"loss": 0.044,
"step": 5500
},
{
"epoch": 27.0,
"eval_bleu": 66.1951,
"eval_gen_len": 17.6597,
"eval_loss": 0.016722695901989937,
"eval_meteor": 0.8212,
"eval_runtime": 17.6724,
"eval_samples_per_second": 29.764,
"eval_steps_per_second": 1.528,
"step": 5562
},
{
"epoch": 28.0,
"eval_bleu": 66.2812,
"eval_gen_len": 17.6559,
"eval_loss": 0.0156955998390913,
"eval_meteor": 0.8226,
"eval_runtime": 17.5211,
"eval_samples_per_second": 30.021,
"eval_steps_per_second": 1.541,
"step": 5768
},
{
"epoch": 29.0,
"eval_bleu": 65.4079,
"eval_gen_len": 17.6863,
"eval_loss": 0.017170317471027374,
"eval_meteor": 0.8103,
"eval_runtime": 17.494,
"eval_samples_per_second": 30.067,
"eval_steps_per_second": 1.543,
"step": 5974
},
{
"epoch": 29.13,
"learning_rate": 8.737864077669902e-06,
"loss": 0.04,
"step": 6000
}
],
"logging_steps": 500,
"max_steps": 6180,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"total_flos": 1.55075881506816e+16,
"train_batch_size": 20,
"trial_name": null,
"trial_params": null
}