{ |
|
"best_metric": 3.4478375911712646, |
|
"best_model_checkpoint": "checkpoints/it5-small/checkpoint-17663", |
|
"epoch": 12.753068592057762, |
|
"eval_steps": 1039, |
|
"global_step": 17663, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14945848375451262, |
|
"eval_g2l_cer": 293.7627, |
|
"eval_g2l_gen_len": 13.9075, |
|
"eval_g2l_rouge1": 17.4489, |
|
"eval_g2l_rouge2": 8.198, |
|
"eval_g2l_rougeL": 17.2001, |
|
"eval_g2l_rougeLsum": 17.1915, |
|
"eval_l2ex_cer": 136.8329, |
|
"eval_l2ex_gen_len": 54.1737, |
|
"eval_l2ex_rouge1": 17.812, |
|
"eval_l2ex_rouge2": 6.7548, |
|
"eval_l2ex_rougeL": 16.7136, |
|
"eval_l2ex_rougeLsum": 16.7013, |
|
"eval_l2g_cer": 154.2791, |
|
"eval_l2g_gen_len": 50.8855, |
|
"eval_l2g_rouge1": 13.7399, |
|
"eval_l2g_rouge2": 4.278, |
|
"eval_l2g_rougeL": 13.0474, |
|
"eval_l2g_rougeLsum": 13.0446, |
|
"eval_loss": 4.6176557540893555, |
|
"eval_runtime": 109.9862, |
|
"eval_samples_per_second": 90.202, |
|
"eval_steps_per_second": 0.709, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.15018050541516245, |
|
"grad_norm": 99.67880249023438, |
|
"learning_rate": 0.0002003853564547206, |
|
"loss": 5.9292, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.3003610108303249, |
|
"grad_norm": 195.41990661621094, |
|
"learning_rate": 0.0004007707129094412, |
|
"loss": 4.6698, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.4505415162454874, |
|
"grad_norm": 274.79534912109375, |
|
"learning_rate": 0.0006011560693641619, |
|
"loss": 4.515, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.6007220216606498, |
|
"grad_norm": 210.73873901367188, |
|
"learning_rate": 0.0008015414258188824, |
|
"loss": 4.4776, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.7501805054151625, |
|
"eval_g2l_cer": 61.7538, |
|
"eval_g2l_gen_len": 3.2726, |
|
"eval_g2l_rouge1": 27.6616, |
|
"eval_g2l_rouge2": 19.0238, |
|
"eval_g2l_rougeL": 27.5754, |
|
"eval_g2l_rougeLsum": 27.5869, |
|
"eval_l2ex_cer": 88.1951, |
|
"eval_l2ex_gen_len": 16.4502, |
|
"eval_l2ex_rouge1": 28.4409, |
|
"eval_l2ex_rouge2": 14.714, |
|
"eval_l2ex_rougeL": 26.3524, |
|
"eval_l2ex_rougeLsum": 26.38, |
|
"eval_l2g_cer": 81.2622, |
|
"eval_l2g_gen_len": 12.7833, |
|
"eval_l2g_rouge1": 28.7842, |
|
"eval_l2g_rouge2": 15.662, |
|
"eval_l2g_rougeL": 27.3798, |
|
"eval_l2g_rougeLsum": 27.3872, |
|
"eval_loss": 4.086117744445801, |
|
"eval_runtime": 92.0779, |
|
"eval_samples_per_second": 107.746, |
|
"eval_steps_per_second": 0.847, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.7509025270758123, |
|
"grad_norm": 280.9309387207031, |
|
"learning_rate": 0.0009999999746484521, |
|
"loss": 4.4853, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.9010830324909748, |
|
"grad_norm": 278.86737060546875, |
|
"learning_rate": 0.000999720525233547, |
|
"loss": 4.4543, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 1.0512635379061372, |
|
"grad_norm": 347.6966857910156, |
|
"learning_rate": 0.0009988930279319957, |
|
"loss": 4.4011, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 1.2014440433212996, |
|
"grad_norm": 388.53009033203125, |
|
"learning_rate": 0.0009975183907215961, |
|
"loss": 4.3391, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 1.3516245487364622, |
|
"grad_norm": 374.0000915527344, |
|
"learning_rate": 0.0009955981219336306, |
|
"loss": 4.2954, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 1.500361010830325, |
|
"eval_g2l_cer": 60.8741, |
|
"eval_g2l_gen_len": 5.7528, |
|
"eval_g2l_rouge1": 34.1975, |
|
"eval_g2l_rouge2": 23.0519, |
|
"eval_g2l_rougeL": 33.7746, |
|
"eval_g2l_rougeLsum": 33.7503, |
|
"eval_l2ex_cer": 90.0364, |
|
"eval_l2ex_gen_len": 38.0339, |
|
"eval_l2ex_rouge1": 27.7928, |
|
"eval_l2ex_rouge2": 12.8688, |
|
"eval_l2ex_rougeL": 25.3603, |
|
"eval_l2ex_rougeLsum": 25.3866, |
|
"eval_l2g_cer": 95.9503, |
|
"eval_l2g_gen_len": 38.6107, |
|
"eval_l2g_rouge1": 31.3843, |
|
"eval_l2g_rouge2": 20.1215, |
|
"eval_l2g_rougeL": 30.0596, |
|
"eval_l2g_rougeLsum": 30.1295, |
|
"eval_loss": 4.214888095855713, |
|
"eval_runtime": 102.9376, |
|
"eval_samples_per_second": 96.379, |
|
"eval_steps_per_second": 0.758, |
|
"step": 2078 |
|
}, |
|
{ |
|
"epoch": 1.5018050541516246, |
|
"grad_norm": 282.3067932128906, |
|
"learning_rate": 0.0009931343285978396, |
|
"loss": 4.2676, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.651985559566787, |
|
"grad_norm": 429.1705017089844, |
|
"learning_rate": 0.000990129714130466, |
|
"loss": 4.2425, |
|
"step": 2288 |
|
}, |
|
{ |
|
"epoch": 1.8021660649819493, |
|
"grad_norm": 342.697509765625, |
|
"learning_rate": 0.0009865875753679114, |
|
"loss": 4.2198, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 1.952346570397112, |
|
"grad_norm": 287.99896240234375, |
|
"learning_rate": 0.0009825117989492565, |
|
"loss": 4.1951, |
|
"step": 2704 |
|
}, |
|
{ |
|
"epoch": 2.1025270758122745, |
|
"grad_norm": 329.3447570800781, |
|
"learning_rate": 0.0009779068570516165, |
|
"loss": 4.137, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 2.2505415162454874, |
|
"eval_g2l_cer": 57.0175, |
|
"eval_g2l_gen_len": 3.4026, |
|
"eval_g2l_rouge1": 33.2248, |
|
"eval_g2l_rouge2": 24.7912, |
|
"eval_g2l_rougeL": 33.1481, |
|
"eval_g2l_rougeLsum": 33.1288, |
|
"eval_l2ex_cer": 83.9364, |
|
"eval_l2ex_gen_len": 17.4142, |
|
"eval_l2ex_rouge1": 30.4747, |
|
"eval_l2ex_rouge2": 15.2972, |
|
"eval_l2ex_rougeL": 27.443, |
|
"eval_l2ex_rougeLsum": 27.5452, |
|
"eval_l2g_cer": 80.6606, |
|
"eval_l2g_gen_len": 14.6152, |
|
"eval_l2g_rouge1": 34.2385, |
|
"eval_l2g_rouge2": 22.2422, |
|
"eval_l2g_rougeL": 32.5658, |
|
"eval_l2g_rougeLsum": 32.6354, |
|
"eval_loss": 3.8766722679138184, |
|
"eval_runtime": 93.2118, |
|
"eval_samples_per_second": 106.435, |
|
"eval_steps_per_second": 0.837, |
|
"step": 3117 |
|
}, |
|
{ |
|
"epoch": 2.252707581227437, |
|
"grad_norm": 373.11468505859375, |
|
"learning_rate": 0.00097277780248301, |
|
"loss": 4.1204, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.4028880866425992, |
|
"grad_norm": 332.1923522949219, |
|
"learning_rate": 0.0009671302631381275, |
|
"loss": 4.1156, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 2.5530685920577616, |
|
"grad_norm": 439.4508361816406, |
|
"learning_rate": 0.0009609704358230791, |
|
"loss": 4.0976, |
|
"step": 3536 |
|
}, |
|
{ |
|
"epoch": 2.7032490974729244, |
|
"grad_norm": 283.86175537109375, |
|
"learning_rate": 0.0009543050794559023, |
|
"loss": 4.0829, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 2.853429602888087, |
|
"grad_norm": 373.6076965332031, |
|
"learning_rate": 0.0009471415076502864, |
|
"loss": 4.0567, |
|
"step": 3952 |
|
}, |
|
{ |
|
"epoch": 3.00072202166065, |
|
"eval_g2l_cer": 54.3375, |
|
"eval_g2l_gen_len": 5.1478, |
|
"eval_g2l_rouge1": 37.1423, |
|
"eval_g2l_rouge2": 25.8415, |
|
"eval_g2l_rougeL": 36.8263, |
|
"eval_g2l_rougeLsum": 36.799, |
|
"eval_l2ex_cer": 88.2942, |
|
"eval_l2ex_gen_len": 35.3597, |
|
"eval_l2ex_rouge1": 28.7362, |
|
"eval_l2ex_rouge2": 13.0176, |
|
"eval_l2ex_rougeL": 26.1133, |
|
"eval_l2ex_rougeLsum": 26.1476, |
|
"eval_l2g_cer": 85.0531, |
|
"eval_l2g_gen_len": 29.6997, |
|
"eval_l2g_rouge1": 35.6931, |
|
"eval_l2g_rouge2": 23.4243, |
|
"eval_l2g_rougeL": 34.012, |
|
"eval_l2g_rougeLsum": 34.0684, |
|
"eval_loss": 3.9321906566619873, |
|
"eval_runtime": 100.3303, |
|
"eval_samples_per_second": 98.883, |
|
"eval_steps_per_second": 0.777, |
|
"step": 4156 |
|
}, |
|
{ |
|
"epoch": 3.003610108303249, |
|
"grad_norm": 456.342529296875, |
|
"learning_rate": 0.0009394875806906537, |
|
"loss": 4.045, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 3.1537906137184115, |
|
"grad_norm": 336.9688720703125, |
|
"learning_rate": 0.0009313516969074018, |
|
"loss": 4.0002, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 3.303971119133574, |
|
"grad_norm": 354.8324890136719, |
|
"learning_rate": 0.0009227427834617708, |
|
"loss": 3.9884, |
|
"step": 4576 |
|
}, |
|
{ |
|
"epoch": 3.4541516245487367, |
|
"grad_norm": 341.2556457519531, |
|
"learning_rate": 0.0009136702865504465, |
|
"loss": 3.9819, |
|
"step": 4784 |
|
}, |
|
{ |
|
"epoch": 3.604332129963899, |
|
"grad_norm": 348.25225830078125, |
|
"learning_rate": 0.0009041441610406489, |
|
"loss": 3.9644, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 3.7509025270758123, |
|
"eval_g2l_cer": 54.7671, |
|
"eval_g2l_gen_len": 3.3446, |
|
"eval_g2l_rouge1": 34.9931, |
|
"eval_g2l_rouge2": 26.209, |
|
"eval_g2l_rougeL": 34.9234, |
|
"eval_g2l_rougeLsum": 34.913, |
|
"eval_l2ex_cer": 85.5203, |
|
"eval_l2ex_gen_len": 20.1264, |
|
"eval_l2ex_rouge1": 30.9385, |
|
"eval_l2ex_rouge2": 15.6373, |
|
"eval_l2ex_rougeL": 28.096, |
|
"eval_l2ex_rougeLsum": 28.1379, |
|
"eval_l2g_cer": 77.9784, |
|
"eval_l2g_gen_len": 14.0847, |
|
"eval_l2g_rouge1": 35.0014, |
|
"eval_l2g_rouge2": 23.2642, |
|
"eval_l2g_rougeL": 33.3426, |
|
"eval_l2g_rougeLsum": 33.385, |
|
"eval_loss": 3.767300844192505, |
|
"eval_runtime": 93.092, |
|
"eval_samples_per_second": 106.572, |
|
"eval_steps_per_second": 0.838, |
|
"step": 5195 |
|
}, |
|
{ |
|
"epoch": 3.7545126353790614, |
|
"grad_norm": 282.4262390136719, |
|
"learning_rate": 0.0008941748595470763, |
|
"loss": 3.9561, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.904693140794224, |
|
"grad_norm": 309.5865783691406, |
|
"learning_rate": 0.0008837733209626942, |
|
"loss": 3.9365, |
|
"step": 5408 |
|
}, |
|
{ |
|
"epoch": 4.054873646209386, |
|
"grad_norm": 314.7027893066406, |
|
"learning_rate": 0.0008729509584559499, |
|
"loss": 3.9144, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 4.205054151624549, |
|
"grad_norm": 354.8764953613281, |
|
"learning_rate": 0.0008617196469475859, |
|
"loss": 3.871, |
|
"step": 5824 |
|
}, |
|
{ |
|
"epoch": 4.355234657039711, |
|
"grad_norm": 318.5345458984375, |
|
"learning_rate": 0.0008500917100807916, |
|
"loss": 3.8729, |
|
"step": 6032 |
|
}, |
|
{ |
|
"epoch": 4.501083032490975, |
|
"eval_g2l_cer": 54.0839, |
|
"eval_g2l_gen_len": 5.259, |
|
"eval_g2l_rouge1": 37.7304, |
|
"eval_g2l_rouge2": 26.5008, |
|
"eval_g2l_rougeL": 37.4145, |
|
"eval_g2l_rougeLsum": 37.389, |
|
"eval_l2ex_cer": 87.7795, |
|
"eval_l2ex_gen_len": 34.444, |
|
"eval_l2ex_rouge1": 29.4565, |
|
"eval_l2ex_rouge2": 13.4704, |
|
"eval_l2ex_rougeL": 26.584, |
|
"eval_l2ex_rougeLsum": 26.6084, |
|
"eval_l2g_cer": 84.0775, |
|
"eval_l2g_gen_len": 27.3501, |
|
"eval_l2g_rouge1": 36.4264, |
|
"eval_l2g_rouge2": 23.9948, |
|
"eval_l2g_rougeL": 34.6234, |
|
"eval_l2g_rougeLsum": 34.6872, |
|
"eval_loss": 3.814581871032715, |
|
"eval_runtime": 99.5293, |
|
"eval_samples_per_second": 99.679, |
|
"eval_steps_per_second": 0.784, |
|
"step": 6234 |
|
}, |
|
{ |
|
"epoch": 4.505415162454874, |
|
"grad_norm": 302.9512634277344, |
|
"learning_rate": 0.0008380799066989898, |
|
"loss": 3.8677, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 4.6555956678700365, |
|
"grad_norm": 250.16696166992188, |
|
"learning_rate": 0.0008256974168460981, |
|
"loss": 3.8378, |
|
"step": 6448 |
|
}, |
|
{ |
|
"epoch": 4.8057761732851985, |
|
"grad_norm": 348.5408935546875, |
|
"learning_rate": 0.0008129578273046243, |
|
"loss": 3.852, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 4.955956678700361, |
|
"grad_norm": 355.0418701171875, |
|
"learning_rate": 0.0007998751166874639, |
|
"loss": 3.845, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 5.106137184115523, |
|
"grad_norm": 349.153564453125, |
|
"learning_rate": 0.0007864636400997593, |
|
"loss": 3.799, |
|
"step": 7072 |
|
}, |
|
{ |
|
"epoch": 5.251263537906137, |
|
"eval_g2l_cer": 52.3635, |
|
"eval_g2l_gen_len": 3.512, |
|
"eval_g2l_rouge1": 37.4372, |
|
"eval_g2l_rouge2": 27.515, |
|
"eval_g2l_rougeL": 37.3252, |
|
"eval_g2l_rougeLsum": 37.3643, |
|
"eval_l2ex_cer": 81.2205, |
|
"eval_l2ex_gen_len": 18.2107, |
|
"eval_l2ex_rouge1": 32.8709, |
|
"eval_l2ex_rouge2": 16.7456, |
|
"eval_l2ex_rougeL": 29.6388, |
|
"eval_l2ex_rougeLsum": 29.717, |
|
"eval_l2g_cer": 73.2203, |
|
"eval_l2g_gen_len": 13.7837, |
|
"eval_l2g_rouge1": 37.1711, |
|
"eval_l2g_rouge2": 24.5593, |
|
"eval_l2g_rougeL": 35.3326, |
|
"eval_l2g_rougeLsum": 35.3608, |
|
"eval_loss": 3.6153602600097656, |
|
"eval_runtime": 92.481, |
|
"eval_samples_per_second": 107.276, |
|
"eval_steps_per_second": 0.843, |
|
"step": 7273 |
|
}, |
|
{ |
|
"epoch": 5.256317689530686, |
|
"grad_norm": 313.629638671875, |
|
"learning_rate": 0.0007727381133876502, |
|
"loss": 3.7717, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 5.406498194945849, |
|
"grad_norm": 366.9100036621094, |
|
"learning_rate": 0.0007587135969911982, |
|
"loss": 3.7698, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 5.556678700361011, |
|
"grad_norm": 405.91522216796875, |
|
"learning_rate": 0.000744405479419202, |
|
"loss": 3.7686, |
|
"step": 7696 |
|
}, |
|
{ |
|
"epoch": 5.706859205776174, |
|
"grad_norm": 323.0384521484375, |
|
"learning_rate": 0.000729829460364038, |
|
"loss": 3.7567, |
|
"step": 7904 |
|
}, |
|
{ |
|
"epoch": 5.8570397111913355, |
|
"grad_norm": 236.5808563232422, |
|
"learning_rate": 0.0007150015334750512, |
|
"loss": 3.7504, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 6.0014440433213, |
|
"eval_g2l_cer": 52.8799, |
|
"eval_g2l_gen_len": 4.9953, |
|
"eval_g2l_rouge1": 38.9469, |
|
"eval_g2l_rouge2": 27.4705, |
|
"eval_g2l_rougeL": 38.7013, |
|
"eval_g2l_rougeLsum": 38.6638, |
|
"eval_l2ex_cer": 83.8059, |
|
"eval_l2ex_gen_len": 29.591, |
|
"eval_l2ex_rouge1": 31.5224, |
|
"eval_l2ex_rouge2": 15.0063, |
|
"eval_l2ex_rougeL": 28.1947, |
|
"eval_l2ex_rougeLsum": 28.2288, |
|
"eval_l2g_cer": 80.9099, |
|
"eval_l2g_gen_len": 24.6797, |
|
"eval_l2g_rouge1": 37.5363, |
|
"eval_l2g_rouge2": 24.8417, |
|
"eval_l2g_rougeL": 35.6194, |
|
"eval_l2g_rougeLsum": 35.6546, |
|
"eval_loss": 3.7020649909973145, |
|
"eval_runtime": 98.5066, |
|
"eval_samples_per_second": 100.714, |
|
"eval_steps_per_second": 0.792, |
|
"step": 8312 |
|
}, |
|
{ |
|
"epoch": 6.007220216606498, |
|
"grad_norm": 265.8843994140625, |
|
"learning_rate": 0.000699937968809401, |
|
"loss": 3.7525, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 6.15740072202166, |
|
"grad_norm": 291.2859802246094, |
|
"learning_rate": 0.0006846552949796146, |
|
"loss": 3.6905, |
|
"step": 8528 |
|
}, |
|
{ |
|
"epoch": 6.307581227436823, |
|
"grad_norm": 268.9134826660156, |
|
"learning_rate": 0.0006691702810174398, |
|
"loss": 3.6939, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 6.457761732851986, |
|
"grad_norm": 292.39154052734375, |
|
"learning_rate": 0.0006534999179738952, |
|
"loss": 3.7019, |
|
"step": 8944 |
|
}, |
|
{ |
|
"epoch": 6.607942238267148, |
|
"grad_norm": 298.4355773925781, |
|
"learning_rate": 0.0006376614002757093, |
|
"loss": 3.6892, |
|
"step": 9152 |
|
}, |
|
{ |
|
"epoch": 6.751624548736462, |
|
"eval_g2l_cer": 51.8358, |
|
"eval_g2l_gen_len": 3.5613, |
|
"eval_g2l_rouge1": 37.7745, |
|
"eval_g2l_rouge2": 27.9843, |
|
"eval_g2l_rougeL": 37.6788, |
|
"eval_g2l_rougeLsum": 37.6728, |
|
"eval_l2ex_cer": 84.3919, |
|
"eval_l2ex_gen_len": 20.5961, |
|
"eval_l2ex_rouge1": 32.2651, |
|
"eval_l2ex_rouge2": 16.2957, |
|
"eval_l2ex_rougeL": 28.9932, |
|
"eval_l2ex_rougeLsum": 29.0868, |
|
"eval_l2g_cer": 75.9064, |
|
"eval_l2g_gen_len": 14.9821, |
|
"eval_l2g_rouge1": 37.3147, |
|
"eval_l2g_rouge2": 24.7819, |
|
"eval_l2g_rougeL": 35.4462, |
|
"eval_l2g_rougeLsum": 35.4671, |
|
"eval_loss": 3.5767345428466797, |
|
"eval_runtime": 93.796, |
|
"eval_samples_per_second": 105.772, |
|
"eval_steps_per_second": 0.832, |
|
"step": 9351 |
|
}, |
|
{ |
|
"epoch": 6.758122743682311, |
|
"grad_norm": 257.7994689941406, |
|
"learning_rate": 0.0006216721068586019, |
|
"loss": 3.6827, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 6.908303249097473, |
|
"grad_norm": 264.89263916015625, |
|
"learning_rate": 0.0006055495820981136, |
|
"loss": 3.6662, |
|
"step": 9568 |
|
}, |
|
{ |
|
"epoch": 7.058483754512635, |
|
"grad_norm": 289.5307312011719, |
|
"learning_rate": 0.0005893115165589047, |
|
"loss": 3.6482, |
|
"step": 9776 |
|
}, |
|
{ |
|
"epoch": 7.208664259927798, |
|
"grad_norm": 254.08291625976562, |
|
"learning_rate": 0.0005729757275836455, |
|
"loss": 3.6198, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 7.35884476534296, |
|
"grad_norm": 181.30072021484375, |
|
"learning_rate": 0.0005565601397428012, |
|
"loss": 3.6104, |
|
"step": 10192 |
|
}, |
|
{ |
|
"epoch": 7.501805054151625, |
|
"eval_g2l_cer": 52.672, |
|
"eval_g2l_gen_len": 4.8856, |
|
"eval_g2l_rouge1": 39.792, |
|
"eval_g2l_rouge2": 28.4504, |
|
"eval_g2l_rougeL": 39.5411, |
|
"eval_g2l_rougeLsum": 39.532, |
|
"eval_l2ex_cer": 87.7735, |
|
"eval_l2ex_gen_len": 32.8952, |
|
"eval_l2ex_rouge1": 30.3306, |
|
"eval_l2ex_rouge2": 14.2701, |
|
"eval_l2ex_rougeL": 27.0806, |
|
"eval_l2ex_rougeLsum": 27.0923, |
|
"eval_l2g_cer": 83.3818, |
|
"eval_l2g_gen_len": 25.6912, |
|
"eval_l2g_rouge1": 37.5685, |
|
"eval_l2g_rouge2": 25.026, |
|
"eval_l2g_rougeL": 35.6593, |
|
"eval_l2g_rougeLsum": 35.7335, |
|
"eval_loss": 3.6247808933258057, |
|
"eval_runtime": 97.6949, |
|
"eval_samples_per_second": 101.551, |
|
"eval_steps_per_second": 0.798, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 7.509025270758123, |
|
"grad_norm": 213.4962158203125, |
|
"learning_rate": 0.0005400827651667568, |
|
"loss": 3.6116, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 7.659205776173286, |
|
"grad_norm": 200.51080322265625, |
|
"learning_rate": 0.0005235616837818695, |
|
"loss": 3.613, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 7.809386281588448, |
|
"grad_norm": 210.73484802246094, |
|
"learning_rate": 0.0005070150234721305, |
|
"loss": 3.5998, |
|
"step": 10816 |
|
}, |
|
{ |
|
"epoch": 7.95956678700361, |
|
"grad_norm": 245.5376434326172, |
|
"learning_rate": 0.0004904609401882021, |
|
"loss": 3.6003, |
|
"step": 11024 |
|
}, |
|
{ |
|
"epoch": 8.109747292418772, |
|
"grad_norm": 234.57310485839844, |
|
"learning_rate": 0.00047391759802566346, |
|
"loss": 3.5769, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 8.251985559566787, |
|
"eval_g2l_cer": 51.2806, |
|
"eval_g2l_gen_len": 4.0062, |
|
"eval_g2l_rouge1": 39.5838, |
|
"eval_g2l_rouge2": 28.8731, |
|
"eval_g2l_rougeL": 39.3964, |
|
"eval_g2l_rougeLsum": 39.3929, |
|
"eval_l2ex_cer": 81.177, |
|
"eval_l2ex_gen_len": 21.2641, |
|
"eval_l2ex_rouge1": 33.1446, |
|
"eval_l2ex_rouge2": 16.6249, |
|
"eval_l2ex_rougeL": 29.723, |
|
"eval_l2ex_rougeLsum": 29.7703, |
|
"eval_l2g_cer": 73.5184, |
|
"eval_l2g_gen_len": 15.4612, |
|
"eval_l2g_rouge1": 38.7817, |
|
"eval_l2g_rouge2": 25.5669, |
|
"eval_l2g_rougeL": 36.728, |
|
"eval_l2g_rougeLsum": 36.7794, |
|
"eval_loss": 3.502086639404297, |
|
"eval_runtime": 94.3872, |
|
"eval_samples_per_second": 105.11, |
|
"eval_steps_per_second": 0.826, |
|
"step": 11429 |
|
}, |
|
{ |
|
"epoch": 8.259927797833935, |
|
"grad_norm": 212.4471435546875, |
|
"learning_rate": 0.00045740314929431514, |
|
"loss": 3.5588, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 8.410108303249098, |
|
"grad_norm": 227.54701232910156, |
|
"learning_rate": 0.000440935714600417, |
|
"loss": 3.5575, |
|
"step": 11648 |
|
}, |
|
{ |
|
"epoch": 8.56028880866426, |
|
"grad_norm": 175.19906616210938, |
|
"learning_rate": 0.00042453336296371426, |
|
"loss": 3.5425, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 8.710469314079422, |
|
"grad_norm": 197.85183715820312, |
|
"learning_rate": 0.0004082140919910653, |
|
"loss": 3.5516, |
|
"step": 12064 |
|
}, |
|
{ |
|
"epoch": 8.860649819494585, |
|
"grad_norm": 174.2976531982422, |
|
"learning_rate": 0.00039199580812842894, |
|
"loss": 3.5262, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 9.00216606498195, |
|
"eval_g2l_cer": 52.3453, |
|
"eval_g2l_gen_len": 4.6585, |
|
"eval_g2l_rouge1": 39.8411, |
|
"eval_g2l_rouge2": 28.7875, |
|
"eval_g2l_rougeL": 39.7115, |
|
"eval_g2l_rougeLsum": 39.6779, |
|
"eval_l2ex_cer": 84.2856, |
|
"eval_l2ex_gen_len": 28.0884, |
|
"eval_l2ex_rouge1": 31.9562, |
|
"eval_l2ex_rouge2": 15.2366, |
|
"eval_l2ex_rougeL": 28.4418, |
|
"eval_l2ex_rougeLsum": 28.4937, |
|
"eval_l2g_cer": 82.0714, |
|
"eval_l2g_gen_len": 22.9518, |
|
"eval_l2g_rouge1": 38.3811, |
|
"eval_l2g_rouge2": 25.3667, |
|
"eval_l2g_rougeL": 36.2921, |
|
"eval_l2g_rougeLsum": 36.3599, |
|
"eval_loss": 3.542821168899536, |
|
"eval_runtime": 97.8166, |
|
"eval_samples_per_second": 101.424, |
|
"eval_steps_per_second": 0.797, |
|
"step": 12468 |
|
}, |
|
{ |
|
"epoch": 9.010830324909747, |
|
"grad_norm": 148.35972595214844, |
|
"learning_rate": 0.00037589630701287847, |
|
"loss": 3.5412, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 9.16101083032491, |
|
"grad_norm": 181.6919708251953, |
|
"learning_rate": 0.00035993325394620217, |
|
"loss": 3.4914, |
|
"step": 12688 |
|
}, |
|
{ |
|
"epoch": 9.311191335740073, |
|
"grad_norm": 218.32742309570312, |
|
"learning_rate": 0.00034412416451151385, |
|
"loss": 3.5073, |
|
"step": 12896 |
|
}, |
|
{ |
|
"epoch": 9.461371841155234, |
|
"grad_norm": 144.4810791015625, |
|
"learning_rate": 0.0003284863853541441, |
|
"loss": 3.5023, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 9.611552346570397, |
|
"grad_norm": 204.1243896484375, |
|
"learning_rate": 0.00031303707514790077, |
|
"loss": 3.4959, |
|
"step": 13312 |
|
}, |
|
{ |
|
"epoch": 9.752346570397112, |
|
"eval_g2l_cer": 50.963, |
|
"eval_g2l_gen_len": 3.9938, |
|
"eval_g2l_rouge1": 39.9737, |
|
"eval_g2l_rouge2": 29.4419, |
|
"eval_g2l_rougeL": 39.8196, |
|
"eval_g2l_rougeLsum": 39.8028, |
|
"eval_l2ex_cer": 81.2748, |
|
"eval_l2ex_gen_len": 22.4378, |
|
"eval_l2ex_rouge1": 33.2444, |
|
"eval_l2ex_rouge2": 16.7607, |
|
"eval_l2ex_rougeL": 29.9288, |
|
"eval_l2ex_rougeLsum": 29.9861, |
|
"eval_l2g_cer": 74.1059, |
|
"eval_l2g_gen_len": 15.8848, |
|
"eval_l2g_rouge1": 38.9415, |
|
"eval_l2g_rouge2": 25.7151, |
|
"eval_l2g_rougeL": 36.9163, |
|
"eval_l2g_rougeLsum": 36.9791, |
|
"eval_loss": 3.475815534591675, |
|
"eval_runtime": 94.3857, |
|
"eval_samples_per_second": 105.111, |
|
"eval_steps_per_second": 0.826, |
|
"step": 13507 |
|
}, |
|
{ |
|
"epoch": 9.76173285198556, |
|
"grad_norm": 156.18637084960938, |
|
"learning_rate": 0.0002977931857675812, |
|
"loss": 3.4928, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 9.911913357400723, |
|
"grad_norm": 116.77749633789062, |
|
"learning_rate": 0.00028277144368839654, |
|
"loss": 3.4805, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 10.062093862815885, |
|
"grad_norm": 147.93405151367188, |
|
"learning_rate": 0.00026798833163271856, |
|
"loss": 3.4859, |
|
"step": 13936 |
|
}, |
|
{ |
|
"epoch": 10.212274368231046, |
|
"grad_norm": 129.8922882080078, |
|
"learning_rate": 0.0002534600704842848, |
|
"loss": 3.4512, |
|
"step": 14144 |
|
}, |
|
{ |
|
"epoch": 10.36245487364621, |
|
"grad_norm": 129.17861938476562, |
|
"learning_rate": 0.00023920260148970935, |
|
"loss": 3.4679, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 10.502527075812274, |
|
"eval_g2l_cer": 51.589, |
|
"eval_g2l_gen_len": 4.615, |
|
"eval_g2l_rouge1": 40.3572, |
|
"eval_g2l_rouge2": 29.0165, |
|
"eval_g2l_rougeL": 40.1696, |
|
"eval_g2l_rougeLsum": 40.127, |
|
"eval_l2ex_cer": 83.3939, |
|
"eval_l2ex_gen_len": 27.2939, |
|
"eval_l2ex_rouge1": 32.0, |
|
"eval_l2ex_rouge2": 15.2047, |
|
"eval_l2ex_rougeL": 28.4221, |
|
"eval_l2ex_rougeLsum": 28.4944, |
|
"eval_l2g_cer": 78.1523, |
|
"eval_l2g_gen_len": 21.5805, |
|
"eval_l2g_rouge1": 38.8011, |
|
"eval_l2g_rouge2": 25.5645, |
|
"eval_l2g_rougeL": 36.5851, |
|
"eval_l2g_rougeLsum": 36.6686, |
|
"eval_loss": 3.490727663040161, |
|
"eval_runtime": 97.0968, |
|
"eval_samples_per_second": 102.176, |
|
"eval_steps_per_second": 0.803, |
|
"step": 14546 |
|
}, |
|
{ |
|
"epoch": 10.512635379061372, |
|
"grad_norm": 120.92171478271484, |
|
"learning_rate": 0.00022523156876682566, |
|
"loss": 3.4559, |
|
"step": 14560 |
|
}, |
|
{ |
|
"epoch": 10.662815884476535, |
|
"grad_norm": 72.50321960449219, |
|
"learning_rate": 0.00021156230213905934, |
|
"loss": 3.4527, |
|
"step": 14768 |
|
}, |
|
{ |
|
"epoch": 10.812996389891698, |
|
"grad_norm": 29.285614013671875, |
|
"learning_rate": 0.0001982098003146605, |
|
"loss": 3.445, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 10.963176895306859, |
|
"grad_norm": 139.87692260742188, |
|
"learning_rate": 0.00018518871442925475, |
|
"loss": 3.4474, |
|
"step": 15184 |
|
}, |
|
{ |
|
"epoch": 11.113357400722021, |
|
"grad_norm": 115.22322845458984, |
|
"learning_rate": 0.0001725133319697725, |
|
"loss": 3.444, |
|
"step": 15392 |
|
}, |
|
{ |
|
"epoch": 11.252707581227437, |
|
"eval_g2l_cer": 50.9127, |
|
"eval_g2l_gen_len": 4.3921, |
|
"eval_g2l_rouge1": 41.0139, |
|
"eval_g2l_rouge2": 29.6136, |
|
"eval_g2l_rougeL": 40.8151, |
|
"eval_g2l_rougeLsum": 40.8006, |
|
"eval_l2ex_cer": 85.5529, |
|
"eval_l2ex_gen_len": 28.0987, |
|
"eval_l2ex_rouge1": 31.8781, |
|
"eval_l2ex_rouge2": 15.2454, |
|
"eval_l2ex_rougeL": 28.3874, |
|
"eval_l2ex_rougeLsum": 28.4594, |
|
"eval_l2g_cer": 79.5578, |
|
"eval_l2g_gen_len": 20.9953, |
|
"eval_l2g_rouge1": 38.8463, |
|
"eval_l2g_rouge2": 25.6991, |
|
"eval_l2g_rougeL": 36.7062, |
|
"eval_l2g_rougeLsum": 36.784, |
|
"eval_loss": 3.4728221893310547, |
|
"eval_runtime": 95.7364, |
|
"eval_samples_per_second": 103.628, |
|
"eval_steps_per_second": 0.815, |
|
"step": 15585 |
|
}, |
|
{ |
|
"epoch": 11.263537906137184, |
|
"grad_norm": 90.60981750488281, |
|
"learning_rate": 0.000160197561097393, |
|
"loss": 3.4291, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 11.413718411552347, |
|
"grad_norm": 84.94570922851562, |
|
"learning_rate": 0.00014825491538670787, |
|
"loss": 3.4392, |
|
"step": 15808 |
|
}, |
|
{ |
|
"epoch": 11.56389891696751, |
|
"grad_norm": 74.72296142578125, |
|
"learning_rate": 0.00013669849899784683, |
|
"loss": 3.4338, |
|
"step": 16016 |
|
}, |
|
{ |
|
"epoch": 11.714079422382671, |
|
"grad_norm": 73.01377868652344, |
|
"learning_rate": 0.0001255409922978368, |
|
"loss": 3.4147, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 11.864259927797834, |
|
"grad_norm": 29.206298828125, |
|
"learning_rate": 0.00011479463794697153, |
|
"loss": 3.4327, |
|
"step": 16432 |
|
}, |
|
{ |
|
"epoch": 12.0028880866426, |
|
"eval_g2l_cer": 51.0704, |
|
"eval_g2l_gen_len": 4.428, |
|
"eval_g2l_rouge1": 40.8383, |
|
"eval_g2l_rouge2": 29.5544, |
|
"eval_g2l_rougeL": 40.6824, |
|
"eval_g2l_rougeLsum": 40.648, |
|
"eval_l2ex_cer": 85.2521, |
|
"eval_l2ex_gen_len": 27.6208, |
|
"eval_l2ex_rouge1": 31.8072, |
|
"eval_l2ex_rouge2": 15.1802, |
|
"eval_l2ex_rougeL": 28.2533, |
|
"eval_l2ex_rougeLsum": 28.3249, |
|
"eval_l2g_cer": 78.2445, |
|
"eval_l2g_gen_len": 20.4718, |
|
"eval_l2g_rouge1": 38.9972, |
|
"eval_l2g_rouge2": 25.8126, |
|
"eval_l2g_rougeL": 36.8133, |
|
"eval_l2g_rougeLsum": 36.8913, |
|
"eval_loss": 3.460233211517334, |
|
"eval_runtime": 96.6773, |
|
"eval_samples_per_second": 102.62, |
|
"eval_steps_per_second": 0.807, |
|
"step": 16624 |
|
}, |
|
{ |
|
"epoch": 12.014440433212997, |
|
"grad_norm": 75.82231140136719, |
|
"learning_rate": 0.00010447122746545834, |
|
"loss": 3.4103, |
|
"step": 16640 |
|
}, |
|
{ |
|
"epoch": 12.16462093862816, |
|
"grad_norm": 68.91098022460938, |
|
"learning_rate": 9.458208829508076e-05, |
|
"loss": 3.4193, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 12.31480144404332, |
|
"grad_norm": 53.81922149658203, |
|
"learning_rate": 8.513807137007677e-05, |
|
"loss": 3.3993, |
|
"step": 17056 |
|
}, |
|
{ |
|
"epoch": 12.464981949458483, |
|
"grad_norm": 9.850875854492188, |
|
"learning_rate": 7.61495392108672e-05, |
|
"loss": 3.401, |
|
"step": 17264 |
|
}, |
|
{ |
|
"epoch": 12.615162454873646, |
|
"grad_norm": 36.068912506103516, |
|
"learning_rate": 6.762635455369967e-05, |
|
"loss": 3.4024, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 12.753068592057762, |
|
"eval_g2l_cer": 50.4581, |
|
"eval_g2l_gen_len": 4.2626, |
|
"eval_g2l_rouge1": 40.828, |
|
"eval_g2l_rouge2": 29.681, |
|
"eval_g2l_rougeL": 40.7031, |
|
"eval_g2l_rougeLsum": 40.6328, |
|
"eval_l2ex_cer": 83.6513, |
|
"eval_l2ex_gen_len": 25.1151, |
|
"eval_l2ex_rouge1": 32.7247, |
|
"eval_l2ex_rouge2": 15.913, |
|
"eval_l2ex_rougeL": 29.2133, |
|
"eval_l2ex_rougeLsum": 29.2625, |
|
"eval_l2g_cer": 76.3987, |
|
"eval_l2g_gen_len": 18.6672, |
|
"eval_l2g_rouge1": 39.0308, |
|
"eval_l2g_rouge2": 25.8759, |
|
"eval_l2g_rougeL": 36.9076, |
|
"eval_l2g_rougeLsum": 36.965, |
|
"eval_loss": 3.4478375911712646, |
|
"eval_runtime": 94.7036, |
|
"eval_samples_per_second": 104.758, |
|
"eval_steps_per_second": 0.824, |
|
"step": 17663 |
|
} |
|
], |
|
"logging_steps": 208, |
|
"max_steps": 20775, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 1039, |
|
"total_flos": 1.0504606235964211e+17, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |