|
{
  "best_metric": 3.1630301475524902,
  "best_model_checkpoint": "checkpoints/it5-large/checkpoint-78926",
  "epoch": 14.251715420729505,
  "eval_steps": 4154,
  "global_step": 78926,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14987360057782592,
      "eval_g2l_cer": 67.2645,
      "eval_g2l_gen_len": 4.5733,
      "eval_g2l_rouge1": 27.1595,
      "eval_g2l_rouge2": 15.5941,
      "eval_g2l_rougeL": 26.9535,
      "eval_g2l_rougeLsum": 26.9576,
      "eval_l2ex_cer": 130.3597,
      "eval_l2ex_gen_len": 47.8171,
      "eval_l2ex_rouge1": 22.1003,
      "eval_l2ex_rouge2": 9.5437,
      "eval_l2ex_rougeL": 20.2017,
      "eval_l2ex_rougeLsum": 19.2847,
      "eval_l2g_cer": 106.9099,
      "eval_l2g_gen_len": 30.346,
      "eval_l2g_rouge1": 27.2135,
      "eval_l2g_rouge2": 14.1149,
      "eval_l2g_rougeL": 25.3922,
      "eval_l2g_rougeLsum": 25.3986,
      "eval_loss": 3.804034948348999,
      "eval_runtime": 310.7233,
      "eval_samples_per_second": 31.929,
      "eval_steps_per_second": 0.502,
      "step": 830
    },
    {
      "epoch": 0.15005417118093176,
      "grad_norm": 129.91856384277344,
      "learning_rate": 6.004335260115606e-05,
      "loss": 4.6945,
      "step": 831
    },
    {
      "epoch": 0.3001083423618635,
      "grad_norm": 190.0506591796875,
      "learning_rate": 0.00012008670520231212,
      "loss": 3.8417,
      "step": 1662
    },
    {
      "epoch": 0.45016251354279524,
      "grad_norm": 208.42779541015625,
      "learning_rate": 0.0001801300578034682,
      "loss": 3.712,
      "step": 2493
    },
    {
      "epoch": 0.600216684723727,
      "grad_norm": 293.423583984375,
      "learning_rate": 0.00024017341040462423,
      "loss": 3.6763,
      "step": 3324
    },
    {
      "epoch": 0.7500902853015529,
      "eval_g2l_cer": 53.5059,
      "eval_g2l_gen_len": 3.5087,
      "eval_g2l_rouge1": 37.5417,
      "eval_g2l_rouge2": 29.1384,
      "eval_g2l_rougeL": 37.463,
      "eval_g2l_rougeLsum": 37.4022,
      "eval_l2ex_cer": 102.0708,
      "eval_l2ex_gen_len": 25.9866,
      "eval_l2ex_rouge1": 26.7853,
      "eval_l2ex_rouge2": 12.9071,
      "eval_l2ex_rougeL": 24.0724,
      "eval_l2ex_rougeLsum": 24.0445,
      "eval_l2g_cer": 86.4648,
      "eval_l2g_gen_len": 15.0081,
      "eval_l2g_rouge1": 30.7776,
      "eval_l2g_rouge2": 18.1789,
      "eval_l2g_rougeL": 29.1675,
      "eval_l2g_rougeLsum": 29.2136,
      "eval_loss": 3.5662293434143066,
      "eval_runtime": 296.355,
      "eval_samples_per_second": 33.477,
      "eval_steps_per_second": 0.526,
      "step": 4154
    },
    {
      "epoch": 0.7502708559046587,
      "grad_norm": 500.86932373046875,
      "learning_rate": 0.0002999999989317841,
      "loss": 3.6694,
      "step": 4155
    },
    {
      "epoch": 0.9003250270855905,
      "grad_norm": 378.1305236816406,
      "learning_rate": 0.00029991745158829114,
      "loss": 3.641,
      "step": 4986
    },
    {
      "epoch": 1.0503791982665223,
      "grad_norm": 309.59503173828125,
      "learning_rate": 0.0002996710832786393,
      "loss": 3.6169,
      "step": 5817
    },
    {
      "epoch": 1.200433369447454,
      "grad_norm": 364.1076965332031,
      "learning_rate": 0.00029926116366930635,
      "loss": 3.5732,
      "step": 6648
    },
    {
      "epoch": 1.3504875406283858,
      "grad_norm": 402.1815490722656,
      "learning_rate": 0.00029868814144453027,
      "loss": 3.5547,
      "step": 7479
    },
    {
      "epoch": 1.500180570603106,
      "eval_g2l_cer": 49.6927,
      "eval_g2l_gen_len": 4.4371,
      "eval_g2l_rouge1": 42.6629,
      "eval_g2l_rouge2": 32.7133,
      "eval_g2l_rougeL": 42.5078,
      "eval_g2l_rougeLsum": 42.487,
      "eval_l2ex_cer": 85.9069,
      "eval_l2ex_gen_len": 27.3155,
      "eval_l2ex_rouge1": 31.0018,
      "eval_l2ex_rouge2": 14.7792,
      "eval_l2ex_rougeL": 27.5259,
      "eval_l2ex_rougeLsum": 27.5817,
      "eval_l2g_cer": 76.6936,
      "eval_l2g_gen_len": 19.6286,
      "eval_l2g_rouge1": 38.3213,
      "eval_l2g_rouge2": 24.5167,
      "eval_l2g_rougeL": 36.1971,
      "eval_l2g_rougeLsum": 36.2764,
      "eval_loss": 3.4418885707855225,
      "eval_runtime": 302.3378,
      "eval_samples_per_second": 32.814,
      "eval_steps_per_second": 0.516,
      "step": 8308
    },
    {
      "epoch": 1.5005417118093174,
      "grad_norm": 380.1324157714844,
      "learning_rate": 0.0002979526438151941,
      "loss": 3.533,
      "step": 8310
    },
    {
      "epoch": 1.6505958829902492,
      "grad_norm": 358.8219909667969,
      "learning_rate": 0.0002970554758323025,
      "loss": 3.5167,
      "step": 9141
    },
    {
      "epoch": 1.800650054171181,
      "grad_norm": 323.29583740234375,
      "learning_rate": 0.0002959976195057994,
      "loss": 3.5114,
      "step": 9972
    },
    {
      "epoch": 1.9507042253521125,
      "grad_norm": 283.41790771484375,
      "learning_rate": 0.00029478023272969345,
      "loss": 3.4955,
      "step": 10803
    },
    {
      "epoch": 2.1007583965330445,
      "grad_norm": 326.611572265625,
      "learning_rate": 0.0002934046480146657,
      "loss": 3.4415,
      "step": 11634
    },
    {
      "epoch": 2.2502708559046587,
      "eval_g2l_cer": 49.446,
      "eval_g2l_gen_len": 4.5047,
      "eval_g2l_rouge1": 43.91,
      "eval_g2l_rouge2": 33.662,
      "eval_g2l_rougeL": 43.778,
      "eval_g2l_rougeLsum": 43.7883,
      "eval_l2ex_cer": 86.4808,
      "eval_l2ex_gen_len": 30.4358,
      "eval_l2ex_rouge1": 30.7974,
      "eval_l2ex_rouge2": 14.4266,
      "eval_l2ex_rougeL": 27.2278,
      "eval_l2ex_rougeLsum": 27.3219,
      "eval_l2g_cer": 84.1694,
      "eval_l2g_gen_len": 24.5493,
      "eval_l2g_rouge1": 38.1858,
      "eval_l2g_rouge2": 25.1392,
      "eval_l2g_rougeL": 36.1473,
      "eval_l2g_rougeLsum": 36.1987,
      "eval_loss": 3.374830961227417,
      "eval_runtime": 306.8842,
      "eval_samples_per_second": 32.328,
      "eval_steps_per_second": 0.508,
      "step": 12462
    },
    {
      "epoch": 2.250812567713976,
      "grad_norm": 368.7247314453125,
      "learning_rate": 0.0002918723710295482,
      "loss": 3.424,
      "step": 12465
    },
    {
      "epoch": 2.400866738894908,
      "grad_norm": 468.8291320800781,
      "learning_rate": 0.00029018507895326985,
      "loss": 3.4228,
      "step": 13296
    },
    {
      "epoch": 2.5509209100758397,
      "grad_norm": 266.9640808105469,
      "learning_rate": 0.00028834461863907226,
      "loss": 3.4152,
      "step": 14127
    },
    {
      "epoch": 2.7009750812567717,
      "grad_norm": 237.8248748779297,
      "learning_rate": 0.0002863530045930063,
      "loss": 3.4187,
      "step": 14958
    },
    {
      "epoch": 2.851029252437703,
      "grad_norm": 371.8949890136719,
      "learning_rate": 0.00028421241676892145,
      "loss": 3.3965,
      "step": 15789
    },
    {
      "epoch": 3.0003611412062114,
      "eval_g2l_cer": 48.3881,
      "eval_g2l_gen_len": 4.4987,
      "eval_g2l_rouge1": 44.8641,
      "eval_g2l_rouge2": 34.4032,
      "eval_g2l_rougeL": 44.6885,
      "eval_g2l_rougeLsum": 44.711,
      "eval_l2ex_cer": 89.7331,
      "eval_l2ex_gen_len": 30.1634,
      "eval_l2ex_rouge1": 30.5096,
      "eval_l2ex_rouge2": 14.191,
      "eval_l2ex_rougeL": 26.9741,
      "eval_l2ex_rougeLsum": 27.0965,
      "eval_l2g_cer": 81.1389,
      "eval_l2g_gen_len": 23.1439,
      "eval_l2g_rouge1": 39.3934,
      "eval_l2g_rouge2": 25.9597,
      "eval_l2g_rougeL": 37.0903,
      "eval_l2g_rougeLsum": 37.1641,
      "eval_loss": 3.325451612472534,
      "eval_runtime": 304.8315,
      "eval_samples_per_second": 32.546,
      "eval_steps_per_second": 0.512,
      "step": 16616
    },
    {
      "epoch": 3.001083423618635,
      "grad_norm": 298.2984619140625,
      "learning_rate": 0.0002819251981823618,
      "loss": 3.3917,
      "step": 16620
    },
    {
      "epoch": 3.151137594799567,
      "grad_norm": 318.9201354980469,
      "learning_rate": 0.00027949385234597935,
      "loss": 3.3406,
      "step": 17451
    },
    {
      "epoch": 3.3011917659804983,
      "grad_norm": 286.3103942871094,
      "learning_rate": 0.0002769210405292737,
      "loss": 3.3328,
      "step": 18282
    },
    {
      "epoch": 3.4512459371614304,
      "grad_norm": 352.0337829589844,
      "learning_rate": 0.0002742095788456554,
      "loss": 3.3333,
      "step": 19113
    },
    {
      "epoch": 3.601300108342362,
      "grad_norm": 210.31089782714844,
      "learning_rate": 0.0002713624351700232,
      "loss": 3.3251,
      "step": 19944
    },
    {
      "epoch": 3.7504514265077646,
      "eval_g2l_cer": 47.0881,
      "eval_g2l_gen_len": 4.2162,
      "eval_g2l_rouge1": 45.6068,
      "eval_g2l_rouge2": 34.9617,
      "eval_g2l_rougeL": 45.369,
      "eval_g2l_rougeLsum": 45.3992,
      "eval_l2ex_cer": 87.2057,
      "eval_l2ex_gen_len": 25.9743,
      "eval_l2ex_rouge1": 32.044,
      "eval_l2ex_rouge2": 15.4907,
      "eval_l2ex_rougeL": 28.2386,
      "eval_l2ex_rougeLsum": 28.3364,
      "eval_l2g_cer": 81.5351,
      "eval_l2g_gen_len": 20.6293,
      "eval_l2g_rouge1": 39.7177,
      "eval_l2g_rouge2": 26.6455,
      "eval_l2g_rougeL": 37.5652,
      "eval_l2g_rougeLsum": 37.5978,
      "eval_loss": 3.267240047454834,
      "eval_runtime": 303.1609,
      "eval_samples_per_second": 32.725,
      "eval_steps_per_second": 0.515,
      "step": 20770
    },
    {
      "epoch": 3.7513542795232935,
      "grad_norm": 240.68614196777344,
      "learning_rate": 0.0002683827258902275,
      "loss": 3.3215,
      "step": 20775
    },
    {
      "epoch": 3.9014084507042255,
      "grad_norm": 203.9857177734375,
      "learning_rate": 0.0002652737124959771,
      "loss": 3.311,
      "step": 21606
    },
    {
      "epoch": 4.0514626218851575,
      "grad_norm": 182.24288940429688,
      "learning_rate": 0.00026203879800892194,
      "loss": 3.2913,
      "step": 22437
    },
    {
      "epoch": 4.201516793066089,
      "grad_norm": 300.8165283203125,
      "learning_rate": 0.00025868152325781986,
      "loss": 3.2554,
      "step": 23268
    },
    {
      "epoch": 4.351570964247021,
      "grad_norm": 230.65304565429688,
      "learning_rate": 0.00025520556300286454,
      "loss": 3.2636,
      "step": 24099
    },
    {
      "epoch": 4.500541711809317,
      "eval_g2l_cer": 46.1491,
      "eval_g2l_gen_len": 3.8852,
      "eval_g2l_rouge1": 45.7526,
      "eval_g2l_rouge2": 35.7656,
      "eval_g2l_rougeL": 45.6115,
      "eval_g2l_rougeLsum": 45.6146,
      "eval_l2ex_cer": 82.9457,
      "eval_l2ex_gen_len": 17.2662,
      "eval_l2ex_rouge1": 31.8116,
      "eval_l2ex_rouge2": 16.1098,
      "eval_l2ex_rougeL": 28.581,
      "eval_l2ex_rougeLsum": 28.6511,
      "eval_l2g_cer": 69.3136,
      "eval_l2g_gen_len": 12.4397,
      "eval_l2g_rouge1": 39.1199,
      "eval_l2g_rouge2": 26.5659,
      "eval_l2g_rougeL": 37.2837,
      "eval_l2g_rougeLsum": 37.3241,
      "eval_loss": 3.275228261947632,
      "eval_runtime": 264.7397,
      "eval_samples_per_second": 37.475,
      "eval_steps_per_second": 0.589,
      "step": 24924
    },
    {
      "epoch": 4.501625135427952,
      "grad_norm": 278.4391174316406,
      "learning_rate": 0.00025161472191341646,
      "loss": 3.2605,
      "step": 24930
    },
    {
      "epoch": 4.651679306608884,
      "grad_norm": 185.57086181640625,
      "learning_rate": 0.00024791293040353913,
      "loss": 3.2372,
      "step": 25761
    },
    {
      "epoch": 4.801733477789816,
      "grad_norm": 199.41229248046875,
      "learning_rate": 0.0002441042403299005,
      "loss": 3.2549,
      "step": 26592
    },
    {
      "epoch": 4.951787648970748,
      "grad_norm": 111.84984588623047,
      "learning_rate": 0.000240192820556746,
      "loss": 3.2505,
      "step": 27423
    },
    {
      "epoch": 5.101841820151679,
      "grad_norm": 231.06040954589844,
      "learning_rate": 0.0002361829523928005,
      "loss": 3.2162,
      "step": 28254
    },
    {
      "epoch": 5.250631997110871,
      "eval_g2l_cer": 45.3929,
      "eval_g2l_gen_len": 4.2856,
      "eval_g2l_rouge1": 47.7028,
      "eval_g2l_rouge2": 36.7159,
      "eval_g2l_rougeL": 47.5076,
      "eval_g2l_rougeLsum": 47.5342,
      "eval_l2ex_cer": 84.2916,
      "eval_l2ex_gen_len": 27.5293,
      "eval_l2ex_rouge1": 32.3354,
      "eval_l2ex_rouge2": 15.6055,
      "eval_l2ex_rougeL": 28.4133,
      "eval_l2ex_rougeLsum": 28.5758,
      "eval_l2g_cer": 74.8295,
      "eval_l2g_gen_len": 19.749,
      "eval_l2g_rouge1": 40.6449,
      "eval_l2g_rouge2": 27.1184,
      "eval_l2g_rougeL": 38.3335,
      "eval_l2g_rougeLsum": 38.3945,
      "eval_loss": 3.2382800579071045,
      "eval_runtime": 300.872,
      "eval_samples_per_second": 32.974,
      "eval_steps_per_second": 0.518,
      "step": 29078
    },
    {
      "epoch": 5.251895991332611,
      "grad_norm": 298.8398132324219,
      "learning_rate": 0.00023207902490509098,
      "loss": 3.187,
      "step": 29085
    },
    {
      "epoch": 5.401950162513542,
      "grad_norm": 126.86690521240234,
      "learning_rate": 0.0002278855301148215,
      "loss": 3.2012,
      "step": 29916
    },
    {
      "epoch": 5.552004333694475,
      "grad_norm": 221.00885009765625,
      "learning_rate": 0.0002236070580805574,
      "loss": 3.1999,
      "step": 30747
    },
    {
      "epoch": 5.702058504875406,
      "grad_norm": 193.86273193359375,
      "learning_rate": 0.00021924829187410153,
      "loss": 3.1942,
      "step": 31578
    },
    {
      "epoch": 5.852112676056338,
      "grad_norm": 126.05673217773438,
      "learning_rate": 0.00021481400245456104,
      "loss": 3.1947,
      "step": 32409
    },
    {
      "epoch": 6.000722282412423,
      "eval_g2l_cer": 45.9412,
      "eval_g2l_gen_len": 4.4229,
      "eval_g2l_rouge1": 47.5003,
      "eval_g2l_rouge2": 36.6595,
      "eval_g2l_rougeL": 47.3175,
      "eval_g2l_rougeLsum": 47.3017,
      "eval_l2ex_cer": 82.4504,
      "eval_l2ex_gen_len": 23.3741,
      "eval_l2ex_rouge1": 32.8857,
      "eval_l2ex_rouge2": 15.6166,
      "eval_l2ex_rougeL": 28.7672,
      "eval_l2ex_rougeLsum": 28.8746,
      "eval_l2g_cer": 73.7451,
      "eval_l2g_gen_len": 18.5067,
      "eval_l2g_rouge1": 40.8866,
      "eval_l2g_rouge2": 27.3687,
      "eval_l2g_rougeL": 38.5521,
      "eval_l2g_rougeLsum": 38.621,
      "eval_loss": 3.2279489040374756,
      "eval_runtime": 300.5167,
      "eval_samples_per_second": 33.013,
      "eval_steps_per_second": 0.519,
      "step": 33232
    },
    {
      "epoch": 6.00216684723727,
      "grad_norm": 254.65907287597656,
      "learning_rate": 0.00021030904344621589,
      "loss": 3.1923,
      "step": 33240
    },
    {
      "epoch": 6.152221018418201,
      "grad_norm": 228.19200134277344,
      "learning_rate": 0.0002057383458259045,
      "loss": 3.1351,
      "step": 34071
    },
    {
      "epoch": 6.302275189599134,
      "grad_norm": 305.9356689453125,
      "learning_rate": 0.00020110691252574222,
      "loss": 3.1421,
      "step": 34902
    },
    {
      "epoch": 6.452329360780065,
      "grad_norm": 190.717041015625,
      "learning_rate": 0.00019641981295707994,
      "loss": 3.1515,
      "step": 35733
    },
    {
      "epoch": 6.602383531960997,
      "grad_norm": 241.10513305664062,
      "learning_rate": 0.00019168217746169658,
      "loss": 3.1506,
      "step": 36564
    },
    {
      "epoch": 6.750812567713976,
      "eval_g2l_cer": 45.0204,
      "eval_g2l_gen_len": 4.0829,
      "eval_g2l_rouge1": 47.6328,
      "eval_g2l_rouge2": 37.0338,
      "eval_g2l_rougeL": 47.5319,
      "eval_g2l_rougeLsum": 47.5196,
      "eval_l2ex_cer": 84.416,
      "eval_l2ex_gen_len": 23.2662,
      "eval_l2ex_rouge1": 33.1718,
      "eval_l2ex_rouge2": 16.167,
      "eval_l2ex_rougeL": 29.1263,
      "eval_l2ex_rougeLsum": 29.2504,
      "eval_l2g_cer": 75.3622,
      "eval_l2g_gen_len": 17.9792,
      "eval_l2g_rouge1": 40.4989,
      "eval_l2g_rouge2": 27.2808,
      "eval_l2g_rougeL": 38.3025,
      "eval_l2g_rougeLsum": 38.3215,
      "eval_loss": 3.1947431564331055,
      "eval_runtime": 297.6683,
      "eval_samples_per_second": 33.329,
      "eval_steps_per_second": 0.524,
      "step": 37386
    },
    {
      "epoch": 6.752437703141928,
      "grad_norm": 194.801513671875,
      "learning_rate": 0.0001868991916962991,
      "loss": 3.1481,
      "step": 37395
    },
    {
      "epoch": 6.902491874322861,
      "grad_norm": 133.14971923828125,
      "learning_rate": 0.00018207609095647728,
      "loss": 3.1368,
      "step": 38226
    },
    {
      "epoch": 7.052546045503792,
      "grad_norm": 179.32647705078125,
      "learning_rate": 0.00017721815444632445,
      "loss": 3.1199,
      "step": 39057
    },
    {
      "epoch": 7.202600216684724,
      "grad_norm": 218.9005889892578,
      "learning_rate": 0.00017233069949999837,
      "loss": 3.094,
      "step": 39888
    },
    {
      "epoch": 7.352654387865655,
      "grad_norm": 215.17083740234375,
      "learning_rate": 0.00016741907576154572,
      "loss": 3.0896,
      "step": 40719
    },
    {
      "epoch": 7.500902853015529,
      "eval_g2l_cer": 44.5361,
      "eval_g2l_gen_len": 4.0809,
      "eval_g2l_rouge1": 48.0404,
      "eval_g2l_rouge2": 37.3411,
      "eval_g2l_rougeL": 47.8907,
      "eval_g2l_rougeLsum": 47.867,
      "eval_l2ex_cer": 83.3722,
      "eval_l2ex_gen_len": 21.9188,
      "eval_l2ex_rouge1": 33.2159,
      "eval_l2ex_rouge2": 16.5159,
      "eval_l2ex_rougeL": 29.1348,
      "eval_l2ex_rougeLsum": 29.2304,
      "eval_l2g_cer": 72.9959,
      "eval_l2g_gen_len": 15.519,
      "eval_l2g_rouge1": 40.681,
      "eval_l2g_rouge2": 27.6769,
      "eval_l2g_rougeL": 38.6264,
      "eval_l2g_rougeLsum": 38.6627,
      "eval_loss": 3.1981189250946045,
      "eval_runtime": 291.8194,
      "eval_samples_per_second": 33.997,
      "eval_steps_per_second": 0.535,
      "step": 41540
    },
    {
      "epoch": 7.502708559046587,
      "grad_norm": 153.0230712890625,
      "learning_rate": 0.00016248865932936134,
      "loss": 3.0927,
      "step": 41550
    },
    {
      "epoch": 7.6527627302275185,
      "grad_norm": 212.92391967773438,
      "learning_rate": 0.0001575448468716914,
      "loss": 3.0974,
      "step": 42381
    },
    {
      "epoch": 7.802816901408451,
      "grad_norm": 186.11282348632812,
      "learning_rate": 0.00015259304971962191,
      "loss": 3.09,
      "step": 43212
    },
    {
      "epoch": 7.9528710725893825,
      "grad_norm": 107.77149200439453,
      "learning_rate": 0.00014763868794401698,
      "loss": 3.0957,
      "step": 44043
    },
    {
      "epoch": 8.102925243770315,
      "grad_norm": 111.45164489746094,
      "learning_rate": 0.00014268718442289166,
      "loss": 3.0703,
      "step": 44874
    },
    {
      "epoch": 8.250993138317082,
      "eval_g2l_cer": 44.1066,
      "eval_g2l_gen_len": 3.9695,
      "eval_g2l_rouge1": 48.1237,
      "eval_g2l_rouge2": 37.5462,
      "eval_g2l_rougeL": 48.0143,
      "eval_g2l_rougeLsum": 48.0057,
      "eval_l2ex_cer": 83.1439,
      "eval_l2ex_gen_len": 22.295,
      "eval_l2ex_rouge1": 33.8654,
      "eval_l2ex_rouge2": 16.5697,
      "eval_l2ex_rougeL": 29.7053,
      "eval_l2ex_rougeLsum": 29.8195,
      "eval_l2g_cer": 71.4647,
      "eval_l2g_gen_len": 15.6419,
      "eval_l2g_rouge1": 41.0845,
      "eval_l2g_rouge2": 27.5338,
      "eval_l2g_rougeL": 38.8182,
      "eval_l2g_rougeLsum": 38.8839,
      "eval_loss": 3.19246506690979,
      "eval_runtime": 290.3246,
      "eval_samples_per_second": 34.172,
      "eval_steps_per_second": 0.537,
      "step": 45694
    },
    {
      "epoch": 8.252979414951247,
      "grad_norm": 117.11378479003906,
      "learning_rate": 0.0001377439589057116,
      "loss": 3.0554,
      "step": 45705
    },
    {
      "epoch": 8.403033586132178,
      "grad_norm": 98.44864654541016,
      "learning_rate": 0.00013281442208111732,
      "loss": 3.0581,
      "step": 46536
    },
    {
      "epoch": 8.55308775731311,
      "grad_norm": 110.35213470458984,
      "learning_rate": 0.00012790396965456613,
      "loss": 3.0478,
      "step": 47367
    },
    {
      "epoch": 8.703141928494041,
      "grad_norm": 56.789737701416016,
      "learning_rate": 0.00012301797644237423,
      "loss": 3.0599,
      "step": 48198
    },
    {
      "epoch": 8.853196099674973,
      "grad_norm": 111.45304107666016,
      "learning_rate": 0.00011816179048862318,
      "loss": 3.0381,
      "step": 49029
    },
    {
      "epoch": 9.001083423618635,
      "eval_g2l_cer": 44.1774,
      "eval_g2l_gen_len": 4.1188,
      "eval_g2l_rouge1": 48.6114,
      "eval_g2l_rouge2": 37.8262,
      "eval_g2l_rougeL": 48.5072,
      "eval_g2l_rougeLsum": 48.4844,
      "eval_l2ex_cer": 83.6477,
      "eval_l2ex_gen_len": 22.4625,
      "eval_l2ex_rouge1": 33.2375,
      "eval_l2ex_rouge2": 16.4943,
      "eval_l2ex_rougeL": 29.1757,
      "eval_l2ex_rougeLsum": 29.2794,
      "eval_l2g_cer": 72.9254,
      "eval_l2g_gen_len": 17.2116,
      "eval_l2g_rouge1": 41.0375,
      "eval_l2g_rouge2": 27.5603,
      "eval_l2g_rougeL": 38.745,
      "eval_l2g_rougeLsum": 38.787,
      "eval_loss": 3.180062770843506,
      "eval_runtime": 297.1459,
      "eval_samples_per_second": 33.388,
      "eval_steps_per_second": 0.525,
      "step": 49848
    },
    {
      "epoch": 9.003250270855904,
      "grad_norm": 78.80842590332031,
      "learning_rate": 0.00011334072721137046,
      "loss": 3.0595,
      "step": 49860
    },
    {
      "epoch": 9.153304442036836,
      "grad_norm": 138.7894287109375,
      "learning_rate": 0.00010856006358457137,
      "loss": 3.0096,
      "step": 50691
    },
    {
      "epoch": 9.303358613217767,
      "grad_norm": 132.17127990722656,
      "learning_rate": 0.00010382503236208064,
      "loss": 3.0273,
      "step": 51522
    },
    {
      "epoch": 9.453412784398699,
      "grad_norm": 130.38265991210938,
      "learning_rate": 9.914081635005574e-05,
      "loss": 3.0237,
      "step": 52353
    },
    {
      "epoch": 9.603466955579632,
      "grad_norm": 100.80162811279297,
      "learning_rate": 9.451254273403124e-05,
      "loss": 3.0167,
      "step": 53184
    },
    {
      "epoch": 9.751173708920188,
      "eval_g2l_cer": 44.0723,
      "eval_g2l_gen_len": 4.1516,
      "eval_g2l_rouge1": 48.7144,
      "eval_g2l_rouge2": 37.9052,
      "eval_g2l_rougeL": 48.5889,
      "eval_g2l_rougeLsum": 48.5704,
      "eval_l2ex_cer": 82.0577,
      "eval_l2ex_gen_len": 22.3731,
      "eval_l2ex_rouge1": 33.8214,
      "eval_l2ex_rouge2": 17.2047,
      "eval_l2ex_rougeL": 29.9782,
      "eval_l2ex_rougeLsum": 30.0546,
      "eval_l2g_cer": 72.335,
      "eval_l2g_gen_len": 17.0699,
      "eval_l2g_rouge1": 41.6605,
      "eval_l2g_rouge2": 28.2593,
      "eval_l2g_rougeL": 39.3968,
      "eval_l2g_rougeLsum": 39.4309,
      "eval_loss": 3.1734836101531982,
      "eval_runtime": 297.1857,
      "eval_samples_per_second": 33.383,
      "eval_steps_per_second": 0.525,
      "step": 54002
    },
    {
      "epoch": 9.753521126760564,
      "grad_norm": 98.55856323242188,
      "learning_rate": 8.994527746687389e-05,
      "loss": 3.0202,
      "step": 54015
    },
    {
      "epoch": 9.903575297941495,
      "grad_norm": 110.90308380126953,
      "learning_rate": 8.544401972376058e-05,
      "loss": 3.0123,
      "step": 54846
    },
    {
      "epoch": 10.053629469122427,
      "grad_norm": 103.5262451171875,
      "learning_rate": 8.10136964302491e-05,
      "loss": 3.0112,
      "step": 55677
    },
    {
      "epoch": 10.203683640303359,
      "grad_norm": 73.44245147705078,
      "learning_rate": 7.665915686943095e-05,
      "loss": 2.9824,
      "step": 56508
    },
    {
      "epoch": 10.35373781148429,
      "grad_norm": 77.93965148925781,
      "learning_rate": 7.238516737406908e-05,
      "loss": 2.9999,
      "step": 57339
    },
    {
      "epoch": 10.501263994221741,
      "eval_g2l_cer": 44.1363,
      "eval_g2l_gen_len": 4.1471,
      "eval_g2l_rouge1": 48.6933,
      "eval_g2l_rouge2": 38.0423,
      "eval_g2l_rougeL": 48.565,
      "eval_g2l_rougeLsum": 48.5648,
      "eval_l2ex_cer": 81.2579,
      "eval_l2ex_gen_len": 21.4666,
      "eval_l2ex_rouge1": 33.958,
      "eval_l2ex_rouge2": 16.8411,
      "eval_l2ex_rougeL": 29.5656,
      "eval_l2ex_rougeLsum": 29.6795,
      "eval_l2g_cer": 71.0675,
      "eval_l2g_gen_len": 16.2517,
      "eval_l2g_rouge1": 41.5203,
      "eval_l2g_rouge2": 28.0296,
      "eval_l2g_rougeL": 39.1863,
      "eval_l2g_rougeLsum": 39.2508,
      "eval_loss": 3.1717426776885986,
      "eval_runtime": 290.2937,
      "eval_samples_per_second": 34.176,
      "eval_steps_per_second": 0.537,
      "step": 58156
    },
    {
      "epoch": 10.503791982665222,
      "grad_norm": 129.3556365966797,
      "learning_rate": 6.81964061095297e-05,
      "loss": 2.9888,
      "step": 58170
    },
    {
      "epoch": 10.653846153846153,
      "grad_norm": 112.28192138671875,
      "learning_rate": 6.409745795321991e-05,
      "loss": 2.9878,
      "step": 59001
    },
    {
      "epoch": 10.803900325027085,
      "grad_norm": 76.26856231689453,
      "learning_rate": 6.009280947613472e-05,
      "loss": 2.9817,
      "step": 59832
    },
    {
      "epoch": 10.953954496208016,
      "grad_norm": 58.20437240600586,
      "learning_rate": 5.618684403200737e-05,
      "loss": 2.9851,
      "step": 60663
    },
    {
      "epoch": 11.10400866738895,
      "grad_norm": 108.53790283203125,
      "learning_rate": 5.238383695943713e-05,
      "loss": 2.9823,
      "step": 61494
    },
    {
      "epoch": 11.251354279523294,
      "eval_g2l_cer": 44.0289,
      "eval_g2l_gen_len": 4.1275,
      "eval_g2l_rouge1": 48.9057,
      "eval_g2l_rouge2": 38.3159,
      "eval_g2l_rougeL": 48.7647,
      "eval_g2l_rougeLsum": 48.766,
      "eval_l2ex_cer": 82.4492,
      "eval_l2ex_gen_len": 22.9445,
      "eval_l2ex_rouge1": 33.8799,
      "eval_l2ex_rouge2": 16.7295,
      "eval_l2ex_rougeL": 29.4575,
      "eval_l2ex_rougeLsum": 29.6104,
      "eval_l2g_cer": 71.7288,
      "eval_l2g_gen_len": 16.7564,
      "eval_l2g_rouge1": 41.5535,
      "eval_l2g_rouge2": 28.1997,
      "eval_l2g_rougeL": 39.2564,
      "eval_l2g_rougeLsum": 39.323,
      "eval_loss": 3.1673169136047363,
      "eval_runtime": 295.6043,
      "eval_samples_per_second": 33.562,
      "eval_steps_per_second": 0.528,
      "step": 62310
    },
    {
      "epoch": 11.254062838569881,
      "grad_norm": 78.87760162353516,
      "learning_rate": 4.868795090224752e-05,
      "loss": 2.9644,
      "step": 62325
    },
    {
      "epoch": 11.404117009750813,
      "grad_norm": 63.00550079345703,
      "learning_rate": 4.510323125319609e-05,
      "loss": 2.9714,
      "step": 63156
    },
    {
      "epoch": 11.554171180931744,
      "grad_norm": 107.51451110839844,
      "learning_rate": 4.1633601726023533e-05,
      "loss": 2.972,
      "step": 63987
    },
    {
      "epoch": 11.704225352112676,
      "grad_norm": 66.15150451660156,
      "learning_rate": 3.82828600606881e-05,
      "loss": 2.9604,
      "step": 64818
    },
    {
      "epoch": 11.854279523293608,
      "grad_norm": 65.40077209472656,
      "learning_rate": 3.505467386648718e-05,
      "loss": 2.9667,
      "step": 65649
    },
    {
      "epoch": 12.001444564824846,
      "eval_g2l_cer": 43.9512,
      "eval_g2l_gen_len": 4.0624,
      "eval_g2l_rouge1": 48.889,
      "eval_g2l_rouge2": 38.1288,
      "eval_g2l_rougeL": 48.7444,
      "eval_g2l_rougeLsum": 48.751,
      "eval_l2ex_cer": 83.3432,
      "eval_l2ex_gen_len": 22.5889,
      "eval_l2ex_rouge1": 33.672,
      "eval_l2ex_rouge2": 16.682,
      "eval_l2ex_rougeL": 29.3383,
      "eval_l2ex_rougeLsum": 29.4381,
      "eval_l2g_cer": 72.969,
      "eval_l2g_gen_len": 17.2907,
      "eval_l2g_rouge1": 41.384,
      "eval_l2g_rouge2": 28.0121,
      "eval_l2g_rougeL": 39.0564,
      "eval_l2g_rougeLsum": 39.1247,
      "eval_loss": 3.163238286972046,
      "eval_runtime": 296.9381,
      "eval_samples_per_second": 33.411,
      "eval_steps_per_second": 0.525,
      "step": 66464
    },
    {
      "epoch": 12.00433369447454,
      "grad_norm": 80.00790405273438,
      "learning_rate": 3.195257660761534e-05,
      "loss": 2.9548,
      "step": 66480
    },
    {
      "epoch": 12.15438786565547,
      "grad_norm": 50.12080001831055,
      "learning_rate": 2.897996373555297e-05,
      "loss": 2.9599,
      "step": 67311
    },
    {
      "epoch": 12.304442036836402,
      "grad_norm": 137.98057556152344,
      "learning_rate": 2.6140088972519277e-05,
      "loss": 2.9426,
      "step": 68142
    },
    {
      "epoch": 12.454496208017336,
      "grad_norm": 74.86833190917969,
      "learning_rate": 2.343606075005708e-05,
      "loss": 2.9445,
      "step": 68973
    },
    {
      "epoch": 12.604550379198267,
      "grad_norm": 85.49880981445312,
      "learning_rate": 2.0870838806648037e-05,
      "loss": 2.9445,
      "step": 69804
    },
    {
      "epoch": 12.751534850126399,
      "eval_g2l_cer": 44.0472,
      "eval_g2l_gen_len": 4.109,
      "eval_g2l_rouge1": 48.9965,
      "eval_g2l_rouge2": 38.1664,
      "eval_g2l_rougeL": 48.8442,
      "eval_g2l_rougeLsum": 48.8419,
      "eval_l2ex_cer": 81.2857,
      "eval_l2ex_gen_len": 21.2364,
      "eval_l2ex_rouge1": 34.1658,
      "eval_l2ex_rouge2": 17.3387,
      "eval_l2ex_rougeL": 29.9082,
      "eval_l2ex_rougeLsum": 30.0362,
      "eval_l2g_cer": 70.6762,
      "eval_l2g_gen_len": 15.9381,
      "eval_l2g_rouge1": 41.6215,
      "eval_l2g_rouge2": 28.1386,
      "eval_l2g_rougeL": 39.3091,
      "eval_l2g_rougeLsum": 39.3715,
      "eval_loss": 3.163139581680298,
      "eval_runtime": 291.4682,
      "eval_samples_per_second": 34.038,
      "eval_steps_per_second": 0.535,
      "step": 70618
    },
    {
      "epoch": 12.754604550379199,
      "grad_norm": 60.18415451049805,
      "learning_rate": 1.844723094808244e-05,
      "loss": 2.9515,
      "step": 70635
    },
    {
      "epoch": 12.90465872156013,
      "grad_norm": 39.721649169921875,
      "learning_rate": 1.6167889974129134e-05,
      "loss": 2.9545,
      "step": 71466
    },
    {
      "epoch": 13.054712892741062,
      "grad_norm": 49.22962188720703,
      "learning_rate": 1.4035310774870041e-05,
      "loss": 2.9433,
      "step": 72297
    },
    {
      "epoch": 13.204767063921993,
      "grad_norm": 55.78800964355469,
      "learning_rate": 1.205182759987737e-05,
      "loss": 2.9241,
      "step": 73128
    },
    {
      "epoch": 13.354821235102925,
      "grad_norm": 69.98873138427734,
      "learning_rate": 1.0219611503222213e-05,
      "loss": 2.939,
      "step": 73959
    },
    {
      "epoch": 13.501625135427952,
      "eval_g2l_cer": 43.9078,
      "eval_g2l_gen_len": 4.0981,
      "eval_g2l_rouge1": 48.969,
      "eval_g2l_rouge2": 38.1559,
      "eval_g2l_rougeL": 48.8152,
      "eval_g2l_rougeLsum": 48.8193,
      "eval_l2ex_cer": 81.5515,
      "eval_l2ex_gen_len": 21.7205,
      "eval_l2ex_rouge1": 33.9427,
      "eval_l2ex_rouge2": 17.0266,
      "eval_l2ex_rougeL": 29.5977,
      "eval_l2ex_rougeLsum": 29.7301,
      "eval_l2g_cer": 70.7346,
      "eval_l2g_gen_len": 16.1531,
      "eval_l2g_rouge1": 41.7374,
      "eval_l2g_rouge2": 28.1793,
      "eval_l2g_rougeL": 39.3779,
      "eval_l2g_rougeLsum": 39.4426,
      "eval_loss": 3.1640655994415283,
      "eval_runtime": 292.5347,
      "eval_samples_per_second": 33.914,
      "eval_steps_per_second": 0.533,
      "step": 74772
    },
    {
      "epoch": 13.504875406283857,
      "grad_norm": 69.49555206298828,
      "learning_rate": 8.54066796711184e-06,
      "loss": 2.9465,
      "step": 74790
    },
    {
      "epoch": 13.654929577464788,
      "grad_norm": 73.59809112548828,
      "learning_rate": 7.016834706756168e-06,
      "loss": 2.9391,
      "step": 75621
    },
    {
      "epoch": 13.804983748645721,
      "grad_norm": 64.26115417480469,
      "learning_rate": 5.649779658866368e-06,
      "loss": 2.9356,
      "step": 76452
    },
    {
      "epoch": 13.955037919826653,
      "grad_norm": 37.47693634033203,
      "learning_rate": 4.440999155987467e-06,
      "loss": 2.9523,
      "step": 77283
    },
    {
      "epoch": 14.105092091007585,
      "grad_norm": 97.73049926757812,
      "learning_rate": 3.391816288662864e-06,
      "loss": 2.9394,
      "step": 78114
    },
    {
      "epoch": 14.251715420729505,
      "eval_g2l_cer": 44.1271,
      "eval_g2l_gen_len": 4.1016,
      "eval_g2l_rouge1": 48.8563,
      "eval_g2l_rouge2": 38.0804,
      "eval_g2l_rougeL": 48.7034,
      "eval_g2l_rougeLsum": 48.679,
      "eval_l2ex_cer": 81.746,
      "eval_l2ex_gen_len": 21.9486,
      "eval_l2ex_rouge1": 34.1479,
      "eval_l2ex_rouge2": 17.1381,
      "eval_l2ex_rougeL": 29.7996,
      "eval_l2ex_rougeLsum": 29.9184,
      "eval_l2g_cer": 71.184,
      "eval_l2g_gen_len": 16.3747,
      "eval_l2g_rouge1": 41.7919,
      "eval_l2g_rouge2": 28.2088,
      "eval_l2g_rougeL": 39.4284,
      "eval_l2g_rougeLsum": 39.4987,
      "eval_loss": 3.1630301475524902,
      "eval_runtime": 294.6002,
      "eval_samples_per_second": 33.676,
      "eval_steps_per_second": 0.53,
      "step": 78926
    }
  ],
  "logging_steps": 831,
  "max_steps": 83070,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 4154,
  "total_flos": 1.4551992475225948e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|