NeoIT5-large / trainer_state.json
snizio's picture
Upload folder using huggingface_hub
6292377 verified
{
"best_metric": 3.1630301475524902,
"best_model_checkpoint": "checkpoints/it5-large/checkpoint-78926",
"epoch": 14.251715420729505,
"eval_steps": 4154,
"global_step": 78926,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14987360057782592,
"eval_g2l_cer": 67.2645,
"eval_g2l_gen_len": 4.5733,
"eval_g2l_rouge1": 27.1595,
"eval_g2l_rouge2": 15.5941,
"eval_g2l_rougeL": 26.9535,
"eval_g2l_rougeLsum": 26.9576,
"eval_l2ex_cer": 130.3597,
"eval_l2ex_gen_len": 47.8171,
"eval_l2ex_rouge1": 22.1003,
"eval_l2ex_rouge2": 9.5437,
"eval_l2ex_rougeL": 20.2017,
"eval_l2ex_rougeLsum": 19.2847,
"eval_l2g_cer": 106.9099,
"eval_l2g_gen_len": 30.346,
"eval_l2g_rouge1": 27.2135,
"eval_l2g_rouge2": 14.1149,
"eval_l2g_rougeL": 25.3922,
"eval_l2g_rougeLsum": 25.3986,
"eval_loss": 3.804034948348999,
"eval_runtime": 310.7233,
"eval_samples_per_second": 31.929,
"eval_steps_per_second": 0.502,
"step": 830
},
{
"epoch": 0.15005417118093176,
"grad_norm": 129.91856384277344,
"learning_rate": 6.004335260115606e-05,
"loss": 4.6945,
"step": 831
},
{
"epoch": 0.3001083423618635,
"grad_norm": 190.0506591796875,
"learning_rate": 0.00012008670520231212,
"loss": 3.8417,
"step": 1662
},
{
"epoch": 0.45016251354279524,
"grad_norm": 208.42779541015625,
"learning_rate": 0.0001801300578034682,
"loss": 3.712,
"step": 2493
},
{
"epoch": 0.600216684723727,
"grad_norm": 293.423583984375,
"learning_rate": 0.00024017341040462423,
"loss": 3.6763,
"step": 3324
},
{
"epoch": 0.7500902853015529,
"eval_g2l_cer": 53.5059,
"eval_g2l_gen_len": 3.5087,
"eval_g2l_rouge1": 37.5417,
"eval_g2l_rouge2": 29.1384,
"eval_g2l_rougeL": 37.463,
"eval_g2l_rougeLsum": 37.4022,
"eval_l2ex_cer": 102.0708,
"eval_l2ex_gen_len": 25.9866,
"eval_l2ex_rouge1": 26.7853,
"eval_l2ex_rouge2": 12.9071,
"eval_l2ex_rougeL": 24.0724,
"eval_l2ex_rougeLsum": 24.0445,
"eval_l2g_cer": 86.4648,
"eval_l2g_gen_len": 15.0081,
"eval_l2g_rouge1": 30.7776,
"eval_l2g_rouge2": 18.1789,
"eval_l2g_rougeL": 29.1675,
"eval_l2g_rougeLsum": 29.2136,
"eval_loss": 3.5662293434143066,
"eval_runtime": 296.355,
"eval_samples_per_second": 33.477,
"eval_steps_per_second": 0.526,
"step": 4154
},
{
"epoch": 0.7502708559046587,
"grad_norm": 500.86932373046875,
"learning_rate": 0.0002999999989317841,
"loss": 3.6694,
"step": 4155
},
{
"epoch": 0.9003250270855905,
"grad_norm": 378.1305236816406,
"learning_rate": 0.00029991745158829114,
"loss": 3.641,
"step": 4986
},
{
"epoch": 1.0503791982665223,
"grad_norm": 309.59503173828125,
"learning_rate": 0.0002996710832786393,
"loss": 3.6169,
"step": 5817
},
{
"epoch": 1.200433369447454,
"grad_norm": 364.1076965332031,
"learning_rate": 0.00029926116366930635,
"loss": 3.5732,
"step": 6648
},
{
"epoch": 1.3504875406283858,
"grad_norm": 402.1815490722656,
"learning_rate": 0.00029868814144453027,
"loss": 3.5547,
"step": 7479
},
{
"epoch": 1.500180570603106,
"eval_g2l_cer": 49.6927,
"eval_g2l_gen_len": 4.4371,
"eval_g2l_rouge1": 42.6629,
"eval_g2l_rouge2": 32.7133,
"eval_g2l_rougeL": 42.5078,
"eval_g2l_rougeLsum": 42.487,
"eval_l2ex_cer": 85.9069,
"eval_l2ex_gen_len": 27.3155,
"eval_l2ex_rouge1": 31.0018,
"eval_l2ex_rouge2": 14.7792,
"eval_l2ex_rougeL": 27.5259,
"eval_l2ex_rougeLsum": 27.5817,
"eval_l2g_cer": 76.6936,
"eval_l2g_gen_len": 19.6286,
"eval_l2g_rouge1": 38.3213,
"eval_l2g_rouge2": 24.5167,
"eval_l2g_rougeL": 36.1971,
"eval_l2g_rougeLsum": 36.2764,
"eval_loss": 3.4418885707855225,
"eval_runtime": 302.3378,
"eval_samples_per_second": 32.814,
"eval_steps_per_second": 0.516,
"step": 8308
},
{
"epoch": 1.5005417118093174,
"grad_norm": 380.1324157714844,
"learning_rate": 0.0002979526438151941,
"loss": 3.533,
"step": 8310
},
{
"epoch": 1.6505958829902492,
"grad_norm": 358.8219909667969,
"learning_rate": 0.0002970554758323025,
"loss": 3.5167,
"step": 9141
},
{
"epoch": 1.800650054171181,
"grad_norm": 323.29583740234375,
"learning_rate": 0.0002959976195057994,
"loss": 3.5114,
"step": 9972
},
{
"epoch": 1.9507042253521125,
"grad_norm": 283.41790771484375,
"learning_rate": 0.00029478023272969345,
"loss": 3.4955,
"step": 10803
},
{
"epoch": 2.1007583965330445,
"grad_norm": 326.611572265625,
"learning_rate": 0.0002934046480146657,
"loss": 3.4415,
"step": 11634
},
{
"epoch": 2.2502708559046587,
"eval_g2l_cer": 49.446,
"eval_g2l_gen_len": 4.5047,
"eval_g2l_rouge1": 43.91,
"eval_g2l_rouge2": 33.662,
"eval_g2l_rougeL": 43.778,
"eval_g2l_rougeLsum": 43.7883,
"eval_l2ex_cer": 86.4808,
"eval_l2ex_gen_len": 30.4358,
"eval_l2ex_rouge1": 30.7974,
"eval_l2ex_rouge2": 14.4266,
"eval_l2ex_rougeL": 27.2278,
"eval_l2ex_rougeLsum": 27.3219,
"eval_l2g_cer": 84.1694,
"eval_l2g_gen_len": 24.5493,
"eval_l2g_rouge1": 38.1858,
"eval_l2g_rouge2": 25.1392,
"eval_l2g_rougeL": 36.1473,
"eval_l2g_rougeLsum": 36.1987,
"eval_loss": 3.374830961227417,
"eval_runtime": 306.8842,
"eval_samples_per_second": 32.328,
"eval_steps_per_second": 0.508,
"step": 12462
},
{
"epoch": 2.250812567713976,
"grad_norm": 368.7247314453125,
"learning_rate": 0.0002918723710295482,
"loss": 3.424,
"step": 12465
},
{
"epoch": 2.400866738894908,
"grad_norm": 468.8291320800781,
"learning_rate": 0.00029018507895326985,
"loss": 3.4228,
"step": 13296
},
{
"epoch": 2.5509209100758397,
"grad_norm": 266.9640808105469,
"learning_rate": 0.00028834461863907226,
"loss": 3.4152,
"step": 14127
},
{
"epoch": 2.7009750812567717,
"grad_norm": 237.8248748779297,
"learning_rate": 0.0002863530045930063,
"loss": 3.4187,
"step": 14958
},
{
"epoch": 2.851029252437703,
"grad_norm": 371.8949890136719,
"learning_rate": 0.00028421241676892145,
"loss": 3.3965,
"step": 15789
},
{
"epoch": 3.0003611412062114,
"eval_g2l_cer": 48.3881,
"eval_g2l_gen_len": 4.4987,
"eval_g2l_rouge1": 44.8641,
"eval_g2l_rouge2": 34.4032,
"eval_g2l_rougeL": 44.6885,
"eval_g2l_rougeLsum": 44.711,
"eval_l2ex_cer": 89.7331,
"eval_l2ex_gen_len": 30.1634,
"eval_l2ex_rouge1": 30.5096,
"eval_l2ex_rouge2": 14.191,
"eval_l2ex_rougeL": 26.9741,
"eval_l2ex_rougeLsum": 27.0965,
"eval_l2g_cer": 81.1389,
"eval_l2g_gen_len": 23.1439,
"eval_l2g_rouge1": 39.3934,
"eval_l2g_rouge2": 25.9597,
"eval_l2g_rougeL": 37.0903,
"eval_l2g_rougeLsum": 37.1641,
"eval_loss": 3.325451612472534,
"eval_runtime": 304.8315,
"eval_samples_per_second": 32.546,
"eval_steps_per_second": 0.512,
"step": 16616
},
{
"epoch": 3.001083423618635,
"grad_norm": 298.2984619140625,
"learning_rate": 0.0002819251981823618,
"loss": 3.3917,
"step": 16620
},
{
"epoch": 3.151137594799567,
"grad_norm": 318.9201354980469,
"learning_rate": 0.00027949385234597935,
"loss": 3.3406,
"step": 17451
},
{
"epoch": 3.3011917659804983,
"grad_norm": 286.3103942871094,
"learning_rate": 0.0002769210405292737,
"loss": 3.3328,
"step": 18282
},
{
"epoch": 3.4512459371614304,
"grad_norm": 352.0337829589844,
"learning_rate": 0.0002742095788456554,
"loss": 3.3333,
"step": 19113
},
{
"epoch": 3.601300108342362,
"grad_norm": 210.31089782714844,
"learning_rate": 0.0002713624351700232,
"loss": 3.3251,
"step": 19944
},
{
"epoch": 3.7504514265077646,
"eval_g2l_cer": 47.0881,
"eval_g2l_gen_len": 4.2162,
"eval_g2l_rouge1": 45.6068,
"eval_g2l_rouge2": 34.9617,
"eval_g2l_rougeL": 45.369,
"eval_g2l_rougeLsum": 45.3992,
"eval_l2ex_cer": 87.2057,
"eval_l2ex_gen_len": 25.9743,
"eval_l2ex_rouge1": 32.044,
"eval_l2ex_rouge2": 15.4907,
"eval_l2ex_rougeL": 28.2386,
"eval_l2ex_rougeLsum": 28.3364,
"eval_l2g_cer": 81.5351,
"eval_l2g_gen_len": 20.6293,
"eval_l2g_rouge1": 39.7177,
"eval_l2g_rouge2": 26.6455,
"eval_l2g_rougeL": 37.5652,
"eval_l2g_rougeLsum": 37.5978,
"eval_loss": 3.267240047454834,
"eval_runtime": 303.1609,
"eval_samples_per_second": 32.725,
"eval_steps_per_second": 0.515,
"step": 20770
},
{
"epoch": 3.7513542795232935,
"grad_norm": 240.68614196777344,
"learning_rate": 0.0002683827258902275,
"loss": 3.3215,
"step": 20775
},
{
"epoch": 3.9014084507042255,
"grad_norm": 203.9857177734375,
"learning_rate": 0.0002652737124959771,
"loss": 3.311,
"step": 21606
},
{
"epoch": 4.0514626218851575,
"grad_norm": 182.24288940429688,
"learning_rate": 0.00026203879800892194,
"loss": 3.2913,
"step": 22437
},
{
"epoch": 4.201516793066089,
"grad_norm": 300.8165283203125,
"learning_rate": 0.00025868152325781986,
"loss": 3.2554,
"step": 23268
},
{
"epoch": 4.351570964247021,
"grad_norm": 230.65304565429688,
"learning_rate": 0.00025520556300286454,
"loss": 3.2636,
"step": 24099
},
{
"epoch": 4.500541711809317,
"eval_g2l_cer": 46.1491,
"eval_g2l_gen_len": 3.8852,
"eval_g2l_rouge1": 45.7526,
"eval_g2l_rouge2": 35.7656,
"eval_g2l_rougeL": 45.6115,
"eval_g2l_rougeLsum": 45.6146,
"eval_l2ex_cer": 82.9457,
"eval_l2ex_gen_len": 17.2662,
"eval_l2ex_rouge1": 31.8116,
"eval_l2ex_rouge2": 16.1098,
"eval_l2ex_rougeL": 28.581,
"eval_l2ex_rougeLsum": 28.6511,
"eval_l2g_cer": 69.3136,
"eval_l2g_gen_len": 12.4397,
"eval_l2g_rouge1": 39.1199,
"eval_l2g_rouge2": 26.5659,
"eval_l2g_rougeL": 37.2837,
"eval_l2g_rougeLsum": 37.3241,
"eval_loss": 3.275228261947632,
"eval_runtime": 264.7397,
"eval_samples_per_second": 37.475,
"eval_steps_per_second": 0.589,
"step": 24924
},
{
"epoch": 4.501625135427952,
"grad_norm": 278.4391174316406,
"learning_rate": 0.00025161472191341646,
"loss": 3.2605,
"step": 24930
},
{
"epoch": 4.651679306608884,
"grad_norm": 185.57086181640625,
"learning_rate": 0.00024791293040353913,
"loss": 3.2372,
"step": 25761
},
{
"epoch": 4.801733477789816,
"grad_norm": 199.41229248046875,
"learning_rate": 0.0002441042403299005,
"loss": 3.2549,
"step": 26592
},
{
"epoch": 4.951787648970748,
"grad_norm": 111.84984588623047,
"learning_rate": 0.000240192820556746,
"loss": 3.2505,
"step": 27423
},
{
"epoch": 5.101841820151679,
"grad_norm": 231.06040954589844,
"learning_rate": 0.0002361829523928005,
"loss": 3.2162,
"step": 28254
},
{
"epoch": 5.250631997110871,
"eval_g2l_cer": 45.3929,
"eval_g2l_gen_len": 4.2856,
"eval_g2l_rouge1": 47.7028,
"eval_g2l_rouge2": 36.7159,
"eval_g2l_rougeL": 47.5076,
"eval_g2l_rougeLsum": 47.5342,
"eval_l2ex_cer": 84.2916,
"eval_l2ex_gen_len": 27.5293,
"eval_l2ex_rouge1": 32.3354,
"eval_l2ex_rouge2": 15.6055,
"eval_l2ex_rougeL": 28.4133,
"eval_l2ex_rougeLsum": 28.5758,
"eval_l2g_cer": 74.8295,
"eval_l2g_gen_len": 19.749,
"eval_l2g_rouge1": 40.6449,
"eval_l2g_rouge2": 27.1184,
"eval_l2g_rougeL": 38.3335,
"eval_l2g_rougeLsum": 38.3945,
"eval_loss": 3.2382800579071045,
"eval_runtime": 300.872,
"eval_samples_per_second": 32.974,
"eval_steps_per_second": 0.518,
"step": 29078
},
{
"epoch": 5.251895991332611,
"grad_norm": 298.8398132324219,
"learning_rate": 0.00023207902490509098,
"loss": 3.187,
"step": 29085
},
{
"epoch": 5.401950162513542,
"grad_norm": 126.86690521240234,
"learning_rate": 0.0002278855301148215,
"loss": 3.2012,
"step": 29916
},
{
"epoch": 5.552004333694475,
"grad_norm": 221.00885009765625,
"learning_rate": 0.0002236070580805574,
"loss": 3.1999,
"step": 30747
},
{
"epoch": 5.702058504875406,
"grad_norm": 193.86273193359375,
"learning_rate": 0.00021924829187410153,
"loss": 3.1942,
"step": 31578
},
{
"epoch": 5.852112676056338,
"grad_norm": 126.05673217773438,
"learning_rate": 0.00021481400245456104,
"loss": 3.1947,
"step": 32409
},
{
"epoch": 6.000722282412423,
"eval_g2l_cer": 45.9412,
"eval_g2l_gen_len": 4.4229,
"eval_g2l_rouge1": 47.5003,
"eval_g2l_rouge2": 36.6595,
"eval_g2l_rougeL": 47.3175,
"eval_g2l_rougeLsum": 47.3017,
"eval_l2ex_cer": 82.4504,
"eval_l2ex_gen_len": 23.3741,
"eval_l2ex_rouge1": 32.8857,
"eval_l2ex_rouge2": 15.6166,
"eval_l2ex_rougeL": 28.7672,
"eval_l2ex_rougeLsum": 28.8746,
"eval_l2g_cer": 73.7451,
"eval_l2g_gen_len": 18.5067,
"eval_l2g_rouge1": 40.8866,
"eval_l2g_rouge2": 27.3687,
"eval_l2g_rougeL": 38.5521,
"eval_l2g_rougeLsum": 38.621,
"eval_loss": 3.2279489040374756,
"eval_runtime": 300.5167,
"eval_samples_per_second": 33.013,
"eval_steps_per_second": 0.519,
"step": 33232
},
{
"epoch": 6.00216684723727,
"grad_norm": 254.65907287597656,
"learning_rate": 0.00021030904344621589,
"loss": 3.1923,
"step": 33240
},
{
"epoch": 6.152221018418201,
"grad_norm": 228.19200134277344,
"learning_rate": 0.0002057383458259045,
"loss": 3.1351,
"step": 34071
},
{
"epoch": 6.302275189599134,
"grad_norm": 305.9356689453125,
"learning_rate": 0.00020110691252574222,
"loss": 3.1421,
"step": 34902
},
{
"epoch": 6.452329360780065,
"grad_norm": 190.717041015625,
"learning_rate": 0.00019641981295707994,
"loss": 3.1515,
"step": 35733
},
{
"epoch": 6.602383531960997,
"grad_norm": 241.10513305664062,
"learning_rate": 0.00019168217746169658,
"loss": 3.1506,
"step": 36564
},
{
"epoch": 6.750812567713976,
"eval_g2l_cer": 45.0204,
"eval_g2l_gen_len": 4.0829,
"eval_g2l_rouge1": 47.6328,
"eval_g2l_rouge2": 37.0338,
"eval_g2l_rougeL": 47.5319,
"eval_g2l_rougeLsum": 47.5196,
"eval_l2ex_cer": 84.416,
"eval_l2ex_gen_len": 23.2662,
"eval_l2ex_rouge1": 33.1718,
"eval_l2ex_rouge2": 16.167,
"eval_l2ex_rougeL": 29.1263,
"eval_l2ex_rougeLsum": 29.2504,
"eval_l2g_cer": 75.3622,
"eval_l2g_gen_len": 17.9792,
"eval_l2g_rouge1": 40.4989,
"eval_l2g_rouge2": 27.2808,
"eval_l2g_rougeL": 38.3025,
"eval_l2g_rougeLsum": 38.3215,
"eval_loss": 3.1947431564331055,
"eval_runtime": 297.6683,
"eval_samples_per_second": 33.329,
"eval_steps_per_second": 0.524,
"step": 37386
},
{
"epoch": 6.752437703141928,
"grad_norm": 194.801513671875,
"learning_rate": 0.0001868991916962991,
"loss": 3.1481,
"step": 37395
},
{
"epoch": 6.902491874322861,
"grad_norm": 133.14971923828125,
"learning_rate": 0.00018207609095647728,
"loss": 3.1368,
"step": 38226
},
{
"epoch": 7.052546045503792,
"grad_norm": 179.32647705078125,
"learning_rate": 0.00017721815444632445,
"loss": 3.1199,
"step": 39057
},
{
"epoch": 7.202600216684724,
"grad_norm": 218.9005889892578,
"learning_rate": 0.00017233069949999837,
"loss": 3.094,
"step": 39888
},
{
"epoch": 7.352654387865655,
"grad_norm": 215.17083740234375,
"learning_rate": 0.00016741907576154572,
"loss": 3.0896,
"step": 40719
},
{
"epoch": 7.500902853015529,
"eval_g2l_cer": 44.5361,
"eval_g2l_gen_len": 4.0809,
"eval_g2l_rouge1": 48.0404,
"eval_g2l_rouge2": 37.3411,
"eval_g2l_rougeL": 47.8907,
"eval_g2l_rougeLsum": 47.867,
"eval_l2ex_cer": 83.3722,
"eval_l2ex_gen_len": 21.9188,
"eval_l2ex_rouge1": 33.2159,
"eval_l2ex_rouge2": 16.5159,
"eval_l2ex_rougeL": 29.1348,
"eval_l2ex_rougeLsum": 29.2304,
"eval_l2g_cer": 72.9959,
"eval_l2g_gen_len": 15.519,
"eval_l2g_rouge1": 40.681,
"eval_l2g_rouge2": 27.6769,
"eval_l2g_rougeL": 38.6264,
"eval_l2g_rougeLsum": 38.6627,
"eval_loss": 3.1981189250946045,
"eval_runtime": 291.8194,
"eval_samples_per_second": 33.997,
"eval_steps_per_second": 0.535,
"step": 41540
},
{
"epoch": 7.502708559046587,
"grad_norm": 153.0230712890625,
"learning_rate": 0.00016248865932936134,
"loss": 3.0927,
"step": 41550
},
{
"epoch": 7.6527627302275185,
"grad_norm": 212.92391967773438,
"learning_rate": 0.0001575448468716914,
"loss": 3.0974,
"step": 42381
},
{
"epoch": 7.802816901408451,
"grad_norm": 186.11282348632812,
"learning_rate": 0.00015259304971962191,
"loss": 3.09,
"step": 43212
},
{
"epoch": 7.9528710725893825,
"grad_norm": 107.77149200439453,
"learning_rate": 0.00014763868794401698,
"loss": 3.0957,
"step": 44043
},
{
"epoch": 8.102925243770315,
"grad_norm": 111.45164489746094,
"learning_rate": 0.00014268718442289166,
"loss": 3.0703,
"step": 44874
},
{
"epoch": 8.250993138317082,
"eval_g2l_cer": 44.1066,
"eval_g2l_gen_len": 3.9695,
"eval_g2l_rouge1": 48.1237,
"eval_g2l_rouge2": 37.5462,
"eval_g2l_rougeL": 48.0143,
"eval_g2l_rougeLsum": 48.0057,
"eval_l2ex_cer": 83.1439,
"eval_l2ex_gen_len": 22.295,
"eval_l2ex_rouge1": 33.8654,
"eval_l2ex_rouge2": 16.5697,
"eval_l2ex_rougeL": 29.7053,
"eval_l2ex_rougeLsum": 29.8195,
"eval_l2g_cer": 71.4647,
"eval_l2g_gen_len": 15.6419,
"eval_l2g_rouge1": 41.0845,
"eval_l2g_rouge2": 27.5338,
"eval_l2g_rougeL": 38.8182,
"eval_l2g_rougeLsum": 38.8839,
"eval_loss": 3.19246506690979,
"eval_runtime": 290.3246,
"eval_samples_per_second": 34.172,
"eval_steps_per_second": 0.537,
"step": 45694
},
{
"epoch": 8.252979414951247,
"grad_norm": 117.11378479003906,
"learning_rate": 0.0001377439589057116,
"loss": 3.0554,
"step": 45705
},
{
"epoch": 8.403033586132178,
"grad_norm": 98.44864654541016,
"learning_rate": 0.00013281442208111732,
"loss": 3.0581,
"step": 46536
},
{
"epoch": 8.55308775731311,
"grad_norm": 110.35213470458984,
"learning_rate": 0.00012790396965456613,
"loss": 3.0478,
"step": 47367
},
{
"epoch": 8.703141928494041,
"grad_norm": 56.789737701416016,
"learning_rate": 0.00012301797644237423,
"loss": 3.0599,
"step": 48198
},
{
"epoch": 8.853196099674973,
"grad_norm": 111.45304107666016,
"learning_rate": 0.00011816179048862318,
"loss": 3.0381,
"step": 49029
},
{
"epoch": 9.001083423618635,
"eval_g2l_cer": 44.1774,
"eval_g2l_gen_len": 4.1188,
"eval_g2l_rouge1": 48.6114,
"eval_g2l_rouge2": 37.8262,
"eval_g2l_rougeL": 48.5072,
"eval_g2l_rougeLsum": 48.4844,
"eval_l2ex_cer": 83.6477,
"eval_l2ex_gen_len": 22.4625,
"eval_l2ex_rouge1": 33.2375,
"eval_l2ex_rouge2": 16.4943,
"eval_l2ex_rougeL": 29.1757,
"eval_l2ex_rougeLsum": 29.2794,
"eval_l2g_cer": 72.9254,
"eval_l2g_gen_len": 17.2116,
"eval_l2g_rouge1": 41.0375,
"eval_l2g_rouge2": 27.5603,
"eval_l2g_rougeL": 38.745,
"eval_l2g_rougeLsum": 38.787,
"eval_loss": 3.180062770843506,
"eval_runtime": 297.1459,
"eval_samples_per_second": 33.388,
"eval_steps_per_second": 0.525,
"step": 49848
},
{
"epoch": 9.003250270855904,
"grad_norm": 78.80842590332031,
"learning_rate": 0.00011334072721137046,
"loss": 3.0595,
"step": 49860
},
{
"epoch": 9.153304442036836,
"grad_norm": 138.7894287109375,
"learning_rate": 0.00010856006358457137,
"loss": 3.0096,
"step": 50691
},
{
"epoch": 9.303358613217767,
"grad_norm": 132.17127990722656,
"learning_rate": 0.00010382503236208064,
"loss": 3.0273,
"step": 51522
},
{
"epoch": 9.453412784398699,
"grad_norm": 130.38265991210938,
"learning_rate": 9.914081635005574e-05,
"loss": 3.0237,
"step": 52353
},
{
"epoch": 9.603466955579632,
"grad_norm": 100.80162811279297,
"learning_rate": 9.451254273403124e-05,
"loss": 3.0167,
"step": 53184
},
{
"epoch": 9.751173708920188,
"eval_g2l_cer": 44.0723,
"eval_g2l_gen_len": 4.1516,
"eval_g2l_rouge1": 48.7144,
"eval_g2l_rouge2": 37.9052,
"eval_g2l_rougeL": 48.5889,
"eval_g2l_rougeLsum": 48.5704,
"eval_l2ex_cer": 82.0577,
"eval_l2ex_gen_len": 22.3731,
"eval_l2ex_rouge1": 33.8214,
"eval_l2ex_rouge2": 17.2047,
"eval_l2ex_rougeL": 29.9782,
"eval_l2ex_rougeLsum": 30.0546,
"eval_l2g_cer": 72.335,
"eval_l2g_gen_len": 17.0699,
"eval_l2g_rouge1": 41.6605,
"eval_l2g_rouge2": 28.2593,
"eval_l2g_rougeL": 39.3968,
"eval_l2g_rougeLsum": 39.4309,
"eval_loss": 3.1734836101531982,
"eval_runtime": 297.1857,
"eval_samples_per_second": 33.383,
"eval_steps_per_second": 0.525,
"step": 54002
},
{
"epoch": 9.753521126760564,
"grad_norm": 98.55856323242188,
"learning_rate": 8.994527746687389e-05,
"loss": 3.0202,
"step": 54015
},
{
"epoch": 9.903575297941495,
"grad_norm": 110.90308380126953,
"learning_rate": 8.544401972376058e-05,
"loss": 3.0123,
"step": 54846
},
{
"epoch": 10.053629469122427,
"grad_norm": 103.5262451171875,
"learning_rate": 8.10136964302491e-05,
"loss": 3.0112,
"step": 55677
},
{
"epoch": 10.203683640303359,
"grad_norm": 73.44245147705078,
"learning_rate": 7.665915686943095e-05,
"loss": 2.9824,
"step": 56508
},
{
"epoch": 10.35373781148429,
"grad_norm": 77.93965148925781,
"learning_rate": 7.238516737406908e-05,
"loss": 2.9999,
"step": 57339
},
{
"epoch": 10.501263994221741,
"eval_g2l_cer": 44.1363,
"eval_g2l_gen_len": 4.1471,
"eval_g2l_rouge1": 48.6933,
"eval_g2l_rouge2": 38.0423,
"eval_g2l_rougeL": 48.565,
"eval_g2l_rougeLsum": 48.5648,
"eval_l2ex_cer": 81.2579,
"eval_l2ex_gen_len": 21.4666,
"eval_l2ex_rouge1": 33.958,
"eval_l2ex_rouge2": 16.8411,
"eval_l2ex_rougeL": 29.5656,
"eval_l2ex_rougeLsum": 29.6795,
"eval_l2g_cer": 71.0675,
"eval_l2g_gen_len": 16.2517,
"eval_l2g_rouge1": 41.5203,
"eval_l2g_rouge2": 28.0296,
"eval_l2g_rougeL": 39.1863,
"eval_l2g_rougeLsum": 39.2508,
"eval_loss": 3.1717426776885986,
"eval_runtime": 290.2937,
"eval_samples_per_second": 34.176,
"eval_steps_per_second": 0.537,
"step": 58156
},
{
"epoch": 10.503791982665222,
"grad_norm": 129.3556365966797,
"learning_rate": 6.81964061095297e-05,
"loss": 2.9888,
"step": 58170
},
{
"epoch": 10.653846153846153,
"grad_norm": 112.28192138671875,
"learning_rate": 6.409745795321991e-05,
"loss": 2.9878,
"step": 59001
},
{
"epoch": 10.803900325027085,
"grad_norm": 76.26856231689453,
"learning_rate": 6.009280947613472e-05,
"loss": 2.9817,
"step": 59832
},
{
"epoch": 10.953954496208016,
"grad_norm": 58.20437240600586,
"learning_rate": 5.618684403200737e-05,
"loss": 2.9851,
"step": 60663
},
{
"epoch": 11.10400866738895,
"grad_norm": 108.53790283203125,
"learning_rate": 5.238383695943713e-05,
"loss": 2.9823,
"step": 61494
},
{
"epoch": 11.251354279523294,
"eval_g2l_cer": 44.0289,
"eval_g2l_gen_len": 4.1275,
"eval_g2l_rouge1": 48.9057,
"eval_g2l_rouge2": 38.3159,
"eval_g2l_rougeL": 48.7647,
"eval_g2l_rougeLsum": 48.766,
"eval_l2ex_cer": 82.4492,
"eval_l2ex_gen_len": 22.9445,
"eval_l2ex_rouge1": 33.8799,
"eval_l2ex_rouge2": 16.7295,
"eval_l2ex_rougeL": 29.4575,
"eval_l2ex_rougeLsum": 29.6104,
"eval_l2g_cer": 71.7288,
"eval_l2g_gen_len": 16.7564,
"eval_l2g_rouge1": 41.5535,
"eval_l2g_rouge2": 28.1997,
"eval_l2g_rougeL": 39.2564,
"eval_l2g_rougeLsum": 39.323,
"eval_loss": 3.1673169136047363,
"eval_runtime": 295.6043,
"eval_samples_per_second": 33.562,
"eval_steps_per_second": 0.528,
"step": 62310
},
{
"epoch": 11.254062838569881,
"grad_norm": 78.87760162353516,
"learning_rate": 4.868795090224752e-05,
"loss": 2.9644,
"step": 62325
},
{
"epoch": 11.404117009750813,
"grad_norm": 63.00550079345703,
"learning_rate": 4.510323125319609e-05,
"loss": 2.9714,
"step": 63156
},
{
"epoch": 11.554171180931744,
"grad_norm": 107.51451110839844,
"learning_rate": 4.1633601726023533e-05,
"loss": 2.972,
"step": 63987
},
{
"epoch": 11.704225352112676,
"grad_norm": 66.15150451660156,
"learning_rate": 3.82828600606881e-05,
"loss": 2.9604,
"step": 64818
},
{
"epoch": 11.854279523293608,
"grad_norm": 65.40077209472656,
"learning_rate": 3.505467386648718e-05,
"loss": 2.9667,
"step": 65649
},
{
"epoch": 12.001444564824846,
"eval_g2l_cer": 43.9512,
"eval_g2l_gen_len": 4.0624,
"eval_g2l_rouge1": 48.889,
"eval_g2l_rouge2": 38.1288,
"eval_g2l_rougeL": 48.7444,
"eval_g2l_rougeLsum": 48.751,
"eval_l2ex_cer": 83.3432,
"eval_l2ex_gen_len": 22.5889,
"eval_l2ex_rouge1": 33.672,
"eval_l2ex_rouge2": 16.682,
"eval_l2ex_rougeL": 29.3383,
"eval_l2ex_rougeLsum": 29.4381,
"eval_l2g_cer": 72.969,
"eval_l2g_gen_len": 17.2907,
"eval_l2g_rouge1": 41.384,
"eval_l2g_rouge2": 28.0121,
"eval_l2g_rougeL": 39.0564,
"eval_l2g_rougeLsum": 39.1247,
"eval_loss": 3.163238286972046,
"eval_runtime": 296.9381,
"eval_samples_per_second": 33.411,
"eval_steps_per_second": 0.525,
"step": 66464
},
{
"epoch": 12.00433369447454,
"grad_norm": 80.00790405273438,
"learning_rate": 3.195257660761534e-05,
"loss": 2.9548,
"step": 66480
},
{
"epoch": 12.15438786565547,
"grad_norm": 50.12080001831055,
"learning_rate": 2.897996373555297e-05,
"loss": 2.9599,
"step": 67311
},
{
"epoch": 12.304442036836402,
"grad_norm": 137.98057556152344,
"learning_rate": 2.6140088972519277e-05,
"loss": 2.9426,
"step": 68142
},
{
"epoch": 12.454496208017336,
"grad_norm": 74.86833190917969,
"learning_rate": 2.343606075005708e-05,
"loss": 2.9445,
"step": 68973
},
{
"epoch": 12.604550379198267,
"grad_norm": 85.49880981445312,
"learning_rate": 2.0870838806648037e-05,
"loss": 2.9445,
"step": 69804
},
{
"epoch": 12.751534850126399,
"eval_g2l_cer": 44.0472,
"eval_g2l_gen_len": 4.109,
"eval_g2l_rouge1": 48.9965,
"eval_g2l_rouge2": 38.1664,
"eval_g2l_rougeL": 48.8442,
"eval_g2l_rougeLsum": 48.8419,
"eval_l2ex_cer": 81.2857,
"eval_l2ex_gen_len": 21.2364,
"eval_l2ex_rouge1": 34.1658,
"eval_l2ex_rouge2": 17.3387,
"eval_l2ex_rougeL": 29.9082,
"eval_l2ex_rougeLsum": 30.0362,
"eval_l2g_cer": 70.6762,
"eval_l2g_gen_len": 15.9381,
"eval_l2g_rouge1": 41.6215,
"eval_l2g_rouge2": 28.1386,
"eval_l2g_rougeL": 39.3091,
"eval_l2g_rougeLsum": 39.3715,
"eval_loss": 3.163139581680298,
"eval_runtime": 291.4682,
"eval_samples_per_second": 34.038,
"eval_steps_per_second": 0.535,
"step": 70618
},
{
"epoch": 12.754604550379199,
"grad_norm": 60.18415451049805,
"learning_rate": 1.844723094808244e-05,
"loss": 2.9515,
"step": 70635
},
{
"epoch": 12.90465872156013,
"grad_norm": 39.721649169921875,
"learning_rate": 1.6167889974129134e-05,
"loss": 2.9545,
"step": 71466
},
{
"epoch": 13.054712892741062,
"grad_norm": 49.22962188720703,
"learning_rate": 1.4035310774870041e-05,
"loss": 2.9433,
"step": 72297
},
{
"epoch": 13.204767063921993,
"grad_norm": 55.78800964355469,
"learning_rate": 1.205182759987737e-05,
"loss": 2.9241,
"step": 73128
},
{
"epoch": 13.354821235102925,
"grad_norm": 69.98873138427734,
"learning_rate": 1.0219611503222213e-05,
"loss": 2.939,
"step": 73959
},
{
"epoch": 13.501625135427952,
"eval_g2l_cer": 43.9078,
"eval_g2l_gen_len": 4.0981,
"eval_g2l_rouge1": 48.969,
"eval_g2l_rouge2": 38.1559,
"eval_g2l_rougeL": 48.8152,
"eval_g2l_rougeLsum": 48.8193,
"eval_l2ex_cer": 81.5515,
"eval_l2ex_gen_len": 21.7205,
"eval_l2ex_rouge1": 33.9427,
"eval_l2ex_rouge2": 17.0266,
"eval_l2ex_rougeL": 29.5977,
"eval_l2ex_rougeLsum": 29.7301,
"eval_l2g_cer": 70.7346,
"eval_l2g_gen_len": 16.1531,
"eval_l2g_rouge1": 41.7374,
"eval_l2g_rouge2": 28.1793,
"eval_l2g_rougeL": 39.3779,
"eval_l2g_rougeLsum": 39.4426,
"eval_loss": 3.1640655994415283,
"eval_runtime": 292.5347,
"eval_samples_per_second": 33.914,
"eval_steps_per_second": 0.533,
"step": 74772
},
{
"epoch": 13.504875406283857,
"grad_norm": 69.49555206298828,
"learning_rate": 8.54066796711184e-06,
"loss": 2.9465,
"step": 74790
},
{
"epoch": 13.654929577464788,
"grad_norm": 73.59809112548828,
"learning_rate": 7.016834706756168e-06,
"loss": 2.9391,
"step": 75621
},
{
"epoch": 13.804983748645721,
"grad_norm": 64.26115417480469,
"learning_rate": 5.649779658866368e-06,
"loss": 2.9356,
"step": 76452
},
{
"epoch": 13.955037919826653,
"grad_norm": 37.47693634033203,
"learning_rate": 4.440999155987467e-06,
"loss": 2.9523,
"step": 77283
},
{
"epoch": 14.105092091007585,
"grad_norm": 97.73049926757812,
"learning_rate": 3.391816288662864e-06,
"loss": 2.9394,
"step": 78114
},
{
"epoch": 14.251715420729505,
"eval_g2l_cer": 44.1271,
"eval_g2l_gen_len": 4.1016,
"eval_g2l_rouge1": 48.8563,
"eval_g2l_rouge2": 38.0804,
"eval_g2l_rougeL": 48.7034,
"eval_g2l_rougeLsum": 48.679,
"eval_l2ex_cer": 81.746,
"eval_l2ex_gen_len": 21.9486,
"eval_l2ex_rouge1": 34.1479,
"eval_l2ex_rouge2": 17.1381,
"eval_l2ex_rougeL": 29.7996,
"eval_l2ex_rougeLsum": 29.9184,
"eval_l2g_cer": 71.184,
"eval_l2g_gen_len": 16.3747,
"eval_l2g_rouge1": 41.7919,
"eval_l2g_rouge2": 28.2088,
"eval_l2g_rougeL": 39.4284,
"eval_l2g_rougeLsum": 39.4987,
"eval_loss": 3.1630301475524902,
"eval_runtime": 294.6002,
"eval_samples_per_second": 33.676,
"eval_steps_per_second": 0.53,
"step": 78926
}
],
"logging_steps": 831,
"max_steps": 83070,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 4154,
"total_flos": 1.4551992475225948e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}