NeoMT5-base / trainer_state.json
snizio's picture
Upload folder using huggingface_hub
40240a9 verified
{
"best_metric": 3.5637874603271484,
"best_model_checkpoint": "checkpoints/mt5-base/checkpoint-37386",
"epoch": 13.501625135427952,
"eval_steps": 2077,
"global_step": 37386,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14987360057782592,
"eval_g2l_cer": 286.0084,
"eval_g2l_gen_len": 8.1826,
"eval_g2l_rouge1": 5.5622,
"eval_g2l_rouge2": 1.1913,
"eval_g2l_rougeL": 5.4996,
"eval_g2l_rougeLsum": 5.5114,
"eval_l2ex_cer": 86.6247,
"eval_l2ex_gen_len": 7.9383,
"eval_l2ex_rouge1": 16.8571,
"eval_l2ex_rouge2": 5.0595,
"eval_l2ex_rougeL": 15.4605,
"eval_l2ex_rougeLsum": 15.4778,
"eval_l2g_cer": 87.9265,
"eval_l2g_gen_len": 5.5834,
"eval_l2g_rouge1": 12.3664,
"eval_l2g_rouge2": 1.6791,
"eval_l2g_rougeL": 11.57,
"eval_l2g_rougeLsum": 11.5871,
"eval_loss": 8.359071731567383,
"eval_runtime": 145.7254,
"eval_samples_per_second": 68.08,
"eval_steps_per_second": 2.134,
"step": 415
},
{
"epoch": 0.15023474178403756,
"grad_norm": 1659.9119873046875,
"learning_rate": 2.0038535645472063e-05,
"loss": 15.088,
"step": 416
},
{
"epoch": 0.3004694835680751,
"grad_norm": 8.369742393493652,
"learning_rate": 4.0077071290944125e-05,
"loss": 7.1403,
"step": 832
},
{
"epoch": 0.4507042253521127,
"grad_norm": 1.8832136392593384,
"learning_rate": 6.0115606936416195e-05,
"loss": 5.3841,
"step": 1248
},
{
"epoch": 0.6009389671361502,
"grad_norm": 23.656179428100586,
"learning_rate": 8.015414258188825e-05,
"loss": 4.9928,
"step": 1664
},
{
"epoch": 0.7500902853015529,
"eval_g2l_cer": 59.4485,
"eval_g2l_gen_len": 2.9556,
"eval_g2l_rouge1": 27.1975,
"eval_g2l_rouge2": 17.2281,
"eval_g2l_rougeL": 27.1916,
"eval_g2l_rougeLsum": 27.2117,
"eval_l2ex_cer": 95.5123,
"eval_l2ex_gen_len": 23.6742,
"eval_l2ex_rouge1": 25.7497,
"eval_l2ex_rouge2": 11.1192,
"eval_l2ex_rougeL": 22.9461,
"eval_l2ex_rougeLsum": 22.9441,
"eval_l2g_cer": 83.2502,
"eval_l2g_gen_len": 15.9711,
"eval_l2g_rouge1": 27.2934,
"eval_l2g_rouge2": 14.8195,
"eval_l2g_rougeL": 25.9582,
"eval_l2g_rougeLsum": 25.9617,
"eval_loss": 4.2178425788879395,
"eval_runtime": 203.8137,
"eval_samples_per_second": 48.677,
"eval_steps_per_second": 1.526,
"step": 2077
},
{
"epoch": 0.7511737089201878,
"grad_norm": 1.5714240074157715,
"learning_rate": 9.999999747638704e-05,
"loss": 4.7614,
"step": 2080
},
{
"epoch": 0.9014084507042254,
"grad_norm": 2.348027467727661,
"learning_rate": 9.997217976013284e-05,
"loss": 4.6037,
"step": 2496
},
{
"epoch": 1.051643192488263,
"grad_norm": 2.6275577545166016,
"learning_rate": 9.98898067640237e-05,
"loss": 4.5136,
"step": 2912
},
{
"epoch": 1.2018779342723005,
"grad_norm": 1.1122652292251587,
"learning_rate": 9.975296886788363e-05,
"loss": 4.4057,
"step": 3328
},
{
"epoch": 1.352112676056338,
"grad_norm": 1.2248876094818115,
"learning_rate": 9.956181621053908e-05,
"loss": 4.3513,
"step": 3744
},
{
"epoch": 1.500180570603106,
"eval_g2l_cer": 56.9353,
"eval_g2l_gen_len": 3.093,
"eval_g2l_rouge1": 32.8366,
"eval_g2l_rouge2": 24.3793,
"eval_g2l_rougeL": 32.7964,
"eval_g2l_rougeLsum": 32.7681,
"eval_l2ex_cer": 84.2827,
"eval_l2ex_gen_len": 20.4604,
"eval_l2ex_rouge1": 28.5353,
"eval_l2ex_rouge2": 12.5551,
"eval_l2ex_rougeL": 25.5058,
"eval_l2ex_rougeLsum": 25.5427,
"eval_l2g_cer": 81.1104,
"eval_l2g_gen_len": 18.0072,
"eval_l2g_rouge1": 32.688,
"eval_l2g_rouge2": 18.9467,
"eval_l2g_rougeL": 30.7295,
"eval_l2g_rougeLsum": 30.7676,
"eval_loss": 3.949233293533325,
"eval_runtime": 197.691,
"eval_samples_per_second": 50.184,
"eval_steps_per_second": 1.573,
"step": 4154
},
{
"epoch": 1.5023474178403755,
"grad_norm": 1.4981008768081665,
"learning_rate": 9.931655852508637e-05,
"loss": 4.3061,
"step": 4160
},
{
"epoch": 1.652582159624413,
"grad_norm": 1.050997018814087,
"learning_rate": 9.901746490877203e-05,
"loss": 4.2525,
"step": 4576
},
{
"epoch": 1.8028169014084507,
"grad_norm": 0.8377422094345093,
"learning_rate": 9.866486352773886e-05,
"loss": 4.2289,
"step": 4992
},
{
"epoch": 1.9530516431924883,
"grad_norm": 0.968815267086029,
"learning_rate": 9.82591412569612e-05,
"loss": 4.1958,
"step": 5408
},
{
"epoch": 2.103286384976526,
"grad_norm": 0.9952152967453003,
"learning_rate": 9.780074325576496e-05,
"loss": 4.1187,
"step": 5824
},
{
"epoch": 2.2502708559046587,
"eval_g2l_cer": 53.8806,
"eval_g2l_gen_len": 3.0548,
"eval_g2l_rouge1": 35.439,
"eval_g2l_rouge2": 27.2433,
"eval_g2l_rougeL": 35.4384,
"eval_g2l_rougeLsum": 35.3985,
"eval_l2ex_cer": 89.3431,
"eval_l2ex_gen_len": 23.4573,
"eval_l2ex_rouge1": 27.8815,
"eval_l2ex_rouge2": 12.1568,
"eval_l2ex_rougeL": 24.5796,
"eval_l2ex_rougeLsum": 24.6286,
"eval_l2g_cer": 78.589,
"eval_l2g_gen_len": 17.4946,
"eval_l2g_rouge1": 35.6236,
"eval_l2g_rouge2": 22.8027,
"eval_l2g_rougeL": 33.8966,
"eval_l2g_rougeLsum": 33.9001,
"eval_loss": 3.8344309329986572,
"eval_runtime": 202.9852,
"eval_samples_per_second": 48.875,
"eval_steps_per_second": 1.532,
"step": 6231
},
{
"epoch": 2.2535211267605635,
"grad_norm": 24.567110061645508,
"learning_rate": 9.72901724793979e-05,
"loss": 4.0993,
"step": 6240
},
{
"epoch": 2.403755868544601,
"grad_norm": 0.9726364612579346,
"learning_rate": 9.672798912718604e-05,
"loss": 4.0734,
"step": 6656
},
{
"epoch": 2.5539906103286385,
"grad_norm": 0.9216151833534241,
"learning_rate": 9.611481002788184e-05,
"loss": 4.0584,
"step": 7072
},
{
"epoch": 2.704225352112676,
"grad_norm": 0.7880883812904358,
"learning_rate": 9.545130796287832e-05,
"loss": 4.0312,
"step": 7488
},
{
"epoch": 2.8544600938967135,
"grad_norm": 0.9635422229766846,
"learning_rate": 9.473821092803199e-05,
"loss": 4.0046,
"step": 7904
},
{
"epoch": 3.0003611412062114,
"eval_g2l_cer": 52.062,
"eval_g2l_gen_len": 3.0702,
"eval_g2l_rouge1": 36.8811,
"eval_g2l_rouge2": 28.8156,
"eval_g2l_rougeL": 36.8925,
"eval_g2l_rougeLsum": 36.8317,
"eval_l2ex_cer": 90.1083,
"eval_l2ex_gen_len": 22.4645,
"eval_l2ex_rouge1": 27.5056,
"eval_l2ex_rouge2": 12.5248,
"eval_l2ex_rougeL": 24.4085,
"eval_l2ex_rougeLsum": 24.4463,
"eval_l2g_cer": 78.1779,
"eval_l2g_gen_len": 17.8095,
"eval_l2g_rouge1": 36.8332,
"eval_l2g_rouge2": 23.9422,
"eval_l2g_rougeL": 34.9672,
"eval_l2g_rougeLsum": 34.995,
"eval_loss": 3.7595808506011963,
"eval_runtime": 202.3409,
"eval_samples_per_second": 49.031,
"eval_steps_per_second": 1.537,
"step": 8308
},
{
"epoch": 3.004694835680751,
"grad_norm": 0.900855541229248,
"learning_rate": 9.397630133490413e-05,
"loss": 3.992,
"step": 8320
},
{
"epoch": 3.1549295774647885,
"grad_norm": 0.8881470561027527,
"learning_rate": 9.316641515229741e-05,
"loss": 3.9362,
"step": 8736
},
{
"epoch": 3.3051643192488265,
"grad_norm": 0.7969784140586853,
"learning_rate": 9.230944098902894e-05,
"loss": 3.9143,
"step": 9152
},
{
"epoch": 3.455399061032864,
"grad_norm": 0.8603357672691345,
"learning_rate": 9.1406319118947e-05,
"loss": 3.9162,
"step": 9568
},
{
"epoch": 3.6056338028169015,
"grad_norm": 0.9974511861801147,
"learning_rate": 9.045804044926044e-05,
"loss": 3.8987,
"step": 9984
},
{
"epoch": 3.7504514265077646,
"eval_g2l_cer": 50.7917,
"eval_g2l_gen_len": 3.0031,
"eval_g2l_rouge1": 37.7135,
"eval_g2l_rouge2": 29.9526,
"eval_g2l_rougeL": 37.7649,
"eval_g2l_rougeLsum": 37.7041,
"eval_l2ex_cer": 86.8671,
"eval_l2ex_gen_len": 22.2271,
"eval_l2ex_rouge1": 28.7692,
"eval_l2ex_rouge2": 12.8536,
"eval_l2ex_rougeL": 25.3768,
"eval_l2ex_rougeLsum": 25.4158,
"eval_l2g_cer": 73.3411,
"eval_l2g_gen_len": 15.6692,
"eval_l2g_rouge1": 37.5152,
"eval_l2g_rouge2": 24.5536,
"eval_l2g_rougeL": 35.5225,
"eval_l2g_rougeLsum": 35.5437,
"eval_loss": 3.7121169567108154,
"eval_runtime": 192.3527,
"eval_samples_per_second": 51.577,
"eval_steps_per_second": 1.617,
"step": 10385
},
{
"epoch": 3.755868544600939,
"grad_norm": 0.8458616733551025,
"learning_rate": 8.94656454333133e-05,
"loss": 3.8883,
"step": 10400
},
{
"epoch": 3.9061032863849765,
"grad_norm": 3.1263327598571777,
"learning_rate": 8.843022292899726e-05,
"loss": 3.8775,
"step": 10816
},
{
"epoch": 4.056338028169014,
"grad_norm": 1.013489842414856,
"learning_rate": 8.735290900405437e-05,
"loss": 3.8514,
"step": 11232
},
{
"epoch": 4.206572769953052,
"grad_norm": 0.9674685001373291,
"learning_rate": 8.623488568958123e-05,
"loss": 3.7962,
"step": 11648
},
{
"epoch": 4.356807511737089,
"grad_norm": 1.0607421398162842,
"learning_rate": 8.507737968310197e-05,
"loss": 3.8043,
"step": 12064
},
{
"epoch": 4.500541711809317,
"eval_g2l_cer": 50.088,
"eval_g2l_gen_len": 3.0488,
"eval_g2l_rouge1": 38.7702,
"eval_g2l_rouge2": 30.6004,
"eval_g2l_rougeL": 38.7959,
"eval_g2l_rougeLsum": 38.7454,
"eval_l2ex_cer": 84.5143,
"eval_l2ex_gen_len": 20.52,
"eval_l2ex_rouge1": 28.9181,
"eval_l2ex_rouge2": 13.2853,
"eval_l2ex_rougeL": 25.6409,
"eval_l2ex_rougeLsum": 25.6588,
"eval_l2g_cer": 72.4949,
"eval_l2g_gen_len": 15.2432,
"eval_l2g_rouge1": 37.6479,
"eval_l2g_rouge2": 24.833,
"eval_l2g_rougeL": 35.7678,
"eval_l2g_rougeLsum": 35.776,
"eval_loss": 3.674677848815918,
"eval_runtime": 190.2532,
"eval_samples_per_second": 52.146,
"eval_steps_per_second": 1.635,
"step": 12462
},
{
"epoch": 4.507042253521127,
"grad_norm": 0.9242987632751465,
"learning_rate": 8.388166100263313e-05,
"loss": 3.804,
"step": 12480
},
{
"epoch": 4.657276995305164,
"grad_norm": 0.8233311772346497,
"learning_rate": 8.264904159321721e-05,
"loss": 3.7844,
"step": 12896
},
{
"epoch": 4.807511737089202,
"grad_norm": 1.918661117553711,
"learning_rate": 8.138087388745395e-05,
"loss": 3.7948,
"step": 13312
},
{
"epoch": 4.957746478873239,
"grad_norm": 0.8277648091316223,
"learning_rate": 8.00785493216083e-05,
"loss": 3.7951,
"step": 13728
},
{
"epoch": 5.107981220657277,
"grad_norm": 1.0518523454666138,
"learning_rate": 7.874349680892367e-05,
"loss": 3.7423,
"step": 14144
},
{
"epoch": 5.250631997110871,
"eval_g2l_cer": 49.743,
"eval_g2l_gen_len": 3.0201,
"eval_g2l_rouge1": 38.8263,
"eval_g2l_rouge2": 31.1673,
"eval_g2l_rougeL": 38.8286,
"eval_g2l_rougeLsum": 38.7898,
"eval_l2ex_cer": 86.565,
"eval_l2ex_gen_len": 21.7523,
"eval_l2ex_rouge1": 28.4984,
"eval_l2ex_rouge2": 13.072,
"eval_l2ex_rougeL": 25.2667,
"eval_l2ex_rougeLsum": 25.2757,
"eval_l2g_cer": 73.2917,
"eval_l2g_gen_len": 16.0011,
"eval_l2g_rouge1": 38.0438,
"eval_l2g_rouge2": 25.3209,
"eval_l2g_rougeL": 36.1091,
"eval_l2g_rougeLsum": 36.1243,
"eval_loss": 3.649608850479126,
"eval_runtime": 197.4229,
"eval_samples_per_second": 50.253,
"eval_steps_per_second": 1.575,
"step": 14539
},
{
"epoch": 5.258215962441315,
"grad_norm": 0.8540360331535339,
"learning_rate": 7.737718117181538e-05,
"loss": 3.7126,
"step": 14560
},
{
"epoch": 5.408450704225352,
"grad_norm": 0.9189392328262329,
"learning_rate": 7.598110153466441e-05,
"loss": 3.7223,
"step": 14976
},
{
"epoch": 5.55868544600939,
"grad_norm": 0.92618727684021,
"learning_rate": 7.45567896789749e-05,
"loss": 3.7139,
"step": 15392
},
{
"epoch": 5.708920187793427,
"grad_norm": 0.7882264852523804,
"learning_rate": 7.310580836270044e-05,
"loss": 3.7179,
"step": 15808
},
{
"epoch": 5.859154929577465,
"grad_norm": 0.8529959321022034,
"learning_rate": 7.162974960558259e-05,
"loss": 3.7121,
"step": 16224
},
{
"epoch": 6.000722282412423,
"eval_g2l_cer": 49.3934,
"eval_g2l_gen_len": 3.0096,
"eval_g2l_rouge1": 39.4408,
"eval_g2l_rouge2": 31.7057,
"eval_g2l_rougeL": 39.4639,
"eval_g2l_rougeLsum": 39.4161,
"eval_l2ex_cer": 86.119,
"eval_l2ex_gen_len": 20.7112,
"eval_l2ex_rouge1": 28.8739,
"eval_l2ex_rouge2": 13.2661,
"eval_l2ex_rougeL": 25.7042,
"eval_l2ex_rougeLsum": 25.7118,
"eval_l2g_cer": 73.625,
"eval_l2g_gen_len": 15.9897,
"eval_l2g_rouge1": 38.1171,
"eval_l2g_rouge2": 25.6405,
"eval_l2g_rougeL": 36.2592,
"eval_l2g_rougeLsum": 36.2666,
"eval_loss": 3.6273715496063232,
"eval_runtime": 193.9276,
"eval_samples_per_second": 51.158,
"eval_steps_per_second": 1.604,
"step": 16616
},
{
"epoch": 6.009389671361502,
"grad_norm": 0.7976297736167908,
"learning_rate": 7.013023294238368e-05,
"loss": 3.7191,
"step": 16640
},
{
"epoch": 6.15962441314554,
"grad_norm": 0.8516309261322021,
"learning_rate": 6.860890364592963e-05,
"loss": 3.6428,
"step": 17056
},
{
"epoch": 6.309859154929577,
"grad_norm": 0.9273515343666077,
"learning_rate": 6.706743092191335e-05,
"loss": 3.6566,
"step": 17472
},
{
"epoch": 6.460093896713615,
"grad_norm": 0.932829737663269,
"learning_rate": 6.550750607743873e-05,
"loss": 3.6627,
"step": 17888
},
{
"epoch": 6.610328638497653,
"grad_norm": 0.9968202114105225,
"learning_rate": 6.393084066531485e-05,
"loss": 3.6652,
"step": 18304
},
{
"epoch": 6.750812567713976,
"eval_g2l_cer": 49.5579,
"eval_g2l_gen_len": 2.9938,
"eval_g2l_rouge1": 39.6581,
"eval_g2l_rouge2": 32.026,
"eval_g2l_rougeL": 39.6932,
"eval_g2l_rougeLsum": 39.6518,
"eval_l2ex_cer": 88.4427,
"eval_l2ex_gen_len": 23.11,
"eval_l2ex_rouge1": 28.1485,
"eval_l2ex_rouge2": 12.4558,
"eval_l2ex_rougeL": 24.9414,
"eval_l2ex_rougeLsum": 24.9605,
"eval_l2g_cer": 73.3296,
"eval_l2g_gen_len": 16.3263,
"eval_l2g_rouge1": 38.4506,
"eval_l2g_rouge2": 25.7696,
"eval_l2g_rougeL": 36.5748,
"eval_l2g_rougeLsum": 36.6091,
"eval_loss": 3.6120047569274902,
"eval_runtime": 197.9501,
"eval_samples_per_second": 50.119,
"eval_steps_per_second": 1.571,
"step": 18693
},
{
"epoch": 6.76056338028169,
"grad_norm": 0.7791869640350342,
"learning_rate": 6.233916460613673e-05,
"loss": 3.6614,
"step": 18720
},
{
"epoch": 6.910798122065728,
"grad_norm": 0.9385781288146973,
"learning_rate": 6.0734224290212784e-05,
"loss": 3.6471,
"step": 19136
},
{
"epoch": 7.061032863849765,
"grad_norm": 0.8267916440963745,
"learning_rate": 5.9117780661421754e-05,
"loss": 3.6264,
"step": 19552
},
{
"epoch": 7.211267605633803,
"grad_norm": 0.794131875038147,
"learning_rate": 5.7491607285101345e-05,
"loss": 3.6015,
"step": 19968
},
{
"epoch": 7.36150234741784,
"grad_norm": 0.8748852610588074,
"learning_rate": 5.585748840208869e-05,
"loss": 3.5993,
"step": 20384
},
{
"epoch": 7.500902853015529,
"eval_g2l_cer": 50.088,
"eval_g2l_gen_len": 3.0582,
"eval_g2l_rouge1": 39.9874,
"eval_g2l_rouge2": 32.4432,
"eval_g2l_rougeL": 40.0195,
"eval_g2l_rougeLsum": 39.9365,
"eval_l2ex_cer": 87.6165,
"eval_l2ex_gen_len": 22.7133,
"eval_l2ex_rouge1": 28.1937,
"eval_l2ex_rouge2": 12.5673,
"eval_l2ex_rougeL": 24.9397,
"eval_l2ex_rougeLsum": 24.921,
"eval_l2g_cer": 72.7284,
"eval_l2g_gen_len": 15.6759,
"eval_l2g_rouge1": 38.4813,
"eval_l2g_rouge2": 25.936,
"eval_l2g_rougeL": 36.5693,
"eval_l2g_rougeLsum": 36.5729,
"eval_loss": 3.6013987064361572,
"eval_runtime": 195.438,
"eval_samples_per_second": 50.763,
"eval_steps_per_second": 1.591,
"step": 20770
},
{
"epoch": 7.511737089201878,
"grad_norm": 0.9019631743431091,
"learning_rate": 5.4217216971047445e-05,
"loss": 3.5978,
"step": 20800
},
{
"epoch": 7.661971830985916,
"grad_norm": 0.8872570395469666,
"learning_rate": 5.257259270122993e-05,
"loss": 3.6113,
"step": 21216
},
{
"epoch": 7.812206572769953,
"grad_norm": 0.7394893169403076,
"learning_rate": 5.0925420077832285e-05,
"loss": 3.593,
"step": 21632
},
{
"epoch": 7.962441314553991,
"grad_norm": 0.8534842133522034,
"learning_rate": 4.927750638210947e-05,
"loss": 3.5963,
"step": 22048
},
{
"epoch": 8.112676056338028,
"grad_norm": 0.9047814607620239,
"learning_rate": 4.7630659708422666e-05,
"loss": 3.5722,
"step": 22464
},
{
"epoch": 8.250993138317082,
"eval_g2l_cer": 49.5716,
"eval_g2l_gen_len": 3.0388,
"eval_g2l_rouge1": 40.4088,
"eval_g2l_rouge2": 32.7272,
"eval_g2l_rougeL": 40.4374,
"eval_g2l_rougeLsum": 40.3677,
"eval_l2ex_cer": 83.5858,
"eval_l2ex_gen_len": 20.4851,
"eval_l2ex_rouge1": 29.084,
"eval_l2ex_rouge2": 12.9208,
"eval_l2ex_rougeL": 25.6832,
"eval_l2ex_rougeLsum": 25.7033,
"eval_l2g_cer": 72.1741,
"eval_l2g_gen_len": 15.6461,
"eval_l2g_rouge1": 38.8628,
"eval_l2g_rouge2": 26.1912,
"eval_l2g_rougeL": 36.9072,
"eval_l2g_rougeLsum": 36.9086,
"eval_loss": 3.5901942253112793,
"eval_runtime": 190.412,
"eval_samples_per_second": 52.103,
"eval_steps_per_second": 1.633,
"step": 22847
},
{
"epoch": 8.262910798122066,
"grad_norm": 0.8366677761077881,
"learning_rate": 4.598668698039414e-05,
"loss": 3.5641,
"step": 22880
},
{
"epoch": 8.413145539906104,
"grad_norm": 0.8628195524215698,
"learning_rate": 4.4347391968347015e-05,
"loss": 3.5702,
"step": 23296
},
{
"epoch": 8.56338028169014,
"grad_norm": 0.9060849547386169,
"learning_rate": 4.27145733102046e-05,
"loss": 3.5508,
"step": 23712
},
{
"epoch": 8.713615023474178,
"grad_norm": 0.8726539015769958,
"learning_rate": 4.109002253802116e-05,
"loss": 3.5637,
"step": 24128
},
{
"epoch": 8.863849765258216,
"grad_norm": 0.9154978394508362,
"learning_rate": 3.947552211230913e-05,
"loss": 3.5435,
"step": 24544
},
{
"epoch": 9.001083423618635,
"eval_g2l_cer": 48.6326,
"eval_g2l_gen_len": 3.008,
"eval_g2l_rouge1": 40.6427,
"eval_g2l_rouge2": 33.0447,
"eval_g2l_rougeL": 40.6651,
"eval_g2l_rougeLsum": 40.6197,
"eval_l2ex_cer": 85.6816,
"eval_l2ex_gen_len": 20.9753,
"eval_l2ex_rouge1": 28.5827,
"eval_l2ex_rouge2": 12.8213,
"eval_l2ex_rougeL": 25.352,
"eval_l2ex_rougeLsum": 25.3642,
"eval_l2g_cer": 72.7802,
"eval_l2g_gen_len": 15.8102,
"eval_l2g_rouge1": 38.814,
"eval_l2g_rouge2": 26.1373,
"eval_l2g_rougeL": 36.8943,
"eval_l2g_rougeLsum": 36.9272,
"eval_loss": 3.5814104080200195,
"eval_runtime": 193.5202,
"eval_samples_per_second": 51.266,
"eval_steps_per_second": 1.607,
"step": 24924
},
{
"epoch": 9.014084507042254,
"grad_norm": 0.9910312294960022,
"learning_rate": 3.7872843466319744e-05,
"loss": 3.5601,
"step": 24960
},
{
"epoch": 9.164319248826292,
"grad_norm": 0.913223922252655,
"learning_rate": 3.6283745062422726e-05,
"loss": 3.5156,
"step": 25376
},
{
"epoch": 9.314553990610328,
"grad_norm": 0.9026065468788147,
"learning_rate": 3.470997046271774e-05,
"loss": 3.5337,
"step": 25792
},
{
"epoch": 9.464788732394366,
"grad_norm": 0.9726517796516418,
"learning_rate": 3.315324641599434e-05,
"loss": 3.5294,
"step": 26208
},
{
"epoch": 9.615023474178404,
"grad_norm": 0.954593300819397,
"learning_rate": 3.161528096313964e-05,
"loss": 3.5242,
"step": 26624
},
{
"epoch": 9.751173708920188,
"eval_g2l_cer": 48.3196,
"eval_g2l_gen_len": 3.0196,
"eval_g2l_rouge1": 41.1733,
"eval_g2l_rouge2": 33.4761,
"eval_g2l_rougeL": 41.172,
"eval_g2l_rougeLsum": 41.1111,
"eval_l2ex_cer": 86.3469,
"eval_l2ex_gen_len": 21.333,
"eval_l2ex_rouge1": 28.6196,
"eval_l2ex_rouge2": 12.797,
"eval_l2ex_rougeL": 25.331,
"eval_l2ex_rougeLsum": 25.3251,
"eval_l2g_cer": 71.8519,
"eval_l2g_gen_len": 15.5771,
"eval_l2g_rouge1": 38.9877,
"eval_l2g_rouge2": 26.3016,
"eval_l2g_rougeL": 36.97,
"eval_l2g_rougeLsum": 37.0109,
"eval_loss": 3.5751187801361084,
"eval_runtime": 190.5769,
"eval_samples_per_second": 52.058,
"eval_steps_per_second": 1.632,
"step": 27001
},
{
"epoch": 9.765258215962442,
"grad_norm": 0.7817335724830627,
"learning_rate": 3.00977615630722e-05,
"loss": 3.5332,
"step": 27040
},
{
"epoch": 9.915492957746478,
"grad_norm": 0.8576836585998535,
"learning_rate": 2.8602353241258667e-05,
"loss": 3.5247,
"step": 27456
},
{
"epoch": 10.065727699530516,
"grad_norm": 0.924045741558075,
"learning_rate": 2.7130696762844198e-05,
"loss": 3.5171,
"step": 27872
},
{
"epoch": 10.215962441314554,
"grad_norm": 0.9701129198074341,
"learning_rate": 2.568440683240166e-05,
"loss": 3.4886,
"step": 28288
},
{
"epoch": 10.366197183098592,
"grad_norm": 0.8473976850509644,
"learning_rate": 2.426507032227427e-05,
"loss": 3.5134,
"step": 28704
},
{
"epoch": 10.501263994221741,
"eval_g2l_cer": 48.8336,
"eval_g2l_gen_len": 3.0502,
"eval_g2l_rouge1": 41.0241,
"eval_g2l_rouge2": 33.2994,
"eval_g2l_rougeL": 41.0374,
"eval_g2l_rougeLsum": 40.9554,
"eval_l2ex_cer": 85.2795,
"eval_l2ex_gen_len": 21.6999,
"eval_l2ex_rouge1": 28.6576,
"eval_l2ex_rouge2": 12.5848,
"eval_l2ex_rougeL": 25.1057,
"eval_l2ex_rougeLsum": 25.1478,
"eval_l2g_cer": 71.5555,
"eval_l2g_gen_len": 15.5923,
"eval_l2g_rouge1": 39.111,
"eval_l2g_rouge2": 26.3632,
"eval_l2g_rougeL": 37.134,
"eval_l2g_rougeLsum": 37.1562,
"eval_loss": 3.5716097354888916,
"eval_runtime": 190.1354,
"eval_samples_per_second": 52.179,
"eval_steps_per_second": 1.636,
"step": 29078
},
{
"epoch": 10.51643192488263,
"grad_norm": 0.9222161769866943,
"learning_rate": 2.2874244531456016e-05,
"loss": 3.4995,
"step": 29120
},
{
"epoch": 10.666666666666666,
"grad_norm": 0.8834406137466431,
"learning_rate": 2.1513455476919875e-05,
"loss": 3.5005,
"step": 29536
},
{
"epoch": 10.816901408450704,
"grad_norm": 1.2534151077270508,
"learning_rate": 2.0184196219268805e-05,
"loss": 3.4956,
"step": 29952
},
{
"epoch": 10.967136150234742,
"grad_norm": 1.0579476356506348,
"learning_rate": 1.8887925224546575e-05,
"loss": 3.4984,
"step": 30368
},
{
"epoch": 11.11737089201878,
"grad_norm": 0.9352797269821167,
"learning_rate": 1.7626064764005655e-05,
"loss": 3.4891,
"step": 30784
},
{
"epoch": 11.251354279523294,
"eval_g2l_cer": 48.1779,
"eval_g2l_gen_len": 3.0241,
"eval_g2l_rouge1": 41.3076,
"eval_g2l_rouge2": 33.5874,
"eval_g2l_rougeL": 41.3381,
"eval_g2l_rougeLsum": 41.2834,
"eval_l2ex_cer": 86.303,
"eval_l2ex_gen_len": 21.6927,
"eval_l2ex_rouge1": 28.5306,
"eval_l2ex_rouge2": 12.66,
"eval_l2ex_rougeL": 25.107,
"eval_l2ex_rougeLsum": 25.1229,
"eval_l2g_cer": 71.7607,
"eval_l2g_gen_len": 15.6002,
"eval_l2g_rouge1": 39.1998,
"eval_l2g_rouge2": 26.5146,
"eval_l2g_rougeL": 37.2299,
"eval_l2g_rougeLsum": 37.2583,
"eval_loss": 3.5692920684814453,
"eval_runtime": 191.2935,
"eval_samples_per_second": 51.863,
"eval_steps_per_second": 1.626,
"step": 31155
},
{
"epoch": 11.267605633802816,
"grad_norm": 0.8403520584106445,
"learning_rate": 1.6399999353588347e-05,
"loss": 3.4762,
"step": 31200
},
{
"epoch": 11.417840375586854,
"grad_norm": 0.8685266375541687,
"learning_rate": 1.5211074234832911e-05,
"loss": 3.491,
"step": 31616
},
{
"epoch": 11.568075117370892,
"grad_norm": 0.8662200570106506,
"learning_rate": 1.4060593898871712e-05,
"loss": 3.4818,
"step": 32032
},
{
"epoch": 11.71830985915493,
"grad_norm": 0.915972888469696,
"learning_rate": 1.2949820655140888e-05,
"loss": 3.4729,
"step": 32448
},
{
"epoch": 11.868544600938968,
"grad_norm": 0.9427916407585144,
"learning_rate": 1.187997324637174e-05,
"loss": 3.4837,
"step": 32864
},
{
"epoch": 12.001444564824846,
"eval_g2l_cer": 48.4635,
"eval_g2l_gen_len": 3.0374,
"eval_g2l_rouge1": 41.42,
"eval_g2l_rouge2": 33.7871,
"eval_g2l_rougeL": 41.41,
"eval_g2l_rougeLsum": 41.3653,
"eval_l2ex_cer": 84.6873,
"eval_l2ex_gen_len": 21.5406,
"eval_l2ex_rouge1": 28.7533,
"eval_l2ex_rouge2": 12.7721,
"eval_l2ex_rougeL": 25.3715,
"eval_l2ex_rougeLsum": 25.3817,
"eval_l2g_cer": 71.4847,
"eval_l2g_gen_len": 15.5437,
"eval_l2g_rouge1": 39.2147,
"eval_l2g_rouge2": 26.5099,
"eval_l2g_rougeL": 37.2362,
"eval_l2g_rougeLsum": 37.2641,
"eval_loss": 3.5653076171875,
"eval_runtime": 189.8727,
"eval_samples_per_second": 52.251,
"eval_steps_per_second": 1.638,
"step": 33232
},
{
"epoch": 12.018779342723004,
"grad_norm": 0.8259687423706055,
"learning_rate": 1.0852225511383663e-05,
"loss": 3.4764,
"step": 33280
},
{
"epoch": 12.169014084507042,
"grad_norm": 0.904097855091095,
"learning_rate": 9.86770509714574e-06,
"loss": 3.4791,
"step": 33696
},
{
"epoch": 12.31924882629108,
"grad_norm": 0.9662612080574036,
"learning_rate": 8.927492221520133e-06,
"loss": 3.4593,
"step": 34112
},
{
"epoch": 12.469483568075118,
"grad_norm": 0.9324942231178284,
"learning_rate": 8.032618488044715e-06,
"loss": 3.4564,
"step": 34528
},
{
"epoch": 12.619718309859154,
"grad_norm": 0.9966897964477539,
"learning_rate": 7.184065754055608e-06,
"loss": 3.4576,
"step": 34944
},
{
"epoch": 12.751534850126399,
"eval_g2l_cer": 47.8718,
"eval_g2l_gen_len": 3.0243,
"eval_g2l_rouge1": 41.399,
"eval_g2l_rouge2": 33.8189,
"eval_g2l_rougeL": 41.4105,
"eval_g2l_rougeLsum": 41.3515,
"eval_l2ex_cer": 84.0524,
"eval_l2ex_gen_len": 21.0206,
"eval_l2ex_rouge1": 28.7814,
"eval_l2ex_rouge2": 12.7663,
"eval_l2ex_rougeL": 25.3724,
"eval_l2ex_rougeLsum": 25.3895,
"eval_l2g_cer": 71.6622,
"eval_l2g_gen_len": 15.563,
"eval_l2g_rouge1": 39.1666,
"eval_l2g_rouge2": 26.5275,
"eval_l2g_rougeL": 37.1881,
"eval_l2g_rougeLsum": 37.2249,
"eval_loss": 3.564103841781616,
"eval_runtime": 190.2806,
"eval_samples_per_second": 52.139,
"eval_steps_per_second": 1.634,
"step": 35309
},
{
"epoch": 12.769953051643192,
"grad_norm": 1.0099953413009644,
"learning_rate": 6.382765053391182e-06,
"loss": 3.4757,
"step": 35360
},
{
"epoch": 12.92018779342723,
"grad_norm": 0.8347458243370056,
"learning_rate": 5.629595574859816e-06,
"loss": 3.4814,
"step": 35776
},
{
"epoch": 13.070422535211268,
"grad_norm": 0.8532468676567078,
"learning_rate": 4.925383697592043e-06,
"loss": 3.4667,
"step": 36192
},
{
"epoch": 13.220657276995306,
"grad_norm": 0.8852038383483887,
"learning_rate": 4.2709020843357075e-06,
"loss": 3.4512,
"step": 36608
},
{
"epoch": 13.370892018779342,
"grad_norm": 1.058424472808838,
"learning_rate": 3.666868833688726e-06,
"loss": 3.4616,
"step": 37024
},
{
"epoch": 13.501625135427952,
"eval_g2l_cer": 47.8581,
"eval_g2l_gen_len": 3.0221,
"eval_g2l_rouge1": 41.4693,
"eval_g2l_rouge2": 33.7773,
"eval_g2l_rougeL": 41.4822,
"eval_g2l_rougeLsum": 41.4356,
"eval_l2ex_cer": 84.3083,
"eval_l2ex_gen_len": 21.0319,
"eval_l2ex_rouge1": 28.654,
"eval_l2ex_rouge2": 12.8413,
"eval_l2ex_rougeL": 25.3941,
"eval_l2ex_rougeLsum": 25.4326,
"eval_l2g_cer": 71.0018,
"eval_l2g_gen_len": 15.3407,
"eval_l2g_rouge1": 39.2009,
"eval_l2g_rouge2": 26.5422,
"eval_l2g_rougeL": 37.2433,
"eval_l2g_rougeLsum": 37.2693,
"eval_loss": 3.5637874603271484,
"eval_runtime": 187.7571,
"eval_samples_per_second": 52.84,
"eval_steps_per_second": 1.656,
"step": 37386
}
],
"logging_steps": 416,
"max_steps": 41535,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 2077,
"total_flos": 7.17240637379838e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}