{
  "best_metric": 3.5637874603271484,
  "best_model_checkpoint": "checkpoints/mt5-base/checkpoint-37386",
  "epoch": 13.501625135427952,
  "eval_steps": 2077,
  "global_step": 37386,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14987360057782592,
      "eval_g2l_cer": 286.0084,
      "eval_g2l_gen_len": 8.1826,
      "eval_g2l_rouge1": 5.5622,
      "eval_g2l_rouge2": 1.1913,
      "eval_g2l_rougeL": 5.4996,
      "eval_g2l_rougeLsum": 5.5114,
      "eval_l2ex_cer": 86.6247,
      "eval_l2ex_gen_len": 7.9383,
      "eval_l2ex_rouge1": 16.8571,
      "eval_l2ex_rouge2": 5.0595,
      "eval_l2ex_rougeL": 15.4605,
      "eval_l2ex_rougeLsum": 15.4778,
      "eval_l2g_cer": 87.9265,
      "eval_l2g_gen_len": 5.5834,
      "eval_l2g_rouge1": 12.3664,
      "eval_l2g_rouge2": 1.6791,
      "eval_l2g_rougeL": 11.57,
      "eval_l2g_rougeLsum": 11.5871,
      "eval_loss": 8.359071731567383,
      "eval_runtime": 145.7254,
      "eval_samples_per_second": 68.08,
      "eval_steps_per_second": 2.134,
      "step": 415
    },
    {
      "epoch": 0.15023474178403756,
      "grad_norm": 1659.9119873046875,
      "learning_rate": 2.0038535645472063e-05,
      "loss": 15.088,
      "step": 416
    },
    {
      "epoch": 0.3004694835680751,
      "grad_norm": 8.369742393493652,
      "learning_rate": 4.0077071290944125e-05,
      "loss": 7.1403,
      "step": 832
    },
    {
      "epoch": 0.4507042253521127,
      "grad_norm": 1.8832136392593384,
      "learning_rate": 6.0115606936416195e-05,
      "loss": 5.3841,
      "step": 1248
    },
    {
      "epoch": 0.6009389671361502,
      "grad_norm": 23.656179428100586,
      "learning_rate": 8.015414258188825e-05,
      "loss": 4.9928,
      "step": 1664
    },
    {
      "epoch": 0.7500902853015529,
      "eval_g2l_cer": 59.4485,
      "eval_g2l_gen_len": 2.9556,
      "eval_g2l_rouge1": 27.1975,
      "eval_g2l_rouge2": 17.2281,
      "eval_g2l_rougeL": 27.1916,
      "eval_g2l_rougeLsum": 27.2117,
      "eval_l2ex_cer": 95.5123,
      "eval_l2ex_gen_len": 23.6742,
      "eval_l2ex_rouge1": 25.7497,
      "eval_l2ex_rouge2": 11.1192,
      "eval_l2ex_rougeL": 22.9461,
      "eval_l2ex_rougeLsum": 22.9441,
      "eval_l2g_cer": 83.2502,
      "eval_l2g_gen_len": 15.9711,
      "eval_l2g_rouge1": 27.2934,
      "eval_l2g_rouge2": 14.8195,
      "eval_l2g_rougeL": 25.9582,
      "eval_l2g_rougeLsum": 25.9617,
      "eval_loss": 4.2178425788879395,
      "eval_runtime": 203.8137,
      "eval_samples_per_second": 48.677,
      "eval_steps_per_second": 1.526,
      "step": 2077
    },
    {
      "epoch": 0.7511737089201878,
      "grad_norm": 1.5714240074157715,
      "learning_rate": 9.999999747638704e-05,
      "loss": 4.7614,
      "step": 2080
    },
    {
      "epoch": 0.9014084507042254,
      "grad_norm": 2.348027467727661,
      "learning_rate": 9.997217976013284e-05,
      "loss": 4.6037,
      "step": 2496
    },
    {
      "epoch": 1.051643192488263,
      "grad_norm": 2.6275577545166016,
      "learning_rate": 9.98898067640237e-05,
      "loss": 4.5136,
      "step": 2912
    },
    {
      "epoch": 1.2018779342723005,
      "grad_norm": 1.1122652292251587,
      "learning_rate": 9.975296886788363e-05,
      "loss": 4.4057,
      "step": 3328
    },
    {
      "epoch": 1.352112676056338,
      "grad_norm": 1.2248876094818115,
      "learning_rate": 9.956181621053908e-05,
      "loss": 4.3513,
      "step": 3744
    },
    {
      "epoch": 1.500180570603106,
      "eval_g2l_cer": 56.9353,
      "eval_g2l_gen_len": 3.093,
      "eval_g2l_rouge1": 32.8366,
      "eval_g2l_rouge2": 24.3793,
      "eval_g2l_rougeL": 32.7964,
      "eval_g2l_rougeLsum": 32.7681,
      "eval_l2ex_cer": 84.2827,
      "eval_l2ex_gen_len": 20.4604,
      "eval_l2ex_rouge1": 28.5353,
      "eval_l2ex_rouge2": 12.5551,
      "eval_l2ex_rougeL": 25.5058,
      "eval_l2ex_rougeLsum": 25.5427,
      "eval_l2g_cer": 81.1104,
      "eval_l2g_gen_len": 18.0072,
      "eval_l2g_rouge1": 32.688,
      "eval_l2g_rouge2": 18.9467,
      "eval_l2g_rougeL": 30.7295,
      "eval_l2g_rougeLsum": 30.7676,
      "eval_loss": 3.949233293533325,
      "eval_runtime": 197.691,
      "eval_samples_per_second": 50.184,
      "eval_steps_per_second": 1.573,
      "step": 4154
    },
    {
      "epoch": 1.5023474178403755,
      "grad_norm": 1.4981008768081665,
      "learning_rate": 9.931655852508637e-05,
      "loss": 4.3061,
      "step": 4160
    },
    {
      "epoch": 1.652582159624413,
      "grad_norm": 1.050997018814087,
      "learning_rate": 9.901746490877203e-05,
      "loss": 4.2525,
      "step": 4576
    },
    {
      "epoch": 1.8028169014084507,
      "grad_norm": 0.8377422094345093,
      "learning_rate": 9.866486352773886e-05,
      "loss": 4.2289,
      "step": 4992
    },
    {
      "epoch": 1.9530516431924883,
      "grad_norm": 0.968815267086029,
      "learning_rate": 9.82591412569612e-05,
      "loss": 4.1958,
      "step": 5408
    },
    {
      "epoch": 2.103286384976526,
      "grad_norm": 0.9952152967453003,
      "learning_rate": 9.780074325576496e-05,
      "loss": 4.1187,
      "step": 5824
    },
    {
      "epoch": 2.2502708559046587,
      "eval_g2l_cer": 53.8806,
      "eval_g2l_gen_len": 3.0548,
      "eval_g2l_rouge1": 35.439,
      "eval_g2l_rouge2": 27.2433,
      "eval_g2l_rougeL": 35.4384,
      "eval_g2l_rougeLsum": 35.3985,
      "eval_l2ex_cer": 89.3431,
      "eval_l2ex_gen_len": 23.4573,
      "eval_l2ex_rouge1": 27.8815,
      "eval_l2ex_rouge2": 12.1568,
      "eval_l2ex_rougeL": 24.5796,
      "eval_l2ex_rougeLsum": 24.6286,
      "eval_l2g_cer": 78.589,
      "eval_l2g_gen_len": 17.4946,
      "eval_l2g_rouge1": 35.6236,
      "eval_l2g_rouge2": 22.8027,
      "eval_l2g_rougeL": 33.8966,
      "eval_l2g_rougeLsum": 33.9001,
      "eval_loss": 3.8344309329986572,
      "eval_runtime": 202.9852,
      "eval_samples_per_second": 48.875,
      "eval_steps_per_second": 1.532,
      "step": 6231
    },
    {
      "epoch": 2.2535211267605635,
      "grad_norm": 24.567110061645508,
      "learning_rate": 9.72901724793979e-05,
      "loss": 4.0993,
      "step": 6240
    },
    {
      "epoch": 2.403755868544601,
      "grad_norm": 0.9726364612579346,
      "learning_rate": 9.672798912718604e-05,
      "loss": 4.0734,
      "step": 6656
    },
    {
      "epoch": 2.5539906103286385,
      "grad_norm": 0.9216151833534241,
      "learning_rate": 9.611481002788184e-05,
      "loss": 4.0584,
      "step": 7072
    },
    {
      "epoch": 2.704225352112676,
      "grad_norm": 0.7880883812904358,
      "learning_rate": 9.545130796287832e-05,
      "loss": 4.0312,
      "step": 7488
    },
    {
      "epoch": 2.8544600938967135,
      "grad_norm": 0.9635422229766846,
      "learning_rate": 9.473821092803199e-05,
      "loss": 4.0046,
      "step": 7904
    },
    {
      "epoch": 3.0003611412062114,
      "eval_g2l_cer": 52.062,
      "eval_g2l_gen_len": 3.0702,
      "eval_g2l_rouge1": 36.8811,
      "eval_g2l_rouge2": 28.8156,
      "eval_g2l_rougeL": 36.8925,
      "eval_g2l_rougeLsum": 36.8317,
      "eval_l2ex_cer": 90.1083,
      "eval_l2ex_gen_len": 22.4645,
      "eval_l2ex_rouge1": 27.5056,
      "eval_l2ex_rouge2": 12.5248,
      "eval_l2ex_rougeL": 24.4085,
      "eval_l2ex_rougeLsum": 24.4463,
      "eval_l2g_cer": 78.1779,
      "eval_l2g_gen_len": 17.8095,
      "eval_l2g_rouge1": 36.8332,
      "eval_l2g_rouge2": 23.9422,
      "eval_l2g_rougeL": 34.9672,
      "eval_l2g_rougeLsum": 34.995,
      "eval_loss": 3.7595808506011963,
      "eval_runtime": 202.3409,
      "eval_samples_per_second": 49.031,
      "eval_steps_per_second": 1.537,
      "step": 8308
    },
    {
      "epoch": 3.004694835680751,
      "grad_norm": 0.900855541229248,
      "learning_rate": 9.397630133490413e-05,
      "loss": 3.992,
      "step": 8320
    },
    {
      "epoch": 3.1549295774647885,
      "grad_norm": 0.8881470561027527,
      "learning_rate": 9.316641515229741e-05,
      "loss": 3.9362,
      "step": 8736
    },
    {
      "epoch": 3.3051643192488265,
      "grad_norm": 0.7969784140586853,
      "learning_rate": 9.230944098902894e-05,
      "loss": 3.9143,
      "step": 9152
    },
    {
      "epoch": 3.455399061032864,
      "grad_norm": 0.8603357672691345,
      "learning_rate": 9.1406319118947e-05,
      "loss": 3.9162,
      "step": 9568
    },
    {
      "epoch": 3.6056338028169015,
      "grad_norm": 0.9974511861801147,
      "learning_rate": 9.045804044926044e-05,
      "loss": 3.8987,
      "step": 9984
    },
    {
      "epoch": 3.7504514265077646,
      "eval_g2l_cer": 50.7917,
      "eval_g2l_gen_len": 3.0031,
      "eval_g2l_rouge1": 37.7135,
      "eval_g2l_rouge2": 29.9526,
      "eval_g2l_rougeL": 37.7649,
      "eval_g2l_rougeLsum": 37.7041,
      "eval_l2ex_cer": 86.8671,
      "eval_l2ex_gen_len": 22.2271,
      "eval_l2ex_rouge1": 28.7692,
      "eval_l2ex_rouge2": 12.8536,
      "eval_l2ex_rougeL": 25.3768,
      "eval_l2ex_rougeLsum": 25.4158,
      "eval_l2g_cer": 73.3411,
      "eval_l2g_gen_len": 15.6692,
      "eval_l2g_rouge1": 37.5152,
      "eval_l2g_rouge2": 24.5536,
      "eval_l2g_rougeL": 35.5225,
      "eval_l2g_rougeLsum": 35.5437,
      "eval_loss": 3.7121169567108154,
      "eval_runtime": 192.3527,
      "eval_samples_per_second": 51.577,
      "eval_steps_per_second": 1.617,
      "step": 10385
    },
    {
      "epoch": 3.755868544600939,
      "grad_norm": 0.8458616733551025,
      "learning_rate": 8.94656454333133e-05,
      "loss": 3.8883,
      "step": 10400
    },
    {
      "epoch": 3.9061032863849765,
      "grad_norm": 3.1263327598571777,
      "learning_rate": 8.843022292899726e-05,
      "loss": 3.8775,
      "step": 10816
    },
    {
      "epoch": 4.056338028169014,
      "grad_norm": 1.013489842414856,
      "learning_rate": 8.735290900405437e-05,
      "loss": 3.8514,
      "step": 11232
    },
    {
      "epoch": 4.206572769953052,
      "grad_norm": 0.9674685001373291,
      "learning_rate": 8.623488568958123e-05,
      "loss": 3.7962,
      "step": 11648
    },
    {
      "epoch": 4.356807511737089,
      "grad_norm": 1.0607421398162842,
      "learning_rate": 8.507737968310197e-05,
      "loss": 3.8043,
      "step": 12064
    },
    {
      "epoch": 4.500541711809317,
      "eval_g2l_cer": 50.088,
      "eval_g2l_gen_len": 3.0488,
      "eval_g2l_rouge1": 38.7702,
      "eval_g2l_rouge2": 30.6004,
      "eval_g2l_rougeL": 38.7959,
      "eval_g2l_rougeLsum": 38.7454,
      "eval_l2ex_cer": 84.5143,
      "eval_l2ex_gen_len": 20.52,
      "eval_l2ex_rouge1": 28.9181,
      "eval_l2ex_rouge2": 13.2853,
      "eval_l2ex_rougeL": 25.6409,
      "eval_l2ex_rougeLsum": 25.6588,
      "eval_l2g_cer": 72.4949,
      "eval_l2g_gen_len": 15.2432,
      "eval_l2g_rouge1": 37.6479,
      "eval_l2g_rouge2": 24.833,
      "eval_l2g_rougeL": 35.7678,
      "eval_l2g_rougeLsum": 35.776,
      "eval_loss": 3.674677848815918,
      "eval_runtime": 190.2532,
      "eval_samples_per_second": 52.146,
      "eval_steps_per_second": 1.635,
      "step": 12462
    },
    {
      "epoch": 4.507042253521127,
      "grad_norm": 0.9242987632751465,
      "learning_rate": 8.388166100263313e-05,
      "loss": 3.804,
      "step": 12480
    },
    {
      "epoch": 4.657276995305164,
      "grad_norm": 0.8233311772346497,
      "learning_rate": 8.264904159321721e-05,
      "loss": 3.7844,
      "step": 12896
    },
    {
      "epoch": 4.807511737089202,
      "grad_norm": 1.918661117553711,
      "learning_rate": 8.138087388745395e-05,
      "loss": 3.7948,
      "step": 13312
    },
    {
      "epoch": 4.957746478873239,
      "grad_norm": 0.8277648091316223,
      "learning_rate": 8.00785493216083e-05,
      "loss": 3.7951,
      "step": 13728
    },
    {
      "epoch": 5.107981220657277,
      "grad_norm": 1.0518523454666138,
      "learning_rate": 7.874349680892367e-05,
      "loss": 3.7423,
      "step": 14144
    },
    {
      "epoch": 5.250631997110871,
      "eval_g2l_cer": 49.743,
      "eval_g2l_gen_len": 3.0201,
      "eval_g2l_rouge1": 38.8263,
      "eval_g2l_rouge2": 31.1673,
      "eval_g2l_rougeL": 38.8286,
      "eval_g2l_rougeLsum": 38.7898,
      "eval_l2ex_cer": 86.565,
      "eval_l2ex_gen_len": 21.7523,
      "eval_l2ex_rouge1": 28.4984,
      "eval_l2ex_rouge2": 13.072,
      "eval_l2ex_rougeL": 25.2667,
      "eval_l2ex_rougeLsum": 25.2757,
      "eval_l2g_cer": 73.2917,
      "eval_l2g_gen_len": 16.0011,
      "eval_l2g_rouge1": 38.0438,
      "eval_l2g_rouge2": 25.3209,
      "eval_l2g_rougeL": 36.1091,
      "eval_l2g_rougeLsum": 36.1243,
      "eval_loss": 3.649608850479126,
      "eval_runtime": 197.4229,
      "eval_samples_per_second": 50.253,
      "eval_steps_per_second": 1.575,
      "step": 14539
    },
    {
      "epoch": 5.258215962441315,
      "grad_norm": 0.8540360331535339,
      "learning_rate": 7.737718117181538e-05,
      "loss": 3.7126,
      "step": 14560
    },
    {
      "epoch": 5.408450704225352,
      "grad_norm": 0.9189392328262329,
      "learning_rate": 7.598110153466441e-05,
      "loss": 3.7223,
      "step": 14976
    },
    {
      "epoch": 5.55868544600939,
      "grad_norm": 0.92618727684021,
      "learning_rate": 7.45567896789749e-05,
      "loss": 3.7139,
      "step": 15392
    },
    {
      "epoch": 5.708920187793427,
      "grad_norm": 0.7882264852523804,
      "learning_rate": 7.310580836270044e-05,
      "loss": 3.7179,
      "step": 15808
    },
    {
      "epoch": 5.859154929577465,
      "grad_norm": 0.8529959321022034,
      "learning_rate": 7.162974960558259e-05,
      "loss": 3.7121,
      "step": 16224
    },
    {
      "epoch": 6.000722282412423,
      "eval_g2l_cer": 49.3934,
      "eval_g2l_gen_len": 3.0096,
      "eval_g2l_rouge1": 39.4408,
      "eval_g2l_rouge2": 31.7057,
      "eval_g2l_rougeL": 39.4639,
      "eval_g2l_rougeLsum": 39.4161,
      "eval_l2ex_cer": 86.119,
      "eval_l2ex_gen_len": 20.7112,
      "eval_l2ex_rouge1": 28.8739,
      "eval_l2ex_rouge2": 13.2661,
      "eval_l2ex_rougeL": 25.7042,
      "eval_l2ex_rougeLsum": 25.7118,
      "eval_l2g_cer": 73.625,
      "eval_l2g_gen_len": 15.9897,
      "eval_l2g_rouge1": 38.1171,
      "eval_l2g_rouge2": 25.6405,
      "eval_l2g_rougeL": 36.2592,
      "eval_l2g_rougeLsum": 36.2666,
      "eval_loss": 3.6273715496063232,
      "eval_runtime": 193.9276,
      "eval_samples_per_second": 51.158,
      "eval_steps_per_second": 1.604,
      "step": 16616
    },
    {
      "epoch": 6.009389671361502,
      "grad_norm": 0.7976297736167908,
      "learning_rate": 7.013023294238368e-05,
      "loss": 3.7191,
      "step": 16640
    },
    {
      "epoch": 6.15962441314554,
      "grad_norm": 0.8516309261322021,
      "learning_rate": 6.860890364592963e-05,
      "loss": 3.6428,
      "step": 17056
    },
    {
      "epoch": 6.309859154929577,
      "grad_norm": 0.9273515343666077,
      "learning_rate": 6.706743092191335e-05,
      "loss": 3.6566,
      "step": 17472
    },
    {
      "epoch": 6.460093896713615,
      "grad_norm": 0.932829737663269,
      "learning_rate": 6.550750607743873e-05,
      "loss": 3.6627,
      "step": 17888
    },
    {
      "epoch": 6.610328638497653,
      "grad_norm": 0.9968202114105225,
      "learning_rate": 6.393084066531485e-05,
      "loss": 3.6652,
      "step": 18304
    },
    {
      "epoch": 6.750812567713976,
      "eval_g2l_cer": 49.5579,
      "eval_g2l_gen_len": 2.9938,
      "eval_g2l_rouge1": 39.6581,
      "eval_g2l_rouge2": 32.026,
      "eval_g2l_rougeL": 39.6932,
      "eval_g2l_rougeLsum": 39.6518,
      "eval_l2ex_cer": 88.4427,
      "eval_l2ex_gen_len": 23.11,
      "eval_l2ex_rouge1": 28.1485,
      "eval_l2ex_rouge2": 12.4558,
      "eval_l2ex_rougeL": 24.9414,
      "eval_l2ex_rougeLsum": 24.9605,
      "eval_l2g_cer": 73.3296,
      "eval_l2g_gen_len": 16.3263,
      "eval_l2g_rouge1": 38.4506,
      "eval_l2g_rouge2": 25.7696,
      "eval_l2g_rougeL": 36.5748,
      "eval_l2g_rougeLsum": 36.6091,
      "eval_loss": 3.6120047569274902,
      "eval_runtime": 197.9501,
      "eval_samples_per_second": 50.119,
      "eval_steps_per_second": 1.571,
      "step": 18693
    },
    {
      "epoch": 6.76056338028169,
      "grad_norm": 0.7791869640350342,
      "learning_rate": 6.233916460613673e-05,
      "loss": 3.6614,
      "step": 18720
    },
    {
      "epoch": 6.910798122065728,
      "grad_norm": 0.9385781288146973,
      "learning_rate": 6.0734224290212784e-05,
      "loss": 3.6471,
      "step": 19136
    },
    {
      "epoch": 7.061032863849765,
      "grad_norm": 0.8267916440963745,
      "learning_rate": 5.9117780661421754e-05,
      "loss": 3.6264,
      "step": 19552
    },
    {
      "epoch": 7.211267605633803,
      "grad_norm": 0.794131875038147,
      "learning_rate": 5.7491607285101345e-05,
      "loss": 3.6015,
      "step": 19968
    },
    {
      "epoch": 7.36150234741784,
      "grad_norm": 0.8748852610588074,
      "learning_rate": 5.585748840208869e-05,
      "loss": 3.5993,
      "step": 20384
    },
    {
      "epoch": 7.500902853015529,
      "eval_g2l_cer": 50.088,
      "eval_g2l_gen_len": 3.0582,
      "eval_g2l_rouge1": 39.9874,
      "eval_g2l_rouge2": 32.4432,
      "eval_g2l_rougeL": 40.0195,
      "eval_g2l_rougeLsum": 39.9365,
      "eval_l2ex_cer": 87.6165,
      "eval_l2ex_gen_len": 22.7133,
      "eval_l2ex_rouge1": 28.1937,
      "eval_l2ex_rouge2": 12.5673,
      "eval_l2ex_rougeL": 24.9397,
      "eval_l2ex_rougeLsum": 24.921,
      "eval_l2g_cer": 72.7284,
      "eval_l2g_gen_len": 15.6759,
      "eval_l2g_rouge1": 38.4813,
      "eval_l2g_rouge2": 25.936,
      "eval_l2g_rougeL": 36.5693,
      "eval_l2g_rougeLsum": 36.5729,
      "eval_loss": 3.6013987064361572,
      "eval_runtime": 195.438,
      "eval_samples_per_second": 50.763,
      "eval_steps_per_second": 1.591,
      "step": 20770
    },
    {
      "epoch": 7.511737089201878,
      "grad_norm": 0.9019631743431091,
      "learning_rate": 5.4217216971047445e-05,
      "loss": 3.5978,
      "step": 20800
    },
    {
      "epoch": 7.661971830985916,
      "grad_norm": 0.8872570395469666,
      "learning_rate": 5.257259270122993e-05,
      "loss": 3.6113,
      "step": 21216
    },
    {
      "epoch": 7.812206572769953,
      "grad_norm": 0.7394893169403076,
      "learning_rate": 5.0925420077832285e-05,
      "loss": 3.593,
      "step": 21632
    },
    {
      "epoch": 7.962441314553991,
      "grad_norm": 0.8534842133522034,
      "learning_rate": 4.927750638210947e-05,
      "loss": 3.5963,
      "step": 22048
    },
    {
      "epoch": 8.112676056338028,
      "grad_norm": 0.9047814607620239,
      "learning_rate": 4.7630659708422666e-05,
      "loss": 3.5722,
      "step": 22464
    },
    {
      "epoch": 8.250993138317082,
      "eval_g2l_cer": 49.5716,
      "eval_g2l_gen_len": 3.0388,
      "eval_g2l_rouge1": 40.4088,
      "eval_g2l_rouge2": 32.7272,
      "eval_g2l_rougeL": 40.4374,
      "eval_g2l_rougeLsum": 40.3677,
      "eval_l2ex_cer": 83.5858,
      "eval_l2ex_gen_len": 20.4851,
      "eval_l2ex_rouge1": 29.084,
      "eval_l2ex_rouge2": 12.9208,
      "eval_l2ex_rougeL": 25.6832,
      "eval_l2ex_rougeLsum": 25.7033,
      "eval_l2g_cer": 72.1741,
      "eval_l2g_gen_len": 15.6461,
      "eval_l2g_rouge1": 38.8628,
      "eval_l2g_rouge2": 26.1912,
      "eval_l2g_rougeL": 36.9072,
      "eval_l2g_rougeLsum": 36.9086,
      "eval_loss": 3.5901942253112793,
      "eval_runtime": 190.412,
      "eval_samples_per_second": 52.103,
      "eval_steps_per_second": 1.633,
      "step": 22847
    },
    {
      "epoch": 8.262910798122066,
      "grad_norm": 0.8366677761077881,
      "learning_rate": 4.598668698039414e-05,
      "loss": 3.5641,
      "step": 22880
    },
    {
      "epoch": 8.413145539906104,
      "grad_norm": 0.8628195524215698,
      "learning_rate": 4.4347391968347015e-05,
      "loss": 3.5702,
      "step": 23296
    },
    {
      "epoch": 8.56338028169014,
      "grad_norm": 0.9060849547386169,
      "learning_rate": 4.27145733102046e-05,
      "loss": 3.5508,
      "step": 23712
    },
    {
      "epoch": 8.713615023474178,
      "grad_norm": 0.8726539015769958,
      "learning_rate": 4.109002253802116e-05,
      "loss": 3.5637,
      "step": 24128
    },
    {
      "epoch": 8.863849765258216,
      "grad_norm": 0.9154978394508362,
      "learning_rate": 3.947552211230913e-05,
      "loss": 3.5435,
      "step": 24544
    },
    {
      "epoch": 9.001083423618635,
      "eval_g2l_cer": 48.6326,
      "eval_g2l_gen_len": 3.008,
      "eval_g2l_rouge1": 40.6427,
      "eval_g2l_rouge2": 33.0447,
      "eval_g2l_rougeL": 40.6651,
      "eval_g2l_rougeLsum": 40.6197,
      "eval_l2ex_cer": 85.6816,
      "eval_l2ex_gen_len": 20.9753,
      "eval_l2ex_rouge1": 28.5827,
      "eval_l2ex_rouge2": 12.8213,
      "eval_l2ex_rougeL": 25.352,
      "eval_l2ex_rougeLsum": 25.3642,
      "eval_l2g_cer": 72.7802,
      "eval_l2g_gen_len": 15.8102,
      "eval_l2g_rouge1": 38.814,
      "eval_l2g_rouge2": 26.1373,
      "eval_l2g_rougeL": 36.8943,
      "eval_l2g_rougeLsum": 36.9272,
      "eval_loss": 3.5814104080200195,
      "eval_runtime": 193.5202,
      "eval_samples_per_second": 51.266,
      "eval_steps_per_second": 1.607,
      "step": 24924
    },
    {
      "epoch": 9.014084507042254,
      "grad_norm": 0.9910312294960022,
      "learning_rate": 3.7872843466319744e-05,
      "loss": 3.5601,
      "step": 24960
    },
    {
      "epoch": 9.164319248826292,
      "grad_norm": 0.913223922252655,
      "learning_rate": 3.6283745062422726e-05,
      "loss": 3.5156,
      "step": 25376
    },
    {
      "epoch": 9.314553990610328,
      "grad_norm": 0.9026065468788147,
      "learning_rate": 3.470997046271774e-05,
      "loss": 3.5337,
      "step": 25792
    },
    {
      "epoch": 9.464788732394366,
      "grad_norm": 0.9726517796516418,
      "learning_rate": 3.315324641599434e-05,
      "loss": 3.5294,
      "step": 26208
    },
    {
      "epoch": 9.615023474178404,
      "grad_norm": 0.954593300819397,
      "learning_rate": 3.161528096313964e-05,
      "loss": 3.5242,
      "step": 26624
    },
    {
      "epoch": 9.751173708920188,
      "eval_g2l_cer": 48.3196,
      "eval_g2l_gen_len": 3.0196,
      "eval_g2l_rouge1": 41.1733,
      "eval_g2l_rouge2": 33.4761,
      "eval_g2l_rougeL": 41.172,
      "eval_g2l_rougeLsum": 41.1111,
      "eval_l2ex_cer": 86.3469,
      "eval_l2ex_gen_len": 21.333,
      "eval_l2ex_rouge1": 28.6196,
      "eval_l2ex_rouge2": 12.797,
      "eval_l2ex_rougeL": 25.331,
      "eval_l2ex_rougeLsum": 25.3251,
      "eval_l2g_cer": 71.8519,
      "eval_l2g_gen_len": 15.5771,
      "eval_l2g_rouge1": 38.9877,
      "eval_l2g_rouge2": 26.3016,
      "eval_l2g_rougeL": 36.97,
      "eval_l2g_rougeLsum": 37.0109,
      "eval_loss": 3.5751187801361084,
      "eval_runtime": 190.5769,
      "eval_samples_per_second": 52.058,
      "eval_steps_per_second": 1.632,
      "step": 27001
    },
    {
      "epoch": 9.765258215962442,
      "grad_norm": 0.7817335724830627,
      "learning_rate": 3.00977615630722e-05,
      "loss": 3.5332,
      "step": 27040
    },
    {
      "epoch": 9.915492957746478,
      "grad_norm": 0.8576836585998535,
      "learning_rate": 2.8602353241258667e-05,
      "loss": 3.5247,
      "step": 27456
    },
    {
      "epoch": 10.065727699530516,
      "grad_norm": 0.924045741558075,
      "learning_rate": 2.7130696762844198e-05,
      "loss": 3.5171,
      "step": 27872
    },
    {
      "epoch": 10.215962441314554,
      "grad_norm": 0.9701129198074341,
      "learning_rate": 2.568440683240166e-05,
      "loss": 3.4886,
      "step": 28288
    },
    {
      "epoch": 10.366197183098592,
      "grad_norm": 0.8473976850509644,
      "learning_rate": 2.426507032227427e-05,
      "loss": 3.5134,
      "step": 28704
    },
    {
      "epoch": 10.501263994221741,
      "eval_g2l_cer": 48.8336,
      "eval_g2l_gen_len": 3.0502,
      "eval_g2l_rouge1": 41.0241,
      "eval_g2l_rouge2": 33.2994,
      "eval_g2l_rougeL": 41.0374,
      "eval_g2l_rougeLsum": 40.9554,
      "eval_l2ex_cer": 85.2795,
      "eval_l2ex_gen_len": 21.6999,
      "eval_l2ex_rouge1": 28.6576,
      "eval_l2ex_rouge2": 12.5848,
      "eval_l2ex_rougeL": 25.1057,
      "eval_l2ex_rougeLsum": 25.1478,
      "eval_l2g_cer": 71.5555,
      "eval_l2g_gen_len": 15.5923,
      "eval_l2g_rouge1": 39.111,
      "eval_l2g_rouge2": 26.3632,
      "eval_l2g_rougeL": 37.134,
      "eval_l2g_rougeLsum": 37.1562,
      "eval_loss": 3.5716097354888916,
      "eval_runtime": 190.1354,
      "eval_samples_per_second": 52.179,
      "eval_steps_per_second": 1.636,
      "step": 29078
    },
    {
      "epoch": 10.51643192488263,
      "grad_norm": 0.9222161769866943,
      "learning_rate": 2.2874244531456016e-05,
      "loss": 3.4995,
      "step": 29120
    },
    {
      "epoch": 10.666666666666666,
      "grad_norm": 0.8834406137466431,
      "learning_rate": 2.1513455476919875e-05,
      "loss": 3.5005,
      "step": 29536
    },
    {
      "epoch": 10.816901408450704,
      "grad_norm": 1.2534151077270508,
      "learning_rate": 2.0184196219268805e-05,
      "loss": 3.4956,
      "step": 29952
    },
    {
      "epoch": 10.967136150234742,
      "grad_norm": 1.0579476356506348,
      "learning_rate": 1.8887925224546575e-05,
      "loss": 3.4984,
      "step": 30368
    },
    {
      "epoch": 11.11737089201878,
      "grad_norm": 0.9352797269821167,
      "learning_rate": 1.7626064764005655e-05,
      "loss": 3.4891,
      "step": 30784
    },
    {
      "epoch": 11.251354279523294,
      "eval_g2l_cer": 48.1779,
      "eval_g2l_gen_len": 3.0241,
      "eval_g2l_rouge1": 41.3076,
      "eval_g2l_rouge2": 33.5874,
      "eval_g2l_rougeL": 41.3381,
      "eval_g2l_rougeLsum": 41.2834,
      "eval_l2ex_cer": 86.303,
      "eval_l2ex_gen_len": 21.6927,
      "eval_l2ex_rouge1": 28.5306,
      "eval_l2ex_rouge2": 12.66,
      "eval_l2ex_rougeL": 25.107,
      "eval_l2ex_rougeLsum": 25.1229,
      "eval_l2g_cer": 71.7607,
      "eval_l2g_gen_len": 15.6002,
      "eval_l2g_rouge1": 39.1998,
      "eval_l2g_rouge2": 26.5146,
      "eval_l2g_rougeL": 37.2299,
      "eval_l2g_rougeLsum": 37.2583,
      "eval_loss": 3.5692920684814453,
      "eval_runtime": 191.2935,
      "eval_samples_per_second": 51.863,
      "eval_steps_per_second": 1.626,
      "step": 31155
    },
    {
      "epoch": 11.267605633802816,
      "grad_norm": 0.8403520584106445,
      "learning_rate": 1.6399999353588347e-05,
      "loss": 3.4762,
      "step": 31200
    },
    {
      "epoch": 11.417840375586854,
      "grad_norm": 0.8685266375541687,
      "learning_rate": 1.5211074234832911e-05,
      "loss": 3.491,
      "step": 31616
    },
    {
      "epoch": 11.568075117370892,
      "grad_norm": 0.8662200570106506,
      "learning_rate": 1.4060593898871712e-05,
      "loss": 3.4818,
      "step": 32032
    },
    {
      "epoch": 11.71830985915493,
      "grad_norm": 0.915972888469696,
      "learning_rate": 1.2949820655140888e-05,
      "loss": 3.4729,
      "step": 32448
    },
    {
      "epoch": 11.868544600938968,
      "grad_norm": 0.9427916407585144,
      "learning_rate": 1.187997324637174e-05,
      "loss": 3.4837,
      "step": 32864
    },
    {
      "epoch": 12.001444564824846,
      "eval_g2l_cer": 48.4635,
      "eval_g2l_gen_len": 3.0374,
      "eval_g2l_rouge1": 41.42,
      "eval_g2l_rouge2": 33.7871,
      "eval_g2l_rougeL": 41.41,
      "eval_g2l_rougeLsum": 41.3653,
      "eval_l2ex_cer": 84.6873,
      "eval_l2ex_gen_len": 21.5406,
      "eval_l2ex_rouge1": 28.7533,
      "eval_l2ex_rouge2": 12.7721,
      "eval_l2ex_rougeL": 25.3715,
      "eval_l2ex_rougeLsum": 25.3817,
      "eval_l2g_cer": 71.4847,
      "eval_l2g_gen_len": 15.5437,
      "eval_l2g_rouge1": 39.2147,
      "eval_l2g_rouge2": 26.5099,
      "eval_l2g_rougeL": 37.2362,
      "eval_l2g_rougeLsum": 37.2641,
      "eval_loss": 3.5653076171875,
      "eval_runtime": 189.8727,
      "eval_samples_per_second": 52.251,
      "eval_steps_per_second": 1.638,
      "step": 33232
    },
    {
      "epoch": 12.018779342723004,
      "grad_norm": 0.8259687423706055,
      "learning_rate": 1.0852225511383663e-05,
      "loss": 3.4764,
      "step": 33280
    },
    {
      "epoch": 12.169014084507042,
      "grad_norm": 0.904097855091095,
      "learning_rate": 9.86770509714574e-06,
      "loss": 3.4791,
      "step": 33696
    },
    {
      "epoch": 12.31924882629108,
      "grad_norm": 0.9662612080574036,
      "learning_rate": 8.927492221520133e-06,
      "loss": 3.4593,
      "step": 34112
    },
    {
      "epoch": 12.469483568075118,
      "grad_norm": 0.9324942231178284,
      "learning_rate": 8.032618488044715e-06,
      "loss": 3.4564,
      "step": 34528
    },
    {
      "epoch": 12.619718309859154,
      "grad_norm": 0.9966897964477539,
      "learning_rate": 7.184065754055608e-06,
      "loss": 3.4576,
      "step": 34944
    },
    {
      "epoch": 12.751534850126399,
      "eval_g2l_cer": 47.8718,
      "eval_g2l_gen_len": 3.0243,
      "eval_g2l_rouge1": 41.399,
      "eval_g2l_rouge2": 33.8189,
      "eval_g2l_rougeL": 41.4105,
      "eval_g2l_rougeLsum": 41.3515,
      "eval_l2ex_cer": 84.0524,
      "eval_l2ex_gen_len": 21.0206,
      "eval_l2ex_rouge1": 28.7814,
      "eval_l2ex_rouge2": 12.7663,
      "eval_l2ex_rougeL": 25.3724,
      "eval_l2ex_rougeLsum": 25.3895,
      "eval_l2g_cer": 71.6622,
      "eval_l2g_gen_len": 15.563,
      "eval_l2g_rouge1": 39.1666,
      "eval_l2g_rouge2": 26.5275,
      "eval_l2g_rougeL": 37.1881,
      "eval_l2g_rougeLsum": 37.2249,
      "eval_loss": 3.564103841781616,
      "eval_runtime": 190.2806,
      "eval_samples_per_second": 52.139,
      "eval_steps_per_second": 1.634,
      "step": 35309
    },
    {
      "epoch": 12.769953051643192,
      "grad_norm": 1.0099953413009644,
      "learning_rate": 6.382765053391182e-06,
      "loss": 3.4757,
      "step": 35360
    },
    {
      "epoch": 12.92018779342723,
      "grad_norm": 0.8347458243370056,
      "learning_rate": 5.629595574859816e-06,
      "loss": 3.4814,
      "step": 35776
    },
    {
      "epoch": 13.070422535211268,
      "grad_norm": 0.8532468676567078,
      "learning_rate": 4.925383697592043e-06,
      "loss": 3.4667,
      "step": 36192
    },
    {
      "epoch": 13.220657276995306,
      "grad_norm": 0.8852038383483887,
      "learning_rate": 4.2709020843357075e-06,
      "loss": 3.4512,
      "step": 36608
    },
    {
      "epoch": 13.370892018779342,
      "grad_norm": 1.058424472808838,
      "learning_rate": 3.666868833688726e-06,
      "loss": 3.4616,
      "step": 37024
    },
    {
      "epoch": 13.501625135427952,
      "eval_g2l_cer": 47.8581,
      "eval_g2l_gen_len": 3.0221,
      "eval_g2l_rouge1": 41.4693,
      "eval_g2l_rouge2": 33.7773,
      "eval_g2l_rougeL": 41.4822,
      "eval_g2l_rougeLsum": 41.4356,
      "eval_l2ex_cer": 84.3083,
      "eval_l2ex_gen_len": 21.0319,
      "eval_l2ex_rouge1": 28.654,
      "eval_l2ex_rouge2": 12.8413,
      "eval_l2ex_rougeL": 25.3941,
      "eval_l2ex_rougeLsum": 25.4326,
      "eval_l2g_cer": 71.0018,
      "eval_l2g_gen_len": 15.3407,
      "eval_l2g_rouge1": 39.2009,
      "eval_l2g_rouge2": 26.5422,
      "eval_l2g_rougeL": 37.2433,
      "eval_l2g_rougeLsum": 37.2693,
      "eval_loss": 3.5637874603271484,
      "eval_runtime": 187.7571,
      "eval_samples_per_second": 52.84,
      "eval_steps_per_second": 1.656,
      "step": 37386
    }
  ],
  "logging_steps": 416,
  "max_steps": 41535,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 2077,
  "total_flos": 7.17240637379838e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}