|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 68.02721088435374,
  "eval_steps": 1000,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3401360544217687,
      "grad_norm": 7.277314186096191,
      "learning_rate": 5.000000000000001e-07,
      "loss": 1.2508,
      "step": 25
    },
    {
      "epoch": 0.6802721088435374,
      "grad_norm": 4.874781131744385,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.8343,
      "step": 50
    },
    {
      "epoch": 1.0204081632653061,
      "grad_norm": 3.6367509365081787,
      "learning_rate": 1.5e-06,
      "loss": 0.5016,
      "step": 75
    },
    {
      "epoch": 1.3605442176870748,
      "grad_norm": 3.666282892227173,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.3941,
      "step": 100
    },
    {
      "epoch": 1.7006802721088436,
      "grad_norm": 3.6144936084747314,
      "learning_rate": 2.5e-06,
      "loss": 0.3451,
      "step": 125
    },
    {
      "epoch": 2.0408163265306123,
      "grad_norm": 2.4187328815460205,
      "learning_rate": 3e-06,
      "loss": 0.3123,
      "step": 150
    },
    {
      "epoch": 2.380952380952381,
      "grad_norm": 3.255674362182617,
      "learning_rate": 3.5e-06,
      "loss": 0.2263,
      "step": 175
    },
    {
      "epoch": 2.7210884353741496,
      "grad_norm": 3.5080747604370117,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.2111,
      "step": 200
    },
    {
      "epoch": 3.061224489795918,
      "grad_norm": 2.3079421520233154,
      "learning_rate": 4.5e-06,
      "loss": 0.201,
      "step": 225
    },
    {
      "epoch": 3.4013605442176873,
      "grad_norm": 2.2684943675994873,
      "learning_rate": 5e-06,
      "loss": 0.1196,
      "step": 250
    },
    {
      "epoch": 3.741496598639456,
      "grad_norm": 2.2747371196746826,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.1258,
      "step": 275
    },
    {
      "epoch": 4.081632653061225,
      "grad_norm": 2.270559787750244,
      "learning_rate": 6e-06,
      "loss": 0.1133,
      "step": 300
    },
    {
      "epoch": 4.421768707482993,
      "grad_norm": 2.20074200630188,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.0682,
      "step": 325
    },
    {
      "epoch": 4.761904761904762,
      "grad_norm": 2.804894208908081,
      "learning_rate": 7e-06,
      "loss": 0.0731,
      "step": 350
    },
    {
      "epoch": 5.1020408163265305,
      "grad_norm": 2.6068227291107178,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.0672,
      "step": 375
    },
    {
      "epoch": 5.442176870748299,
      "grad_norm": 3.2111849784851074,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.0478,
      "step": 400
    },
    {
      "epoch": 5.782312925170068,
      "grad_norm": 1.5456165075302124,
      "learning_rate": 8.5e-06,
      "loss": 0.0505,
      "step": 425
    },
    {
      "epoch": 6.122448979591836,
      "grad_norm": 1.4710639715194702,
      "learning_rate": 9e-06,
      "loss": 0.0473,
      "step": 450
    },
    {
      "epoch": 6.462585034013605,
      "grad_norm": 2.914450168609619,
      "learning_rate": 9.5e-06,
      "loss": 0.0368,
      "step": 475
    },
    {
      "epoch": 6.802721088435375,
      "grad_norm": 2.29419207572937,
      "learning_rate": 1e-05,
      "loss": 0.0441,
      "step": 500
    },
    {
      "epoch": 7.142857142857143,
      "grad_norm": 0.8864838480949402,
      "learning_rate": 9.944444444444445e-06,
      "loss": 0.0412,
      "step": 525
    },
    {
      "epoch": 7.482993197278912,
      "grad_norm": 1.3316959142684937,
      "learning_rate": 9.88888888888889e-06,
      "loss": 0.0347,
      "step": 550
    },
    {
      "epoch": 7.8231292517006805,
      "grad_norm": 2.046053647994995,
      "learning_rate": 9.833333333333333e-06,
      "loss": 0.0299,
      "step": 575
    },
    {
      "epoch": 8.16326530612245,
      "grad_norm": 1.579432725906372,
      "learning_rate": 9.777777777777779e-06,
      "loss": 0.0268,
      "step": 600
    },
    {
      "epoch": 8.503401360544217,
      "grad_norm": 1.7325966358184814,
      "learning_rate": 9.722222222222223e-06,
      "loss": 0.019,
      "step": 625
    },
    {
      "epoch": 8.843537414965986,
      "grad_norm": 1.6740517616271973,
      "learning_rate": 9.666666666666667e-06,
      "loss": 0.0262,
      "step": 650
    },
    {
      "epoch": 9.183673469387756,
      "grad_norm": 0.65809166431427,
      "learning_rate": 9.611111111111112e-06,
      "loss": 0.0183,
      "step": 675
    },
    {
      "epoch": 9.523809523809524,
      "grad_norm": 2.288142442703247,
      "learning_rate": 9.555555555555556e-06,
      "loss": 0.0177,
      "step": 700
    },
    {
      "epoch": 9.863945578231293,
      "grad_norm": 1.3612937927246094,
      "learning_rate": 9.5e-06,
      "loss": 0.0179,
      "step": 725
    },
    {
      "epoch": 10.204081632653061,
      "grad_norm": 1.2634042501449585,
      "learning_rate": 9.444444444444445e-06,
      "loss": 0.0142,
      "step": 750
    },
    {
      "epoch": 10.54421768707483,
      "grad_norm": 0.8320721387863159,
      "learning_rate": 9.38888888888889e-06,
      "loss": 0.0131,
      "step": 775
    },
    {
      "epoch": 10.884353741496598,
      "grad_norm": 1.1112641096115112,
      "learning_rate": 9.333333333333334e-06,
      "loss": 0.0148,
      "step": 800
    },
    {
      "epoch": 11.224489795918368,
      "grad_norm": 0.4855366349220276,
      "learning_rate": 9.277777777777778e-06,
      "loss": 0.0124,
      "step": 825
    },
    {
      "epoch": 11.564625850340136,
      "grad_norm": 1.5221891403198242,
      "learning_rate": 9.222222222222224e-06,
      "loss": 0.0116,
      "step": 850
    },
    {
      "epoch": 11.904761904761905,
      "grad_norm": 1.4894431829452515,
      "learning_rate": 9.166666666666666e-06,
      "loss": 0.0144,
      "step": 875
    },
    {
      "epoch": 12.244897959183673,
      "grad_norm": 1.2897030115127563,
      "learning_rate": 9.111111111111112e-06,
      "loss": 0.0099,
      "step": 900
    },
    {
      "epoch": 12.585034013605442,
      "grad_norm": 0.5321520566940308,
      "learning_rate": 9.055555555555556e-06,
      "loss": 0.0068,
      "step": 925
    },
    {
      "epoch": 12.92517006802721,
      "grad_norm": 1.2740304470062256,
      "learning_rate": 9e-06,
      "loss": 0.01,
      "step": 950
    },
    {
      "epoch": 13.26530612244898,
      "grad_norm": 0.6653080582618713,
      "learning_rate": 8.944444444444446e-06,
      "loss": 0.0072,
      "step": 975
    },
    {
      "epoch": 13.60544217687075,
      "grad_norm": 1.0347912311553955,
      "learning_rate": 8.888888888888888e-06,
      "loss": 0.0091,
      "step": 1000
    },
    {
      "epoch": 13.60544217687075,
      "eval_loss": 0.4027358293533325,
      "eval_runtime": 102.3269,
      "eval_samples_per_second": 2.541,
      "eval_steps_per_second": 0.166,
      "eval_wer": 0.21894815367400225,
      "step": 1000
    },
    {
      "epoch": 13.945578231292517,
      "grad_norm": 0.6724150776863098,
      "learning_rate": 8.833333333333334e-06,
      "loss": 0.0119,
      "step": 1025
    },
    {
      "epoch": 14.285714285714286,
      "grad_norm": 0.44545045495033264,
      "learning_rate": 8.777777777777778e-06,
      "loss": 0.0069,
      "step": 1050
    },
    {
      "epoch": 14.625850340136054,
      "grad_norm": 1.5290805101394653,
      "learning_rate": 8.722222222222224e-06,
      "loss": 0.0078,
      "step": 1075
    },
    {
      "epoch": 14.965986394557824,
      "grad_norm": 0.5309610962867737,
      "learning_rate": 8.666666666666668e-06,
      "loss": 0.0053,
      "step": 1100
    },
    {
      "epoch": 15.306122448979592,
      "grad_norm": 0.1730959564447403,
      "learning_rate": 8.611111111111112e-06,
      "loss": 0.0031,
      "step": 1125
    },
    {
      "epoch": 15.646258503401361,
      "grad_norm": 0.06295394897460938,
      "learning_rate": 8.555555555555556e-06,
      "loss": 0.0044,
      "step": 1150
    },
    {
      "epoch": 15.986394557823129,
      "grad_norm": 0.4541076421737671,
      "learning_rate": 8.5e-06,
      "loss": 0.0036,
      "step": 1175
    },
    {
      "epoch": 16.3265306122449,
      "grad_norm": 0.24278602004051208,
      "learning_rate": 8.444444444444446e-06,
      "loss": 0.0045,
      "step": 1200
    },
    {
      "epoch": 16.666666666666668,
      "grad_norm": 0.3119202256202698,
      "learning_rate": 8.38888888888889e-06,
      "loss": 0.0026,
      "step": 1225
    },
    {
      "epoch": 17.006802721088434,
      "grad_norm": 0.7003461718559265,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.0031,
      "step": 1250
    },
    {
      "epoch": 17.346938775510203,
      "grad_norm": 0.3013887107372284,
      "learning_rate": 8.277777777777778e-06,
      "loss": 0.0024,
      "step": 1275
    },
    {
      "epoch": 17.687074829931973,
      "grad_norm": 0.054453521966934204,
      "learning_rate": 8.222222222222222e-06,
      "loss": 0.0018,
      "step": 1300
    },
    {
      "epoch": 18.027210884353742,
      "grad_norm": 0.46942847967147827,
      "learning_rate": 8.166666666666668e-06,
      "loss": 0.0016,
      "step": 1325
    },
    {
      "epoch": 18.367346938775512,
      "grad_norm": 0.051955267786979675,
      "learning_rate": 8.111111111111112e-06,
      "loss": 0.0014,
      "step": 1350
    },
    {
      "epoch": 18.707482993197278,
      "grad_norm": 0.022960973903536797,
      "learning_rate": 8.055555555555557e-06,
      "loss": 0.0007,
      "step": 1375
    },
    {
      "epoch": 19.047619047619047,
      "grad_norm": 0.060347653925418854,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.0023,
      "step": 1400
    },
    {
      "epoch": 19.387755102040817,
      "grad_norm": 0.045802563428878784,
      "learning_rate": 7.944444444444445e-06,
      "loss": 0.0009,
      "step": 1425
    },
    {
      "epoch": 19.727891156462587,
      "grad_norm": 0.03706446662545204,
      "learning_rate": 7.88888888888889e-06,
      "loss": 0.0009,
      "step": 1450
    },
    {
      "epoch": 20.068027210884352,
      "grad_norm": 0.014720437116920948,
      "learning_rate": 7.833333333333333e-06,
      "loss": 0.0008,
      "step": 1475
    },
    {
      "epoch": 20.408163265306122,
      "grad_norm": 0.01641876809298992,
      "learning_rate": 7.77777777777778e-06,
      "loss": 0.0005,
      "step": 1500
    },
    {
      "epoch": 20.74829931972789,
      "grad_norm": 0.051012102514505386,
      "learning_rate": 7.722222222222223e-06,
      "loss": 0.0006,
      "step": 1525
    },
    {
      "epoch": 21.08843537414966,
      "grad_norm": 0.07526645809412003,
      "learning_rate": 7.666666666666667e-06,
      "loss": 0.0006,
      "step": 1550
    },
    {
      "epoch": 21.428571428571427,
      "grad_norm": 0.014325820840895176,
      "learning_rate": 7.611111111111111e-06,
      "loss": 0.0013,
      "step": 1575
    },
    {
      "epoch": 21.768707482993197,
      "grad_norm": 0.013493416830897331,
      "learning_rate": 7.555555555555556e-06,
      "loss": 0.0008,
      "step": 1600
    },
    {
      "epoch": 22.108843537414966,
      "grad_norm": 0.09651411324739456,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.0007,
      "step": 1625
    },
    {
      "epoch": 22.448979591836736,
      "grad_norm": 0.00849719438701868,
      "learning_rate": 7.444444444444445e-06,
      "loss": 0.0004,
      "step": 1650
    },
    {
      "epoch": 22.7891156462585,
      "grad_norm": 0.12997107207775116,
      "learning_rate": 7.38888888888889e-06,
      "loss": 0.0004,
      "step": 1675
    },
    {
      "epoch": 23.12925170068027,
      "grad_norm": 0.017772994935512543,
      "learning_rate": 7.333333333333333e-06,
      "loss": 0.0004,
      "step": 1700
    },
    {
      "epoch": 23.46938775510204,
      "grad_norm": 0.015189074911177158,
      "learning_rate": 7.277777777777778e-06,
      "loss": 0.0005,
      "step": 1725
    },
    {
      "epoch": 23.80952380952381,
      "grad_norm": 0.015632469207048416,
      "learning_rate": 7.222222222222223e-06,
      "loss": 0.0005,
      "step": 1750
    },
    {
      "epoch": 24.14965986394558,
      "grad_norm": 0.01429369393736124,
      "learning_rate": 7.166666666666667e-06,
      "loss": 0.0008,
      "step": 1775
    },
    {
      "epoch": 24.489795918367346,
      "grad_norm": 0.009108461439609528,
      "learning_rate": 7.111111111111112e-06,
      "loss": 0.0004,
      "step": 1800
    },
    {
      "epoch": 24.829931972789115,
      "grad_norm": 0.00893578864634037,
      "learning_rate": 7.055555555555557e-06,
      "loss": 0.0008,
      "step": 1825
    },
    {
      "epoch": 25.170068027210885,
      "grad_norm": 0.010999013669788837,
      "learning_rate": 7e-06,
      "loss": 0.0017,
      "step": 1850
    },
    {
      "epoch": 25.510204081632654,
      "grad_norm": 0.039021287113428116,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.001,
      "step": 1875
    },
    {
      "epoch": 25.85034013605442,
      "grad_norm": 0.8390358686447144,
      "learning_rate": 6.88888888888889e-06,
      "loss": 0.0009,
      "step": 1900
    },
    {
      "epoch": 26.19047619047619,
      "grad_norm": 0.02965502440929413,
      "learning_rate": 6.833333333333334e-06,
      "loss": 0.0013,
      "step": 1925
    },
    {
      "epoch": 26.53061224489796,
      "grad_norm": 0.21533174812793732,
      "learning_rate": 6.777777777777779e-06,
      "loss": 0.0029,
      "step": 1950
    },
    {
      "epoch": 26.87074829931973,
      "grad_norm": 0.06805889308452606,
      "learning_rate": 6.7222222222222235e-06,
      "loss": 0.0023,
      "step": 1975
    },
    {
      "epoch": 27.2108843537415,
      "grad_norm": 0.13776488602161407,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.0026,
      "step": 2000
    },
    {
      "epoch": 27.2108843537415,
      "eval_loss": 0.4282212555408478,
      "eval_runtime": 102.1629,
      "eval_samples_per_second": 2.545,
      "eval_steps_per_second": 0.166,
      "eval_wer": 0.22603506154419992,
      "step": 2000
    },
    {
      "epoch": 27.551020408163264,
      "grad_norm": 0.7586637735366821,
      "learning_rate": 6.6111111111111115e-06,
      "loss": 0.0045,
      "step": 2025
    },
    {
      "epoch": 27.891156462585034,
      "grad_norm": 0.19451989233493805,
      "learning_rate": 6.555555555555556e-06,
      "loss": 0.0051,
      "step": 2050
    },
    {
      "epoch": 28.231292517006803,
      "grad_norm": 0.1951601356267929,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.0037,
      "step": 2075
    },
    {
      "epoch": 28.571428571428573,
      "grad_norm": 0.3530046045780182,
      "learning_rate": 6.444444444444445e-06,
      "loss": 0.0037,
      "step": 2100
    },
    {
      "epoch": 28.91156462585034,
      "grad_norm": 0.26006898283958435,
      "learning_rate": 6.3888888888888885e-06,
      "loss": 0.0045,
      "step": 2125
    },
    {
      "epoch": 29.25170068027211,
      "grad_norm": 1.6727913618087769,
      "learning_rate": 6.333333333333333e-06,
      "loss": 0.0034,
      "step": 2150
    },
    {
      "epoch": 29.591836734693878,
      "grad_norm": 0.23586586117744446,
      "learning_rate": 6.277777777777778e-06,
      "loss": 0.0033,
      "step": 2175
    },
    {
      "epoch": 29.931972789115648,
      "grad_norm": 2.080326557159424,
      "learning_rate": 6.222222222222223e-06,
      "loss": 0.0041,
      "step": 2200
    },
    {
      "epoch": 30.272108843537413,
      "grad_norm": 0.43551284074783325,
      "learning_rate": 6.166666666666667e-06,
      "loss": 0.0056,
      "step": 2225
    },
    {
      "epoch": 30.612244897959183,
      "grad_norm": 0.40780606865882874,
      "learning_rate": 6.111111111111112e-06,
      "loss": 0.0034,
      "step": 2250
    },
    {
      "epoch": 30.952380952380953,
      "grad_norm": 0.2716953754425049,
      "learning_rate": 6.055555555555555e-06,
      "loss": 0.0059,
      "step": 2275
    },
    {
      "epoch": 31.292517006802722,
      "grad_norm": 0.7849836945533752,
      "learning_rate": 6e-06,
      "loss": 0.004,
      "step": 2300
    },
    {
      "epoch": 31.632653061224488,
      "grad_norm": 0.0561537891626358,
      "learning_rate": 5.944444444444445e-06,
      "loss": 0.0031,
      "step": 2325
    },
    {
      "epoch": 31.972789115646258,
      "grad_norm": 0.2189192771911621,
      "learning_rate": 5.88888888888889e-06,
      "loss": 0.0025,
      "step": 2350
    },
    {
      "epoch": 32.31292517006803,
      "grad_norm": 0.7395544052124023,
      "learning_rate": 5.833333333333334e-06,
      "loss": 0.0035,
      "step": 2375
    },
    {
      "epoch": 32.6530612244898,
      "grad_norm": 0.8322564959526062,
      "learning_rate": 5.777777777777778e-06,
      "loss": 0.0029,
      "step": 2400
    },
    {
      "epoch": 32.993197278911566,
      "grad_norm": 0.1809442937374115,
      "learning_rate": 5.722222222222222e-06,
      "loss": 0.004,
      "step": 2425
    },
    {
      "epoch": 33.333333333333336,
      "grad_norm": 0.09690059721469879,
      "learning_rate": 5.666666666666667e-06,
      "loss": 0.0019,
      "step": 2450
    },
    {
      "epoch": 33.673469387755105,
      "grad_norm": 0.29557883739471436,
      "learning_rate": 5.611111111111112e-06,
      "loss": 0.0023,
      "step": 2475
    },
    {
      "epoch": 34.01360544217687,
      "grad_norm": 0.026952777057886124,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.001,
      "step": 2500
    },
    {
      "epoch": 34.35374149659864,
      "grad_norm": 0.04151106998324394,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.0008,
      "step": 2525
    },
    {
      "epoch": 34.69387755102041,
      "grad_norm": 0.01914023421704769,
      "learning_rate": 5.444444444444445e-06,
      "loss": 0.0012,
      "step": 2550
    },
    {
      "epoch": 35.034013605442176,
      "grad_norm": 0.15637150406837463,
      "learning_rate": 5.388888888888889e-06,
      "loss": 0.0011,
      "step": 2575
    },
    {
      "epoch": 35.374149659863946,
      "grad_norm": 0.18300969898700714,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.0013,
      "step": 2600
    },
    {
      "epoch": 35.714285714285715,
      "grad_norm": 0.019685134291648865,
      "learning_rate": 5.2777777777777785e-06,
      "loss": 0.0009,
      "step": 2625
    },
    {
      "epoch": 36.054421768707485,
      "grad_norm": 0.012847774662077427,
      "learning_rate": 5.2222222222222226e-06,
      "loss": 0.0004,
      "step": 2650
    },
    {
      "epoch": 36.394557823129254,
      "grad_norm": 0.011110267601907253,
      "learning_rate": 5.1666666666666675e-06,
      "loss": 0.0003,
      "step": 2675
    },
    {
      "epoch": 36.734693877551024,
      "grad_norm": 0.010116774588823318,
      "learning_rate": 5.1111111111111115e-06,
      "loss": 0.0003,
      "step": 2700
    },
    {
      "epoch": 37.074829931972786,
      "grad_norm": 0.00764122698456049,
      "learning_rate": 5.0555555555555555e-06,
      "loss": 0.0003,
      "step": 2725
    },
    {
      "epoch": 37.414965986394556,
      "grad_norm": 0.02383231371641159,
      "learning_rate": 5e-06,
      "loss": 0.0002,
      "step": 2750
    },
    {
      "epoch": 37.755102040816325,
      "grad_norm": 0.006380359176546335,
      "learning_rate": 4.944444444444445e-06,
      "loss": 0.0002,
      "step": 2775
    },
    {
      "epoch": 38.095238095238095,
      "grad_norm": 0.0054608238860964775,
      "learning_rate": 4.888888888888889e-06,
      "loss": 0.0002,
      "step": 2800
    },
    {
      "epoch": 38.435374149659864,
      "grad_norm": 0.005097352433949709,
      "learning_rate": 4.833333333333333e-06,
      "loss": 0.0002,
      "step": 2825
    },
    {
      "epoch": 38.775510204081634,
      "grad_norm": 0.005154197569936514,
      "learning_rate": 4.777777777777778e-06,
      "loss": 0.0002,
      "step": 2850
    },
    {
      "epoch": 39.1156462585034,
      "grad_norm": 0.005586385726928711,
      "learning_rate": 4.722222222222222e-06,
      "loss": 0.0002,
      "step": 2875
    },
    {
      "epoch": 39.45578231292517,
      "grad_norm": 0.005172679666429758,
      "learning_rate": 4.666666666666667e-06,
      "loss": 0.0002,
      "step": 2900
    },
    {
      "epoch": 39.795918367346935,
      "grad_norm": 0.004589286167174578,
      "learning_rate": 4.611111111111112e-06,
      "loss": 0.0002,
      "step": 2925
    },
    {
      "epoch": 40.136054421768705,
      "grad_norm": 0.005452868994325399,
      "learning_rate": 4.555555555555556e-06,
      "loss": 0.0002,
      "step": 2950
    },
    {
      "epoch": 40.476190476190474,
      "grad_norm": 0.004887877497822046,
      "learning_rate": 4.5e-06,
      "loss": 0.0002,
      "step": 2975
    },
    {
      "epoch": 40.816326530612244,
      "grad_norm": 0.00491141015663743,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.0002,
      "step": 3000
    },
    {
      "epoch": 40.816326530612244,
      "eval_loss": 0.4444006681442261,
      "eval_runtime": 102.1694,
      "eval_samples_per_second": 2.545,
      "eval_steps_per_second": 0.166,
      "eval_wer": 0.21782916822081314,
      "step": 3000
    },
    {
      "epoch": 41.156462585034014,
      "grad_norm": 0.005289255175739527,
      "learning_rate": 4.388888888888889e-06,
      "loss": 0.0002,
      "step": 3025
    },
    {
      "epoch": 41.49659863945578,
      "grad_norm": 0.004311501048505306,
      "learning_rate": 4.333333333333334e-06,
      "loss": 0.0001,
      "step": 3050
    },
    {
      "epoch": 41.83673469387755,
      "grad_norm": 0.004461329896003008,
      "learning_rate": 4.277777777777778e-06,
      "loss": 0.0002,
      "step": 3075
    },
    {
      "epoch": 42.17687074829932,
      "grad_norm": 0.0043508694507181644,
      "learning_rate": 4.222222222222223e-06,
      "loss": 0.0001,
      "step": 3100
    },
    {
      "epoch": 42.51700680272109,
      "grad_norm": 0.004281959030777216,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.0001,
      "step": 3125
    },
    {
      "epoch": 42.857142857142854,
      "grad_norm": 0.004064427223056555,
      "learning_rate": 4.111111111111111e-06,
      "loss": 0.0001,
      "step": 3150
    },
    {
      "epoch": 43.197278911564624,
      "grad_norm": 0.00377645343542099,
      "learning_rate": 4.055555555555556e-06,
      "loss": 0.0001,
      "step": 3175
    },
    {
      "epoch": 43.53741496598639,
      "grad_norm": 0.004163255449384451,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.0001,
      "step": 3200
    },
    {
      "epoch": 43.87755102040816,
      "grad_norm": 0.004156498704105616,
      "learning_rate": 3.944444444444445e-06,
      "loss": 0.0001,
      "step": 3225
    },
    {
      "epoch": 44.21768707482993,
      "grad_norm": 0.002806807868182659,
      "learning_rate": 3.88888888888889e-06,
      "loss": 0.0001,
      "step": 3250
    },
    {
      "epoch": 44.5578231292517,
      "grad_norm": 0.0035042332019656897,
      "learning_rate": 3.833333333333334e-06,
      "loss": 0.0001,
      "step": 3275
    },
    {
      "epoch": 44.89795918367347,
      "grad_norm": 0.003332278924062848,
      "learning_rate": 3.777777777777778e-06,
      "loss": 0.0001,
      "step": 3300
    },
    {
      "epoch": 45.23809523809524,
      "grad_norm": 0.003195433644577861,
      "learning_rate": 3.7222222222222225e-06,
      "loss": 0.0001,
      "step": 3325
    },
    {
      "epoch": 45.578231292517,
      "grad_norm": 0.0031893581617623568,
      "learning_rate": 3.6666666666666666e-06,
      "loss": 0.0001,
      "step": 3350
    },
    {
      "epoch": 45.91836734693877,
      "grad_norm": 0.0031172255985438824,
      "learning_rate": 3.6111111111111115e-06,
      "loss": 0.0001,
      "step": 3375
    },
    {
      "epoch": 46.25850340136054,
      "grad_norm": 0.0032927945721894503,
      "learning_rate": 3.555555555555556e-06,
      "loss": 0.0001,
      "step": 3400
    },
    {
      "epoch": 46.59863945578231,
      "grad_norm": 0.003365216078236699,
      "learning_rate": 3.5e-06,
      "loss": 0.0001,
      "step": 3425
    },
    {
      "epoch": 46.93877551020408,
      "grad_norm": 0.003197253216058016,
      "learning_rate": 3.444444444444445e-06,
      "loss": 0.0001,
      "step": 3450
    },
    {
      "epoch": 47.27891156462585,
      "grad_norm": 0.0033013387583196163,
      "learning_rate": 3.3888888888888893e-06,
      "loss": 0.0001,
      "step": 3475
    },
    {
      "epoch": 47.61904761904762,
      "grad_norm": 0.0030345292761921883,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.0001,
      "step": 3500
    },
    {
      "epoch": 47.95918367346939,
      "grad_norm": 0.002814779756590724,
      "learning_rate": 3.277777777777778e-06,
      "loss": 0.0001,
      "step": 3525
    },
    {
      "epoch": 48.29931972789116,
      "grad_norm": 0.00269457814283669,
      "learning_rate": 3.2222222222222227e-06,
      "loss": 0.0001,
      "step": 3550
    },
    {
      "epoch": 48.63945578231292,
      "grad_norm": 0.0031415882986038923,
      "learning_rate": 3.1666666666666667e-06,
      "loss": 0.0001,
      "step": 3575
    },
    {
      "epoch": 48.97959183673469,
      "grad_norm": 0.0029843435622751713,
      "learning_rate": 3.1111111111111116e-06,
      "loss": 0.0001,
      "step": 3600
    },
    {
      "epoch": 49.31972789115646,
      "grad_norm": 0.002468927064910531,
      "learning_rate": 3.055555555555556e-06,
      "loss": 0.0001,
      "step": 3625
    },
    {
      "epoch": 49.65986394557823,
      "grad_norm": 0.002835317747667432,
      "learning_rate": 3e-06,
      "loss": 0.0001,
      "step": 3650
    },
    {
      "epoch": 50.0,
      "grad_norm": 0.003936604596674442,
      "learning_rate": 2.944444444444445e-06,
      "loss": 0.0001,
      "step": 3675
    },
    {
      "epoch": 50.34013605442177,
      "grad_norm": 0.0028534727171063423,
      "learning_rate": 2.888888888888889e-06,
      "loss": 0.0001,
      "step": 3700
    },
    {
      "epoch": 50.68027210884354,
      "grad_norm": 0.0029034256003797054,
      "learning_rate": 2.8333333333333335e-06,
      "loss": 0.0001,
      "step": 3725
    },
    {
      "epoch": 51.02040816326531,
      "grad_norm": 0.0032608446199446917,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 0.0001,
      "step": 3750
    },
    {
      "epoch": 51.36054421768708,
      "grad_norm": 0.0026580116245895624,
      "learning_rate": 2.7222222222222224e-06,
      "loss": 0.0001,
      "step": 3775
    },
    {
      "epoch": 51.70068027210884,
      "grad_norm": 0.0024348797742277384,
      "learning_rate": 2.666666666666667e-06,
      "loss": 0.0001,
      "step": 3800
    },
    {
      "epoch": 52.04081632653061,
      "grad_norm": 0.002745755948126316,
      "learning_rate": 2.6111111111111113e-06,
      "loss": 0.0001,
      "step": 3825
    },
    {
      "epoch": 52.38095238095238,
      "grad_norm": 0.0023754702415317297,
      "learning_rate": 2.5555555555555557e-06,
      "loss": 0.0001,
      "step": 3850
    },
    {
      "epoch": 52.72108843537415,
      "grad_norm": 0.0026109826285392046,
      "learning_rate": 2.5e-06,
      "loss": 0.0001,
      "step": 3875
    },
    {
      "epoch": 53.06122448979592,
      "grad_norm": 0.0025893962010741234,
      "learning_rate": 2.4444444444444447e-06,
      "loss": 0.0001,
      "step": 3900
    },
    {
      "epoch": 53.40136054421769,
      "grad_norm": 0.0023246912751346827,
      "learning_rate": 2.388888888888889e-06,
      "loss": 0.0001,
      "step": 3925
    },
    {
      "epoch": 53.74149659863946,
      "grad_norm": 0.002337114419788122,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 0.0001,
      "step": 3950
    },
    {
      "epoch": 54.08163265306123,
      "grad_norm": 0.002635551616549492,
      "learning_rate": 2.277777777777778e-06,
      "loss": 0.0001,
      "step": 3975
    },
    {
      "epoch": 54.421768707483,
      "grad_norm": 0.002480172784999013,
      "learning_rate": 2.222222222222222e-06,
      "loss": 0.0001,
      "step": 4000
    },
    {
      "epoch": 54.421768707483,
      "eval_loss": 0.4667165279388428,
      "eval_runtime": 102.3194,
      "eval_samples_per_second": 2.541,
      "eval_steps_per_second": 0.166,
      "eval_wer": 0.21596419246549795,
      "step": 4000
    },
    {
      "epoch": 54.76190476190476,
      "grad_norm": 0.00236712908372283,
      "learning_rate": 2.166666666666667e-06,
      "loss": 0.0001,
      "step": 4025
    },
    {
      "epoch": 55.10204081632653,
      "grad_norm": 0.002560981782153249,
      "learning_rate": 2.1111111111111114e-06,
      "loss": 0.0001,
      "step": 4050
    },
    {
      "epoch": 55.4421768707483,
      "grad_norm": 0.002310627605766058,
      "learning_rate": 2.0555555555555555e-06,
      "loss": 0.0001,
      "step": 4075
    },
    {
      "epoch": 55.78231292517007,
      "grad_norm": 0.0023427396081387997,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.0001,
      "step": 4100
    },
    {
      "epoch": 56.12244897959184,
      "grad_norm": 0.002552021760493517,
      "learning_rate": 1.944444444444445e-06,
      "loss": 0.0001,
      "step": 4125
    },
    {
      "epoch": 56.46258503401361,
      "grad_norm": 0.0021091082599014044,
      "learning_rate": 1.888888888888889e-06,
      "loss": 0.0001,
      "step": 4150
    },
    {
      "epoch": 56.802721088435376,
      "grad_norm": 0.002378846053034067,
      "learning_rate": 1.8333333333333333e-06,
      "loss": 0.0001,
      "step": 4175
    },
    {
      "epoch": 57.142857142857146,
      "grad_norm": 0.0022475309669971466,
      "learning_rate": 1.777777777777778e-06,
      "loss": 0.0001,
      "step": 4200
    },
    {
      "epoch": 57.48299319727891,
      "grad_norm": 0.0024991375394165516,
      "learning_rate": 1.7222222222222224e-06,
      "loss": 0.0001,
      "step": 4225
    },
    {
      "epoch": 57.82312925170068,
      "grad_norm": 0.0026695560663938522,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.0001,
      "step": 4250
    },
    {
      "epoch": 58.16326530612245,
      "grad_norm": 0.0023107912857085466,
      "learning_rate": 1.6111111111111113e-06,
      "loss": 0.0001,
      "step": 4275
    },
    {
      "epoch": 58.50340136054422,
      "grad_norm": 0.0022581815719604492,
      "learning_rate": 1.5555555555555558e-06,
      "loss": 0.0001,
      "step": 4300
    },
    {
      "epoch": 58.843537414965986,
      "grad_norm": 0.0021738242357969284,
      "learning_rate": 1.5e-06,
      "loss": 0.0001,
      "step": 4325
    },
    {
      "epoch": 59.183673469387756,
      "grad_norm": 0.0025273971259593964,
      "learning_rate": 1.4444444444444445e-06,
      "loss": 0.0001,
      "step": 4350
    },
    {
      "epoch": 59.523809523809526,
      "grad_norm": 0.0021680588833987713,
      "learning_rate": 1.3888888888888892e-06,
      "loss": 0.0001,
      "step": 4375
    },
    {
      "epoch": 59.863945578231295,
      "grad_norm": 0.0023293830454349518,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 0.0001,
      "step": 4400
    },
    {
      "epoch": 60.204081632653065,
      "grad_norm": 0.0019948012195527554,
      "learning_rate": 1.2777777777777779e-06,
      "loss": 0.0001,
      "step": 4425
    },
    {
      "epoch": 60.54421768707483,
      "grad_norm": 0.00274069607257843,
      "learning_rate": 1.2222222222222223e-06,
      "loss": 0.0001,
      "step": 4450
    },
    {
      "epoch": 60.8843537414966,
      "grad_norm": 0.002070850459858775,
      "learning_rate": 1.1666666666666668e-06,
      "loss": 0.0001,
      "step": 4475
    },
    {
      "epoch": 61.224489795918366,
      "grad_norm": 0.002111976034939289,
      "learning_rate": 1.111111111111111e-06,
      "loss": 0.0001,
      "step": 4500
    },
    {
      "epoch": 61.564625850340136,
      "grad_norm": 0.002158733317628503,
      "learning_rate": 1.0555555555555557e-06,
      "loss": 0.0001,
      "step": 4525
    },
    {
      "epoch": 61.904761904761905,
      "grad_norm": 0.002263331552967429,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.0001,
      "step": 4550
    },
    {
      "epoch": 62.244897959183675,
      "grad_norm": 0.0020453669130802155,
      "learning_rate": 9.444444444444445e-07,
      "loss": 0.0001,
      "step": 4575
    },
    {
      "epoch": 62.585034013605444,
      "grad_norm": 0.0018243337981402874,
      "learning_rate": 8.88888888888889e-07,
      "loss": 0.0001,
      "step": 4600
    },
    {
      "epoch": 62.925170068027214,
      "grad_norm": 0.0023292056284844875,
      "learning_rate": 8.333333333333333e-07,
      "loss": 0.0001,
      "step": 4625
    },
    {
      "epoch": 63.265306122448976,
      "grad_norm": 0.0019000651082023978,
      "learning_rate": 7.777777777777779e-07,
      "loss": 0.0001,
      "step": 4650
    },
    {
      "epoch": 63.605442176870746,
      "grad_norm": 0.002300037071108818,
      "learning_rate": 7.222222222222222e-07,
      "loss": 0.0001,
      "step": 4675
    },
    {
      "epoch": 63.945578231292515,
      "grad_norm": 0.0020359556656330824,
      "learning_rate": 6.666666666666667e-07,
      "loss": 0.0001,
      "step": 4700
    },
    {
      "epoch": 64.28571428571429,
      "grad_norm": 0.0019380120793357491,
      "learning_rate": 6.111111111111112e-07,
      "loss": 0.0001,
      "step": 4725
    },
    {
      "epoch": 64.62585034013605,
      "grad_norm": 0.0018156260484829545,
      "learning_rate": 5.555555555555555e-07,
      "loss": 0.0001,
      "step": 4750
    },
    {
      "epoch": 64.96598639455782,
      "grad_norm": 0.0021546927746385336,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.0001,
      "step": 4775
    },
    {
      "epoch": 65.3061224489796,
      "grad_norm": 0.0019719544798135757,
      "learning_rate": 4.444444444444445e-07,
      "loss": 0.0001,
      "step": 4800
    },
    {
      "epoch": 65.64625850340136,
      "grad_norm": 0.0018750294111669064,
      "learning_rate": 3.8888888888888895e-07,
      "loss": 0.0001,
      "step": 4825
    },
    {
      "epoch": 65.98639455782313,
      "grad_norm": 0.002267962321639061,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 0.0001,
      "step": 4850
    },
    {
      "epoch": 66.3265306122449,
      "grad_norm": 0.0019681702833622694,
      "learning_rate": 2.7777777777777776e-07,
      "loss": 0.0001,
      "step": 4875
    },
    {
      "epoch": 66.66666666666667,
      "grad_norm": 0.0022395187988877296,
      "learning_rate": 2.2222222222222224e-07,
      "loss": 0.0001,
      "step": 4900
    },
    {
      "epoch": 67.00680272108843,
      "grad_norm": 0.0033925268799066544,
      "learning_rate": 1.6666666666666668e-07,
      "loss": 0.0001,
      "step": 4925
    },
    {
      "epoch": 67.34693877551021,
      "grad_norm": 0.0019715563394129276,
      "learning_rate": 1.1111111111111112e-07,
      "loss": 0.0001,
      "step": 4950
    },
    {
      "epoch": 67.68707482993197,
      "grad_norm": 0.00179803348146379,
      "learning_rate": 5.555555555555556e-08,
      "loss": 0.0001,
      "step": 4975
    },
    {
      "epoch": 68.02721088435374,
      "grad_norm": 0.0019662685226649046,
      "learning_rate": 0.0,
      "loss": 0.0001,
      "step": 5000
    },
    {
      "epoch": 68.02721088435374,
      "eval_loss": 0.47325974702835083,
      "eval_runtime": 102.3255,
      "eval_samples_per_second": 2.541,
      "eval_steps_per_second": 0.166,
      "eval_wer": 0.21372622155911974,
      "step": 5000
    },
    {
      "epoch": 68.02721088435374,
      "step": 5000,
      "total_flos": 5.4059634054660096e+20,
      "train_loss": 0.028010966634354553,
      "train_runtime": 44897.3386,
      "train_samples_per_second": 3.564,
      "train_steps_per_second": 0.111
    }
  ],
  "logging_steps": 25,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 69,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.4059634054660096e+20,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|