|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 68.02721088435374, |
|
"eval_steps": 1000, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 6.3686203956604, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 1.2352, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 4.156219959259033, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7799, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0204081632653061, |
|
"grad_norm": 4.332057952880859, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.5115, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.3605442176870748, |
|
"grad_norm": 3.7332186698913574, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.4105, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.7006802721088436, |
|
"grad_norm": 3.6035523414611816, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.374, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.0408163265306123, |
|
"grad_norm": 2.3493571281433105, |
|
"learning_rate": 3e-06, |
|
"loss": 0.3344, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.380952380952381, |
|
"grad_norm": 3.0683202743530273, |
|
"learning_rate": 3.48e-06, |
|
"loss": 0.248, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.7210884353741496, |
|
"grad_norm": 3.189012289047241, |
|
"learning_rate": 3.980000000000001e-06, |
|
"loss": 0.2395, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.061224489795918, |
|
"grad_norm": 2.2101962566375732, |
|
"learning_rate": 4.48e-06, |
|
"loss": 0.2258, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.4013605442176873, |
|
"grad_norm": 2.2124788761138916, |
|
"learning_rate": 4.980000000000001e-06, |
|
"loss": 0.1494, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.741496598639456, |
|
"grad_norm": 3.446359395980835, |
|
"learning_rate": 5.480000000000001e-06, |
|
"loss": 0.1512, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 4.081632653061225, |
|
"grad_norm": 2.592689037322998, |
|
"learning_rate": 5.98e-06, |
|
"loss": 0.1359, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.421768707482993, |
|
"grad_norm": 3.1709280014038086, |
|
"learning_rate": 6.480000000000001e-06, |
|
"loss": 0.0859, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 4.761904761904762, |
|
"grad_norm": 2.485269546508789, |
|
"learning_rate": 6.98e-06, |
|
"loss": 0.0958, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.1020408163265305, |
|
"grad_norm": 1.868928074836731, |
|
"learning_rate": 7.48e-06, |
|
"loss": 0.0846, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 5.442176870748299, |
|
"grad_norm": 3.283317804336548, |
|
"learning_rate": 7.980000000000002e-06, |
|
"loss": 0.0575, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.782312925170068, |
|
"grad_norm": 2.18278431892395, |
|
"learning_rate": 8.48e-06, |
|
"loss": 0.075, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 6.122448979591836, |
|
"grad_norm": 1.7689893245697021, |
|
"learning_rate": 8.98e-06, |
|
"loss": 0.0637, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.462585034013605, |
|
"grad_norm": 2.60971736907959, |
|
"learning_rate": 9.48e-06, |
|
"loss": 0.0517, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 6.802721088435375, |
|
"grad_norm": 1.903644323348999, |
|
"learning_rate": 9.980000000000001e-06, |
|
"loss": 0.0578, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"grad_norm": 1.4152497053146362, |
|
"learning_rate": 9.946666666666667e-06, |
|
"loss": 0.0565, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 7.482993197278912, |
|
"grad_norm": 2.119438409805298, |
|
"learning_rate": 9.891111111111113e-06, |
|
"loss": 0.0436, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 7.8231292517006805, |
|
"grad_norm": 1.8895118236541748, |
|
"learning_rate": 9.835555555555556e-06, |
|
"loss": 0.042, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 8.16326530612245, |
|
"grad_norm": 1.7676234245300293, |
|
"learning_rate": 9.780000000000001e-06, |
|
"loss": 0.0351, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.503401360544217, |
|
"grad_norm": 1.8845597505569458, |
|
"learning_rate": 9.724444444444445e-06, |
|
"loss": 0.0279, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 8.843537414965986, |
|
"grad_norm": 9.495149612426758, |
|
"learning_rate": 9.66888888888889e-06, |
|
"loss": 0.031, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.183673469387756, |
|
"grad_norm": 1.6925195455551147, |
|
"learning_rate": 9.613333333333335e-06, |
|
"loss": 0.0303, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 9.523809523809524, |
|
"grad_norm": 1.4979898929595947, |
|
"learning_rate": 9.557777777777777e-06, |
|
"loss": 0.023, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.863945578231293, |
|
"grad_norm": 2.2269773483276367, |
|
"learning_rate": 9.502222222222223e-06, |
|
"loss": 0.0261, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 10.204081632653061, |
|
"grad_norm": 1.0259639024734497, |
|
"learning_rate": 9.446666666666667e-06, |
|
"loss": 0.0226, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 10.54421768707483, |
|
"grad_norm": 1.9924999475479126, |
|
"learning_rate": 9.391111111111111e-06, |
|
"loss": 0.0205, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 10.884353741496598, |
|
"grad_norm": 1.616970419883728, |
|
"learning_rate": 9.335555555555557e-06, |
|
"loss": 0.0199, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.224489795918368, |
|
"grad_norm": 0.922492504119873, |
|
"learning_rate": 9.280000000000001e-06, |
|
"loss": 0.0161, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 11.564625850340136, |
|
"grad_norm": 2.508662223815918, |
|
"learning_rate": 9.224444444444445e-06, |
|
"loss": 0.0145, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 11.904761904761905, |
|
"grad_norm": 1.371565341949463, |
|
"learning_rate": 9.168888888888889e-06, |
|
"loss": 0.0179, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 12.244897959183673, |
|
"grad_norm": 1.303175687789917, |
|
"learning_rate": 9.113333333333335e-06, |
|
"loss": 0.0155, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 12.585034013605442, |
|
"grad_norm": 1.1102138757705688, |
|
"learning_rate": 9.057777777777779e-06, |
|
"loss": 0.012, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 12.92517006802721, |
|
"grad_norm": 0.8504889011383057, |
|
"learning_rate": 9.002222222222223e-06, |
|
"loss": 0.0121, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 13.26530612244898, |
|
"grad_norm": 0.8174204230308533, |
|
"learning_rate": 8.946666666666669e-06, |
|
"loss": 0.0106, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 13.60544217687075, |
|
"grad_norm": 1.821559190750122, |
|
"learning_rate": 8.891111111111111e-06, |
|
"loss": 0.0112, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 13.60544217687075, |
|
"eval_loss": 0.39124733209609985, |
|
"eval_runtime": 93.6528, |
|
"eval_samples_per_second": 2.776, |
|
"eval_steps_per_second": 0.182, |
|
"eval_wer": 0.23946288698246923, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 13.945578231292517, |
|
"grad_norm": 1.2810653448104858, |
|
"learning_rate": 8.835555555555557e-06, |
|
"loss": 0.0111, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"grad_norm": 1.2741467952728271, |
|
"learning_rate": 8.78e-06, |
|
"loss": 0.0097, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 14.625850340136054, |
|
"grad_norm": 0.8524342179298401, |
|
"learning_rate": 8.724444444444445e-06, |
|
"loss": 0.0076, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 14.965986394557824, |
|
"grad_norm": 1.643485426902771, |
|
"learning_rate": 8.66888888888889e-06, |
|
"loss": 0.0074, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 15.306122448979592, |
|
"grad_norm": 0.40055137872695923, |
|
"learning_rate": 8.613333333333333e-06, |
|
"loss": 0.007, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 15.646258503401361, |
|
"grad_norm": 1.1712241172790527, |
|
"learning_rate": 8.557777777777778e-06, |
|
"loss": 0.0072, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 15.986394557823129, |
|
"grad_norm": 0.32212740182876587, |
|
"learning_rate": 8.502222222222223e-06, |
|
"loss": 0.007, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 16.3265306122449, |
|
"grad_norm": 0.2166888266801834, |
|
"learning_rate": 8.446666666666668e-06, |
|
"loss": 0.0054, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 0.12256942689418793, |
|
"learning_rate": 8.391111111111112e-06, |
|
"loss": 0.0039, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 17.006802721088434, |
|
"grad_norm": 0.26391106843948364, |
|
"learning_rate": 8.335555555555556e-06, |
|
"loss": 0.0042, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 17.346938775510203, |
|
"grad_norm": 0.24293136596679688, |
|
"learning_rate": 8.28e-06, |
|
"loss": 0.0036, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 17.687074829931973, |
|
"grad_norm": 0.27556732296943665, |
|
"learning_rate": 8.224444444444444e-06, |
|
"loss": 0.0028, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 18.027210884353742, |
|
"grad_norm": 0.9470342397689819, |
|
"learning_rate": 8.16888888888889e-06, |
|
"loss": 0.0042, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 18.367346938775512, |
|
"grad_norm": 0.14824901521205902, |
|
"learning_rate": 8.113333333333334e-06, |
|
"loss": 0.0036, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 18.707482993197278, |
|
"grad_norm": 1.2378164529800415, |
|
"learning_rate": 8.057777777777778e-06, |
|
"loss": 0.0046, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 19.047619047619047, |
|
"grad_norm": 2.7857964038848877, |
|
"learning_rate": 8.002222222222222e-06, |
|
"loss": 0.004, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 19.387755102040817, |
|
"grad_norm": 0.5624294281005859, |
|
"learning_rate": 7.946666666666666e-06, |
|
"loss": 0.0073, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 19.727891156462587, |
|
"grad_norm": 0.18347227573394775, |
|
"learning_rate": 7.891111111111112e-06, |
|
"loss": 0.0058, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 20.068027210884352, |
|
"grad_norm": 0.3734131455421448, |
|
"learning_rate": 7.835555555555556e-06, |
|
"loss": 0.0066, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 20.408163265306122, |
|
"grad_norm": 0.6362162828445435, |
|
"learning_rate": 7.78e-06, |
|
"loss": 0.0075, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 20.74829931972789, |
|
"grad_norm": 0.8834488391876221, |
|
"learning_rate": 7.724444444444446e-06, |
|
"loss": 0.0057, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 21.08843537414966, |
|
"grad_norm": 0.06029968708753586, |
|
"learning_rate": 7.66888888888889e-06, |
|
"loss": 0.0038, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 21.428571428571427, |
|
"grad_norm": 1.0105019807815552, |
|
"learning_rate": 7.613333333333334e-06, |
|
"loss": 0.0039, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 21.768707482993197, |
|
"grad_norm": 0.5381556153297424, |
|
"learning_rate": 7.557777777777779e-06, |
|
"loss": 0.0036, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 22.108843537414966, |
|
"grad_norm": 0.08822619915008545, |
|
"learning_rate": 7.502222222222223e-06, |
|
"loss": 0.004, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 22.448979591836736, |
|
"grad_norm": 0.43402913212776184, |
|
"learning_rate": 7.446666666666668e-06, |
|
"loss": 0.0029, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 22.7891156462585, |
|
"grad_norm": 0.9147214293479919, |
|
"learning_rate": 7.3911111111111125e-06, |
|
"loss": 0.0024, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 23.12925170068027, |
|
"grad_norm": 0.48390820622444153, |
|
"learning_rate": 7.335555555555556e-06, |
|
"loss": 0.0036, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 23.46938775510204, |
|
"grad_norm": 0.10725089907646179, |
|
"learning_rate": 7.280000000000001e-06, |
|
"loss": 0.0023, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 23.80952380952381, |
|
"grad_norm": 0.09872180968523026, |
|
"learning_rate": 7.224444444444445e-06, |
|
"loss": 0.0018, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 24.14965986394558, |
|
"grad_norm": 0.6679806113243103, |
|
"learning_rate": 7.1688888888888895e-06, |
|
"loss": 0.0017, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 24.489795918367346, |
|
"grad_norm": 0.02364278770983219, |
|
"learning_rate": 7.113333333333334e-06, |
|
"loss": 0.001, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 24.829931972789115, |
|
"grad_norm": 0.02158285863697529, |
|
"learning_rate": 7.057777777777778e-06, |
|
"loss": 0.0008, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 25.170068027210885, |
|
"grad_norm": 0.014277754351496696, |
|
"learning_rate": 7.0022222222222225e-06, |
|
"loss": 0.0007, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 25.510204081632654, |
|
"grad_norm": 0.012241716496646404, |
|
"learning_rate": 6.946666666666667e-06, |
|
"loss": 0.0005, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 25.85034013605442, |
|
"grad_norm": 0.02822299115359783, |
|
"learning_rate": 6.891111111111111e-06, |
|
"loss": 0.0005, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 26.19047619047619, |
|
"grad_norm": 0.009908878244459629, |
|
"learning_rate": 6.835555555555556e-06, |
|
"loss": 0.0004, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 26.53061224489796, |
|
"grad_norm": 0.008494613692164421, |
|
"learning_rate": 6.780000000000001e-06, |
|
"loss": 0.0004, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 26.87074829931973, |
|
"grad_norm": 0.007728059310466051, |
|
"learning_rate": 6.724444444444444e-06, |
|
"loss": 0.0004, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 27.2108843537415, |
|
"grad_norm": 0.007557597942650318, |
|
"learning_rate": 6.668888888888889e-06, |
|
"loss": 0.0004, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 27.2108843537415, |
|
"eval_loss": 0.45324987173080444, |
|
"eval_runtime": 93.804, |
|
"eval_samples_per_second": 2.772, |
|
"eval_steps_per_second": 0.181, |
|
"eval_wer": 0.2245430809399478, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 27.551020408163264, |
|
"grad_norm": 0.009665679186582565, |
|
"learning_rate": 6.613333333333334e-06, |
|
"loss": 0.0004, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 27.891156462585034, |
|
"grad_norm": 0.006815009750425816, |
|
"learning_rate": 6.557777777777778e-06, |
|
"loss": 0.0004, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 28.231292517006803, |
|
"grad_norm": 0.007364605087786913, |
|
"learning_rate": 6.502222222222223e-06, |
|
"loss": 0.0003, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"grad_norm": 0.006635705474764109, |
|
"learning_rate": 6.446666666666668e-06, |
|
"loss": 0.0003, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 28.91156462585034, |
|
"grad_norm": 0.008073186501860619, |
|
"learning_rate": 6.391111111111111e-06, |
|
"loss": 0.0003, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 29.25170068027211, |
|
"grad_norm": 0.006342068314552307, |
|
"learning_rate": 6.335555555555556e-06, |
|
"loss": 0.0003, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 29.591836734693878, |
|
"grad_norm": 0.006897253915667534, |
|
"learning_rate": 6.280000000000001e-06, |
|
"loss": 0.0003, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 29.931972789115648, |
|
"grad_norm": 0.006329766474664211, |
|
"learning_rate": 6.224444444444445e-06, |
|
"loss": 0.0003, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 30.272108843537413, |
|
"grad_norm": 0.006696599069982767, |
|
"learning_rate": 6.16888888888889e-06, |
|
"loss": 0.0003, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 30.612244897959183, |
|
"grad_norm": 0.0058494312688708305, |
|
"learning_rate": 6.113333333333333e-06, |
|
"loss": 0.0003, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 30.952380952380953, |
|
"grad_norm": 0.005851502064615488, |
|
"learning_rate": 6.057777777777778e-06, |
|
"loss": 0.0003, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 31.292517006802722, |
|
"grad_norm": 0.0047736396081745625, |
|
"learning_rate": 6.002222222222223e-06, |
|
"loss": 0.0003, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 31.632653061224488, |
|
"grad_norm": 0.006324047688394785, |
|
"learning_rate": 5.946666666666668e-06, |
|
"loss": 0.0003, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 31.972789115646258, |
|
"grad_norm": 0.005418767221271992, |
|
"learning_rate": 5.891111111111112e-06, |
|
"loss": 0.0003, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 32.31292517006803, |
|
"grad_norm": 0.005563849117606878, |
|
"learning_rate": 5.8355555555555565e-06, |
|
"loss": 0.0003, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 32.6530612244898, |
|
"grad_norm": 0.005108444020152092, |
|
"learning_rate": 5.78e-06, |
|
"loss": 0.0002, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 32.993197278911566, |
|
"grad_norm": 0.004787669517099857, |
|
"learning_rate": 5.724444444444445e-06, |
|
"loss": 0.0003, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 33.333333333333336, |
|
"grad_norm": 0.004051292315125465, |
|
"learning_rate": 5.6688888888888895e-06, |
|
"loss": 0.0002, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 33.673469387755105, |
|
"grad_norm": 0.005220952443778515, |
|
"learning_rate": 5.613333333333334e-06, |
|
"loss": 0.0002, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 34.01360544217687, |
|
"grad_norm": 0.0054339151829481125, |
|
"learning_rate": 5.557777777777778e-06, |
|
"loss": 0.0002, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 34.35374149659864, |
|
"grad_norm": 0.004454713314771652, |
|
"learning_rate": 5.5022222222222224e-06, |
|
"loss": 0.0002, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 34.69387755102041, |
|
"grad_norm": 0.005186771042644978, |
|
"learning_rate": 5.4466666666666665e-06, |
|
"loss": 0.0002, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 35.034013605442176, |
|
"grad_norm": 0.004502983298152685, |
|
"learning_rate": 5.391111111111111e-06, |
|
"loss": 0.0002, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 35.374149659863946, |
|
"grad_norm": 0.004623442888259888, |
|
"learning_rate": 5.335555555555556e-06, |
|
"loss": 0.0002, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 35.714285714285715, |
|
"grad_norm": 0.00428406847640872, |
|
"learning_rate": 5.28e-06, |
|
"loss": 0.0002, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 36.054421768707485, |
|
"grad_norm": 0.004207184072583914, |
|
"learning_rate": 5.224444444444445e-06, |
|
"loss": 0.0002, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 36.394557823129254, |
|
"grad_norm": 0.004264296032488346, |
|
"learning_rate": 5.168888888888889e-06, |
|
"loss": 0.0002, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 36.734693877551024, |
|
"grad_norm": 0.0045384918339550495, |
|
"learning_rate": 5.113333333333333e-06, |
|
"loss": 0.0002, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 37.074829931972786, |
|
"grad_norm": 0.0036523097660392523, |
|
"learning_rate": 5.057777777777778e-06, |
|
"loss": 0.0002, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 37.414965986394556, |
|
"grad_norm": 0.003838042262941599, |
|
"learning_rate": 5.002222222222223e-06, |
|
"loss": 0.0002, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 37.755102040816325, |
|
"grad_norm": 0.0043487842194736, |
|
"learning_rate": 4.946666666666667e-06, |
|
"loss": 0.0002, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 38.095238095238095, |
|
"grad_norm": 0.004179787822067738, |
|
"learning_rate": 4.891111111111111e-06, |
|
"loss": 0.0002, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 38.435374149659864, |
|
"grad_norm": 0.0036503339651972055, |
|
"learning_rate": 4.835555555555556e-06, |
|
"loss": 0.0002, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 38.775510204081634, |
|
"grad_norm": 0.0033976498525589705, |
|
"learning_rate": 4.78e-06, |
|
"loss": 0.0002, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 39.1156462585034, |
|
"grad_norm": 0.0038732371758669615, |
|
"learning_rate": 4.724444444444445e-06, |
|
"loss": 0.0002, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 39.45578231292517, |
|
"grad_norm": 0.003690896322950721, |
|
"learning_rate": 4.66888888888889e-06, |
|
"loss": 0.0002, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 39.795918367346935, |
|
"grad_norm": 0.005354354623705149, |
|
"learning_rate": 4.613333333333334e-06, |
|
"loss": 0.0002, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 40.136054421768705, |
|
"grad_norm": 0.0036710058338940144, |
|
"learning_rate": 4.557777777777778e-06, |
|
"loss": 0.0002, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 40.476190476190474, |
|
"grad_norm": 0.005290627479553223, |
|
"learning_rate": 4.502222222222223e-06, |
|
"loss": 0.0002, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 40.816326530612244, |
|
"grad_norm": 0.003753775032237172, |
|
"learning_rate": 4.446666666666667e-06, |
|
"loss": 0.0002, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 40.816326530612244, |
|
"eval_loss": 0.4882185459136963, |
|
"eval_runtime": 93.7044, |
|
"eval_samples_per_second": 2.775, |
|
"eval_steps_per_second": 0.181, |
|
"eval_wer": 0.2174561730697501, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 41.156462585034014, |
|
"grad_norm": 0.004405771382153034, |
|
"learning_rate": 4.391111111111112e-06, |
|
"loss": 0.0002, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 41.49659863945578, |
|
"grad_norm": 0.0036535647232085466, |
|
"learning_rate": 4.3355555555555565e-06, |
|
"loss": 0.0002, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 41.83673469387755, |
|
"grad_norm": 0.0036972814705222845, |
|
"learning_rate": 4.2800000000000005e-06, |
|
"loss": 0.0002, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 42.17687074829932, |
|
"grad_norm": 0.004110525827854872, |
|
"learning_rate": 4.2244444444444446e-06, |
|
"loss": 0.0002, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 42.51700680272109, |
|
"grad_norm": 0.0035640313290059566, |
|
"learning_rate": 4.168888888888889e-06, |
|
"loss": 0.0002, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 42.857142857142854, |
|
"grad_norm": 0.004424062091857195, |
|
"learning_rate": 4.1133333333333335e-06, |
|
"loss": 0.0002, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 43.197278911564624, |
|
"grad_norm": 0.0032335869036614895, |
|
"learning_rate": 4.057777777777778e-06, |
|
"loss": 0.0002, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 43.53741496598639, |
|
"grad_norm": 0.0037836297415196896, |
|
"learning_rate": 4.002222222222222e-06, |
|
"loss": 0.0002, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 43.87755102040816, |
|
"grad_norm": 0.003560603130608797, |
|
"learning_rate": 3.946666666666667e-06, |
|
"loss": 0.0002, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 44.21768707482993, |
|
"grad_norm": 0.003510043490678072, |
|
"learning_rate": 3.891111111111111e-06, |
|
"loss": 0.0002, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 44.5578231292517, |
|
"grad_norm": 0.0028691268526017666, |
|
"learning_rate": 3.835555555555555e-06, |
|
"loss": 0.0002, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 44.89795918367347, |
|
"grad_norm": 0.0031337698455899954, |
|
"learning_rate": 3.7800000000000002e-06, |
|
"loss": 0.0001, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 45.23809523809524, |
|
"grad_norm": 0.00317736086435616, |
|
"learning_rate": 3.724444444444445e-06, |
|
"loss": 0.0001, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 45.578231292517, |
|
"grad_norm": 0.0029643489979207516, |
|
"learning_rate": 3.668888888888889e-06, |
|
"loss": 0.0002, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 45.91836734693877, |
|
"grad_norm": 0.003078688168898225, |
|
"learning_rate": 3.6133333333333336e-06, |
|
"loss": 0.0001, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 46.25850340136054, |
|
"grad_norm": 0.003043568693101406, |
|
"learning_rate": 3.5577777777777785e-06, |
|
"loss": 0.0001, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 46.59863945578231, |
|
"grad_norm": 0.003218689002096653, |
|
"learning_rate": 3.5022222222222225e-06, |
|
"loss": 0.0001, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 46.93877551020408, |
|
"grad_norm": 0.003266324056312442, |
|
"learning_rate": 3.446666666666667e-06, |
|
"loss": 0.0001, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 47.27891156462585, |
|
"grad_norm": 0.003477707039564848, |
|
"learning_rate": 3.391111111111111e-06, |
|
"loss": 0.0001, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 47.61904761904762, |
|
"grad_norm": 0.0027373475022614002, |
|
"learning_rate": 3.335555555555556e-06, |
|
"loss": 0.0001, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 47.95918367346939, |
|
"grad_norm": 0.002786448458209634, |
|
"learning_rate": 3.2800000000000004e-06, |
|
"loss": 0.0001, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 48.29931972789116, |
|
"grad_norm": 0.002394324168562889, |
|
"learning_rate": 3.2244444444444444e-06, |
|
"loss": 0.0001, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 48.63945578231292, |
|
"grad_norm": 0.003250208217650652, |
|
"learning_rate": 3.1688888888888893e-06, |
|
"loss": 0.0001, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 48.97959183673469, |
|
"grad_norm": 0.0029996377415955067, |
|
"learning_rate": 3.1133333333333337e-06, |
|
"loss": 0.0001, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 49.31972789115646, |
|
"grad_norm": 0.0026746434159576893, |
|
"learning_rate": 3.0577777777777778e-06, |
|
"loss": 0.0001, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 49.65986394557823, |
|
"grad_norm": 0.00262379739433527, |
|
"learning_rate": 3.0022222222222227e-06, |
|
"loss": 0.0001, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 0.0029098980594426394, |
|
"learning_rate": 2.946666666666667e-06, |
|
"loss": 0.0001, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 50.34013605442177, |
|
"grad_norm": 0.002616139827296138, |
|
"learning_rate": 2.891111111111111e-06, |
|
"loss": 0.0001, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 50.68027210884354, |
|
"grad_norm": 0.0029571950435638428, |
|
"learning_rate": 2.835555555555556e-06, |
|
"loss": 0.0001, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 51.02040816326531, |
|
"grad_norm": 0.0027916007675230503, |
|
"learning_rate": 2.7800000000000005e-06, |
|
"loss": 0.0001, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 51.36054421768708, |
|
"grad_norm": 0.002735557733103633, |
|
"learning_rate": 2.7244444444444445e-06, |
|
"loss": 0.0001, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 51.70068027210884, |
|
"grad_norm": 0.0023191324435174465, |
|
"learning_rate": 2.6688888888888894e-06, |
|
"loss": 0.0001, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 52.04081632653061, |
|
"grad_norm": 0.0034847650676965714, |
|
"learning_rate": 2.6133333333333334e-06, |
|
"loss": 0.0001, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 52.38095238095238, |
|
"grad_norm": 0.002770556602627039, |
|
"learning_rate": 2.557777777777778e-06, |
|
"loss": 0.0001, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 52.72108843537415, |
|
"grad_norm": 0.0030505817849189043, |
|
"learning_rate": 2.5022222222222224e-06, |
|
"loss": 0.0001, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 53.06122448979592, |
|
"grad_norm": 0.003404865274205804, |
|
"learning_rate": 2.446666666666667e-06, |
|
"loss": 0.0001, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 53.40136054421769, |
|
"grad_norm": 0.0026544102001935244, |
|
"learning_rate": 2.3911111111111113e-06, |
|
"loss": 0.0001, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 53.74149659863946, |
|
"grad_norm": 0.00271439622156322, |
|
"learning_rate": 2.3355555555555557e-06, |
|
"loss": 0.0001, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 54.08163265306123, |
|
"grad_norm": 0.0033124638721346855, |
|
"learning_rate": 2.28e-06, |
|
"loss": 0.0001, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 54.421768707483, |
|
"grad_norm": 0.0025922644417732954, |
|
"learning_rate": 2.2244444444444447e-06, |
|
"loss": 0.0001, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 54.421768707483, |
|
"eval_loss": 0.5051469206809998, |
|
"eval_runtime": 95.0455, |
|
"eval_samples_per_second": 2.736, |
|
"eval_steps_per_second": 0.179, |
|
"eval_wer": 0.21484520701230883, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 54.76190476190476, |
|
"grad_norm": 0.0020597511902451515, |
|
"learning_rate": 2.168888888888889e-06, |
|
"loss": 0.0001, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 55.10204081632653, |
|
"grad_norm": 0.002817349275574088, |
|
"learning_rate": 2.1133333333333336e-06, |
|
"loss": 0.0001, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 55.4421768707483, |
|
"grad_norm": 0.003287636674940586, |
|
"learning_rate": 2.057777777777778e-06, |
|
"loss": 0.0001, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 55.78231292517007, |
|
"grad_norm": 0.00247744913212955, |
|
"learning_rate": 2.0022222222222225e-06, |
|
"loss": 0.0001, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 56.12244897959184, |
|
"grad_norm": 0.003431103890761733, |
|
"learning_rate": 1.9466666666666665e-06, |
|
"loss": 0.0001, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 56.46258503401361, |
|
"grad_norm": 0.0024367747828364372, |
|
"learning_rate": 1.8911111111111114e-06, |
|
"loss": 0.0001, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 56.802721088435376, |
|
"grad_norm": 0.0022823926992714405, |
|
"learning_rate": 1.8355555555555557e-06, |
|
"loss": 0.0001, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 57.142857142857146, |
|
"grad_norm": 0.0022000963799655437, |
|
"learning_rate": 1.7800000000000001e-06, |
|
"loss": 0.0001, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 57.48299319727891, |
|
"grad_norm": 0.0023311020340770483, |
|
"learning_rate": 1.7244444444444448e-06, |
|
"loss": 0.0001, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 57.82312925170068, |
|
"grad_norm": 0.002466644160449505, |
|
"learning_rate": 1.668888888888889e-06, |
|
"loss": 0.0001, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 58.16326530612245, |
|
"grad_norm": 0.0023317814338952303, |
|
"learning_rate": 1.6133333333333335e-06, |
|
"loss": 0.0001, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 58.50340136054422, |
|
"grad_norm": 0.0034895280841737986, |
|
"learning_rate": 1.5577777777777777e-06, |
|
"loss": 0.0001, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 58.843537414965986, |
|
"grad_norm": 0.002141441684216261, |
|
"learning_rate": 1.5022222222222224e-06, |
|
"loss": 0.0001, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 59.183673469387756, |
|
"grad_norm": 0.0023929886519908905, |
|
"learning_rate": 1.4466666666666669e-06, |
|
"loss": 0.0001, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 59.523809523809526, |
|
"grad_norm": 0.002914367476478219, |
|
"learning_rate": 1.3911111111111111e-06, |
|
"loss": 0.0001, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 59.863945578231295, |
|
"grad_norm": 0.0023239688016474247, |
|
"learning_rate": 1.3355555555555558e-06, |
|
"loss": 0.0001, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 60.204081632653065, |
|
"grad_norm": 0.00241728313267231, |
|
"learning_rate": 1.28e-06, |
|
"loss": 0.0001, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 60.54421768707483, |
|
"grad_norm": 0.0032376388553529978, |
|
"learning_rate": 1.2244444444444445e-06, |
|
"loss": 0.0001, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 60.8843537414966, |
|
"grad_norm": 0.003632117761299014, |
|
"learning_rate": 1.168888888888889e-06, |
|
"loss": 0.0001, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 61.224489795918366, |
|
"grad_norm": 0.002522936789318919, |
|
"learning_rate": 1.1133333333333334e-06, |
|
"loss": 0.0001, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 61.564625850340136, |
|
"grad_norm": 0.002181953750550747, |
|
"learning_rate": 1.0577777777777779e-06, |
|
"loss": 0.0001, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 61.904761904761905, |
|
"grad_norm": 0.0020987866446375847, |
|
"learning_rate": 1.0022222222222223e-06, |
|
"loss": 0.0001, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 62.244897959183675, |
|
"grad_norm": 0.002102503553032875, |
|
"learning_rate": 9.466666666666667e-07, |
|
"loss": 0.0001, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 62.585034013605444, |
|
"grad_norm": 0.0019837727304548025, |
|
"learning_rate": 8.911111111111112e-07, |
|
"loss": 0.0001, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 62.925170068027214, |
|
"grad_norm": 0.002303441520780325, |
|
"learning_rate": 8.355555555555556e-07, |
|
"loss": 0.0001, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 63.265306122448976, |
|
"grad_norm": 0.007395027671009302, |
|
"learning_rate": 7.8e-07, |
|
"loss": 0.0001, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 63.605442176870746, |
|
"grad_norm": 0.002733208704739809, |
|
"learning_rate": 7.244444444444446e-07, |
|
"loss": 0.0001, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 63.945578231292515, |
|
"grad_norm": 0.0020845523104071617, |
|
"learning_rate": 6.68888888888889e-07, |
|
"loss": 0.0001, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 64.28571428571429, |
|
"grad_norm": 0.0019409642554819584, |
|
"learning_rate": 6.133333333333333e-07, |
|
"loss": 0.0001, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 64.62585034013605, |
|
"grad_norm": 0.00258248602040112, |
|
"learning_rate": 5.577777777777779e-07, |
|
"loss": 0.0001, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 64.96598639455782, |
|
"grad_norm": 0.0025006316136568785, |
|
"learning_rate": 5.022222222222222e-07, |
|
"loss": 0.0001, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 65.3061224489796, |
|
"grad_norm": 0.0022064538206905127, |
|
"learning_rate": 4.466666666666667e-07, |
|
"loss": 0.0001, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 65.64625850340136, |
|
"grad_norm": 0.002108414890244603, |
|
"learning_rate": 3.9111111111111115e-07, |
|
"loss": 0.0001, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 65.98639455782313, |
|
"grad_norm": 0.0021663971710950136, |
|
"learning_rate": 3.3555555555555556e-07, |
|
"loss": 0.0001, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 66.3265306122449, |
|
"grad_norm": 0.00204038736410439, |
|
"learning_rate": 2.8e-07, |
|
"loss": 0.0001, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 66.66666666666667, |
|
"grad_norm": 0.0022622975520789623, |
|
"learning_rate": 2.2444444444444445e-07, |
|
"loss": 0.0001, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 67.00680272108843, |
|
"grad_norm": 0.0033368293661624193, |
|
"learning_rate": 1.6888888888888888e-07, |
|
"loss": 0.0001, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 67.34693877551021, |
|
"grad_norm": 0.0019737225957214832, |
|
"learning_rate": 1.1333333333333336e-07, |
|
"loss": 0.0001, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 67.68707482993197, |
|
"grad_norm": 0.0019130747532472014, |
|
"learning_rate": 5.777777777777778e-08, |
|
"loss": 0.0001, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 68.02721088435374, |
|
"grad_norm": 0.002000050852075219, |
|
"learning_rate": 2.2222222222222225e-09, |
|
"loss": 0.0001, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 68.02721088435374, |
|
"eval_loss": 0.5118595957756042, |
|
"eval_runtime": 95.0278, |
|
"eval_samples_per_second": 2.736, |
|
"eval_steps_per_second": 0.179, |
|
"eval_wer": 0.21671018276762402, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 68.02721088435374, |
|
"step": 5000, |
|
"total_flos": 3.378304801456128e+20, |
|
"train_loss": 0.03018118931162171, |
|
"train_runtime": 39486.7724, |
|
"train_samples_per_second": 4.052, |
|
"train_steps_per_second": 0.127 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 69, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.378304801456128e+20, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|