{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 68.02721088435374,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.3401360544217687,
"grad_norm": 6.3686203956604,
"learning_rate": 5.000000000000001e-07,
"loss": 1.2352,
"step": 25
},
{
"epoch": 0.6802721088435374,
"grad_norm": 4.156219959259033,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.7799,
"step": 50
},
{
"epoch": 1.0204081632653061,
"grad_norm": 4.332057952880859,
"learning_rate": 1.5e-06,
"loss": 0.5115,
"step": 75
},
{
"epoch": 1.3605442176870748,
"grad_norm": 3.7332186698913574,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.4105,
"step": 100
},
{
"epoch": 1.7006802721088436,
"grad_norm": 3.6035523414611816,
"learning_rate": 2.5e-06,
"loss": 0.374,
"step": 125
},
{
"epoch": 2.0408163265306123,
"grad_norm": 2.3493571281433105,
"learning_rate": 3e-06,
"loss": 0.3344,
"step": 150
},
{
"epoch": 2.380952380952381,
"grad_norm": 3.0683202743530273,
"learning_rate": 3.48e-06,
"loss": 0.248,
"step": 175
},
{
"epoch": 2.7210884353741496,
"grad_norm": 3.189012289047241,
"learning_rate": 3.980000000000001e-06,
"loss": 0.2395,
"step": 200
},
{
"epoch": 3.061224489795918,
"grad_norm": 2.2101962566375732,
"learning_rate": 4.48e-06,
"loss": 0.2258,
"step": 225
},
{
"epoch": 3.4013605442176873,
"grad_norm": 2.2124788761138916,
"learning_rate": 4.980000000000001e-06,
"loss": 0.1494,
"step": 250
},
{
"epoch": 3.741496598639456,
"grad_norm": 3.446359395980835,
"learning_rate": 5.480000000000001e-06,
"loss": 0.1512,
"step": 275
},
{
"epoch": 4.081632653061225,
"grad_norm": 2.592689037322998,
"learning_rate": 5.98e-06,
"loss": 0.1359,
"step": 300
},
{
"epoch": 4.421768707482993,
"grad_norm": 3.1709280014038086,
"learning_rate": 6.480000000000001e-06,
"loss": 0.0859,
"step": 325
},
{
"epoch": 4.761904761904762,
"grad_norm": 2.485269546508789,
"learning_rate": 6.98e-06,
"loss": 0.0958,
"step": 350
},
{
"epoch": 5.1020408163265305,
"grad_norm": 1.868928074836731,
"learning_rate": 7.48e-06,
"loss": 0.0846,
"step": 375
},
{
"epoch": 5.442176870748299,
"grad_norm": 3.283317804336548,
"learning_rate": 7.980000000000002e-06,
"loss": 0.0575,
"step": 400
},
{
"epoch": 5.782312925170068,
"grad_norm": 2.18278431892395,
"learning_rate": 8.48e-06,
"loss": 0.075,
"step": 425
},
{
"epoch": 6.122448979591836,
"grad_norm": 1.7689893245697021,
"learning_rate": 8.98e-06,
"loss": 0.0637,
"step": 450
},
{
"epoch": 6.462585034013605,
"grad_norm": 2.60971736907959,
"learning_rate": 9.48e-06,
"loss": 0.0517,
"step": 475
},
{
"epoch": 6.802721088435375,
"grad_norm": 1.903644323348999,
"learning_rate": 9.980000000000001e-06,
"loss": 0.0578,
"step": 500
},
{
"epoch": 7.142857142857143,
"grad_norm": 1.4152497053146362,
"learning_rate": 9.946666666666667e-06,
"loss": 0.0565,
"step": 525
},
{
"epoch": 7.482993197278912,
"grad_norm": 2.119438409805298,
"learning_rate": 9.891111111111113e-06,
"loss": 0.0436,
"step": 550
},
{
"epoch": 7.8231292517006805,
"grad_norm": 1.8895118236541748,
"learning_rate": 9.835555555555556e-06,
"loss": 0.042,
"step": 575
},
{
"epoch": 8.16326530612245,
"grad_norm": 1.7676234245300293,
"learning_rate": 9.780000000000001e-06,
"loss": 0.0351,
"step": 600
},
{
"epoch": 8.503401360544217,
"grad_norm": 1.8845597505569458,
"learning_rate": 9.724444444444445e-06,
"loss": 0.0279,
"step": 625
},
{
"epoch": 8.843537414965986,
"grad_norm": 9.495149612426758,
"learning_rate": 9.66888888888889e-06,
"loss": 0.031,
"step": 650
},
{
"epoch": 9.183673469387756,
"grad_norm": 1.6925195455551147,
"learning_rate": 9.613333333333335e-06,
"loss": 0.0303,
"step": 675
},
{
"epoch": 9.523809523809524,
"grad_norm": 1.4979898929595947,
"learning_rate": 9.557777777777777e-06,
"loss": 0.023,
"step": 700
},
{
"epoch": 9.863945578231293,
"grad_norm": 2.2269773483276367,
"learning_rate": 9.502222222222223e-06,
"loss": 0.0261,
"step": 725
},
{
"epoch": 10.204081632653061,
"grad_norm": 1.0259639024734497,
"learning_rate": 9.446666666666667e-06,
"loss": 0.0226,
"step": 750
},
{
"epoch": 10.54421768707483,
"grad_norm": 1.9924999475479126,
"learning_rate": 9.391111111111111e-06,
"loss": 0.0205,
"step": 775
},
{
"epoch": 10.884353741496598,
"grad_norm": 1.616970419883728,
"learning_rate": 9.335555555555557e-06,
"loss": 0.0199,
"step": 800
},
{
"epoch": 11.224489795918368,
"grad_norm": 0.922492504119873,
"learning_rate": 9.280000000000001e-06,
"loss": 0.0161,
"step": 825
},
{
"epoch": 11.564625850340136,
"grad_norm": 2.508662223815918,
"learning_rate": 9.224444444444445e-06,
"loss": 0.0145,
"step": 850
},
{
"epoch": 11.904761904761905,
"grad_norm": 1.371565341949463,
"learning_rate": 9.168888888888889e-06,
"loss": 0.0179,
"step": 875
},
{
"epoch": 12.244897959183673,
"grad_norm": 1.303175687789917,
"learning_rate": 9.113333333333335e-06,
"loss": 0.0155,
"step": 900
},
{
"epoch": 12.585034013605442,
"grad_norm": 1.1102138757705688,
"learning_rate": 9.057777777777779e-06,
"loss": 0.012,
"step": 925
},
{
"epoch": 12.92517006802721,
"grad_norm": 0.8504889011383057,
"learning_rate": 9.002222222222223e-06,
"loss": 0.0121,
"step": 950
},
{
"epoch": 13.26530612244898,
"grad_norm": 0.8174204230308533,
"learning_rate": 8.946666666666669e-06,
"loss": 0.0106,
"step": 975
},
{
"epoch": 13.60544217687075,
"grad_norm": 1.821559190750122,
"learning_rate": 8.891111111111111e-06,
"loss": 0.0112,
"step": 1000
},
{
"epoch": 13.60544217687075,
"eval_loss": 0.39124733209609985,
"eval_runtime": 93.6528,
"eval_samples_per_second": 2.776,
"eval_steps_per_second": 0.182,
"eval_wer": 0.23946288698246923,
"step": 1000
},
{
"epoch": 13.945578231292517,
"grad_norm": 1.2810653448104858,
"learning_rate": 8.835555555555557e-06,
"loss": 0.0111,
"step": 1025
},
{
"epoch": 14.285714285714286,
"grad_norm": 1.2741467952728271,
"learning_rate": 8.78e-06,
"loss": 0.0097,
"step": 1050
},
{
"epoch": 14.625850340136054,
"grad_norm": 0.8524342179298401,
"learning_rate": 8.724444444444445e-06,
"loss": 0.0076,
"step": 1075
},
{
"epoch": 14.965986394557824,
"grad_norm": 1.643485426902771,
"learning_rate": 8.66888888888889e-06,
"loss": 0.0074,
"step": 1100
},
{
"epoch": 15.306122448979592,
"grad_norm": 0.40055137872695923,
"learning_rate": 8.613333333333333e-06,
"loss": 0.007,
"step": 1125
},
{
"epoch": 15.646258503401361,
"grad_norm": 1.1712241172790527,
"learning_rate": 8.557777777777778e-06,
"loss": 0.0072,
"step": 1150
},
{
"epoch": 15.986394557823129,
"grad_norm": 0.32212740182876587,
"learning_rate": 8.502222222222223e-06,
"loss": 0.007,
"step": 1175
},
{
"epoch": 16.3265306122449,
"grad_norm": 0.2166888266801834,
"learning_rate": 8.446666666666668e-06,
"loss": 0.0054,
"step": 1200
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.12256942689418793,
"learning_rate": 8.391111111111112e-06,
"loss": 0.0039,
"step": 1225
},
{
"epoch": 17.006802721088434,
"grad_norm": 0.26391106843948364,
"learning_rate": 8.335555555555556e-06,
"loss": 0.0042,
"step": 1250
},
{
"epoch": 17.346938775510203,
"grad_norm": 0.24293136596679688,
"learning_rate": 8.28e-06,
"loss": 0.0036,
"step": 1275
},
{
"epoch": 17.687074829931973,
"grad_norm": 0.27556732296943665,
"learning_rate": 8.224444444444444e-06,
"loss": 0.0028,
"step": 1300
},
{
"epoch": 18.027210884353742,
"grad_norm": 0.9470342397689819,
"learning_rate": 8.16888888888889e-06,
"loss": 0.0042,
"step": 1325
},
{
"epoch": 18.367346938775512,
"grad_norm": 0.14824901521205902,
"learning_rate": 8.113333333333334e-06,
"loss": 0.0036,
"step": 1350
},
{
"epoch": 18.707482993197278,
"grad_norm": 1.2378164529800415,
"learning_rate": 8.057777777777778e-06,
"loss": 0.0046,
"step": 1375
},
{
"epoch": 19.047619047619047,
"grad_norm": 2.7857964038848877,
"learning_rate": 8.002222222222222e-06,
"loss": 0.004,
"step": 1400
},
{
"epoch": 19.387755102040817,
"grad_norm": 0.5624294281005859,
"learning_rate": 7.946666666666666e-06,
"loss": 0.0073,
"step": 1425
},
{
"epoch": 19.727891156462587,
"grad_norm": 0.18347227573394775,
"learning_rate": 7.891111111111112e-06,
"loss": 0.0058,
"step": 1450
},
{
"epoch": 20.068027210884352,
"grad_norm": 0.3734131455421448,
"learning_rate": 7.835555555555556e-06,
"loss": 0.0066,
"step": 1475
},
{
"epoch": 20.408163265306122,
"grad_norm": 0.6362162828445435,
"learning_rate": 7.78e-06,
"loss": 0.0075,
"step": 1500
},
{
"epoch": 20.74829931972789,
"grad_norm": 0.8834488391876221,
"learning_rate": 7.724444444444446e-06,
"loss": 0.0057,
"step": 1525
},
{
"epoch": 21.08843537414966,
"grad_norm": 0.06029968708753586,
"learning_rate": 7.66888888888889e-06,
"loss": 0.0038,
"step": 1550
},
{
"epoch": 21.428571428571427,
"grad_norm": 1.0105019807815552,
"learning_rate": 7.613333333333334e-06,
"loss": 0.0039,
"step": 1575
},
{
"epoch": 21.768707482993197,
"grad_norm": 0.5381556153297424,
"learning_rate": 7.557777777777779e-06,
"loss": 0.0036,
"step": 1600
},
{
"epoch": 22.108843537414966,
"grad_norm": 0.08822619915008545,
"learning_rate": 7.502222222222223e-06,
"loss": 0.004,
"step": 1625
},
{
"epoch": 22.448979591836736,
"grad_norm": 0.43402913212776184,
"learning_rate": 7.446666666666668e-06,
"loss": 0.0029,
"step": 1650
},
{
"epoch": 22.7891156462585,
"grad_norm": 0.9147214293479919,
"learning_rate": 7.3911111111111125e-06,
"loss": 0.0024,
"step": 1675
},
{
"epoch": 23.12925170068027,
"grad_norm": 0.48390820622444153,
"learning_rate": 7.335555555555556e-06,
"loss": 0.0036,
"step": 1700
},
{
"epoch": 23.46938775510204,
"grad_norm": 0.10725089907646179,
"learning_rate": 7.280000000000001e-06,
"loss": 0.0023,
"step": 1725
},
{
"epoch": 23.80952380952381,
"grad_norm": 0.09872180968523026,
"learning_rate": 7.224444444444445e-06,
"loss": 0.0018,
"step": 1750
},
{
"epoch": 24.14965986394558,
"grad_norm": 0.6679806113243103,
"learning_rate": 7.1688888888888895e-06,
"loss": 0.0017,
"step": 1775
},
{
"epoch": 24.489795918367346,
"grad_norm": 0.02364278770983219,
"learning_rate": 7.113333333333334e-06,
"loss": 0.001,
"step": 1800
},
{
"epoch": 24.829931972789115,
"grad_norm": 0.02158285863697529,
"learning_rate": 7.057777777777778e-06,
"loss": 0.0008,
"step": 1825
},
{
"epoch": 25.170068027210885,
"grad_norm": 0.014277754351496696,
"learning_rate": 7.0022222222222225e-06,
"loss": 0.0007,
"step": 1850
},
{
"epoch": 25.510204081632654,
"grad_norm": 0.012241716496646404,
"learning_rate": 6.946666666666667e-06,
"loss": 0.0005,
"step": 1875
},
{
"epoch": 25.85034013605442,
"grad_norm": 0.02822299115359783,
"learning_rate": 6.891111111111111e-06,
"loss": 0.0005,
"step": 1900
},
{
"epoch": 26.19047619047619,
"grad_norm": 0.009908878244459629,
"learning_rate": 6.835555555555556e-06,
"loss": 0.0004,
"step": 1925
},
{
"epoch": 26.53061224489796,
"grad_norm": 0.008494613692164421,
"learning_rate": 6.780000000000001e-06,
"loss": 0.0004,
"step": 1950
},
{
"epoch": 26.87074829931973,
"grad_norm": 0.007728059310466051,
"learning_rate": 6.724444444444444e-06,
"loss": 0.0004,
"step": 1975
},
{
"epoch": 27.2108843537415,
"grad_norm": 0.007557597942650318,
"learning_rate": 6.668888888888889e-06,
"loss": 0.0004,
"step": 2000
},
{
"epoch": 27.2108843537415,
"eval_loss": 0.45324987173080444,
"eval_runtime": 93.804,
"eval_samples_per_second": 2.772,
"eval_steps_per_second": 0.181,
"eval_wer": 0.2245430809399478,
"step": 2000
},
{
"epoch": 27.551020408163264,
"grad_norm": 0.009665679186582565,
"learning_rate": 6.613333333333334e-06,
"loss": 0.0004,
"step": 2025
},
{
"epoch": 27.891156462585034,
"grad_norm": 0.006815009750425816,
"learning_rate": 6.557777777777778e-06,
"loss": 0.0004,
"step": 2050
},
{
"epoch": 28.231292517006803,
"grad_norm": 0.007364605087786913,
"learning_rate": 6.502222222222223e-06,
"loss": 0.0003,
"step": 2075
},
{
"epoch": 28.571428571428573,
"grad_norm": 0.006635705474764109,
"learning_rate": 6.446666666666668e-06,
"loss": 0.0003,
"step": 2100
},
{
"epoch": 28.91156462585034,
"grad_norm": 0.008073186501860619,
"learning_rate": 6.391111111111111e-06,
"loss": 0.0003,
"step": 2125
},
{
"epoch": 29.25170068027211,
"grad_norm": 0.006342068314552307,
"learning_rate": 6.335555555555556e-06,
"loss": 0.0003,
"step": 2150
},
{
"epoch": 29.591836734693878,
"grad_norm": 0.006897253915667534,
"learning_rate": 6.280000000000001e-06,
"loss": 0.0003,
"step": 2175
},
{
"epoch": 29.931972789115648,
"grad_norm": 0.006329766474664211,
"learning_rate": 6.224444444444445e-06,
"loss": 0.0003,
"step": 2200
},
{
"epoch": 30.272108843537413,
"grad_norm": 0.006696599069982767,
"learning_rate": 6.16888888888889e-06,
"loss": 0.0003,
"step": 2225
},
{
"epoch": 30.612244897959183,
"grad_norm": 0.0058494312688708305,
"learning_rate": 6.113333333333333e-06,
"loss": 0.0003,
"step": 2250
},
{
"epoch": 30.952380952380953,
"grad_norm": 0.005851502064615488,
"learning_rate": 6.057777777777778e-06,
"loss": 0.0003,
"step": 2275
},
{
"epoch": 31.292517006802722,
"grad_norm": 0.0047736396081745625,
"learning_rate": 6.002222222222223e-06,
"loss": 0.0003,
"step": 2300
},
{
"epoch": 31.632653061224488,
"grad_norm": 0.006324047688394785,
"learning_rate": 5.946666666666668e-06,
"loss": 0.0003,
"step": 2325
},
{
"epoch": 31.972789115646258,
"grad_norm": 0.005418767221271992,
"learning_rate": 5.891111111111112e-06,
"loss": 0.0003,
"step": 2350
},
{
"epoch": 32.31292517006803,
"grad_norm": 0.005563849117606878,
"learning_rate": 5.8355555555555565e-06,
"loss": 0.0003,
"step": 2375
},
{
"epoch": 32.6530612244898,
"grad_norm": 0.005108444020152092,
"learning_rate": 5.78e-06,
"loss": 0.0002,
"step": 2400
},
{
"epoch": 32.993197278911566,
"grad_norm": 0.004787669517099857,
"learning_rate": 5.724444444444445e-06,
"loss": 0.0003,
"step": 2425
},
{
"epoch": 33.333333333333336,
"grad_norm": 0.004051292315125465,
"learning_rate": 5.6688888888888895e-06,
"loss": 0.0002,
"step": 2450
},
{
"epoch": 33.673469387755105,
"grad_norm": 0.005220952443778515,
"learning_rate": 5.613333333333334e-06,
"loss": 0.0002,
"step": 2475
},
{
"epoch": 34.01360544217687,
"grad_norm": 0.0054339151829481125,
"learning_rate": 5.557777777777778e-06,
"loss": 0.0002,
"step": 2500
},
{
"epoch": 34.35374149659864,
"grad_norm": 0.004454713314771652,
"learning_rate": 5.5022222222222224e-06,
"loss": 0.0002,
"step": 2525
},
{
"epoch": 34.69387755102041,
"grad_norm": 0.005186771042644978,
"learning_rate": 5.4466666666666665e-06,
"loss": 0.0002,
"step": 2550
},
{
"epoch": 35.034013605442176,
"grad_norm": 0.004502983298152685,
"learning_rate": 5.391111111111111e-06,
"loss": 0.0002,
"step": 2575
},
{
"epoch": 35.374149659863946,
"grad_norm": 0.004623442888259888,
"learning_rate": 5.335555555555556e-06,
"loss": 0.0002,
"step": 2600
},
{
"epoch": 35.714285714285715,
"grad_norm": 0.00428406847640872,
"learning_rate": 5.28e-06,
"loss": 0.0002,
"step": 2625
},
{
"epoch": 36.054421768707485,
"grad_norm": 0.004207184072583914,
"learning_rate": 5.224444444444445e-06,
"loss": 0.0002,
"step": 2650
},
{
"epoch": 36.394557823129254,
"grad_norm": 0.004264296032488346,
"learning_rate": 5.168888888888889e-06,
"loss": 0.0002,
"step": 2675
},
{
"epoch": 36.734693877551024,
"grad_norm": 0.0045384918339550495,
"learning_rate": 5.113333333333333e-06,
"loss": 0.0002,
"step": 2700
},
{
"epoch": 37.074829931972786,
"grad_norm": 0.0036523097660392523,
"learning_rate": 5.057777777777778e-06,
"loss": 0.0002,
"step": 2725
},
{
"epoch": 37.414965986394556,
"grad_norm": 0.003838042262941599,
"learning_rate": 5.002222222222223e-06,
"loss": 0.0002,
"step": 2750
},
{
"epoch": 37.755102040816325,
"grad_norm": 0.0043487842194736,
"learning_rate": 4.946666666666667e-06,
"loss": 0.0002,
"step": 2775
},
{
"epoch": 38.095238095238095,
"grad_norm": 0.004179787822067738,
"learning_rate": 4.891111111111111e-06,
"loss": 0.0002,
"step": 2800
},
{
"epoch": 38.435374149659864,
"grad_norm": 0.0036503339651972055,
"learning_rate": 4.835555555555556e-06,
"loss": 0.0002,
"step": 2825
},
{
"epoch": 38.775510204081634,
"grad_norm": 0.0033976498525589705,
"learning_rate": 4.78e-06,
"loss": 0.0002,
"step": 2850
},
{
"epoch": 39.1156462585034,
"grad_norm": 0.0038732371758669615,
"learning_rate": 4.724444444444445e-06,
"loss": 0.0002,
"step": 2875
},
{
"epoch": 39.45578231292517,
"grad_norm": 0.003690896322950721,
"learning_rate": 4.66888888888889e-06,
"loss": 0.0002,
"step": 2900
},
{
"epoch": 39.795918367346935,
"grad_norm": 0.005354354623705149,
"learning_rate": 4.613333333333334e-06,
"loss": 0.0002,
"step": 2925
},
{
"epoch": 40.136054421768705,
"grad_norm": 0.0036710058338940144,
"learning_rate": 4.557777777777778e-06,
"loss": 0.0002,
"step": 2950
},
{
"epoch": 40.476190476190474,
"grad_norm": 0.005290627479553223,
"learning_rate": 4.502222222222223e-06,
"loss": 0.0002,
"step": 2975
},
{
"epoch": 40.816326530612244,
"grad_norm": 0.003753775032237172,
"learning_rate": 4.446666666666667e-06,
"loss": 0.0002,
"step": 3000
},
{
"epoch": 40.816326530612244,
"eval_loss": 0.4882185459136963,
"eval_runtime": 93.7044,
"eval_samples_per_second": 2.775,
"eval_steps_per_second": 0.181,
"eval_wer": 0.2174561730697501,
"step": 3000
},
{
"epoch": 41.156462585034014,
"grad_norm": 0.004405771382153034,
"learning_rate": 4.391111111111112e-06,
"loss": 0.0002,
"step": 3025
},
{
"epoch": 41.49659863945578,
"grad_norm": 0.0036535647232085466,
"learning_rate": 4.3355555555555565e-06,
"loss": 0.0002,
"step": 3050
},
{
"epoch": 41.83673469387755,
"grad_norm": 0.0036972814705222845,
"learning_rate": 4.2800000000000005e-06,
"loss": 0.0002,
"step": 3075
},
{
"epoch": 42.17687074829932,
"grad_norm": 0.004110525827854872,
"learning_rate": 4.2244444444444446e-06,
"loss": 0.0002,
"step": 3100
},
{
"epoch": 42.51700680272109,
"grad_norm": 0.0035640313290059566,
"learning_rate": 4.168888888888889e-06,
"loss": 0.0002,
"step": 3125
},
{
"epoch": 42.857142857142854,
"grad_norm": 0.004424062091857195,
"learning_rate": 4.1133333333333335e-06,
"loss": 0.0002,
"step": 3150
},
{
"epoch": 43.197278911564624,
"grad_norm": 0.0032335869036614895,
"learning_rate": 4.057777777777778e-06,
"loss": 0.0002,
"step": 3175
},
{
"epoch": 43.53741496598639,
"grad_norm": 0.0037836297415196896,
"learning_rate": 4.002222222222222e-06,
"loss": 0.0002,
"step": 3200
},
{
"epoch": 43.87755102040816,
"grad_norm": 0.003560603130608797,
"learning_rate": 3.946666666666667e-06,
"loss": 0.0002,
"step": 3225
},
{
"epoch": 44.21768707482993,
"grad_norm": 0.003510043490678072,
"learning_rate": 3.891111111111111e-06,
"loss": 0.0002,
"step": 3250
},
{
"epoch": 44.5578231292517,
"grad_norm": 0.0028691268526017666,
"learning_rate": 3.835555555555555e-06,
"loss": 0.0002,
"step": 3275
},
{
"epoch": 44.89795918367347,
"grad_norm": 0.0031337698455899954,
"learning_rate": 3.7800000000000002e-06,
"loss": 0.0001,
"step": 3300
},
{
"epoch": 45.23809523809524,
"grad_norm": 0.00317736086435616,
"learning_rate": 3.724444444444445e-06,
"loss": 0.0001,
"step": 3325
},
{
"epoch": 45.578231292517,
"grad_norm": 0.0029643489979207516,
"learning_rate": 3.668888888888889e-06,
"loss": 0.0002,
"step": 3350
},
{
"epoch": 45.91836734693877,
"grad_norm": 0.003078688168898225,
"learning_rate": 3.6133333333333336e-06,
"loss": 0.0001,
"step": 3375
},
{
"epoch": 46.25850340136054,
"grad_norm": 0.003043568693101406,
"learning_rate": 3.5577777777777785e-06,
"loss": 0.0001,
"step": 3400
},
{
"epoch": 46.59863945578231,
"grad_norm": 0.003218689002096653,
"learning_rate": 3.5022222222222225e-06,
"loss": 0.0001,
"step": 3425
},
{
"epoch": 46.93877551020408,
"grad_norm": 0.003266324056312442,
"learning_rate": 3.446666666666667e-06,
"loss": 0.0001,
"step": 3450
},
{
"epoch": 47.27891156462585,
"grad_norm": 0.003477707039564848,
"learning_rate": 3.391111111111111e-06,
"loss": 0.0001,
"step": 3475
},
{
"epoch": 47.61904761904762,
"grad_norm": 0.0027373475022614002,
"learning_rate": 3.335555555555556e-06,
"loss": 0.0001,
"step": 3500
},
{
"epoch": 47.95918367346939,
"grad_norm": 0.002786448458209634,
"learning_rate": 3.2800000000000004e-06,
"loss": 0.0001,
"step": 3525
},
{
"epoch": 48.29931972789116,
"grad_norm": 0.002394324168562889,
"learning_rate": 3.2244444444444444e-06,
"loss": 0.0001,
"step": 3550
},
{
"epoch": 48.63945578231292,
"grad_norm": 0.003250208217650652,
"learning_rate": 3.1688888888888893e-06,
"loss": 0.0001,
"step": 3575
},
{
"epoch": 48.97959183673469,
"grad_norm": 0.0029996377415955067,
"learning_rate": 3.1133333333333337e-06,
"loss": 0.0001,
"step": 3600
},
{
"epoch": 49.31972789115646,
"grad_norm": 0.0026746434159576893,
"learning_rate": 3.0577777777777778e-06,
"loss": 0.0001,
"step": 3625
},
{
"epoch": 49.65986394557823,
"grad_norm": 0.00262379739433527,
"learning_rate": 3.0022222222222227e-06,
"loss": 0.0001,
"step": 3650
},
{
"epoch": 50.0,
"grad_norm": 0.0029098980594426394,
"learning_rate": 2.946666666666667e-06,
"loss": 0.0001,
"step": 3675
},
{
"epoch": 50.34013605442177,
"grad_norm": 0.002616139827296138,
"learning_rate": 2.891111111111111e-06,
"loss": 0.0001,
"step": 3700
},
{
"epoch": 50.68027210884354,
"grad_norm": 0.0029571950435638428,
"learning_rate": 2.835555555555556e-06,
"loss": 0.0001,
"step": 3725
},
{
"epoch": 51.02040816326531,
"grad_norm": 0.0027916007675230503,
"learning_rate": 2.7800000000000005e-06,
"loss": 0.0001,
"step": 3750
},
{
"epoch": 51.36054421768708,
"grad_norm": 0.002735557733103633,
"learning_rate": 2.7244444444444445e-06,
"loss": 0.0001,
"step": 3775
},
{
"epoch": 51.70068027210884,
"grad_norm": 0.0023191324435174465,
"learning_rate": 2.6688888888888894e-06,
"loss": 0.0001,
"step": 3800
},
{
"epoch": 52.04081632653061,
"grad_norm": 0.0034847650676965714,
"learning_rate": 2.6133333333333334e-06,
"loss": 0.0001,
"step": 3825
},
{
"epoch": 52.38095238095238,
"grad_norm": 0.002770556602627039,
"learning_rate": 2.557777777777778e-06,
"loss": 0.0001,
"step": 3850
},
{
"epoch": 52.72108843537415,
"grad_norm": 0.0030505817849189043,
"learning_rate": 2.5022222222222224e-06,
"loss": 0.0001,
"step": 3875
},
{
"epoch": 53.06122448979592,
"grad_norm": 0.003404865274205804,
"learning_rate": 2.446666666666667e-06,
"loss": 0.0001,
"step": 3900
},
{
"epoch": 53.40136054421769,
"grad_norm": 0.0026544102001935244,
"learning_rate": 2.3911111111111113e-06,
"loss": 0.0001,
"step": 3925
},
{
"epoch": 53.74149659863946,
"grad_norm": 0.00271439622156322,
"learning_rate": 2.3355555555555557e-06,
"loss": 0.0001,
"step": 3950
},
{
"epoch": 54.08163265306123,
"grad_norm": 0.0033124638721346855,
"learning_rate": 2.28e-06,
"loss": 0.0001,
"step": 3975
},
{
"epoch": 54.421768707483,
"grad_norm": 0.0025922644417732954,
"learning_rate": 2.2244444444444447e-06,
"loss": 0.0001,
"step": 4000
},
{
"epoch": 54.421768707483,
"eval_loss": 0.5051469206809998,
"eval_runtime": 95.0455,
"eval_samples_per_second": 2.736,
"eval_steps_per_second": 0.179,
"eval_wer": 0.21484520701230883,
"step": 4000
},
{
"epoch": 54.76190476190476,
"grad_norm": 0.0020597511902451515,
"learning_rate": 2.168888888888889e-06,
"loss": 0.0001,
"step": 4025
},
{
"epoch": 55.10204081632653,
"grad_norm": 0.002817349275574088,
"learning_rate": 2.1133333333333336e-06,
"loss": 0.0001,
"step": 4050
},
{
"epoch": 55.4421768707483,
"grad_norm": 0.003287636674940586,
"learning_rate": 2.057777777777778e-06,
"loss": 0.0001,
"step": 4075
},
{
"epoch": 55.78231292517007,
"grad_norm": 0.00247744913212955,
"learning_rate": 2.0022222222222225e-06,
"loss": 0.0001,
"step": 4100
},
{
"epoch": 56.12244897959184,
"grad_norm": 0.003431103890761733,
"learning_rate": 1.9466666666666665e-06,
"loss": 0.0001,
"step": 4125
},
{
"epoch": 56.46258503401361,
"grad_norm": 0.0024367747828364372,
"learning_rate": 1.8911111111111114e-06,
"loss": 0.0001,
"step": 4150
},
{
"epoch": 56.802721088435376,
"grad_norm": 0.0022823926992714405,
"learning_rate": 1.8355555555555557e-06,
"loss": 0.0001,
"step": 4175
},
{
"epoch": 57.142857142857146,
"grad_norm": 0.0022000963799655437,
"learning_rate": 1.7800000000000001e-06,
"loss": 0.0001,
"step": 4200
},
{
"epoch": 57.48299319727891,
"grad_norm": 0.0023311020340770483,
"learning_rate": 1.7244444444444448e-06,
"loss": 0.0001,
"step": 4225
},
{
"epoch": 57.82312925170068,
"grad_norm": 0.002466644160449505,
"learning_rate": 1.668888888888889e-06,
"loss": 0.0001,
"step": 4250
},
{
"epoch": 58.16326530612245,
"grad_norm": 0.0023317814338952303,
"learning_rate": 1.6133333333333335e-06,
"loss": 0.0001,
"step": 4275
},
{
"epoch": 58.50340136054422,
"grad_norm": 0.0034895280841737986,
"learning_rate": 1.5577777777777777e-06,
"loss": 0.0001,
"step": 4300
},
{
"epoch": 58.843537414965986,
"grad_norm": 0.002141441684216261,
"learning_rate": 1.5022222222222224e-06,
"loss": 0.0001,
"step": 4325
},
{
"epoch": 59.183673469387756,
"grad_norm": 0.0023929886519908905,
"learning_rate": 1.4466666666666669e-06,
"loss": 0.0001,
"step": 4350
},
{
"epoch": 59.523809523809526,
"grad_norm": 0.002914367476478219,
"learning_rate": 1.3911111111111111e-06,
"loss": 0.0001,
"step": 4375
},
{
"epoch": 59.863945578231295,
"grad_norm": 0.0023239688016474247,
"learning_rate": 1.3355555555555558e-06,
"loss": 0.0001,
"step": 4400
},
{
"epoch": 60.204081632653065,
"grad_norm": 0.00241728313267231,
"learning_rate": 1.28e-06,
"loss": 0.0001,
"step": 4425
},
{
"epoch": 60.54421768707483,
"grad_norm": 0.0032376388553529978,
"learning_rate": 1.2244444444444445e-06,
"loss": 0.0001,
"step": 4450
},
{
"epoch": 60.8843537414966,
"grad_norm": 0.003632117761299014,
"learning_rate": 1.168888888888889e-06,
"loss": 0.0001,
"step": 4475
},
{
"epoch": 61.224489795918366,
"grad_norm": 0.002522936789318919,
"learning_rate": 1.1133333333333334e-06,
"loss": 0.0001,
"step": 4500
},
{
"epoch": 61.564625850340136,
"grad_norm": 0.002181953750550747,
"learning_rate": 1.0577777777777779e-06,
"loss": 0.0001,
"step": 4525
},
{
"epoch": 61.904761904761905,
"grad_norm": 0.0020987866446375847,
"learning_rate": 1.0022222222222223e-06,
"loss": 0.0001,
"step": 4550
},
{
"epoch": 62.244897959183675,
"grad_norm": 0.002102503553032875,
"learning_rate": 9.466666666666667e-07,
"loss": 0.0001,
"step": 4575
},
{
"epoch": 62.585034013605444,
"grad_norm": 0.0019837727304548025,
"learning_rate": 8.911111111111112e-07,
"loss": 0.0001,
"step": 4600
},
{
"epoch": 62.925170068027214,
"grad_norm": 0.002303441520780325,
"learning_rate": 8.355555555555556e-07,
"loss": 0.0001,
"step": 4625
},
{
"epoch": 63.265306122448976,
"grad_norm": 0.007395027671009302,
"learning_rate": 7.8e-07,
"loss": 0.0001,
"step": 4650
},
{
"epoch": 63.605442176870746,
"grad_norm": 0.002733208704739809,
"learning_rate": 7.244444444444446e-07,
"loss": 0.0001,
"step": 4675
},
{
"epoch": 63.945578231292515,
"grad_norm": 0.0020845523104071617,
"learning_rate": 6.68888888888889e-07,
"loss": 0.0001,
"step": 4700
},
{
"epoch": 64.28571428571429,
"grad_norm": 0.0019409642554819584,
"learning_rate": 6.133333333333333e-07,
"loss": 0.0001,
"step": 4725
},
{
"epoch": 64.62585034013605,
"grad_norm": 0.00258248602040112,
"learning_rate": 5.577777777777779e-07,
"loss": 0.0001,
"step": 4750
},
{
"epoch": 64.96598639455782,
"grad_norm": 0.0025006316136568785,
"learning_rate": 5.022222222222222e-07,
"loss": 0.0001,
"step": 4775
},
{
"epoch": 65.3061224489796,
"grad_norm": 0.0022064538206905127,
"learning_rate": 4.466666666666667e-07,
"loss": 0.0001,
"step": 4800
},
{
"epoch": 65.64625850340136,
"grad_norm": 0.002108414890244603,
"learning_rate": 3.9111111111111115e-07,
"loss": 0.0001,
"step": 4825
},
{
"epoch": 65.98639455782313,
"grad_norm": 0.0021663971710950136,
"learning_rate": 3.3555555555555556e-07,
"loss": 0.0001,
"step": 4850
},
{
"epoch": 66.3265306122449,
"grad_norm": 0.00204038736410439,
"learning_rate": 2.8e-07,
"loss": 0.0001,
"step": 4875
},
{
"epoch": 66.66666666666667,
"grad_norm": 0.0022622975520789623,
"learning_rate": 2.2444444444444445e-07,
"loss": 0.0001,
"step": 4900
},
{
"epoch": 67.00680272108843,
"grad_norm": 0.0033368293661624193,
"learning_rate": 1.6888888888888888e-07,
"loss": 0.0001,
"step": 4925
},
{
"epoch": 67.34693877551021,
"grad_norm": 0.0019737225957214832,
"learning_rate": 1.1333333333333336e-07,
"loss": 0.0001,
"step": 4950
},
{
"epoch": 67.68707482993197,
"grad_norm": 0.0019130747532472014,
"learning_rate": 5.777777777777778e-08,
"loss": 0.0001,
"step": 4975
},
{
"epoch": 68.02721088435374,
"grad_norm": 0.002000050852075219,
"learning_rate": 2.2222222222222225e-09,
"loss": 0.0001,
"step": 5000
},
{
"epoch": 68.02721088435374,
"eval_loss": 0.5118595957756042,
"eval_runtime": 95.0278,
"eval_samples_per_second": 2.736,
"eval_steps_per_second": 0.179,
"eval_wer": 0.21671018276762402,
"step": 5000
},
{
"epoch": 68.02721088435374,
"step": 5000,
"total_flos": 3.378304801456128e+20,
"train_loss": 0.03018118931162171,
"train_runtime": 39486.7724,
"train_samples_per_second": 4.052,
"train_steps_per_second": 0.127
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 69,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.378304801456128e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}