{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 68.02721088435374,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.3401360544217687,
"grad_norm": 7.277314186096191,
"learning_rate": 5.000000000000001e-07,
"loss": 1.2508,
"step": 25
},
{
"epoch": 0.6802721088435374,
"grad_norm": 4.874781131744385,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.8343,
"step": 50
},
{
"epoch": 1.0204081632653061,
"grad_norm": 3.6367509365081787,
"learning_rate": 1.5e-06,
"loss": 0.5016,
"step": 75
},
{
"epoch": 1.3605442176870748,
"grad_norm": 3.666282892227173,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.3941,
"step": 100
},
{
"epoch": 1.7006802721088436,
"grad_norm": 3.6144936084747314,
"learning_rate": 2.5e-06,
"loss": 0.3451,
"step": 125
},
{
"epoch": 2.0408163265306123,
"grad_norm": 2.4187328815460205,
"learning_rate": 3e-06,
"loss": 0.3123,
"step": 150
},
{
"epoch": 2.380952380952381,
"grad_norm": 3.255674362182617,
"learning_rate": 3.5e-06,
"loss": 0.2263,
"step": 175
},
{
"epoch": 2.7210884353741496,
"grad_norm": 3.5080747604370117,
"learning_rate": 4.000000000000001e-06,
"loss": 0.2111,
"step": 200
},
{
"epoch": 3.061224489795918,
"grad_norm": 2.3079421520233154,
"learning_rate": 4.5e-06,
"loss": 0.201,
"step": 225
},
{
"epoch": 3.4013605442176873,
"grad_norm": 2.2684943675994873,
"learning_rate": 5e-06,
"loss": 0.1196,
"step": 250
},
{
"epoch": 3.741496598639456,
"grad_norm": 2.2747371196746826,
"learning_rate": 5.500000000000001e-06,
"loss": 0.1258,
"step": 275
},
{
"epoch": 4.081632653061225,
"grad_norm": 2.270559787750244,
"learning_rate": 6e-06,
"loss": 0.1133,
"step": 300
},
{
"epoch": 4.421768707482993,
"grad_norm": 2.20074200630188,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.0682,
"step": 325
},
{
"epoch": 4.761904761904762,
"grad_norm": 2.804894208908081,
"learning_rate": 7e-06,
"loss": 0.0731,
"step": 350
},
{
"epoch": 5.1020408163265305,
"grad_norm": 2.6068227291107178,
"learning_rate": 7.500000000000001e-06,
"loss": 0.0672,
"step": 375
},
{
"epoch": 5.442176870748299,
"grad_norm": 3.2111849784851074,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0478,
"step": 400
},
{
"epoch": 5.782312925170068,
"grad_norm": 1.5456165075302124,
"learning_rate": 8.5e-06,
"loss": 0.0505,
"step": 425
},
{
"epoch": 6.122448979591836,
"grad_norm": 1.4710639715194702,
"learning_rate": 9e-06,
"loss": 0.0473,
"step": 450
},
{
"epoch": 6.462585034013605,
"grad_norm": 2.914450168609619,
"learning_rate": 9.5e-06,
"loss": 0.0368,
"step": 475
},
{
"epoch": 6.802721088435375,
"grad_norm": 2.29419207572937,
"learning_rate": 1e-05,
"loss": 0.0441,
"step": 500
},
{
"epoch": 7.142857142857143,
"grad_norm": 0.8864838480949402,
"learning_rate": 9.944444444444445e-06,
"loss": 0.0412,
"step": 525
},
{
"epoch": 7.482993197278912,
"grad_norm": 1.3316959142684937,
"learning_rate": 9.88888888888889e-06,
"loss": 0.0347,
"step": 550
},
{
"epoch": 7.8231292517006805,
"grad_norm": 2.046053647994995,
"learning_rate": 9.833333333333333e-06,
"loss": 0.0299,
"step": 575
},
{
"epoch": 8.16326530612245,
"grad_norm": 1.579432725906372,
"learning_rate": 9.777777777777779e-06,
"loss": 0.0268,
"step": 600
},
{
"epoch": 8.503401360544217,
"grad_norm": 1.7325966358184814,
"learning_rate": 9.722222222222223e-06,
"loss": 0.019,
"step": 625
},
{
"epoch": 8.843537414965986,
"grad_norm": 1.6740517616271973,
"learning_rate": 9.666666666666667e-06,
"loss": 0.0262,
"step": 650
},
{
"epoch": 9.183673469387756,
"grad_norm": 0.65809166431427,
"learning_rate": 9.611111111111112e-06,
"loss": 0.0183,
"step": 675
},
{
"epoch": 9.523809523809524,
"grad_norm": 2.288142442703247,
"learning_rate": 9.555555555555556e-06,
"loss": 0.0177,
"step": 700
},
{
"epoch": 9.863945578231293,
"grad_norm": 1.3612937927246094,
"learning_rate": 9.5e-06,
"loss": 0.0179,
"step": 725
},
{
"epoch": 10.204081632653061,
"grad_norm": 1.2634042501449585,
"learning_rate": 9.444444444444445e-06,
"loss": 0.0142,
"step": 750
},
{
"epoch": 10.54421768707483,
"grad_norm": 0.8320721387863159,
"learning_rate": 9.38888888888889e-06,
"loss": 0.0131,
"step": 775
},
{
"epoch": 10.884353741496598,
"grad_norm": 1.1112641096115112,
"learning_rate": 9.333333333333334e-06,
"loss": 0.0148,
"step": 800
},
{
"epoch": 11.224489795918368,
"grad_norm": 0.4855366349220276,
"learning_rate": 9.277777777777778e-06,
"loss": 0.0124,
"step": 825
},
{
"epoch": 11.564625850340136,
"grad_norm": 1.5221891403198242,
"learning_rate": 9.222222222222224e-06,
"loss": 0.0116,
"step": 850
},
{
"epoch": 11.904761904761905,
"grad_norm": 1.4894431829452515,
"learning_rate": 9.166666666666666e-06,
"loss": 0.0144,
"step": 875
},
{
"epoch": 12.244897959183673,
"grad_norm": 1.2897030115127563,
"learning_rate": 9.111111111111112e-06,
"loss": 0.0099,
"step": 900
},
{
"epoch": 12.585034013605442,
"grad_norm": 0.5321520566940308,
"learning_rate": 9.055555555555556e-06,
"loss": 0.0068,
"step": 925
},
{
"epoch": 12.92517006802721,
"grad_norm": 1.2740304470062256,
"learning_rate": 9e-06,
"loss": 0.01,
"step": 950
},
{
"epoch": 13.26530612244898,
"grad_norm": 0.6653080582618713,
"learning_rate": 8.944444444444446e-06,
"loss": 0.0072,
"step": 975
},
{
"epoch": 13.60544217687075,
"grad_norm": 1.0347912311553955,
"learning_rate": 8.888888888888888e-06,
"loss": 0.0091,
"step": 1000
},
{
"epoch": 13.60544217687075,
"eval_loss": 0.4027358293533325,
"eval_runtime": 102.3269,
"eval_samples_per_second": 2.541,
"eval_steps_per_second": 0.166,
"eval_wer": 0.21894815367400225,
"step": 1000
},
{
"epoch": 13.945578231292517,
"grad_norm": 0.6724150776863098,
"learning_rate": 8.833333333333334e-06,
"loss": 0.0119,
"step": 1025
},
{
"epoch": 14.285714285714286,
"grad_norm": 0.44545045495033264,
"learning_rate": 8.777777777777778e-06,
"loss": 0.0069,
"step": 1050
},
{
"epoch": 14.625850340136054,
"grad_norm": 1.5290805101394653,
"learning_rate": 8.722222222222224e-06,
"loss": 0.0078,
"step": 1075
},
{
"epoch": 14.965986394557824,
"grad_norm": 0.5309610962867737,
"learning_rate": 8.666666666666668e-06,
"loss": 0.0053,
"step": 1100
},
{
"epoch": 15.306122448979592,
"grad_norm": 0.1730959564447403,
"learning_rate": 8.611111111111112e-06,
"loss": 0.0031,
"step": 1125
},
{
"epoch": 15.646258503401361,
"grad_norm": 0.06295394897460938,
"learning_rate": 8.555555555555556e-06,
"loss": 0.0044,
"step": 1150
},
{
"epoch": 15.986394557823129,
"grad_norm": 0.4541076421737671,
"learning_rate": 8.5e-06,
"loss": 0.0036,
"step": 1175
},
{
"epoch": 16.3265306122449,
"grad_norm": 0.24278602004051208,
"learning_rate": 8.444444444444446e-06,
"loss": 0.0045,
"step": 1200
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.3119202256202698,
"learning_rate": 8.38888888888889e-06,
"loss": 0.0026,
"step": 1225
},
{
"epoch": 17.006802721088434,
"grad_norm": 0.7003461718559265,
"learning_rate": 8.333333333333334e-06,
"loss": 0.0031,
"step": 1250
},
{
"epoch": 17.346938775510203,
"grad_norm": 0.3013887107372284,
"learning_rate": 8.277777777777778e-06,
"loss": 0.0024,
"step": 1275
},
{
"epoch": 17.687074829931973,
"grad_norm": 0.054453521966934204,
"learning_rate": 8.222222222222222e-06,
"loss": 0.0018,
"step": 1300
},
{
"epoch": 18.027210884353742,
"grad_norm": 0.46942847967147827,
"learning_rate": 8.166666666666668e-06,
"loss": 0.0016,
"step": 1325
},
{
"epoch": 18.367346938775512,
"grad_norm": 0.051955267786979675,
"learning_rate": 8.111111111111112e-06,
"loss": 0.0014,
"step": 1350
},
{
"epoch": 18.707482993197278,
"grad_norm": 0.022960973903536797,
"learning_rate": 8.055555555555557e-06,
"loss": 0.0007,
"step": 1375
},
{
"epoch": 19.047619047619047,
"grad_norm": 0.060347653925418854,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0023,
"step": 1400
},
{
"epoch": 19.387755102040817,
"grad_norm": 0.045802563428878784,
"learning_rate": 7.944444444444445e-06,
"loss": 0.0009,
"step": 1425
},
{
"epoch": 19.727891156462587,
"grad_norm": 0.03706446662545204,
"learning_rate": 7.88888888888889e-06,
"loss": 0.0009,
"step": 1450
},
{
"epoch": 20.068027210884352,
"grad_norm": 0.014720437116920948,
"learning_rate": 7.833333333333333e-06,
"loss": 0.0008,
"step": 1475
},
{
"epoch": 20.408163265306122,
"grad_norm": 0.01641876809298992,
"learning_rate": 7.77777777777778e-06,
"loss": 0.0005,
"step": 1500
},
{
"epoch": 20.74829931972789,
"grad_norm": 0.051012102514505386,
"learning_rate": 7.722222222222223e-06,
"loss": 0.0006,
"step": 1525
},
{
"epoch": 21.08843537414966,
"grad_norm": 0.07526645809412003,
"learning_rate": 7.666666666666667e-06,
"loss": 0.0006,
"step": 1550
},
{
"epoch": 21.428571428571427,
"grad_norm": 0.014325820840895176,
"learning_rate": 7.611111111111111e-06,
"loss": 0.0013,
"step": 1575
},
{
"epoch": 21.768707482993197,
"grad_norm": 0.013493416830897331,
"learning_rate": 7.555555555555556e-06,
"loss": 0.0008,
"step": 1600
},
{
"epoch": 22.108843537414966,
"grad_norm": 0.09651411324739456,
"learning_rate": 7.500000000000001e-06,
"loss": 0.0007,
"step": 1625
},
{
"epoch": 22.448979591836736,
"grad_norm": 0.00849719438701868,
"learning_rate": 7.444444444444445e-06,
"loss": 0.0004,
"step": 1650
},
{
"epoch": 22.7891156462585,
"grad_norm": 0.12997107207775116,
"learning_rate": 7.38888888888889e-06,
"loss": 0.0004,
"step": 1675
},
{
"epoch": 23.12925170068027,
"grad_norm": 0.017772994935512543,
"learning_rate": 7.333333333333333e-06,
"loss": 0.0004,
"step": 1700
},
{
"epoch": 23.46938775510204,
"grad_norm": 0.015189074911177158,
"learning_rate": 7.277777777777778e-06,
"loss": 0.0005,
"step": 1725
},
{
"epoch": 23.80952380952381,
"grad_norm": 0.015632469207048416,
"learning_rate": 7.222222222222223e-06,
"loss": 0.0005,
"step": 1750
},
{
"epoch": 24.14965986394558,
"grad_norm": 0.01429369393736124,
"learning_rate": 7.166666666666667e-06,
"loss": 0.0008,
"step": 1775
},
{
"epoch": 24.489795918367346,
"grad_norm": 0.009108461439609528,
"learning_rate": 7.111111111111112e-06,
"loss": 0.0004,
"step": 1800
},
{
"epoch": 24.829931972789115,
"grad_norm": 0.00893578864634037,
"learning_rate": 7.055555555555557e-06,
"loss": 0.0008,
"step": 1825
},
{
"epoch": 25.170068027210885,
"grad_norm": 0.010999013669788837,
"learning_rate": 7e-06,
"loss": 0.0017,
"step": 1850
},
{
"epoch": 25.510204081632654,
"grad_norm": 0.039021287113428116,
"learning_rate": 6.944444444444445e-06,
"loss": 0.001,
"step": 1875
},
{
"epoch": 25.85034013605442,
"grad_norm": 0.8390358686447144,
"learning_rate": 6.88888888888889e-06,
"loss": 0.0009,
"step": 1900
},
{
"epoch": 26.19047619047619,
"grad_norm": 0.02965502440929413,
"learning_rate": 6.833333333333334e-06,
"loss": 0.0013,
"step": 1925
},
{
"epoch": 26.53061224489796,
"grad_norm": 0.21533174812793732,
"learning_rate": 6.777777777777779e-06,
"loss": 0.0029,
"step": 1950
},
{
"epoch": 26.87074829931973,
"grad_norm": 0.06805889308452606,
"learning_rate": 6.7222222222222235e-06,
"loss": 0.0023,
"step": 1975
},
{
"epoch": 27.2108843537415,
"grad_norm": 0.13776488602161407,
"learning_rate": 6.666666666666667e-06,
"loss": 0.0026,
"step": 2000
},
{
"epoch": 27.2108843537415,
"eval_loss": 0.4282212555408478,
"eval_runtime": 102.1629,
"eval_samples_per_second": 2.545,
"eval_steps_per_second": 0.166,
"eval_wer": 0.22603506154419992,
"step": 2000
},
{
"epoch": 27.551020408163264,
"grad_norm": 0.7586637735366821,
"learning_rate": 6.6111111111111115e-06,
"loss": 0.0045,
"step": 2025
},
{
"epoch": 27.891156462585034,
"grad_norm": 0.19451989233493805,
"learning_rate": 6.555555555555556e-06,
"loss": 0.0051,
"step": 2050
},
{
"epoch": 28.231292517006803,
"grad_norm": 0.1951601356267929,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.0037,
"step": 2075
},
{
"epoch": 28.571428571428573,
"grad_norm": 0.3530046045780182,
"learning_rate": 6.444444444444445e-06,
"loss": 0.0037,
"step": 2100
},
{
"epoch": 28.91156462585034,
"grad_norm": 0.26006898283958435,
"learning_rate": 6.3888888888888885e-06,
"loss": 0.0045,
"step": 2125
},
{
"epoch": 29.25170068027211,
"grad_norm": 1.6727913618087769,
"learning_rate": 6.333333333333333e-06,
"loss": 0.0034,
"step": 2150
},
{
"epoch": 29.591836734693878,
"grad_norm": 0.23586586117744446,
"learning_rate": 6.277777777777778e-06,
"loss": 0.0033,
"step": 2175
},
{
"epoch": 29.931972789115648,
"grad_norm": 2.080326557159424,
"learning_rate": 6.222222222222223e-06,
"loss": 0.0041,
"step": 2200
},
{
"epoch": 30.272108843537413,
"grad_norm": 0.43551284074783325,
"learning_rate": 6.166666666666667e-06,
"loss": 0.0056,
"step": 2225
},
{
"epoch": 30.612244897959183,
"grad_norm": 0.40780606865882874,
"learning_rate": 6.111111111111112e-06,
"loss": 0.0034,
"step": 2250
},
{
"epoch": 30.952380952380953,
"grad_norm": 0.2716953754425049,
"learning_rate": 6.055555555555555e-06,
"loss": 0.0059,
"step": 2275
},
{
"epoch": 31.292517006802722,
"grad_norm": 0.7849836945533752,
"learning_rate": 6e-06,
"loss": 0.004,
"step": 2300
},
{
"epoch": 31.632653061224488,
"grad_norm": 0.0561537891626358,
"learning_rate": 5.944444444444445e-06,
"loss": 0.0031,
"step": 2325
},
{
"epoch": 31.972789115646258,
"grad_norm": 0.2189192771911621,
"learning_rate": 5.88888888888889e-06,
"loss": 0.0025,
"step": 2350
},
{
"epoch": 32.31292517006803,
"grad_norm": 0.7395544052124023,
"learning_rate": 5.833333333333334e-06,
"loss": 0.0035,
"step": 2375
},
{
"epoch": 32.6530612244898,
"grad_norm": 0.8322564959526062,
"learning_rate": 5.777777777777778e-06,
"loss": 0.0029,
"step": 2400
},
{
"epoch": 32.993197278911566,
"grad_norm": 0.1809442937374115,
"learning_rate": 5.722222222222222e-06,
"loss": 0.004,
"step": 2425
},
{
"epoch": 33.333333333333336,
"grad_norm": 0.09690059721469879,
"learning_rate": 5.666666666666667e-06,
"loss": 0.0019,
"step": 2450
},
{
"epoch": 33.673469387755105,
"grad_norm": 0.29557883739471436,
"learning_rate": 5.611111111111112e-06,
"loss": 0.0023,
"step": 2475
},
{
"epoch": 34.01360544217687,
"grad_norm": 0.026952777057886124,
"learning_rate": 5.555555555555557e-06,
"loss": 0.001,
"step": 2500
},
{
"epoch": 34.35374149659864,
"grad_norm": 0.04151106998324394,
"learning_rate": 5.500000000000001e-06,
"loss": 0.0008,
"step": 2525
},
{
"epoch": 34.69387755102041,
"grad_norm": 0.01914023421704769,
"learning_rate": 5.444444444444445e-06,
"loss": 0.0012,
"step": 2550
},
{
"epoch": 35.034013605442176,
"grad_norm": 0.15637150406837463,
"learning_rate": 5.388888888888889e-06,
"loss": 0.0011,
"step": 2575
},
{
"epoch": 35.374149659863946,
"grad_norm": 0.18300969898700714,
"learning_rate": 5.333333333333334e-06,
"loss": 0.0013,
"step": 2600
},
{
"epoch": 35.714285714285715,
"grad_norm": 0.019685134291648865,
"learning_rate": 5.2777777777777785e-06,
"loss": 0.0009,
"step": 2625
},
{
"epoch": 36.054421768707485,
"grad_norm": 0.012847774662077427,
"learning_rate": 5.2222222222222226e-06,
"loss": 0.0004,
"step": 2650
},
{
"epoch": 36.394557823129254,
"grad_norm": 0.011110267601907253,
"learning_rate": 5.1666666666666675e-06,
"loss": 0.0003,
"step": 2675
},
{
"epoch": 36.734693877551024,
"grad_norm": 0.010116774588823318,
"learning_rate": 5.1111111111111115e-06,
"loss": 0.0003,
"step": 2700
},
{
"epoch": 37.074829931972786,
"grad_norm": 0.00764122698456049,
"learning_rate": 5.0555555555555555e-06,
"loss": 0.0003,
"step": 2725
},
{
"epoch": 37.414965986394556,
"grad_norm": 0.02383231371641159,
"learning_rate": 5e-06,
"loss": 0.0002,
"step": 2750
},
{
"epoch": 37.755102040816325,
"grad_norm": 0.006380359176546335,
"learning_rate": 4.944444444444445e-06,
"loss": 0.0002,
"step": 2775
},
{
"epoch": 38.095238095238095,
"grad_norm": 0.0054608238860964775,
"learning_rate": 4.888888888888889e-06,
"loss": 0.0002,
"step": 2800
},
{
"epoch": 38.435374149659864,
"grad_norm": 0.005097352433949709,
"learning_rate": 4.833333333333333e-06,
"loss": 0.0002,
"step": 2825
},
{
"epoch": 38.775510204081634,
"grad_norm": 0.005154197569936514,
"learning_rate": 4.777777777777778e-06,
"loss": 0.0002,
"step": 2850
},
{
"epoch": 39.1156462585034,
"grad_norm": 0.005586385726928711,
"learning_rate": 4.722222222222222e-06,
"loss": 0.0002,
"step": 2875
},
{
"epoch": 39.45578231292517,
"grad_norm": 0.005172679666429758,
"learning_rate": 4.666666666666667e-06,
"loss": 0.0002,
"step": 2900
},
{
"epoch": 39.795918367346935,
"grad_norm": 0.004589286167174578,
"learning_rate": 4.611111111111112e-06,
"loss": 0.0002,
"step": 2925
},
{
"epoch": 40.136054421768705,
"grad_norm": 0.005452868994325399,
"learning_rate": 4.555555555555556e-06,
"loss": 0.0002,
"step": 2950
},
{
"epoch": 40.476190476190474,
"grad_norm": 0.004887877497822046,
"learning_rate": 4.5e-06,
"loss": 0.0002,
"step": 2975
},
{
"epoch": 40.816326530612244,
"grad_norm": 0.00491141015663743,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0002,
"step": 3000
},
{
"epoch": 40.816326530612244,
"eval_loss": 0.4444006681442261,
"eval_runtime": 102.1694,
"eval_samples_per_second": 2.545,
"eval_steps_per_second": 0.166,
"eval_wer": 0.21782916822081314,
"step": 3000
},
{
"epoch": 41.156462585034014,
"grad_norm": 0.005289255175739527,
"learning_rate": 4.388888888888889e-06,
"loss": 0.0002,
"step": 3025
},
{
"epoch": 41.49659863945578,
"grad_norm": 0.004311501048505306,
"learning_rate": 4.333333333333334e-06,
"loss": 0.0001,
"step": 3050
},
{
"epoch": 41.83673469387755,
"grad_norm": 0.004461329896003008,
"learning_rate": 4.277777777777778e-06,
"loss": 0.0002,
"step": 3075
},
{
"epoch": 42.17687074829932,
"grad_norm": 0.0043508694507181644,
"learning_rate": 4.222222222222223e-06,
"loss": 0.0001,
"step": 3100
},
{
"epoch": 42.51700680272109,
"grad_norm": 0.004281959030777216,
"learning_rate": 4.166666666666667e-06,
"loss": 0.0001,
"step": 3125
},
{
"epoch": 42.857142857142854,
"grad_norm": 0.004064427223056555,
"learning_rate": 4.111111111111111e-06,
"loss": 0.0001,
"step": 3150
},
{
"epoch": 43.197278911564624,
"grad_norm": 0.00377645343542099,
"learning_rate": 4.055555555555556e-06,
"loss": 0.0001,
"step": 3175
},
{
"epoch": 43.53741496598639,
"grad_norm": 0.004163255449384451,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0001,
"step": 3200
},
{
"epoch": 43.87755102040816,
"grad_norm": 0.004156498704105616,
"learning_rate": 3.944444444444445e-06,
"loss": 0.0001,
"step": 3225
},
{
"epoch": 44.21768707482993,
"grad_norm": 0.002806807868182659,
"learning_rate": 3.88888888888889e-06,
"loss": 0.0001,
"step": 3250
},
{
"epoch": 44.5578231292517,
"grad_norm": 0.0035042332019656897,
"learning_rate": 3.833333333333334e-06,
"loss": 0.0001,
"step": 3275
},
{
"epoch": 44.89795918367347,
"grad_norm": 0.003332278924062848,
"learning_rate": 3.777777777777778e-06,
"loss": 0.0001,
"step": 3300
},
{
"epoch": 45.23809523809524,
"grad_norm": 0.003195433644577861,
"learning_rate": 3.7222222222222225e-06,
"loss": 0.0001,
"step": 3325
},
{
"epoch": 45.578231292517,
"grad_norm": 0.0031893581617623568,
"learning_rate": 3.6666666666666666e-06,
"loss": 0.0001,
"step": 3350
},
{
"epoch": 45.91836734693877,
"grad_norm": 0.0031172255985438824,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.0001,
"step": 3375
},
{
"epoch": 46.25850340136054,
"grad_norm": 0.0032927945721894503,
"learning_rate": 3.555555555555556e-06,
"loss": 0.0001,
"step": 3400
},
{
"epoch": 46.59863945578231,
"grad_norm": 0.003365216078236699,
"learning_rate": 3.5e-06,
"loss": 0.0001,
"step": 3425
},
{
"epoch": 46.93877551020408,
"grad_norm": 0.003197253216058016,
"learning_rate": 3.444444444444445e-06,
"loss": 0.0001,
"step": 3450
},
{
"epoch": 47.27891156462585,
"grad_norm": 0.0033013387583196163,
"learning_rate": 3.3888888888888893e-06,
"loss": 0.0001,
"step": 3475
},
{
"epoch": 47.61904761904762,
"grad_norm": 0.0030345292761921883,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0001,
"step": 3500
},
{
"epoch": 47.95918367346939,
"grad_norm": 0.002814779756590724,
"learning_rate": 3.277777777777778e-06,
"loss": 0.0001,
"step": 3525
},
{
"epoch": 48.29931972789116,
"grad_norm": 0.00269457814283669,
"learning_rate": 3.2222222222222227e-06,
"loss": 0.0001,
"step": 3550
},
{
"epoch": 48.63945578231292,
"grad_norm": 0.0031415882986038923,
"learning_rate": 3.1666666666666667e-06,
"loss": 0.0001,
"step": 3575
},
{
"epoch": 48.97959183673469,
"grad_norm": 0.0029843435622751713,
"learning_rate": 3.1111111111111116e-06,
"loss": 0.0001,
"step": 3600
},
{
"epoch": 49.31972789115646,
"grad_norm": 0.002468927064910531,
"learning_rate": 3.055555555555556e-06,
"loss": 0.0001,
"step": 3625
},
{
"epoch": 49.65986394557823,
"grad_norm": 0.002835317747667432,
"learning_rate": 3e-06,
"loss": 0.0001,
"step": 3650
},
{
"epoch": 50.0,
"grad_norm": 0.003936604596674442,
"learning_rate": 2.944444444444445e-06,
"loss": 0.0001,
"step": 3675
},
{
"epoch": 50.34013605442177,
"grad_norm": 0.0028534727171063423,
"learning_rate": 2.888888888888889e-06,
"loss": 0.0001,
"step": 3700
},
{
"epoch": 50.68027210884354,
"grad_norm": 0.0029034256003797054,
"learning_rate": 2.8333333333333335e-06,
"loss": 0.0001,
"step": 3725
},
{
"epoch": 51.02040816326531,
"grad_norm": 0.0032608446199446917,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.0001,
"step": 3750
},
{
"epoch": 51.36054421768708,
"grad_norm": 0.0026580116245895624,
"learning_rate": 2.7222222222222224e-06,
"loss": 0.0001,
"step": 3775
},
{
"epoch": 51.70068027210884,
"grad_norm": 0.0024348797742277384,
"learning_rate": 2.666666666666667e-06,
"loss": 0.0001,
"step": 3800
},
{
"epoch": 52.04081632653061,
"grad_norm": 0.002745755948126316,
"learning_rate": 2.6111111111111113e-06,
"loss": 0.0001,
"step": 3825
},
{
"epoch": 52.38095238095238,
"grad_norm": 0.0023754702415317297,
"learning_rate": 2.5555555555555557e-06,
"loss": 0.0001,
"step": 3850
},
{
"epoch": 52.72108843537415,
"grad_norm": 0.0026109826285392046,
"learning_rate": 2.5e-06,
"loss": 0.0001,
"step": 3875
},
{
"epoch": 53.06122448979592,
"grad_norm": 0.0025893962010741234,
"learning_rate": 2.4444444444444447e-06,
"loss": 0.0001,
"step": 3900
},
{
"epoch": 53.40136054421769,
"grad_norm": 0.0023246912751346827,
"learning_rate": 2.388888888888889e-06,
"loss": 0.0001,
"step": 3925
},
{
"epoch": 53.74149659863946,
"grad_norm": 0.002337114419788122,
"learning_rate": 2.3333333333333336e-06,
"loss": 0.0001,
"step": 3950
},
{
"epoch": 54.08163265306123,
"grad_norm": 0.002635551616549492,
"learning_rate": 2.277777777777778e-06,
"loss": 0.0001,
"step": 3975
},
{
"epoch": 54.421768707483,
"grad_norm": 0.002480172784999013,
"learning_rate": 2.222222222222222e-06,
"loss": 0.0001,
"step": 4000
},
{
"epoch": 54.421768707483,
"eval_loss": 0.4667165279388428,
"eval_runtime": 102.3194,
"eval_samples_per_second": 2.541,
"eval_steps_per_second": 0.166,
"eval_wer": 0.21596419246549795,
"step": 4000
},
{
"epoch": 54.76190476190476,
"grad_norm": 0.00236712908372283,
"learning_rate": 2.166666666666667e-06,
"loss": 0.0001,
"step": 4025
},
{
"epoch": 55.10204081632653,
"grad_norm": 0.002560981782153249,
"learning_rate": 2.1111111111111114e-06,
"loss": 0.0001,
"step": 4050
},
{
"epoch": 55.4421768707483,
"grad_norm": 0.002310627605766058,
"learning_rate": 2.0555555555555555e-06,
"loss": 0.0001,
"step": 4075
},
{
"epoch": 55.78231292517007,
"grad_norm": 0.0023427396081387997,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0001,
"step": 4100
},
{
"epoch": 56.12244897959184,
"grad_norm": 0.002552021760493517,
"learning_rate": 1.944444444444445e-06,
"loss": 0.0001,
"step": 4125
},
{
"epoch": 56.46258503401361,
"grad_norm": 0.0021091082599014044,
"learning_rate": 1.888888888888889e-06,
"loss": 0.0001,
"step": 4150
},
{
"epoch": 56.802721088435376,
"grad_norm": 0.002378846053034067,
"learning_rate": 1.8333333333333333e-06,
"loss": 0.0001,
"step": 4175
},
{
"epoch": 57.142857142857146,
"grad_norm": 0.0022475309669971466,
"learning_rate": 1.777777777777778e-06,
"loss": 0.0001,
"step": 4200
},
{
"epoch": 57.48299319727891,
"grad_norm": 0.0024991375394165516,
"learning_rate": 1.7222222222222224e-06,
"loss": 0.0001,
"step": 4225
},
{
"epoch": 57.82312925170068,
"grad_norm": 0.0026695560663938522,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0001,
"step": 4250
},
{
"epoch": 58.16326530612245,
"grad_norm": 0.0023107912857085466,
"learning_rate": 1.6111111111111113e-06,
"loss": 0.0001,
"step": 4275
},
{
"epoch": 58.50340136054422,
"grad_norm": 0.0022581815719604492,
"learning_rate": 1.5555555555555558e-06,
"loss": 0.0001,
"step": 4300
},
{
"epoch": 58.843537414965986,
"grad_norm": 0.0021738242357969284,
"learning_rate": 1.5e-06,
"loss": 0.0001,
"step": 4325
},
{
"epoch": 59.183673469387756,
"grad_norm": 0.0025273971259593964,
"learning_rate": 1.4444444444444445e-06,
"loss": 0.0001,
"step": 4350
},
{
"epoch": 59.523809523809526,
"grad_norm": 0.0021680588833987713,
"learning_rate": 1.3888888888888892e-06,
"loss": 0.0001,
"step": 4375
},
{
"epoch": 59.863945578231295,
"grad_norm": 0.0023293830454349518,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.0001,
"step": 4400
},
{
"epoch": 60.204081632653065,
"grad_norm": 0.0019948012195527554,
"learning_rate": 1.2777777777777779e-06,
"loss": 0.0001,
"step": 4425
},
{
"epoch": 60.54421768707483,
"grad_norm": 0.00274069607257843,
"learning_rate": 1.2222222222222223e-06,
"loss": 0.0001,
"step": 4450
},
{
"epoch": 60.8843537414966,
"grad_norm": 0.002070850459858775,
"learning_rate": 1.1666666666666668e-06,
"loss": 0.0001,
"step": 4475
},
{
"epoch": 61.224489795918366,
"grad_norm": 0.002111976034939289,
"learning_rate": 1.111111111111111e-06,
"loss": 0.0001,
"step": 4500
},
{
"epoch": 61.564625850340136,
"grad_norm": 0.002158733317628503,
"learning_rate": 1.0555555555555557e-06,
"loss": 0.0001,
"step": 4525
},
{
"epoch": 61.904761904761905,
"grad_norm": 0.002263331552967429,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0001,
"step": 4550
},
{
"epoch": 62.244897959183675,
"grad_norm": 0.0020453669130802155,
"learning_rate": 9.444444444444445e-07,
"loss": 0.0001,
"step": 4575
},
{
"epoch": 62.585034013605444,
"grad_norm": 0.0018243337981402874,
"learning_rate": 8.88888888888889e-07,
"loss": 0.0001,
"step": 4600
},
{
"epoch": 62.925170068027214,
"grad_norm": 0.0023292056284844875,
"learning_rate": 8.333333333333333e-07,
"loss": 0.0001,
"step": 4625
},
{
"epoch": 63.265306122448976,
"grad_norm": 0.0019000651082023978,
"learning_rate": 7.777777777777779e-07,
"loss": 0.0001,
"step": 4650
},
{
"epoch": 63.605442176870746,
"grad_norm": 0.002300037071108818,
"learning_rate": 7.222222222222222e-07,
"loss": 0.0001,
"step": 4675
},
{
"epoch": 63.945578231292515,
"grad_norm": 0.0020359556656330824,
"learning_rate": 6.666666666666667e-07,
"loss": 0.0001,
"step": 4700
},
{
"epoch": 64.28571428571429,
"grad_norm": 0.0019380120793357491,
"learning_rate": 6.111111111111112e-07,
"loss": 0.0001,
"step": 4725
},
{
"epoch": 64.62585034013605,
"grad_norm": 0.0018156260484829545,
"learning_rate": 5.555555555555555e-07,
"loss": 0.0001,
"step": 4750
},
{
"epoch": 64.96598639455782,
"grad_norm": 0.0021546927746385336,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0001,
"step": 4775
},
{
"epoch": 65.3061224489796,
"grad_norm": 0.0019719544798135757,
"learning_rate": 4.444444444444445e-07,
"loss": 0.0001,
"step": 4800
},
{
"epoch": 65.64625850340136,
"grad_norm": 0.0018750294111669064,
"learning_rate": 3.8888888888888895e-07,
"loss": 0.0001,
"step": 4825
},
{
"epoch": 65.98639455782313,
"grad_norm": 0.002267962321639061,
"learning_rate": 3.3333333333333335e-07,
"loss": 0.0001,
"step": 4850
},
{
"epoch": 66.3265306122449,
"grad_norm": 0.0019681702833622694,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.0001,
"step": 4875
},
{
"epoch": 66.66666666666667,
"grad_norm": 0.0022395187988877296,
"learning_rate": 2.2222222222222224e-07,
"loss": 0.0001,
"step": 4900
},
{
"epoch": 67.00680272108843,
"grad_norm": 0.0033925268799066544,
"learning_rate": 1.6666666666666668e-07,
"loss": 0.0001,
"step": 4925
},
{
"epoch": 67.34693877551021,
"grad_norm": 0.0019715563394129276,
"learning_rate": 1.1111111111111112e-07,
"loss": 0.0001,
"step": 4950
},
{
"epoch": 67.68707482993197,
"grad_norm": 0.00179803348146379,
"learning_rate": 5.555555555555556e-08,
"loss": 0.0001,
"step": 4975
},
{
"epoch": 68.02721088435374,
"grad_norm": 0.0019662685226649046,
"learning_rate": 0.0,
"loss": 0.0001,
"step": 5000
},
{
"epoch": 68.02721088435374,
"eval_loss": 0.47325974702835083,
"eval_runtime": 102.3255,
"eval_samples_per_second": 2.541,
"eval_steps_per_second": 0.166,
"eval_wer": 0.21372622155911974,
"step": 5000
},
{
"epoch": 68.02721088435374,
"step": 5000,
"total_flos": 5.4059634054660096e+20,
"train_loss": 0.028010966634354553,
"train_runtime": 44897.3386,
"train_samples_per_second": 3.564,
"train_steps_per_second": 0.111
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 69,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.4059634054660096e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}