diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4443 @@ +{ + "best_global_step": 46500, + "best_metric": 0.15599121044112013, + "best_model_checkpoint": "/home/cluster-dgxa100/slp01/bagas-fine-tune-whisper/whisper-tiny-javanese-openslr-v4/checkpoint-46500", + "epoch": 5.403631058518398, + "eval_steps": 500, + "global_step": 50000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01080672178094775, + "grad_norm": 25.346445083618164, + "learning_rate": 1.94e-06, + "loss": 3.5433, + "step": 100 + }, + { + "epoch": 0.0216134435618955, + "grad_norm": 21.656307220458984, + "learning_rate": 3.94e-06, + "loss": 2.0264, + "step": 200 + }, + { + "epoch": 0.03242016534284325, + "grad_norm": 18.657211303710938, + "learning_rate": 5.94e-06, + "loss": 1.5688, + "step": 300 + }, + { + "epoch": 0.043226887123791, + "grad_norm": 16.42237663269043, + "learning_rate": 7.94e-06, + "loss": 1.3214, + "step": 400 + }, + { + "epoch": 0.054033608904738746, + "grad_norm": 18.631206512451172, + "learning_rate": 9.940000000000001e-06, + "loss": 1.1788, + "step": 500 + }, + { + "epoch": 0.054033608904738746, + "eval_loss": 0.967095136642456, + "eval_runtime": 5770.5819, + "eval_samples_per_second": 3.207, + "eval_steps_per_second": 0.802, + "eval_wer": 0.6590292385770924, + "step": 500 + }, + { + "epoch": 0.0648403306856865, + "grad_norm": 15.337555885314941, + "learning_rate": 1.1940000000000001e-05, + "loss": 1.0627, + "step": 600 + }, + { + "epoch": 0.07564705246663425, + "grad_norm": 14.623177528381348, + "learning_rate": 1.394e-05, + "loss": 0.9632, + "step": 700 + }, + { + "epoch": 0.086453774247582, + "grad_norm": 17.126712799072266, + "learning_rate": 1.5940000000000003e-05, + "loss": 0.906, + "step": 800 + }, + { + "epoch": 0.09726049602852975, + "grad_norm": 16.75067710876465, + "learning_rate": 1.794e-05, + "loss": 0.8503, + "step": 900 + }, + { + "epoch": 0.10806721780947749, + "grad_norm": 14.265076637268066, + "learning_rate": 1.9940000000000002e-05, + "loss": 0.8015, + "step": 1000 + }, + { + "epoch": 0.10806721780947749, + "eval_loss": 0.6976613402366638, + "eval_runtime": 5463.0331, + "eval_samples_per_second": 3.387, + "eval_steps_per_second": 0.847, + "eval_wer": 0.5304858499049883, + "step": 1000 + }, + { + "epoch": 0.11887393959042525, + "grad_norm": 13.737130165100098, + "learning_rate": 1.9784444444444446e-05, + "loss": 0.7589, + "step": 1100 + }, + { + "epoch": 0.129680661371373, + "grad_norm": 18.01378631591797, + "learning_rate": 1.9562222222222225e-05, + "loss": 0.7589, + "step": 1200 + }, + { + "epoch": 0.14048738315232073, + "grad_norm": 11.696120262145996, + "learning_rate": 1.934e-05, + "loss": 0.7087, + "step": 1300 + }, + { + "epoch": 0.1512941049332685, + "grad_norm": 13.419560432434082, + "learning_rate": 1.911777777777778e-05, + "loss": 0.683, + "step": 1400 + }, + { + "epoch": 0.16210082671421625, + "grad_norm": 12.753211975097656, + "learning_rate": 1.8895555555555557e-05, + "loss": 0.6498, + "step": 1500 + }, + { + "epoch": 0.16210082671421625, + "eval_loss": 0.5724753737449646, + "eval_runtime": 4564.4621, + "eval_samples_per_second": 4.054, + "eval_steps_per_second": 1.013, + "eval_wer": 0.6670133485560569, + "step": 1500 + }, + { + "epoch": 0.172907548495164, + "grad_norm": 11.64907455444336, + "learning_rate": 1.8673333333333333e-05, + "loss": 0.6216, + "step": 1600 + }, + { + "epoch": 0.18371427027611173, + "grad_norm": 13.781865119934082, + "learning_rate": 1.8451111111111113e-05, + "loss": 0.6138, + "step": 1700 + }, + { + "epoch": 0.1945209920570595, + "grad_norm": 12.58388900756836, + "learning_rate": 1.822888888888889e-05, + "loss": 0.595, + "step": 1800 + }, + { + "epoch": 0.20532771383800724, + "grad_norm": 14.661055564880371, + "learning_rate": 1.8006666666666668e-05, + "loss": 0.5938, + "step": 1900 + }, + { + "epoch": 0.21613443561895498, + "grad_norm": 11.948161125183105, + "learning_rate": 1.7784444444444448e-05, + "loss": 0.5828, + "step": 2000 + }, + { + "epoch": 0.21613443561895498, + "eval_loss": 0.5093731880187988, + "eval_runtime": 5328.6402, + "eval_samples_per_second": 3.473, + "eval_steps_per_second": 0.868, + "eval_wer": 0.4828939857208768, + "step": 2000 + }, + { + "epoch": 0.22694115739990273, + "grad_norm": 12.322188377380371, + "learning_rate": 1.7562222222222224e-05, + "loss": 0.5752, + "step": 2100 + }, + { + "epoch": 0.2377478791808505, + "grad_norm": 17.046159744262695, + "learning_rate": 1.734e-05, + "loss": 0.5663, + "step": 2200 + }, + { + "epoch": 0.24855460096179824, + "grad_norm": 10.154263496398926, + "learning_rate": 1.711777777777778e-05, + "loss": 0.537, + "step": 2300 + }, + { + "epoch": 0.259361322742746, + "grad_norm": 11.958285331726074, + "learning_rate": 1.6895555555555556e-05, + "loss": 0.5246, + "step": 2400 + }, + { + "epoch": 0.27016804452369375, + "grad_norm": 10.264266014099121, + "learning_rate": 1.6673333333333335e-05, + "loss": 0.5226, + "step": 2500 + }, + { + "epoch": 0.27016804452369375, + "eval_loss": 0.46415480971336365, + "eval_runtime": 4645.745, + "eval_samples_per_second": 3.983, + "eval_steps_per_second": 0.996, + "eval_wer": 0.38602898052064843, + "step": 2500 + }, + { + "epoch": 0.28097476630464147, + "grad_norm": 12.049257278442383, + "learning_rate": 1.6451111111111115e-05, + "loss": 0.493, + "step": 2600 + }, + { + "epoch": 0.29178148808558924, + "grad_norm": 9.821508407592773, + "learning_rate": 1.622888888888889e-05, + "loss": 0.5153, + "step": 2700 + }, + { + "epoch": 0.302588209866537, + "grad_norm": 10.481095314025879, + "learning_rate": 1.6006666666666667e-05, + "loss": 0.5, + "step": 2800 + }, + { + "epoch": 0.3133949316474847, + "grad_norm": 10.193309783935547, + "learning_rate": 1.5784444444444447e-05, + "loss": 0.5248, + "step": 2900 + }, + { + "epoch": 0.3242016534284325, + "grad_norm": 12.328668594360352, + "learning_rate": 1.5562222222222223e-05, + "loss": 0.4955, + "step": 3000 + }, + { + "epoch": 0.3242016534284325, + "eval_loss": 0.4340818226337433, + "eval_runtime": 4456.9484, + "eval_samples_per_second": 4.152, + "eval_steps_per_second": 1.038, + "eval_wer": 0.39154200455117727, + "step": 3000 + }, + { + "epoch": 0.33500837520938026, + "grad_norm": 12.583343505859375, + "learning_rate": 1.5340000000000002e-05, + "loss": 0.5082, + "step": 3100 + }, + { + "epoch": 0.345815096990328, + "grad_norm": 8.40932846069336, + "learning_rate": 1.511777777777778e-05, + "loss": 0.4905, + "step": 3200 + }, + { + "epoch": 0.35662181877127574, + "grad_norm": 14.150980949401855, + "learning_rate": 1.4895555555555556e-05, + "loss": 0.466, + "step": 3300 + }, + { + "epoch": 0.36742854055222346, + "grad_norm": 13.014771461486816, + "learning_rate": 1.4673333333333336e-05, + "loss": 0.4788, + "step": 3400 + }, + { + "epoch": 0.37823526233317123, + "grad_norm": 11.843710899353027, + "learning_rate": 1.4451111111111112e-05, + "loss": 0.4616, + "step": 3500 + }, + { + "epoch": 0.37823526233317123, + "eval_loss": 0.4127795398235321, + "eval_runtime": 4528.1925, + "eval_samples_per_second": 4.086, + "eval_steps_per_second": 1.022, + "eval_wer": 0.35399088200564593, + "step": 3500 + }, + { + "epoch": 0.389041984114119, + "grad_norm": 11.520469665527344, + "learning_rate": 1.422888888888889e-05, + "loss": 0.4695, + "step": 3600 + }, + { + "epoch": 0.3998487058950667, + "grad_norm": 10.21032428741455, + "learning_rate": 1.400666666666667e-05, + "loss": 0.47, + "step": 3700 + }, + { + "epoch": 0.4106554276760145, + "grad_norm": 9.393896102905273, + "learning_rate": 1.3784444444444445e-05, + "loss": 0.4656, + "step": 3800 + }, + { + "epoch": 0.42146214945696225, + "grad_norm": 10.503016471862793, + "learning_rate": 1.3562222222222223e-05, + "loss": 0.4446, + "step": 3900 + }, + { + "epoch": 0.43226887123790997, + "grad_norm": 10.747596740722656, + "learning_rate": 1.3340000000000001e-05, + "loss": 0.4474, + "step": 4000 + }, + { + "epoch": 0.43226887123790997, + "eval_loss": 0.3900074064731598, + "eval_runtime": 4858.8536, + "eval_samples_per_second": 3.808, + "eval_steps_per_second": 0.952, + "eval_wer": 0.36136504038974343, + "step": 4000 + }, + { + "epoch": 0.44307559301885774, + "grad_norm": 13.275285720825195, + "learning_rate": 1.3117777777777779e-05, + "loss": 0.4488, + "step": 4100 + }, + { + "epoch": 0.45388231479980545, + "grad_norm": 11.318832397460938, + "learning_rate": 1.2897777777777778e-05, + "loss": 0.4292, + "step": 4200 + }, + { + "epoch": 0.4646890365807532, + "grad_norm": 10.3064546585083, + "learning_rate": 1.2675555555555557e-05, + "loss": 0.4302, + "step": 4300 + }, + { + "epoch": 0.475495758361701, + "grad_norm": 11.634562492370605, + "learning_rate": 1.2453333333333335e-05, + "loss": 0.426, + "step": 4400 + }, + { + "epoch": 0.4863024801426487, + "grad_norm": 10.647918701171875, + "learning_rate": 1.2231111111111111e-05, + "loss": 0.4387, + "step": 4500 + }, + { + "epoch": 0.4863024801426487, + "eval_loss": 0.37359631061553955, + "eval_runtime": 4990.4878, + "eval_samples_per_second": 3.708, + "eval_steps_per_second": 0.927, + "eval_wer": 0.35633684967821144, + "step": 4500 + }, + { + "epoch": 0.4971092019235965, + "grad_norm": 9.396610260009766, + "learning_rate": 1.200888888888889e-05, + "loss": 0.4195, + "step": 4600 + }, + { + "epoch": 0.5079159237045442, + "grad_norm": 10.845105171203613, + "learning_rate": 1.1786666666666668e-05, + "loss": 0.4056, + "step": 4700 + }, + { + "epoch": 0.518722645485492, + "grad_norm": 9.404190063476562, + "learning_rate": 1.1564444444444445e-05, + "loss": 0.4306, + "step": 4800 + }, + { + "epoch": 0.5295293672664397, + "grad_norm": 9.176289558410645, + "learning_rate": 1.1342222222222224e-05, + "loss": 0.4239, + "step": 4900 + }, + { + "epoch": 0.5403360890473875, + "grad_norm": 10.088706016540527, + "learning_rate": 1.1120000000000002e-05, + "loss": 0.4154, + "step": 5000 + }, + { + "epoch": 0.5403360890473875, + "eval_loss": 0.36057594418525696, + "eval_runtime": 5945.658, + "eval_samples_per_second": 3.112, + "eval_steps_per_second": 0.778, + "eval_wer": 0.32743452795220485, + "step": 5000 + }, + { + "epoch": 0.5511428108283353, + "grad_norm": 9.688194274902344, + "learning_rate": 1.0897777777777778e-05, + "loss": 0.4115, + "step": 5100 + }, + { + "epoch": 0.5619495326092829, + "grad_norm": 9.752260208129883, + "learning_rate": 1.0675555555555558e-05, + "loss": 0.3854, + "step": 5200 + }, + { + "epoch": 0.5727562543902307, + "grad_norm": 10.447392463684082, + "learning_rate": 1.0453333333333334e-05, + "loss": 0.4141, + "step": 5300 + }, + { + "epoch": 0.5835629761711785, + "grad_norm": 11.185776710510254, + "learning_rate": 1.0231111111111112e-05, + "loss": 0.3924, + "step": 5400 + }, + { + "epoch": 0.5943696979521262, + "grad_norm": 10.3914794921875, + "learning_rate": 1.000888888888889e-05, + "loss": 0.419, + "step": 5500 + }, + { + "epoch": 0.5943696979521262, + "eval_loss": 0.3494803309440613, + "eval_runtime": 6902.9208, + "eval_samples_per_second": 2.681, + "eval_steps_per_second": 0.67, + "eval_wer": 0.314375307908257, + "step": 5500 + }, + { + "epoch": 0.605176419733074, + "grad_norm": 11.420536041259766, + "learning_rate": 9.786666666666667e-06, + "loss": 0.4096, + "step": 5600 + }, + { + "epoch": 0.6159831415140217, + "grad_norm": 9.05328369140625, + "learning_rate": 9.564444444444445e-06, + "loss": 0.3917, + "step": 5700 + }, + { + "epoch": 0.6267898632949694, + "grad_norm": 10.281911849975586, + "learning_rate": 9.342222222222223e-06, + "loss": 0.3965, + "step": 5800 + }, + { + "epoch": 0.6375965850759172, + "grad_norm": 10.587265014648438, + "learning_rate": 9.12e-06, + "loss": 0.374, + "step": 5900 + }, + { + "epoch": 0.648403306856865, + "grad_norm": 7.721372127532959, + "learning_rate": 8.897777777777779e-06, + "loss": 0.3799, + "step": 6000 + }, + { + "epoch": 0.648403306856865, + "eval_loss": 0.3397567868232727, + "eval_runtime": 7002.8513, + "eval_samples_per_second": 2.642, + "eval_steps_per_second": 0.661, + "eval_wer": 0.2921668139413039, + "step": 6000 + }, + { + "epoch": 0.6592100286378128, + "grad_norm": 6.785597324371338, + "learning_rate": 8.675555555555556e-06, + "loss": 0.3953, + "step": 6100 + }, + { + "epoch": 0.6700167504187605, + "grad_norm": 9.53781509399414, + "learning_rate": 8.453333333333334e-06, + "loss": 0.3786, + "step": 6200 + }, + { + "epoch": 0.6808234721997082, + "grad_norm": 8.857239723205566, + "learning_rate": 8.231111111111112e-06, + "loss": 0.3744, + "step": 6300 + }, + { + "epoch": 0.691630193980656, + "grad_norm": 9.638261795043945, + "learning_rate": 8.00888888888889e-06, + "loss": 0.3809, + "step": 6400 + }, + { + "epoch": 0.7024369157616037, + "grad_norm": 8.304004669189453, + "learning_rate": 7.786666666666666e-06, + "loss": 0.3802, + "step": 6500 + }, + { + "epoch": 0.7024369157616037, + "eval_loss": 0.3289755880832672, + "eval_runtime": 5885.8991, + "eval_samples_per_second": 3.144, + "eval_steps_per_second": 0.786, + "eval_wer": 0.3044049452998538, + "step": 6500 + }, + { + "epoch": 0.7132436375425515, + "grad_norm": 9.978581428527832, + "learning_rate": 7.564444444444446e-06, + "loss": 0.3537, + "step": 6600 + }, + { + "epoch": 0.7240503593234993, + "grad_norm": 10.849929809570312, + "learning_rate": 7.342222222222223e-06, + "loss": 0.3762, + "step": 6700 + }, + { + "epoch": 0.7348570811044469, + "grad_norm": 11.856138229370117, + "learning_rate": 7.1200000000000004e-06, + "loss": 0.3477, + "step": 6800 + }, + { + "epoch": 0.7456638028853947, + "grad_norm": 10.761491775512695, + "learning_rate": 6.897777777777779e-06, + "loss": 0.361, + "step": 6900 + }, + { + "epoch": 0.7564705246663425, + "grad_norm": 9.24421501159668, + "learning_rate": 6.675555555555556e-06, + "loss": 0.3611, + "step": 7000 + }, + { + "epoch": 0.7564705246663425, + "eval_loss": 0.3224972188472748, + "eval_runtime": 5632.6127, + "eval_samples_per_second": 3.285, + "eval_steps_per_second": 0.821, + "eval_wer": 0.2823372093932546, + "step": 7000 + }, + { + "epoch": 0.7672772464472902, + "grad_norm": 10.52470874786377, + "learning_rate": 6.453333333333334e-06, + "loss": 0.3638, + "step": 7100 + }, + { + "epoch": 0.778083968228238, + "grad_norm": 9.080463409423828, + "learning_rate": 6.231111111111111e-06, + "loss": 0.3532, + "step": 7200 + }, + { + "epoch": 0.7888906900091858, + "grad_norm": 8.789374351501465, + "learning_rate": 6.00888888888889e-06, + "loss": 0.3592, + "step": 7300 + }, + { + "epoch": 0.7996974117901334, + "grad_norm": 8.97732162475586, + "learning_rate": 5.7866666666666674e-06, + "loss": 0.3611, + "step": 7400 + }, + { + "epoch": 0.8105041335710812, + "grad_norm": 10.455592155456543, + "learning_rate": 5.5644444444444444e-06, + "loss": 0.3548, + "step": 7500 + }, + { + "epoch": 0.8105041335710812, + "eval_loss": 0.31678903102874756, + "eval_runtime": 3060.9871, + "eval_samples_per_second": 6.045, + "eval_steps_per_second": 1.511, + "eval_wer": 0.27332869353060313, + "step": 7500 + }, + { + "epoch": 0.821310855352029, + "grad_norm": 8.56920051574707, + "learning_rate": 5.342222222222223e-06, + "loss": 0.3628, + "step": 7600 + }, + { + "epoch": 0.8321175771329767, + "grad_norm": 11.37761402130127, + "learning_rate": 5.12e-06, + "loss": 0.3353, + "step": 7700 + }, + { + "epoch": 0.8429242989139245, + "grad_norm": 9.396086692810059, + "learning_rate": 4.897777777777778e-06, + "loss": 0.3704, + "step": 7800 + }, + { + "epoch": 0.8537310206948722, + "grad_norm": 10.0977144241333, + "learning_rate": 4.677777777777778e-06, + "loss": 0.364, + "step": 7900 + }, + { + "epoch": 0.8645377424758199, + "grad_norm": 8.653088569641113, + "learning_rate": 4.455555555555555e-06, + "loss": 0.346, + "step": 8000 + }, + { + "epoch": 0.8645377424758199, + "eval_loss": 0.3104597330093384, + "eval_runtime": 3053.8514, + "eval_samples_per_second": 6.059, + "eval_steps_per_second": 1.515, + "eval_wer": 0.26601709428444076, + "step": 8000 + }, + { + "epoch": 0.8753444642567677, + "grad_norm": 9.058122634887695, + "learning_rate": 4.233333333333334e-06, + "loss": 0.3382, + "step": 8100 + }, + { + "epoch": 0.8861511860377155, + "grad_norm": 12.135452270507812, + "learning_rate": 4.011111111111111e-06, + "loss": 0.3456, + "step": 8200 + }, + { + "epoch": 0.8969579078186632, + "grad_norm": 6.601293563842773, + "learning_rate": 3.7888888888888893e-06, + "loss": 0.3404, + "step": 8300 + }, + { + "epoch": 0.9077646295996109, + "grad_norm": 9.51930046081543, + "learning_rate": 3.566666666666667e-06, + "loss": 0.3479, + "step": 8400 + }, + { + "epoch": 0.9185713513805587, + "grad_norm": 7.031350135803223, + "learning_rate": 3.3444444444444445e-06, + "loss": 0.3547, + "step": 8500 + }, + { + "epoch": 0.9185713513805587, + "eval_loss": 0.3063461184501648, + "eval_runtime": 3070.7291, + "eval_samples_per_second": 6.026, + "eval_steps_per_second": 1.506, + "eval_wer": 0.27081068822871623, + "step": 8500 + }, + { + "epoch": 0.9293780731615064, + "grad_norm": 11.10822868347168, + "learning_rate": 3.1222222222222228e-06, + "loss": 0.3454, + "step": 8600 + }, + { + "epoch": 0.9401847949424542, + "grad_norm": 9.607211112976074, + "learning_rate": 2.9e-06, + "loss": 0.3319, + "step": 8700 + }, + { + "epoch": 0.950991516723402, + "grad_norm": 10.614663124084473, + "learning_rate": 2.677777777777778e-06, + "loss": 0.3441, + "step": 8800 + }, + { + "epoch": 0.9617982385043498, + "grad_norm": 8.344138145446777, + "learning_rate": 2.455555555555556e-06, + "loss": 0.3466, + "step": 8900 + }, + { + "epoch": 0.9726049602852974, + "grad_norm": 11.955930709838867, + "learning_rate": 2.2333333333333333e-06, + "loss": 0.3211, + "step": 9000 + }, + { + "epoch": 0.9726049602852974, + "eval_loss": 0.30189329385757446, + "eval_runtime": 3095.8164, + "eval_samples_per_second": 5.977, + "eval_steps_per_second": 1.494, + "eval_wer": 0.28268910454413937, + "step": 9000 + }, + { + "epoch": 0.9834116820662452, + "grad_norm": 9.438616752624512, + "learning_rate": 2.011111111111111e-06, + "loss": 0.343, + "step": 9100 + }, + { + "epoch": 0.994218403847193, + "grad_norm": 10.029309272766113, + "learning_rate": 1.788888888888889e-06, + "loss": 0.3582, + "step": 9200 + }, + { + "epoch": 1.0050791592370454, + "grad_norm": 9.47360610961914, + "learning_rate": 1.566666666666667e-06, + "loss": 0.3024, + "step": 9300 + }, + { + "epoch": 1.0158858810179932, + "grad_norm": 9.3403959274292, + "learning_rate": 1.3444444444444446e-06, + "loss": 0.2811, + "step": 9400 + }, + { + "epoch": 1.026692602798941, + "grad_norm": 9.723664283752441, + "learning_rate": 1.1222222222222222e-06, + "loss": 0.2718, + "step": 9500 + }, + { + "epoch": 1.026692602798941, + "eval_loss": 0.2989746034145355, + "eval_runtime": 3189.5179, + "eval_samples_per_second": 5.802, + "eval_steps_per_second": 1.45, + "eval_wer": 0.2659936346077151, + "step": 9500 + }, + { + "epoch": 1.0374993245798887, + "grad_norm": 7.739469051361084, + "learning_rate": 9.000000000000001e-07, + "loss": 0.2765, + "step": 9600 + }, + { + "epoch": 1.0483060463608365, + "grad_norm": 8.379693984985352, + "learning_rate": 6.777777777777779e-07, + "loss": 0.2872, + "step": 9700 + }, + { + "epoch": 1.0591127681417842, + "grad_norm": 8.849838256835938, + "learning_rate": 4.5555555555555563e-07, + "loss": 0.2782, + "step": 9800 + }, + { + "epoch": 1.069919489922732, + "grad_norm": 8.006597518920898, + "learning_rate": 2.3333333333333336e-07, + "loss": 0.2673, + "step": 9900 + }, + { + "epoch": 1.0807262117036798, + "grad_norm": 10.859480857849121, + "learning_rate": 1.1111111111111112e-08, + "loss": 0.2859, + "step": 10000 + }, + { + "epoch": 1.0807262117036798, + "eval_loss": 0.2979792058467865, + "eval_runtime": 3174.7879, + "eval_samples_per_second": 5.828, + "eval_steps_per_second": 1.457, + "eval_wer": 0.2586507557925852, + "step": 10000 + }, + { + "epoch": 1.0915329334846273, + "grad_norm": 8.480154037475586, + "learning_rate": 1.7735555555555558e-05, + "loss": 0.3044, + "step": 10100 + }, + { + "epoch": 1.102339655265575, + "grad_norm": 8.65846061706543, + "learning_rate": 1.7691111111111113e-05, + "loss": 0.3126, + "step": 10200 + }, + { + "epoch": 1.1131463770465229, + "grad_norm": 8.054668426513672, + "learning_rate": 1.764711111111111e-05, + "loss": 0.315, + "step": 10300 + }, + { + "epoch": 1.1239530988274706, + "grad_norm": 9.317754745483398, + "learning_rate": 1.7602666666666667e-05, + "loss": 0.3139, + "step": 10400 + }, + { + "epoch": 1.1347598206084184, + "grad_norm": 6.9345879554748535, + "learning_rate": 1.7558222222222222e-05, + "loss": 0.2917, + "step": 10500 + }, + { + "epoch": 1.1347598206084184, + "eval_loss": 0.3268890976905823, + "eval_runtime": 5010.973, + "eval_samples_per_second": 3.693, + "eval_steps_per_second": 0.923, + "eval_wer": 0.2518552694343872, + "step": 10500 + }, + { + "epoch": 1.1455665423893662, + "grad_norm": 10.198206901550293, + "learning_rate": 1.7513777777777777e-05, + "loss": 0.3099, + "step": 10600 + }, + { + "epoch": 1.156373264170314, + "grad_norm": 10.446975708007812, + "learning_rate": 1.7469333333333332e-05, + "loss": 0.3109, + "step": 10700 + }, + { + "epoch": 1.1671799859512617, + "grad_norm": 8.202065467834473, + "learning_rate": 1.742488888888889e-05, + "loss": 0.3033, + "step": 10800 + }, + { + "epoch": 1.1779867077322095, + "grad_norm": 9.471212387084961, + "learning_rate": 1.7380444444444446e-05, + "loss": 0.3121, + "step": 10900 + }, + { + "epoch": 1.1887934295131573, + "grad_norm": 9.272053718566895, + "learning_rate": 1.7336e-05, + "loss": 0.3117, + "step": 11000 + }, + { + "epoch": 1.1887934295131573, + "eval_loss": 0.32140180468559265, + "eval_runtime": 4975.5976, + "eval_samples_per_second": 3.719, + "eval_steps_per_second": 0.93, + "eval_wer": 0.2575403310942375, + "step": 11000 + }, + { + "epoch": 1.199600151294105, + "grad_norm": 8.912075996398926, + "learning_rate": 1.7291555555555557e-05, + "loss": 0.3163, + "step": 11100 + }, + { + "epoch": 1.2104068730750526, + "grad_norm": 12.307350158691406, + "learning_rate": 1.7247111111111112e-05, + "loss": 0.3087, + "step": 11200 + }, + { + "epoch": 1.2212135948560003, + "grad_norm": 8.338894844055176, + "learning_rate": 1.7202666666666667e-05, + "loss": 0.3264, + "step": 11300 + }, + { + "epoch": 1.232020316636948, + "grad_norm": 10.600968360900879, + "learning_rate": 1.7158222222222222e-05, + "loss": 0.3144, + "step": 11400 + }, + { + "epoch": 1.2428270384178959, + "grad_norm": 9.626172065734863, + "learning_rate": 1.711377777777778e-05, + "loss": 0.3204, + "step": 11500 + }, + { + "epoch": 1.2428270384178959, + "eval_loss": 0.3168378174304962, + "eval_runtime": 5060.1288, + "eval_samples_per_second": 3.657, + "eval_steps_per_second": 0.914, + "eval_wer": 0.2646173335731434, + "step": 11500 + }, + { + "epoch": 1.2536337601988436, + "grad_norm": 8.312841415405273, + "learning_rate": 1.7069333333333336e-05, + "loss": 0.2968, + "step": 11600 + }, + { + "epoch": 1.2644404819797914, + "grad_norm": 8.717096328735352, + "learning_rate": 1.702488888888889e-05, + "loss": 0.3145, + "step": 11700 + }, + { + "epoch": 1.2752472037607392, + "grad_norm": 7.836411952972412, + "learning_rate": 1.6980444444444447e-05, + "loss": 0.3144, + "step": 11800 + }, + { + "epoch": 1.286053925541687, + "grad_norm": 7.561498165130615, + "learning_rate": 1.6936000000000002e-05, + "loss": 0.315, + "step": 11900 + }, + { + "epoch": 1.2968606473226347, + "grad_norm": 9.085077285766602, + "learning_rate": 1.6891555555555557e-05, + "loss": 0.2962, + "step": 12000 + }, + { + "epoch": 1.2968606473226347, + "eval_loss": 0.3087281286716461, + "eval_runtime": 5226.3325, + "eval_samples_per_second": 3.541, + "eval_steps_per_second": 0.885, + "eval_wer": 0.24104817835610226, + "step": 12000 + }, + { + "epoch": 1.3076673691035825, + "grad_norm": 10.332176208496094, + "learning_rate": 1.6847111111111112e-05, + "loss": 0.3092, + "step": 12100 + }, + { + "epoch": 1.3184740908845303, + "grad_norm": 10.159818649291992, + "learning_rate": 1.6802666666666668e-05, + "loss": 0.301, + "step": 12200 + }, + { + "epoch": 1.3292808126654778, + "grad_norm": 8.238092422485352, + "learning_rate": 1.6758222222222226e-05, + "loss": 0.307, + "step": 12300 + }, + { + "epoch": 1.3400875344464258, + "grad_norm": 6.4258317947387695, + "learning_rate": 1.671377777777778e-05, + "loss": 0.3022, + "step": 12400 + }, + { + "epoch": 1.3508942562273734, + "grad_norm": 10.840997695922852, + "learning_rate": 1.6669333333333337e-05, + "loss": 0.2961, + "step": 12500 + }, + { + "epoch": 1.3508942562273734, + "eval_loss": 0.3057025372982025, + "eval_runtime": 5051.0013, + "eval_samples_per_second": 3.663, + "eval_steps_per_second": 0.916, + "eval_wer": 0.23847543380852212, + "step": 12500 + }, + { + "epoch": 1.3617009780083211, + "grad_norm": 12.025620460510254, + "learning_rate": 1.6624888888888892e-05, + "loss": 0.2959, + "step": 12600 + }, + { + "epoch": 1.372507699789269, + "grad_norm": 7.729722023010254, + "learning_rate": 1.6580444444444447e-05, + "loss": 0.2917, + "step": 12700 + }, + { + "epoch": 1.3833144215702167, + "grad_norm": 10.813538551330566, + "learning_rate": 1.6536000000000002e-05, + "loss": 0.3176, + "step": 12800 + }, + { + "epoch": 1.3941211433511644, + "grad_norm": 9.306085586547852, + "learning_rate": 1.6491555555555558e-05, + "loss": 0.3092, + "step": 12900 + }, + { + "epoch": 1.4049278651321122, + "grad_norm": 8.277687072753906, + "learning_rate": 1.6447111111111113e-05, + "loss": 0.2887, + "step": 13000 + }, + { + "epoch": 1.4049278651321122, + "eval_loss": 0.298722505569458, + "eval_runtime": 5198.4556, + "eval_samples_per_second": 3.56, + "eval_steps_per_second": 0.89, + "eval_wer": 0.22810625669578274, + "step": 13000 + }, + { + "epoch": 1.41573458691306, + "grad_norm": 6.493645191192627, + "learning_rate": 1.640311111111111e-05, + "loss": 0.2895, + "step": 13100 + }, + { + "epoch": 1.4265413086940077, + "grad_norm": 12.606965065002441, + "learning_rate": 1.6358666666666666e-05, + "loss": 0.3072, + "step": 13200 + }, + { + "epoch": 1.4373480304749555, + "grad_norm": 11.579826354980469, + "learning_rate": 1.6314222222222225e-05, + "loss": 0.3046, + "step": 13300 + }, + { + "epoch": 1.448154752255903, + "grad_norm": 8.03853702545166, + "learning_rate": 1.626977777777778e-05, + "loss": 0.2983, + "step": 13400 + }, + { + "epoch": 1.458961474036851, + "grad_norm": 5.478261470794678, + "learning_rate": 1.6225333333333335e-05, + "loss": 0.2981, + "step": 13500 + }, + { + "epoch": 1.458961474036851, + "eval_loss": 0.29534754157066345, + "eval_runtime": 5398.8982, + "eval_samples_per_second": 3.427, + "eval_steps_per_second": 0.857, + "eval_wer": 0.23218042055380478, + "step": 13500 + }, + { + "epoch": 1.4697681958177986, + "grad_norm": 11.128421783447266, + "learning_rate": 1.618088888888889e-05, + "loss": 0.2863, + "step": 13600 + }, + { + "epoch": 1.4805749175987464, + "grad_norm": 9.209829330444336, + "learning_rate": 1.6136444444444446e-05, + "loss": 0.2945, + "step": 13700 + }, + { + "epoch": 1.4913816393796941, + "grad_norm": 7.881196022033691, + "learning_rate": 1.6092e-05, + "loss": 0.2881, + "step": 13800 + }, + { + "epoch": 1.502188361160642, + "grad_norm": 7.5840630531311035, + "learning_rate": 1.6047555555555556e-05, + "loss": 0.2881, + "step": 13900 + }, + { + "epoch": 1.5129950829415897, + "grad_norm": 9.444194793701172, + "learning_rate": 1.600311111111111e-05, + "loss": 0.2994, + "step": 14000 + }, + { + "epoch": 1.5129950829415897, + "eval_loss": 0.29087820649147034, + "eval_runtime": 5347.4253, + "eval_samples_per_second": 3.46, + "eval_steps_per_second": 0.865, + "eval_wer": 0.23219606033828855, + "step": 14000 + }, + { + "epoch": 1.5238018047225375, + "grad_norm": 8.870946884155273, + "learning_rate": 1.595866666666667e-05, + "loss": 0.2817, + "step": 14100 + }, + { + "epoch": 1.5346085265034852, + "grad_norm": 8.750350952148438, + "learning_rate": 1.5914222222222225e-05, + "loss": 0.2748, + "step": 14200 + }, + { + "epoch": 1.5454152482844328, + "grad_norm": 8.175148010253906, + "learning_rate": 1.586977777777778e-05, + "loss": 0.2941, + "step": 14300 + }, + { + "epoch": 1.5562219700653808, + "grad_norm": 8.30854320526123, + "learning_rate": 1.5825333333333336e-05, + "loss": 0.2861, + "step": 14400 + }, + { + "epoch": 1.5670286918463283, + "grad_norm": 6.031162261962891, + "learning_rate": 1.578088888888889e-05, + "loss": 0.2818, + "step": 14500 + }, + { + "epoch": 1.5670286918463283, + "eval_loss": 0.2847795784473419, + "eval_runtime": 4891.0559, + "eval_samples_per_second": 3.783, + "eval_steps_per_second": 0.946, + "eval_wer": 0.21997356876422242, + "step": 14500 + }, + { + "epoch": 1.5778354136272763, + "grad_norm": 9.509627342224121, + "learning_rate": 1.5736444444444446e-05, + "loss": 0.276, + "step": 14600 + }, + { + "epoch": 1.5886421354082239, + "grad_norm": 9.79079532623291, + "learning_rate": 1.5692e-05, + "loss": 0.2918, + "step": 14700 + }, + { + "epoch": 1.5994488571891716, + "grad_norm": 5.595622539520264, + "learning_rate": 1.5647555555555557e-05, + "loss": 0.282, + "step": 14800 + }, + { + "epoch": 1.6102555789701194, + "grad_norm": 8.00671100616455, + "learning_rate": 1.5603111111111112e-05, + "loss": 0.2884, + "step": 14900 + }, + { + "epoch": 1.6210623007510672, + "grad_norm": 9.086261749267578, + "learning_rate": 1.5558666666666667e-05, + "loss": 0.2851, + "step": 15000 + }, + { + "epoch": 1.6210623007510672, + "eval_loss": 0.2829968333244324, + "eval_runtime": 4762.5655, + "eval_samples_per_second": 3.885, + "eval_steps_per_second": 0.971, + "eval_wer": 0.21664229466917945, + "step": 15000 + }, + { + "epoch": 1.631869022532015, + "grad_norm": 7.383193016052246, + "learning_rate": 1.5514222222222222e-05, + "loss": 0.2811, + "step": 15100 + }, + { + "epoch": 1.6426757443129627, + "grad_norm": 8.950287818908691, + "learning_rate": 1.5469777777777778e-05, + "loss": 0.2897, + "step": 15200 + }, + { + "epoch": 1.6534824660939105, + "grad_norm": 9.632174491882324, + "learning_rate": 1.5425333333333333e-05, + "loss": 0.2622, + "step": 15300 + }, + { + "epoch": 1.664289187874858, + "grad_norm": 9.395116806030273, + "learning_rate": 1.5380888888888888e-05, + "loss": 0.2839, + "step": 15400 + }, + { + "epoch": 1.675095909655806, + "grad_norm": 7.435296535491943, + "learning_rate": 1.5336444444444443e-05, + "loss": 0.275, + "step": 15500 + }, + { + "epoch": 1.675095909655806, + "eval_loss": 0.2770063579082489, + "eval_runtime": 3549.6262, + "eval_samples_per_second": 5.213, + "eval_steps_per_second": 1.303, + "eval_wer": 0.2128887463930747, + "step": 15500 + }, + { + "epoch": 1.6859026314367536, + "grad_norm": 6.951478004455566, + "learning_rate": 1.5292e-05, + "loss": 0.2573, + "step": 15600 + }, + { + "epoch": 1.6967093532177016, + "grad_norm": 9.954898834228516, + "learning_rate": 1.5247555555555557e-05, + "loss": 0.2722, + "step": 15700 + }, + { + "epoch": 1.707516074998649, + "grad_norm": 7.309504985809326, + "learning_rate": 1.5203111111111112e-05, + "loss": 0.2578, + "step": 15800 + }, + { + "epoch": 1.7183227967795969, + "grad_norm": 7.3711042404174805, + "learning_rate": 1.5158666666666668e-05, + "loss": 0.2718, + "step": 15900 + }, + { + "epoch": 1.7291295185605446, + "grad_norm": 10.445212364196777, + "learning_rate": 1.5114222222222223e-05, + "loss": 0.2689, + "step": 16000 + }, + { + "epoch": 1.7291295185605446, + "eval_loss": 0.27603384852409363, + "eval_runtime": 3570.6477, + "eval_samples_per_second": 5.182, + "eval_steps_per_second": 1.296, + "eval_wer": 0.21191907975508098, + "step": 16000 + }, + { + "epoch": 1.7399362403414924, + "grad_norm": 5.5510573387146, + "learning_rate": 1.5069777777777778e-05, + "loss": 0.2831, + "step": 16100 + }, + { + "epoch": 1.7507429621224402, + "grad_norm": 7.619734287261963, + "learning_rate": 1.5025333333333333e-05, + "loss": 0.2596, + "step": 16200 + }, + { + "epoch": 1.761549683903388, + "grad_norm": 8.503314018249512, + "learning_rate": 1.4980888888888889e-05, + "loss": 0.2741, + "step": 16300 + }, + { + "epoch": 1.7723564056843357, + "grad_norm": 7.427919387817383, + "learning_rate": 1.4936444444444447e-05, + "loss": 0.2826, + "step": 16400 + }, + { + "epoch": 1.7831631274652833, + "grad_norm": 6.663356781005859, + "learning_rate": 1.4892000000000002e-05, + "loss": 0.2796, + "step": 16500 + }, + { + "epoch": 1.7831631274652833, + "eval_loss": 0.26777052879333496, + "eval_runtime": 3498.6608, + "eval_samples_per_second": 5.289, + "eval_steps_per_second": 1.322, + "eval_wer": 0.20020488117673738, + "step": 16500 + }, + { + "epoch": 1.7939698492462313, + "grad_norm": 7.476739883422852, + "learning_rate": 1.4847555555555558e-05, + "loss": 0.2507, + "step": 16600 + }, + { + "epoch": 1.8047765710271788, + "grad_norm": 7.695949077606201, + "learning_rate": 1.4803111111111113e-05, + "loss": 0.2578, + "step": 16700 + }, + { + "epoch": 1.8155832928081268, + "grad_norm": 9.240167617797852, + "learning_rate": 1.4758666666666668e-05, + "loss": 0.2917, + "step": 16800 + }, + { + "epoch": 1.8263900145890744, + "grad_norm": 8.233548164367676, + "learning_rate": 1.4714222222222223e-05, + "loss": 0.2587, + "step": 16900 + }, + { + "epoch": 1.8371967363700221, + "grad_norm": 9.109882354736328, + "learning_rate": 1.4669777777777779e-05, + "loss": 0.2717, + "step": 17000 + }, + { + "epoch": 1.8371967363700221, + "eval_loss": 0.2652583122253418, + "eval_runtime": 3449.0658, + "eval_samples_per_second": 5.365, + "eval_steps_per_second": 1.341, + "eval_wer": 0.20007976290086724, + "step": 17000 + }, + { + "epoch": 1.84800345815097, + "grad_norm": 6.163556098937988, + "learning_rate": 1.4625333333333334e-05, + "loss": 0.2607, + "step": 17100 + }, + { + "epoch": 1.8588101799319177, + "grad_norm": 9.27730941772461, + "learning_rate": 1.4581333333333334e-05, + "loss": 0.2581, + "step": 17200 + }, + { + "epoch": 1.8696169017128654, + "grad_norm": 8.40946102142334, + "learning_rate": 1.4536888888888889e-05, + "loss": 0.2577, + "step": 17300 + }, + { + "epoch": 1.8804236234938132, + "grad_norm": 8.552946090698242, + "learning_rate": 1.4492444444444444e-05, + "loss": 0.2741, + "step": 17400 + }, + { + "epoch": 1.891230345274761, + "grad_norm": 6.056818962097168, + "learning_rate": 1.4448000000000001e-05, + "loss": 0.2661, + "step": 17500 + }, + { + "epoch": 1.891230345274761, + "eval_loss": 0.2625672221183777, + "eval_runtime": 3434.9257, + "eval_samples_per_second": 5.387, + "eval_steps_per_second": 1.347, + "eval_wer": 0.20144824404319708, + "step": 17500 + }, + { + "epoch": 1.9020370670557085, + "grad_norm": 7.4529571533203125, + "learning_rate": 1.4403555555555556e-05, + "loss": 0.2638, + "step": 17600 + }, + { + "epoch": 1.9128437888366565, + "grad_norm": 6.99146032333374, + "learning_rate": 1.4359111111111112e-05, + "loss": 0.268, + "step": 17700 + }, + { + "epoch": 1.923650510617604, + "grad_norm": 6.872374534606934, + "learning_rate": 1.4314666666666669e-05, + "loss": 0.2469, + "step": 17800 + }, + { + "epoch": 1.934457232398552, + "grad_norm": 8.856480598449707, + "learning_rate": 1.4270222222222224e-05, + "loss": 0.2685, + "step": 17900 + }, + { + "epoch": 1.9452639541794996, + "grad_norm": 9.830224990844727, + "learning_rate": 1.4225777777777779e-05, + "loss": 0.2612, + "step": 18000 + }, + { + "epoch": 1.9452639541794996, + "eval_loss": 0.2572856843471527, + "eval_runtime": 3453.1421, + "eval_samples_per_second": 5.359, + "eval_steps_per_second": 1.34, + "eval_wer": 0.19530180874107556, + "step": 18000 + }, + { + "epoch": 1.9560706759604474, + "grad_norm": 7.073034286499023, + "learning_rate": 1.4181333333333334e-05, + "loss": 0.2631, + "step": 18100 + }, + { + "epoch": 1.9668773977413951, + "grad_norm": 8.844318389892578, + "learning_rate": 1.4136888888888891e-05, + "loss": 0.263, + "step": 18200 + }, + { + "epoch": 1.977684119522343, + "grad_norm": 7.525826454162598, + "learning_rate": 1.4092444444444446e-05, + "loss": 0.2643, + "step": 18300 + }, + { + "epoch": 1.9884908413032907, + "grad_norm": 7.551785945892334, + "learning_rate": 1.4048000000000002e-05, + "loss": 0.2491, + "step": 18400 + }, + { + "epoch": 1.9992975630842384, + "grad_norm": 8.573739051818848, + "learning_rate": 1.4003555555555557e-05, + "loss": 0.2532, + "step": 18500 + }, + { + "epoch": 1.9992975630842384, + "eval_loss": 0.2554282248020172, + "eval_runtime": 3437.8823, + "eval_samples_per_second": 5.382, + "eval_steps_per_second": 1.346, + "eval_wer": 0.19536436787901063, + "step": 18500 + }, + { + "epoch": 2.0101583184740908, + "grad_norm": 250204.390625, + "learning_rate": 1.3959111111111112e-05, + "loss": 0.2066, + "step": 18600 + }, + { + "epoch": 2.0209650402550388, + "grad_norm": 299758.4375, + "learning_rate": 1.3914666666666667e-05, + "loss": 0.1958, + "step": 18700 + }, + { + "epoch": 2.0317717620359863, + "grad_norm": 245057.34375, + "learning_rate": 1.3870222222222223e-05, + "loss": 0.2, + "step": 18800 + }, + { + "epoch": 2.0425784838169343, + "grad_norm": 334399.46875, + "learning_rate": 1.3825777777777778e-05, + "loss": 0.192, + "step": 18900 + }, + { + "epoch": 2.053385205597882, + "grad_norm": 236247.03125, + "learning_rate": 1.3781333333333335e-05, + "loss": 0.1993, + "step": 19000 + }, + { + "epoch": 2.053385205597882, + "eval_loss": 0.2527328431606293, + "eval_runtime": 2819.5348, + "eval_samples_per_second": 6.563, + "eval_steps_per_second": 0.82, + "eval_wer": 0.19490299423673943, + "step": 19000 + }, + { + "epoch": 2.06419192737883, + "grad_norm": 260942.1875, + "learning_rate": 1.373688888888889e-05, + "loss": 0.1947, + "step": 19100 + }, + { + "epoch": 2.0749986491597774, + "grad_norm": 247902.484375, + "learning_rate": 1.3692444444444445e-05, + "loss": 0.1969, + "step": 19200 + }, + { + "epoch": 2.085805370940725, + "grad_norm": 229633.375, + "learning_rate": 1.3648e-05, + "loss": 0.1934, + "step": 19300 + }, + { + "epoch": 2.096612092721673, + "grad_norm": 214819.4375, + "learning_rate": 1.3603555555555556e-05, + "loss": 0.1936, + "step": 19400 + }, + { + "epoch": 2.1074188145026205, + "grad_norm": 225145.53125, + "learning_rate": 1.3559111111111113e-05, + "loss": 0.2009, + "step": 19500 + }, + { + "epoch": 2.1074188145026205, + "eval_loss": 0.25053051114082336, + "eval_runtime": 2775.1844, + "eval_samples_per_second": 6.668, + "eval_steps_per_second": 0.833, + "eval_wer": 0.1897340454648535, + "step": 19500 + }, + { + "epoch": 2.1182255362835685, + "grad_norm": 335387.5, + "learning_rate": 1.3514666666666668e-05, + "loss": 0.1993, + "step": 19600 + }, + { + "epoch": 2.129032258064516, + "grad_norm": 276303.59375, + "learning_rate": 1.3470222222222223e-05, + "loss": 0.1927, + "step": 19700 + }, + { + "epoch": 2.139838979845464, + "grad_norm": 314395.15625, + "learning_rate": 1.342577777777778e-05, + "loss": 0.1828, + "step": 19800 + }, + { + "epoch": 2.1506457016264116, + "grad_norm": 224658.71875, + "learning_rate": 1.3381333333333335e-05, + "loss": 0.1998, + "step": 19900 + }, + { + "epoch": 2.1614524234073595, + "grad_norm": 269057.15625, + "learning_rate": 1.333688888888889e-05, + "loss": 0.1929, + "step": 20000 + }, + { + "epoch": 2.1614524234073595, + "eval_loss": 0.24842554330825806, + "eval_runtime": 2752.0603, + "eval_samples_per_second": 6.724, + "eval_steps_per_second": 0.84, + "eval_wer": 0.19266650505556032, + "step": 20000 + }, + { + "epoch": 2.172259145188307, + "grad_norm": 295629.75, + "learning_rate": 1.3292444444444446e-05, + "loss": 0.1881, + "step": 20100 + }, + { + "epoch": 2.1830658669692546, + "grad_norm": 253882.09375, + "learning_rate": 1.3248000000000001e-05, + "loss": 0.1918, + "step": 20200 + }, + { + "epoch": 2.1938725887502026, + "grad_norm": 220421.125, + "learning_rate": 1.3203555555555556e-05, + "loss": 0.1944, + "step": 20300 + }, + { + "epoch": 2.20467931053115, + "grad_norm": 167626.5625, + "learning_rate": 1.3159111111111111e-05, + "loss": 0.194, + "step": 20400 + }, + { + "epoch": 2.215486032312098, + "grad_norm": 303924.09375, + "learning_rate": 1.3114666666666667e-05, + "loss": 0.2011, + "step": 20500 + }, + { + "epoch": 2.215486032312098, + "eval_loss": 0.24535807967185974, + "eval_runtime": 2748.7126, + "eval_samples_per_second": 6.732, + "eval_steps_per_second": 0.841, + "eval_wer": 0.1894759890208713, + "step": 20500 + }, + { + "epoch": 2.2262927540930457, + "grad_norm": 202871.59375, + "learning_rate": 1.3070222222222223e-05, + "loss": 0.2008, + "step": 20600 + }, + { + "epoch": 2.2370994758739937, + "grad_norm": 263771.4375, + "learning_rate": 1.3025777777777779e-05, + "loss": 0.1995, + "step": 20700 + }, + { + "epoch": 2.2479061976549413, + "grad_norm": 214771.859375, + "learning_rate": 1.2981333333333334e-05, + "loss": 0.1911, + "step": 20800 + }, + { + "epoch": 2.2587129194358893, + "grad_norm": 286280.8125, + "learning_rate": 1.293688888888889e-05, + "loss": 0.1939, + "step": 20900 + }, + { + "epoch": 2.269519641216837, + "grad_norm": 310779.4375, + "learning_rate": 1.2892444444444444e-05, + "loss": 0.1828, + "step": 21000 + }, + { + "epoch": 2.269519641216837, + "eval_loss": 0.24438022077083588, + "eval_runtime": 2733.7874, + "eval_samples_per_second": 6.769, + "eval_steps_per_second": 0.846, + "eval_wer": 0.1892179325768891, + "step": 21000 + }, + { + "epoch": 2.280326362997785, + "grad_norm": 254828.265625, + "learning_rate": 1.2848e-05, + "loss": 0.1933, + "step": 21100 + }, + { + "epoch": 2.2911330847787323, + "grad_norm": 239186.609375, + "learning_rate": 1.2803555555555557e-05, + "loss": 0.2002, + "step": 21200 + }, + { + "epoch": 2.3019398065596803, + "grad_norm": 239042.359375, + "learning_rate": 1.2759111111111113e-05, + "loss": 0.1924, + "step": 21300 + }, + { + "epoch": 2.312746528340628, + "grad_norm": 260667.75, + "learning_rate": 1.2714666666666669e-05, + "loss": 0.2005, + "step": 21400 + }, + { + "epoch": 2.3235532501215754, + "grad_norm": 245939.078125, + "learning_rate": 1.2670222222222224e-05, + "loss": 0.1823, + "step": 21500 + }, + { + "epoch": 2.3235532501215754, + "eval_loss": 0.24371393024921417, + "eval_runtime": 2738.6211, + "eval_samples_per_second": 6.757, + "eval_steps_per_second": 0.845, + "eval_wer": 0.18452599723175814, + "step": 21500 + }, + { + "epoch": 2.3343599719025234, + "grad_norm": 315165.90625, + "learning_rate": 1.262577777777778e-05, + "loss": 0.1958, + "step": 21600 + }, + { + "epoch": 2.345166693683471, + "grad_norm": 236173.65625, + "learning_rate": 1.2581333333333334e-05, + "loss": 0.1884, + "step": 21700 + }, + { + "epoch": 2.355973415464419, + "grad_norm": 176315.5, + "learning_rate": 1.253688888888889e-05, + "loss": 0.1953, + "step": 21800 + }, + { + "epoch": 2.3667801372453665, + "grad_norm": 257501.171875, + "learning_rate": 1.2492444444444445e-05, + "loss": 0.1968, + "step": 21900 + }, + { + "epoch": 2.3775868590263145, + "grad_norm": 253721.75, + "learning_rate": 1.2448e-05, + "loss": 0.186, + "step": 22000 + }, + { + "epoch": 2.3775868590263145, + "eval_loss": 0.24074091017246246, + "eval_runtime": 2749.0556, + "eval_samples_per_second": 6.731, + "eval_steps_per_second": 0.841, + "eval_wer": 0.18391604563689112, + "step": 22000 + }, + { + "epoch": 2.388393580807262, + "grad_norm": 173236.4375, + "learning_rate": 1.2403555555555557e-05, + "loss": 0.2004, + "step": 22100 + }, + { + "epoch": 2.39920030258821, + "grad_norm": 219634.15625, + "learning_rate": 1.2359111111111112e-05, + "loss": 0.1986, + "step": 22200 + }, + { + "epoch": 2.4100070243691576, + "grad_norm": 276572.0, + "learning_rate": 1.2314666666666667e-05, + "loss": 0.1821, + "step": 22300 + }, + { + "epoch": 2.420813746150105, + "grad_norm": 166507.25, + "learning_rate": 1.2270222222222223e-05, + "loss": 0.1785, + "step": 22400 + }, + { + "epoch": 2.431620467931053, + "grad_norm": 264705.6875, + "learning_rate": 1.2225777777777778e-05, + "loss": 0.1898, + "step": 22500 + }, + { + "epoch": 2.431620467931053, + "eval_loss": 0.23902596533298492, + "eval_runtime": 2754.9189, + "eval_samples_per_second": 6.717, + "eval_steps_per_second": 0.84, + "eval_wer": 0.18278216126181782, + "step": 22500 + }, + { + "epoch": 2.4424271897120007, + "grad_norm": 222752.09375, + "learning_rate": 1.2181333333333333e-05, + "loss": 0.1884, + "step": 22600 + }, + { + "epoch": 2.4532339114929487, + "grad_norm": 206168.15625, + "learning_rate": 1.2136888888888888e-05, + "loss": 0.1911, + "step": 22700 + }, + { + "epoch": 2.464040633273896, + "grad_norm": 255710.078125, + "learning_rate": 1.2092444444444444e-05, + "loss": 0.1876, + "step": 22800 + }, + { + "epoch": 2.474847355054844, + "grad_norm": 158791.09375, + "learning_rate": 1.2048000000000002e-05, + "loss": 0.1962, + "step": 22900 + }, + { + "epoch": 2.4856540768357918, + "grad_norm": 322706.125, + "learning_rate": 1.2003555555555557e-05, + "loss": 0.1789, + "step": 23000 + }, + { + "epoch": 2.4856540768357918, + "eval_loss": 0.2363387644290924, + "eval_runtime": 2786.1335, + "eval_samples_per_second": 6.641, + "eval_steps_per_second": 0.83, + "eval_wer": 0.17904425277019684, + "step": 23000 + }, + { + "epoch": 2.4964607986167398, + "grad_norm": 262923.15625, + "learning_rate": 1.1959111111111113e-05, + "loss": 0.1861, + "step": 23100 + }, + { + "epoch": 2.5072675203976873, + "grad_norm": 216068.8125, + "learning_rate": 1.1914666666666668e-05, + "loss": 0.1827, + "step": 23200 + }, + { + "epoch": 2.518074242178635, + "grad_norm": 188912.265625, + "learning_rate": 1.1870222222222223e-05, + "loss": 0.1827, + "step": 23300 + }, + { + "epoch": 2.528880963959583, + "grad_norm": 222542.171875, + "learning_rate": 1.1825777777777778e-05, + "loss": 0.19, + "step": 23400 + }, + { + "epoch": 2.539687685740531, + "grad_norm": 202856.296875, + "learning_rate": 1.1781333333333334e-05, + "loss": 0.1765, + "step": 23500 + }, + { + "epoch": 2.539687685740531, + "eval_loss": 0.2353278398513794, + "eval_runtime": 2813.1319, + "eval_samples_per_second": 6.578, + "eval_steps_per_second": 0.822, + "eval_wer": 0.17970112371851515, + "step": 23500 + }, + { + "epoch": 2.5504944075214784, + "grad_norm": 238512.4375, + "learning_rate": 1.1736888888888889e-05, + "loss": 0.1869, + "step": 23600 + }, + { + "epoch": 2.561301129302426, + "grad_norm": 276903.75, + "learning_rate": 1.1692444444444446e-05, + "loss": 0.1963, + "step": 23700 + }, + { + "epoch": 2.572107851083374, + "grad_norm": 329353.65625, + "learning_rate": 1.1648000000000001e-05, + "loss": 0.1839, + "step": 23800 + }, + { + "epoch": 2.5829145728643215, + "grad_norm": 330138.1875, + "learning_rate": 1.1603555555555556e-05, + "loss": 0.1808, + "step": 23900 + }, + { + "epoch": 2.5937212946452695, + "grad_norm": 279948.03125, + "learning_rate": 1.1559111111111111e-05, + "loss": 0.1808, + "step": 24000 + }, + { + "epoch": 2.5937212946452695, + "eval_loss": 0.23201151192188263, + "eval_runtime": 3188.0735, + "eval_samples_per_second": 5.804, + "eval_steps_per_second": 0.726, + "eval_wer": 0.17965420436506385, + "step": 24000 + }, + { + "epoch": 2.604528016426217, + "grad_norm": 216210.78125, + "learning_rate": 1.1514666666666667e-05, + "loss": 0.1831, + "step": 24100 + }, + { + "epoch": 2.615334738207165, + "grad_norm": 282255.96875, + "learning_rate": 1.1470222222222222e-05, + "loss": 0.1826, + "step": 24200 + }, + { + "epoch": 2.6261414599881125, + "grad_norm": 260694.609375, + "learning_rate": 1.1425777777777777e-05, + "loss": 0.189, + "step": 24300 + }, + { + "epoch": 2.6369481817690605, + "grad_norm": 212081.46875, + "learning_rate": 1.1381333333333336e-05, + "loss": 0.1859, + "step": 24400 + }, + { + "epoch": 2.647754903550008, + "grad_norm": 217712.515625, + "learning_rate": 1.1336888888888891e-05, + "loss": 0.1771, + "step": 24500 + }, + { + "epoch": 2.647754903550008, + "eval_loss": 0.22906863689422607, + "eval_runtime": 3261.465, + "eval_samples_per_second": 5.674, + "eval_steps_per_second": 0.709, + "eval_wer": 0.17766795173562508, + "step": 24500 + }, + { + "epoch": 2.6585616253309556, + "grad_norm": 298979.4375, + "learning_rate": 1.1292444444444446e-05, + "loss": 0.1884, + "step": 24600 + }, + { + "epoch": 2.6693683471119036, + "grad_norm": 220226.265625, + "learning_rate": 1.1248000000000001e-05, + "loss": 0.1938, + "step": 24700 + }, + { + "epoch": 2.6801750688928516, + "grad_norm": 208763.828125, + "learning_rate": 1.1203555555555557e-05, + "loss": 0.1764, + "step": 24800 + }, + { + "epoch": 2.690981790673799, + "grad_norm": 141644.71875, + "learning_rate": 1.1159111111111112e-05, + "loss": 0.1797, + "step": 24900 + }, + { + "epoch": 2.7017885124547467, + "grad_norm": 232710.078125, + "learning_rate": 1.1114666666666667e-05, + "loss": 0.183, + "step": 25000 + }, + { + "epoch": 2.7017885124547467, + "eval_loss": 0.22756607830524445, + "eval_runtime": 2974.7193, + "eval_samples_per_second": 6.22, + "eval_steps_per_second": 0.778, + "eval_wer": 0.17883311567966592, + "step": 25000 + }, + { + "epoch": 2.7125952342356947, + "grad_norm": 348240.0, + "learning_rate": 1.1070222222222222e-05, + "loss": 0.1888, + "step": 25100 + }, + { + "epoch": 2.7234019560166423, + "grad_norm": 219766.265625, + "learning_rate": 1.102577777777778e-05, + "loss": 0.1839, + "step": 25200 + }, + { + "epoch": 2.7342086777975902, + "grad_norm": 240334.796875, + "learning_rate": 1.0981333333333334e-05, + "loss": 0.1802, + "step": 25300 + }, + { + "epoch": 2.745015399578538, + "grad_norm": 226478.640625, + "learning_rate": 1.093688888888889e-05, + "loss": 0.18, + "step": 25400 + }, + { + "epoch": 2.7558221213594853, + "grad_norm": 248438.5, + "learning_rate": 1.0892444444444445e-05, + "loss": 0.178, + "step": 25500 + }, + { + "epoch": 2.7558221213594853, + "eval_loss": 0.22500741481781006, + "eval_runtime": 2728.4865, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.848, + "eval_wer": 0.17539236309323658, + "step": 25500 + }, + { + "epoch": 2.7666288431404333, + "grad_norm": 229927.078125, + "learning_rate": 1.0848e-05, + "loss": 0.191, + "step": 25600 + }, + { + "epoch": 2.7774355649213813, + "grad_norm": 300608.5, + "learning_rate": 1.0803555555555555e-05, + "loss": 0.1982, + "step": 25700 + }, + { + "epoch": 2.788242286702329, + "grad_norm": 293853.34375, + "learning_rate": 1.075911111111111e-05, + "loss": 0.1747, + "step": 25800 + }, + { + "epoch": 2.7990490084832764, + "grad_norm": 210441.359375, + "learning_rate": 1.0714666666666666e-05, + "loss": 0.1808, + "step": 25900 + }, + { + "epoch": 2.8098557302642244, + "grad_norm": 175316.484375, + "learning_rate": 1.0670222222222224e-05, + "loss": 0.1829, + "step": 26000 + }, + { + "epoch": 2.8098557302642244, + "eval_loss": 0.2231319695711136, + "eval_runtime": 2895.6838, + "eval_samples_per_second": 6.39, + "eval_steps_per_second": 0.799, + "eval_wer": 0.17550966147686484, + "step": 26000 + }, + { + "epoch": 2.820662452045172, + "grad_norm": 162894.484375, + "learning_rate": 1.062577777777778e-05, + "loss": 0.1772, + "step": 26100 + }, + { + "epoch": 2.83146917382612, + "grad_norm": 399308.75, + "learning_rate": 1.0581333333333335e-05, + "loss": 0.183, + "step": 26200 + }, + { + "epoch": 2.8422758956070675, + "grad_norm": 237068.109375, + "learning_rate": 1.053688888888889e-05, + "loss": 0.1867, + "step": 26300 + }, + { + "epoch": 2.8530826173880155, + "grad_norm": 274371.4375, + "learning_rate": 1.0492444444444445e-05, + "loss": 0.189, + "step": 26400 + }, + { + "epoch": 2.863889339168963, + "grad_norm": 218764.46875, + "learning_rate": 1.0448e-05, + "loss": 0.183, + "step": 26500 + }, + { + "epoch": 2.863889339168963, + "eval_loss": 0.2216072529554367, + "eval_runtime": 2986.971, + "eval_samples_per_second": 6.195, + "eval_steps_per_second": 0.774, + "eval_wer": 0.17901297320122928, + "step": 26500 + }, + { + "epoch": 2.874696060949911, + "grad_norm": 200908.984375, + "learning_rate": 1.0403555555555556e-05, + "loss": 0.1714, + "step": 26600 + }, + { + "epoch": 2.8855027827308586, + "grad_norm": 247789.078125, + "learning_rate": 1.0359111111111111e-05, + "loss": 0.1775, + "step": 26700 + }, + { + "epoch": 2.896309504511806, + "grad_norm": 237418.84375, + "learning_rate": 1.0314666666666668e-05, + "loss": 0.1695, + "step": 26800 + }, + { + "epoch": 2.907116226292754, + "grad_norm": 253792.0, + "learning_rate": 1.0270222222222223e-05, + "loss": 0.1806, + "step": 26900 + }, + { + "epoch": 2.917922948073702, + "grad_norm": 205986.421875, + "learning_rate": 1.0225777777777778e-05, + "loss": 0.1812, + "step": 27000 + }, + { + "epoch": 2.917922948073702, + "eval_loss": 0.21981683373451233, + "eval_runtime": 2808.8113, + "eval_samples_per_second": 6.588, + "eval_steps_per_second": 0.823, + "eval_wer": 0.1729369169292847, + "step": 27000 + }, + { + "epoch": 2.9287296698546497, + "grad_norm": 194869.09375, + "learning_rate": 1.0181333333333334e-05, + "loss": 0.1805, + "step": 27100 + }, + { + "epoch": 2.939536391635597, + "grad_norm": 223082.046875, + "learning_rate": 1.0136888888888889e-05, + "loss": 0.1797, + "step": 27200 + }, + { + "epoch": 2.950343113416545, + "grad_norm": 186045.15625, + "learning_rate": 1.0092444444444444e-05, + "loss": 0.1809, + "step": 27300 + }, + { + "epoch": 2.9611498351974928, + "grad_norm": 307597.75, + "learning_rate": 1.0048e-05, + "loss": 0.1752, + "step": 27400 + }, + { + "epoch": 2.9719565569784407, + "grad_norm": 256261.78125, + "learning_rate": 1.0003555555555558e-05, + "loss": 0.1697, + "step": 27500 + }, + { + "epoch": 2.9719565569784407, + "eval_loss": 0.21857349574565887, + "eval_runtime": 2775.0986, + "eval_samples_per_second": 6.668, + "eval_steps_per_second": 0.833, + "eval_wer": 0.17267104059306063, + "step": 27500 + }, + { + "epoch": 2.9827632787593883, + "grad_norm": 140632.125, + "learning_rate": 9.959111111111111e-06, + "loss": 0.1773, + "step": 27600 + }, + { + "epoch": 2.993570000540336, + "grad_norm": 238205.53125, + "learning_rate": 9.914666666666668e-06, + "loss": 0.1799, + "step": 27700 + }, + { + "epoch": 3.0044307559301884, + "grad_norm": 153436.546875, + "learning_rate": 9.870222222222224e-06, + "loss": 0.1618, + "step": 27800 + }, + { + "epoch": 3.0152374777111364, + "grad_norm": 179979.90625, + "learning_rate": 9.825777777777779e-06, + "loss": 0.1261, + "step": 27900 + }, + { + "epoch": 3.026044199492084, + "grad_norm": 237624.703125, + "learning_rate": 9.781333333333334e-06, + "loss": 0.1317, + "step": 28000 + }, + { + "epoch": 3.026044199492084, + "eval_loss": 0.21732862293720245, + "eval_runtime": 2726.3235, + "eval_samples_per_second": 6.787, + "eval_steps_per_second": 0.848, + "eval_wer": 0.17278051908444703, + "step": 28000 + }, + { + "epoch": 3.036850921273032, + "grad_norm": 245701.53125, + "learning_rate": 9.73688888888889e-06, + "loss": 0.1383, + "step": 28100 + }, + { + "epoch": 3.0476576430539795, + "grad_norm": 94987.546875, + "learning_rate": 9.692444444444446e-06, + "loss": 0.1317, + "step": 28200 + }, + { + "epoch": 3.0584643648349275, + "grad_norm": 204097.234375, + "learning_rate": 9.648000000000001e-06, + "loss": 0.1349, + "step": 28300 + }, + { + "epoch": 3.069271086615875, + "grad_norm": 142045.625, + "learning_rate": 9.603555555555557e-06, + "loss": 0.1308, + "step": 28400 + }, + { + "epoch": 3.080077808396823, + "grad_norm": 192114.71875, + "learning_rate": 9.559111111111112e-06, + "loss": 0.1298, + "step": 28500 + }, + { + "epoch": 3.080077808396823, + "eval_loss": 0.21591147780418396, + "eval_runtime": 2949.2136, + "eval_samples_per_second": 6.274, + "eval_steps_per_second": 0.784, + "eval_wer": 0.16897223156264907, + "step": 28500 + }, + { + "epoch": 3.0908845301777705, + "grad_norm": 84732.625, + "learning_rate": 9.514666666666667e-06, + "loss": 0.1182, + "step": 28600 + }, + { + "epoch": 3.1016912519587185, + "grad_norm": 176436.625, + "learning_rate": 9.470222222222222e-06, + "loss": 0.1275, + "step": 28700 + }, + { + "epoch": 3.112497973739666, + "grad_norm": 216536.46875, + "learning_rate": 9.425777777777778e-06, + "loss": 0.1346, + "step": 28800 + }, + { + "epoch": 3.1233046955206136, + "grad_norm": 227679.296875, + "learning_rate": 9.381333333333335e-06, + "loss": 0.1255, + "step": 28900 + }, + { + "epoch": 3.1341114173015616, + "grad_norm": 172016.46875, + "learning_rate": 9.33688888888889e-06, + "loss": 0.1272, + "step": 29000 + }, + { + "epoch": 3.1341114173015616, + "eval_loss": 0.21611380577087402, + "eval_runtime": 2875.8761, + "eval_samples_per_second": 6.434, + "eval_steps_per_second": 0.804, + "eval_wer": 0.16858123695055482, + "step": 29000 + }, + { + "epoch": 3.144918139082509, + "grad_norm": 135140.53125, + "learning_rate": 9.292444444444445e-06, + "loss": 0.1327, + "step": 29100 + }, + { + "epoch": 3.155724860863457, + "grad_norm": 245684.453125, + "learning_rate": 9.248e-06, + "loss": 0.1359, + "step": 29200 + }, + { + "epoch": 3.1665315826444047, + "grad_norm": 184601.390625, + "learning_rate": 9.203555555555557e-06, + "loss": 0.131, + "step": 29300 + }, + { + "epoch": 3.1773383044253527, + "grad_norm": 157958.8125, + "learning_rate": 9.159111111111112e-06, + "loss": 0.129, + "step": 29400 + }, + { + "epoch": 3.1881450262063002, + "grad_norm": 174601.015625, + "learning_rate": 9.114666666666668e-06, + "loss": 0.1389, + "step": 29500 + }, + { + "epoch": 3.1881450262063002, + "eval_loss": 0.21482256054878235, + "eval_runtime": 2995.308, + "eval_samples_per_second": 6.178, + "eval_steps_per_second": 0.772, + "eval_wer": 0.17062222882568678, + "step": 29500 + }, + { + "epoch": 3.1989517479872482, + "grad_norm": 366836.96875, + "learning_rate": 9.070222222222223e-06, + "loss": 0.1375, + "step": 29600 + }, + { + "epoch": 3.209758469768196, + "grad_norm": 217750.703125, + "learning_rate": 9.025777777777778e-06, + "loss": 0.1393, + "step": 29700 + }, + { + "epoch": 3.2205651915491433, + "grad_norm": 427549.6875, + "learning_rate": 8.981333333333333e-06, + "loss": 0.1211, + "step": 29800 + }, + { + "epoch": 3.2313719133300913, + "grad_norm": 139396.234375, + "learning_rate": 8.93688888888889e-06, + "loss": 0.1278, + "step": 29900 + }, + { + "epoch": 3.242178635111039, + "grad_norm": 165933.109375, + "learning_rate": 8.892444444444445e-06, + "loss": 0.1379, + "step": 30000 + }, + { + "epoch": 3.242178635111039, + "eval_loss": 0.2138548046350479, + "eval_runtime": 3184.9662, + "eval_samples_per_second": 5.81, + "eval_steps_per_second": 0.726, + "eval_wer": 0.16925374768335694, + "step": 30000 + }, + { + "epoch": 3.252985356891987, + "grad_norm": 289672.5, + "learning_rate": 8.848e-06, + "loss": 0.1407, + "step": 30100 + }, + { + "epoch": 3.2637920786729344, + "grad_norm": 145135.828125, + "learning_rate": 8.803555555555556e-06, + "loss": 0.1319, + "step": 30200 + }, + { + "epoch": 3.2745988004538824, + "grad_norm": 184688.515625, + "learning_rate": 8.759111111111111e-06, + "loss": 0.1308, + "step": 30300 + }, + { + "epoch": 3.28540552223483, + "grad_norm": 201466.8125, + "learning_rate": 8.714666666666666e-06, + "loss": 0.1322, + "step": 30400 + }, + { + "epoch": 3.296212244015778, + "grad_norm": 194344.265625, + "learning_rate": 8.670222222222223e-06, + "loss": 0.1312, + "step": 30500 + }, + { + "epoch": 3.296212244015778, + "eval_loss": 0.21327927708625793, + "eval_runtime": 3054.7075, + "eval_samples_per_second": 6.058, + "eval_steps_per_second": 0.757, + "eval_wer": 0.1713885782653915, + "step": 30500 + }, + { + "epoch": 3.3070189657967255, + "grad_norm": 204070.3125, + "learning_rate": 8.625777777777779e-06, + "loss": 0.1222, + "step": 30600 + }, + { + "epoch": 3.3178256875776735, + "grad_norm": 258432.828125, + "learning_rate": 8.581333333333334e-06, + "loss": 0.1277, + "step": 30700 + }, + { + "epoch": 3.328632409358621, + "grad_norm": 251962.796875, + "learning_rate": 8.53688888888889e-06, + "loss": 0.1265, + "step": 30800 + }, + { + "epoch": 3.339439131139569, + "grad_norm": 226804.796875, + "learning_rate": 8.492444444444446e-06, + "loss": 0.1292, + "step": 30900 + }, + { + "epoch": 3.3502458529205166, + "grad_norm": 191085.5625, + "learning_rate": 8.448000000000001e-06, + "loss": 0.1212, + "step": 31000 + }, + { + "epoch": 3.3502458529205166, + "eval_loss": 0.21162918210029602, + "eval_runtime": 3559.2497, + "eval_samples_per_second": 5.199, + "eval_steps_per_second": 0.65, + "eval_wer": 0.17063786861017055, + "step": 31000 + }, + { + "epoch": 3.361052574701464, + "grad_norm": 186760.765625, + "learning_rate": 8.403555555555556e-06, + "loss": 0.1281, + "step": 31100 + }, + { + "epoch": 3.371859296482412, + "grad_norm": 234347.296875, + "learning_rate": 8.359111111111112e-06, + "loss": 0.1338, + "step": 31200 + }, + { + "epoch": 3.3826660182633597, + "grad_norm": 223205.296875, + "learning_rate": 8.314666666666667e-06, + "loss": 0.1365, + "step": 31300 + }, + { + "epoch": 3.3934727400443077, + "grad_norm": 227340.140625, + "learning_rate": 8.270222222222222e-06, + "loss": 0.1263, + "step": 31400 + }, + { + "epoch": 3.404279461825255, + "grad_norm": 330275.71875, + "learning_rate": 8.225777777777777e-06, + "loss": 0.1265, + "step": 31500 + }, + { + "epoch": 3.404279461825255, + "eval_loss": 0.21032755076885223, + "eval_runtime": 3585.0413, + "eval_samples_per_second": 5.161, + "eval_steps_per_second": 0.645, + "eval_wer": 0.16658716442887417, + "step": 31500 + }, + { + "epoch": 3.415086183606203, + "grad_norm": 202428.109375, + "learning_rate": 8.181333333333334e-06, + "loss": 0.1302, + "step": 31600 + }, + { + "epoch": 3.4258929053871507, + "grad_norm": 139416.578125, + "learning_rate": 8.13688888888889e-06, + "loss": 0.126, + "step": 31700 + }, + { + "epoch": 3.4366996271680987, + "grad_norm": 151699.484375, + "learning_rate": 8.092444444444445e-06, + "loss": 0.1273, + "step": 31800 + }, + { + "epoch": 3.4475063489490463, + "grad_norm": 127831.4609375, + "learning_rate": 8.048e-06, + "loss": 0.1347, + "step": 31900 + }, + { + "epoch": 3.458313070729994, + "grad_norm": 225660.5625, + "learning_rate": 8.003555555555557e-06, + "loss": 0.1261, + "step": 32000 + }, + { + "epoch": 3.458313070729994, + "eval_loss": 0.20947901904582977, + "eval_runtime": 3432.2646, + "eval_samples_per_second": 5.391, + "eval_steps_per_second": 0.674, + "eval_wer": 0.17062222882568678, + "step": 32000 + }, + { + "epoch": 3.469119792510942, + "grad_norm": 5.6989545822143555, + "learning_rate": 7.959111111111112e-06, + "loss": 0.1326, + "step": 32100 + }, + { + "epoch": 3.4799265142918894, + "grad_norm": 5.444442272186279, + "learning_rate": 7.914666666666667e-06, + "loss": 0.1391, + "step": 32200 + }, + { + "epoch": 3.4907332360728374, + "grad_norm": 5.629488945007324, + "learning_rate": 7.870222222222222e-06, + "loss": 0.134, + "step": 32300 + }, + { + "epoch": 3.501539957853785, + "grad_norm": 9.071991920471191, + "learning_rate": 7.82577777777778e-06, + "loss": 0.1345, + "step": 32400 + }, + { + "epoch": 3.512346679634733, + "grad_norm": 8.57175064086914, + "learning_rate": 7.781333333333335e-06, + "loss": 0.127, + "step": 32500 + }, + { + "epoch": 3.512346679634733, + "eval_loss": 0.20792409777641296, + "eval_runtime": 3540.4171, + "eval_samples_per_second": 5.227, + "eval_steps_per_second": 1.307, + "eval_wer": 0.16730659451512758, + "step": 32500 + }, + { + "epoch": 3.5231534014156805, + "grad_norm": 7.08974552154541, + "learning_rate": 7.73688888888889e-06, + "loss": 0.1325, + "step": 32600 + }, + { + "epoch": 3.5339601231966284, + "grad_norm": 5.81699275970459, + "learning_rate": 7.692444444444445e-06, + "loss": 0.1337, + "step": 32700 + }, + { + "epoch": 3.544766844977576, + "grad_norm": 8.329341888427734, + "learning_rate": 7.648444444444445e-06, + "loss": 0.1295, + "step": 32800 + }, + { + "epoch": 3.5555735667585235, + "grad_norm": 9.390524864196777, + "learning_rate": 7.604e-06, + "loss": 0.1308, + "step": 32900 + }, + { + "epoch": 3.5663802885394715, + "grad_norm": 7.076089859008789, + "learning_rate": 7.5595555555555565e-06, + "loss": 0.1346, + "step": 33000 + }, + { + "epoch": 3.5663802885394715, + "eval_loss": 0.20613741874694824, + "eval_runtime": 3517.8125, + "eval_samples_per_second": 5.26, + "eval_steps_per_second": 1.315, + "eval_wer": 0.1682840810453632, + "step": 33000 + }, + { + "epoch": 3.5771870103204195, + "grad_norm": 6.325503826141357, + "learning_rate": 7.515111111111112e-06, + "loss": 0.14, + "step": 33100 + }, + { + "epoch": 3.587993732101367, + "grad_norm": 7.135802745819092, + "learning_rate": 7.470666666666667e-06, + "loss": 0.1292, + "step": 33200 + }, + { + "epoch": 3.5988004538823146, + "grad_norm": 5.185844898223877, + "learning_rate": 7.426222222222222e-06, + "loss": 0.1375, + "step": 33300 + }, + { + "epoch": 3.6096071756632626, + "grad_norm": 7.516198635101318, + "learning_rate": 7.381777777777779e-06, + "loss": 0.1287, + "step": 33400 + }, + { + "epoch": 3.62041389744421, + "grad_norm": 6.644392490386963, + "learning_rate": 7.337333333333334e-06, + "loss": 0.1283, + "step": 33500 + }, + { + "epoch": 3.62041389744421, + "eval_loss": 0.20456381142139435, + "eval_runtime": 3492.6992, + "eval_samples_per_second": 5.298, + "eval_steps_per_second": 1.324, + "eval_wer": 0.16519522360981867, + "step": 33500 + }, + { + "epoch": 3.631220619225158, + "grad_norm": 7.233791351318359, + "learning_rate": 7.2928888888888895e-06, + "loss": 0.1373, + "step": 33600 + }, + { + "epoch": 3.6420273410061057, + "grad_norm": 5.153164386749268, + "learning_rate": 7.248444444444445e-06, + "loss": 0.1368, + "step": 33700 + }, + { + "epoch": 3.6528340627870537, + "grad_norm": 6.022379398345947, + "learning_rate": 7.204000000000001e-06, + "loss": 0.1377, + "step": 33800 + }, + { + "epoch": 3.6636407845680012, + "grad_norm": 7.33857536315918, + "learning_rate": 7.159555555555556e-06, + "loss": 0.1343, + "step": 33900 + }, + { + "epoch": 3.6744475063489492, + "grad_norm": 6.584815502166748, + "learning_rate": 7.115111111111111e-06, + "loss": 0.1244, + "step": 34000 + }, + { + "epoch": 3.6744475063489492, + "eval_loss": 0.20398086309432983, + "eval_runtime": 3469.4987, + "eval_samples_per_second": 5.333, + "eval_steps_per_second": 1.333, + "eval_wer": 0.168432658997959, + "step": 34000 + }, + { + "epoch": 3.685254228129897, + "grad_norm": 6.824450492858887, + "learning_rate": 7.0706666666666665e-06, + "loss": 0.1255, + "step": 34100 + }, + { + "epoch": 3.6960609499108443, + "grad_norm": 5.974719047546387, + "learning_rate": 7.0262222222222234e-06, + "loss": 0.1302, + "step": 34200 + }, + { + "epoch": 3.7068676716917923, + "grad_norm": 6.354248523712158, + "learning_rate": 6.981777777777779e-06, + "loss": 0.1245, + "step": 34300 + }, + { + "epoch": 3.7176743934727403, + "grad_norm": 5.096312999725342, + "learning_rate": 6.937333333333334e-06, + "loss": 0.1369, + "step": 34400 + }, + { + "epoch": 3.728481115253688, + "grad_norm": 5.251643180847168, + "learning_rate": 6.892888888888889e-06, + "loss": 0.1207, + "step": 34500 + }, + { + "epoch": 3.728481115253688, + "eval_loss": 0.20263046026229858, + "eval_runtime": 3278.7844, + "eval_samples_per_second": 5.644, + "eval_steps_per_second": 1.411, + "eval_wer": 0.16479640910548252, + "step": 34500 + }, + { + "epoch": 3.7392878370346354, + "grad_norm": 7.432106971740723, + "learning_rate": 6.848444444444445e-06, + "loss": 0.1337, + "step": 34600 + }, + { + "epoch": 3.7500945588155834, + "grad_norm": 4.93491268157959, + "learning_rate": 6.804e-06, + "loss": 0.1257, + "step": 34700 + }, + { + "epoch": 3.760901280596531, + "grad_norm": 6.047059059143066, + "learning_rate": 6.760000000000001e-06, + "loss": 0.1206, + "step": 34800 + }, + { + "epoch": 3.771708002377479, + "grad_norm": 6.542396545410156, + "learning_rate": 6.7155555555555566e-06, + "loss": 0.1271, + "step": 34900 + }, + { + "epoch": 3.7825147241584265, + "grad_norm": 5.706289768218994, + "learning_rate": 6.671111111111112e-06, + "loss": 0.1239, + "step": 35000 + }, + { + "epoch": 3.7825147241584265, + "eval_loss": 0.20222991704940796, + "eval_runtime": 3317.3157, + "eval_samples_per_second": 5.578, + "eval_steps_per_second": 1.395, + "eval_wer": 0.16217674520445108, + "step": 35000 + }, + { + "epoch": 3.793321445939374, + "grad_norm": 7.686710834503174, + "learning_rate": 6.626666666666667e-06, + "loss": 0.1298, + "step": 35100 + }, + { + "epoch": 3.804128167720322, + "grad_norm": 7.791649341583252, + "learning_rate": 6.582222222222223e-06, + "loss": 0.1276, + "step": 35200 + }, + { + "epoch": 3.81493488950127, + "grad_norm": 5.835906505584717, + "learning_rate": 6.537777777777778e-06, + "loss": 0.1244, + "step": 35300 + }, + { + "epoch": 3.8257416112822176, + "grad_norm": 8.771524429321289, + "learning_rate": 6.4933333333333336e-06, + "loss": 0.1316, + "step": 35400 + }, + { + "epoch": 3.836548333063165, + "grad_norm": 7.212921619415283, + "learning_rate": 6.448888888888889e-06, + "loss": 0.1308, + "step": 35500 + }, + { + "epoch": 3.836548333063165, + "eval_loss": 0.19980210065841675, + "eval_runtime": 3260.0277, + "eval_samples_per_second": 5.676, + "eval_steps_per_second": 1.419, + "eval_wer": 0.16239570218722385, + "step": 35500 + }, + { + "epoch": 3.847355054844113, + "grad_norm": 5.587503910064697, + "learning_rate": 6.404444444444446e-06, + "loss": 0.1317, + "step": 35600 + }, + { + "epoch": 3.8581617766250607, + "grad_norm": 8.271342277526855, + "learning_rate": 6.360444444444445e-06, + "loss": 0.1316, + "step": 35700 + }, + { + "epoch": 3.8689684984060086, + "grad_norm": 6.529531955718994, + "learning_rate": 6.316000000000001e-06, + "loss": 0.1257, + "step": 35800 + }, + { + "epoch": 3.879775220186956, + "grad_norm": 5.135924816131592, + "learning_rate": 6.271555555555556e-06, + "loss": 0.1309, + "step": 35900 + }, + { + "epoch": 3.890581941967904, + "grad_norm": 4.710616588592529, + "learning_rate": 6.2271111111111115e-06, + "loss": 0.1272, + "step": 36000 + }, + { + "epoch": 3.890581941967904, + "eval_loss": 0.19968418776988983, + "eval_runtime": 3207.2086, + "eval_samples_per_second": 5.77, + "eval_steps_per_second": 1.442, + "eval_wer": 0.16488242792014327, + "step": 36000 + }, + { + "epoch": 3.9013886637488517, + "grad_norm": 9.243986129760742, + "learning_rate": 6.182666666666667e-06, + "loss": 0.1301, + "step": 36100 + }, + { + "epoch": 3.9121953855297997, + "grad_norm": 6.014384746551514, + "learning_rate": 6.138222222222223e-06, + "loss": 0.1261, + "step": 36200 + }, + { + "epoch": 3.9230021073107473, + "grad_norm": 5.140791893005371, + "learning_rate": 6.093777777777779e-06, + "loss": 0.1219, + "step": 36300 + }, + { + "epoch": 3.933808829091695, + "grad_norm": 4.738403797149658, + "learning_rate": 6.049333333333334e-06, + "loss": 0.1244, + "step": 36400 + }, + { + "epoch": 3.944615550872643, + "grad_norm": 4.881937026977539, + "learning_rate": 6.004888888888889e-06, + "loss": 0.1328, + "step": 36500 + }, + { + "epoch": 3.944615550872643, + "eval_loss": 0.19876359403133392, + "eval_runtime": 3246.3553, + "eval_samples_per_second": 5.7, + "eval_steps_per_second": 1.425, + "eval_wer": 0.1647103902908218, + "step": 36500 + }, + { + "epoch": 3.955422272653591, + "grad_norm": 9.000225067138672, + "learning_rate": 5.960444444444445e-06, + "loss": 0.1238, + "step": 36600 + }, + { + "epoch": 3.9662289944345384, + "grad_norm": 4.82861328125, + "learning_rate": 5.916000000000001e-06, + "loss": 0.1268, + "step": 36700 + }, + { + "epoch": 3.977035716215486, + "grad_norm": 4.868381023406982, + "learning_rate": 5.871555555555556e-06, + "loss": 0.1262, + "step": 36800 + }, + { + "epoch": 3.987842437996434, + "grad_norm": 10.557507514953613, + "learning_rate": 5.827111111111111e-06, + "loss": 0.134, + "step": 36900 + }, + { + "epoch": 3.9986491597773814, + "grad_norm": 6.664336204528809, + "learning_rate": 5.782666666666667e-06, + "loss": 0.1256, + "step": 37000 + }, + { + "epoch": 3.9986491597773814, + "eval_loss": 0.19714923202991486, + "eval_runtime": 3358.3537, + "eval_samples_per_second": 5.51, + "eval_steps_per_second": 1.377, + "eval_wer": 0.1653203418856888, + "step": 37000 + }, + { + "epoch": 4.009509915167234, + "grad_norm": 6.824110507965088, + "learning_rate": 5.738222222222223e-06, + "loss": 0.095, + "step": 37100 + }, + { + "epoch": 4.0203166369481815, + "grad_norm": 6.033724308013916, + "learning_rate": 5.6937777777777785e-06, + "loss": 0.0924, + "step": 37200 + }, + { + "epoch": 4.031123358729129, + "grad_norm": 8.13729476928711, + "learning_rate": 5.649333333333334e-06, + "loss": 0.1009, + "step": 37300 + }, + { + "epoch": 4.0419300805100775, + "grad_norm": 7.620489597320557, + "learning_rate": 5.60488888888889e-06, + "loss": 0.0994, + "step": 37400 + }, + { + "epoch": 4.052736802291025, + "grad_norm": 5.248648166656494, + "learning_rate": 5.560444444444445e-06, + "loss": 0.0953, + "step": 37500 + }, + { + "epoch": 4.052736802291025, + "eval_loss": 0.19735735654830933, + "eval_runtime": 3623.9668, + "eval_samples_per_second": 5.106, + "eval_steps_per_second": 1.277, + "eval_wer": 0.1603938097733013, + "step": 37500 + }, + { + "epoch": 4.063543524071973, + "grad_norm": 5.766596794128418, + "learning_rate": 5.516e-06, + "loss": 0.0947, + "step": 37600 + }, + { + "epoch": 4.07435024585292, + "grad_norm": 4.6069231033325195, + "learning_rate": 5.4715555555555554e-06, + "loss": 0.0957, + "step": 37700 + }, + { + "epoch": 4.085156967633869, + "grad_norm": 5.434189319610596, + "learning_rate": 5.4271111111111115e-06, + "loss": 0.0992, + "step": 37800 + }, + { + "epoch": 4.095963689414816, + "grad_norm": 2.9330973625183105, + "learning_rate": 5.382666666666667e-06, + "loss": 0.0943, + "step": 37900 + }, + { + "epoch": 4.106770411195764, + "grad_norm": 4.690386772155762, + "learning_rate": 5.338222222222223e-06, + "loss": 0.0946, + "step": 38000 + }, + { + "epoch": 4.106770411195764, + "eval_loss": 0.19785380363464355, + "eval_runtime": 3496.2995, + "eval_samples_per_second": 5.292, + "eval_steps_per_second": 1.323, + "eval_wer": 0.16248954089412648, + "step": 38000 + }, + { + "epoch": 4.117577132976711, + "grad_norm": 5.448973655700684, + "learning_rate": 5.293777777777778e-06, + "loss": 0.0928, + "step": 38100 + }, + { + "epoch": 4.12838385475766, + "grad_norm": 6.168562889099121, + "learning_rate": 5.249333333333334e-06, + "loss": 0.0977, + "step": 38200 + }, + { + "epoch": 4.139190576538607, + "grad_norm": 6.410705089569092, + "learning_rate": 5.204888888888889e-06, + "loss": 0.0954, + "step": 38300 + }, + { + "epoch": 4.149997298319555, + "grad_norm": 6.880079746246338, + "learning_rate": 5.160444444444445e-06, + "loss": 0.0982, + "step": 38400 + }, + { + "epoch": 4.160804020100502, + "grad_norm": 4.514254570007324, + "learning_rate": 5.116000000000001e-06, + "loss": 0.0933, + "step": 38500 + }, + { + "epoch": 4.160804020100502, + "eval_loss": 0.19639329612255096, + "eval_runtime": 3441.5727, + "eval_samples_per_second": 5.377, + "eval_steps_per_second": 1.344, + "eval_wer": 0.16088646298454007, + "step": 38500 + }, + { + "epoch": 4.17161074188145, + "grad_norm": 5.864041328430176, + "learning_rate": 5.071555555555556e-06, + "loss": 0.0946, + "step": 38600 + }, + { + "epoch": 4.182417463662398, + "grad_norm": 5.394285678863525, + "learning_rate": 5.027111111111111e-06, + "loss": 0.098, + "step": 38700 + }, + { + "epoch": 4.193224185443346, + "grad_norm": 5.213718891143799, + "learning_rate": 4.982666666666667e-06, + "loss": 0.0948, + "step": 38800 + }, + { + "epoch": 4.204030907224293, + "grad_norm": 3.7767562866210938, + "learning_rate": 4.938222222222222e-06, + "loss": 0.099, + "step": 38900 + }, + { + "epoch": 4.214837629005241, + "grad_norm": 8.426477432250977, + "learning_rate": 4.8937777777777785e-06, + "loss": 0.1025, + "step": 39000 + }, + { + "epoch": 4.214837629005241, + "eval_loss": 0.19617383182048798, + "eval_runtime": 3407.3151, + "eval_samples_per_second": 5.431, + "eval_steps_per_second": 1.358, + "eval_wer": 0.16335754893297572, + "step": 39000 + }, + { + "epoch": 4.225644350786189, + "grad_norm": 6.989054203033447, + "learning_rate": 4.849333333333334e-06, + "loss": 0.0922, + "step": 39100 + }, + { + "epoch": 4.236451072567137, + "grad_norm": 7.133777618408203, + "learning_rate": 4.80488888888889e-06, + "loss": 0.0971, + "step": 39200 + }, + { + "epoch": 4.2472577943480845, + "grad_norm": 5.765046119689941, + "learning_rate": 4.760444444444445e-06, + "loss": 0.0919, + "step": 39300 + }, + { + "epoch": 4.258064516129032, + "grad_norm": 5.539346694946289, + "learning_rate": 4.716e-06, + "loss": 0.096, + "step": 39400 + }, + { + "epoch": 4.2688712379099805, + "grad_norm": 6.360944747924805, + "learning_rate": 4.6715555555555555e-06, + "loss": 0.1002, + "step": 39500 + }, + { + "epoch": 4.2688712379099805, + "eval_loss": 0.1956612765789032, + "eval_runtime": 3418.2414, + "eval_samples_per_second": 5.413, + "eval_steps_per_second": 1.353, + "eval_wer": 0.16324025054934743, + "step": 39500 + }, + { + "epoch": 4.279677959690928, + "grad_norm": 6.404228210449219, + "learning_rate": 4.6271111111111116e-06, + "loss": 0.1, + "step": 39600 + }, + { + "epoch": 4.290484681471876, + "grad_norm": 5.106740474700928, + "learning_rate": 4.582666666666667e-06, + "loss": 0.1012, + "step": 39700 + }, + { + "epoch": 4.301291403252823, + "grad_norm": 7.007205009460449, + "learning_rate": 4.538222222222223e-06, + "loss": 0.103, + "step": 39800 + }, + { + "epoch": 4.312098125033771, + "grad_norm": 7.048201084136963, + "learning_rate": 4.493777777777778e-06, + "loss": 0.0937, + "step": 39900 + }, + { + "epoch": 4.322904846814719, + "grad_norm": 5.77664852142334, + "learning_rate": 4.449333333333334e-06, + "loss": 0.0976, + "step": 40000 + }, + { + "epoch": 4.322904846814719, + "eval_loss": 0.1948525756597519, + "eval_runtime": 3400.5876, + "eval_samples_per_second": 5.441, + "eval_steps_per_second": 1.36, + "eval_wer": 0.16208290649754847, + "step": 40000 + }, + { + "epoch": 4.333711568595667, + "grad_norm": 7.6147918701171875, + "learning_rate": 4.404888888888889e-06, + "loss": 0.094, + "step": 40100 + }, + { + "epoch": 4.344518290376614, + "grad_norm": 2.64292049407959, + "learning_rate": 4.360444444444445e-06, + "loss": 0.0987, + "step": 40200 + }, + { + "epoch": 4.355325012157562, + "grad_norm": 4.686502456665039, + "learning_rate": 4.316e-06, + "loss": 0.0985, + "step": 40300 + }, + { + "epoch": 4.366131733938509, + "grad_norm": 6.833780288696289, + "learning_rate": 4.271555555555556e-06, + "loss": 0.0866, + "step": 40400 + }, + { + "epoch": 4.376938455719458, + "grad_norm": 3.5335001945495605, + "learning_rate": 4.227111111111111e-06, + "loss": 0.0983, + "step": 40500 + }, + { + "epoch": 4.376938455719458, + "eval_loss": 0.19357165694236755, + "eval_runtime": 3407.3735, + "eval_samples_per_second": 5.431, + "eval_steps_per_second": 1.358, + "eval_wer": 0.1605111081569296, + "step": 40500 + }, + { + "epoch": 4.387745177500405, + "grad_norm": 6.259922981262207, + "learning_rate": 4.183111111111112e-06, + "loss": 0.0941, + "step": 40600 + }, + { + "epoch": 4.398551899281353, + "grad_norm": 3.454116106033325, + "learning_rate": 4.138666666666667e-06, + "loss": 0.0931, + "step": 40700 + }, + { + "epoch": 4.4093586210623, + "grad_norm": 5.945463180541992, + "learning_rate": 4.0942222222222225e-06, + "loss": 0.0973, + "step": 40800 + }, + { + "epoch": 4.420165342843249, + "grad_norm": 6.534635066986084, + "learning_rate": 4.049777777777778e-06, + "loss": 0.0941, + "step": 40900 + }, + { + "epoch": 4.430972064624196, + "grad_norm": 6.562328815460205, + "learning_rate": 4.005333333333334e-06, + "loss": 0.0995, + "step": 41000 + }, + { + "epoch": 4.430972064624196, + "eval_loss": 0.1934925764799118, + "eval_runtime": 3377.5938, + "eval_samples_per_second": 5.478, + "eval_steps_per_second": 1.37, + "eval_wer": 0.1607769844931537, + "step": 41000 + }, + { + "epoch": 4.441778786405144, + "grad_norm": 4.691957473754883, + "learning_rate": 3.960888888888889e-06, + "loss": 0.0907, + "step": 41100 + }, + { + "epoch": 4.4525855081860914, + "grad_norm": 7.0055646896362305, + "learning_rate": 3.916444444444445e-06, + "loss": 0.0984, + "step": 41200 + }, + { + "epoch": 4.46339222996704, + "grad_norm": 4.472280979156494, + "learning_rate": 3.872e-06, + "loss": 0.0954, + "step": 41300 + }, + { + "epoch": 4.474198951747987, + "grad_norm": 7.1928277015686035, + "learning_rate": 3.8275555555555564e-06, + "loss": 0.0989, + "step": 41400 + }, + { + "epoch": 4.485005673528935, + "grad_norm": 6.270200729370117, + "learning_rate": 3.7831111111111112e-06, + "loss": 0.0877, + "step": 41500 + }, + { + "epoch": 4.485005673528935, + "eval_loss": 0.19299255311489105, + "eval_runtime": 3437.135, + "eval_samples_per_second": 5.384, + "eval_steps_per_second": 1.346, + "eval_wer": 0.16083954363108877, + "step": 41500 + }, + { + "epoch": 4.4958123953098825, + "grad_norm": 4.897816181182861, + "learning_rate": 3.7386666666666673e-06, + "loss": 0.0878, + "step": 41600 + }, + { + "epoch": 4.50661911709083, + "grad_norm": 8.050458908081055, + "learning_rate": 3.6942222222222226e-06, + "loss": 0.0973, + "step": 41700 + }, + { + "epoch": 4.5174258388717785, + "grad_norm": 6.027979373931885, + "learning_rate": 3.649777777777778e-06, + "loss": 0.0967, + "step": 41800 + }, + { + "epoch": 4.528232560652726, + "grad_norm": 7.365042686462402, + "learning_rate": 3.6053333333333334e-06, + "loss": 0.0965, + "step": 41900 + }, + { + "epoch": 4.539039282433674, + "grad_norm": 6.175548076629639, + "learning_rate": 3.560888888888889e-06, + "loss": 0.0985, + "step": 42000 + }, + { + "epoch": 4.539039282433674, + "eval_loss": 0.19284753501415253, + "eval_runtime": 3493.7006, + "eval_samples_per_second": 5.296, + "eval_steps_per_second": 1.324, + "eval_wer": 0.1633028096872825, + "step": 42000 + }, + { + "epoch": 4.549846004214621, + "grad_norm": 3.834568738937378, + "learning_rate": 3.5164444444444447e-06, + "loss": 0.0946, + "step": 42100 + }, + { + "epoch": 4.56065272599557, + "grad_norm": 5.286928176879883, + "learning_rate": 3.4720000000000004e-06, + "loss": 0.0984, + "step": 42200 + }, + { + "epoch": 4.571459447776517, + "grad_norm": 4.384335994720459, + "learning_rate": 3.4275555555555556e-06, + "loss": 0.0859, + "step": 42300 + }, + { + "epoch": 4.582266169557465, + "grad_norm": 6.872238636016846, + "learning_rate": 3.3831111111111113e-06, + "loss": 0.1006, + "step": 42400 + }, + { + "epoch": 4.593072891338412, + "grad_norm": 5.914927005767822, + "learning_rate": 3.338666666666667e-06, + "loss": 0.0887, + "step": 42500 + }, + { + "epoch": 4.593072891338412, + "eval_loss": 0.19172823429107666, + "eval_runtime": 3473.6538, + "eval_samples_per_second": 5.327, + "eval_steps_per_second": 1.332, + "eval_wer": 0.1616449925320029, + "step": 42500 + }, + { + "epoch": 4.603879613119361, + "grad_norm": 2.9186370372772217, + "learning_rate": 3.2942222222222226e-06, + "loss": 0.0929, + "step": 42600 + }, + { + "epoch": 4.614686334900308, + "grad_norm": 7.138390064239502, + "learning_rate": 3.249777777777778e-06, + "loss": 0.1014, + "step": 42700 + }, + { + "epoch": 4.625493056681256, + "grad_norm": 5.447595596313477, + "learning_rate": 3.2053333333333334e-06, + "loss": 0.0942, + "step": 42800 + }, + { + "epoch": 4.636299778462203, + "grad_norm": 5.631972789764404, + "learning_rate": 3.160888888888889e-06, + "loss": 0.0928, + "step": 42900 + }, + { + "epoch": 4.647106500243151, + "grad_norm": 6.55267333984375, + "learning_rate": 3.1164444444444448e-06, + "loss": 0.0909, + "step": 43000 + }, + { + "epoch": 4.647106500243151, + "eval_loss": 0.1917807012796402, + "eval_runtime": 3531.9979, + "eval_samples_per_second": 5.239, + "eval_steps_per_second": 1.31, + "eval_wer": 0.1603547103120919, + "step": 43000 + }, + { + "epoch": 4.657913222024099, + "grad_norm": 5.544260501861572, + "learning_rate": 3.0728888888888893e-06, + "loss": 0.0952, + "step": 43100 + }, + { + "epoch": 4.668719943805047, + "grad_norm": 6.711052417755127, + "learning_rate": 3.028444444444445e-06, + "loss": 0.0893, + "step": 43200 + }, + { + "epoch": 4.679526665585994, + "grad_norm": 5.341217994689941, + "learning_rate": 2.984e-06, + "loss": 0.0894, + "step": 43300 + }, + { + "epoch": 4.690333387366942, + "grad_norm": 6.262836933135986, + "learning_rate": 2.9395555555555562e-06, + "loss": 0.0933, + "step": 43400 + }, + { + "epoch": 4.7011401091478895, + "grad_norm": 5.641539096832275, + "learning_rate": 2.8951111111111114e-06, + "loss": 0.0908, + "step": 43500 + }, + { + "epoch": 4.7011401091478895, + "eval_loss": 0.19096316397190094, + "eval_runtime": 3434.1139, + "eval_samples_per_second": 5.388, + "eval_steps_per_second": 1.347, + "eval_wer": 0.1592677452904699, + "step": 43500 + }, + { + "epoch": 4.711946830928838, + "grad_norm": 5.90605354309082, + "learning_rate": 2.850666666666667e-06, + "loss": 0.0955, + "step": 43600 + }, + { + "epoch": 4.7227535527097855, + "grad_norm": 7.320056438446045, + "learning_rate": 2.8062222222222223e-06, + "loss": 0.0862, + "step": 43700 + }, + { + "epoch": 4.733560274490733, + "grad_norm": 7.9307026863098145, + "learning_rate": 2.7617777777777784e-06, + "loss": 0.0968, + "step": 43800 + }, + { + "epoch": 4.7443669962716815, + "grad_norm": 3.4138481616973877, + "learning_rate": 2.7173333333333336e-06, + "loss": 0.095, + "step": 43900 + }, + { + "epoch": 4.755173718052629, + "grad_norm": 5.649805068969727, + "learning_rate": 2.6728888888888893e-06, + "loss": 0.0931, + "step": 44000 + }, + { + "epoch": 4.755173718052629, + "eval_loss": 0.19024226069450378, + "eval_runtime": 3527.1779, + "eval_samples_per_second": 5.246, + "eval_steps_per_second": 1.312, + "eval_wer": 0.1579227238248657, + "step": 44000 + }, + { + "epoch": 4.7659804398335766, + "grad_norm": 5.45325231552124, + "learning_rate": 2.6284444444444445e-06, + "loss": 0.0897, + "step": 44100 + }, + { + "epoch": 4.776787161614524, + "grad_norm": 4.2618408203125, + "learning_rate": 2.5840000000000006e-06, + "loss": 0.0921, + "step": 44200 + }, + { + "epoch": 4.787593883395472, + "grad_norm": 6.174403190612793, + "learning_rate": 2.539555555555556e-06, + "loss": 0.0954, + "step": 44300 + }, + { + "epoch": 4.79840060517642, + "grad_norm": 4.927825927734375, + "learning_rate": 2.495111111111111e-06, + "loss": 0.0891, + "step": 44400 + }, + { + "epoch": 4.809207326957368, + "grad_norm": 4.512660503387451, + "learning_rate": 2.4506666666666667e-06, + "loss": 0.0938, + "step": 44500 + }, + { + "epoch": 4.809207326957368, + "eval_loss": 0.18895868957042694, + "eval_runtime": 4201.807, + "eval_samples_per_second": 4.404, + "eval_steps_per_second": 1.101, + "eval_wer": 0.158172960376606, + "step": 44500 + }, + { + "epoch": 4.820014048738315, + "grad_norm": 5.174787998199463, + "learning_rate": 2.4062222222222223e-06, + "loss": 0.0925, + "step": 44600 + }, + { + "epoch": 4.830820770519263, + "grad_norm": 6.067021369934082, + "learning_rate": 2.361777777777778e-06, + "loss": 0.0917, + "step": 44700 + }, + { + "epoch": 4.84162749230021, + "grad_norm": 7.221127033233643, + "learning_rate": 2.3173333333333336e-06, + "loss": 0.1004, + "step": 44800 + }, + { + "epoch": 4.852434214081159, + "grad_norm": 6.763819217681885, + "learning_rate": 2.2728888888888893e-06, + "loss": 0.0934, + "step": 44900 + }, + { + "epoch": 4.863240935862106, + "grad_norm": 6.756659030914307, + "learning_rate": 2.228444444444445e-06, + "loss": 0.0925, + "step": 45000 + }, + { + "epoch": 4.863240935862106, + "eval_loss": 0.18888415396213531, + "eval_runtime": 4200.6639, + "eval_samples_per_second": 4.405, + "eval_steps_per_second": 1.101, + "eval_wer": 0.15939286356634005, + "step": 45000 + }, + { + "epoch": 4.874047657643054, + "grad_norm": 5.333858966827393, + "learning_rate": 2.184e-06, + "loss": 0.0937, + "step": 45100 + }, + { + "epoch": 4.884854379424001, + "grad_norm": 5.267432689666748, + "learning_rate": 2.139555555555556e-06, + "loss": 0.1001, + "step": 45200 + }, + { + "epoch": 4.89566110120495, + "grad_norm": 6.311913967132568, + "learning_rate": 2.0951111111111115e-06, + "loss": 0.09, + "step": 45300 + }, + { + "epoch": 4.906467822985897, + "grad_norm": 8.137591361999512, + "learning_rate": 2.0506666666666667e-06, + "loss": 0.0918, + "step": 45400 + }, + { + "epoch": 4.917274544766845, + "grad_norm": 8.778470993041992, + "learning_rate": 2.0062222222222224e-06, + "loss": 0.0943, + "step": 45500 + }, + { + "epoch": 4.917274544766845, + "eval_loss": 0.18823765218257904, + "eval_runtime": 4132.8456, + "eval_samples_per_second": 4.477, + "eval_steps_per_second": 1.119, + "eval_wer": 0.15777414587226987, + "step": 45500 + }, + { + "epoch": 4.928081266547792, + "grad_norm": 6.091644287109375, + "learning_rate": 1.961777777777778e-06, + "loss": 0.0917, + "step": 45600 + }, + { + "epoch": 4.938887988328741, + "grad_norm": 4.328017234802246, + "learning_rate": 1.9173333333333337e-06, + "loss": 0.0931, + "step": 45700 + }, + { + "epoch": 4.949694710109688, + "grad_norm": 6.655763626098633, + "learning_rate": 1.8728888888888891e-06, + "loss": 0.0909, + "step": 45800 + }, + { + "epoch": 4.960501431890636, + "grad_norm": 4.749749183654785, + "learning_rate": 1.8284444444444445e-06, + "loss": 0.0899, + "step": 45900 + }, + { + "epoch": 4.9713081536715835, + "grad_norm": 5.6424455642700195, + "learning_rate": 1.7840000000000002e-06, + "loss": 0.0918, + "step": 46000 + }, + { + "epoch": 4.9713081536715835, + "eval_loss": 0.18786819279193878, + "eval_runtime": 4143.7949, + "eval_samples_per_second": 4.465, + "eval_steps_per_second": 1.116, + "eval_wer": 0.1583919173593788, + "step": 46000 + }, + { + "epoch": 4.982114875452531, + "grad_norm": 5.470109462738037, + "learning_rate": 1.74e-06, + "loss": 0.0994, + "step": 46100 + }, + { + "epoch": 4.9929215972334795, + "grad_norm": 5.171388149261475, + "learning_rate": 1.6955555555555555e-06, + "loss": 0.0943, + "step": 46200 + }, + { + "epoch": 5.0036742854055225, + "grad_norm": 6.812433242797852, + "learning_rate": 1.6511111111111112e-06, + "loss": 0.0892, + "step": 46300 + }, + { + "epoch": 5.01448100718647, + "grad_norm": 3.1371068954467773, + "learning_rate": 1.606666666666667e-06, + "loss": 0.0758, + "step": 46400 + }, + { + "epoch": 5.025287728967418, + "grad_norm": 3.814182758331299, + "learning_rate": 1.5622222222222225e-06, + "loss": 0.0791, + "step": 46500 + }, + { + "epoch": 5.025287728967418, + "eval_loss": 0.18768277764320374, + "eval_runtime": 4051.6008, + "eval_samples_per_second": 4.567, + "eval_steps_per_second": 1.142, + "eval_wer": 0.15599121044112013, + "step": 46500 + }, + { + "epoch": 5.036094450748365, + "grad_norm": 5.319475173950195, + "learning_rate": 1.5177777777777781e-06, + "loss": 0.0759, + "step": 46600 + }, + { + "epoch": 5.046901172529314, + "grad_norm": 7.009033679962158, + "learning_rate": 1.4733333333333336e-06, + "loss": 0.0757, + "step": 46700 + }, + { + "epoch": 5.057707894310261, + "grad_norm": 4.960785865783691, + "learning_rate": 1.4288888888888892e-06, + "loss": 0.0835, + "step": 46800 + }, + { + "epoch": 5.068514616091209, + "grad_norm": 3.7821145057678223, + "learning_rate": 1.3844444444444446e-06, + "loss": 0.0775, + "step": 46900 + }, + { + "epoch": 5.079321337872156, + "grad_norm": 5.3354668617248535, + "learning_rate": 1.34e-06, + "loss": 0.077, + "step": 47000 + }, + { + "epoch": 5.079321337872156, + "eval_loss": 0.18772615492343903, + "eval_runtime": 4041.9986, + "eval_samples_per_second": 4.578, + "eval_steps_per_second": 1.144, + "eval_wer": 0.15872035283353794, + "step": 47000 + }, + { + "epoch": 5.090128059653105, + "grad_norm": 4.3733320236206055, + "learning_rate": 1.2955555555555557e-06, + "loss": 0.0809, + "step": 47100 + }, + { + "epoch": 5.100934781434052, + "grad_norm": 2.8010852336883545, + "learning_rate": 1.2511111111111112e-06, + "loss": 0.0733, + "step": 47200 + }, + { + "epoch": 5.111741503215, + "grad_norm": 10.156082153320312, + "learning_rate": 1.2066666666666668e-06, + "loss": 0.0736, + "step": 47300 + }, + { + "epoch": 5.122548224995947, + "grad_norm": 8.13224983215332, + "learning_rate": 1.1622222222222223e-06, + "loss": 0.0797, + "step": 47400 + }, + { + "epoch": 5.133354946776895, + "grad_norm": 3.623875856399536, + "learning_rate": 1.117777777777778e-06, + "loss": 0.0769, + "step": 47500 + }, + { + "epoch": 5.133354946776895, + "eval_loss": 0.18783515691757202, + "eval_runtime": 4090.0533, + "eval_samples_per_second": 4.524, + "eval_steps_per_second": 1.131, + "eval_wer": 0.15967437968704792, + "step": 47500 + }, + { + "epoch": 5.144161668557843, + "grad_norm": 4.278363227844238, + "learning_rate": 1.0733333333333334e-06, + "loss": 0.0765, + "step": 47600 + }, + { + "epoch": 5.154968390338791, + "grad_norm": 5.6777191162109375, + "learning_rate": 1.028888888888889e-06, + "loss": 0.074, + "step": 47700 + }, + { + "epoch": 5.165775112119738, + "grad_norm": 5.946367263793945, + "learning_rate": 9.844444444444445e-07, + "loss": 0.0777, + "step": 47800 + }, + { + "epoch": 5.176581833900686, + "grad_norm": 6.455644607543945, + "learning_rate": 9.400000000000001e-07, + "loss": 0.0804, + "step": 47900 + }, + { + "epoch": 5.187388555681634, + "grad_norm": 4.086187839508057, + "learning_rate": 8.955555555555557e-07, + "loss": 0.0744, + "step": 48000 + }, + { + "epoch": 5.187388555681634, + "eval_loss": 0.18759387731552124, + "eval_runtime": 4109.5273, + "eval_samples_per_second": 4.503, + "eval_steps_per_second": 1.126, + "eval_wer": 0.15845447649731387, + "step": 48000 + }, + { + "epoch": 5.198195277462582, + "grad_norm": 5.9285736083984375, + "learning_rate": 8.511111111111112e-07, + "loss": 0.0775, + "step": 48100 + }, + { + "epoch": 5.2090019992435295, + "grad_norm": 4.34613037109375, + "learning_rate": 8.066666666666667e-07, + "loss": 0.0772, + "step": 48200 + }, + { + "epoch": 5.219808721024477, + "grad_norm": 5.6380109786987305, + "learning_rate": 7.622222222222223e-07, + "loss": 0.0736, + "step": 48300 + }, + { + "epoch": 5.230615442805425, + "grad_norm": 6.854168891906738, + "learning_rate": 7.177777777777778e-07, + "loss": 0.0748, + "step": 48400 + }, + { + "epoch": 5.241422164586373, + "grad_norm": 5.549808502197266, + "learning_rate": 6.733333333333334e-07, + "loss": 0.0775, + "step": 48500 + }, + { + "epoch": 5.241422164586373, + "eval_loss": 0.18737368285655975, + "eval_runtime": 4061.569, + "eval_samples_per_second": 4.556, + "eval_steps_per_second": 1.139, + "eval_wer": 0.1595258017344521, + "step": 48500 + }, + { + "epoch": 5.2522288863673205, + "grad_norm": 4.706333160400391, + "learning_rate": 6.288888888888889e-07, + "loss": 0.0804, + "step": 48600 + }, + { + "epoch": 5.263035608148268, + "grad_norm": 6.192058563232422, + "learning_rate": 5.844444444444445e-07, + "loss": 0.0727, + "step": 48700 + }, + { + "epoch": 5.273842329929216, + "grad_norm": 6.242217540740967, + "learning_rate": 5.4e-07, + "loss": 0.0779, + "step": 48800 + }, + { + "epoch": 5.284649051710164, + "grad_norm": 4.3639326095581055, + "learning_rate": 4.955555555555556e-07, + "loss": 0.0763, + "step": 48900 + }, + { + "epoch": 5.295455773491112, + "grad_norm": 4.59783411026001, + "learning_rate": 4.511111111111111e-07, + "loss": 0.069, + "step": 49000 + }, + { + "epoch": 5.295455773491112, + "eval_loss": 0.1873014122247696, + "eval_runtime": 4056.6992, + "eval_samples_per_second": 4.561, + "eval_steps_per_second": 1.14, + "eval_wer": 0.15788362436365627, + "step": 49000 + }, + { + "epoch": 5.306262495272059, + "grad_norm": 2.637141227722168, + "learning_rate": 4.0666666666666666e-07, + "loss": 0.0755, + "step": 49100 + }, + { + "epoch": 5.317069217053007, + "grad_norm": 4.836416244506836, + "learning_rate": 3.622222222222223e-07, + "loss": 0.0792, + "step": 49200 + }, + { + "epoch": 5.327875938833955, + "grad_norm": 3.9841196537017822, + "learning_rate": 3.177777777777778e-07, + "loss": 0.0779, + "step": 49300 + }, + { + "epoch": 5.338682660614903, + "grad_norm": 5.457947731018066, + "learning_rate": 2.7333333333333335e-07, + "loss": 0.0745, + "step": 49400 + }, + { + "epoch": 5.34948938239585, + "grad_norm": 2.749351978302002, + "learning_rate": 2.2888888888888892e-07, + "loss": 0.0761, + "step": 49500 + }, + { + "epoch": 5.34948938239585, + "eval_loss": 0.18704187870025635, + "eval_runtime": 4067.5033, + "eval_samples_per_second": 4.549, + "eval_steps_per_second": 1.137, + "eval_wer": 0.15749262975156203, + "step": 49500 + }, + { + "epoch": 5.360404171394608, + "grad_norm": 4.275832653045654, + "learning_rate": 1.8444444444444446e-07, + "loss": 0.0745, + "step": 49600 + }, + { + "epoch": 5.371210893175555, + "grad_norm": 4.123929023742676, + "learning_rate": 1.4e-07, + "loss": 0.0791, + "step": 49700 + }, + { + "epoch": 5.382017614956503, + "grad_norm": 4.660633563995361, + "learning_rate": 9.555555555555556e-08, + "loss": 0.0788, + "step": 49800 + }, + { + "epoch": 5.39282433673745, + "grad_norm": 7.454755783081055, + "learning_rate": 5.111111111111112e-08, + "loss": 0.0782, + "step": 49900 + }, + { + "epoch": 5.403631058518398, + "grad_norm": 5.680870056152344, + "learning_rate": 6.666666666666667e-09, + "loss": 0.0711, + "step": 50000 + }, + { + "epoch": 5.403631058518398, + "eval_loss": 0.1868782788515091, + "eval_runtime": 3402.9501, + "eval_samples_per_second": 5.438, + "eval_steps_per_second": 1.359, + "eval_wer": 0.15829025876023428, + "step": 50000 + }, + { + "epoch": 5.403631058518398, + "step": 50000, + "total_flos": 1.969520657154048e+19, + "train_loss": 0.0007634645557403565, + "train_runtime": 4719.3215, + "train_samples_per_second": 169.516, + "train_steps_per_second": 10.595 + } + ], + "logging_steps": 100, + "max_steps": 50000, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.969520657154048e+19, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}