diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,5513 +1,7485 @@ { - "best_global_step": 38630, - "best_metric": 0.8082292432035268, - "best_model_checkpoint": "./whisper-urdu-base-finetuned/checkpoint-38630", - "epoch": 10.0, + "best_global_step": 17385, + "best_metric": 29.814983318168032, + "best_model_checkpoint": "./whisper-urdu-base-Finetuned/checkpoint-17385", + "epoch": 15.0, "eval_steps": 500, - "global_step": 38630, + "global_step": 52155, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.012943308309603935, - "grad_norm": 2.339423418045044, - "learning_rate": 1.2684442143411856e-07, - "loss": 0.0468, + "epoch": 0.014380212827149842, + "grad_norm": 23.471492767333984, + "learning_rate": 8.819018404907975e-08, + "loss": 1.4892, "step": 50 }, { - "epoch": 0.02588661661920787, - "grad_norm": 4.33876371383667, - "learning_rate": 2.5627750453015794e-07, - "loss": 0.0428, + "epoch": 0.028760425654299683, + "grad_norm": 16.729503631591797, + "learning_rate": 1.8404907975460125e-07, + "loss": 1.5035, "step": 100 }, { - "epoch": 0.03882992492881181, - "grad_norm": 4.59096622467041, - "learning_rate": 3.8571058762619726e-07, - "loss": 0.054, + "epoch": 0.04314063848144953, + "grad_norm": 13.223711967468262, + "learning_rate": 2.7990797546012273e-07, + "loss": 1.4444, "step": 150 }, { - "epoch": 0.05177323323841574, - "grad_norm": 5.390439033508301, - "learning_rate": 5.151436707222367e-07, - "loss": 0.0454, + "epoch": 0.057520851308599366, + "grad_norm": 14.540345191955566, + "learning_rate": 3.757668711656442e-07, + "loss": 1.3724, "step": 200 }, { - "epoch": 0.06471654154801967, - "grad_norm": 3.9699249267578125, - "learning_rate": 6.44576753818276e-07, - "loss": 0.03, + "epoch": 0.0719010641357492, + "grad_norm": 16.051429748535156, + "learning_rate": 4.7162576687116565e-07, + "loss": 1.2672, "step": 250 }, { - "epoch": 0.07765984985762361, - "grad_norm": 5.007837295532227, - "learning_rate": 7.740098369143153e-07, - "loss": 0.0364, + "epoch": 0.08628127696289906, + "grad_norm": 14.352593421936035, + "learning_rate": 5.674846625766872e-07, + "loss": 1.2827, "step": 300 }, { - "epoch": 0.09060315816722754, - "grad_norm": 1.7915890216827393, - "learning_rate": 9.034429200103548e-07, - "loss": 0.033, + "epoch": 0.1006614897900489, + "grad_norm": 14.53902816772461, + "learning_rate": 6.633435582822087e-07, + "loss": 1.2681, "step": 350 }, { - "epoch": 0.10354646647683148, - "grad_norm": 1.8916817903518677, - "learning_rate": 1.032876003106394e-06, - "loss": 0.0453, + "epoch": 0.11504170261719873, + "grad_norm": 15.994046211242676, + "learning_rate": 7.592024539877302e-07, + "loss": 1.1566, "step": 400 }, { - "epoch": 0.11648977478643541, - "grad_norm": 5.6407318115234375, - "learning_rate": 1.1623090862024335e-06, - "loss": 0.0493, + "epoch": 0.12942191544434858, + "grad_norm": 14.95603084564209, + "learning_rate": 8.550613496932516e-07, + "loss": 1.1417, "step": 450 }, { - "epoch": 0.12943308309603935, - "grad_norm": 1.5208466053009033, - "learning_rate": 1.2917421692984728e-06, - "loss": 0.0457, + "epoch": 0.1438021282714984, + "grad_norm": 16.78563690185547, + "learning_rate": 9.509202453987732e-07, + "loss": 1.0313, "step": 500 }, { - "epoch": 0.1423763914056433, - "grad_norm": 0.9354443550109863, - "learning_rate": 1.4211752523945122e-06, - "loss": 0.0486, + "epoch": 0.15818234109864826, + "grad_norm": 15.35020923614502, + "learning_rate": 1.0467791411042947e-06, + "loss": 1.0333, "step": 550 }, { - "epoch": 0.15531969971524723, - "grad_norm": 3.7830538749694824, - "learning_rate": 1.5506083354905516e-06, - "loss": 0.0377, + "epoch": 0.1725625539257981, + "grad_norm": 16.852476119995117, + "learning_rate": 1.1426380368098162e-06, + "loss": 0.9992, "step": 600 }, { - "epoch": 0.16826300802485114, - "grad_norm": 1.2487603425979614, - "learning_rate": 1.67745275692467e-06, - "loss": 0.069, + "epoch": 0.18694276675294794, + "grad_norm": 15.992314338684082, + "learning_rate": 1.2384969325153376e-06, + "loss": 1.0123, "step": 650 }, { - "epoch": 0.18120631633445508, - "grad_norm": 0.8733393549919128, - "learning_rate": 1.8068858400207096e-06, - "loss": 0.0457, + "epoch": 0.2013229795800978, + "grad_norm": 13.733108520507812, + "learning_rate": 1.3343558282208591e-06, + "loss": 0.9268, "step": 700 }, { - "epoch": 0.19414962464405902, - "grad_norm": 3.48146390914917, - "learning_rate": 1.936318923116749e-06, - "loss": 0.0372, + "epoch": 0.21570319240724764, + "grad_norm": 12.484084129333496, + "learning_rate": 1.4302147239263805e-06, + "loss": 0.9396, "step": 750 }, { - "epoch": 0.20709293295366296, - "grad_norm": 3.9726881980895996, - "learning_rate": 2.065752006212788e-06, - "loss": 0.0493, + "epoch": 0.23008340523439746, + "grad_norm": 16.921466827392578, + "learning_rate": 1.526073619631902e-06, + "loss": 0.9088, "step": 800 }, { - "epoch": 0.2200362412632669, - "grad_norm": 1.6841055154800415, - "learning_rate": 2.1951850893088276e-06, - "loss": 0.046, + "epoch": 0.24446361806154732, + "grad_norm": 13.716622352600098, + "learning_rate": 1.6219325153374236e-06, + "loss": 0.8569, "step": 850 }, { - "epoch": 0.23297954957287081, - "grad_norm": 1.1303517818450928, - "learning_rate": 2.324618172404867e-06, - "loss": 0.0484, + "epoch": 0.25884383088869717, + "grad_norm": 10.53323745727539, + "learning_rate": 1.717791411042945e-06, + "loss": 0.907, "step": 900 }, { - "epoch": 0.24592285788247475, - "grad_norm": 1.6165255308151245, - "learning_rate": 2.4540512555009063e-06, - "loss": 0.0617, + "epoch": 0.273224043715847, + "grad_norm": 16.710988998413086, + "learning_rate": 1.8136503067484665e-06, + "loss": 0.8163, "step": 950 }, { - "epoch": 0.2588661661920787, - "grad_norm": 4.751931667327881, - "learning_rate": 2.5834843385969457e-06, - "loss": 0.0516, + "epoch": 0.2876042565429968, + "grad_norm": 12.834067344665527, + "learning_rate": 1.909509202453988e-06, + "loss": 0.861, "step": 1000 }, { - "epoch": 0.27180947450168264, - "grad_norm": 2.619171142578125, - "learning_rate": 2.712917421692985e-06, - "loss": 0.0491, + "epoch": 0.30198446937014667, + "grad_norm": 13.926976203918457, + "learning_rate": 2.0053680981595094e-06, + "loss": 0.8421, "step": 1050 }, { - "epoch": 0.2847527828112866, - "grad_norm": 3.5204107761383057, - "learning_rate": 2.8423505047890244e-06, - "loss": 0.0451, + "epoch": 0.3163646821972965, + "grad_norm": 11.775642395019531, + "learning_rate": 2.101226993865031e-06, + "loss": 0.8082, "step": 1100 }, { - "epoch": 0.2976960911208905, - "grad_norm": 1.6330580711364746, - "learning_rate": 2.971783587885064e-06, - "loss": 0.0329, + "epoch": 0.3307448950244464, + "grad_norm": 9.644237518310547, + "learning_rate": 2.195168711656442e-06, + "loss": 0.8467, "step": 1150 }, { - "epoch": 0.31063939943049446, - "grad_norm": 9.043530464172363, - "learning_rate": 3.101216670981103e-06, - "loss": 0.0528, + "epoch": 0.3451251078515962, + "grad_norm": 11.889148712158203, + "learning_rate": 2.2910276073619635e-06, + "loss": 0.8592, "step": 1200 }, { - "epoch": 0.3235827077400984, - "grad_norm": 4.8967604637146, - "learning_rate": 3.2306497540771426e-06, - "loss": 0.052, + "epoch": 0.359505320678746, + "grad_norm": 13.074509620666504, + "learning_rate": 2.3868865030674846e-06, + "loss": 0.8304, "step": 1250 }, { - "epoch": 0.3365260160497023, - "grad_norm": 2.9386487007141113, - "learning_rate": 3.360082837173182e-06, - "loss": 0.0431, + "epoch": 0.3738855335058959, + "grad_norm": 11.115412712097168, + "learning_rate": 2.482745398773006e-06, + "loss": 0.8083, "step": 1300 }, { - "epoch": 0.3494693243593062, - "grad_norm": 3.271523952484131, - "learning_rate": 3.4895159202692213e-06, - "loss": 0.0435, + "epoch": 0.3882657463330457, + "grad_norm": 11.341413497924805, + "learning_rate": 2.5786042944785278e-06, + "loss": 0.7475, "step": 1350 }, { - "epoch": 0.36241263266891016, - "grad_norm": 3.5081090927124023, - "learning_rate": 3.6189490033652603e-06, - "loss": 0.0571, + "epoch": 0.4026459591601956, + "grad_norm": 15.837775230407715, + "learning_rate": 2.6744631901840497e-06, + "loss": 0.8191, "step": 1400 }, { - "epoch": 0.3753559409785141, - "grad_norm": 5.796409606933594, - "learning_rate": 3.7483820864612997e-06, - "loss": 0.0529, + "epoch": 0.41702617198734543, + "grad_norm": 9.811896324157715, + "learning_rate": 2.770322085889571e-06, + "loss": 0.8058, "step": 1450 }, { - "epoch": 0.38829924928811804, - "grad_norm": 3.4982869625091553, - "learning_rate": 3.877815169557339e-06, - "loss": 0.0566, + "epoch": 0.4314063848144953, + "grad_norm": 17.052064895629883, + "learning_rate": 2.8661809815950924e-06, + "loss": 0.8048, "step": 1500 }, { - "epoch": 0.401242557597722, - "grad_norm": 4.461122035980225, - "learning_rate": 4.007248252653379e-06, - "loss": 0.0562, + "epoch": 0.4457865976416451, + "grad_norm": 12.882123947143555, + "learning_rate": 2.9620398773006136e-06, + "loss": 0.7975, "step": 1550 }, { - "epoch": 0.4141858659073259, - "grad_norm": 3.034180164337158, - "learning_rate": 4.136681335749418e-06, - "loss": 0.0466, + "epoch": 0.46016681046879493, + "grad_norm": 10.48272705078125, + "learning_rate": 3.0578987730061356e-06, + "loss": 0.7632, "step": 1600 }, { - "epoch": 0.42712917421692986, - "grad_norm": 5.0971598625183105, - "learning_rate": 4.266114418845458e-06, - "loss": 0.0501, + "epoch": 0.4745470232959448, + "grad_norm": 10.434325218200684, + "learning_rate": 3.1537576687116567e-06, + "loss": 0.741, "step": 1650 }, { - "epoch": 0.4400724825265338, - "grad_norm": 5.404353618621826, - "learning_rate": 4.3955475019414965e-06, - "loss": 0.0424, + "epoch": 0.48892723612309463, + "grad_norm": 10.206270217895508, + "learning_rate": 3.2496165644171783e-06, + "loss": 0.7092, "step": 1700 }, { - "epoch": 0.45301579083613774, - "grad_norm": 2.975407838821411, - "learning_rate": 4.524980585037536e-06, - "loss": 0.0562, + "epoch": 0.5033074489502445, + "grad_norm": 10.550788879394531, + "learning_rate": 3.3454754601227e-06, + "loss": 0.8114, "step": 1750 }, { - "epoch": 0.46595909914574163, - "grad_norm": 8.076371192932129, - "learning_rate": 4.654413668133575e-06, - "loss": 0.0377, + "epoch": 0.5176876617773943, + "grad_norm": 16.394256591796875, + "learning_rate": 3.4413343558282214e-06, + "loss": 0.693, "step": 1800 }, { - "epoch": 0.47890240745534557, - "grad_norm": 5.4830451011657715, - "learning_rate": 4.781258089567694e-06, - "loss": 0.0436, + "epoch": 0.5320678746045442, + "grad_norm": 11.813963890075684, + "learning_rate": 3.5371932515337425e-06, + "loss": 0.7206, "step": 1850 }, { - "epoch": 0.4918457157649495, - "grad_norm": 2.6184751987457275, - "learning_rate": 4.910691172663733e-06, - "loss": 0.0414, + "epoch": 0.546448087431694, + "grad_norm": 9.364368438720703, + "learning_rate": 3.6330521472392645e-06, + "loss": 0.724, "step": 1900 }, { - "epoch": 0.5047890240745535, - "grad_norm": 4.777491092681885, - "learning_rate": 5.040124255759773e-06, - "loss": 0.0469, + "epoch": 0.5608283002588438, + "grad_norm": 12.862541198730469, + "learning_rate": 3.7289110429447857e-06, + "loss": 0.7655, "step": 1950 }, { - "epoch": 0.5177323323841574, - "grad_norm": 8.820015907287598, - "learning_rate": 5.169557338855812e-06, - "loss": 0.0654, + "epoch": 0.5752085130859936, + "grad_norm": 13.642607688903809, + "learning_rate": 3.824769938650307e-06, + "loss": 0.7057, "step": 2000 }, { - "epoch": 0.5306756406937613, - "grad_norm": 4.339293003082275, - "learning_rate": 5.298990421951852e-06, - "loss": 0.0473, + "epoch": 0.5895887259131435, + "grad_norm": 11.985353469848633, + "learning_rate": 3.920628834355829e-06, + "loss": 0.6694, "step": 2050 }, { - "epoch": 0.5436189490033653, - "grad_norm": 7.162559509277344, - "learning_rate": 5.428423505047891e-06, - "loss": 0.0473, + "epoch": 0.6039689387402933, + "grad_norm": 11.238410949707031, + "learning_rate": 4.01648773006135e-06, + "loss": 0.7038, "step": 2100 }, { - "epoch": 0.5565622573129692, - "grad_norm": 7.811385154724121, - "learning_rate": 5.557856588143931e-06, - "loss": 0.0516, + "epoch": 0.6183491515674432, + "grad_norm": 9.400872230529785, + "learning_rate": 4.112346625766872e-06, + "loss": 0.6472, "step": 2150 }, { - "epoch": 0.5695055656225732, - "grad_norm": 2.8982834815979004, - "learning_rate": 5.68728967123997e-06, - "loss": 0.0497, + "epoch": 0.632729364394593, + "grad_norm": 12.408802032470703, + "learning_rate": 4.2082055214723935e-06, + "loss": 0.6344, "step": 2200 }, { - "epoch": 0.582448873932177, - "grad_norm": 4.9137749671936035, - "learning_rate": 5.8167227543360085e-06, - "loss": 0.0613, + "epoch": 0.6471095772217429, + "grad_norm": 11.850605010986328, + "learning_rate": 4.304064417177914e-06, + "loss": 0.6805, "step": 2250 }, { - "epoch": 0.595392182241781, - "grad_norm": 4.650545597076416, - "learning_rate": 5.946155837432048e-06, - "loss": 0.0475, + "epoch": 0.6614897900488927, + "grad_norm": 10.33031177520752, + "learning_rate": 4.399923312883437e-06, + "loss": 0.666, "step": 2300 }, { - "epoch": 0.6083354905513849, - "grad_norm": 6.333342552185059, - "learning_rate": 6.075588920528087e-06, - "loss": 0.0793, + "epoch": 0.6758700028760426, + "grad_norm": 12.607449531555176, + "learning_rate": 4.495782208588957e-06, + "loss": 0.6406, "step": 2350 }, { - "epoch": 0.6212787988609889, - "grad_norm": 5.444154739379883, - "learning_rate": 6.205022003624127e-06, - "loss": 0.0573, + "epoch": 0.6902502157031924, + "grad_norm": 9.545390129089355, + "learning_rate": 4.591641104294479e-06, + "loss": 0.616, "step": 2400 }, { - "epoch": 0.6342221071705928, - "grad_norm": 1.8880912065505981, - "learning_rate": 6.334455086720166e-06, - "loss": 0.0409, + "epoch": 0.7046304285303423, + "grad_norm": 10.442790985107422, + "learning_rate": 4.6875000000000004e-06, + "loss": 0.6494, "step": 2450 }, { - "epoch": 0.6471654154801968, - "grad_norm": 5.30678653717041, - "learning_rate": 6.463888169816206e-06, - "loss": 0.0546, + "epoch": 0.719010641357492, + "grad_norm": 14.192750930786133, + "learning_rate": 4.783358895705522e-06, + "loss": 0.6867, "step": 2500 }, { - "epoch": 0.6601087237898007, - "grad_norm": 11.33090591430664, - "learning_rate": 6.593321252912245e-06, - "loss": 0.055, + "epoch": 0.7333908541846419, + "grad_norm": 11.300060272216797, + "learning_rate": 4.8792177914110436e-06, + "loss": 0.6188, "step": 2550 }, { - "epoch": 0.6730520320994046, - "grad_norm": 2.6539409160614014, - "learning_rate": 6.722754336008285e-06, - "loss": 0.0466, + "epoch": 0.7477710670117917, + "grad_norm": 10.002638816833496, + "learning_rate": 4.975076687116564e-06, + "loss": 0.6386, "step": 2600 }, { - "epoch": 0.6859953404090086, - "grad_norm": 5.582194805145264, - "learning_rate": 6.8521874191043236e-06, - "loss": 0.0559, + "epoch": 0.7621512798389416, + "grad_norm": 12.050922393798828, + "learning_rate": 5.070935582822087e-06, + "loss": 0.6755, "step": 2650 }, { - "epoch": 0.6989386487186124, - "grad_norm": 4.70722770690918, - "learning_rate": 6.9816205022003625e-06, - "loss": 0.0505, + "epoch": 0.7765314926660914, + "grad_norm": 14.300544738769531, + "learning_rate": 5.166794478527608e-06, + "loss": 0.6353, "step": 2700 }, { - "epoch": 0.7118819570282164, - "grad_norm": 2.8403987884521484, - "learning_rate": 7.111053585296402e-06, - "loss": 0.0508, + "epoch": 0.7909117054932413, + "grad_norm": 10.415207862854004, + "learning_rate": 5.262653374233129e-06, + "loss": 0.6137, "step": 2750 }, { - "epoch": 0.7248252653378203, - "grad_norm": 9.0087890625, - "learning_rate": 7.240486668392441e-06, - "loss": 0.0793, + "epoch": 0.8052919183203912, + "grad_norm": 13.669732093811035, + "learning_rate": 5.3585122699386505e-06, + "loss": 0.6029, "step": 2800 }, { - "epoch": 0.7377685736474243, - "grad_norm": 5.515995025634766, - "learning_rate": 7.369919751488481e-06, - "loss": 0.0635, + "epoch": 0.819672131147541, + "grad_norm": 13.034013748168945, + "learning_rate": 5.454371165644172e-06, + "loss": 0.6157, "step": 2850 }, { - "epoch": 0.7507118819570282, - "grad_norm": 3.9393441677093506, - "learning_rate": 7.49935283458452e-06, - "loss": 0.0509, + "epoch": 0.8340523439746909, + "grad_norm": 15.495798110961914, + "learning_rate": 5.550230061349694e-06, + "loss": 0.6618, "step": 2900 }, { - "epoch": 0.7636551902666322, - "grad_norm": 5.0234599113464355, - "learning_rate": 7.62878591768056e-06, - "loss": 0.0528, + "epoch": 0.8484325568018407, + "grad_norm": 11.731856346130371, + "learning_rate": 5.646088957055214e-06, + "loss": 0.6095, "step": 2950 }, { - "epoch": 0.7765984985762361, - "grad_norm": 2.9625911712646484, - "learning_rate": 7.758219000776599e-06, - "loss": 0.0435, + "epoch": 0.8628127696289906, + "grad_norm": 12.040966987609863, + "learning_rate": 5.741947852760737e-06, + "loss": 0.592, "step": 3000 }, { - "epoch": 0.78954180688584, - "grad_norm": 5.779876708984375, - "learning_rate": 7.88765208387264e-06, - "loss": 0.0522, + "epoch": 0.8771929824561403, + "grad_norm": 9.733925819396973, + "learning_rate": 5.837806748466258e-06, + "loss": 0.5719, "step": 3050 }, { - "epoch": 0.802485115195444, - "grad_norm": 1.6520557403564453, - "learning_rate": 8.017085166968678e-06, - "loss": 0.0481, + "epoch": 0.8915731952832902, + "grad_norm": 10.204453468322754, + "learning_rate": 5.93366564417178e-06, + "loss": 0.6326, "step": 3100 }, { - "epoch": 0.8154284235050479, - "grad_norm": 10.486522674560547, - "learning_rate": 8.146518250064717e-06, - "loss": 0.0571, + "epoch": 0.90595340811044, + "grad_norm": 9.83051872253418, + "learning_rate": 6.029524539877301e-06, + "loss": 0.6055, "step": 3150 }, { - "epoch": 0.8283717318146518, - "grad_norm": 5.2048139572143555, - "learning_rate": 8.275951333160756e-06, - "loss": 0.0652, + "epoch": 0.9203336209375899, + "grad_norm": 9.672574996948242, + "learning_rate": 6.125383435582822e-06, + "loss": 0.6185, "step": 3200 }, { - "epoch": 0.8413150401242557, - "grad_norm": 6.738128185272217, - "learning_rate": 8.405384416256795e-06, - "loss": 0.0732, + "epoch": 0.9347138337647397, + "grad_norm": 11.126357078552246, + "learning_rate": 6.221242331288344e-06, + "loss": 0.6148, "step": 3250 }, { - "epoch": 0.8542583484338597, - "grad_norm": 10.937406539916992, - "learning_rate": 8.534817499352836e-06, - "loss": 0.0928, + "epoch": 0.9490940465918896, + "grad_norm": 11.742711067199707, + "learning_rate": 6.317101226993866e-06, + "loss": 0.5618, "step": 3300 }, { - "epoch": 0.8672016567434636, - "grad_norm": 6.5531182289123535, - "learning_rate": 8.664250582448875e-06, - "loss": 0.0716, + "epoch": 0.9634742594190394, + "grad_norm": 12.149309158325195, + "learning_rate": 6.412960122699387e-06, + "loss": 0.5703, "step": 3350 }, { - "epoch": 0.8801449650530676, - "grad_norm": 9.179738998413086, - "learning_rate": 8.793683665544914e-06, - "loss": 0.0589, + "epoch": 0.9778544722461893, + "grad_norm": 12.387944221496582, + "learning_rate": 6.5088190184049084e-06, + "loss": 0.5643, "step": 3400 }, { - "epoch": 0.8930882733626715, - "grad_norm": 3.9006974697113037, - "learning_rate": 8.923116748640953e-06, - "loss": 0.0626, + "epoch": 0.9922346850733391, + "grad_norm": 12.418594360351562, + "learning_rate": 6.60467791411043e-06, + "loss": 0.5775, "step": 3450 }, { - "epoch": 0.9060315816722755, - "grad_norm": 1.9278124570846558, - "learning_rate": 9.052549831736993e-06, - "loss": 0.0668, + "epoch": 1.0, + "eval_cer": 18.3972824256867, + "eval_exact_match": 13.749595600129409, + "eval_loss": 0.5526189804077148, + "eval_runtime": 563.0464, + "eval_samples_per_second": 5.49, + "eval_steps_per_second": 0.687, + "eval_wer": 41.78040643008796, + "step": 3477 + }, + { + "epoch": 1.006614897900489, + "grad_norm": 9.082464218139648, + "learning_rate": 6.7005368098159516e-06, + "loss": 0.5456, "step": 3500 }, { - "epoch": 0.9189748899818794, - "grad_norm": 1.9330800771713257, - "learning_rate": 9.181982914833032e-06, - "loss": 0.0595, + "epoch": 1.0209951107276387, + "grad_norm": 11.163743019104004, + "learning_rate": 6.796395705521472e-06, + "loss": 0.5261, "step": 3550 }, { - "epoch": 0.9319181982914833, - "grad_norm": 6.718048572540283, - "learning_rate": 9.311415997929071e-06, - "loss": 0.0804, + "epoch": 1.0353753235547887, + "grad_norm": 10.806875228881836, + "learning_rate": 6.892254601226994e-06, + "loss": 0.4876, "step": 3600 }, { - "epoch": 0.9448615066010873, - "grad_norm": 6.918898582458496, - "learning_rate": 9.44084908102511e-06, - "loss": 0.0716, + "epoch": 1.0497555363819384, + "grad_norm": 9.591877937316895, + "learning_rate": 6.988113496932516e-06, + "loss": 0.506, "step": 3650 }, { - "epoch": 0.9578048149106911, - "grad_norm": 4.3917131423950195, - "learning_rate": 9.57028216412115e-06, - "loss": 0.0694, + "epoch": 1.0641357492090884, + "grad_norm": 8.583000183105469, + "learning_rate": 7.083972392638038e-06, + "loss": 0.4655, "step": 3700 }, { - "epoch": 0.9707481232202951, - "grad_norm": 11.285859107971191, - "learning_rate": 9.69971524721719e-06, - "loss": 0.0715, + "epoch": 1.0785159620362381, + "grad_norm": 13.249222755432129, + "learning_rate": 7.1798312883435585e-06, + "loss": 0.5047, "step": 3750 }, { - "epoch": 0.983691431529899, - "grad_norm": 4.574735641479492, - "learning_rate": 9.829148330313229e-06, - "loss": 0.0601, + "epoch": 1.092896174863388, + "grad_norm": 12.311944961547852, + "learning_rate": 7.27569018404908e-06, + "loss": 0.547, "step": 3800 }, { - "epoch": 0.996634739839503, - "grad_norm": 4.355649948120117, - "learning_rate": 9.955992751747347e-06, - "loss": 0.0735, + "epoch": 1.1072763876905378, + "grad_norm": 8.189698219299316, + "learning_rate": 7.371549079754602e-06, + "loss": 0.5139, "step": 3850 }, { - "epoch": 1.0, - "eval_loss": 0.07532492280006409, - "eval_runtime": 88.308, - "eval_samples_per_second": 5.662, - "eval_steps_per_second": 0.713, - "eval_wer": 9.278891571323607, - "step": 3863 - }, - { - "epoch": 1.009578048149107, - "grad_norm": 6.590569019317627, - "learning_rate": 9.990508240572958e-06, - "loss": 0.0535, + "epoch": 1.1216566005176876, + "grad_norm": 9.570950508117676, + "learning_rate": 7.467407975460123e-06, + "loss": 0.4389, "step": 3900 }, { - "epoch": 1.0225213564587108, - "grad_norm": 1.8211477994918823, - "learning_rate": 9.976126786895621e-06, - "loss": 0.0632, + "epoch": 1.1360368133448375, + "grad_norm": 10.758018493652344, + "learning_rate": 7.563266871165644e-06, + "loss": 0.4707, "step": 3950 }, { - "epoch": 1.0354646647683148, - "grad_norm": 6.469516754150391, - "learning_rate": 9.961745333218282e-06, - "loss": 0.0686, + "epoch": 1.1504170261719873, + "grad_norm": 8.146496772766113, + "learning_rate": 7.659125766871166e-06, + "loss": 0.4788, "step": 4000 }, { - "epoch": 1.0484079730779188, - "grad_norm": 8.549825668334961, - "learning_rate": 9.947363879540945e-06, - "loss": 0.0613, + "epoch": 1.1647972389991372, + "grad_norm": 10.104730606079102, + "learning_rate": 7.754984662576687e-06, + "loss": 0.4696, "step": 4050 }, { - "epoch": 1.0613512813875225, - "grad_norm": 6.740059852600098, - "learning_rate": 9.933270054937154e-06, - "loss": 0.0629, + "epoch": 1.179177451826287, + "grad_norm": 14.087817192077637, + "learning_rate": 7.85084355828221e-06, + "loss": 0.4656, "step": 4100 }, { - "epoch": 1.0742945896971265, - "grad_norm": 6.783405780792236, - "learning_rate": 9.918888601259817e-06, - "loss": 0.1005, + "epoch": 1.193557664653437, + "grad_norm": 13.676599502563477, + "learning_rate": 7.94670245398773e-06, + "loss": 0.5019, "step": 4150 }, { - "epoch": 1.0872378980067305, - "grad_norm": 3.628119707107544, - "learning_rate": 9.904507147582478e-06, - "loss": 0.0599, + "epoch": 1.2079378774805867, + "grad_norm": 10.928653717041016, + "learning_rate": 8.042561349693253e-06, + "loss": 0.46, "step": 4200 }, { - "epoch": 1.1001812063163345, - "grad_norm": 10.390827178955078, - "learning_rate": 9.89012569390514e-06, - "loss": 0.0713, + "epoch": 1.2223180903077366, + "grad_norm": 7.687665939331055, + "learning_rate": 8.138420245398773e-06, + "loss": 0.483, "step": 4250 }, { - "epoch": 1.1131245146259383, - "grad_norm": 4.781597137451172, - "learning_rate": 9.875744240227803e-06, - "loss": 0.0758, + "epoch": 1.2366983031348864, + "grad_norm": 12.936370849609375, + "learning_rate": 8.234279141104296e-06, + "loss": 0.5129, "step": 4300 }, { - "epoch": 1.1260678229355423, - "grad_norm": 2.381582498550415, - "learning_rate": 9.861362786550466e-06, - "loss": 0.0587, + "epoch": 1.2510785159620363, + "grad_norm": 10.121984481811523, + "learning_rate": 8.330138036809816e-06, + "loss": 0.4699, "step": 4350 }, { - "epoch": 1.1390111312451463, - "grad_norm": 3.9035913944244385, - "learning_rate": 9.846981332873127e-06, - "loss": 0.0482, + "epoch": 1.265458728789186, + "grad_norm": 13.878461837768555, + "learning_rate": 8.425996932515337e-06, + "loss": 0.4942, "step": 4400 }, { - "epoch": 1.1519544395547503, - "grad_norm": 3.4371120929718018, - "learning_rate": 9.83259987919579e-06, - "loss": 0.0584, + "epoch": 1.2798389416163358, + "grad_norm": 13.46450424194336, + "learning_rate": 8.52185582822086e-06, + "loss": 0.4761, "step": 4450 }, { - "epoch": 1.164897747864354, - "grad_norm": 5.601978302001953, - "learning_rate": 9.818218425518452e-06, - "loss": 0.0567, + "epoch": 1.2942191544434858, + "grad_norm": 10.65233325958252, + "learning_rate": 8.617714723926382e-06, + "loss": 0.5097, "step": 4500 }, { - "epoch": 1.177841056173958, - "grad_norm": 5.46005916595459, - "learning_rate": 9.803836971841113e-06, - "loss": 0.0696, + "epoch": 1.3085993672706355, + "grad_norm": 8.347841262817383, + "learning_rate": 8.713573619631903e-06, + "loss": 0.5041, "step": 4550 }, { - "epoch": 1.190784364483562, - "grad_norm": 6.306366443634033, - "learning_rate": 9.789455518163776e-06, - "loss": 0.0628, + "epoch": 1.3229795800977855, + "grad_norm": 11.127716064453125, + "learning_rate": 8.809432515337423e-06, + "loss": 0.487, "step": 4600 }, { - "epoch": 1.2037276727931658, - "grad_norm": 6.448446273803711, - "learning_rate": 9.775074064486439e-06, - "loss": 0.0712, + "epoch": 1.3373597929249352, + "grad_norm": 9.677105903625488, + "learning_rate": 8.905291411042946e-06, + "loss": 0.5197, "step": 4650 }, { - "epoch": 1.2166709811027698, - "grad_norm": 5.446472644805908, - "learning_rate": 9.760692610809102e-06, - "loss": 0.0549, + "epoch": 1.3517400057520852, + "grad_norm": 11.255023002624512, + "learning_rate": 9.001150306748467e-06, + "loss": 0.4535, "step": 4700 }, { - "epoch": 1.2296142894123738, - "grad_norm": 8.122842788696289, - "learning_rate": 9.746311157131764e-06, - "loss": 0.0804, + "epoch": 1.366120218579235, + "grad_norm": 23.518421173095703, + "learning_rate": 9.097009202453987e-06, + "loss": 0.491, "step": 4750 }, { - "epoch": 1.2425575977219778, - "grad_norm": 17.312549591064453, - "learning_rate": 9.731929703454425e-06, - "loss": 0.0598, + "epoch": 1.380500431406385, + "grad_norm": 11.598997116088867, + "learning_rate": 9.19286809815951e-06, + "loss": 0.4558, "step": 4800 }, { - "epoch": 1.2555009060315816, - "grad_norm": 8.815750122070312, - "learning_rate": 9.717548249777088e-06, - "loss": 0.086, + "epoch": 1.3948806442335346, + "grad_norm": 11.334724426269531, + "learning_rate": 9.288726993865032e-06, + "loss": 0.4815, "step": 4850 }, { - "epoch": 1.2684442143411856, - "grad_norm": 7.290472030639648, - "learning_rate": 9.70316679609975e-06, - "loss": 0.0646, + "epoch": 1.4092608570606844, + "grad_norm": 10.41954231262207, + "learning_rate": 9.384585889570553e-06, + "loss": 0.4685, "step": 4900 }, { - "epoch": 1.2813875226507896, - "grad_norm": 6.4685258865356445, - "learning_rate": 9.688785342422414e-06, - "loss": 0.0717, + "epoch": 1.4236410698878343, + "grad_norm": 12.354069709777832, + "learning_rate": 9.480444785276073e-06, + "loss": 0.5032, "step": 4950 }, { - "epoch": 1.2943308309603934, - "grad_norm": 6.5981974601745605, - "learning_rate": 9.674403888745075e-06, - "loss": 0.0605, + "epoch": 1.4380212827149843, + "grad_norm": 10.590629577636719, + "learning_rate": 9.576303680981596e-06, + "loss": 0.5324, "step": 5000 }, { - "epoch": 1.3072741392699974, - "grad_norm": 4.253500461578369, - "learning_rate": 9.660022435067737e-06, - "loss": 0.0657, + "epoch": 1.452401495542134, + "grad_norm": 8.815053939819336, + "learning_rate": 9.672162576687117e-06, + "loss": 0.4855, "step": 5050 }, { - "epoch": 1.3202174475796014, - "grad_norm": 14.457869529724121, - "learning_rate": 9.6456409813904e-06, - "loss": 0.0701, + "epoch": 1.4667817083692838, + "grad_norm": 10.232723236083984, + "learning_rate": 9.768021472392639e-06, + "loss": 0.4658, "step": 5100 }, { - "epoch": 1.3331607558892054, - "grad_norm": 10.046952247619629, - "learning_rate": 9.631259527713061e-06, - "loss": 0.0636, + "epoch": 1.4811619211964338, + "grad_norm": 7.862874984741211, + "learning_rate": 9.86388036809816e-06, + "loss": 0.4221, "step": 5150 }, { - "epoch": 1.3461040641988093, - "grad_norm": 0.7242924571037292, - "learning_rate": 9.616878074035725e-06, - "loss": 0.0731, + "epoch": 1.4955421340235835, + "grad_norm": 9.856216430664062, + "learning_rate": 9.959739263803682e-06, + "loss": 0.5341, "step": 5200 }, { - "epoch": 1.3590473725084131, - "grad_norm": 5.0954155921936035, - "learning_rate": 9.602496620358386e-06, - "loss": 0.0768, + "epoch": 1.5099223468507335, + "grad_norm": 10.387688636779785, + "learning_rate": 9.99403481113786e-06, + "loss": 0.5097, "step": 5250 }, { - "epoch": 1.3719906808180171, - "grad_norm": 3.536829710006714, - "learning_rate": 9.588115166681049e-06, - "loss": 0.0676, + "epoch": 1.5243025596778832, + "grad_norm": 7.723312854766846, + "learning_rate": 9.983382688169752e-06, + "loss": 0.463, "step": 5300 }, { - "epoch": 1.384933989127621, - "grad_norm": 16.024459838867188, - "learning_rate": 9.573733713003712e-06, - "loss": 0.0778, + "epoch": 1.538682772505033, + "grad_norm": 7.347230911254883, + "learning_rate": 9.972730565201646e-06, + "loss": 0.4504, "step": 5350 }, { - "epoch": 1.3978772974372249, - "grad_norm": 8.860788345336914, - "learning_rate": 9.559352259326373e-06, - "loss": 0.0618, + "epoch": 1.553062985332183, + "grad_norm": 10.801654815673828, + "learning_rate": 9.962078442233537e-06, + "loss": 0.4538, "step": 5400 }, { - "epoch": 1.4108206057468289, - "grad_norm": 8.013275146484375, - "learning_rate": 9.544970805649036e-06, - "loss": 0.0682, + "epoch": 1.5674431981593329, + "grad_norm": 15.177916526794434, + "learning_rate": 9.951426319265431e-06, + "loss": 0.46, "step": 5450 }, { - "epoch": 1.4237639140564329, - "grad_norm": 1.6208149194717407, - "learning_rate": 9.530589351971698e-06, - "loss": 0.0658, + "epoch": 1.5818234109864826, + "grad_norm": 11.820477485656738, + "learning_rate": 9.940774196297323e-06, + "loss": 0.4222, "step": 5500 }, { - "epoch": 1.4367072223660369, - "grad_norm": 3.301064968109131, - "learning_rate": 9.516207898294361e-06, - "loss": 0.0578, + "epoch": 1.5962036238136323, + "grad_norm": 9.901071548461914, + "learning_rate": 9.930122073329216e-06, + "loss": 0.4434, "step": 5550 }, { - "epoch": 1.4496505306756406, - "grad_norm": 8.253787994384766, - "learning_rate": 9.501826444617022e-06, - "loss": 0.0613, + "epoch": 1.6105838366407823, + "grad_norm": 9.764754295349121, + "learning_rate": 9.919469950361108e-06, + "loss": 0.445, "step": 5600 }, { - "epoch": 1.4625938389852446, - "grad_norm": 5.881521224975586, - "learning_rate": 9.487444990939685e-06, - "loss": 0.0652, + "epoch": 1.6249640494679323, + "grad_norm": 11.520148277282715, + "learning_rate": 9.908817827393e-06, + "loss": 0.3914, "step": 5650 }, { - "epoch": 1.4755371472948486, - "grad_norm": 5.812432289123535, - "learning_rate": 9.473063537262347e-06, - "loss": 0.0632, + "epoch": 1.639344262295082, + "grad_norm": 9.413511276245117, + "learning_rate": 9.898165704424892e-06, + "loss": 0.4412, "step": 5700 }, { - "epoch": 1.4884804556044524, - "grad_norm": 7.349687576293945, - "learning_rate": 9.458682083585008e-06, - "loss": 0.0633, + "epoch": 1.6537244751222318, + "grad_norm": 12.053434371948242, + "learning_rate": 9.887513581456785e-06, + "loss": 0.4456, "step": 5750 }, { - "epoch": 1.5014237639140564, - "grad_norm": 6.8907694816589355, - "learning_rate": 9.444300629907673e-06, - "loss": 0.0621, + "epoch": 1.6681046879493815, + "grad_norm": 11.89907455444336, + "learning_rate": 9.876861458488679e-06, + "loss": 0.4393, "step": 5800 }, { - "epoch": 1.5143670722236604, - "grad_norm": 6.606383800506592, - "learning_rate": 9.429919176230334e-06, - "loss": 0.0827, + "epoch": 1.6824849007765315, + "grad_norm": 7.352341651916504, + "learning_rate": 9.86620933552057e-06, + "loss": 0.4248, "step": 5850 }, { - "epoch": 1.5273103805332644, - "grad_norm": 7.548374176025391, - "learning_rate": 9.415537722552997e-06, - "loss": 0.0616, + "epoch": 1.6968651136036814, + "grad_norm": 11.370620727539062, + "learning_rate": 9.855557212552462e-06, + "loss": 0.4763, "step": 5900 }, { - "epoch": 1.5402536888428684, - "grad_norm": 2.861259698867798, - "learning_rate": 9.40115626887566e-06, - "loss": 0.0681, + "epoch": 1.7112453264308312, + "grad_norm": 9.851892471313477, + "learning_rate": 9.844905089584354e-06, + "loss": 0.4597, "step": 5950 }, { - "epoch": 1.5531969971524722, - "grad_norm": 1.8648054599761963, - "learning_rate": 9.38677481519832e-06, - "loss": 0.0716, + "epoch": 1.725625539257981, + "grad_norm": 10.234861373901367, + "learning_rate": 9.834252966616248e-06, + "loss": 0.4656, "step": 6000 }, { - "epoch": 1.566140305462076, - "grad_norm": 8.61605167388916, - "learning_rate": 9.372393361520983e-06, - "loss": 0.0632, + "epoch": 1.7400057520851309, + "grad_norm": 11.657553672790527, + "learning_rate": 9.82360084364814e-06, + "loss": 0.4528, "step": 6050 }, { - "epoch": 1.57908361377168, - "grad_norm": 7.054296493530273, - "learning_rate": 9.358011907843646e-06, - "loss": 0.067, + "epoch": 1.7543859649122808, + "grad_norm": 7.751284122467041, + "learning_rate": 9.812948720680033e-06, + "loss": 0.3989, "step": 6100 }, { - "epoch": 1.592026922081284, - "grad_norm": 16.833148956298828, - "learning_rate": 9.343630454166308e-06, - "loss": 0.0657, + "epoch": 1.7687661777394306, + "grad_norm": 9.346424102783203, + "learning_rate": 9.802296597711925e-06, + "loss": 0.4792, "step": 6150 }, { - "epoch": 1.604970230390888, - "grad_norm": 8.74778938293457, - "learning_rate": 9.32924900048897e-06, - "loss": 0.0824, + "epoch": 1.7831463905665803, + "grad_norm": 8.10052490234375, + "learning_rate": 9.791644474743818e-06, + "loss": 0.4463, "step": 6200 }, { - "epoch": 1.617913538700492, - "grad_norm": 7.7294440269470215, - "learning_rate": 9.314867546811632e-06, - "loss": 0.0756, + "epoch": 1.7975266033937303, + "grad_norm": 9.026034355163574, + "learning_rate": 9.78099235177571e-06, + "loss": 0.4321, "step": 6250 }, { - "epoch": 1.630856847010096, - "grad_norm": 5.917527675628662, - "learning_rate": 9.300486093134295e-06, - "loss": 0.0625, + "epoch": 1.8119068162208802, + "grad_norm": 13.529166221618652, + "learning_rate": 9.770340228807602e-06, + "loss": 0.4613, "step": 6300 }, { - "epoch": 1.6438001553196997, - "grad_norm": 6.116061687469482, - "learning_rate": 9.286104639456958e-06, - "loss": 0.0733, + "epoch": 1.82628702904803, + "grad_norm": 9.607820510864258, + "learning_rate": 9.759688105839494e-06, + "loss": 0.4339, "step": 6350 }, { - "epoch": 1.6567434636293037, - "grad_norm": 5.308782577514648, - "learning_rate": 9.27172318577962e-06, - "loss": 0.0614, + "epoch": 1.8406672418751797, + "grad_norm": 10.008755683898926, + "learning_rate": 9.749035982871387e-06, + "loss": 0.3851, "step": 6400 }, { - "epoch": 1.6696867719389075, - "grad_norm": 4.262322902679443, - "learning_rate": 9.257341732102281e-06, - "loss": 0.0695, + "epoch": 1.8550474547023295, + "grad_norm": 7.168424606323242, + "learning_rate": 9.738383859903279e-06, + "loss": 0.4214, "step": 6450 }, { - "epoch": 1.6826300802485115, - "grad_norm": 10.316951751708984, - "learning_rate": 9.242960278424944e-06, - "loss": 0.0608, + "epoch": 1.8694276675294794, + "grad_norm": 7.5019755363464355, + "learning_rate": 9.727731736935173e-06, + "loss": 0.4304, "step": 6500 }, { - "epoch": 1.6955733885581155, - "grad_norm": 10.672906875610352, - "learning_rate": 9.228578824747607e-06, - "loss": 0.0497, + "epoch": 1.8838078803566294, + "grad_norm": 7.817502975463867, + "learning_rate": 9.717079613967064e-06, + "loss": 0.4, "step": 6550 }, { - "epoch": 1.7085166968677195, - "grad_norm": 9.800777435302734, - "learning_rate": 9.214197371070268e-06, - "loss": 0.0869, + "epoch": 1.8981880931837791, + "grad_norm": 8.979328155517578, + "learning_rate": 9.706427490998956e-06, + "loss": 0.4736, "step": 6600 }, { - "epoch": 1.7214600051773234, - "grad_norm": 0.5741531848907471, - "learning_rate": 9.19981591739293e-06, - "loss": 0.0719, + "epoch": 1.9125683060109289, + "grad_norm": 8.398733139038086, + "learning_rate": 9.69577536803085e-06, + "loss": 0.4746, "step": 6650 }, { - "epoch": 1.7344033134869272, - "grad_norm": 1.716304063796997, - "learning_rate": 9.185434463715593e-06, - "loss": 0.0819, + "epoch": 1.9269485188380788, + "grad_norm": 10.292840957641602, + "learning_rate": 9.685123245062742e-06, + "loss": 0.449, "step": 6700 }, { - "epoch": 1.7473466217965312, - "grad_norm": 5.458923816680908, - "learning_rate": 9.171053010038256e-06, - "loss": 0.0911, + "epoch": 1.9413287316652288, + "grad_norm": 11.16744613647461, + "learning_rate": 9.674471122094633e-06, + "loss": 0.3774, "step": 6750 }, { - "epoch": 1.760289930106135, - "grad_norm": 5.717566967010498, - "learning_rate": 9.156671556360917e-06, - "loss": 0.0728, + "epoch": 1.9557089444923785, + "grad_norm": 12.648231506347656, + "learning_rate": 9.663818999126527e-06, + "loss": 0.4256, "step": 6800 }, { - "epoch": 1.773233238415739, - "grad_norm": 2.0101475715637207, - "learning_rate": 9.14229010268358e-06, - "loss": 0.0688, + "epoch": 1.9700891573195283, + "grad_norm": 10.38377571105957, + "learning_rate": 9.65316687615842e-06, + "loss": 0.4204, "step": 6850 }, { - "epoch": 1.786176546725343, - "grad_norm": 4.805422306060791, - "learning_rate": 9.127908649006242e-06, - "loss": 0.0722, + "epoch": 1.984469370146678, + "grad_norm": 9.374861717224121, + "learning_rate": 9.642514753190312e-06, + "loss": 0.4236, "step": 6900 }, { - "epoch": 1.799119855034947, - "grad_norm": 8.908441543579102, - "learning_rate": 9.113527195328905e-06, - "loss": 0.0742, + "epoch": 1.998849582973828, + "grad_norm": 10.195486068725586, + "learning_rate": 9.631862630222204e-06, + "loss": 0.3982, "step": 6950 }, { - "epoch": 1.812063163344551, - "grad_norm": 5.328374862670898, - "learning_rate": 9.099145741651568e-06, - "loss": 0.0705, + "epoch": 2.0, + "eval_cer": 14.873390788222366, + "eval_exact_match": 24.005176318343576, + "eval_loss": 0.4264937937259674, + "eval_runtime": 636.0349, + "eval_samples_per_second": 4.86, + "eval_steps_per_second": 0.608, + "eval_wer": 34.128669160516296, + "step": 6954 + }, + { + "epoch": 2.013229795800978, + "grad_norm": 7.0081095695495605, + "learning_rate": 9.621210507254096e-06, + "loss": 0.3498, "step": 7000 }, { - "epoch": 1.8250064716541547, - "grad_norm": 5.544122219085693, - "learning_rate": 9.084764287974229e-06, - "loss": 0.0803, + "epoch": 2.0276100086281277, + "grad_norm": 10.705326080322266, + "learning_rate": 9.61055838428599e-06, + "loss": 0.3268, "step": 7050 }, { - "epoch": 1.8379497799637587, - "grad_norm": 12.470900535583496, - "learning_rate": 9.070382834296892e-06, - "loss": 0.0687, + "epoch": 2.0419902214552774, + "grad_norm": 14.648818969726562, + "learning_rate": 9.599906261317881e-06, + "loss": 0.3224, "step": 7100 }, { - "epoch": 1.8508930882733625, - "grad_norm": 6.980189323425293, - "learning_rate": 9.056001380619553e-06, - "loss": 0.0738, + "epoch": 2.056370434282427, + "grad_norm": 7.217474460601807, + "learning_rate": 9.589254138349775e-06, + "loss": 0.3272, "step": 7150 }, { - "epoch": 1.8638363965829665, - "grad_norm": 5.287197589874268, - "learning_rate": 9.041619926942215e-06, - "loss": 0.0723, + "epoch": 2.0707506471095773, + "grad_norm": 8.819473266601562, + "learning_rate": 9.578602015381667e-06, + "loss": 0.3513, "step": 7200 }, { - "epoch": 1.8767797048925705, - "grad_norm": 7.992635250091553, - "learning_rate": 9.027238473264878e-06, - "loss": 0.0766, + "epoch": 2.085130859936727, + "grad_norm": 8.353473663330078, + "learning_rate": 9.567949892413558e-06, + "loss": 0.3253, "step": 7250 }, { - "epoch": 1.8897230132021745, - "grad_norm": 13.58613395690918, - "learning_rate": 9.01285701958754e-06, - "loss": 0.0672, + "epoch": 2.099511072763877, + "grad_norm": 7.005558967590332, + "learning_rate": 9.557297769445452e-06, + "loss": 0.305, "step": 7300 }, { - "epoch": 1.9026663215117785, - "grad_norm": 6.183566093444824, - "learning_rate": 8.998475565910203e-06, - "loss": 0.0636, + "epoch": 2.1138912855910266, + "grad_norm": 6.802879810333252, + "learning_rate": 9.546645646477344e-06, + "loss": 0.3212, "step": 7350 }, { - "epoch": 1.9156096298213825, - "grad_norm": 4.56492805480957, - "learning_rate": 8.984094112232865e-06, - "loss": 0.0823, + "epoch": 2.1282714984181768, + "grad_norm": 8.071789741516113, + "learning_rate": 9.535993523509236e-06, + "loss": 0.349, "step": 7400 }, { - "epoch": 1.9285529381309863, - "grad_norm": 3.8052022457122803, - "learning_rate": 8.969712658555527e-06, - "loss": 0.094, + "epoch": 2.1426517112453265, + "grad_norm": 11.298611640930176, + "learning_rate": 9.525341400541129e-06, + "loss": 0.336, "step": 7450 }, { - "epoch": 1.9414962464405903, - "grad_norm": 4.8918843269348145, - "learning_rate": 8.95533120487819e-06, - "loss": 0.0707, + "epoch": 2.1570319240724762, + "grad_norm": 6.722599506378174, + "learning_rate": 9.514689277573021e-06, + "loss": 0.3396, "step": 7500 }, { - "epoch": 1.954439554750194, - "grad_norm": 8.336883544921875, - "learning_rate": 8.940949751200853e-06, - "loss": 0.082, + "epoch": 2.171412136899626, + "grad_norm": 7.209293365478516, + "learning_rate": 9.504037154604914e-06, + "loss": 0.2993, "step": 7550 }, { - "epoch": 1.967382863059798, - "grad_norm": 11.596981048583984, - "learning_rate": 8.926568297523515e-06, - "loss": 0.0914, + "epoch": 2.185792349726776, + "grad_norm": 6.970348358154297, + "learning_rate": 9.493385031636806e-06, + "loss": 0.3546, "step": 7600 }, { - "epoch": 1.980326171369402, - "grad_norm": 3.7584636211395264, - "learning_rate": 8.912186843846176e-06, - "loss": 0.0697, + "epoch": 2.200172562553926, + "grad_norm": 10.960278511047363, + "learning_rate": 9.48294595112806e-06, + "loss": 0.3277, "step": 7650 }, { - "epoch": 1.993269479679006, - "grad_norm": 1.8307932615280151, - "learning_rate": 8.898093019242386e-06, - "loss": 0.0597, + "epoch": 2.2145527753810756, + "grad_norm": 10.269000053405762, + "learning_rate": 9.472293828159954e-06, + "loss": 0.3541, "step": 7700 }, { - "epoch": 2.0, - "eval_loss": 0.0625610426068306, - "eval_runtime": 88.6808, - "eval_samples_per_second": 5.638, - "eval_steps_per_second": 0.71, - "eval_wer": 7.819880340086072, - "step": 7726 - }, - { - "epoch": 2.00621278798861, - "grad_norm": 5.234423637390137, - "learning_rate": 8.883711565565049e-06, - "loss": 0.0493, + "epoch": 2.2289329882082254, + "grad_norm": 12.237759590148926, + "learning_rate": 9.461641705191846e-06, + "loss": 0.3031, "step": 7750 }, { - "epoch": 2.019156096298214, - "grad_norm": 6.8441386222839355, - "learning_rate": 8.869330111887711e-06, - "loss": 0.0531, + "epoch": 2.243313201035375, + "grad_norm": 4.310180187225342, + "learning_rate": 9.450989582223737e-06, + "loss": 0.2765, "step": 7800 }, { - "epoch": 2.0320994046078176, - "grad_norm": 11.543621063232422, - "learning_rate": 8.854948658210372e-06, - "loss": 0.0447, + "epoch": 2.2576934138625253, + "grad_norm": 7.034964084625244, + "learning_rate": 9.44033745925563e-06, + "loss": 0.3177, "step": 7850 }, { - "epoch": 2.0450427129174216, - "grad_norm": 8.563196182250977, - "learning_rate": 8.840567204533035e-06, - "loss": 0.0466, + "epoch": 2.272073626689675, + "grad_norm": 6.76523494720459, + "learning_rate": 9.429685336287523e-06, + "loss": 0.3158, "step": 7900 }, { - "epoch": 2.0579860212270256, - "grad_norm": 2.31270432472229, - "learning_rate": 8.826185750855698e-06, - "loss": 0.0487, + "epoch": 2.286453839516825, + "grad_norm": 5.0587992668151855, + "learning_rate": 9.419033213319414e-06, + "loss": 0.3333, "step": 7950 }, { - "epoch": 2.0709293295366296, - "grad_norm": 2.1112735271453857, - "learning_rate": 8.811804297178359e-06, - "loss": 0.0424, + "epoch": 2.3008340523439745, + "grad_norm": 7.236569404602051, + "learning_rate": 9.408381090351308e-06, + "loss": 0.3666, "step": 8000 }, { - "epoch": 2.0838726378462336, - "grad_norm": 2.2178971767425537, - "learning_rate": 8.797422843501021e-06, - "loss": 0.0472, + "epoch": 2.3152142651711247, + "grad_norm": 10.984098434448242, + "learning_rate": 9.3977289673832e-06, + "loss": 0.356, "step": 8050 }, { - "epoch": 2.0968159461558376, - "grad_norm": 7.748543739318848, - "learning_rate": 8.783041389823684e-06, - "loss": 0.0467, + "epoch": 2.3295944779982745, + "grad_norm": 9.4267578125, + "learning_rate": 9.387076844415093e-06, + "loss": 0.3267, "step": 8100 }, { - "epoch": 2.1097592544654415, - "grad_norm": 13.174030303955078, - "learning_rate": 8.768659936146347e-06, - "loss": 0.0582, + "epoch": 2.343974690825424, + "grad_norm": 9.876848220825195, + "learning_rate": 9.376424721446985e-06, + "loss": 0.3435, "step": 8150 }, { - "epoch": 2.122702562775045, - "grad_norm": 8.9285306930542, - "learning_rate": 8.754278482469008e-06, - "loss": 0.0388, + "epoch": 2.358354903652574, + "grad_norm": 6.698373317718506, + "learning_rate": 9.365772598478877e-06, + "loss": 0.345, "step": 8200 }, { - "epoch": 2.135645871084649, - "grad_norm": 7.205421447753906, - "learning_rate": 8.73989702879167e-06, - "loss": 0.049, + "epoch": 2.372735116479724, + "grad_norm": 6.455428123474121, + "learning_rate": 9.35512047551077e-06, + "loss": 0.3392, "step": 8250 }, { - "epoch": 2.148589179394253, - "grad_norm": 13.037263870239258, - "learning_rate": 8.725515575114333e-06, - "loss": 0.0463, + "epoch": 2.387115329306874, + "grad_norm": 8.121882438659668, + "learning_rate": 9.344468352542662e-06, + "loss": 0.3911, "step": 8300 }, { - "epoch": 2.161532487703857, - "grad_norm": 7.653538703918457, - "learning_rate": 8.711134121436996e-06, - "loss": 0.047, + "epoch": 2.4014955421340236, + "grad_norm": 9.90318489074707, + "learning_rate": 9.333816229574554e-06, + "loss": 0.3262, "step": 8350 }, { - "epoch": 2.174475796013461, - "grad_norm": 3.7179276943206787, - "learning_rate": 8.696752667759659e-06, - "loss": 0.0455, + "epoch": 2.4158757549611733, + "grad_norm": 5.209615707397461, + "learning_rate": 9.323164106606448e-06, + "loss": 0.2945, "step": 8400 }, { - "epoch": 2.187419104323065, - "grad_norm": 5.476615905761719, - "learning_rate": 8.68237121408232e-06, - "loss": 0.0439, + "epoch": 2.430255967788323, + "grad_norm": 11.901735305786133, + "learning_rate": 9.312511983638341e-06, + "loss": 0.3513, "step": 8450 }, { - "epoch": 2.200362412632669, - "grad_norm": 2.110492467880249, - "learning_rate": 8.667989760404983e-06, - "loss": 0.0411, + "epoch": 2.4446361806154733, + "grad_norm": 8.748673439025879, + "learning_rate": 9.301859860670233e-06, + "loss": 0.3522, "step": 8500 }, { - "epoch": 2.213305720942273, - "grad_norm": 5.301363468170166, - "learning_rate": 8.653608306727645e-06, - "loss": 0.0517, + "epoch": 2.459016393442623, + "grad_norm": 8.899870872497559, + "learning_rate": 9.291207737702125e-06, + "loss": 0.3626, "step": 8550 }, { - "epoch": 2.2262490292518766, - "grad_norm": 9.795352935791016, - "learning_rate": 8.639226853050306e-06, - "loss": 0.0564, + "epoch": 2.4733966062697728, + "grad_norm": 5.895473957061768, + "learning_rate": 9.280555614734017e-06, + "loss": 0.2879, "step": 8600 }, { - "epoch": 2.2391923375614806, - "grad_norm": 1.353123426437378, - "learning_rate": 8.624845399372969e-06, - "loss": 0.0485, + "epoch": 2.4877768190969225, + "grad_norm": 7.918704509735107, + "learning_rate": 9.269903491765908e-06, + "loss": 0.3287, "step": 8650 }, { - "epoch": 2.2521356458710846, - "grad_norm": 9.473326683044434, - "learning_rate": 8.610463945695632e-06, - "loss": 0.0645, + "epoch": 2.5021570319240727, + "grad_norm": 9.351618766784668, + "learning_rate": 9.259251368797802e-06, + "loss": 0.3304, "step": 8700 }, { - "epoch": 2.2650789541806886, - "grad_norm": 4.406058311462402, - "learning_rate": 8.596082492018294e-06, - "loss": 0.0542, + "epoch": 2.5165372447512224, + "grad_norm": 8.402332305908203, + "learning_rate": 9.248599245829695e-06, + "loss": 0.3474, "step": 8750 }, { - "epoch": 2.2780222624902926, - "grad_norm": 17.06141471862793, - "learning_rate": 8.581701038340955e-06, - "loss": 0.0502, + "epoch": 2.530917457578372, + "grad_norm": 6.852490425109863, + "learning_rate": 9.237947122861587e-06, + "loss": 0.31, "step": 8800 }, { - "epoch": 2.2909655707998966, - "grad_norm": 8.37076187133789, - "learning_rate": 8.567319584663618e-06, - "loss": 0.0554, + "epoch": 2.545297670405522, + "grad_norm": 11.796733856201172, + "learning_rate": 9.227294999893479e-06, + "loss": 0.3437, "step": 8850 }, { - "epoch": 2.3039088791095006, - "grad_norm": 10.135491371154785, - "learning_rate": 8.552938130986281e-06, - "loss": 0.0469, + "epoch": 2.5596778832326716, + "grad_norm": 7.547921180725098, + "learning_rate": 9.216642876925373e-06, + "loss": 0.3274, "step": 8900 }, { - "epoch": 2.316852187419104, - "grad_norm": 7.701286315917969, - "learning_rate": 8.538556677308944e-06, - "loss": 0.0468, + "epoch": 2.574058096059822, + "grad_norm": 8.241381645202637, + "learning_rate": 9.205990753957264e-06, + "loss": 0.3764, "step": 8950 }, { - "epoch": 2.329795495728708, - "grad_norm": 0.7516931891441345, - "learning_rate": 8.524175223631606e-06, - "loss": 0.0443, + "epoch": 2.5884383088869716, + "grad_norm": 9.02524471282959, + "learning_rate": 9.195338630989156e-06, + "loss": 0.352, "step": 9000 }, { - "epoch": 2.342738804038312, - "grad_norm": 8.913509368896484, - "learning_rate": 8.509793769954267e-06, - "loss": 0.0619, + "epoch": 2.6028185217141213, + "grad_norm": 8.434934616088867, + "learning_rate": 9.18468650802105e-06, + "loss": 0.3003, "step": 9050 }, { - "epoch": 2.355682112347916, - "grad_norm": 10.149544715881348, - "learning_rate": 8.49541231627693e-06, - "loss": 0.051, + "epoch": 2.617198734541271, + "grad_norm": 8.362035751342773, + "learning_rate": 9.174034385052942e-06, + "loss": 0.3191, "step": 9100 }, { - "epoch": 2.36862542065752, - "grad_norm": 8.469508171081543, - "learning_rate": 8.481030862599591e-06, - "loss": 0.0481, + "epoch": 2.6315789473684212, + "grad_norm": 8.652226448059082, + "learning_rate": 9.163382262084835e-06, + "loss": 0.3382, "step": 9150 }, { - "epoch": 2.381568728967124, - "grad_norm": 2.6739773750305176, - "learning_rate": 8.466649408922255e-06, - "loss": 0.0565, + "epoch": 2.645959160195571, + "grad_norm": 6.507467269897461, + "learning_rate": 9.152730139116727e-06, + "loss": 0.3578, "step": 9200 }, { - "epoch": 2.394512037276728, - "grad_norm": 7.850672245025635, - "learning_rate": 8.452267955244916e-06, - "loss": 0.0469, + "epoch": 2.6603393730227207, + "grad_norm": 5.656821250915527, + "learning_rate": 9.142078016148619e-06, + "loss": 0.349, "step": 9250 }, { - "epoch": 2.4074553455863317, - "grad_norm": 4.025714874267578, - "learning_rate": 8.43788650156758e-06, - "loss": 0.0554, + "epoch": 2.6747195858498705, + "grad_norm": 5.652177333831787, + "learning_rate": 9.13142589318051e-06, + "loss": 0.3395, "step": 9300 }, { - "epoch": 2.4203986538959357, - "grad_norm": 8.857705116271973, - "learning_rate": 8.423505047890242e-06, - "loss": 0.0449, + "epoch": 2.68909979867702, + "grad_norm": 8.659424781799316, + "learning_rate": 9.120773770212404e-06, + "loss": 0.3426, "step": 9350 }, { - "epoch": 2.4333419622055397, - "grad_norm": 7.978151321411133, - "learning_rate": 8.409123594212903e-06, - "loss": 0.0768, + "epoch": 2.7034800115041704, + "grad_norm": 9.938313484191895, + "learning_rate": 9.110121647244296e-06, + "loss": 0.2836, "step": 9400 }, { - "epoch": 2.4462852705151437, - "grad_norm": 7.44215726852417, - "learning_rate": 8.394742140535566e-06, - "loss": 0.0612, + "epoch": 2.71786022433132, + "grad_norm": 7.737242698669434, + "learning_rate": 9.09946952427619e-06, + "loss": 0.3578, "step": 9450 }, { - "epoch": 2.4592285788247477, - "grad_norm": 3.768834352493286, - "learning_rate": 8.380360686858228e-06, - "loss": 0.0405, + "epoch": 2.73224043715847, + "grad_norm": 7.589531898498535, + "learning_rate": 9.088817401308081e-06, + "loss": 0.3401, "step": 9500 }, { - "epoch": 2.4721718871343517, - "grad_norm": 4.792430877685547, - "learning_rate": 8.365979233180891e-06, - "loss": 0.049, + "epoch": 2.74662064998562, + "grad_norm": 7.070167541503906, + "learning_rate": 9.078165278339975e-06, + "loss": 0.3121, "step": 9550 }, { - "epoch": 2.4851151954439556, - "grad_norm": 7.07329797744751, - "learning_rate": 8.351597779503554e-06, - "loss": 0.0516, + "epoch": 2.76100086281277, + "grad_norm": 6.6532135009765625, + "learning_rate": 9.067513155371866e-06, + "loss": 0.314, "step": 9600 }, { - "epoch": 2.498058503753559, - "grad_norm": 7.043036937713623, - "learning_rate": 8.337216325826215e-06, - "loss": 0.0589, + "epoch": 2.7753810756399195, + "grad_norm": 7.483386993408203, + "learning_rate": 9.056861032403758e-06, + "loss": 0.2932, "step": 9650 }, { - "epoch": 2.511001812063163, - "grad_norm": 8.471428871154785, - "learning_rate": 8.322834872148878e-06, - "loss": 0.0433, + "epoch": 2.7897612884670693, + "grad_norm": 6.589616298675537, + "learning_rate": 9.04620890943565e-06, + "loss": 0.3297, "step": 9700 }, { - "epoch": 2.523945120372767, - "grad_norm": 5.081391334533691, - "learning_rate": 8.308453418471539e-06, - "loss": 0.0603, + "epoch": 2.804141501294219, + "grad_norm": 5.69865083694458, + "learning_rate": 9.035556786467544e-06, + "loss": 0.3095, "step": 9750 }, { - "epoch": 2.536888428682371, - "grad_norm": 6.474493980407715, - "learning_rate": 8.294071964794203e-06, - "loss": 0.0529, + "epoch": 2.8185217141213688, + "grad_norm": 7.261324882507324, + "learning_rate": 9.024904663499437e-06, + "loss": 0.3565, "step": 9800 }, { - "epoch": 2.549831736991975, - "grad_norm": 6.623453617095947, - "learning_rate": 8.279690511116864e-06, - "loss": 0.0491, + "epoch": 2.832901926948519, + "grad_norm": 6.90970516204834, + "learning_rate": 9.014252540531329e-06, + "loss": 0.3413, "step": 9850 }, { - "epoch": 2.562775045301579, - "grad_norm": 5.389576435089111, - "learning_rate": 8.265309057439527e-06, - "loss": 0.0491, + "epoch": 2.8472821397756687, + "grad_norm": 6.3184638023376465, + "learning_rate": 9.00360041756322e-06, + "loss": 0.2946, "step": 9900 }, { - "epoch": 2.575718353611183, - "grad_norm": 8.212656021118164, - "learning_rate": 8.25092760376219e-06, - "loss": 0.0544, + "epoch": 2.8616623526028184, + "grad_norm": 6.538010120391846, + "learning_rate": 8.992948294595114e-06, + "loss": 0.2856, "step": 9950 }, { - "epoch": 2.5886616619207867, - "grad_norm": 0.44550031423568726, - "learning_rate": 8.23654615008485e-06, - "loss": 0.0578, + "epoch": 2.8760425654299686, + "grad_norm": 5.910469055175781, + "learning_rate": 8.982296171627006e-06, + "loss": 0.3076, "step": 10000 }, { - "epoch": 2.6016049702303907, - "grad_norm": 1.0146311521530151, - "learning_rate": 8.222164696407513e-06, - "loss": 0.0363, + "epoch": 2.8904227782571184, + "grad_norm": 8.695311546325684, + "learning_rate": 8.971644048658898e-06, + "loss": 0.3139, "step": 10050 }, { - "epoch": 2.6145482785399947, - "grad_norm": 2.9840240478515625, - "learning_rate": 8.207783242730176e-06, - "loss": 0.0464, + "epoch": 2.904802991084268, + "grad_norm": 8.625020980834961, + "learning_rate": 8.960991925690791e-06, + "loss": 0.3253, "step": 10100 }, { - "epoch": 2.6274915868495987, - "grad_norm": 9.97850513458252, - "learning_rate": 8.193401789052839e-06, - "loss": 0.0486, + "epoch": 2.919183203911418, + "grad_norm": 6.140915870666504, + "learning_rate": 8.950339802722683e-06, + "loss": 0.2948, "step": 10150 }, { - "epoch": 2.6404348951592027, - "grad_norm": 7.187142372131348, - "learning_rate": 8.179020335375501e-06, - "loss": 0.0621, + "epoch": 2.9335634167385676, + "grad_norm": 8.719711303710938, + "learning_rate": 8.939687679754577e-06, + "loss": 0.3389, "step": 10200 }, { - "epoch": 2.6533782034688067, - "grad_norm": 8.673319816589355, - "learning_rate": 8.164638881698162e-06, - "loss": 0.0495, + "epoch": 2.9479436295657173, + "grad_norm": 8.315242767333984, + "learning_rate": 8.929035556786469e-06, + "loss": 0.367, "step": 10250 }, { - "epoch": 2.6663215117784107, - "grad_norm": 7.366682529449463, - "learning_rate": 8.150257428020825e-06, - "loss": 0.0426, + "epoch": 2.9623238423928675, + "grad_norm": 12.108076095581055, + "learning_rate": 8.91838343381836e-06, + "loss": 0.2995, "step": 10300 }, { - "epoch": 2.6792648200880143, - "grad_norm": 4.433532238006592, - "learning_rate": 8.135875974343488e-06, - "loss": 0.0489, + "epoch": 2.9767040552200172, + "grad_norm": 9.179291725158691, + "learning_rate": 8.907731310850252e-06, + "loss": 0.3159, "step": 10350 }, { - "epoch": 2.6922081283976187, - "grad_norm": 1.0996958017349243, - "learning_rate": 8.12149452066615e-06, - "loss": 0.0534, + "epoch": 2.991084268047167, + "grad_norm": 9.146824836730957, + "learning_rate": 8.897079187882146e-06, + "loss": 0.3391, "step": 10400 }, { - "epoch": 2.7051514367072222, - "grad_norm": 4.040085792541504, - "learning_rate": 8.107113066988811e-06, - "loss": 0.0456, + "epoch": 3.0, + "eval_cer": 14.196217450349874, + "eval_exact_match": 27.208023293432547, + "eval_loss": 0.38856202363967896, + "eval_runtime": 642.2814, + "eval_samples_per_second": 4.813, + "eval_steps_per_second": 0.603, + "eval_wer": 31.91285006571631, + "step": 10431 + }, + { + "epoch": 3.0054644808743167, + "grad_norm": 10.213428497314453, + "learning_rate": 8.886427064914038e-06, + "loss": 0.32, "step": 10450 }, { - "epoch": 2.7180947450168262, - "grad_norm": 6.1070685386657715, - "learning_rate": 8.092731613311474e-06, - "loss": 0.0685, + "epoch": 3.019844693701467, + "grad_norm": 8.025985717773438, + "learning_rate": 8.875774941945931e-06, + "loss": 0.2483, "step": 10500 }, { - "epoch": 2.7310380533264302, - "grad_norm": 5.145371913909912, - "learning_rate": 8.078350159634137e-06, - "loss": 0.0537, + "epoch": 3.0342249065286166, + "grad_norm": 6.563110828399658, + "learning_rate": 8.865122818977823e-06, + "loss": 0.2502, "step": 10550 }, { - "epoch": 2.7439813616360342, - "grad_norm": 5.4013190269470215, - "learning_rate": 8.063968705956798e-06, - "loss": 0.051, + "epoch": 3.0486051193557664, + "grad_norm": 8.243467330932617, + "learning_rate": 8.854470696009716e-06, + "loss": 0.268, "step": 10600 }, { - "epoch": 2.7569246699456382, - "grad_norm": 0.9301003217697144, - "learning_rate": 8.049587252279462e-06, - "loss": 0.0469, + "epoch": 3.062985332182916, + "grad_norm": 11.389385223388672, + "learning_rate": 8.843818573041608e-06, + "loss": 0.2364, "step": 10650 }, { - "epoch": 2.769867978255242, - "grad_norm": 2.0756452083587646, - "learning_rate": 8.035205798602123e-06, - "loss": 0.0508, + "epoch": 3.0773655450100663, + "grad_norm": 4.425471782684326, + "learning_rate": 8.8331664500735e-06, + "loss": 0.2299, "step": 10700 }, { - "epoch": 2.782811286564846, - "grad_norm": 5.310762882232666, - "learning_rate": 8.020824344924786e-06, - "loss": 0.0559, + "epoch": 3.091745757837216, + "grad_norm": 9.625814437866211, + "learning_rate": 8.822514327105392e-06, + "loss": 0.2299, "step": 10750 }, { - "epoch": 2.7957545948744498, - "grad_norm": 10.922019004821777, - "learning_rate": 8.006442891247447e-06, - "loss": 0.0558, + "epoch": 3.106125970664366, + "grad_norm": 7.440898895263672, + "learning_rate": 8.811862204137285e-06, + "loss": 0.2512, "step": 10800 }, { - "epoch": 2.8086979031840538, - "grad_norm": 7.071091651916504, - "learning_rate": 7.99206143757011e-06, - "loss": 0.059, + "epoch": 3.1205061834915155, + "grad_norm": 6.551193714141846, + "learning_rate": 8.801210081169179e-06, + "loss": 0.1982, "step": 10850 }, { - "epoch": 2.8216412114936578, - "grad_norm": 8.605588912963867, - "learning_rate": 7.977679983892773e-06, - "loss": 0.0502, + "epoch": 3.1348863963186657, + "grad_norm": 6.635011196136475, + "learning_rate": 8.79055795820107e-06, + "loss": 0.2463, "step": 10900 }, { - "epoch": 2.8345845198032618, - "grad_norm": 5.918191909790039, - "learning_rate": 7.963298530215435e-06, - "loss": 0.044, + "epoch": 3.1492666091458155, + "grad_norm": 8.052849769592285, + "learning_rate": 8.779905835232963e-06, + "loss": 0.2599, "step": 10950 }, { - "epoch": 2.8475278281128658, - "grad_norm": 10.315163612365723, - "learning_rate": 7.948917076538098e-06, - "loss": 0.0639, + "epoch": 3.163646821972965, + "grad_norm": 4.785330772399902, + "learning_rate": 8.769253712264854e-06, + "loss": 0.2289, "step": 11000 }, { - "epoch": 2.8604711364224693, - "grad_norm": 8.651927947998047, - "learning_rate": 7.934535622860759e-06, - "loss": 0.0632, + "epoch": 3.178027034800115, + "grad_norm": 8.820205688476562, + "learning_rate": 8.758601589296748e-06, + "loss": 0.202, "step": 11050 }, { - "epoch": 2.8734144447320737, - "grad_norm": 6.735222816467285, - "learning_rate": 7.920154169183422e-06, - "loss": 0.0594, + "epoch": 3.1924072476272647, + "grad_norm": 7.627030849456787, + "learning_rate": 8.74794946632864e-06, + "loss": 0.2258, "step": 11100 }, { - "epoch": 2.8863577530416773, - "grad_norm": 8.701018333435059, - "learning_rate": 7.905772715506084e-06, - "loss": 0.067, + "epoch": 3.206787460454415, + "grad_norm": 5.451107025146484, + "learning_rate": 8.737297343360533e-06, + "loss": 0.2356, "step": 11150 }, { - "epoch": 2.8993010613512813, - "grad_norm": 7.249449729919434, - "learning_rate": 7.891391261828745e-06, - "loss": 0.0562, + "epoch": 3.2211676732815646, + "grad_norm": 7.880435943603516, + "learning_rate": 8.726645220392425e-06, + "loss": 0.2461, "step": 11200 }, { - "epoch": 2.9122443696608853, - "grad_norm": 9.386366844177246, - "learning_rate": 7.87700980815141e-06, - "loss": 0.0443, + "epoch": 3.2355478861087144, + "grad_norm": 4.726194858551025, + "learning_rate": 8.715993097424319e-06, + "loss": 0.2412, "step": 11250 }, { - "epoch": 2.9251876779704893, - "grad_norm": 5.82024621963501, - "learning_rate": 7.86262835447407e-06, - "loss": 0.0435, + "epoch": 3.249928098935864, + "grad_norm": 5.893133163452148, + "learning_rate": 8.70534097445621e-06, + "loss": 0.2371, "step": 11300 }, { - "epoch": 2.9381309862800933, - "grad_norm": 9.714224815368652, - "learning_rate": 7.848246900796734e-06, - "loss": 0.0584, + "epoch": 3.2643083117630143, + "grad_norm": 12.14467716217041, + "learning_rate": 8.694688851488102e-06, + "loss": 0.2727, "step": 11350 }, { - "epoch": 2.9510742945896973, - "grad_norm": 10.820244789123535, - "learning_rate": 7.833865447119395e-06, - "loss": 0.051, + "epoch": 3.278688524590164, + "grad_norm": 5.388182163238525, + "learning_rate": 8.684036728519994e-06, + "loss": 0.2421, "step": 11400 }, { - "epoch": 2.9640176028993013, - "grad_norm": 9.436473846435547, - "learning_rate": 7.819483993442057e-06, - "loss": 0.0762, + "epoch": 3.2930687374173138, + "grad_norm": 5.713125228881836, + "learning_rate": 8.673384605551887e-06, + "loss": 0.2723, "step": 11450 }, { - "epoch": 2.976960911208905, - "grad_norm": 4.0043745040893555, - "learning_rate": 7.80510253976472e-06, - "loss": 0.0542, + "epoch": 3.3074489502444635, + "grad_norm": 9.101131439208984, + "learning_rate": 8.66273248258378e-06, + "loss": 0.2431, "step": 11500 }, { - "epoch": 2.989904219518509, - "grad_norm": 6.734024524688721, - "learning_rate": 7.790721086087383e-06, - "loss": 0.0678, + "epoch": 3.3218291630716132, + "grad_norm": 5.532032012939453, + "learning_rate": 8.652080359615673e-06, + "loss": 0.2335, "step": 11550 }, { - "epoch": 3.0, - "eval_loss": 0.047223061323165894, - "eval_runtime": 88.8982, - "eval_samples_per_second": 5.624, - "eval_steps_per_second": 0.709, - "eval_wer": 7.588957699170777, - "step": 11589 - }, - { - "epoch": 3.002847527828113, - "grad_norm": 7.159914970397949, - "learning_rate": 7.776339632410045e-06, - "loss": 0.0545, + "epoch": 3.3362093758987634, + "grad_norm": 7.009913444519043, + "learning_rate": 8.641428236647565e-06, + "loss": 0.2523, "step": 11600 }, { - "epoch": 3.015790836137717, - "grad_norm": 8.731955528259277, - "learning_rate": 7.761958178732706e-06, - "loss": 0.0283, + "epoch": 3.350589588725913, + "grad_norm": 9.884986877441406, + "learning_rate": 8.630776113679456e-06, + "loss": 0.231, "step": 11650 }, { - "epoch": 3.028734144447321, - "grad_norm": 9.880719184875488, - "learning_rate": 7.74757672505537e-06, - "loss": 0.0372, + "epoch": 3.364969801553063, + "grad_norm": 7.3720383644104, + "learning_rate": 8.62012399071135e-06, + "loss": 0.2393, "step": 11700 }, { - "epoch": 3.041677452756925, - "grad_norm": 4.863366603851318, - "learning_rate": 7.733195271378032e-06, - "loss": 0.0301, + "epoch": 3.3793500143802127, + "grad_norm": 6.84011173248291, + "learning_rate": 8.609684910202604e-06, + "loss": 0.2389, "step": 11750 }, { - "epoch": 3.054620761066529, - "grad_norm": 3.5925040245056152, - "learning_rate": 7.718813817700693e-06, - "loss": 0.0319, + "epoch": 3.393730227207363, + "grad_norm": 7.825477600097656, + "learning_rate": 8.599032787234497e-06, + "loss": 0.2277, "step": 11800 }, { - "epoch": 3.0675640693761324, - "grad_norm": 5.193197727203369, - "learning_rate": 7.704432364023357e-06, - "loss": 0.0284, + "epoch": 3.4081104400345126, + "grad_norm": 6.673378944396973, + "learning_rate": 8.58838066426639e-06, + "loss": 0.2867, "step": 11850 }, { - "epoch": 3.0805073776857363, - "grad_norm": 7.325606822967529, - "learning_rate": 7.690050910346018e-06, - "loss": 0.0221, + "epoch": 3.4224906528616623, + "grad_norm": 8.94361686706543, + "learning_rate": 8.577728541298281e-06, + "loss": 0.2202, "step": 11900 }, { - "epoch": 3.0934506859953403, - "grad_norm": 1.1931557655334473, - "learning_rate": 7.675669456668681e-06, - "loss": 0.0318, + "epoch": 3.436870865688812, + "grad_norm": 5.589390754699707, + "learning_rate": 8.567076418330173e-06, + "loss": 0.2679, "step": 11950 }, { - "epoch": 3.1063939943049443, - "grad_norm": 3.640929698944092, - "learning_rate": 7.661288002991342e-06, - "loss": 0.0339, + "epoch": 3.451251078515962, + "grad_norm": 7.234531402587891, + "learning_rate": 8.556424295362066e-06, + "loss": 0.2575, "step": 12000 }, { - "epoch": 3.1193373026145483, - "grad_norm": 5.155591011047363, - "learning_rate": 7.647194178387553e-06, - "loss": 0.0294, + "epoch": 3.465631291343112, + "grad_norm": 7.071993827819824, + "learning_rate": 8.545772172393958e-06, + "loss": 0.2994, "step": 12050 }, { - "epoch": 3.1322806109241523, - "grad_norm": 4.072420120239258, - "learning_rate": 7.632812724710214e-06, - "loss": 0.0371, + "epoch": 3.4800115041702617, + "grad_norm": 8.433942794799805, + "learning_rate": 8.535120049425852e-06, + "loss": 0.2371, "step": 12100 }, { - "epoch": 3.1452239192337563, - "grad_norm": 4.188466548919678, - "learning_rate": 7.618431271032877e-06, - "loss": 0.0253, + "epoch": 3.4943917169974115, + "grad_norm": 6.680046081542969, + "learning_rate": 8.524467926457744e-06, + "loss": 0.2582, "step": 12150 }, { - "epoch": 3.15816722754336, - "grad_norm": 7.71252965927124, - "learning_rate": 7.604049817355539e-06, - "loss": 0.0291, + "epoch": 3.5087719298245617, + "grad_norm": 7.790559768676758, + "learning_rate": 8.513815803489635e-06, + "loss": 0.2281, "step": 12200 }, { - "epoch": 3.171110535852964, - "grad_norm": 1.7614275217056274, - "learning_rate": 7.589668363678201e-06, - "loss": 0.0477, + "epoch": 3.5231521426517114, + "grad_norm": 7.5396504402160645, + "learning_rate": 8.503163680521529e-06, + "loss": 0.2344, "step": 12250 }, { - "epoch": 3.184053844162568, - "grad_norm": 6.398949146270752, - "learning_rate": 7.575286910000863e-06, - "loss": 0.0281, + "epoch": 3.537532355478861, + "grad_norm": 7.825564384460449, + "learning_rate": 8.49251155755342e-06, + "loss": 0.2167, "step": 12300 }, { - "epoch": 3.196997152472172, - "grad_norm": 5.648083686828613, - "learning_rate": 7.560905456323526e-06, - "loss": 0.0385, + "epoch": 3.551912568306011, + "grad_norm": 11.225576400756836, + "learning_rate": 8.481859434585313e-06, + "loss": 0.2513, "step": 12350 }, { - "epoch": 3.209940460781776, - "grad_norm": 1.6101973056793213, - "learning_rate": 7.546524002646188e-06, - "loss": 0.0291, + "epoch": 3.5662927811331606, + "grad_norm": 7.664831161499023, + "learning_rate": 8.471207311617206e-06, + "loss": 0.25, "step": 12400 }, { - "epoch": 3.22288376909138, - "grad_norm": 1.436286449432373, - "learning_rate": 7.532142548968851e-06, - "loss": 0.0443, + "epoch": 3.5806729939603104, + "grad_norm": 6.975928783416748, + "learning_rate": 8.4605551886491e-06, + "loss": 0.2538, "step": 12450 }, { - "epoch": 3.235827077400984, - "grad_norm": 1.658963680267334, - "learning_rate": 7.517761095291513e-06, - "loss": 0.0371, + "epoch": 3.5950532067874605, + "grad_norm": 6.575336456298828, + "learning_rate": 8.449903065680991e-06, + "loss": 0.2026, "step": 12500 }, { - "epoch": 3.2487703857105874, - "grad_norm": 8.697310447692871, - "learning_rate": 7.5033796416141745e-06, - "loss": 0.0299, + "epoch": 3.6094334196146103, + "grad_norm": 9.712886810302734, + "learning_rate": 8.439250942712883e-06, + "loss": 0.253, "step": 12550 }, { - "epoch": 3.2617136940201914, - "grad_norm": 6.439472198486328, - "learning_rate": 7.488998187936837e-06, - "loss": 0.0308, + "epoch": 3.62381363244176, + "grad_norm": 9.543643951416016, + "learning_rate": 8.428598819744775e-06, + "loss": 0.2724, "step": 12600 }, { - "epoch": 3.2746570023297954, - "grad_norm": 3.5517160892486572, - "learning_rate": 7.4746167342595e-06, - "loss": 0.0357, + "epoch": 3.63819384526891, + "grad_norm": 5.0453290939331055, + "learning_rate": 8.417946696776669e-06, + "loss": 0.224, "step": 12650 }, { - "epoch": 3.2876003106393994, - "grad_norm": 2.6896841526031494, - "learning_rate": 7.460235280582162e-06, - "loss": 0.0298, + "epoch": 3.65257405809606, + "grad_norm": 9.198199272155762, + "learning_rate": 8.40729457380856e-06, + "loss": 0.3038, "step": 12700 }, { - "epoch": 3.3005436189490034, - "grad_norm": 3.5411911010742188, - "learning_rate": 7.4458538269048245e-06, - "loss": 0.0249, + "epoch": 3.6669542709232097, + "grad_norm": 6.135719299316406, + "learning_rate": 8.396642450840454e-06, + "loss": 0.2396, "step": 12750 }, { - "epoch": 3.3134869272586074, - "grad_norm": 0.766302227973938, - "learning_rate": 7.431472373227486e-06, - "loss": 0.0207, + "epoch": 3.6813344837503594, + "grad_norm": 6.354711055755615, + "learning_rate": 8.385990327872346e-06, + "loss": 0.2184, "step": 12800 }, { - "epoch": 3.3264302355682114, - "grad_norm": 7.974555969238281, - "learning_rate": 7.417090919550148e-06, - "loss": 0.0258, + "epoch": 3.695714696577509, + "grad_norm": 8.384834289550781, + "learning_rate": 8.375338204904238e-06, + "loss": 0.2651, "step": 12850 }, { - "epoch": 3.339373543877815, - "grad_norm": 8.336533546447754, - "learning_rate": 7.40270946587281e-06, - "loss": 0.0335, + "epoch": 3.710094909404659, + "grad_norm": 6.415876388549805, + "learning_rate": 8.364686081936131e-06, + "loss": 0.2534, "step": 12900 }, { - "epoch": 3.352316852187419, - "grad_norm": 4.762045383453369, - "learning_rate": 7.38861564126902e-06, - "loss": 0.0391, + "epoch": 3.724475122231809, + "grad_norm": 5.291842460632324, + "learning_rate": 8.354033958968023e-06, + "loss": 0.2478, "step": 12950 }, { - "epoch": 3.365260160497023, - "grad_norm": 3.297501564025879, - "learning_rate": 7.374234187591682e-06, - "loss": 0.0427, + "epoch": 3.738855335058959, + "grad_norm": 7.500908851623535, + "learning_rate": 8.343381835999915e-06, + "loss": 0.2665, "step": 13000 }, { - "epoch": 3.378203468806627, - "grad_norm": 5.205377578735352, - "learning_rate": 7.359852733914344e-06, - "loss": 0.0517, + "epoch": 3.7532355478861086, + "grad_norm": 8.378747940063477, + "learning_rate": 8.332729713031808e-06, + "loss": 0.2486, "step": 13050 }, { - "epoch": 3.391146777116231, - "grad_norm": 5.596180438995361, - "learning_rate": 7.345471280237007e-06, - "loss": 0.0422, + "epoch": 3.7676157607132588, + "grad_norm": 6.815693378448486, + "learning_rate": 8.3220775900637e-06, + "loss": 0.2307, "step": 13100 }, { - "epoch": 3.404090085425835, - "grad_norm": 5.4441657066345215, - "learning_rate": 7.3310898265596695e-06, - "loss": 0.0367, + "epoch": 3.7819959735404085, + "grad_norm": 8.40038013458252, + "learning_rate": 8.311425467095593e-06, + "loss": 0.2093, "step": 13150 }, { - "epoch": 3.417033393735439, - "grad_norm": 5.369819641113281, - "learning_rate": 7.316708372882331e-06, - "loss": 0.0245, + "epoch": 3.7963761863675582, + "grad_norm": 10.500531196594238, + "learning_rate": 8.300773344127485e-06, + "loss": 0.2823, "step": 13200 }, { - "epoch": 3.429976702045043, - "grad_norm": 1.0381672382354736, - "learning_rate": 7.302326919204994e-06, - "loss": 0.0377, + "epoch": 3.810756399194708, + "grad_norm": 10.80937385559082, + "learning_rate": 8.290121221159377e-06, + "loss": 0.2498, "step": 13250 }, { - "epoch": 3.4429200103546465, - "grad_norm": 2.6047282218933105, - "learning_rate": 7.287945465527656e-06, - "loss": 0.0556, + "epoch": 3.8251366120218577, + "grad_norm": 7.230983257293701, + "learning_rate": 8.27946909819127e-06, + "loss": 0.2232, "step": 13300 }, { - "epoch": 3.4558633186642504, - "grad_norm": 3.447537899017334, - "learning_rate": 7.273564011850318e-06, - "loss": 0.0331, + "epoch": 3.839516824849008, + "grad_norm": 7.7391581535339355, + "learning_rate": 8.268816975223162e-06, + "loss": 0.2481, "step": 13350 }, { - "epoch": 3.4688066269738544, - "grad_norm": 12.265509605407715, - "learning_rate": 7.259182558172981e-06, - "loss": 0.0458, + "epoch": 3.8538970376761577, + "grad_norm": 9.080586433410645, + "learning_rate": 8.258164852255054e-06, + "loss": 0.2103, "step": 13400 }, { - "epoch": 3.4817499352834584, - "grad_norm": 6.993318557739258, - "learning_rate": 7.244801104495643e-06, - "loss": 0.0328, + "epoch": 3.8682772505033074, + "grad_norm": 6.053225517272949, + "learning_rate": 8.247512729286948e-06, + "loss": 0.2465, "step": 13450 }, { - "epoch": 3.4946932435930624, - "grad_norm": 1.927647590637207, - "learning_rate": 7.230419650818305e-06, - "loss": 0.036, + "epoch": 3.882657463330457, + "grad_norm": 11.269394874572754, + "learning_rate": 8.236860606318841e-06, + "loss": 0.2421, "step": 13500 }, { - "epoch": 3.5076365519026664, - "grad_norm": 4.639932632446289, - "learning_rate": 7.216038197140968e-06, - "loss": 0.0359, + "epoch": 3.8970376761576073, + "grad_norm": 8.83229923248291, + "learning_rate": 8.226208483350733e-06, + "loss": 0.2483, "step": 13550 }, { - "epoch": 3.52057986021227, - "grad_norm": 6.088189125061035, - "learning_rate": 7.20165674346363e-06, - "loss": 0.0505, + "epoch": 3.911417888984757, + "grad_norm": 10.498775482177734, + "learning_rate": 8.215556360382625e-06, + "loss": 0.2675, "step": 13600 }, { - "epoch": 3.5335231685218744, - "grad_norm": 3.452139377593994, - "learning_rate": 7.187275289786292e-06, - "loss": 0.0334, + "epoch": 3.925798101811907, + "grad_norm": 9.883252143859863, + "learning_rate": 8.204904237414517e-06, + "loss": 0.2459, "step": 13650 }, { - "epoch": 3.546466476831478, - "grad_norm": 3.6713337898254395, - "learning_rate": 7.172893836108954e-06, - "loss": 0.0305, + "epoch": 3.9401783146390565, + "grad_norm": 7.784640312194824, + "learning_rate": 8.194252114446409e-06, + "loss": 0.2557, "step": 13700 }, { - "epoch": 3.559409785141082, - "grad_norm": 4.258627891540527, - "learning_rate": 7.158512382431617e-06, - "loss": 0.0257, + "epoch": 3.9545585274662063, + "grad_norm": 6.6263580322265625, + "learning_rate": 8.183599991478302e-06, + "loss": 0.2452, "step": 13750 }, { - "epoch": 3.572353093450686, - "grad_norm": 7.825601577758789, - "learning_rate": 7.144130928754279e-06, - "loss": 0.0261, + "epoch": 3.9689387402933565, + "grad_norm": 7.17158842086792, + "learning_rate": 8.172947868510196e-06, + "loss": 0.2633, "step": 13800 }, { - "epoch": 3.58529640176029, - "grad_norm": 4.783618927001953, - "learning_rate": 7.129749475076942e-06, - "loss": 0.0347, + "epoch": 3.983318953120506, + "grad_norm": 7.404563903808594, + "learning_rate": 8.162295745542087e-06, + "loss": 0.2466, "step": 13850 }, { - "epoch": 3.598239710069894, - "grad_norm": 4.300550937652588, - "learning_rate": 7.1153680213996035e-06, - "loss": 0.029, + "epoch": 3.997699165947656, + "grad_norm": 6.2970356941223145, + "learning_rate": 8.15164362257398e-06, + "loss": 0.2471, "step": 13900 }, { - "epoch": 3.611183018379498, - "grad_norm": 10.43149471282959, - "learning_rate": 7.100986567722265e-06, - "loss": 0.0345, + "epoch": 4.0, + "eval_cer": 13.438005335977888, + "eval_exact_match": 28.53445486897444, + "eval_loss": 0.3774589002132416, + "eval_runtime": 644.1212, + "eval_samples_per_second": 4.799, + "eval_steps_per_second": 0.601, + "eval_wer": 30.43170559094126, + "step": 13908 + }, + { + "epoch": 4.012079378774806, + "grad_norm": 4.716179847717285, + "learning_rate": 8.140991499605873e-06, + "loss": 0.2031, "step": 13950 }, { - "epoch": 3.624126326689102, - "grad_norm": 5.455187797546387, - "learning_rate": 7.086605114044929e-06, - "loss": 0.0362, + "epoch": 4.026459591601956, + "grad_norm": 4.862895965576172, + "learning_rate": 8.130339376637765e-06, + "loss": 0.1836, "step": 14000 }, { - "epoch": 3.6370696349987055, - "grad_norm": 7.492135047912598, - "learning_rate": 7.072223660367591e-06, - "loss": 0.0295, + "epoch": 4.040839804429106, + "grad_norm": 9.47144603729248, + "learning_rate": 8.11990029612902e-06, + "loss": 0.1846, "step": 14050 }, { - "epoch": 3.6500129433083095, - "grad_norm": 5.982784271240234, - "learning_rate": 7.057842206690253e-06, - "loss": 0.0458, + "epoch": 4.055220017256255, + "grad_norm": 5.121662139892578, + "learning_rate": 8.109248173160912e-06, + "loss": 0.2046, "step": 14100 }, { - "epoch": 3.6629562516179135, - "grad_norm": 13.213232040405273, - "learning_rate": 7.043460753012915e-06, - "loss": 0.0299, + "epoch": 4.069600230083405, + "grad_norm": 5.951107501983643, + "learning_rate": 8.098596050192804e-06, + "loss": 0.1814, "step": 14150 }, { - "epoch": 3.6758995599275175, - "grad_norm": 0.33053404092788696, - "learning_rate": 7.029079299335577e-06, - "loss": 0.0272, + "epoch": 4.083980442910555, + "grad_norm": 6.741371154785156, + "learning_rate": 8.087943927224696e-06, + "loss": 0.1676, "step": 14200 }, { - "epoch": 3.6888428682371215, - "grad_norm": 2.8715531826019287, - "learning_rate": 7.014697845658239e-06, - "loss": 0.0406, + "epoch": 4.098360655737705, + "grad_norm": 4.553111553192139, + "learning_rate": 8.07729180425659e-06, + "loss": 0.1883, "step": 14250 }, { - "epoch": 3.7017861765467255, - "grad_norm": 7.417051315307617, - "learning_rate": 7.000316391980903e-06, - "loss": 0.0484, + "epoch": 4.112740868564854, + "grad_norm": 8.288504600524902, + "learning_rate": 8.066639681288481e-06, + "loss": 0.1822, "step": 14300 }, { - "epoch": 3.7147294848563295, - "grad_norm": 7.245136737823486, - "learning_rate": 6.9859349383035645e-06, - "loss": 0.0392, + "epoch": 4.127121081392005, + "grad_norm": 7.422959804534912, + "learning_rate": 8.055987558320375e-06, + "loss": 0.1707, "step": 14350 }, { - "epoch": 3.727672793165933, - "grad_norm": 6.493204116821289, - "learning_rate": 6.971553484626226e-06, - "loss": 0.0301, + "epoch": 4.141501294219155, + "grad_norm": 6.3395795822143555, + "learning_rate": 8.045335435352266e-06, + "loss": 0.1699, "step": 14400 }, { - "epoch": 3.740616101475537, - "grad_norm": 7.734640121459961, - "learning_rate": 6.957172030948889e-06, - "loss": 0.0317, + "epoch": 4.155881507046304, + "grad_norm": 4.627769947052002, + "learning_rate": 8.034683312384158e-06, + "loss": 0.1814, "step": 14450 }, { - "epoch": 3.753559409785141, - "grad_norm": 2.9053096771240234, - "learning_rate": 6.942790577271551e-06, - "loss": 0.0381, + "epoch": 4.170261719873454, + "grad_norm": 8.252847671508789, + "learning_rate": 8.024031189416052e-06, + "loss": 0.1829, "step": 14500 }, { - "epoch": 3.766502718094745, - "grad_norm": 8.95727825164795, - "learning_rate": 6.928409123594213e-06, - "loss": 0.0301, + "epoch": 4.184641932700604, + "grad_norm": 9.430807113647461, + "learning_rate": 8.013379066447944e-06, + "loss": 0.1892, "step": 14550 }, { - "epoch": 3.779446026404349, - "grad_norm": 3.024991512298584, - "learning_rate": 6.914027669916876e-06, - "loss": 0.0371, + "epoch": 4.199022145527754, + "grad_norm": 4.477791786193848, + "learning_rate": 8.002726943479835e-06, + "loss": 0.1738, "step": 14600 }, { - "epoch": 3.792389334713953, - "grad_norm": 2.2586114406585693, - "learning_rate": 6.899646216239538e-06, - "loss": 0.0331, + "epoch": 4.213402358354903, + "grad_norm": 5.070639610290527, + "learning_rate": 7.992074820511729e-06, + "loss": 0.1781, "step": 14650 }, { - "epoch": 3.805332643023557, - "grad_norm": 2.5965662002563477, - "learning_rate": 6.8852647625622e-06, - "loss": 0.0266, + "epoch": 4.227782571182053, + "grad_norm": 5.11230993270874, + "learning_rate": 7.981422697543622e-06, + "loss": 0.1749, "step": 14700 }, { - "epoch": 3.8182759513331606, - "grad_norm": 14.025388717651367, - "learning_rate": 6.870883308884863e-06, - "loss": 0.0325, + "epoch": 4.242162784009203, + "grad_norm": 6.374794960021973, + "learning_rate": 7.970770574575514e-06, + "loss": 0.1765, "step": 14750 }, { - "epoch": 3.8312192596427646, - "grad_norm": 12.150018692016602, - "learning_rate": 6.856501855207525e-06, - "loss": 0.0371, + "epoch": 4.2565429968363535, + "grad_norm": 5.7725958824157715, + "learning_rate": 7.960118451607406e-06, + "loss": 0.1818, "step": 14800 }, { - "epoch": 3.8441625679523685, - "grad_norm": 5.943263530731201, - "learning_rate": 6.842120401530187e-06, - "loss": 0.029, + "epoch": 4.270923209663503, + "grad_norm": 11.874361991882324, + "learning_rate": 7.949466328639298e-06, + "loss": 0.2013, "step": 14850 }, { - "epoch": 3.8571058762619725, - "grad_norm": 3.0075082778930664, - "learning_rate": 6.82773894785285e-06, - "loss": 0.0423, + "epoch": 4.285303422490653, + "grad_norm": 8.364473342895508, + "learning_rate": 7.938814205671191e-06, + "loss": 0.1729, "step": 14900 }, { - "epoch": 3.8700491845715765, - "grad_norm": 7.091434001922607, - "learning_rate": 6.813357494175512e-06, - "loss": 0.0333, + "epoch": 4.299683635317803, + "grad_norm": 4.075738906860352, + "learning_rate": 7.928162082703083e-06, + "loss": 0.166, "step": 14950 }, { - "epoch": 3.8829924928811805, - "grad_norm": 9.822648048400879, - "learning_rate": 6.798976040498174e-06, - "loss": 0.0623, + "epoch": 4.3140638481449525, + "grad_norm": 10.251448631286621, + "learning_rate": 7.917509959734977e-06, + "loss": 0.1713, "step": 15000 }, { - "epoch": 3.8959358011907845, - "grad_norm": 4.8575663566589355, - "learning_rate": 6.784594586820837e-06, - "loss": 0.0334, + "epoch": 4.328444060972102, + "grad_norm": 5.512724876403809, + "learning_rate": 7.906857836766868e-06, + "loss": 0.1826, "step": 15050 }, { - "epoch": 3.908879109500388, - "grad_norm": 6.463123321533203, - "learning_rate": 6.7702131331434985e-06, - "loss": 0.0309, + "epoch": 4.342824273799252, + "grad_norm": 6.489710807800293, + "learning_rate": 7.89620571379876e-06, + "loss": 0.1873, "step": 15100 }, { - "epoch": 3.921822417809992, - "grad_norm": 5.0641374588012695, - "learning_rate": 6.75583167946616e-06, - "loss": 0.0332, + "epoch": 4.357204486626402, + "grad_norm": 5.90891695022583, + "learning_rate": 7.885553590830654e-06, + "loss": 0.1675, "step": 15150 }, { - "epoch": 3.934765726119596, - "grad_norm": 2.091432809829712, - "learning_rate": 6.741450225788824e-06, - "loss": 0.0383, + "epoch": 4.371584699453552, + "grad_norm": 8.128609657287598, + "learning_rate": 7.874901467862546e-06, + "loss": 0.178, "step": 15200 }, { - "epoch": 3.9477090344292, - "grad_norm": 2.943432092666626, - "learning_rate": 6.727068772111486e-06, - "loss": 0.0541, + "epoch": 4.385964912280702, + "grad_norm": 6.603288173675537, + "learning_rate": 7.864249344894437e-06, + "loss": 0.1972, "step": 15250 }, { - "epoch": 3.960652342738804, - "grad_norm": 7.013586044311523, - "learning_rate": 6.712687318434148e-06, - "loss": 0.0281, + "epoch": 4.400345125107852, + "grad_norm": 9.97610855102539, + "learning_rate": 7.853597221926331e-06, + "loss": 0.1578, "step": 15300 }, { - "epoch": 3.973595651048408, - "grad_norm": 5.849566459655762, - "learning_rate": 6.69830586475681e-06, - "loss": 0.0343, + "epoch": 4.4147253379350015, + "grad_norm": 7.688840389251709, + "learning_rate": 7.842945098958223e-06, + "loss": 0.1868, "step": 15350 }, { - "epoch": 3.986538959358012, - "grad_norm": 8.706452369689941, - "learning_rate": 6.683924411079472e-06, - "loss": 0.0488, + "epoch": 4.429105550762151, + "grad_norm": 5.798449516296387, + "learning_rate": 7.832292975990116e-06, + "loss": 0.1832, "step": 15400 }, { - "epoch": 3.9994822676676156, - "grad_norm": 3.453444004058838, - "learning_rate": 6.669542957402134e-06, - "loss": 0.0232, + "epoch": 4.443485763589301, + "grad_norm": 5.3437819480896, + "learning_rate": 7.821640853022008e-06, + "loss": 0.1854, "step": 15450 }, { - "epoch": 4.0, - "eval_loss": 0.03156248852610588, - "eval_runtime": 89.3284, - "eval_samples_per_second": 5.597, - "eval_steps_per_second": 0.705, - "eval_wer": 4.15660753647528, - "step": 15452 - }, - { - "epoch": 4.01242557597722, - "grad_norm": 2.5590574741363525, - "learning_rate": 6.655161503724798e-06, - "loss": 0.0184, + "epoch": 4.457865976416451, + "grad_norm": 5.963441371917725, + "learning_rate": 7.8109887300539e-06, + "loss": 0.1807, "step": 15500 }, { - "epoch": 4.025368884286824, - "grad_norm": 0.7780801057815552, - "learning_rate": 6.6407800500474595e-06, - "loss": 0.0192, + "epoch": 4.4722461892436005, + "grad_norm": 5.895477771759033, + "learning_rate": 7.800336607085793e-06, + "loss": 0.1869, "step": 15550 }, { - "epoch": 4.038312192596428, - "grad_norm": 10.055984497070312, - "learning_rate": 6.626398596370121e-06, - "loss": 0.0238, + "epoch": 4.48662640207075, + "grad_norm": 9.634597778320312, + "learning_rate": 7.789684484117685e-06, + "loss": 0.1965, "step": 15600 }, { - "epoch": 4.051255500906032, - "grad_norm": 1.114243745803833, - "learning_rate": 6.612017142692784e-06, - "loss": 0.025, + "epoch": 4.5010066148979, + "grad_norm": 8.483346939086914, + "learning_rate": 7.779032361149577e-06, + "loss": 0.2081, "step": 15650 }, { - "epoch": 4.064198809215635, - "grad_norm": 3.681232452392578, - "learning_rate": 6.597635689015446e-06, - "loss": 0.02, + "epoch": 4.515386827725051, + "grad_norm": 9.691530227661133, + "learning_rate": 7.76838023818147e-06, + "loss": 0.1884, "step": 15700 }, { - "epoch": 4.07714211752524, - "grad_norm": 8.334362983703613, - "learning_rate": 6.583254235338109e-06, - "loss": 0.028, + "epoch": 4.5297670405522, + "grad_norm": 4.665674686431885, + "learning_rate": 7.757728115213362e-06, + "loss": 0.207, "step": 15750 }, { - "epoch": 4.090085425834843, - "grad_norm": 12.538928985595703, - "learning_rate": 6.568872781660771e-06, - "loss": 0.0223, + "epoch": 4.54414725337935, + "grad_norm": 8.650114059448242, + "learning_rate": 7.747075992245256e-06, + "loss": 0.183, "step": 15800 }, { - "epoch": 4.103028734144448, - "grad_norm": 4.050449371337891, - "learning_rate": 6.554491327983433e-06, - "loss": 0.0189, + "epoch": 4.5585274662065, + "grad_norm": 15.80134105682373, + "learning_rate": 7.736423869277148e-06, + "loss": 0.2165, "step": 15850 }, { - "epoch": 4.115972042454051, - "grad_norm": 1.1800466775894165, - "learning_rate": 6.540109874306095e-06, - "loss": 0.0205, + "epoch": 4.57290767903365, + "grad_norm": 6.1917595863342285, + "learning_rate": 7.72577174630904e-06, + "loss": 0.1897, "step": 15900 }, { - "epoch": 4.128915350763656, - "grad_norm": 5.886932373046875, - "learning_rate": 6.525728420628758e-06, - "loss": 0.0184, + "epoch": 4.587287891860799, + "grad_norm": 10.442479133605957, + "learning_rate": 7.715119623340931e-06, + "loss": 0.2089, "step": 15950 }, { - "epoch": 4.141858659073259, - "grad_norm": 4.0201497077941895, - "learning_rate": 6.51134696695142e-06, - "loss": 0.0188, + "epoch": 4.601668104687949, + "grad_norm": 8.971199035644531, + "learning_rate": 7.704467500372825e-06, + "loss": 0.1874, "step": 16000 }, { - "epoch": 4.154801967382863, - "grad_norm": 1.844307780265808, - "learning_rate": 6.4969655132740824e-06, - "loss": 0.0175, + "epoch": 4.6160483175151, + "grad_norm": 6.518112659454346, + "learning_rate": 7.693815377404717e-06, + "loss": 0.1992, "step": 16050 }, { - "epoch": 4.167745275692467, - "grad_norm": 3.237921953201294, - "learning_rate": 6.482584059596745e-06, - "loss": 0.0243, + "epoch": 4.630428530342249, + "grad_norm": 8.385542869567871, + "learning_rate": 7.68316325443661e-06, + "loss": 0.2166, "step": 16100 }, { - "epoch": 4.180688584002071, - "grad_norm": 1.648651123046875, - "learning_rate": 6.468202605919407e-06, - "loss": 0.0166, + "epoch": 4.644808743169399, + "grad_norm": 3.330723285675049, + "learning_rate": 7.672511131468502e-06, + "loss": 0.1674, "step": 16150 }, { - "epoch": 4.193631892311675, - "grad_norm": 5.503207206726074, - "learning_rate": 6.453821152242069e-06, - "loss": 0.0216, + "epoch": 4.659188955996549, + "grad_norm": 6.961243152618408, + "learning_rate": 7.662072050959756e-06, + "loss": 0.1608, "step": 16200 }, { - "epoch": 4.206575200621279, - "grad_norm": 5.58413553237915, - "learning_rate": 6.439439698564731e-06, - "loss": 0.0269, + "epoch": 4.673569168823699, + "grad_norm": 9.489914894104004, + "learning_rate": 7.65141992799165e-06, + "loss": 0.188, "step": 16250 }, { - "epoch": 4.219518508930883, - "grad_norm": 3.219493865966797, - "learning_rate": 6.4250582448873935e-06, - "loss": 0.0154, + "epoch": 4.687949381650848, + "grad_norm": 9.499531745910645, + "learning_rate": 7.640767805023543e-06, + "loss": 0.1744, "step": 16300 }, { - "epoch": 4.232461817240487, - "grad_norm": 7.937684059143066, - "learning_rate": 6.410676791210056e-06, - "loss": 0.0177, + "epoch": 4.702329594477998, + "grad_norm": 6.8072357177734375, + "learning_rate": 7.630115682055435e-06, + "loss": 0.2042, "step": 16350 }, { - "epoch": 4.24540512555009, - "grad_norm": 4.213293552398682, - "learning_rate": 6.396295337532719e-06, - "loss": 0.0202, + "epoch": 4.716709807305148, + "grad_norm": 8.239463806152344, + "learning_rate": 7.619463559087327e-06, + "loss": 0.1809, "step": 16400 }, { - "epoch": 4.258348433859695, - "grad_norm": 3.651660680770874, - "learning_rate": 6.381913883855381e-06, - "loss": 0.0213, + "epoch": 4.731090020132298, + "grad_norm": 6.3425774574279785, + "learning_rate": 7.6088114361192185e-06, + "loss": 0.2196, "step": 16450 }, { - "epoch": 4.271291742169298, - "grad_norm": 5.593703746795654, - "learning_rate": 6.367532430178043e-06, - "loss": 0.0158, + "epoch": 4.745470232959448, + "grad_norm": 8.281980514526367, + "learning_rate": 7.598159313151111e-06, + "loss": 0.1765, "step": 16500 }, { - "epoch": 4.284235050478903, - "grad_norm": 8.395854949951172, - "learning_rate": 6.3531509765007045e-06, - "loss": 0.0203, + "epoch": 4.759850445786598, + "grad_norm": 8.303557395935059, + "learning_rate": 7.587507190183005e-06, + "loss": 0.1871, "step": 16550 }, { - "epoch": 4.297178358788506, - "grad_norm": 5.291663646697998, - "learning_rate": 6.338769522823367e-06, - "loss": 0.0182, + "epoch": 4.774230658613748, + "grad_norm": 5.258712291717529, + "learning_rate": 7.5768550672148965e-06, + "loss": 0.1778, "step": 16600 }, { - "epoch": 4.310121667098111, - "grad_norm": 2.6728503704071045, - "learning_rate": 6.32438806914603e-06, - "loss": 0.0147, + "epoch": 4.7886108714408975, + "grad_norm": 10.266637802124023, + "learning_rate": 7.566202944246789e-06, + "loss": 0.1564, "step": 16650 }, { - "epoch": 4.323064975407714, - "grad_norm": 2.701340675354004, - "learning_rate": 6.310006615468693e-06, - "loss": 0.032, + "epoch": 4.802991084268047, + "grad_norm": 11.076053619384766, + "learning_rate": 7.555550821278681e-06, + "loss": 0.2155, "step": 16700 }, { - "epoch": 4.336008283717318, - "grad_norm": 5.171128273010254, - "learning_rate": 6.2956251617913545e-06, - "loss": 0.0182, + "epoch": 4.817371297095197, + "grad_norm": 17.03055763244629, + "learning_rate": 7.5448986983105745e-06, + "loss": 0.2128, "step": 16750 }, { - "epoch": 4.348951592026922, - "grad_norm": 8.16347885131836, - "learning_rate": 6.281243708114016e-06, - "loss": 0.0359, + "epoch": 4.831751509922347, + "grad_norm": 6.175199508666992, + "learning_rate": 7.534246575342466e-06, + "loss": 0.2263, "step": 16800 }, { - "epoch": 4.361894900336526, - "grad_norm": 6.198201656341553, - "learning_rate": 6.266862254436678e-06, - "loss": 0.0261, + "epoch": 4.846131722749496, + "grad_norm": 4.761529445648193, + "learning_rate": 7.523594452374359e-06, + "loss": 0.1777, "step": 16850 }, { - "epoch": 4.37483820864613, - "grad_norm": 5.718491077423096, - "learning_rate": 6.252480800759341e-06, - "loss": 0.0193, + "epoch": 4.860511935576646, + "grad_norm": 6.692610263824463, + "learning_rate": 7.513155371865614e-06, + "loss": 0.1988, "step": 16900 }, { - "epoch": 4.387781516955734, - "grad_norm": 8.977401733398438, - "learning_rate": 6.238099347082004e-06, - "loss": 0.021, + "epoch": 4.874892148403797, + "grad_norm": 10.397446632385254, + "learning_rate": 7.502503248897506e-06, + "loss": 0.1994, "step": 16950 }, { - "epoch": 4.400724825265338, - "grad_norm": 2.3729536533355713, - "learning_rate": 6.223717893404666e-06, - "loss": 0.0207, + "epoch": 4.8892723612309466, + "grad_norm": 5.788293838500977, + "learning_rate": 7.491851125929398e-06, + "loss": 0.1767, "step": 17000 }, { - "epoch": 4.413668133574942, - "grad_norm": 3.5691659450531006, - "learning_rate": 6.209336439727328e-06, - "loss": 0.0256, + "epoch": 4.903652574058096, + "grad_norm": 8.58465576171875, + "learning_rate": 7.481199002961291e-06, + "loss": 0.1768, "step": 17050 }, { - "epoch": 4.426611441884546, - "grad_norm": 3.9508790969848633, - "learning_rate": 6.195242615123537e-06, - "loss": 0.0198, + "epoch": 4.918032786885246, + "grad_norm": 6.761914253234863, + "learning_rate": 7.470546879993184e-06, + "loss": 0.1976, "step": 17100 }, { - "epoch": 4.43955475019415, - "grad_norm": 6.141160488128662, - "learning_rate": 6.1808611614462e-06, - "loss": 0.0187, + "epoch": 4.932412999712396, + "grad_norm": 3.966207265853882, + "learning_rate": 7.4598947570250754e-06, + "loss": 0.2133, "step": 17150 }, { - "epoch": 4.452498058503753, - "grad_norm": 0.2836528420448303, - "learning_rate": 6.166479707768862e-06, - "loss": 0.0187, + "epoch": 4.9467932125395455, + "grad_norm": 9.118393898010254, + "learning_rate": 7.449242634056968e-06, + "loss": 0.1913, "step": 17200 }, { - "epoch": 4.465441366813358, - "grad_norm": 6.005315780639648, - "learning_rate": 6.152098254091524e-06, - "loss": 0.0261, + "epoch": 4.961173425366695, + "grad_norm": 7.876191139221191, + "learning_rate": 7.43859051108886e-06, + "loss": 0.1956, "step": 17250 }, { - "epoch": 4.478384675122961, - "grad_norm": 2.222322702407837, - "learning_rate": 6.137716800414186e-06, - "loss": 0.0183, + "epoch": 4.975553638193845, + "grad_norm": 6.255248546600342, + "learning_rate": 7.427938388120753e-06, + "loss": 0.1809, "step": 17300 }, { - "epoch": 4.491327983432566, - "grad_norm": 4.63626766204834, - "learning_rate": 6.123335346736849e-06, - "loss": 0.0209, + "epoch": 4.989933851020995, + "grad_norm": 6.979002475738525, + "learning_rate": 7.417286265152645e-06, + "loss": 0.1923, "step": 17350 }, { - "epoch": 4.504271291742169, - "grad_norm": 1.04603111743927, - "learning_rate": 6.108953893059511e-06, - "loss": 0.0343, + "epoch": 5.0, + "eval_cer": 13.269267061622775, + "eval_exact_match": 30.41087026852151, + "eval_loss": 0.382281094789505, + "eval_runtime": 670.1921, + "eval_samples_per_second": 4.612, + "eval_steps_per_second": 0.577, + "eval_wer": 29.814983318168032, + "step": 17385 + }, + { + "epoch": 5.004314063848145, + "grad_norm": 6.436083793640137, + "learning_rate": 7.406634142184538e-06, + "loss": 0.1804, "step": 17400 }, { - "epoch": 4.517214600051773, - "grad_norm": 0.18591836094856262, - "learning_rate": 6.094572439382173e-06, - "loss": 0.0205, + "epoch": 5.018694276675295, + "grad_norm": 5.177228927612305, + "learning_rate": 7.39598201921643e-06, + "loss": 0.1212, "step": 17450 }, { - "epoch": 4.530157908361377, - "grad_norm": 4.46800422668457, - "learning_rate": 6.080190985704836e-06, - "loss": 0.0307, + "epoch": 5.033074489502445, + "grad_norm": 5.305710792541504, + "learning_rate": 7.385329896248323e-06, + "loss": 0.1539, "step": 17500 }, { - "epoch": 4.543101216670981, - "grad_norm": 2.5701541900634766, - "learning_rate": 6.065809532027498e-06, - "loss": 0.0201, + "epoch": 5.047454702329595, + "grad_norm": 6.128417491912842, + "learning_rate": 7.374677773280216e-06, + "loss": 0.1337, "step": 17550 }, { - "epoch": 4.556044524980585, - "grad_norm": 3.805527448654175, - "learning_rate": 6.05142807835016e-06, - "loss": 0.0247, + "epoch": 5.061834915156744, + "grad_norm": 3.904686689376831, + "learning_rate": 7.364025650312108e-06, + "loss": 0.1316, "step": 17600 }, { - "epoch": 4.568987833290189, - "grad_norm": 5.005966663360596, - "learning_rate": 6.037046624672822e-06, - "loss": 0.0166, + "epoch": 5.076215127983894, + "grad_norm": 6.186351776123047, + "learning_rate": 7.3533735273439995e-06, + "loss": 0.1392, "step": 17650 }, { - "epoch": 4.581931141599793, - "grad_norm": 2.1261184215545654, - "learning_rate": 6.022665170995484e-06, - "loss": 0.0192, + "epoch": 5.090595340811044, + "grad_norm": 4.8498921394348145, + "learning_rate": 7.342721404375893e-06, + "loss": 0.1321, "step": 17700 }, { - "epoch": 4.594874449909397, - "grad_norm": 3.359769582748413, - "learning_rate": 6.008283717318147e-06, - "loss": 0.0186, + "epoch": 5.1049755536381936, + "grad_norm": 6.682563781738281, + "learning_rate": 7.332069281407786e-06, + "loss": 0.1203, "step": 17750 }, { - "epoch": 4.607817758219001, - "grad_norm": 4.658329010009766, - "learning_rate": 5.99390226364081e-06, - "loss": 0.0156, + "epoch": 5.119355766465343, + "grad_norm": 4.511005878448486, + "learning_rate": 7.3214171584396775e-06, + "loss": 0.1096, "step": 17800 }, { - "epoch": 4.620761066528605, - "grad_norm": 1.4093743562698364, - "learning_rate": 5.979520809963472e-06, - "loss": 0.0249, + "epoch": 5.133735979292494, + "grad_norm": 6.848805904388428, + "learning_rate": 7.31076503547157e-06, + "loss": 0.1351, "step": 17850 }, { - "epoch": 4.633704374838208, - "grad_norm": 7.107546806335449, - "learning_rate": 5.9651393562861336e-06, - "loss": 0.019, + "epoch": 5.148116192119644, + "grad_norm": 5.430601596832275, + "learning_rate": 7.300112912503462e-06, + "loss": 0.1479, "step": 17900 }, { - "epoch": 4.646647683147813, - "grad_norm": 3.7134788036346436, - "learning_rate": 5.9507579026087954e-06, - "loss": 0.0316, + "epoch": 5.162496404946793, + "grad_norm": 4.823909759521484, + "learning_rate": 7.2894607895353555e-06, + "loss": 0.1428, "step": 17950 }, { - "epoch": 4.659590991457416, - "grad_norm": 9.994954109191895, - "learning_rate": 5.936376448931458e-06, - "loss": 0.021, + "epoch": 5.176876617773943, + "grad_norm": 6.366703510284424, + "learning_rate": 7.278808666567247e-06, + "loss": 0.1712, "step": 18000 }, { - "epoch": 4.672534299767021, - "grad_norm": 5.7871527671813965, - "learning_rate": 5.921994995254121e-06, - "loss": 0.0201, + "epoch": 5.191256830601093, + "grad_norm": 3.8682894706726074, + "learning_rate": 7.26815654359914e-06, + "loss": 0.1464, "step": 18050 }, { - "epoch": 4.685477608076624, - "grad_norm": 4.141567230224609, - "learning_rate": 5.9076135415767836e-06, - "loss": 0.0341, + "epoch": 5.205637043428243, + "grad_norm": 19.300844192504883, + "learning_rate": 7.257504420631032e-06, + "loss": 0.144, "step": 18100 }, { - "epoch": 4.698420916386228, - "grad_norm": 3.703082799911499, - "learning_rate": 5.8932320878994454e-06, - "loss": 0.0209, + "epoch": 5.220017256255392, + "grad_norm": 9.907816886901855, + "learning_rate": 7.246852297662925e-06, + "loss": 0.1462, "step": 18150 }, { - "epoch": 4.711364224695832, - "grad_norm": 1.5418035984039307, - "learning_rate": 5.878850634222107e-06, - "loss": 0.0205, + "epoch": 5.234397469082542, + "grad_norm": 6.529069900512695, + "learning_rate": 7.236200174694817e-06, + "loss": 0.1598, "step": 18200 }, { - "epoch": 4.724307533005436, - "grad_norm": 2.696366310119629, - "learning_rate": 5.864469180544769e-06, - "loss": 0.0322, + "epoch": 5.248777681909692, + "grad_norm": 5.564172744750977, + "learning_rate": 7.22554805172671e-06, + "loss": 0.145, "step": 18250 }, { - "epoch": 4.73725084131504, - "grad_norm": 12.889842987060547, - "learning_rate": 5.850087726867433e-06, - "loss": 0.0299, + "epoch": 5.2631578947368425, + "grad_norm": 7.95921516418457, + "learning_rate": 7.214895928758602e-06, + "loss": 0.1243, "step": 18300 }, { - "epoch": 4.750194149624644, - "grad_norm": 14.697345733642578, - "learning_rate": 5.835706273190095e-06, - "loss": 0.021, + "epoch": 5.277538107563992, + "grad_norm": 5.626272201538086, + "learning_rate": 7.204243805790495e-06, + "loss": 0.133, "step": 18350 }, { - "epoch": 4.763137457934248, - "grad_norm": 14.185206413269043, - "learning_rate": 5.821324819512757e-06, - "loss": 0.028, + "epoch": 5.291918320391142, + "grad_norm": 6.12129545211792, + "learning_rate": 7.193591682822387e-06, + "loss": 0.1331, "step": 18400 }, { - "epoch": 4.776080766243852, - "grad_norm": 1.6946377754211426, - "learning_rate": 5.806943365835419e-06, - "loss": 0.0407, + "epoch": 5.306298533218292, + "grad_norm": 6.381573677062988, + "learning_rate": 7.18293955985428e-06, + "loss": 0.1425, "step": 18450 }, { - "epoch": 4.789024074553456, - "grad_norm": 0.4647742509841919, - "learning_rate": 5.792561912158081e-06, - "loss": 0.0245, + "epoch": 5.320678746045441, + "grad_norm": 4.725672721862793, + "learning_rate": 7.1722874368861715e-06, + "loss": 0.1423, "step": 18500 }, { - "epoch": 4.80196738286306, - "grad_norm": 0.9601898193359375, - "learning_rate": 5.778180458480743e-06, - "loss": 0.023, + "epoch": 5.335058958872591, + "grad_norm": 5.827751159667969, + "learning_rate": 7.161635313918065e-06, + "loss": 0.1448, "step": 18550 }, { - "epoch": 4.814910691172663, - "grad_norm": 8.700115203857422, - "learning_rate": 5.7637990048034065e-06, - "loss": 0.0197, + "epoch": 5.349439171699741, + "grad_norm": 6.432129859924316, + "learning_rate": 7.150983190949957e-06, + "loss": 0.144, "step": 18600 }, { - "epoch": 4.827853999482268, - "grad_norm": 0.46070945262908936, - "learning_rate": 5.749417551126068e-06, - "loss": 0.0207, + "epoch": 5.363819384526891, + "grad_norm": 5.147676944732666, + "learning_rate": 7.1403310679818495e-06, + "loss": 0.1156, "step": 18650 }, { - "epoch": 4.840797307791871, - "grad_norm": 3.087283134460449, - "learning_rate": 5.735036097448731e-06, - "loss": 0.0228, + "epoch": 5.37819959735404, + "grad_norm": 5.276687145233154, + "learning_rate": 7.129678945013741e-06, + "loss": 0.137, "step": 18700 }, { - "epoch": 4.853740616101476, - "grad_norm": 2.9574930667877197, - "learning_rate": 5.720654643771393e-06, - "loss": 0.0251, + "epoch": 5.392579810181191, + "grad_norm": 7.534707069396973, + "learning_rate": 7.119026822045634e-06, + "loss": 0.1195, "step": 18750 }, { - "epoch": 4.866683924411079, - "grad_norm": 0.7309446334838867, - "learning_rate": 5.706273190094055e-06, - "loss": 0.0209, + "epoch": 5.406960023008341, + "grad_norm": 4.414114952087402, + "learning_rate": 7.1083746990775275e-06, + "loss": 0.142, "step": 18800 }, { - "epoch": 4.879627232720684, - "grad_norm": 3.813610553741455, - "learning_rate": 5.691891736416717e-06, - "loss": 0.0179, + "epoch": 5.4213402358354905, + "grad_norm": 10.028677940368652, + "learning_rate": 7.097722576109419e-06, + "loss": 0.1355, "step": 18850 }, { - "epoch": 4.892570541030287, - "grad_norm": 6.62191104888916, - "learning_rate": 5.67751028273938e-06, - "loss": 0.0229, + "epoch": 5.43572044866264, + "grad_norm": 8.05347728729248, + "learning_rate": 7.087070453141311e-06, + "loss": 0.1443, "step": 18900 }, { - "epoch": 4.905513849339892, - "grad_norm": 1.3516626358032227, - "learning_rate": 5.663128829062042e-06, - "loss": 0.0202, + "epoch": 5.45010066148979, + "grad_norm": 6.495317459106445, + "learning_rate": 7.076418330173204e-06, + "loss": 0.145, "step": 18950 }, { - "epoch": 4.918457157649495, - "grad_norm": 8.537408828735352, - "learning_rate": 5.648747375384705e-06, - "loss": 0.0219, + "epoch": 5.46448087431694, + "grad_norm": 10.462991714477539, + "learning_rate": 7.065766207205097e-06, + "loss": 0.156, "step": 19000 }, { - "epoch": 4.931400465959099, - "grad_norm": 7.586127758026123, - "learning_rate": 5.634365921707367e-06, - "loss": 0.0263, + "epoch": 5.4788610871440895, + "grad_norm": 8.963197708129883, + "learning_rate": 7.055114084236989e-06, + "loss": 0.1627, "step": 19050 }, { - "epoch": 4.944343774268703, - "grad_norm": 2.9053454399108887, - "learning_rate": 5.6199844680300286e-06, - "loss": 0.0173, + "epoch": 5.493241299971239, + "grad_norm": 8.749103546142578, + "learning_rate": 7.044461961268882e-06, + "loss": 0.1396, "step": 19100 }, { - "epoch": 4.957287082578307, - "grad_norm": 3.9602510929107666, - "learning_rate": 5.60560301435269e-06, - "loss": 0.0216, + "epoch": 5.507621512798389, + "grad_norm": 5.2716569900512695, + "learning_rate": 7.0338098383007736e-06, + "loss": 0.1304, "step": 19150 }, { - "epoch": 4.970230390887911, - "grad_norm": 8.381765365600586, - "learning_rate": 5.591221560675354e-06, - "loss": 0.0213, + "epoch": 5.52200172562554, + "grad_norm": 9.617664337158203, + "learning_rate": 7.023157715332667e-06, + "loss": 0.1444, "step": 19200 }, { - "epoch": 4.983173699197515, - "grad_norm": 3.1814215183258057, - "learning_rate": 5.577127736071563e-06, - "loss": 0.028, + "epoch": 5.536381938452689, + "grad_norm": 5.282269477844238, + "learning_rate": 7.012505592364559e-06, + "loss": 0.1331, "step": 19250 }, { - "epoch": 4.996117007507118, - "grad_norm": 9.668580055236816, - "learning_rate": 5.5627462823942245e-06, - "loss": 0.0346, + "epoch": 5.550762151279839, + "grad_norm": 11.784239768981934, + "learning_rate": 7.0018534693964516e-06, + "loss": 0.1672, "step": 19300 }, { - "epoch": 5.0, - "eval_loss": 0.019977210089564323, - "eval_runtime": 88.6374, - "eval_samples_per_second": 5.641, - "eval_steps_per_second": 0.711, - "eval_wer": 3.1069591686784928, - "step": 19315 - }, - { - "epoch": 5.009060315816723, - "grad_norm": 0.802038848400116, - "learning_rate": 5.548364828716886e-06, - "loss": 0.0186, + "epoch": 5.565142364106989, + "grad_norm": 2.5350866317749023, + "learning_rate": 6.991201346428343e-06, + "loss": 0.1092, "step": 19350 }, { - "epoch": 5.022003624126326, - "grad_norm": 3.1706273555755615, - "learning_rate": 5.53398337503955e-06, - "loss": 0.0236, + "epoch": 5.5795225769341386, + "grad_norm": 8.985469818115234, + "learning_rate": 6.980549223460236e-06, + "loss": 0.1484, "step": 19400 }, { - "epoch": 5.034946932435931, - "grad_norm": 3.518181562423706, - "learning_rate": 5.519601921362212e-06, - "loss": 0.0136, + "epoch": 5.593902789761288, + "grad_norm": 7.215806484222412, + "learning_rate": 6.969897100492129e-06, + "loss": 0.1468, "step": 19450 }, { - "epoch": 5.047890240745534, - "grad_norm": 4.777484893798828, - "learning_rate": 5.50550809675842e-06, - "loss": 0.012, + "epoch": 5.608283002588438, + "grad_norm": 4.80800199508667, + "learning_rate": 6.959244977524021e-06, + "loss": 0.1137, "step": 19500 }, { - "epoch": 5.060833549055139, - "grad_norm": 16.247772216796875, - "learning_rate": 5.491126643081083e-06, - "loss": 0.0107, + "epoch": 5.622663215415588, + "grad_norm": 6.728760719299316, + "learning_rate": 6.948592854555913e-06, + "loss": 0.1317, "step": 19550 }, { - "epoch": 5.073776857364742, - "grad_norm": 1.9312938451766968, - "learning_rate": 5.476745189403745e-06, - "loss": 0.0086, + "epoch": 5.6370434282427375, + "grad_norm": 4.241484642028809, + "learning_rate": 6.937940731587806e-06, + "loss": 0.1471, "step": 19600 }, { - "epoch": 5.086720165674346, - "grad_norm": 0.5916788578033447, - "learning_rate": 5.4623637357264085e-06, - "loss": 0.0128, + "epoch": 5.651423641069888, + "grad_norm": 4.9774274826049805, + "learning_rate": 6.9272886086196985e-06, + "loss": 0.1384, "step": 19650 }, { - "epoch": 5.09966347398395, - "grad_norm": 0.7647702097892761, - "learning_rate": 5.44798228204907e-06, - "loss": 0.0127, + "epoch": 5.665803853897038, + "grad_norm": 10.97520923614502, + "learning_rate": 6.916636485651591e-06, + "loss": 0.1534, "step": 19700 }, { - "epoch": 5.112606782293554, - "grad_norm": 1.958817481994629, - "learning_rate": 5.433600828371732e-06, - "loss": 0.0166, + "epoch": 5.680184066724188, + "grad_norm": 7.5881757736206055, + "learning_rate": 6.905984362683483e-06, + "loss": 0.1406, "step": 19750 }, { - "epoch": 5.125550090603158, - "grad_norm": 5.420085430145264, - "learning_rate": 5.419219374694394e-06, - "loss": 0.0115, + "epoch": 5.694564279551337, + "grad_norm": 7.9618706703186035, + "learning_rate": 6.895332239715376e-06, + "loss": 0.1472, "step": 19800 }, { - "epoch": 5.138493398912762, - "grad_norm": 4.039155960083008, - "learning_rate": 5.404837921017057e-06, - "loss": 0.0099, + "epoch": 5.708944492378487, + "grad_norm": 5.872225761413574, + "learning_rate": 6.884680116747269e-06, + "loss": 0.1375, "step": 19850 }, { - "epoch": 5.151436707222366, - "grad_norm": 3.975069999694824, - "learning_rate": 5.390456467339719e-06, - "loss": 0.0211, + "epoch": 5.723324705205637, + "grad_norm": 7.904740333557129, + "learning_rate": 6.874027993779161e-06, + "loss": 0.121, "step": 19900 }, { - "epoch": 5.16438001553197, - "grad_norm": 2.953425168991089, - "learning_rate": 5.376075013662381e-06, - "loss": 0.0131, + "epoch": 5.737704918032787, + "grad_norm": 4.407808780670166, + "learning_rate": 6.863375870811053e-06, + "loss": 0.143, "step": 19950 }, { - "epoch": 5.1773233238415735, - "grad_norm": 0.4892643392086029, - "learning_rate": 5.361693559985044e-06, - "loss": 0.0107, + "epoch": 5.752085130859937, + "grad_norm": 5.364521503448486, + "learning_rate": 6.8527237478429455e-06, + "loss": 0.1235, "step": 20000 }, { - "epoch": 5.190266632151178, - "grad_norm": 0.9816207885742188, - "learning_rate": 5.347312106307706e-06, - "loss": 0.0119, + "epoch": 5.766465343687086, + "grad_norm": 6.775603771209717, + "learning_rate": 6.842071624874839e-06, + "loss": 0.144, "step": 20050 }, { - "epoch": 5.2032099404607814, - "grad_norm": 0.6787395477294922, - "learning_rate": 5.332930652630368e-06, - "loss": 0.0107, + "epoch": 5.780845556514237, + "grad_norm": 5.694010257720947, + "learning_rate": 6.831419501906731e-06, + "loss": 0.1615, "step": 20100 }, { - "epoch": 5.216153248770386, - "grad_norm": 2.689342737197876, - "learning_rate": 5.318549198953031e-06, - "loss": 0.0118, + "epoch": 5.7952257693413864, + "grad_norm": 9.528105735778809, + "learning_rate": 6.820767378938623e-06, + "loss": 0.1348, "step": 20150 }, { - "epoch": 5.229096557079989, - "grad_norm": 1.7226523160934448, - "learning_rate": 5.304167745275693e-06, - "loss": 0.0147, + "epoch": 5.809605982168536, + "grad_norm": 5.058722972869873, + "learning_rate": 6.810115255970515e-06, + "loss": 0.1134, "step": 20200 }, { - "epoch": 5.242039865389594, - "grad_norm": 0.6867370009422302, - "learning_rate": 5.289786291598355e-06, - "loss": 0.0091, + "epoch": 5.823986194995686, + "grad_norm": 8.179247856140137, + "learning_rate": 6.799463133002407e-06, + "loss": 0.1532, "step": 20250 }, { - "epoch": 5.254983173699197, - "grad_norm": 2.8953654766082764, - "learning_rate": 5.275404837921018e-06, - "loss": 0.0105, + "epoch": 5.838366407822836, + "grad_norm": 7.784989356994629, + "learning_rate": 6.788811010034301e-06, + "loss": 0.1186, "step": 20300 }, { - "epoch": 5.267926482008802, - "grad_norm": 6.105691432952881, - "learning_rate": 5.26102338424368e-06, - "loss": 0.0114, + "epoch": 5.852746620649985, + "grad_norm": 7.662126064300537, + "learning_rate": 6.778158887066193e-06, + "loss": 0.1244, "step": 20350 }, { - "epoch": 5.280869790318405, - "grad_norm": 2.8763232231140137, - "learning_rate": 5.246641930566342e-06, - "loss": 0.0152, + "epoch": 5.867126833477135, + "grad_norm": 10.274040222167969, + "learning_rate": 6.767506764098085e-06, + "loss": 0.1177, "step": 20400 }, { - "epoch": 5.293813098628009, - "grad_norm": 0.7701404094696045, - "learning_rate": 5.232260476889004e-06, - "loss": 0.0136, + "epoch": 5.881507046304286, + "grad_norm": 8.004316329956055, + "learning_rate": 6.756854641129977e-06, + "loss": 0.1703, "step": 20450 }, { - "epoch": 5.306756406937613, - "grad_norm": 5.06765604019165, - "learning_rate": 5.217879023211667e-06, - "loss": 0.0118, + "epoch": 5.8958872591314355, + "grad_norm": 11.958636283874512, + "learning_rate": 6.7462025181618705e-06, + "loss": 0.1279, "step": 20500 }, { - "epoch": 5.319699715247217, - "grad_norm": 0.8552833795547485, - "learning_rate": 5.203497569534329e-06, - "loss": 0.0152, + "epoch": 5.910267471958585, + "grad_norm": 7.620950698852539, + "learning_rate": 6.735550395193763e-06, + "loss": 0.1494, "step": 20550 }, { - "epoch": 5.332643023556821, - "grad_norm": 8.169344902038574, - "learning_rate": 5.189116115856992e-06, - "loss": 0.0137, + "epoch": 5.924647684785735, + "grad_norm": 6.0643439292907715, + "learning_rate": 6.724898272225655e-06, + "loss": 0.1367, "step": 20600 }, { - "epoch": 5.345586331866425, - "grad_norm": 2.8536713123321533, - "learning_rate": 5.1747346621796535e-06, - "loss": 0.0179, + "epoch": 5.939027897612885, + "grad_norm": 6.858212471008301, + "learning_rate": 6.714246149257548e-06, + "loss": 0.1566, "step": 20650 }, { - "epoch": 5.358529640176029, - "grad_norm": 4.006629943847656, - "learning_rate": 5.160353208502315e-06, - "loss": 0.0148, + "epoch": 5.9534081104400345, + "grad_norm": 6.515738010406494, + "learning_rate": 6.70359402628944e-06, + "loss": 0.1564, "step": 20700 }, { - "epoch": 5.371472948485633, - "grad_norm": 2.0308613777160645, - "learning_rate": 5.145971754824978e-06, - "loss": 0.0096, + "epoch": 5.967788323267184, + "grad_norm": 5.500386714935303, + "learning_rate": 6.692941903321333e-06, + "loss": 0.1369, "step": 20750 }, { - "epoch": 5.3844162567952365, - "grad_norm": 0.8264743089675903, - "learning_rate": 5.131590301147641e-06, - "loss": 0.012, + "epoch": 5.982168536094334, + "grad_norm": 9.36228084564209, + "learning_rate": 6.682289780353225e-06, + "loss": 0.1471, "step": 20800 }, { - "epoch": 5.397359565104841, - "grad_norm": 1.9350370168685913, - "learning_rate": 5.117208847470303e-06, - "loss": 0.021, + "epoch": 5.996548748921484, + "grad_norm": 7.526278495788574, + "learning_rate": 6.671637657385117e-06, + "loss": 0.1467, "step": 20850 }, { - "epoch": 5.4103028734144445, - "grad_norm": 3.08841872215271, - "learning_rate": 5.102827393792965e-06, - "loss": 0.01, + "epoch": 6.0, + "eval_cer": 13.272967462376176, + "eval_exact_match": 29.699126496279522, + "eval_loss": 0.3985883593559265, + "eval_runtime": 682.6938, + "eval_samples_per_second": 4.528, + "eval_steps_per_second": 0.567, + "eval_wer": 29.863849290600886, + "step": 20862 + }, + { + "epoch": 6.0109289617486334, + "grad_norm": 7.559967041015625, + "learning_rate": 6.660985534417009e-06, + "loss": 0.0903, "step": 20900 }, { - "epoch": 5.423246181724049, - "grad_norm": 3.0373833179473877, - "learning_rate": 5.088445940115627e-06, - "loss": 0.0123, + "epoch": 6.025309174575784, + "grad_norm": 5.093621730804443, + "learning_rate": 6.650333411448903e-06, + "loss": 0.0905, "step": 20950 }, { - "epoch": 5.4361894900336525, - "grad_norm": 0.12145110964775085, - "learning_rate": 5.074064486438289e-06, - "loss": 0.0119, + "epoch": 6.039689387402934, + "grad_norm": 2.763209581375122, + "learning_rate": 6.6396812884807946e-06, + "loss": 0.0865, "step": 21000 }, { - "epoch": 5.449132798343257, - "grad_norm": 2.9393069744110107, - "learning_rate": 5.059683032760951e-06, - "loss": 0.0115, + "epoch": 6.054069600230084, + "grad_norm": 5.117705345153809, + "learning_rate": 6.629029165512687e-06, + "loss": 0.0836, "step": 21050 }, { - "epoch": 5.4620761066528605, - "grad_norm": 0.9441426992416382, - "learning_rate": 5.0453015790836146e-06, - "loss": 0.0127, + "epoch": 6.068449813057233, + "grad_norm": 4.683279514312744, + "learning_rate": 6.618377042544579e-06, + "loss": 0.1157, "step": 21100 }, { - "epoch": 5.475019414962464, - "grad_norm": 1.0003094673156738, - "learning_rate": 5.030920125406276e-06, - "loss": 0.0166, + "epoch": 6.082830025884383, + "grad_norm": 5.123689651489258, + "learning_rate": 6.6077249195764726e-06, + "loss": 0.1068, "step": 21150 }, { - "epoch": 5.4879627232720685, - "grad_norm": 3.650327682495117, - "learning_rate": 5.016538671728939e-06, - "loss": 0.012, + "epoch": 6.097210238711533, + "grad_norm": 3.3333637714385986, + "learning_rate": 6.597072796608364e-06, + "loss": 0.0927, "step": 21200 }, { - "epoch": 5.500906031581672, - "grad_norm": 2.2948007583618164, - "learning_rate": 5.002157218051601e-06, - "loss": 0.0204, + "epoch": 6.1115904515386825, + "grad_norm": 8.096817016601562, + "learning_rate": 6.586420673640257e-06, + "loss": 0.1283, "step": 21250 }, { - "epoch": 5.5138493398912765, - "grad_norm": 1.089821219444275, - "learning_rate": 4.987775764374264e-06, - "loss": 0.0087, + "epoch": 6.125970664365832, + "grad_norm": 2.0491201877593994, + "learning_rate": 6.575768550672149e-06, + "loss": 0.0854, "step": 21300 }, { - "epoch": 5.52679264820088, - "grad_norm": 2.964750051498413, - "learning_rate": 4.973394310696926e-06, - "loss": 0.009, + "epoch": 6.140350877192983, + "grad_norm": 5.6842522621154785, + "learning_rate": 6.565116427704042e-06, + "loss": 0.0874, "step": 21350 }, { - "epoch": 5.5397359565104844, - "grad_norm": 2.492793083190918, - "learning_rate": 4.9590128570195875e-06, - "loss": 0.013, + "epoch": 6.154731090020133, + "grad_norm": 3.7879271507263184, + "learning_rate": 6.554464304735935e-06, + "loss": 0.1081, "step": 21400 }, { - "epoch": 5.552679264820088, - "grad_norm": 18.94529151916504, - "learning_rate": 4.94463140334225e-06, - "loss": 0.0217, + "epoch": 6.169111302847282, + "grad_norm": 5.444622993469238, + "learning_rate": 6.543812181767827e-06, + "loss": 0.0984, "step": 21450 }, { - "epoch": 5.565622573129692, - "grad_norm": 0.21670395135879517, - "learning_rate": 4.930249949664913e-06, - "loss": 0.0289, + "epoch": 6.183491515674432, + "grad_norm": 6.1683220863342285, + "learning_rate": 6.533160058799719e-06, + "loss": 0.1284, "step": 21500 }, { - "epoch": 5.578565881439296, - "grad_norm": 1.121397852897644, - "learning_rate": 4.915868495987575e-06, - "loss": 0.0089, + "epoch": 6.197871728501582, + "grad_norm": 6.120113372802734, + "learning_rate": 6.522507935831611e-06, + "loss": 0.0873, "step": 21550 }, { - "epoch": 5.5915091897488995, - "grad_norm": 2.1545515060424805, - "learning_rate": 4.9014870423102375e-06, - "loss": 0.015, + "epoch": 6.212251941328732, + "grad_norm": 4.103006839752197, + "learning_rate": 6.511855812863505e-06, + "loss": 0.1003, "step": 21600 }, { - "epoch": 5.604452498058504, - "grad_norm": 13.737982749938965, - "learning_rate": 4.887105588632899e-06, - "loss": 0.0217, + "epoch": 6.226632154155881, + "grad_norm": 5.822710990905762, + "learning_rate": 6.501416732354758e-06, + "loss": 0.1237, "step": 21650 }, { - "epoch": 5.6173958063681075, - "grad_norm": 3.22420072555542, - "learning_rate": 4.872724134955561e-06, - "loss": 0.0095, + "epoch": 6.241012366983031, + "grad_norm": 7.072727680206299, + "learning_rate": 6.4907646093866515e-06, + "loss": 0.1167, "step": 21700 }, { - "epoch": 5.630339114677712, - "grad_norm": 2.609224557876587, - "learning_rate": 4.858342681278224e-06, - "loss": 0.0156, + "epoch": 6.255392579810181, + "grad_norm": 7.321803092956543, + "learning_rate": 6.480112486418544e-06, + "loss": 0.0953, "step": 21750 }, { - "epoch": 5.6432824229873155, - "grad_norm": 4.490423679351807, - "learning_rate": 4.843961227600887e-06, - "loss": 0.0101, + "epoch": 6.2697727926373314, + "grad_norm": 3.972445011138916, + "learning_rate": 6.469460363450436e-06, + "loss": 0.0835, "step": 21800 }, { - "epoch": 5.656225731296919, - "grad_norm": 3.0184295177459717, - "learning_rate": 4.8295797739235485e-06, - "loss": 0.0126, + "epoch": 6.284153005464481, + "grad_norm": 10.883913040161133, + "learning_rate": 6.458808240482329e-06, + "loss": 0.1098, "step": 21850 }, { - "epoch": 5.6691690396065235, - "grad_norm": 4.014559745788574, - "learning_rate": 4.815198320246211e-06, - "loss": 0.0126, + "epoch": 6.298533218291631, + "grad_norm": 11.737595558166504, + "learning_rate": 6.448156117514221e-06, + "loss": 0.1187, "step": 21900 }, { - "epoch": 5.682112347916127, - "grad_norm": 7.090182304382324, - "learning_rate": 4.800816866568873e-06, - "loss": 0.0182, + "epoch": 6.312913431118781, + "grad_norm": 8.772595405578613, + "learning_rate": 6.437503994546114e-06, + "loss": 0.1036, "step": 21950 }, { - "epoch": 5.6950556562257315, - "grad_norm": 3.3935253620147705, - "learning_rate": 4.786435412891535e-06, - "loss": 0.0155, + "epoch": 6.32729364394593, + "grad_norm": 4.347011566162109, + "learning_rate": 6.426851871578006e-06, + "loss": 0.1062, "step": 22000 }, { - "epoch": 5.707998964535335, - "grad_norm": 3.5761795043945312, - "learning_rate": 4.772053959214198e-06, - "loss": 0.0102, + "epoch": 6.34167385677308, + "grad_norm": 7.275223255157471, + "learning_rate": 6.4161997486098985e-06, + "loss": 0.1015, "step": 22050 }, { - "epoch": 5.7209422728449395, - "grad_norm": 2.370244026184082, - "learning_rate": 4.75767250553686e-06, - "loss": 0.0206, + "epoch": 6.35605406960023, + "grad_norm": 7.314542770385742, + "learning_rate": 6.405547625641791e-06, + "loss": 0.0847, "step": 22100 }, { - "epoch": 5.733885581154543, - "grad_norm": 0.3268122971057892, - "learning_rate": 4.743291051859522e-06, - "loss": 0.0143, + "epoch": 6.37043428242738, + "grad_norm": 5.3802032470703125, + "learning_rate": 6.394895502673684e-06, + "loss": 0.1099, "step": 22150 }, { - "epoch": 5.7468288894641475, - "grad_norm": 1.703277587890625, - "learning_rate": 4.728909598182185e-06, - "loss": 0.0143, + "epoch": 6.384814495254529, + "grad_norm": 1.958891749382019, + "learning_rate": 6.384243379705576e-06, + "loss": 0.0812, "step": 22200 }, { - "epoch": 5.759772197773751, - "grad_norm": 2.766359329223633, - "learning_rate": 4.714528144504847e-06, - "loss": 0.0114, + "epoch": 6.39919470808168, + "grad_norm": 4.854043483734131, + "learning_rate": 6.373591256737468e-06, + "loss": 0.0933, "step": 22250 }, { - "epoch": 5.772715506083355, - "grad_norm": 0.3259863555431366, - "learning_rate": 4.700146690827509e-06, - "loss": 0.0114, + "epoch": 6.41357492090883, + "grad_norm": 4.149712562561035, + "learning_rate": 6.36293913376936e-06, + "loss": 0.0879, "step": 22300 }, { - "epoch": 5.785658814392959, - "grad_norm": 12.3453369140625, - "learning_rate": 4.685765237150171e-06, - "loss": 0.0109, + "epoch": 6.4279551337359795, + "grad_norm": 7.347317218780518, + "learning_rate": 6.352287010801254e-06, + "loss": 0.1436, "step": 22350 }, { - "epoch": 5.798602122702563, - "grad_norm": 4.0501179695129395, - "learning_rate": 4.671383783472834e-06, - "loss": 0.0134, + "epoch": 6.442335346563129, + "grad_norm": 8.191205978393555, + "learning_rate": 6.3416348878331454e-06, + "loss": 0.0867, "step": 22400 }, { - "epoch": 5.811545431012167, - "grad_norm": 3.24855637550354, - "learning_rate": 4.657002329795496e-06, - "loss": 0.015, + "epoch": 6.456715559390279, + "grad_norm": 5.150350093841553, + "learning_rate": 6.330982764865038e-06, + "loss": 0.1041, "step": 22450 }, { - "epoch": 5.824488739321771, - "grad_norm": 4.302082061767578, - "learning_rate": 4.642620876118159e-06, - "loss": 0.0272, + "epoch": 6.471095772217429, + "grad_norm": 2.917862892150879, + "learning_rate": 6.32033064189693e-06, + "loss": 0.1003, "step": 22500 }, { - "epoch": 5.837432047631374, - "grad_norm": 6.086068630218506, - "learning_rate": 4.628239422440821e-06, - "loss": 0.0134, + "epoch": 6.4854759850445785, + "grad_norm": 4.510298252105713, + "learning_rate": 6.309678518928823e-06, + "loss": 0.1065, "step": 22550 }, { - "epoch": 5.850375355940979, - "grad_norm": 0.44153615832328796, - "learning_rate": 4.613857968763483e-06, - "loss": 0.0168, + "epoch": 6.499856197871728, + "grad_norm": 4.758791923522949, + "learning_rate": 6.299026395960715e-06, + "loss": 0.0937, "step": 22600 }, { - "epoch": 5.863318664250582, - "grad_norm": 1.0860470533370972, - "learning_rate": 4.599476515086145e-06, - "loss": 0.0118, + "epoch": 6.514236410698878, + "grad_norm": 3.964017629623413, + "learning_rate": 6.288374272992608e-06, + "loss": 0.0999, "step": 22650 }, { - "epoch": 5.876261972560187, - "grad_norm": 3.069711923599243, - "learning_rate": 4.585095061408807e-06, - "loss": 0.0111, + "epoch": 6.528616623526029, + "grad_norm": 6.44435977935791, + "learning_rate": 6.2777221500245e-06, + "loss": 0.1097, "step": 22700 }, { - "epoch": 5.88920528086979, - "grad_norm": 4.1225504875183105, - "learning_rate": 4.57071360773147e-06, - "loss": 0.0118, + "epoch": 6.542996836353178, + "grad_norm": 3.654205083847046, + "learning_rate": 6.267070027056393e-06, + "loss": 0.1055, "step": 22750 }, { - "epoch": 5.9021485891793946, - "grad_norm": 1.6695607900619507, - "learning_rate": 4.5563321540541325e-06, - "loss": 0.0087, + "epoch": 6.557377049180328, + "grad_norm": 10.302783966064453, + "learning_rate": 6.256417904088286e-06, + "loss": 0.0881, "step": 22800 }, { - "epoch": 5.915091897488998, - "grad_norm": 3.2536139488220215, - "learning_rate": 4.541950700376794e-06, - "loss": 0.0143, + "epoch": 6.571757262007478, + "grad_norm": 2.7906553745269775, + "learning_rate": 6.245765781120178e-06, + "loss": 0.1293, "step": 22850 }, { - "epoch": 5.9280352057986025, - "grad_norm": 1.4655452966690063, - "learning_rate": 4.527569246699457e-06, - "loss": 0.0152, + "epoch": 6.5861374748346275, + "grad_norm": 10.464545249938965, + "learning_rate": 6.2351136581520695e-06, + "loss": 0.106, "step": 22900 }, { - "epoch": 5.940978514108206, - "grad_norm": 4.390911102294922, - "learning_rate": 4.513187793022119e-06, - "loss": 0.0117, + "epoch": 6.600517687661777, + "grad_norm": 5.289254188537598, + "learning_rate": 6.224461535183962e-06, + "loss": 0.0826, "step": 22950 }, { - "epoch": 5.95392182241781, - "grad_norm": 0.6409261226654053, - "learning_rate": 4.498806339344781e-06, - "loss": 0.014, + "epoch": 6.614897900488927, + "grad_norm": 10.255924224853516, + "learning_rate": 6.213809412215856e-06, + "loss": 0.1187, "step": 23000 }, { - "epoch": 5.966865130727414, - "grad_norm": 9.11255931854248, - "learning_rate": 4.4844248856674435e-06, - "loss": 0.0135, + "epoch": 6.629278113316077, + "grad_norm": 3.343120813369751, + "learning_rate": 6.2031572892477475e-06, + "loss": 0.098, "step": 23050 }, { - "epoch": 5.979808439037018, - "grad_norm": 3.790682554244995, - "learning_rate": 4.470043431990106e-06, - "loss": 0.0109, + "epoch": 6.6436583261432265, + "grad_norm": 5.467416286468506, + "learning_rate": 6.19250516627964e-06, + "loss": 0.1044, "step": 23100 }, { - "epoch": 5.992751747346622, - "grad_norm": 6.534693241119385, - "learning_rate": 4.455661978312768e-06, - "loss": 0.0138, + "epoch": 6.658038538970377, + "grad_norm": 6.790123462677002, + "learning_rate": 6.181853043311532e-06, + "loss": 0.0977, "step": 23150 }, { - "epoch": 6.0, - "eval_loss": 0.013288498856127262, - "eval_runtime": 88.7687, - "eval_samples_per_second": 5.633, - "eval_steps_per_second": 0.71, - "eval_wer": 2.2567439907630944, - "step": 23178 - }, - { - "epoch": 6.005695055656226, - "grad_norm": 0.7158689498901367, - "learning_rate": 4.441280524635431e-06, - "loss": 0.0119, + "epoch": 6.672418751797527, + "grad_norm": 3.312417507171631, + "learning_rate": 6.1712009203434255e-06, + "loss": 0.1101, "step": 23200 }, { - "epoch": 6.01863836396583, - "grad_norm": 2.5632243156433105, - "learning_rate": 4.426899070958093e-06, - "loss": 0.008, + "epoch": 6.686798964624677, + "grad_norm": 2.861590623855591, + "learning_rate": 6.160548797375317e-06, + "loss": 0.1179, "step": 23250 }, { - "epoch": 6.031581672275434, - "grad_norm": 1.491152286529541, - "learning_rate": 4.4125176172807545e-06, - "loss": 0.0084, + "epoch": 6.701179177451826, + "grad_norm": 9.671860694885254, + "learning_rate": 6.14989667440721e-06, + "loss": 0.1035, "step": 23300 }, { - "epoch": 6.044524980585037, - "grad_norm": 0.8352173566818237, - "learning_rate": 4.398136163603417e-06, - "loss": 0.0121, + "epoch": 6.715559390278976, + "grad_norm": 4.190345764160156, + "learning_rate": 6.139244551439102e-06, + "loss": 0.089, "step": 23350 }, { - "epoch": 6.057468288894642, - "grad_norm": 0.7601485848426819, - "learning_rate": 4.38375470992608e-06, - "loss": 0.0084, + "epoch": 6.729939603106126, + "grad_norm": 6.613372325897217, + "learning_rate": 6.128592428470995e-06, + "loss": 0.0932, "step": 23400 }, { - "epoch": 6.070411597204245, - "grad_norm": 1.012165904045105, - "learning_rate": 4.369373256248742e-06, - "loss": 0.0149, + "epoch": 6.744319815933276, + "grad_norm": 3.291827440261841, + "learning_rate": 6.117940305502887e-06, + "loss": 0.0974, "step": 23450 }, { - "epoch": 6.08335490551385, - "grad_norm": 0.7509778141975403, - "learning_rate": 4.3549918025714045e-06, - "loss": 0.0045, + "epoch": 6.758700028760425, + "grad_norm": 5.153611660003662, + "learning_rate": 6.10728818253478e-06, + "loss": 0.0949, "step": 23500 }, { - "epoch": 6.096298213823453, - "grad_norm": 6.754235744476318, - "learning_rate": 4.340610348894066e-06, - "loss": 0.0057, + "epoch": 6.773080241587575, + "grad_norm": 4.838674068450928, + "learning_rate": 6.096636059566672e-06, + "loss": 0.1116, "step": 23550 }, { - "epoch": 6.109241522133058, - "grad_norm": 0.37281331419944763, - "learning_rate": 4.326228895216728e-06, - "loss": 0.0057, + "epoch": 6.787460454414726, + "grad_norm": 6.954973220825195, + "learning_rate": 6.085983936598565e-06, + "loss": 0.0857, "step": 23600 }, { - "epoch": 6.122184830442661, - "grad_norm": 0.06306509673595428, - "learning_rate": 4.311847441539391e-06, - "loss": 0.0069, + "epoch": 6.801840667241875, + "grad_norm": 4.507706642150879, + "learning_rate": 6.075331813630457e-06, + "loss": 0.0993, "step": 23650 }, { - "epoch": 6.135128138752265, - "grad_norm": 0.4581661522388458, - "learning_rate": 4.297465987862054e-06, - "loss": 0.0055, + "epoch": 6.816220880069025, + "grad_norm": 3.1070141792297363, + "learning_rate": 6.06467969066235e-06, + "loss": 0.1041, "step": 23700 }, { - "epoch": 6.148071447061869, - "grad_norm": 0.8070671558380127, - "learning_rate": 4.2830845341847156e-06, - "loss": 0.0087, + "epoch": 6.830601092896175, + "grad_norm": 7.887919902801514, + "learning_rate": 6.0540275676942415e-06, + "loss": 0.1237, "step": 23750 }, { - "epoch": 6.161014755371473, - "grad_norm": 0.4274216294288635, - "learning_rate": 4.268703080507378e-06, - "loss": 0.016, + "epoch": 6.844981305723325, + "grad_norm": 9.148138046264648, + "learning_rate": 6.043375444726134e-06, + "loss": 0.1154, "step": 23800 }, { - "epoch": 6.173958063681077, - "grad_norm": 0.6105465292930603, - "learning_rate": 4.25432162683004e-06, - "loss": 0.0063, + "epoch": 6.859361518550474, + "grad_norm": 6.1009135246276855, + "learning_rate": 6.032723321758028e-06, + "loss": 0.0929, "step": 23850 }, { - "epoch": 6.186901371990681, - "grad_norm": 0.4398168921470642, - "learning_rate": 4.239940173152703e-06, - "loss": 0.0052, + "epoch": 6.873741731377624, + "grad_norm": Infinity, + "learning_rate": 6.0220711987899195e-06, + "loss": 0.091, "step": 23900 }, { - "epoch": 6.199844680300285, - "grad_norm": 2.373279333114624, - "learning_rate": 4.225558719475365e-06, - "loss": 0.008, + "epoch": 6.888121944204774, + "grad_norm": 4.2684502601623535, + "learning_rate": 6.011632118281174e-06, + "loss": 0.1043, "step": 23950 }, { - "epoch": 6.212787988609889, - "grad_norm": 2.742097854614258, - "learning_rate": 4.2111772657980275e-06, - "loss": 0.0062, + "epoch": 6.902502157031924, + "grad_norm": 2.119816541671753, + "learning_rate": 6.000979995313066e-06, + "loss": 0.1195, "step": 24000 }, { - "epoch": 6.225731296919492, - "grad_norm": 1.6418052911758423, - "learning_rate": 4.19679581212069e-06, - "loss": 0.0056, + "epoch": 6.916882369859074, + "grad_norm": 7.403483867645264, + "learning_rate": 5.990327872344959e-06, + "loss": 0.102, "step": 24050 }, { - "epoch": 6.238674605229097, - "grad_norm": 1.0858538150787354, - "learning_rate": 4.182414358443352e-06, - "loss": 0.0047, + "epoch": 6.931262582686224, + "grad_norm": 4.3805389404296875, + "learning_rate": 5.979675749376851e-06, + "loss": 0.1003, "step": 24100 }, { - "epoch": 6.2516179135387, - "grad_norm": 1.6831880807876587, - "learning_rate": 4.168032904766014e-06, - "loss": 0.0045, + "epoch": 6.945642795513374, + "grad_norm": 2.759535551071167, + "learning_rate": 5.969023626408744e-06, + "loss": 0.1001, "step": 24150 }, { - "epoch": 6.264561221848305, - "grad_norm": 0.5000109076499939, - "learning_rate": 4.153651451088677e-06, - "loss": 0.01, + "epoch": 6.9600230083405235, + "grad_norm": 5.0249786376953125, + "learning_rate": 5.958371503440637e-06, + "loss": 0.121, "step": 24200 }, { - "epoch": 6.277504530157908, - "grad_norm": 0.6169405579566956, - "learning_rate": 4.1392699974113385e-06, - "loss": 0.008, + "epoch": 6.974403221167673, + "grad_norm": 9.556915283203125, + "learning_rate": 5.947719380472529e-06, + "loss": 0.0954, "step": 24250 }, { - "epoch": 6.290447838467513, - "grad_norm": 1.9843913316726685, - "learning_rate": 4.124888543734001e-06, - "loss": 0.0095, + "epoch": 6.988783433994823, + "grad_norm": 3.05369234085083, + "learning_rate": 5.93706725750442e-06, + "loss": 0.0943, "step": 24300 }, { - "epoch": 6.303391146777116, - "grad_norm": 1.331559419631958, - "learning_rate": 4.110507090056663e-06, - "loss": 0.0073, + "epoch": 7.0, + "eval_cer": 13.352156038498968, + "eval_exact_match": 29.181494661921707, + "eval_loss": 0.42374831438064575, + "eval_runtime": 557.3202, + "eval_samples_per_second": 5.546, + "eval_steps_per_second": 0.694, + "eval_wer": 30.392949819701414, + "step": 24339 + }, + { + "epoch": 7.003163646821973, + "grad_norm": 2.838285446166992, + "learning_rate": 5.926415134536314e-06, + "loss": 0.0993, "step": 24350 }, { - "epoch": 6.31633445508672, - "grad_norm": 0.6855106949806213, - "learning_rate": 4.096125636379326e-06, - "loss": 0.0054, + "epoch": 7.017543859649122, + "grad_norm": 2.000446319580078, + "learning_rate": 5.915763011568207e-06, + "loss": 0.0541, "step": 24400 }, { - "epoch": 6.329277763396324, - "grad_norm": 0.35360315442085266, - "learning_rate": 4.081744182701988e-06, - "loss": 0.006, + "epoch": 7.031924072476273, + "grad_norm": 4.931741237640381, + "learning_rate": 5.905110888600098e-06, + "loss": 0.0588, "step": 24450 }, { - "epoch": 6.342221071705928, - "grad_norm": 1.6724082231521606, - "learning_rate": 4.06736272902465e-06, - "loss": 0.0032, + "epoch": 7.046304285303423, + "grad_norm": 3.232409954071045, + "learning_rate": 5.894458765631991e-06, + "loss": 0.0737, "step": 24500 }, { - "epoch": 6.355164380015532, - "grad_norm": 1.0881156921386719, - "learning_rate": 4.052981275347312e-06, - "loss": 0.0052, + "epoch": 7.0606844981305725, + "grad_norm": 4.831768989562988, + "learning_rate": 5.883806642663883e-06, + "loss": 0.0701, "step": 24550 }, { - "epoch": 6.368107688325136, - "grad_norm": 0.6318166255950928, - "learning_rate": 4.038599821669975e-06, - "loss": 0.0081, + "epoch": 7.075064710957722, + "grad_norm": 0.9118911027908325, + "learning_rate": 5.873154519695776e-06, + "loss": 0.0598, "step": 24600 }, { - "epoch": 6.38105099663474, - "grad_norm": 3.3334782123565674, - "learning_rate": 4.024218367992637e-06, - "loss": 0.0121, + "epoch": 7.089444923784872, + "grad_norm": 4.0269036293029785, + "learning_rate": 5.862502396727668e-06, + "loss": 0.065, "step": 24650 }, { - "epoch": 6.393994304944344, - "grad_norm": 0.2339646816253662, - "learning_rate": 4.0098369143152995e-06, - "loss": 0.0072, + "epoch": 7.103825136612022, + "grad_norm": 3.6321322917938232, + "learning_rate": 5.851850273759561e-06, + "loss": 0.0635, "step": 24700 }, { - "epoch": 6.406937613253948, - "grad_norm": 1.0727429389953613, - "learning_rate": 3.995743089711508e-06, - "loss": 0.0074, + "epoch": 7.1182053494391715, + "grad_norm": 3.7061376571655273, + "learning_rate": 5.841198150791453e-06, + "loss": 0.0594, "step": 24750 }, { - "epoch": 6.419880921563552, - "grad_norm": 5.548860549926758, - "learning_rate": 3.981649265107718e-06, - "loss": 0.0072, + "epoch": 7.132585562266321, + "grad_norm": 1.4980014562606812, + "learning_rate": 5.830546027823346e-06, + "loss": 0.0626, "step": 24800 }, { - "epoch": 6.432824229873155, - "grad_norm": 0.37892910838127136, - "learning_rate": 3.9672678114303795e-06, - "loss": 0.0105, + "epoch": 7.146965775093471, + "grad_norm": 5.368781089782715, + "learning_rate": 5.819893904855238e-06, + "loss": 0.0754, "step": 24850 }, { - "epoch": 6.44576753818276, - "grad_norm": 0.9245821237564087, - "learning_rate": 3.952886357753042e-06, - "loss": 0.0059, + "epoch": 7.161345987920622, + "grad_norm": 6.7934346199035645, + "learning_rate": 5.809241781887131e-06, + "loss": 0.0719, "step": 24900 }, { - "epoch": 6.458710846492363, - "grad_norm": 1.4176559448242188, - "learning_rate": 3.938504904075705e-06, - "loss": 0.005, + "epoch": 7.175726200747771, + "grad_norm": 4.865309238433838, + "learning_rate": 5.7985896589190225e-06, + "loss": 0.0663, "step": 24950 }, { - "epoch": 6.471654154801968, - "grad_norm": 2.894819736480713, - "learning_rate": 3.924123450398367e-06, - "loss": 0.0068, + "epoch": 7.190106413574921, + "grad_norm": 5.22443151473999, + "learning_rate": 5.787937535950916e-06, + "loss": 0.0689, "step": 25000 }, { - "epoch": 6.484597463111571, - "grad_norm": 1.3159211874008179, - "learning_rate": 3.909741996721029e-06, - "loss": 0.0068, + "epoch": 7.204486626402071, + "grad_norm": 4.36142110824585, + "learning_rate": 5.777285412982808e-06, + "loss": 0.0589, "step": 25050 }, { - "epoch": 6.497540771421175, - "grad_norm": 1.8089011907577515, - "learning_rate": 3.895360543043691e-06, - "loss": 0.0067, + "epoch": 7.218866839229221, + "grad_norm": 9.197185516357422, + "learning_rate": 5.7666332900147005e-06, + "loss": 0.099, "step": 25100 }, { - "epoch": 6.510484079730779, - "grad_norm": 1.673920750617981, - "learning_rate": 3.880979089366353e-06, - "loss": 0.0109, + "epoch": 7.23324705205637, + "grad_norm": 8.748276710510254, + "learning_rate": 5.755981167046592e-06, + "loss": 0.0682, "step": 25150 }, { - "epoch": 6.523427388040383, - "grad_norm": 0.7830101251602173, - "learning_rate": 3.866597635689016e-06, - "loss": 0.0055, + "epoch": 7.24762726488352, + "grad_norm": 6.202966213226318, + "learning_rate": 5.745329044078485e-06, + "loss": 0.0687, "step": 25200 }, { - "epoch": 6.536370696349987, - "grad_norm": 1.6252669095993042, - "learning_rate": 3.852216182011679e-06, - "loss": 0.0066, + "epoch": 7.26200747771067, + "grad_norm": 4.0932393074035645, + "learning_rate": 5.7346769211103785e-06, + "loss": 0.0683, "step": 25250 }, { - "epoch": 6.549314004659591, - "grad_norm": 2.7717552185058594, - "learning_rate": 3.8378347283343405e-06, - "loss": 0.0059, + "epoch": 7.27638769053782, + "grad_norm": 4.145828723907471, + "learning_rate": 5.72402479814227e-06, + "loss": 0.0617, "step": 25300 }, { - "epoch": 6.562257312969195, - "grad_norm": 7.495051383972168, - "learning_rate": 3.823453274657002e-06, - "loss": 0.015, + "epoch": 7.29076790336497, + "grad_norm": 4.058443069458008, + "learning_rate": 5.713372675174162e-06, + "loss": 0.0526, "step": 25350 }, { - "epoch": 6.575200621278799, - "grad_norm": 0.43886011838912964, - "learning_rate": 3.809071820979665e-06, - "loss": 0.0046, + "epoch": 7.30514811619212, + "grad_norm": 5.975832939147949, + "learning_rate": 5.702720552206055e-06, + "loss": 0.0797, "step": 25400 }, { - "epoch": 6.588143929588403, - "grad_norm": 0.8297833204269409, - "learning_rate": 3.7946903673023274e-06, - "loss": 0.0113, + "epoch": 7.31952832901927, + "grad_norm": 4.953127861022949, + "learning_rate": 5.692068429237948e-06, + "loss": 0.0874, "step": 25450 }, { - "epoch": 6.601087237898007, - "grad_norm": 3.7396538257598877, - "learning_rate": 3.7803089136249893e-06, - "loss": 0.0063, + "epoch": 7.333908541846419, + "grad_norm": 2.921626567840576, + "learning_rate": 5.68141630626984e-06, + "loss": 0.0698, "step": 25500 }, { - "epoch": 6.61403054620761, - "grad_norm": 3.463552236557007, - "learning_rate": 3.765927459947652e-06, - "loss": 0.0065, + "epoch": 7.348288754673569, + "grad_norm": 3.6298840045928955, + "learning_rate": 5.670764183301733e-06, + "loss": 0.0898, "step": 25550 }, { - "epoch": 6.626973854517215, - "grad_norm": 6.3341450691223145, - "learning_rate": 3.7515460062703143e-06, - "loss": 0.0076, + "epoch": 7.362668967500719, + "grad_norm": 7.3155927658081055, + "learning_rate": 5.660112060333625e-06, + "loss": 0.0787, "step": 25600 }, { - "epoch": 6.639917162826818, - "grad_norm": 1.700925350189209, - "learning_rate": 3.737164552592976e-06, - "loss": 0.0054, + "epoch": 7.377049180327869, + "grad_norm": 4.376722812652588, + "learning_rate": 5.649459937365518e-06, + "loss": 0.0701, "step": 25650 }, { - "epoch": 6.652860471136423, - "grad_norm": 6.3853044509887695, - "learning_rate": 3.722783098915639e-06, - "loss": 0.0097, + "epoch": 7.391429393155018, + "grad_norm": 5.4138360023498535, + "learning_rate": 5.63880781439741e-06, + "loss": 0.0783, "step": 25700 }, { - "epoch": 6.665803779446026, - "grad_norm": 6.10149621963501, - "learning_rate": 3.7084016452383007e-06, - "loss": 0.0082, + "epoch": 7.405809605982169, + "grad_norm": 6.097675800323486, + "learning_rate": 5.628155691429303e-06, + "loss": 0.0734, "step": 25750 }, { - "epoch": 6.67874708775563, - "grad_norm": 1.308225393295288, - "learning_rate": 3.694020191560963e-06, - "loss": 0.0099, + "epoch": 7.420189818809319, + "grad_norm": 4.135824203491211, + "learning_rate": 5.6175035684611944e-06, + "loss": 0.0825, "step": 25800 }, { - "epoch": 6.691690396065234, - "grad_norm": 4.188955307006836, - "learning_rate": 3.6796387378836257e-06, - "loss": 0.0112, + "epoch": 7.4345700316364685, + "grad_norm": 14.297745704650879, + "learning_rate": 5.606851445493087e-06, + "loss": 0.0898, "step": 25850 }, { - "epoch": 6.704633704374838, - "grad_norm": 1.8746811151504517, - "learning_rate": 3.6652572842062876e-06, - "loss": 0.0118, + "epoch": 7.448950244463618, + "grad_norm": 5.197240829467773, + "learning_rate": 5.59619932252498e-06, + "loss": 0.0746, "step": 25900 }, { - "epoch": 6.717577012684442, - "grad_norm": 0.3944805860519409, - "learning_rate": 3.65087583052895e-06, - "loss": 0.0063, + "epoch": 7.463330457290768, + "grad_norm": 5.998583793640137, + "learning_rate": 5.5855471995568724e-06, + "loss": 0.0772, "step": 25950 }, { - "epoch": 6.730520320994046, - "grad_norm": 1.3446645736694336, - "learning_rate": 3.6364943768516126e-06, - "loss": 0.0292, + "epoch": 7.477710670117918, + "grad_norm": 6.657192707061768, + "learning_rate": 5.574895076588764e-06, + "loss": 0.0839, "step": 26000 }, { - "epoch": 6.74346362930365, - "grad_norm": 10.001498222351074, - "learning_rate": 3.6221129231742745e-06, - "loss": 0.0078, + "epoch": 7.492090882945067, + "grad_norm": 4.947038173675537, + "learning_rate": 5.564242953620657e-06, + "loss": 0.089, "step": 26050 }, { - "epoch": 6.756406937613254, - "grad_norm": 3.632220983505249, - "learning_rate": 3.607731469496937e-06, - "loss": 0.0044, + "epoch": 7.506471095772217, + "grad_norm": 1.8709932565689087, + "learning_rate": 5.55359083065255e-06, + "loss": 0.0747, "step": 26100 }, { - "epoch": 6.769350245922858, - "grad_norm": 4.4222259521484375, - "learning_rate": 3.5933500158195995e-06, - "loss": 0.0093, + "epoch": 7.520851308599367, + "grad_norm": 6.512670993804932, + "learning_rate": 5.542938707684442e-06, + "loss": 0.0762, "step": 26150 }, { - "epoch": 6.782293554232462, - "grad_norm": 1.0133709907531738, - "learning_rate": 3.5789685621422614e-06, - "loss": 0.0072, + "epoch": 7.5352315214265175, + "grad_norm": 5.61523962020874, + "learning_rate": 5.532286584716334e-06, + "loss": 0.0685, "step": 26200 }, { - "epoch": 6.795236862542065, - "grad_norm": 0.1933288425207138, - "learning_rate": 3.564587108464924e-06, - "loss": 0.0063, + "epoch": 7.549611734253667, + "grad_norm": 8.071199417114258, + "learning_rate": 5.521634461748227e-06, + "loss": 0.0692, "step": 26250 }, { - "epoch": 6.80818017085167, - "grad_norm": 1.596628189086914, - "learning_rate": 3.5502056547875864e-06, - "loss": 0.0055, + "epoch": 7.563991947080817, + "grad_norm": 5.3938798904418945, + "learning_rate": 5.510982338780119e-06, + "loss": 0.0668, "step": 26300 }, { - "epoch": 6.821123479161273, - "grad_norm": 0.2668837010860443, - "learning_rate": 3.5358242011102482e-06, - "loss": 0.0059, + "epoch": 7.578372159907967, + "grad_norm": 10.504203796386719, + "learning_rate": 5.500330215812012e-06, + "loss": 0.0799, "step": 26350 }, { - "epoch": 6.834066787470878, - "grad_norm": 1.4524122476577759, - "learning_rate": 3.521442747432911e-06, - "loss": 0.0144, + "epoch": 7.5927523727351165, + "grad_norm": 7.276119709014893, + "learning_rate": 5.489678092843904e-06, + "loss": 0.08, "step": 26400 }, { - "epoch": 6.847010095780481, - "grad_norm": 0.7154669761657715, - "learning_rate": 3.5070612937555732e-06, - "loss": 0.0083, + "epoch": 7.607132585562266, + "grad_norm": 4.988595008850098, + "learning_rate": 5.4790259698757965e-06, + "loss": 0.0803, "step": 26450 }, { - "epoch": 6.859953404090086, - "grad_norm": 3.9259557723999023, - "learning_rate": 3.492679840078235e-06, - "loss": 0.0054, + "epoch": 7.621512798389416, + "grad_norm": 7.97799015045166, + "learning_rate": 5.46837384690769e-06, + "loss": 0.0787, "step": 26500 }, { - "epoch": 6.872896712399689, - "grad_norm": 2.91253924369812, - "learning_rate": 3.478298386400898e-06, - "loss": 0.0058, + "epoch": 7.635893011216566, + "grad_norm": 11.047422409057617, + "learning_rate": 5.457721723939582e-06, + "loss": 0.0737, "step": 26550 }, { - "epoch": 6.885840020709293, - "grad_norm": 0.6866968870162964, - "learning_rate": 3.46391693272356e-06, - "loss": 0.0089, + "epoch": 7.6502732240437155, + "grad_norm": 8.083878517150879, + "learning_rate": 5.447069600971474e-06, + "loss": 0.0582, "step": 26600 }, { - "epoch": 6.898783329018897, - "grad_norm": 0.4900106191635132, - "learning_rate": 3.449535479046222e-06, - "loss": 0.015, + "epoch": 7.664653436870866, + "grad_norm": 4.277724742889404, + "learning_rate": 5.436417478003366e-06, + "loss": 0.0666, "step": 26650 }, { - "epoch": 6.911726637328501, - "grad_norm": 0.8514009118080139, - "learning_rate": 3.4351540253688847e-06, - "loss": 0.0071, + "epoch": 7.679033649698016, + "grad_norm": 6.85367488861084, + "learning_rate": 5.425765355035258e-06, + "loss": 0.0818, "step": 26700 }, { - "epoch": 6.924669945638105, - "grad_norm": 1.1547664403915405, - "learning_rate": 3.420772571691547e-06, - "loss": 0.007, + "epoch": 7.693413862525166, + "grad_norm": 10.06192684173584, + "learning_rate": 5.415326274526513e-06, + "loss": 0.0785, "step": 26750 }, { - "epoch": 6.937613253947709, - "grad_norm": 1.6039056777954102, - "learning_rate": 3.406391118014209e-06, - "loss": 0.0049, + "epoch": 7.707794075352315, + "grad_norm": 3.9791293144226074, + "learning_rate": 5.404674151558406e-06, + "loss": 0.0706, "step": 26800 }, { - "epoch": 6.950556562257313, - "grad_norm": 0.2261231243610382, - "learning_rate": 3.3920096643368716e-06, - "loss": 0.006, + "epoch": 7.722174288179465, + "grad_norm": 5.46040678024292, + "learning_rate": 5.394022028590299e-06, + "loss": 0.0751, "step": 26850 }, { - "epoch": 6.963499870566917, - "grad_norm": 2.033464193344116, - "learning_rate": 3.377628210659534e-06, - "loss": 0.0076, + "epoch": 7.736554501006615, + "grad_norm": 6.385767936706543, + "learning_rate": 5.383369905622191e-06, + "loss": 0.0794, "step": 26900 }, { - "epoch": 6.97644317887652, - "grad_norm": 2.289121150970459, - "learning_rate": 3.3632467569821957e-06, - "loss": 0.0114, + "epoch": 7.7509347138337645, + "grad_norm": 3.5459275245666504, + "learning_rate": 5.372717782654084e-06, + "loss": 0.0789, "step": 26950 }, { - "epoch": 6.989386487186125, - "grad_norm": 1.8845149278640747, - "learning_rate": 3.3488653033048584e-06, - "loss": 0.0067, + "epoch": 7.765314926660914, + "grad_norm": 10.974268913269043, + "learning_rate": 5.3620656596859755e-06, + "loss": 0.0734, "step": 27000 }, { - "epoch": 7.0, - "eval_loss": 0.0072191799990832806, - "eval_runtime": 89.4574, - "eval_samples_per_second": 5.589, - "eval_steps_per_second": 0.704, - "eval_wer": 2.004828382491865, - "step": 27041 - }, - { - "epoch": 7.002329795495728, - "grad_norm": 0.09408234804868698, - "learning_rate": 3.3344838496275207e-06, - "loss": 0.0059, + "epoch": 7.779695139488064, + "grad_norm": 6.879441261291504, + "learning_rate": 5.351413536717869e-06, + "loss": 0.0723, "step": 27050 }, { - "epoch": 7.015273103805333, - "grad_norm": 0.22423428297042847, - "learning_rate": 3.3201023959501826e-06, - "loss": 0.0033, + "epoch": 7.794075352315215, + "grad_norm": 2.2463290691375732, + "learning_rate": 5.340761413749761e-06, + "loss": 0.0813, "step": 27100 }, { - "epoch": 7.028216412114936, - "grad_norm": 0.37666457891464233, - "learning_rate": 3.3057209422728453e-06, - "loss": 0.0047, + "epoch": 7.808455565142364, + "grad_norm": 1.1412861347198486, + "learning_rate": 5.3301092907816535e-06, + "loss": 0.0655, "step": 27150 }, { - "epoch": 7.041159720424541, - "grad_norm": 0.46821069717407227, - "learning_rate": 3.2913394885955076e-06, - "loss": 0.0023, + "epoch": 7.822835777969514, + "grad_norm": 2.3895058631896973, + "learning_rate": 5.319457167813545e-06, + "loss": 0.0712, "step": 27200 }, { - "epoch": 7.054103028734144, - "grad_norm": 0.20124687254428864, - "learning_rate": 3.2769580349181695e-06, - "loss": 0.0042, + "epoch": 7.837215990796664, + "grad_norm": 8.584156036376953, + "learning_rate": 5.308805044845439e-06, + "loss": 0.0875, "step": 27250 }, { - "epoch": 7.067046337043748, - "grad_norm": 0.15090468525886536, - "learning_rate": 3.262576581240832e-06, - "loss": 0.0026, + "epoch": 7.851596203623814, + "grad_norm": 5.652284145355225, + "learning_rate": 5.298152921877331e-06, + "loss": 0.0638, "step": 27300 }, { - "epoch": 7.079989645353352, - "grad_norm": 0.24649310111999512, - "learning_rate": 3.2481951275634945e-06, - "loss": 0.0104, + "epoch": 7.865976416450963, + "grad_norm": 3.194467067718506, + "learning_rate": 5.287500798909223e-06, + "loss": 0.0673, "step": 27350 }, { - "epoch": 7.092932953662956, - "grad_norm": 0.5062503814697266, - "learning_rate": 3.2338136738861563e-06, - "loss": 0.0044, + "epoch": 7.880356629278113, + "grad_norm": 5.687691688537598, + "learning_rate": 5.276848675941115e-06, + "loss": 0.0657, "step": 27400 }, { - "epoch": 7.10587626197256, - "grad_norm": 1.3508222103118896, - "learning_rate": 3.219432220208819e-06, - "loss": 0.0038, + "epoch": 7.894736842105263, + "grad_norm": 6.773519039154053, + "learning_rate": 5.266196552973008e-06, + "loss": 0.0555, "step": 27450 }, { - "epoch": 7.118819570282164, - "grad_norm": 0.41208532452583313, - "learning_rate": 3.2050507665314814e-06, - "loss": 0.0039, + "epoch": 7.909117054932413, + "grad_norm": 6.663157939910889, + "learning_rate": 5.2555444300049004e-06, + "loss": 0.0638, "step": 27500 }, { - "epoch": 7.131762878591768, - "grad_norm": 0.5694621801376343, - "learning_rate": 3.1906693128541436e-06, - "loss": 0.0051, + "epoch": 7.923497267759563, + "grad_norm": 6.29538106918335, + "learning_rate": 5.244892307036793e-06, + "loss": 0.0783, "step": 27550 }, { - "epoch": 7.144706186901372, - "grad_norm": 0.21710887551307678, - "learning_rate": 3.176287859176806e-06, - "loss": 0.003, + "epoch": 7.937877480586713, + "grad_norm": 1.8918122053146362, + "learning_rate": 5.234240184068685e-06, + "loss": 0.0744, "step": 27600 }, { - "epoch": 7.1576494952109755, - "grad_norm": 18.37504005432129, - "learning_rate": 3.1619064054994682e-06, - "loss": 0.0074, + "epoch": 7.952257693413863, + "grad_norm": 3.5075416564941406, + "learning_rate": 5.223588061100578e-06, + "loss": 0.0662, "step": 27650 }, { - "epoch": 7.17059280352058, - "grad_norm": 0.10800693929195404, - "learning_rate": 3.1475249518221305e-06, - "loss": 0.0098, + "epoch": 7.966637906241012, + "grad_norm": 7.569982051849365, + "learning_rate": 5.21293593813247e-06, + "loss": 0.0709, "step": 27700 }, { - "epoch": 7.1835361118301835, - "grad_norm": 0.9992019534111023, - "learning_rate": 3.133143498144793e-06, - "loss": 0.0037, + "epoch": 7.981018119068162, + "grad_norm": 3.7677760124206543, + "learning_rate": 5.202283815164363e-06, + "loss": 0.0601, "step": 27750 }, { - "epoch": 7.196479420139788, - "grad_norm": 0.5260515213012695, - "learning_rate": 3.118762044467455e-06, - "loss": 0.0059, + "epoch": 7.995398331895312, + "grad_norm": 4.82183837890625, + "learning_rate": 5.191631692196255e-06, + "loss": 0.0725, "step": 27800 }, { - "epoch": 7.2094227284493915, - "grad_norm": 0.16195982694625854, - "learning_rate": 3.1043805907901174e-06, - "loss": 0.0033, + "epoch": 8.0, + "eval_cer": 13.277777983355598, + "eval_exact_match": 29.407958589453255, + "eval_loss": 0.4477599859237671, + "eval_runtime": 645.2252, + "eval_samples_per_second": 4.791, + "eval_steps_per_second": 0.6, + "eval_wer": 30.136824722811983, + "step": 27816 + }, + { + "epoch": 8.009778544722462, + "grad_norm": 2.5289196968078613, + "learning_rate": 5.180979569228147e-06, + "loss": 0.0487, "step": 27850 }, { - "epoch": 7.222366036758996, - "grad_norm": 0.0777490958571434, - "learning_rate": 3.0899991371127797e-06, - "loss": 0.0031, + "epoch": 8.024158757549612, + "grad_norm": 1.1554434299468994, + "learning_rate": 5.170327446260041e-06, + "loss": 0.0341, "step": 27900 }, { - "epoch": 7.2353093450685995, - "grad_norm": 2.360994577407837, - "learning_rate": 3.0756176834354416e-06, - "loss": 0.0042, + "epoch": 8.038538970376761, + "grad_norm": 5.862917900085449, + "learning_rate": 5.159675323291933e-06, + "loss": 0.0486, "step": 27950 }, { - "epoch": 7.248252653378204, - "grad_norm": 0.7095078825950623, - "learning_rate": 3.0612362297581043e-06, - "loss": 0.0024, + "epoch": 8.052919183203912, + "grad_norm": 3.4061875343322754, + "learning_rate": 5.1490232003238246e-06, + "loss": 0.0426, "step": 28000 }, { - "epoch": 7.2611959616878075, - "grad_norm": 0.8493836522102356, - "learning_rate": 3.0468547760807666e-06, - "loss": 0.0061, + "epoch": 8.06729939603106, + "grad_norm": 5.212263584136963, + "learning_rate": 5.138371077355717e-06, + "loss": 0.0446, "step": 28050 }, { - "epoch": 7.274139269997411, - "grad_norm": 3.106424570083618, - "learning_rate": 3.0324733224034284e-06, - "loss": 0.0025, + "epoch": 8.081679608858211, + "grad_norm": 2.194293737411499, + "learning_rate": 5.127718954387609e-06, + "loss": 0.0504, "step": 28100 }, { - "epoch": 7.2870825783070154, - "grad_norm": 0.9348524808883667, - "learning_rate": 3.018091868726091e-06, - "loss": 0.003, + "epoch": 8.09605982168536, + "grad_norm": 5.226876258850098, + "learning_rate": 5.1170668314195025e-06, + "loss": 0.0578, "step": 28150 }, { - "epoch": 7.300025886616619, - "grad_norm": 4.065819263458252, - "learning_rate": 3.0037104150487534e-06, - "loss": 0.0026, + "epoch": 8.11044003451251, + "grad_norm": 3.4195008277893066, + "learning_rate": 5.106414708451395e-06, + "loss": 0.038, "step": 28200 }, { - "epoch": 7.312969194926223, - "grad_norm": 0.16182902455329895, - "learning_rate": 2.9893289613714153e-06, - "loss": 0.0035, + "epoch": 8.124820247339661, + "grad_norm": 2.808178424835205, + "learning_rate": 5.095762585483287e-06, + "loss": 0.0489, "step": 28250 }, { - "epoch": 7.325912503235827, - "grad_norm": 0.36588823795318604, - "learning_rate": 2.974947507694078e-06, - "loss": 0.0042, + "epoch": 8.13920046016681, + "grad_norm": 0.4899284839630127, + "learning_rate": 5.085110462515179e-06, + "loss": 0.049, "step": 28300 }, { - "epoch": 7.3388558115454305, - "grad_norm": 0.47314420342445374, - "learning_rate": 2.9605660540167403e-06, - "loss": 0.0033, + "epoch": 8.15358067299396, + "grad_norm": 13.659078598022461, + "learning_rate": 5.074458339547072e-06, + "loss": 0.0555, "step": 28350 }, { - "epoch": 7.351799119855035, - "grad_norm": 7.304609298706055, - "learning_rate": 2.946184600339402e-06, - "loss": 0.0028, + "epoch": 8.16796088582111, + "grad_norm": 6.391570568084717, + "learning_rate": 5.063806216578965e-06, + "loss": 0.092, "step": 28400 }, { - "epoch": 7.3647424281646385, - "grad_norm": 0.591309130191803, - "learning_rate": 2.931803146662065e-06, - "loss": 0.0057, + "epoch": 8.18234109864826, + "grad_norm": 2.4988396167755127, + "learning_rate": 5.053154093610857e-06, + "loss": 0.0359, "step": 28450 }, { - "epoch": 7.377685736474243, - "grad_norm": 0.7642752528190613, - "learning_rate": 2.917421692984727e-06, - "loss": 0.0038, + "epoch": 8.19672131147541, + "grad_norm": 5.211406230926514, + "learning_rate": 5.0425019706427495e-06, + "loss": 0.0393, "step": 28500 }, { - "epoch": 7.3906290447838465, - "grad_norm": 0.9871138334274292, - "learning_rate": 2.903040239307389e-06, - "loss": 0.0038, + "epoch": 8.21110152430256, + "grad_norm": 7.385959148406982, + "learning_rate": 5.031849847674642e-06, + "loss": 0.0776, "step": 28550 }, { - "epoch": 7.403572353093451, - "grad_norm": 0.7224917411804199, - "learning_rate": 2.8886587856300518e-06, - "loss": 0.0057, + "epoch": 8.225481737129709, + "grad_norm": 2.5291340351104736, + "learning_rate": 5.021197724706535e-06, + "loss": 0.0513, "step": 28600 }, { - "epoch": 7.4165156614030545, - "grad_norm": 0.7096822261810303, - "learning_rate": 2.874277331952714e-06, - "loss": 0.0031, + "epoch": 8.23986194995686, + "grad_norm": 2.038648843765259, + "learning_rate": 5.010545601738427e-06, + "loss": 0.0483, "step": 28650 }, { - "epoch": 7.429458969712659, - "grad_norm": 1.5942363739013672, - "learning_rate": 2.859895878275376e-06, - "loss": 0.0041, + "epoch": 8.25424216278401, + "grad_norm": 6.53122091293335, + "learning_rate": 4.999893478770319e-06, + "loss": 0.0542, "step": 28700 }, { - "epoch": 7.4424022780222625, - "grad_norm": 0.6390734910964966, - "learning_rate": 2.8455144245980386e-06, - "loss": 0.0031, + "epoch": 8.268622375611159, + "grad_norm": 2.258378267288208, + "learning_rate": 4.989241355802212e-06, + "loss": 0.0537, "step": 28750 }, { - "epoch": 7.455345586331866, - "grad_norm": 0.8184775114059448, - "learning_rate": 2.831132970920701e-06, - "loss": 0.0088, + "epoch": 8.28300258843831, + "grad_norm": 3.007385492324829, + "learning_rate": 4.978589232834104e-06, + "loss": 0.0454, "step": 28800 }, { - "epoch": 7.4682888946414705, - "grad_norm": 0.07598695158958435, - "learning_rate": 2.816751517243363e-06, - "loss": 0.0033, + "epoch": 8.297382801265458, + "grad_norm": 2.712270736694336, + "learning_rate": 4.9679371098659965e-06, + "loss": 0.0453, "step": 28850 }, { - "epoch": 7.481232202951074, - "grad_norm": 0.4833034873008728, - "learning_rate": 2.8023700635660255e-06, - "loss": 0.0045, + "epoch": 8.311763014092609, + "grad_norm": 2.8502933979034424, + "learning_rate": 4.957284986897889e-06, + "loss": 0.0478, "step": 28900 }, { - "epoch": 7.4941755112606785, - "grad_norm": 0.30186623334884644, - "learning_rate": 2.787988609888688e-06, - "loss": 0.0053, + "epoch": 8.326143226919758, + "grad_norm": 6.113819122314453, + "learning_rate": 4.946632863929782e-06, + "loss": 0.0417, "step": 28950 }, { - "epoch": 7.507118819570282, - "grad_norm": 0.9921897053718567, - "learning_rate": 2.7736071562113497e-06, - "loss": 0.0026, + "epoch": 8.340523439746908, + "grad_norm": 11.011100769042969, + "learning_rate": 4.935980740961674e-06, + "loss": 0.0429, "step": 29000 }, { - "epoch": 7.5200621278798865, - "grad_norm": 2.7632157802581787, - "learning_rate": 2.7592257025340124e-06, - "loss": 0.0123, + "epoch": 8.354903652574059, + "grad_norm": 3.958904981613159, + "learning_rate": 4.925328617993566e-06, + "loss": 0.0459, "step": 29050 }, { - "epoch": 7.53300543618949, - "grad_norm": 0.11869651824235916, - "learning_rate": 2.7448442488566747e-06, - "loss": 0.0027, + "epoch": 8.369283865401208, + "grad_norm": 1.1051028966903687, + "learning_rate": 4.914676495025459e-06, + "loss": 0.0331, "step": 29100 }, { - "epoch": 7.545948744499094, - "grad_norm": 0.14628329873085022, - "learning_rate": 2.7304627951793374e-06, - "loss": 0.003, + "epoch": 8.383664078228358, + "grad_norm": 4.358214378356934, + "learning_rate": 4.904024372057352e-06, + "loss": 0.0484, "step": 29150 }, { - "epoch": 7.558892052808698, - "grad_norm": 0.4393390715122223, - "learning_rate": 2.7160813415019993e-06, - "loss": 0.0026, + "epoch": 8.398044291055507, + "grad_norm": 3.0590085983276367, + "learning_rate": 4.893585291548606e-06, + "loss": 0.0499, "step": 29200 }, { - "epoch": 7.571835361118302, - "grad_norm": 2.097261428833008, - "learning_rate": 2.7016998878246616e-06, - "loss": 0.0026, + "epoch": 8.412424503882658, + "grad_norm": 4.3461480140686035, + "learning_rate": 4.882933168580499e-06, + "loss": 0.0613, "step": 29250 }, { - "epoch": 7.584778669427906, - "grad_norm": 0.23214209079742432, - "learning_rate": 2.6873184341473243e-06, - "loss": 0.0152, + "epoch": 8.426804716709807, + "grad_norm": 1.7905633449554443, + "learning_rate": 4.872281045612391e-06, + "loss": 0.0639, "step": 29300 }, { - "epoch": 7.59772197773751, - "grad_norm": 6.099156379699707, - "learning_rate": 2.672936980469986e-06, - "loss": 0.0061, + "epoch": 8.441184929536957, + "grad_norm": 2.587761163711548, + "learning_rate": 4.861628922644284e-06, + "loss": 0.0492, "step": 29350 }, { - "epoch": 7.610665286047114, - "grad_norm": 1.0887069702148438, - "learning_rate": 2.6585555267926484e-06, - "loss": 0.0119, + "epoch": 8.455565142364106, + "grad_norm": 3.7630443572998047, + "learning_rate": 4.850976799676176e-06, + "loss": 0.0451, "step": 29400 }, { - "epoch": 7.623608594356718, - "grad_norm": 1.0223588943481445, - "learning_rate": 2.644174073115311e-06, - "loss": 0.0027, + "epoch": 8.469945355191257, + "grad_norm": 3.300318956375122, + "learning_rate": 4.840324676708068e-06, + "loss": 0.0517, "step": 29450 }, { - "epoch": 7.636551902666321, - "grad_norm": 6.282520771026611, - "learning_rate": 2.629792619437973e-06, - "loss": 0.0035, + "epoch": 8.484325568018406, + "grad_norm": 4.164731025695801, + "learning_rate": 4.829672553739961e-06, + "loss": 0.0482, "step": 29500 }, { - "epoch": 7.6494952109759256, - "grad_norm": 0.21501053869724274, - "learning_rate": 2.6154111657606353e-06, - "loss": 0.009, + "epoch": 8.498705780845556, + "grad_norm": 0.9260162115097046, + "learning_rate": 4.819020430771853e-06, + "loss": 0.0484, "step": 29550 }, { - "epoch": 7.662438519285529, - "grad_norm": 1.1203105449676514, - "learning_rate": 2.6013173411568443e-06, - "loss": 0.006, + "epoch": 8.513085993672707, + "grad_norm": 4.3906660079956055, + "learning_rate": 4.808368307803746e-06, + "loss": 0.0433, "step": 29600 }, { - "epoch": 7.6753818275951335, - "grad_norm": 0.24988429248332977, - "learning_rate": 2.586935887479506e-06, - "loss": 0.0026, + "epoch": 8.527466206499856, + "grad_norm": 4.874326705932617, + "learning_rate": 4.797716184835638e-06, + "loss": 0.0533, "step": 29650 }, { - "epoch": 7.688325135904737, - "grad_norm": 0.8392144441604614, - "learning_rate": 2.572554433802169e-06, - "loss": 0.0047, + "epoch": 8.541846419327007, + "grad_norm": 4.6293463706970215, + "learning_rate": 4.7870640618675306e-06, + "loss": 0.0539, "step": 29700 }, { - "epoch": 7.7012684442143415, - "grad_norm": 0.7785108089447021, - "learning_rate": 2.5581729801248312e-06, - "loss": 0.0037, + "epoch": 8.556226632154155, + "grad_norm": 1.4482494592666626, + "learning_rate": 4.776411938899423e-06, + "loss": 0.0526, "step": 29750 }, { - "epoch": 7.714211752523945, - "grad_norm": 0.2849646806716919, - "learning_rate": 2.543791526447493e-06, - "loss": 0.0027, + "epoch": 8.570606844981306, + "grad_norm": 12.653058052062988, + "learning_rate": 4.765759815931316e-06, + "loss": 0.0518, "step": 29800 }, { - "epoch": 7.7271550608335495, - "grad_norm": 0.3449094593524933, - "learning_rate": 2.529410072770156e-06, - "loss": 0.0114, + "epoch": 8.584987057808455, + "grad_norm": 3.2734711170196533, + "learning_rate": 4.755107692963208e-06, + "loss": 0.0505, "step": 29850 }, { - "epoch": 7.740098369143153, - "grad_norm": 0.7601585984230042, - "learning_rate": 2.515028619092818e-06, - "loss": 0.0024, + "epoch": 8.599367270635605, + "grad_norm": 3.620457410812378, + "learning_rate": 4.7444555699951e-06, + "loss": 0.0365, "step": 29900 }, { - "epoch": 7.753041677452757, - "grad_norm": 0.6022003889083862, - "learning_rate": 2.50064716541548e-06, - "loss": 0.005, + "epoch": 8.613747483462756, + "grad_norm": 5.427806377410889, + "learning_rate": 4.733803447026993e-06, + "loss": 0.0532, "step": 29950 }, { - "epoch": 7.765984985762361, - "grad_norm": 0.08920400589704514, - "learning_rate": 2.4862657117381427e-06, - "loss": 0.0025, + "epoch": 8.628127696289905, + "grad_norm": 3.4758636951446533, + "learning_rate": 4.723151324058886e-06, + "loss": 0.0487, "step": 30000 }, { - "epoch": 7.778928294071965, - "grad_norm": 0.5146584510803223, - "learning_rate": 2.471884258060805e-06, - "loss": 0.0035, + "epoch": 8.642507909117056, + "grad_norm": 1.5267037153244019, + "learning_rate": 4.7124992010907775e-06, + "loss": 0.0471, "step": 30050 }, { - "epoch": 7.791871602381569, - "grad_norm": 0.6136813759803772, - "learning_rate": 2.4575028043834673e-06, - "loss": 0.0033, + "epoch": 8.656888121944204, + "grad_norm": 3.624415159225464, + "learning_rate": 4.70184707812267e-06, + "loss": 0.0487, "step": 30100 }, { - "epoch": 7.804814910691173, - "grad_norm": 5.361100673675537, - "learning_rate": 2.4431213507061295e-06, - "loss": 0.0039, + "epoch": 8.671268334771355, + "grad_norm": 5.009232521057129, + "learning_rate": 4.691194955154563e-06, + "loss": 0.0562, "step": 30150 }, { - "epoch": 7.817758219000776, - "grad_norm": 0.617695152759552, - "learning_rate": 2.428739897028792e-06, - "loss": 0.0026, + "epoch": 8.685648547598504, + "grad_norm": 5.1064653396606445, + "learning_rate": 4.680542832186455e-06, + "loss": 0.0584, "step": 30200 }, { - "epoch": 7.830701527310381, - "grad_norm": 0.42767393589019775, - "learning_rate": 2.414358443351454e-06, - "loss": 0.0022, + "epoch": 8.700028760425655, + "grad_norm": 3.2716023921966553, + "learning_rate": 4.669890709218347e-06, + "loss": 0.04, "step": 30250 }, { - "epoch": 7.843644835619984, - "grad_norm": 0.09423399716615677, - "learning_rate": 2.3999769896741164e-06, - "loss": 0.0038, + "epoch": 8.714408973252803, + "grad_norm": 4.310882091522217, + "learning_rate": 4.65923858625024e-06, + "loss": 0.0592, "step": 30300 }, { - "epoch": 7.856588143929589, - "grad_norm": 0.18421663343906403, - "learning_rate": 2.3855955359967787e-06, - "loss": 0.0048, + "epoch": 8.728789186079954, + "grad_norm": 2.118687152862549, + "learning_rate": 4.648586463282133e-06, + "loss": 0.0407, "step": 30350 }, { - "epoch": 7.869531452239192, - "grad_norm": 0.05506595969200134, - "learning_rate": 2.371214082319441e-06, - "loss": 0.0034, + "epoch": 8.743169398907105, + "grad_norm": 1.0022046566009521, + "learning_rate": 4.6379343403140245e-06, + "loss": 0.0448, "step": 30400 }, { - "epoch": 7.882474760548797, - "grad_norm": 1.0411120653152466, - "learning_rate": 2.3568326286421033e-06, - "loss": 0.0065, + "epoch": 8.757549611734254, + "grad_norm": 3.623133897781372, + "learning_rate": 4.627282217345917e-06, + "loss": 0.0522, "step": 30450 }, { - "epoch": 7.8954180688584, - "grad_norm": 0.05043673887848854, - "learning_rate": 2.3424511749647656e-06, - "loss": 0.0018, + "epoch": 8.771929824561404, + "grad_norm": 6.213899612426758, + "learning_rate": 4.61663009437781e-06, + "loss": 0.0479, "step": 30500 }, { - "epoch": 7.908361377168005, - "grad_norm": 0.8202661275863647, - "learning_rate": 2.328069721287428e-06, - "loss": 0.0107, + "epoch": 8.786310037388553, + "grad_norm": 2.589521884918213, + "learning_rate": 4.6059779714097025e-06, + "loss": 0.063, "step": 30550 }, { - "epoch": 7.921304685477608, - "grad_norm": 1.809882640838623, - "learning_rate": 2.31368826761009e-06, - "loss": 0.0038, + "epoch": 8.800690250215704, + "grad_norm": 5.372649669647217, + "learning_rate": 4.595325848441594e-06, + "loss": 0.0501, "step": 30600 }, { - "epoch": 7.934247993787212, - "grad_norm": 0.9416866898536682, - "learning_rate": 2.2993068139327525e-06, - "loss": 0.0031, + "epoch": 8.815070463042852, + "grad_norm": 5.467080116271973, + "learning_rate": 4.584673725473488e-06, + "loss": 0.0588, "step": 30650 }, { - "epoch": 7.947191302096816, - "grad_norm": 0.5891124606132507, - "learning_rate": 2.2849253602554148e-06, - "loss": 0.0023, + "epoch": 8.829450675870003, + "grad_norm": 4.744200706481934, + "learning_rate": 4.57402160250538e-06, + "loss": 0.051, "step": 30700 }, { - "epoch": 7.96013461040642, - "grad_norm": 0.15361438691616058, - "learning_rate": 2.270543906578077e-06, - "loss": 0.0021, + "epoch": 8.843830888697152, + "grad_norm": 5.33046293258667, + "learning_rate": 4.563369479537272e-06, + "loss": 0.0526, "step": 30750 }, { - "epoch": 7.973077918716024, - "grad_norm": 1.8306443691253662, - "learning_rate": 2.2561624529007393e-06, - "loss": 0.0049, + "epoch": 8.858211101524303, + "grad_norm": 7.1137871742248535, + "learning_rate": 4.552717356569165e-06, + "loss": 0.0554, "step": 30800 }, { - "epoch": 7.986021227025628, - "grad_norm": 0.06569012254476547, - "learning_rate": 2.2417809992234016e-06, - "loss": 0.0042, + "epoch": 8.872591314351453, + "grad_norm": 1.1814603805541992, + "learning_rate": 4.542065233601058e-06, + "loss": 0.0488, "step": 30850 }, { - "epoch": 7.998964535335231, - "grad_norm": 0.14215713739395142, - "learning_rate": 2.227399545546064e-06, - "loss": 0.0021, + "epoch": 8.886971527178602, + "grad_norm": 0.9678062796592712, + "learning_rate": 4.5314131106329494e-06, + "loss": 0.0474, "step": 30900 }, { - "epoch": 8.0, - "eval_loss": 0.0042533595114946365, - "eval_runtime": 89.1255, - "eval_samples_per_second": 5.61, - "eval_steps_per_second": 0.707, - "eval_wer": 1.490500682271439, - "step": 30904 - }, - { - "epoch": 8.011907843644835, - "grad_norm": 0.9032062292098999, - "learning_rate": 2.2130180918687262e-06, - "loss": 0.0022, + "epoch": 8.901351740005753, + "grad_norm": 0.7941696047782898, + "learning_rate": 4.520760987664842e-06, + "loss": 0.0383, "step": 30950 }, { - "epoch": 8.02485115195444, - "grad_norm": 0.09088978916406631, - "learning_rate": 2.1986366381913885e-06, - "loss": 0.0016, + "epoch": 8.915731952832902, + "grad_norm": 2.5805575847625732, + "learning_rate": 4.510108864696735e-06, + "loss": 0.0476, "step": 31000 }, { - "epoch": 8.037794460264044, - "grad_norm": 0.1776304394006729, - "learning_rate": 2.184255184514051e-06, - "loss": 0.0063, + "epoch": 8.930112165660052, + "grad_norm": 5.610886096954346, + "learning_rate": 4.499456741728627e-06, + "loss": 0.049, "step": 31050 }, { - "epoch": 8.050737768573647, - "grad_norm": 1.3590532541275024, - "learning_rate": 2.169873730836713e-06, - "loss": 0.0018, + "epoch": 8.944492378487201, + "grad_norm": 2.6645700931549072, + "learning_rate": 4.488804618760519e-06, + "loss": 0.0535, "step": 31100 }, { - "epoch": 8.06368107688325, - "grad_norm": 2.1883018016815186, - "learning_rate": 2.1554922771593754e-06, - "loss": 0.0028, + "epoch": 8.958872591314352, + "grad_norm": 2.6777915954589844, + "learning_rate": 4.478152495792412e-06, + "loss": 0.0643, "step": 31150 }, { - "epoch": 8.076624385192856, - "grad_norm": 3.1580357551574707, - "learning_rate": 2.1411108234820377e-06, - "loss": 0.0089, + "epoch": 8.9732528041415, + "grad_norm": 3.8960158824920654, + "learning_rate": 4.467500372824305e-06, + "loss": 0.0406, "step": 31200 }, { - "epoch": 8.08956769350246, - "grad_norm": 0.19922618567943573, - "learning_rate": 2.1267293698047e-06, - "loss": 0.0016, + "epoch": 8.987633016968651, + "grad_norm": 1.2076961994171143, + "learning_rate": 4.4570612923155586e-06, + "loss": 0.0604, "step": 31250 }, { - "epoch": 8.102511001812063, - "grad_norm": 0.3133656084537506, - "learning_rate": 2.1123479161273623e-06, - "loss": 0.001, + "epoch": 9.0, + "eval_cer": 14.173645005754123, + "eval_exact_match": 29.63442251698479, + "eval_loss": 0.47565555572509766, + "eval_runtime": 642.3765, + "eval_samples_per_second": 4.812, + "eval_steps_per_second": 0.602, + "eval_wer": 31.004616991878137, + "step": 31293 + }, + { + "epoch": 9.002013229795802, + "grad_norm": 2.3566198348999023, + "learning_rate": 4.446409169347451e-06, + "loss": 0.0511, "step": 31300 }, { - "epoch": 8.115454310121667, - "grad_norm": 0.1543108969926834, - "learning_rate": 2.0979664624500245e-06, - "loss": 0.0071, + "epoch": 9.01639344262295, + "grad_norm": 4.359884262084961, + "learning_rate": 4.435757046379344e-06, + "loss": 0.0273, "step": 31350 }, { - "epoch": 8.12839761843127, - "grad_norm": 0.06812497228384018, - "learning_rate": 2.083585008772687e-06, - "loss": 0.0057, + "epoch": 9.030773655450101, + "grad_norm": 5.240658760070801, + "learning_rate": 4.4251049234112366e-06, + "loss": 0.0308, "step": 31400 }, { - "epoch": 8.141340926740876, - "grad_norm": 0.7921668887138367, - "learning_rate": 2.069203555095349e-06, - "loss": 0.0019, + "epoch": 9.04515386827725, + "grad_norm": 7.096233367919922, + "learning_rate": 4.414452800443128e-06, + "loss": 0.0329, "step": 31450 }, { - "epoch": 8.15428423505048, - "grad_norm": 0.7293940782546997, - "learning_rate": 2.0548221014180114e-06, - "loss": 0.0024, + "epoch": 9.0595340811044, + "grad_norm": 3.892852544784546, + "learning_rate": 4.403800677475021e-06, + "loss": 0.0237, "step": 31500 }, { - "epoch": 8.167227543360083, - "grad_norm": 0.2699018120765686, - "learning_rate": 2.0407282768142205e-06, - "loss": 0.0036, + "epoch": 9.07391429393155, + "grad_norm": 2.9136476516723633, + "learning_rate": 4.393148554506914e-06, + "loss": 0.0304, "step": 31550 }, { - "epoch": 8.180170851669686, - "grad_norm": 1.0701220035552979, - "learning_rate": 2.026346823136883e-06, - "loss": 0.0021, + "epoch": 9.0882945067587, + "grad_norm": 3.994499683380127, + "learning_rate": 4.3824964315388055e-06, + "loss": 0.0308, "step": 31600 }, { - "epoch": 8.193114159979292, - "grad_norm": 0.35062670707702637, - "learning_rate": 2.011965369459545e-06, - "loss": 0.0013, + "epoch": 9.102674719585849, + "grad_norm": 5.810770034790039, + "learning_rate": 4.371844308570698e-06, + "loss": 0.0435, "step": 31650 }, { - "epoch": 8.206057468288895, - "grad_norm": 2.343193531036377, - "learning_rate": 1.9975839157822073e-06, - "loss": 0.005, + "epoch": 9.117054932413, + "grad_norm": 1.6180803775787354, + "learning_rate": 4.361192185602591e-06, + "loss": 0.0455, "step": 31700 }, { - "epoch": 8.219000776598499, - "grad_norm": 0.1934152990579605, - "learning_rate": 1.9832024621048696e-06, - "loss": 0.0042, + "epoch": 9.13143514524015, + "grad_norm": 3.995206594467163, + "learning_rate": 4.3505400626344835e-06, + "loss": 0.0248, "step": 31750 }, { - "epoch": 8.231944084908102, - "grad_norm": 1.2814443111419678, - "learning_rate": 1.968821008427532e-06, - "loss": 0.0031, + "epoch": 9.1458153580673, + "grad_norm": 3.7829082012176514, + "learning_rate": 4.339887939666375e-06, + "loss": 0.025, "step": 31800 }, { - "epoch": 8.244887393217706, - "grad_norm": 0.23100686073303223, - "learning_rate": 1.954439554750194e-06, - "loss": 0.001, + "epoch": 9.16019557089445, + "grad_norm": 4.479654312133789, + "learning_rate": 4.329235816698268e-06, + "loss": 0.0219, "step": 31850 }, { - "epoch": 8.257830701527311, - "grad_norm": 1.0474891662597656, - "learning_rate": 1.9400581010728565e-06, - "loss": 0.0017, + "epoch": 9.174575783721599, + "grad_norm": 4.6671061515808105, + "learning_rate": 4.318583693730161e-06, + "loss": 0.0422, "step": 31900 }, { - "epoch": 8.270774009836915, - "grad_norm": 0.1752719134092331, - "learning_rate": 1.9256766473955188e-06, - "loss": 0.0025, + "epoch": 9.18895599654875, + "grad_norm": 2.3317782878875732, + "learning_rate": 4.307931570762053e-06, + "loss": 0.0381, "step": 31950 }, { - "epoch": 8.283717318146518, - "grad_norm": 0.07388792932033539, - "learning_rate": 1.911295193718181e-06, - "loss": 0.0012, + "epoch": 9.203336209375898, + "grad_norm": 2.4453017711639404, + "learning_rate": 4.297279447793945e-06, + "loss": 0.0449, "step": 32000 }, { - "epoch": 8.296660626456122, - "grad_norm": 0.2670608460903168, - "learning_rate": 1.8969137400408436e-06, - "loss": 0.0027, + "epoch": 9.217716422203049, + "grad_norm": 3.565532684326172, + "learning_rate": 4.286627324825839e-06, + "loss": 0.0289, "step": 32050 }, { - "epoch": 8.309603934765725, - "grad_norm": 0.19774726033210754, - "learning_rate": 1.8825322863635057e-06, - "loss": 0.0016, + "epoch": 9.232096635030198, + "grad_norm": 3.2108683586120605, + "learning_rate": 4.2759752018577305e-06, + "loss": 0.0343, "step": 32100 }, { - "epoch": 8.32254724307533, - "grad_norm": 0.5155441761016846, - "learning_rate": 1.868150832686168e-06, - "loss": 0.0028, + "epoch": 9.246476847857348, + "grad_norm": 1.387044906616211, + "learning_rate": 4.265323078889623e-06, + "loss": 0.0277, "step": 32150 }, { - "epoch": 8.335490551384934, - "grad_norm": 0.2909785509109497, - "learning_rate": 1.8537693790088305e-06, - "loss": 0.0013, + "epoch": 9.260857060684499, + "grad_norm": 2.9393656253814697, + "learning_rate": 4.254670955921516e-06, + "loss": 0.0332, "step": 32200 }, { - "epoch": 8.348433859694538, - "grad_norm": 0.20075345039367676, - "learning_rate": 1.8393879253314927e-06, - "loss": 0.0017, + "epoch": 9.275237273511648, + "grad_norm": 2.9192564487457275, + "learning_rate": 4.2440188329534085e-06, + "loss": 0.0248, "step": 32250 }, { - "epoch": 8.361377168004141, - "grad_norm": 0.44085758924484253, - "learning_rate": 1.8250064716541548e-06, - "loss": 0.0022, + "epoch": 9.289617486338798, + "grad_norm": 1.02425217628479, + "learning_rate": 4.2333667099853e-06, + "loss": 0.0237, "step": 32300 }, { - "epoch": 8.374320476313747, - "grad_norm": 1.4371730089187622, - "learning_rate": 1.8106250179768173e-06, - "loss": 0.0013, + "epoch": 9.303997699165947, + "grad_norm": 1.3429416418075562, + "learning_rate": 4.222714587017193e-06, + "loss": 0.0255, "step": 32350 }, { - "epoch": 8.38726378462335, - "grad_norm": 0.15547557175159454, - "learning_rate": 1.7962435642994796e-06, - "loss": 0.0016, + "epoch": 9.318377911993098, + "grad_norm": 3.7899997234344482, + "learning_rate": 4.212062464049086e-06, + "loss": 0.0406, "step": 32400 }, { - "epoch": 8.400207092932954, - "grad_norm": 0.3929384648799896, - "learning_rate": 1.7818621106221417e-06, - "loss": 0.0013, + "epoch": 9.332758124820247, + "grad_norm": 3.700133800506592, + "learning_rate": 4.2014103410809775e-06, + "loss": 0.0314, "step": 32450 }, { - "epoch": 8.413150401242557, - "grad_norm": 0.329222172498703, - "learning_rate": 1.7674806569448042e-06, - "loss": 0.0016, + "epoch": 9.347138337647397, + "grad_norm": 2.2443103790283203, + "learning_rate": 4.19075821811287e-06, + "loss": 0.0251, "step": 32500 }, { - "epoch": 8.42609370955216, - "grad_norm": 0.24657496809959412, - "learning_rate": 1.7530992032674665e-06, - "loss": 0.0033, + "epoch": 9.361518550474546, + "grad_norm": 4.76594352722168, + "learning_rate": 4.180106095144763e-06, + "loss": 0.0458, "step": 32550 }, { - "epoch": 8.439037017861766, - "grad_norm": 0.1905100792646408, - "learning_rate": 1.7387177495901286e-06, - "loss": 0.0011, + "epoch": 9.375898763301697, + "grad_norm": 12.763227462768555, + "learning_rate": 4.1694539721766555e-06, + "loss": 0.0384, "step": 32600 }, { - "epoch": 8.45198032617137, - "grad_norm": 0.06774311512708664, - "learning_rate": 1.724336295912791e-06, - "loss": 0.0028, + "epoch": 9.390278976128847, + "grad_norm": 4.207976818084717, + "learning_rate": 4.158801849208547e-06, + "loss": 0.0251, "step": 32650 }, { - "epoch": 8.464923634480973, - "grad_norm": 0.20226095616817474, - "learning_rate": 1.7099548422354534e-06, - "loss": 0.0014, + "epoch": 9.404659188955996, + "grad_norm": 1.1125296354293823, + "learning_rate": 4.14814972624044e-06, + "loss": 0.0281, "step": 32700 }, { - "epoch": 8.477866942790577, - "grad_norm": 0.5388538241386414, - "learning_rate": 1.6955733885581155e-06, - "loss": 0.0085, + "epoch": 9.419039401783147, + "grad_norm": 9.574271202087402, + "learning_rate": 4.137497603272333e-06, + "loss": 0.0446, "step": 32750 }, { - "epoch": 8.49081025110018, - "grad_norm": 0.04724876210093498, - "learning_rate": 1.681191934880778e-06, - "loss": 0.0015, + "epoch": 9.433419614610296, + "grad_norm": 2.636265516281128, + "learning_rate": 4.126845480304225e-06, + "loss": 0.0273, "step": 32800 }, { - "epoch": 8.503753559409786, - "grad_norm": 0.1351761817932129, - "learning_rate": 1.6668104812034402e-06, - "loss": 0.002, + "epoch": 9.447799827437446, + "grad_norm": 0.5665758848190308, + "learning_rate": 4.116193357336117e-06, + "loss": 0.033, "step": 32850 }, { - "epoch": 8.51669686771939, - "grad_norm": 0.16377896070480347, - "learning_rate": 1.6524290275261023e-06, - "loss": 0.0014, + "epoch": 9.462180040264595, + "grad_norm": 2.313138008117676, + "learning_rate": 4.10554123436801e-06, + "loss": 0.0344, "step": 32900 }, { - "epoch": 8.529640176028993, - "grad_norm": 0.0653943344950676, - "learning_rate": 1.6380475738487648e-06, - "loss": 0.0016, + "epoch": 9.476560253091746, + "grad_norm": 3.437344551086426, + "learning_rate": 4.094889111399902e-06, + "loss": 0.031, "step": 32950 }, { - "epoch": 8.542583484338596, - "grad_norm": 0.4834960699081421, - "learning_rate": 1.6236661201714271e-06, - "loss": 0.0034, + "epoch": 9.490940465918897, + "grad_norm": 5.028070449829102, + "learning_rate": 4.084236988431795e-06, + "loss": 0.034, "step": 33000 }, { - "epoch": 8.555526792648202, - "grad_norm": 0.437788188457489, - "learning_rate": 1.6092846664940894e-06, - "loss": 0.0052, + "epoch": 9.505320678746045, + "grad_norm": 4.613243103027344, + "learning_rate": 4.073584865463687e-06, + "loss": 0.0348, "step": 33050 }, { - "epoch": 8.568470100957805, - "grad_norm": 2.195469617843628, - "learning_rate": 1.5949032128167515e-06, - "loss": 0.0036, + "epoch": 9.519700891573196, + "grad_norm": 1.9250500202178955, + "learning_rate": 4.0629327424955796e-06, + "loss": 0.0304, "step": 33100 }, { - "epoch": 8.581413409267409, - "grad_norm": 0.12040536105632782, - "learning_rate": 1.580521759139414e-06, - "loss": 0.0031, + "epoch": 9.534081104400345, + "grad_norm": 1.927820086479187, + "learning_rate": 4.052280619527472e-06, + "loss": 0.0296, "step": 33150 }, { - "epoch": 8.594356717577012, - "grad_norm": 0.2154337614774704, - "learning_rate": 1.5661403054620763e-06, - "loss": 0.0025, + "epoch": 9.548461317227495, + "grad_norm": 3.1357529163360596, + "learning_rate": 4.041628496559364e-06, + "loss": 0.0285, "step": 33200 }, { - "epoch": 8.607300025886616, - "grad_norm": 0.1478249430656433, - "learning_rate": 1.5517588517847384e-06, - "loss": 0.0022, + "epoch": 9.562841530054644, + "grad_norm": 8.994431495666504, + "learning_rate": 4.0309763735912576e-06, + "loss": 0.0293, "step": 33250 }, { - "epoch": 8.620243334196221, - "grad_norm": 0.16750039160251617, - "learning_rate": 1.5373773981074009e-06, - "loss": 0.0021, + "epoch": 9.577221742881795, + "grad_norm": 6.852480888366699, + "learning_rate": 4.020324250623149e-06, + "loss": 0.0326, "step": 33300 }, { - "epoch": 8.633186642505825, - "grad_norm": 0.38158321380615234, - "learning_rate": 1.5229959444300632e-06, - "loss": 0.001, + "epoch": 9.591601955708944, + "grad_norm": 2.4288530349731445, + "learning_rate": 4.009672127655042e-06, + "loss": 0.0489, "step": 33350 }, { - "epoch": 8.646129950815428, - "grad_norm": 1.1184005737304688, - "learning_rate": 1.5086144907527252e-06, - "loss": 0.007, + "epoch": 9.605982168536094, + "grad_norm": 4.439000606536865, + "learning_rate": 3.999020004686935e-06, + "loss": 0.0359, "step": 33400 }, { - "epoch": 8.659073259125032, - "grad_norm": 1.4656065702438354, - "learning_rate": 1.4942330370753877e-06, - "loss": 0.0013, + "epoch": 9.620362381363243, + "grad_norm": 3.07681941986084, + "learning_rate": 3.988367881718827e-06, + "loss": 0.0413, "step": 33450 }, { - "epoch": 8.672016567434635, - "grad_norm": 0.3482512831687927, - "learning_rate": 1.47985158339805e-06, - "loss": 0.0024, + "epoch": 9.634742594190394, + "grad_norm": 6.422308444976807, + "learning_rate": 3.977715758750719e-06, + "loss": 0.0355, "step": 33500 }, { - "epoch": 8.68495987574424, - "grad_norm": 0.04078083485364914, - "learning_rate": 1.4654701297207121e-06, - "loss": 0.0021, + "epoch": 9.649122807017545, + "grad_norm": 3.161259412765503, + "learning_rate": 3.967063635782612e-06, + "loss": 0.0326, "step": 33550 }, { - "epoch": 8.697903184053844, - "grad_norm": 0.1383834183216095, - "learning_rate": 1.4510886760433746e-06, - "loss": 0.0025, + "epoch": 9.663503019844693, + "grad_norm": 4.7213311195373535, + "learning_rate": 3.9564115128145045e-06, + "loss": 0.0244, "step": 33600 }, { - "epoch": 8.710846492363448, - "grad_norm": 0.1986149251461029, - "learning_rate": 1.436707222366037e-06, - "loss": 0.0011, + "epoch": 9.677883232671844, + "grad_norm": 5.186020851135254, + "learning_rate": 3.945759389846397e-06, + "loss": 0.0341, "step": 33650 }, { - "epoch": 8.723789800673051, - "grad_norm": 0.5224486589431763, - "learning_rate": 1.422325768688699e-06, - "loss": 0.002, + "epoch": 9.692263445498993, + "grad_norm": 3.0158615112304688, + "learning_rate": 3.935107266878289e-06, + "loss": 0.0386, "step": 33700 }, { - "epoch": 8.736733108982657, - "grad_norm": 0.14714999496936798, - "learning_rate": 1.4079443150113615e-06, - "loss": 0.0024, + "epoch": 9.706643658326144, + "grad_norm": 2.6093173027038574, + "learning_rate": 3.924455143910182e-06, + "loss": 0.0351, "step": 33750 }, { - "epoch": 8.74967641729226, - "grad_norm": 0.07352601736783981, - "learning_rate": 1.3935628613340238e-06, - "loss": 0.0025, + "epoch": 9.721023871153292, + "grad_norm": 8.223328590393066, + "learning_rate": 3.913803020942074e-06, + "loss": 0.0321, "step": 33800 }, { - "epoch": 8.762619725601864, - "grad_norm": 0.04641982167959213, - "learning_rate": 1.379181407656686e-06, - "loss": 0.0011, + "epoch": 9.735404083980443, + "grad_norm": 2.530656576156616, + "learning_rate": 3.903150897973966e-06, + "loss": 0.0317, "step": 33850 }, { - "epoch": 8.775563033911467, - "grad_norm": 0.20494569838047028, - "learning_rate": 1.3647999539793484e-06, - "loss": 0.0014, + "epoch": 9.749784296807594, + "grad_norm": 7.995259761810303, + "learning_rate": 3.892498775005859e-06, + "loss": 0.0352, "step": 33900 }, { - "epoch": 8.788506342221071, - "grad_norm": 0.3108866214752197, - "learning_rate": 1.3504185003020107e-06, - "loss": 0.0031, + "epoch": 9.764164509634742, + "grad_norm": 2.557913303375244, + "learning_rate": 3.8818466520377515e-06, + "loss": 0.0518, "step": 33950 }, { - "epoch": 8.801449650530676, - "grad_norm": 1.0901867151260376, - "learning_rate": 1.336037046624673e-06, - "loss": 0.006, + "epoch": 9.778544722461893, + "grad_norm": 1.9365257024765015, + "learning_rate": 3.871194529069644e-06, + "loss": 0.031, "step": 34000 }, { - "epoch": 8.81439295884028, - "grad_norm": 0.1557256281375885, - "learning_rate": 1.3216555929473352e-06, - "loss": 0.002, + "epoch": 9.792924935289042, + "grad_norm": 1.5110323429107666, + "learning_rate": 3.860542406101536e-06, + "loss": 0.029, "step": 34050 }, { - "epoch": 8.827336267149883, - "grad_norm": 0.32064932584762573, - "learning_rate": 1.3072741392699975e-06, - "loss": 0.0016, + "epoch": 9.807305148116193, + "grad_norm": 3.2484145164489746, + "learning_rate": 3.849890283133429e-06, + "loss": 0.0299, "step": 34100 }, { - "epoch": 8.840279575459487, - "grad_norm": 0.035750892013311386, - "learning_rate": 1.2928926855926598e-06, - "loss": 0.0016, + "epoch": 9.821685360943341, + "grad_norm": 2.2870161533355713, + "learning_rate": 3.839238160165321e-06, + "loss": 0.0361, "step": 34150 }, { - "epoch": 8.853222883769092, - "grad_norm": 0.11652498692274094, - "learning_rate": 1.278511231915322e-06, - "loss": 0.0017, + "epoch": 9.836065573770492, + "grad_norm": 4.6563544273376465, + "learning_rate": 3.828586037197214e-06, + "loss": 0.0562, "step": 34200 }, { - "epoch": 8.866166192078696, - "grad_norm": 0.22541067004203796, - "learning_rate": 1.2641297782379844e-06, - "loss": 0.002, + "epoch": 9.850445786597641, + "grad_norm": 3.728471517562866, + "learning_rate": 3.817933914229106e-06, + "loss": 0.0293, "step": 34250 }, { - "epoch": 8.8791095003883, - "grad_norm": 0.09035801142454147, - "learning_rate": 1.2497483245606467e-06, - "loss": 0.0024, + "epoch": 9.864825999424792, + "grad_norm": 1.556528925895691, + "learning_rate": 3.807281791260999e-06, + "loss": 0.0291, "step": 34300 }, { - "epoch": 8.892052808697903, - "grad_norm": 0.8051474690437317, - "learning_rate": 1.235366870883309e-06, - "loss": 0.0015, + "epoch": 9.87920621225194, + "grad_norm": 6.98478889465332, + "learning_rate": 3.796629668292891e-06, + "loss": 0.0425, "step": 34350 }, { - "epoch": 8.904996117007506, - "grad_norm": 0.12439941614866257, - "learning_rate": 1.2209854172059713e-06, - "loss": 0.0014, + "epoch": 9.893586425079091, + "grad_norm": 0.3747137188911438, + "learning_rate": 3.7859775453247838e-06, + "loss": 0.0284, "step": 34400 }, { - "epoch": 8.917939425317112, - "grad_norm": 0.0911746621131897, - "learning_rate": 1.2066039635286336e-06, - "loss": 0.0016, + "epoch": 9.907966637906242, + "grad_norm": 0.44150209426879883, + "learning_rate": 3.775325422356676e-06, + "loss": 0.0305, "step": 34450 }, { - "epoch": 8.930882733626715, - "grad_norm": 0.10455431789159775, - "learning_rate": 1.1922225098512959e-06, - "loss": 0.0035, + "epoch": 9.92234685073339, + "grad_norm": 8.117648124694824, + "learning_rate": 3.7646732993885683e-06, + "loss": 0.0322, "step": 34500 }, { - "epoch": 8.943826041936319, - "grad_norm": 0.0844273790717125, - "learning_rate": 1.1778410561739582e-06, - "loss": 0.0012, + "epoch": 9.936727063560541, + "grad_norm": 2.4635517597198486, + "learning_rate": 3.754021176420461e-06, + "loss": 0.0385, "step": 34550 }, { - "epoch": 8.956769350245922, - "grad_norm": 0.1838880330324173, - "learning_rate": 1.1634596024966204e-06, - "loss": 0.003, + "epoch": 9.95110727638769, + "grad_norm": 0.3196696937084198, + "learning_rate": 3.743369053452353e-06, + "loss": 0.0289, "step": 34600 }, { - "epoch": 8.969712658555526, - "grad_norm": 1.8350099325180054, - "learning_rate": 1.1490781488192827e-06, - "loss": 0.0018, + "epoch": 9.96548748921484, + "grad_norm": 6.403079032897949, + "learning_rate": 3.732716930484246e-06, + "loss": 0.0415, "step": 34650 }, { - "epoch": 8.982655966865131, - "grad_norm": 0.18555675446987152, - "learning_rate": 1.134696695141945e-06, - "loss": 0.0015, + "epoch": 9.97986770204199, + "grad_norm": 4.431683540344238, + "learning_rate": 3.722064807516138e-06, + "loss": 0.0415, "step": 34700 }, { - "epoch": 8.995599275174735, - "grad_norm": 0.14013200998306274, - "learning_rate": 1.1203152414646073e-06, - "loss": 0.0011, + "epoch": 9.99424791486914, + "grad_norm": 2.5951316356658936, + "learning_rate": 3.7114126845480307e-06, + "loss": 0.0366, "step": 34750 }, { - "epoch": 9.0, - "eval_loss": 0.0022843414917588234, - "eval_runtime": 89.3025, - "eval_samples_per_second": 5.599, - "eval_steps_per_second": 0.705, - "eval_wer": 1.1861026556103704, - "step": 34767 - }, - { - "epoch": 9.008542583484338, - "grad_norm": 0.13073372840881348, - "learning_rate": 1.1059337877872696e-06, - "loss": 0.0014, + "epoch": 10.0, + "eval_cer": 13.986404727632001, + "eval_exact_match": 29.472662568747975, + "eval_loss": 0.5040012001991272, + "eval_runtime": 634.5205, + "eval_samples_per_second": 4.871, + "eval_steps_per_second": 0.61, + "eval_wer": 31.04168772958582, + "step": 34770 + }, + { + "epoch": 10.00862812769629, + "grad_norm": 1.0161616802215576, + "learning_rate": 3.700760561579923e-06, + "loss": 0.0278, "step": 34800 }, { - "epoch": 9.021485891793942, - "grad_norm": 0.13120625913143158, - "learning_rate": 1.091552334109932e-06, - "loss": 0.0007, + "epoch": 10.02300834052344, + "grad_norm": 0.9947869777679443, + "learning_rate": 3.6901084386118157e-06, + "loss": 0.023, "step": 34850 }, { - "epoch": 9.034429200103547, - "grad_norm": 0.15788908302783966, - "learning_rate": 1.0771708804325942e-06, - "loss": 0.0012, + "epoch": 10.03738855335059, + "grad_norm": 6.685502529144287, + "learning_rate": 3.679456315643708e-06, + "loss": 0.0177, "step": 34900 }, { - "epoch": 9.04737250841315, - "grad_norm": 0.1373605579137802, - "learning_rate": 1.0627894267552565e-06, - "loss": 0.004, + "epoch": 10.051768766177739, + "grad_norm": 1.0945637226104736, + "learning_rate": 3.6688041926756006e-06, + "loss": 0.0174, "step": 34950 }, { - "epoch": 9.060315816722754, - "grad_norm": 0.09479701519012451, - "learning_rate": 1.0484079730779188e-06, - "loss": 0.0021, + "epoch": 10.06614897900489, + "grad_norm": 2.21243953704834, + "learning_rate": 3.658152069707493e-06, + "loss": 0.0148, "step": 35000 }, { - "epoch": 9.073259125032358, - "grad_norm": 0.1045953705906868, - "learning_rate": 1.034026519400581e-06, - "loss": 0.0014, + "epoch": 10.080529191832039, + "grad_norm": 1.7478631734848022, + "learning_rate": 3.6474999467393855e-06, + "loss": 0.0261, "step": 35050 }, { - "epoch": 9.086202433341962, - "grad_norm": 0.07537753134965897, - "learning_rate": 1.0196450657232434e-06, - "loss": 0.0008, + "epoch": 10.09490940465919, + "grad_norm": 2.5913538932800293, + "learning_rate": 3.6368478237712777e-06, + "loss": 0.0144, "step": 35100 }, { - "epoch": 9.099145741651567, - "grad_norm": 0.5165144801139832, - "learning_rate": 1.0052636120459057e-06, - "loss": 0.002, + "epoch": 10.109289617486338, + "grad_norm": 1.9573723077774048, + "learning_rate": 3.626195700803171e-06, + "loss": 0.0226, "step": 35150 }, { - "epoch": 9.11208904996117, - "grad_norm": 0.13497541844844818, - "learning_rate": 9.90882158368568e-07, - "loss": 0.0009, + "epoch": 10.123669830313489, + "grad_norm": 0.7810266017913818, + "learning_rate": 3.6155435778350626e-06, + "loss": 0.0155, "step": 35200 }, { - "epoch": 9.125032358270774, - "grad_norm": 0.06942334771156311, - "learning_rate": 9.765007046912302e-07, - "loss": 0.0012, + "epoch": 10.13805004314064, + "grad_norm": 2.6045634746551514, + "learning_rate": 3.604891454866955e-06, + "loss": 0.0235, "step": 35250 }, { - "epoch": 9.137975666580378, - "grad_norm": 0.19452160596847534, - "learning_rate": 9.621192510138925e-07, - "loss": 0.0014, + "epoch": 10.152430255967788, + "grad_norm": 1.5610133409500122, + "learning_rate": 3.594239331898848e-06, + "loss": 0.0156, "step": 35300 }, { - "epoch": 9.150918974889981, - "grad_norm": 0.12441800534725189, - "learning_rate": 9.477377973365548e-07, - "loss": 0.0009, + "epoch": 10.166810468794939, + "grad_norm": 2.555940866470337, + "learning_rate": 3.583800251390102e-06, + "loss": 0.0192, "step": 35350 }, { - "epoch": 9.163862283199586, - "grad_norm": 0.1729976236820221, - "learning_rate": 9.333563436592172e-07, - "loss": 0.0008, + "epoch": 10.181190681622088, + "grad_norm": 3.59891676902771, + "learning_rate": 3.573148128421995e-06, + "loss": 0.038, "step": 35400 }, { - "epoch": 9.17680559150919, - "grad_norm": 0.12662172317504883, - "learning_rate": 9.189748899818794e-07, - "loss": 0.0015, + "epoch": 10.195570894449238, + "grad_norm": 0.2640541195869446, + "learning_rate": 3.5624960054538873e-06, + "loss": 0.025, "step": 35450 }, { - "epoch": 9.189748899818794, - "grad_norm": 0.07149961590766907, - "learning_rate": 9.048810653780884e-07, - "loss": 0.0017, + "epoch": 10.209951107276387, + "grad_norm": 7.407700061798096, + "learning_rate": 3.55184388248578e-06, + "loss": 0.0243, "step": 35500 }, { - "epoch": 9.202692208128397, - "grad_norm": 0.09453389793634415, - "learning_rate": 8.904996117007508e-07, - "loss": 0.0008, + "epoch": 10.224331320103538, + "grad_norm": 4.053565979003906, + "learning_rate": 3.541191759517672e-06, + "loss": 0.0219, "step": 35550 }, { - "epoch": 9.215635516438002, - "grad_norm": 0.06293300539255142, - "learning_rate": 8.76118158023413e-07, - "loss": 0.0009, + "epoch": 10.238711532930687, + "grad_norm": 1.889561653137207, + "learning_rate": 3.530539636549565e-06, + "loss": 0.0218, "step": 35600 }, { - "epoch": 9.228578824747606, - "grad_norm": 0.09109367430210114, - "learning_rate": 8.617367043460753e-07, - "loss": 0.0007, + "epoch": 10.253091745757837, + "grad_norm": 3.4324212074279785, + "learning_rate": 3.519887513581457e-06, + "loss": 0.0206, "step": 35650 }, { - "epoch": 9.24152213305721, - "grad_norm": 0.12085200101137161, - "learning_rate": 8.473552506687377e-07, - "loss": 0.0018, + "epoch": 10.267471958584988, + "grad_norm": 0.9233732223510742, + "learning_rate": 3.5092353906133497e-06, + "loss": 0.0317, "step": 35700 }, { - "epoch": 9.254465441366813, - "grad_norm": 0.11523808538913727, - "learning_rate": 8.329737969913999e-07, - "loss": 0.0032, + "epoch": 10.281852171412137, + "grad_norm": 1.797395944595337, + "learning_rate": 3.498583267645242e-06, + "loss": 0.0214, "step": 35750 }, { - "epoch": 9.267408749676417, - "grad_norm": 0.28072428703308105, - "learning_rate": 8.185923433140623e-07, - "loss": 0.0008, + "epoch": 10.296232384239287, + "grad_norm": 4.241499900817871, + "learning_rate": 3.4879311446771346e-06, + "loss": 0.0204, "step": 35800 }, { - "epoch": 9.280352057986022, - "grad_norm": 0.08222024142742157, - "learning_rate": 8.042108896367246e-07, - "loss": 0.0007, + "epoch": 10.310612597066436, + "grad_norm": 2.2444257736206055, + "learning_rate": 3.477279021709027e-06, + "loss": 0.0245, "step": 35850 }, { - "epoch": 9.293295366295625, - "grad_norm": 0.29703882336616516, - "learning_rate": 7.898294359593868e-07, - "loss": 0.0011, + "epoch": 10.324992809893587, + "grad_norm": 0.8318943977355957, + "learning_rate": 3.4666268987409196e-06, + "loss": 0.0274, "step": 35900 }, { - "epoch": 9.306238674605229, - "grad_norm": 0.19992585480213165, - "learning_rate": 7.754479822820492e-07, - "loss": 0.0007, + "epoch": 10.339373022720736, + "grad_norm": 3.0525383949279785, + "learning_rate": 3.455974775772812e-06, + "loss": 0.0164, "step": 35950 }, { - "epoch": 9.319181982914833, - "grad_norm": 0.04905041307210922, - "learning_rate": 7.610665286047115e-07, - "loss": 0.0009, + "epoch": 10.353753235547886, + "grad_norm": 1.6571319103240967, + "learning_rate": 3.445322652804704e-06, + "loss": 0.0188, "step": 36000 }, { - "epoch": 9.332125291224436, - "grad_norm": 0.15756992995738983, - "learning_rate": 7.466850749273736e-07, - "loss": 0.005, + "epoch": 10.368133448375035, + "grad_norm": 1.5004198551177979, + "learning_rate": 3.4346705298365967e-06, + "loss": 0.024, "step": 36050 }, { - "epoch": 9.345068599534041, - "grad_norm": 0.18442897498607635, - "learning_rate": 7.32303621250036e-07, - "loss": 0.0009, + "epoch": 10.382513661202186, + "grad_norm": 2.033853769302368, + "learning_rate": 3.424018406868489e-06, + "loss": 0.0175, "step": 36100 }, { - "epoch": 9.358011907843645, - "grad_norm": 0.06329531967639923, - "learning_rate": 7.179221675726982e-07, - "loss": 0.0011, + "epoch": 10.396893874029336, + "grad_norm": 2.2097420692443848, + "learning_rate": 3.4133662839003816e-06, + "loss": 0.0176, "step": 36150 }, { - "epoch": 9.370955216153249, - "grad_norm": 0.11927127093076706, - "learning_rate": 7.035407138953606e-07, - "loss": 0.0007, + "epoch": 10.411274086856485, + "grad_norm": 1.751978874206543, + "learning_rate": 3.402714160932274e-06, + "loss": 0.0309, "step": 36200 }, { - "epoch": 9.383898524462852, - "grad_norm": 0.06844917684793472, - "learning_rate": 6.891592602180229e-07, - "loss": 0.0018, + "epoch": 10.425654299683636, + "grad_norm": 0.9982848167419434, + "learning_rate": 3.3920620379641665e-06, + "loss": 0.0159, "step": 36250 }, { - "epoch": 9.396841832772457, - "grad_norm": 0.08744735270738602, - "learning_rate": 6.747778065406851e-07, - "loss": 0.0043, + "epoch": 10.440034512510785, + "grad_norm": 1.5475460290908813, + "learning_rate": 3.3814099149960588e-06, + "loss": 0.0175, "step": 36300 }, { - "epoch": 9.409785141082061, - "grad_norm": 0.18723489344120026, - "learning_rate": 6.603963528633475e-07, - "loss": 0.0007, + "epoch": 10.454414725337935, + "grad_norm": 4.196291446685791, + "learning_rate": 3.3707577920279514e-06, + "loss": 0.0223, "step": 36350 }, { - "epoch": 9.422728449391665, - "grad_norm": 0.086359903216362, - "learning_rate": 6.460148991860098e-07, - "loss": 0.0039, + "epoch": 10.468794938165084, + "grad_norm": 0.9562397003173828, + "learning_rate": 3.3601056690598437e-06, + "loss": 0.0241, "step": 36400 }, { - "epoch": 9.435671757701268, - "grad_norm": 0.10816863179206848, - "learning_rate": 6.31633445508672e-07, - "loss": 0.0015, + "epoch": 10.483175150992235, + "grad_norm": 1.4568724632263184, + "learning_rate": 3.3494535460917368e-06, + "loss": 0.018, "step": 36450 }, { - "epoch": 9.448615066010872, - "grad_norm": 0.09978567808866501, - "learning_rate": 6.172519918313344e-07, - "loss": 0.0054, + "epoch": 10.497555363819384, + "grad_norm": 2.8558595180511475, + "learning_rate": 3.3388014231236286e-06, + "loss": 0.0175, "step": 36500 }, { - "epoch": 9.461558374320477, - "grad_norm": 0.07749635726213455, - "learning_rate": 6.028705381539967e-07, - "loss": 0.0013, + "epoch": 10.511935576646534, + "grad_norm": 2.386277198791504, + "learning_rate": 3.3281493001555217e-06, + "loss": 0.0266, "step": 36550 }, { - "epoch": 9.47450168263008, - "grad_norm": 0.18575559556484222, - "learning_rate": 5.88489084476659e-07, - "loss": 0.0007, + "epoch": 10.526315789473685, + "grad_norm": 8.753107070922852, + "learning_rate": 3.317497177187414e-06, + "loss": 0.0237, "step": 36600 }, { - "epoch": 9.487444990939684, - "grad_norm": 0.3773919641971588, - "learning_rate": 5.741076307993213e-07, - "loss": 0.0019, + "epoch": 10.540696002300834, + "grad_norm": 1.261927843093872, + "learning_rate": 3.3068450542193057e-06, + "loss": 0.0172, "step": 36650 }, { - "epoch": 9.500388299249288, - "grad_norm": 0.13482239842414856, - "learning_rate": 5.597261771219835e-07, - "loss": 0.0006, + "epoch": 10.555076215127984, + "grad_norm": 1.744277834892273, + "learning_rate": 3.296405973710561e-06, + "loss": 0.0208, "step": 36700 }, { - "epoch": 9.513331607558893, - "grad_norm": 0.16534963250160217, - "learning_rate": 5.453447234446458e-07, - "loss": 0.0007, + "epoch": 10.569456427955133, + "grad_norm": 2.993377447128296, + "learning_rate": 3.2857538507424532e-06, + "loss": 0.025, "step": 36750 }, { - "epoch": 9.526274915868497, - "grad_norm": 0.22458544373512268, - "learning_rate": 5.309632697673081e-07, - "loss": 0.0013, + "epoch": 10.583836640782284, + "grad_norm": 3.2228963375091553, + "learning_rate": 3.275101727774346e-06, + "loss": 0.0205, "step": 36800 }, { - "epoch": 9.5392182241781, - "grad_norm": 0.07351688295602798, - "learning_rate": 5.165818160899704e-07, - "loss": 0.0019, + "epoch": 10.598216853609433, + "grad_norm": 1.915059208869934, + "learning_rate": 3.264449604806238e-06, + "loss": 0.0231, "step": 36850 }, { - "epoch": 9.552161532487704, - "grad_norm": 0.7084305286407471, - "learning_rate": 5.022003624126327e-07, - "loss": 0.0024, + "epoch": 10.612597066436583, + "grad_norm": 2.1153347492218018, + "learning_rate": 3.253797481838131e-06, + "loss": 0.0212, "step": 36900 }, { - "epoch": 9.565104840797307, - "grad_norm": 0.09942048788070679, - "learning_rate": 4.87818908735295e-07, - "loss": 0.0019, + "epoch": 10.626977279263734, + "grad_norm": 1.1697392463684082, + "learning_rate": 3.243145358870023e-06, + "loss": 0.0234, "step": 36950 }, { - "epoch": 9.578048149106912, - "grad_norm": 1.1455363035202026, - "learning_rate": 4.734374550579573e-07, - "loss": 0.0013, + "epoch": 10.641357492090883, + "grad_norm": 1.4635300636291504, + "learning_rate": 3.2324932359019157e-06, + "loss": 0.015, "step": 37000 }, { - "epoch": 9.590991457416516, - "grad_norm": 0.07915141433477402, - "learning_rate": 4.590560013806196e-07, - "loss": 0.0081, + "epoch": 10.655737704918034, + "grad_norm": 0.8504502773284912, + "learning_rate": 3.221841112933808e-06, + "loss": 0.0168, "step": 37050 }, { - "epoch": 9.60393476572612, - "grad_norm": 0.1777876317501068, - "learning_rate": 4.4467454770328193e-07, - "loss": 0.0014, + "epoch": 10.670117917745182, + "grad_norm": 3.238915205001831, + "learning_rate": 3.2111889899657006e-06, + "loss": 0.0219, "step": 37100 }, { - "epoch": 9.616878074035723, - "grad_norm": 0.10249053686857224, - "learning_rate": 4.3029309402594417e-07, - "loss": 0.0007, + "epoch": 10.684498130572333, + "grad_norm": 2.170586109161377, + "learning_rate": 3.200536866997593e-06, + "loss": 0.0191, "step": 37150 }, { - "epoch": 9.629821382345327, - "grad_norm": 0.10651733726263046, - "learning_rate": 4.1591164034860646e-07, - "loss": 0.0007, + "epoch": 10.698878343399482, + "grad_norm": 1.3201878070831299, + "learning_rate": 3.1898847440294855e-06, + "loss": 0.0225, "step": 37200 }, { - "epoch": 9.642764690654932, - "grad_norm": 0.03686549514532089, - "learning_rate": 4.0153018667126875e-07, - "loss": 0.0046, + "epoch": 10.713258556226632, + "grad_norm": 3.716623306274414, + "learning_rate": 3.1792326210613778e-06, + "loss": 0.0184, "step": 37250 }, { - "epoch": 9.655707998964536, - "grad_norm": 0.09571921825408936, - "learning_rate": 3.871487329939311e-07, - "loss": 0.0009, + "epoch": 10.727638769053781, + "grad_norm": 1.4733976125717163, + "learning_rate": 3.1685804980932704e-06, + "loss": 0.0171, "step": 37300 }, { - "epoch": 9.66865130727414, - "grad_norm": 0.08373486995697021, - "learning_rate": 3.727672793165934e-07, - "loss": 0.0013, + "epoch": 10.742018981880932, + "grad_norm": 3.570805311203003, + "learning_rate": 3.1579283751251627e-06, + "loss": 0.0238, "step": 37350 }, { - "epoch": 9.681594615583743, - "grad_norm": 3.18973708152771, - "learning_rate": 3.583858256392556e-07, - "loss": 0.0013, + "epoch": 10.75639919470808, + "grad_norm": 1.6730691194534302, + "learning_rate": 3.147276252157055e-06, + "loss": 0.0323, "step": 37400 }, { - "epoch": 9.694537923893346, - "grad_norm": 0.06650124490261078, - "learning_rate": 3.440043719619179e-07, - "loss": 0.0014, + "epoch": 10.770779407535231, + "grad_norm": 2.75645112991333, + "learning_rate": 3.1366241291889476e-06, + "loss": 0.0261, "step": 37450 }, { - "epoch": 9.707481232202952, - "grad_norm": 0.04685758426785469, - "learning_rate": 3.2962291828458026e-07, - "loss": 0.0011, + "epoch": 10.785159620362382, + "grad_norm": 4.228376865386963, + "learning_rate": 3.12597200622084e-06, + "loss": 0.0251, "step": 37500 }, { - "epoch": 9.720424540512555, - "grad_norm": 0.23590688407421112, - "learning_rate": 3.1524146460724255e-07, - "loss": 0.0009, + "epoch": 10.799539833189531, + "grad_norm": 1.2543505430221558, + "learning_rate": 3.1153198832527325e-06, + "loss": 0.0221, "step": 37550 }, { - "epoch": 9.733367848822159, - "grad_norm": 0.6385647058486938, - "learning_rate": 3.0086001092990484e-07, - "loss": 0.0007, + "epoch": 10.813920046016682, + "grad_norm": 1.6210709810256958, + "learning_rate": 3.1046677602846247e-06, + "loss": 0.0251, "step": 37600 }, { - "epoch": 9.746311157131762, - "grad_norm": 0.9988199472427368, - "learning_rate": 2.8647855725256713e-07, - "loss": 0.0011, + "epoch": 10.82830025884383, + "grad_norm": 3.5723814964294434, + "learning_rate": 3.0940156373165174e-06, + "loss": 0.0282, "step": 37650 }, { - "epoch": 9.759254465441368, - "grad_norm": 0.09181234240531921, - "learning_rate": 2.720971035752294e-07, - "loss": 0.001, + "epoch": 10.842680471670981, + "grad_norm": 5.559849739074707, + "learning_rate": 3.0833635143484096e-06, + "loss": 0.0198, "step": 37700 }, { - "epoch": 9.772197773750971, - "grad_norm": 0.12787938117980957, - "learning_rate": 2.577156498978917e-07, - "loss": 0.0007, + "epoch": 10.85706068449813, + "grad_norm": 0.5983803272247314, + "learning_rate": 3.0727113913803023e-06, + "loss": 0.021, "step": 37750 }, { - "epoch": 9.785141082060575, - "grad_norm": 0.24333082139492035, - "learning_rate": 2.43334196220554e-07, - "loss": 0.0017, + "epoch": 10.87144089732528, + "grad_norm": 3.475189447402954, + "learning_rate": 3.0620592684121945e-06, + "loss": 0.0239, "step": 37800 }, { - "epoch": 9.798084390370178, - "grad_norm": 0.11896287649869919, - "learning_rate": 2.2895274254321627e-07, - "loss": 0.0038, + "epoch": 10.885821110152431, + "grad_norm": 0.9569188952445984, + "learning_rate": 3.0514071454440876e-06, + "loss": 0.0268, "step": 37850 }, { - "epoch": 9.811027698679782, - "grad_norm": 0.9559854865074158, - "learning_rate": 2.145712888658786e-07, - "loss": 0.0008, + "epoch": 10.90020132297958, + "grad_norm": 2.901486396789551, + "learning_rate": 3.0407550224759794e-06, + "loss": 0.0269, "step": 37900 }, { - "epoch": 9.823971006989387, - "grad_norm": 0.10168687999248505, - "learning_rate": 2.0018983518854086e-07, - "loss": 0.0009, + "epoch": 10.91458153580673, + "grad_norm": 4.810159683227539, + "learning_rate": 3.0301028995078725e-06, + "loss": 0.0392, "step": 37950 }, { - "epoch": 9.83691431529899, - "grad_norm": 0.5217211246490479, - "learning_rate": 1.8580838151120317e-07, - "loss": 0.0008, + "epoch": 10.92896174863388, + "grad_norm": 2.7533440589904785, + "learning_rate": 3.0194507765397648e-06, + "loss": 0.0272, "step": 38000 }, { - "epoch": 9.849857623608594, - "grad_norm": 0.03806522488594055, - "learning_rate": 1.7142692783386547e-07, - "loss": 0.0022, + "epoch": 10.94334196146103, + "grad_norm": 3.2379181385040283, + "learning_rate": 3.0087986535716574e-06, + "loss": 0.0224, "step": 38050 }, { - "epoch": 9.862800931918198, - "grad_norm": 0.05464790016412735, - "learning_rate": 1.5704547415652776e-07, - "loss": 0.0007, + "epoch": 10.957722174288179, + "grad_norm": 0.7579954266548157, + "learning_rate": 2.9981465306035497e-06, + "loss": 0.0176, "step": 38100 }, { - "epoch": 9.875744240227803, - "grad_norm": 0.16794097423553467, - "learning_rate": 1.4266402047919005e-07, - "loss": 0.0008, + "epoch": 10.97210238711533, + "grad_norm": 1.6765286922454834, + "learning_rate": 2.987494407635442e-06, + "loss": 0.0213, "step": 38150 }, { - "epoch": 9.888687548537407, - "grad_norm": 0.4726152718067169, - "learning_rate": 1.2828256680185234e-07, - "loss": 0.0008, + "epoch": 10.986482599942478, + "grad_norm": 2.7176685333251953, + "learning_rate": 2.9768422846673346e-06, + "loss": 0.0297, "step": 38200 }, { - "epoch": 9.90163085684701, - "grad_norm": 0.09927275031805038, - "learning_rate": 1.1390111312451463e-07, - "loss": 0.0009, + "epoch": 11.0, + "eval_cer": 13.969382884166356, + "eval_exact_match": 29.828534454868976, + "eval_loss": 0.5317708849906921, + "eval_runtime": 574.4057, + "eval_samples_per_second": 5.381, + "eval_steps_per_second": 0.674, + "eval_wer": 30.89171974522293, + "step": 38247 + }, + { + "epoch": 11.000862812769629, + "grad_norm": 0.3571583330631256, + "learning_rate": 2.966190161699227e-06, + "loss": 0.0215, "step": 38250 }, { - "epoch": 9.914574165156614, - "grad_norm": 0.07914838194847107, - "learning_rate": 9.951965944717692e-08, - "loss": 0.0013, + "epoch": 11.01524302559678, + "grad_norm": 0.43957722187042236, + "learning_rate": 2.9555380387311195e-06, + "loss": 0.0294, "step": 38300 }, { - "epoch": 9.927517473466217, - "grad_norm": 2.0464367866516113, - "learning_rate": 8.513820576983922e-08, - "loss": 0.0014, + "epoch": 11.029623238423929, + "grad_norm": 3.6876516342163086, + "learning_rate": 2.9448859157630117e-06, + "loss": 0.0127, "step": 38350 }, { - "epoch": 9.940460781775823, - "grad_norm": 0.0418660007417202, - "learning_rate": 7.075675209250152e-08, - "loss": 0.0008, + "epoch": 11.04400345125108, + "grad_norm": 0.5603832006454468, + "learning_rate": 2.9342337927949044e-06, + "loss": 0.0237, "step": 38400 }, { - "epoch": 9.953404090085426, - "grad_norm": 0.08231345564126968, - "learning_rate": 5.637529841516381e-08, - "loss": 0.0007, + "epoch": 11.058383664078228, + "grad_norm": 0.8400231003761292, + "learning_rate": 2.9235816698267966e-06, + "loss": 0.0255, "step": 38450 }, { - "epoch": 9.96634739839503, - "grad_norm": 0.05043479800224304, - "learning_rate": 4.19938447378261e-08, - "loss": 0.0009, + "epoch": 11.072763876905379, + "grad_norm": 0.4847192168235779, + "learning_rate": 2.9129295468586893e-06, + "loss": 0.012, "step": 38500 }, { - "epoch": 9.979290706704633, - "grad_norm": 0.11412689834833145, - "learning_rate": 2.7612391060488395e-08, - "loss": 0.0008, + "epoch": 11.087144089732528, + "grad_norm": 0.965175986289978, + "learning_rate": 2.9022774238905815e-06, + "loss": 0.0146, "step": 38550 }, { - "epoch": 9.992234015014237, - "grad_norm": 0.06143497675657272, - "learning_rate": 1.323093738315069e-08, - "loss": 0.0013, + "epoch": 11.101524302559678, + "grad_norm": 1.6067537069320679, + "learning_rate": 2.891625300922474e-06, + "loss": 0.0115, "step": 38600 }, { - "epoch": 10.0, - "eval_loss": 0.0013781202724203467, - "eval_runtime": 89.6083, - "eval_samples_per_second": 5.58, - "eval_steps_per_second": 0.703, - "eval_wer": 0.8082292432035268, - "step": 38630 + "epoch": 11.115904515386827, + "grad_norm": 2.414959192276001, + "learning_rate": 2.8809731779543665e-06, + "loss": 0.0213, + "step": 38650 + }, + { + "epoch": 11.130284728213978, + "grad_norm": 0.14320816099643707, + "learning_rate": 2.870321054986259e-06, + "loss": 0.0115, + "step": 38700 + }, + { + "epoch": 11.144664941041128, + "grad_norm": 0.369263231754303, + "learning_rate": 2.8596689320181514e-06, + "loss": 0.0104, + "step": 38750 + }, + { + "epoch": 11.159045153868277, + "grad_norm": 1.0977699756622314, + "learning_rate": 2.8490168090500436e-06, + "loss": 0.0117, + "step": 38800 + }, + { + "epoch": 11.173425366695428, + "grad_norm": 2.078298807144165, + "learning_rate": 2.8383646860819363e-06, + "loss": 0.0102, + "step": 38850 + }, + { + "epoch": 11.187805579522577, + "grad_norm": 6.753006458282471, + "learning_rate": 2.8277125631138285e-06, + "loss": 0.0159, + "step": 38900 + }, + { + "epoch": 11.202185792349727, + "grad_norm": 1.2461603879928589, + "learning_rate": 2.817060440145721e-06, + "loss": 0.0128, + "step": 38950 + }, + { + "epoch": 11.216566005176876, + "grad_norm": 4.858871936798096, + "learning_rate": 2.8064083171776134e-06, + "loss": 0.0162, + "step": 39000 + }, + { + "epoch": 11.230946218004027, + "grad_norm": 1.9736993312835693, + "learning_rate": 2.795756194209506e-06, + "loss": 0.0141, + "step": 39050 + }, + { + "epoch": 11.245326430831176, + "grad_norm": 0.4936278164386749, + "learning_rate": 2.7851040712413983e-06, + "loss": 0.0106, + "step": 39100 + }, + { + "epoch": 11.259706643658326, + "grad_norm": 4.108026027679443, + "learning_rate": 2.7744519482732914e-06, + "loss": 0.0105, + "step": 39150 + }, + { + "epoch": 11.274086856485477, + "grad_norm": 2.6271684169769287, + "learning_rate": 2.7637998253051832e-06, + "loss": 0.0117, + "step": 39200 + }, + { + "epoch": 11.288467069312626, + "grad_norm": 5.211180210113525, + "learning_rate": 2.7531477023370763e-06, + "loss": 0.0145, + "step": 39250 + }, + { + "epoch": 11.302847282139776, + "grad_norm": 3.0652973651885986, + "learning_rate": 2.7424955793689686e-06, + "loss": 0.0132, + "step": 39300 + }, + { + "epoch": 11.317227494966925, + "grad_norm": 2.2404987812042236, + "learning_rate": 2.7318434564008612e-06, + "loss": 0.0103, + "step": 39350 + }, + { + "epoch": 11.331607707794076, + "grad_norm": 3.8141326904296875, + "learning_rate": 2.7211913334327535e-06, + "loss": 0.0173, + "step": 39400 + }, + { + "epoch": 11.345987920621225, + "grad_norm": 1.008122205734253, + "learning_rate": 2.710539210464646e-06, + "loss": 0.0105, + "step": 39450 + }, + { + "epoch": 11.360368133448375, + "grad_norm": 2.658837080001831, + "learning_rate": 2.6998870874965384e-06, + "loss": 0.0119, + "step": 39500 + }, + { + "epoch": 11.374748346275524, + "grad_norm": 0.9930040836334229, + "learning_rate": 2.6892349645284306e-06, + "loss": 0.0115, + "step": 39550 + }, + { + "epoch": 11.389128559102675, + "grad_norm": 7.481790542602539, + "learning_rate": 2.6785828415603233e-06, + "loss": 0.0181, + "step": 39600 + }, + { + "epoch": 11.403508771929825, + "grad_norm": 2.2908804416656494, + "learning_rate": 2.6679307185922155e-06, + "loss": 0.0149, + "step": 39650 + }, + { + "epoch": 11.417888984756974, + "grad_norm": 0.39877453446388245, + "learning_rate": 2.657278595624108e-06, + "loss": 0.0217, + "step": 39700 + }, + { + "epoch": 11.432269197584125, + "grad_norm": 1.4716085195541382, + "learning_rate": 2.6466264726560004e-06, + "loss": 0.0122, + "step": 39750 + }, + { + "epoch": 11.446649410411274, + "grad_norm": 0.24586166441440582, + "learning_rate": 2.6361873921472553e-06, + "loss": 0.0116, + "step": 39800 + }, + { + "epoch": 11.461029623238424, + "grad_norm": 0.5717408061027527, + "learning_rate": 2.6255352691791475e-06, + "loss": 0.0116, + "step": 39850 + }, + { + "epoch": 11.475409836065573, + "grad_norm": 0.5635934472084045, + "learning_rate": 2.61488314621104e-06, + "loss": 0.0105, + "step": 39900 + }, + { + "epoch": 11.489790048892724, + "grad_norm": 0.6389666199684143, + "learning_rate": 2.6042310232429324e-06, + "loss": 0.013, + "step": 39950 + }, + { + "epoch": 11.504170261719873, + "grad_norm": 0.6271231174468994, + "learning_rate": 2.593578900274825e-06, + "loss": 0.0114, + "step": 40000 + }, + { + "epoch": 11.518550474547023, + "grad_norm": 1.498686671257019, + "learning_rate": 2.5829267773067173e-06, + "loss": 0.0158, + "step": 40050 + }, + { + "epoch": 11.532930687374174, + "grad_norm": 2.7515854835510254, + "learning_rate": 2.57227465433861e-06, + "loss": 0.0127, + "step": 40100 + }, + { + "epoch": 11.547310900201323, + "grad_norm": 1.1767064332962036, + "learning_rate": 2.5616225313705022e-06, + "loss": 0.01, + "step": 40150 + }, + { + "epoch": 11.561691113028473, + "grad_norm": 1.7428771257400513, + "learning_rate": 2.550970408402395e-06, + "loss": 0.0166, + "step": 40200 + }, + { + "epoch": 11.576071325855622, + "grad_norm": 3.2989461421966553, + "learning_rate": 2.540318285434287e-06, + "loss": 0.0093, + "step": 40250 + }, + { + "epoch": 11.590451538682773, + "grad_norm": 10.773270606994629, + "learning_rate": 2.5296661624661794e-06, + "loss": 0.0119, + "step": 40300 + }, + { + "epoch": 11.604831751509922, + "grad_norm": 1.0619690418243408, + "learning_rate": 2.519014039498072e-06, + "loss": 0.0169, + "step": 40350 + }, + { + "epoch": 11.619211964337072, + "grad_norm": 2.1145946979522705, + "learning_rate": 2.5083619165299643e-06, + "loss": 0.011, + "step": 40400 + }, + { + "epoch": 11.633592177164221, + "grad_norm": 2.9951775074005127, + "learning_rate": 2.4977097935618574e-06, + "loss": 0.0092, + "step": 40450 + }, + { + "epoch": 11.647972389991372, + "grad_norm": 1.3778321743011475, + "learning_rate": 2.4870576705937496e-06, + "loss": 0.0161, + "step": 40500 + }, + { + "epoch": 11.662352602818522, + "grad_norm": 0.7039114236831665, + "learning_rate": 2.4764055476256423e-06, + "loss": 0.0175, + "step": 40550 + }, + { + "epoch": 11.676732815645671, + "grad_norm": 1.171001672744751, + "learning_rate": 2.4657534246575345e-06, + "loss": 0.0121, + "step": 40600 + }, + { + "epoch": 11.691113028472822, + "grad_norm": 5.978137969970703, + "learning_rate": 2.4551013016894268e-06, + "loss": 0.0191, + "step": 40650 + }, + { + "epoch": 11.70549324129997, + "grad_norm": 2.9138379096984863, + "learning_rate": 2.4444491787213194e-06, + "loss": 0.0154, + "step": 40700 + }, + { + "epoch": 11.719873454127121, + "grad_norm": 2.822955369949341, + "learning_rate": 2.4337970557532117e-06, + "loss": 0.0126, + "step": 40750 + }, + { + "epoch": 11.73425366695427, + "grad_norm": 4.79074764251709, + "learning_rate": 2.4231449327851043e-06, + "loss": 0.0146, + "step": 40800 + }, + { + "epoch": 11.748633879781421, + "grad_norm": 0.6291407346725464, + "learning_rate": 2.4124928098169966e-06, + "loss": 0.0123, + "step": 40850 + }, + { + "epoch": 11.76301409260857, + "grad_norm": 0.731688916683197, + "learning_rate": 2.4018406868488892e-06, + "loss": 0.017, + "step": 40900 + }, + { + "epoch": 11.77739430543572, + "grad_norm": 1.3242790699005127, + "learning_rate": 2.3911885638807815e-06, + "loss": 0.0282, + "step": 40950 + }, + { + "epoch": 11.791774518262871, + "grad_norm": 0.9765246510505676, + "learning_rate": 2.380536440912674e-06, + "loss": 0.0111, + "step": 41000 + }, + { + "epoch": 11.80615473109002, + "grad_norm": 4.890609264373779, + "learning_rate": 2.3698843179445664e-06, + "loss": 0.0248, + "step": 41050 + }, + { + "epoch": 11.82053494391717, + "grad_norm": 1.8218705654144287, + "learning_rate": 2.359232194976459e-06, + "loss": 0.0086, + "step": 41100 + }, + { + "epoch": 11.83491515674432, + "grad_norm": 2.010406494140625, + "learning_rate": 2.3485800720083517e-06, + "loss": 0.0165, + "step": 41150 + }, + { + "epoch": 11.84929536957147, + "grad_norm": 6.489707946777344, + "learning_rate": 2.337927949040244e-06, + "loss": 0.0149, + "step": 41200 + }, + { + "epoch": 11.863675582398619, + "grad_norm": 1.2019199132919312, + "learning_rate": 2.3272758260721366e-06, + "loss": 0.02, + "step": 41250 + }, + { + "epoch": 11.87805579522577, + "grad_norm": 3.039811134338379, + "learning_rate": 2.316623703104029e-06, + "loss": 0.0107, + "step": 41300 + }, + { + "epoch": 11.892436008052918, + "grad_norm": 4.356081962585449, + "learning_rate": 2.305971580135921e-06, + "loss": 0.0153, + "step": 41350 + }, + { + "epoch": 11.906816220880069, + "grad_norm": 0.5067114233970642, + "learning_rate": 2.2953194571678138e-06, + "loss": 0.0137, + "step": 41400 + }, + { + "epoch": 11.92119643370722, + "grad_norm": 3.4604523181915283, + "learning_rate": 2.284667334199706e-06, + "loss": 0.0149, + "step": 41450 + }, + { + "epoch": 11.935576646534368, + "grad_norm": 2.847642421722412, + "learning_rate": 2.2740152112315987e-06, + "loss": 0.0169, + "step": 41500 + }, + { + "epoch": 11.949956859361519, + "grad_norm": 1.4214160442352295, + "learning_rate": 2.263363088263491e-06, + "loss": 0.0127, + "step": 41550 + }, + { + "epoch": 11.964337072188668, + "grad_norm": 1.0439202785491943, + "learning_rate": 2.2527109652953836e-06, + "loss": 0.0113, + "step": 41600 + }, + { + "epoch": 11.978717285015819, + "grad_norm": 3.5821151733398438, + "learning_rate": 2.242058842327276e-06, + "loss": 0.0222, + "step": 41650 + }, + { + "epoch": 11.993097497842967, + "grad_norm": 0.11335547268390656, + "learning_rate": 2.2314067193591685e-06, + "loss": 0.0117, + "step": 41700 + }, + { + "epoch": 12.0, + "eval_cer": 13.6104440110864, + "eval_exact_match": 29.76383047557425, + "eval_loss": 0.5622881054878235, + "eval_runtime": 557.7038, + "eval_samples_per_second": 5.542, + "eval_steps_per_second": 0.694, + "eval_wer": 30.62379941360833, + "step": 41724 + }, + { + "epoch": 12.007477710670118, + "grad_norm": 2.750352621078491, + "learning_rate": 2.2207545963910607e-06, + "loss": 0.0268, + "step": 41750 + }, + { + "epoch": 12.021857923497267, + "grad_norm": 0.496599406003952, + "learning_rate": 2.2101024734229534e-06, + "loss": 0.0103, + "step": 41800 + }, + { + "epoch": 12.036238136324418, + "grad_norm": 1.3365594148635864, + "learning_rate": 2.199450350454846e-06, + "loss": 0.0095, + "step": 41850 + }, + { + "epoch": 12.050618349151568, + "grad_norm": 1.1375505924224854, + "learning_rate": 2.1887982274867383e-06, + "loss": 0.008, + "step": 41900 + }, + { + "epoch": 12.064998561978717, + "grad_norm": 0.3005922734737396, + "learning_rate": 2.178146104518631e-06, + "loss": 0.0123, + "step": 41950 + }, + { + "epoch": 12.079378774805868, + "grad_norm": 0.9608623385429382, + "learning_rate": 2.1674939815505232e-06, + "loss": 0.0058, + "step": 42000 + }, + { + "epoch": 12.093758987633016, + "grad_norm": 0.6064761281013489, + "learning_rate": 2.1568418585824155e-06, + "loss": 0.0139, + "step": 42050 + }, + { + "epoch": 12.108139200460167, + "grad_norm": 0.9701228737831116, + "learning_rate": 2.146189735614308e-06, + "loss": 0.0085, + "step": 42100 + }, + { + "epoch": 12.122519413287316, + "grad_norm": 0.2135896384716034, + "learning_rate": 2.1355376126462004e-06, + "loss": 0.0056, + "step": 42150 + }, + { + "epoch": 12.136899626114467, + "grad_norm": 0.40170741081237793, + "learning_rate": 2.124885489678093e-06, + "loss": 0.0209, + "step": 42200 + }, + { + "epoch": 12.151279838941615, + "grad_norm": 2.322916269302368, + "learning_rate": 2.1142333667099853e-06, + "loss": 0.0055, + "step": 42250 + }, + { + "epoch": 12.165660051768766, + "grad_norm": 0.6060650944709778, + "learning_rate": 2.103581243741878e-06, + "loss": 0.0072, + "step": 42300 + }, + { + "epoch": 12.180040264595917, + "grad_norm": 0.4080408215522766, + "learning_rate": 2.09292912077377e-06, + "loss": 0.0086, + "step": 42350 + }, + { + "epoch": 12.194420477423066, + "grad_norm": 0.7013692259788513, + "learning_rate": 2.082276997805663e-06, + "loss": 0.0118, + "step": 42400 + }, + { + "epoch": 12.208800690250216, + "grad_norm": 0.908812403678894, + "learning_rate": 2.0716248748375555e-06, + "loss": 0.0076, + "step": 42450 + }, + { + "epoch": 12.223180903077365, + "grad_norm": 0.19862985610961914, + "learning_rate": 2.0609727518694478e-06, + "loss": 0.0071, + "step": 42500 + }, + { + "epoch": 12.237561115904516, + "grad_norm": 0.47320783138275146, + "learning_rate": 2.0503206289013404e-06, + "loss": 0.0147, + "step": 42550 + }, + { + "epoch": 12.251941328731665, + "grad_norm": 0.26755332946777344, + "learning_rate": 2.0396685059332327e-06, + "loss": 0.0063, + "step": 42600 + }, + { + "epoch": 12.266321541558815, + "grad_norm": 7.937039852142334, + "learning_rate": 2.0290163829651253e-06, + "loss": 0.0077, + "step": 42650 + }, + { + "epoch": 12.280701754385966, + "grad_norm": 0.5405824780464172, + "learning_rate": 2.0183642599970176e-06, + "loss": 0.0042, + "step": 42700 + }, + { + "epoch": 12.295081967213115, + "grad_norm": 0.10481663048267365, + "learning_rate": 2.0077121370289102e-06, + "loss": 0.0047, + "step": 42750 + }, + { + "epoch": 12.309462180040265, + "grad_norm": 3.8102521896362305, + "learning_rate": 1.9970600140608025e-06, + "loss": 0.0079, + "step": 42800 + }, + { + "epoch": 12.323842392867414, + "grad_norm": 0.1108594760298729, + "learning_rate": 1.9864078910926947e-06, + "loss": 0.0095, + "step": 42850 + }, + { + "epoch": 12.338222605694565, + "grad_norm": 0.49097001552581787, + "learning_rate": 1.9757557681245874e-06, + "loss": 0.0067, + "step": 42900 + }, + { + "epoch": 12.352602818521714, + "grad_norm": 0.8421845436096191, + "learning_rate": 1.9651036451564796e-06, + "loss": 0.0053, + "step": 42950 + }, + { + "epoch": 12.366983031348864, + "grad_norm": 1.1990987062454224, + "learning_rate": 1.9544515221883723e-06, + "loss": 0.0073, + "step": 43000 + }, + { + "epoch": 12.381363244176013, + "grad_norm": 4.306312561035156, + "learning_rate": 1.9437993992202645e-06, + "loss": 0.0128, + "step": 43050 + }, + { + "epoch": 12.395743457003164, + "grad_norm": 0.10761965066194534, + "learning_rate": 1.933147276252157e-06, + "loss": 0.0091, + "step": 43100 + }, + { + "epoch": 12.410123669830314, + "grad_norm": 0.727603018283844, + "learning_rate": 1.92249515328405e-06, + "loss": 0.0143, + "step": 43150 + }, + { + "epoch": 12.424503882657463, + "grad_norm": 1.3019673824310303, + "learning_rate": 1.911843030315942e-06, + "loss": 0.0063, + "step": 43200 + }, + { + "epoch": 12.438884095484614, + "grad_norm": 0.9548994898796082, + "learning_rate": 1.9011909073478346e-06, + "loss": 0.0081, + "step": 43250 + }, + { + "epoch": 12.453264308311763, + "grad_norm": 1.5599642992019653, + "learning_rate": 1.890538784379727e-06, + "loss": 0.0087, + "step": 43300 + }, + { + "epoch": 12.467644521138913, + "grad_norm": 9.21058177947998, + "learning_rate": 1.8798866614116197e-06, + "loss": 0.0054, + "step": 43350 + }, + { + "epoch": 12.482024733966062, + "grad_norm": 1.004815936088562, + "learning_rate": 1.8692345384435121e-06, + "loss": 0.0107, + "step": 43400 + }, + { + "epoch": 12.496404946793213, + "grad_norm": 3.4639337062835693, + "learning_rate": 1.8585824154754046e-06, + "loss": 0.0078, + "step": 43450 + }, + { + "epoch": 12.510785159620362, + "grad_norm": 0.7338430285453796, + "learning_rate": 1.8479302925072968e-06, + "loss": 0.0075, + "step": 43500 + }, + { + "epoch": 12.525165372447512, + "grad_norm": 0.6672789454460144, + "learning_rate": 1.8372781695391893e-06, + "loss": 0.0067, + "step": 43550 + }, + { + "epoch": 12.539545585274663, + "grad_norm": 0.2793140411376953, + "learning_rate": 1.8266260465710817e-06, + "loss": 0.0091, + "step": 43600 + }, + { + "epoch": 12.553925798101812, + "grad_norm": 0.16195808351039886, + "learning_rate": 1.8159739236029742e-06, + "loss": 0.0126, + "step": 43650 + }, + { + "epoch": 12.568306010928962, + "grad_norm": 4.185794830322266, + "learning_rate": 1.8053218006348666e-06, + "loss": 0.0073, + "step": 43700 + }, + { + "epoch": 12.582686223756111, + "grad_norm": 0.42922982573509216, + "learning_rate": 1.794669677666759e-06, + "loss": 0.0055, + "step": 43750 + }, + { + "epoch": 12.597066436583262, + "grad_norm": 2.3991310596466064, + "learning_rate": 1.7840175546986515e-06, + "loss": 0.01, + "step": 43800 + }, + { + "epoch": 12.61144664941041, + "grad_norm": 9.338133811950684, + "learning_rate": 1.773365431730544e-06, + "loss": 0.0123, + "step": 43850 + }, + { + "epoch": 12.625826862237561, + "grad_norm": 0.12896443903446198, + "learning_rate": 1.7627133087624365e-06, + "loss": 0.0095, + "step": 43900 + }, + { + "epoch": 12.64020707506471, + "grad_norm": 0.19092139601707458, + "learning_rate": 1.752061185794329e-06, + "loss": 0.0102, + "step": 43950 + }, + { + "epoch": 12.65458728789186, + "grad_norm": 1.3799223899841309, + "learning_rate": 1.7414090628262214e-06, + "loss": 0.0077, + "step": 44000 + }, + { + "epoch": 12.668967500719011, + "grad_norm": 0.8221663236618042, + "learning_rate": 1.730756939858114e-06, + "loss": 0.0053, + "step": 44050 + }, + { + "epoch": 12.68334771354616, + "grad_norm": 0.5113213658332825, + "learning_rate": 1.7201048168900065e-06, + "loss": 0.0186, + "step": 44100 + }, + { + "epoch": 12.697727926373311, + "grad_norm": 0.6714016199111938, + "learning_rate": 1.709452693921899e-06, + "loss": 0.0195, + "step": 44150 + }, + { + "epoch": 12.71210813920046, + "grad_norm": 0.6827043890953064, + "learning_rate": 1.6988005709537914e-06, + "loss": 0.0094, + "step": 44200 + }, + { + "epoch": 12.72648835202761, + "grad_norm": 2.618487596511841, + "learning_rate": 1.6881484479856836e-06, + "loss": 0.0155, + "step": 44250 + }, + { + "epoch": 12.74086856485476, + "grad_norm": 1.4105720520019531, + "learning_rate": 1.677496325017576e-06, + "loss": 0.0073, + "step": 44300 + }, + { + "epoch": 12.75524877768191, + "grad_norm": 1.7199159860610962, + "learning_rate": 1.6668442020494685e-06, + "loss": 0.0124, + "step": 44350 + }, + { + "epoch": 12.769628990509059, + "grad_norm": 0.8179599046707153, + "learning_rate": 1.656192079081361e-06, + "loss": 0.0098, + "step": 44400 + }, + { + "epoch": 12.78400920333621, + "grad_norm": 0.7759775519371033, + "learning_rate": 1.6455399561132534e-06, + "loss": 0.0053, + "step": 44450 + }, + { + "epoch": 12.79838941616336, + "grad_norm": 2.021124839782715, + "learning_rate": 1.634887833145146e-06, + "loss": 0.0115, + "step": 44500 + }, + { + "epoch": 12.812769628990509, + "grad_norm": 0.36427387595176697, + "learning_rate": 1.6244487526364005e-06, + "loss": 0.0085, + "step": 44550 + }, + { + "epoch": 12.82714984181766, + "grad_norm": 1.6332464218139648, + "learning_rate": 1.613796629668293e-06, + "loss": 0.0067, + "step": 44600 + }, + { + "epoch": 12.841530054644808, + "grad_norm": 1.1689702272415161, + "learning_rate": 1.6031445067001854e-06, + "loss": 0.0102, + "step": 44650 + }, + { + "epoch": 12.855910267471959, + "grad_norm": 0.23096215724945068, + "learning_rate": 1.592492383732078e-06, + "loss": 0.0067, + "step": 44700 + }, + { + "epoch": 12.870290480299108, + "grad_norm": 0.3984571099281311, + "learning_rate": 1.5818402607639705e-06, + "loss": 0.0061, + "step": 44750 + }, + { + "epoch": 12.884670693126258, + "grad_norm": 0.7753713726997375, + "learning_rate": 1.571188137795863e-06, + "loss": 0.0104, + "step": 44800 + }, + { + "epoch": 12.899050905953407, + "grad_norm": 3.2979679107666016, + "learning_rate": 1.5605360148277554e-06, + "loss": 0.0092, + "step": 44850 + }, + { + "epoch": 12.913431118780558, + "grad_norm": 1.4297447204589844, + "learning_rate": 1.549883891859648e-06, + "loss": 0.0064, + "step": 44900 + }, + { + "epoch": 12.927811331607709, + "grad_norm": 0.1881074607372284, + "learning_rate": 1.5392317688915401e-06, + "loss": 0.0058, + "step": 44950 + }, + { + "epoch": 12.942191544434857, + "grad_norm": 0.6955016851425171, + "learning_rate": 1.5285796459234326e-06, + "loss": 0.0058, + "step": 45000 + }, + { + "epoch": 12.956571757262008, + "grad_norm": 0.531470000743866, + "learning_rate": 1.517927522955325e-06, + "loss": 0.0151, + "step": 45050 + }, + { + "epoch": 12.970951970089157, + "grad_norm": 0.6474351286888123, + "learning_rate": 1.5072753999872175e-06, + "loss": 0.0092, + "step": 45100 + }, + { + "epoch": 12.985332182916308, + "grad_norm": 0.5572118759155273, + "learning_rate": 1.49662327701911e-06, + "loss": 0.0074, + "step": 45150 + }, + { + "epoch": 12.999712395743456, + "grad_norm": 0.18271704018115997, + "learning_rate": 1.4859711540510024e-06, + "loss": 0.0083, + "step": 45200 + }, + { + "epoch": 13.0, + "eval_cer": 13.789543407551038, + "eval_exact_match": 29.50501455839534, + "eval_loss": 0.5821194052696228, + "eval_runtime": 557.2431, + "eval_samples_per_second": 5.547, + "eval_steps_per_second": 0.694, + "eval_wer": 30.918680281737604, + "step": 45201 + }, + { + "epoch": 13.014092608570607, + "grad_norm": 2.6434476375579834, + "learning_rate": 1.4753190310828949e-06, + "loss": 0.0055, + "step": 45250 + }, + { + "epoch": 13.028472821397756, + "grad_norm": 0.5062028169631958, + "learning_rate": 1.4646669081147873e-06, + "loss": 0.0056, + "step": 45300 + }, + { + "epoch": 13.042853034224906, + "grad_norm": 1.317589521408081, + "learning_rate": 1.4540147851466798e-06, + "loss": 0.0109, + "step": 45350 + }, + { + "epoch": 13.057233247052057, + "grad_norm": 0.1666550487279892, + "learning_rate": 1.4433626621785724e-06, + "loss": 0.0188, + "step": 45400 + }, + { + "epoch": 13.071613459879206, + "grad_norm": 0.7953757643699646, + "learning_rate": 1.4327105392104649e-06, + "loss": 0.018, + "step": 45450 + }, + { + "epoch": 13.085993672706357, + "grad_norm": 0.8918612003326416, + "learning_rate": 1.4220584162423573e-06, + "loss": 0.01, + "step": 45500 + }, + { + "epoch": 13.100373885533505, + "grad_norm": 0.14003297686576843, + "learning_rate": 1.4114062932742498e-06, + "loss": 0.0071, + "step": 45550 + }, + { + "epoch": 13.114754098360656, + "grad_norm": 0.5129509568214417, + "learning_rate": 1.4007541703061423e-06, + "loss": 0.0042, + "step": 45600 + }, + { + "epoch": 13.129134311187805, + "grad_norm": 0.31907811760902405, + "learning_rate": 1.3901020473380347e-06, + "loss": 0.0037, + "step": 45650 + }, + { + "epoch": 13.143514524014956, + "grad_norm": 0.3167027235031128, + "learning_rate": 1.379449924369927e-06, + "loss": 0.0047, + "step": 45700 + }, + { + "epoch": 13.157894736842104, + "grad_norm": 0.20696650445461273, + "learning_rate": 1.3687978014018194e-06, + "loss": 0.0047, + "step": 45750 + }, + { + "epoch": 13.172274949669255, + "grad_norm": 0.3045828640460968, + "learning_rate": 1.3581456784337119e-06, + "loss": 0.0098, + "step": 45800 + }, + { + "epoch": 13.186655162496406, + "grad_norm": 0.6138525605201721, + "learning_rate": 1.3474935554656043e-06, + "loss": 0.0047, + "step": 45850 + }, + { + "epoch": 13.201035375323555, + "grad_norm": 0.6493815183639526, + "learning_rate": 1.3368414324974968e-06, + "loss": 0.0044, + "step": 45900 + }, + { + "epoch": 13.215415588150705, + "grad_norm": 0.22776393592357635, + "learning_rate": 1.3261893095293892e-06, + "loss": 0.0049, + "step": 45950 + }, + { + "epoch": 13.229795800977854, + "grad_norm": 0.2620120346546173, + "learning_rate": 1.3155371865612817e-06, + "loss": 0.0067, + "step": 46000 + }, + { + "epoch": 13.244176013805005, + "grad_norm": 0.24613003432750702, + "learning_rate": 1.3048850635931743e-06, + "loss": 0.0051, + "step": 46050 + }, + { + "epoch": 13.258556226632153, + "grad_norm": 3.182950735092163, + "learning_rate": 1.2942329406250668e-06, + "loss": 0.0067, + "step": 46100 + }, + { + "epoch": 13.272936439459304, + "grad_norm": 0.5545868873596191, + "learning_rate": 1.2835808176569592e-06, + "loss": 0.0057, + "step": 46150 + }, + { + "epoch": 13.287316652286453, + "grad_norm": 0.2146327942609787, + "learning_rate": 1.2729286946888517e-06, + "loss": 0.0073, + "step": 46200 + }, + { + "epoch": 13.301696865113604, + "grad_norm": 1.4346431493759155, + "learning_rate": 1.2622765717207441e-06, + "loss": 0.0039, + "step": 46250 + }, + { + "epoch": 13.316077077940754, + "grad_norm": 0.5773711204528809, + "learning_rate": 1.2516244487526366e-06, + "loss": 0.0037, + "step": 46300 + }, + { + "epoch": 13.330457290767903, + "grad_norm": 1.615006923675537, + "learning_rate": 1.2409723257845288e-06, + "loss": 0.0051, + "step": 46350 + }, + { + "epoch": 13.344837503595054, + "grad_norm": 0.2363695204257965, + "learning_rate": 1.2303202028164215e-06, + "loss": 0.0042, + "step": 46400 + }, + { + "epoch": 13.359217716422203, + "grad_norm": 0.8145747184753418, + "learning_rate": 1.219668079848314e-06, + "loss": 0.0053, + "step": 46450 + }, + { + "epoch": 13.373597929249353, + "grad_norm": 0.19969283044338226, + "learning_rate": 1.2090159568802064e-06, + "loss": 0.005, + "step": 46500 + }, + { + "epoch": 13.387978142076502, + "grad_norm": 0.25231361389160156, + "learning_rate": 1.1983638339120989e-06, + "loss": 0.0093, + "step": 46550 + }, + { + "epoch": 13.402358354903653, + "grad_norm": 0.7276626229286194, + "learning_rate": 1.1877117109439911e-06, + "loss": 0.0047, + "step": 46600 + }, + { + "epoch": 13.416738567730803, + "grad_norm": 0.37422502040863037, + "learning_rate": 1.1770595879758836e-06, + "loss": 0.0048, + "step": 46650 + }, + { + "epoch": 13.431118780557952, + "grad_norm": 0.5370374321937561, + "learning_rate": 1.1664074650077762e-06, + "loss": 0.0034, + "step": 46700 + }, + { + "epoch": 13.445498993385103, + "grad_norm": 0.1501835584640503, + "learning_rate": 1.1557553420396687e-06, + "loss": 0.0035, + "step": 46750 + }, + { + "epoch": 13.459879206212252, + "grad_norm": 1.0305863618850708, + "learning_rate": 1.1451032190715611e-06, + "loss": 0.0095, + "step": 46800 + }, + { + "epoch": 13.474259419039402, + "grad_norm": 0.5468102097511292, + "learning_rate": 1.1344510961034536e-06, + "loss": 0.0067, + "step": 46850 + }, + { + "epoch": 13.488639631866551, + "grad_norm": 1.6052461862564087, + "learning_rate": 1.123798973135346e-06, + "loss": 0.0079, + "step": 46900 + }, + { + "epoch": 13.503019844693702, + "grad_norm": 0.2779121994972229, + "learning_rate": 1.1131468501672383e-06, + "loss": 0.0042, + "step": 46950 + }, + { + "epoch": 13.51740005752085, + "grad_norm": 0.3229649066925049, + "learning_rate": 1.1024947271991307e-06, + "loss": 0.0048, + "step": 47000 + }, + { + "epoch": 13.531780270348001, + "grad_norm": 0.7473021149635315, + "learning_rate": 1.0918426042310234e-06, + "loss": 0.0053, + "step": 47050 + }, + { + "epoch": 13.54616048317515, + "grad_norm": 0.5433935523033142, + "learning_rate": 1.0811904812629159e-06, + "loss": 0.0061, + "step": 47100 + }, + { + "epoch": 13.5605406960023, + "grad_norm": 0.10840893536806107, + "learning_rate": 1.0705383582948083e-06, + "loss": 0.0063, + "step": 47150 + }, + { + "epoch": 13.574920908829451, + "grad_norm": 0.28310108184814453, + "learning_rate": 1.0598862353267008e-06, + "loss": 0.0042, + "step": 47200 + }, + { + "epoch": 13.5893011216566, + "grad_norm": 0.20783166587352753, + "learning_rate": 1.0492341123585932e-06, + "loss": 0.0046, + "step": 47250 + }, + { + "epoch": 13.60368133448375, + "grad_norm": 0.16169953346252441, + "learning_rate": 1.0385819893904855e-06, + "loss": 0.0034, + "step": 47300 + }, + { + "epoch": 13.6180615473109, + "grad_norm": 0.5683311820030212, + "learning_rate": 1.0279298664223781e-06, + "loss": 0.0042, + "step": 47350 + }, + { + "epoch": 13.63244176013805, + "grad_norm": 0.2614581882953644, + "learning_rate": 1.0172777434542706e-06, + "loss": 0.0056, + "step": 47400 + }, + { + "epoch": 13.6468219729652, + "grad_norm": 0.5136411190032959, + "learning_rate": 1.006625620486163e-06, + "loss": 0.0119, + "step": 47450 + }, + { + "epoch": 13.66120218579235, + "grad_norm": 0.407742977142334, + "learning_rate": 9.959734975180555e-07, + "loss": 0.0057, + "step": 47500 + }, + { + "epoch": 13.6755823986195, + "grad_norm": 0.1719302237033844, + "learning_rate": 9.85321374549948e-07, + "loss": 0.0046, + "step": 47550 + }, + { + "epoch": 13.68996261144665, + "grad_norm": 0.7320683598518372, + "learning_rate": 9.746692515818404e-07, + "loss": 0.0069, + "step": 47600 + }, + { + "epoch": 13.7043428242738, + "grad_norm": 0.5749322772026062, + "learning_rate": 9.640171286137326e-07, + "loss": 0.0049, + "step": 47650 + }, + { + "epoch": 13.718723037100949, + "grad_norm": 0.5423254370689392, + "learning_rate": 9.533650056456252e-07, + "loss": 0.0066, + "step": 47700 + }, + { + "epoch": 13.7331032499281, + "grad_norm": 1.9814480543136597, + "learning_rate": 9.427128826775176e-07, + "loss": 0.0071, + "step": 47750 + }, + { + "epoch": 13.747483462755248, + "grad_norm": 0.5714386701583862, + "learning_rate": 9.320607597094101e-07, + "loss": 0.0068, + "step": 47800 + }, + { + "epoch": 13.761863675582399, + "grad_norm": 0.18976716697216034, + "learning_rate": 9.214086367413027e-07, + "loss": 0.0053, + "step": 47850 + }, + { + "epoch": 13.776243888409548, + "grad_norm": 0.26020359992980957, + "learning_rate": 9.109695562325573e-07, + "loss": 0.0055, + "step": 47900 + }, + { + "epoch": 13.790624101236698, + "grad_norm": 1.036350131034851, + "learning_rate": 9.003174332644497e-07, + "loss": 0.003, + "step": 47950 + }, + { + "epoch": 13.805004314063847, + "grad_norm": 1.325669765472412, + "learning_rate": 8.896653102963421e-07, + "loss": 0.0059, + "step": 48000 + }, + { + "epoch": 13.819384526890998, + "grad_norm": 0.8311572670936584, + "learning_rate": 8.790131873282345e-07, + "loss": 0.0031, + "step": 48050 + }, + { + "epoch": 13.833764739718148, + "grad_norm": 0.34271135926246643, + "learning_rate": 8.68361064360127e-07, + "loss": 0.0053, + "step": 48100 + }, + { + "epoch": 13.848144952545297, + "grad_norm": 0.18856693804264069, + "learning_rate": 8.577089413920194e-07, + "loss": 0.0032, + "step": 48150 + }, + { + "epoch": 13.862525165372448, + "grad_norm": 1.3996261358261108, + "learning_rate": 8.47056818423912e-07, + "loss": 0.0037, + "step": 48200 + }, + { + "epoch": 13.876905378199597, + "grad_norm": 0.18578016757965088, + "learning_rate": 8.364046954558045e-07, + "loss": 0.0062, + "step": 48250 + }, + { + "epoch": 13.891285591026747, + "grad_norm": 0.18855896592140198, + "learning_rate": 8.257525724876969e-07, + "loss": 0.0047, + "step": 48300 + }, + { + "epoch": 13.905665803853896, + "grad_norm": 2.869654417037964, + "learning_rate": 8.151004495195894e-07, + "loss": 0.0111, + "step": 48350 + }, + { + "epoch": 13.920046016681047, + "grad_norm": 0.22920694947242737, + "learning_rate": 8.044483265514817e-07, + "loss": 0.0096, + "step": 48400 + }, + { + "epoch": 13.934426229508198, + "grad_norm": 2.748331069946289, + "learning_rate": 7.937962035833742e-07, + "loss": 0.0072, + "step": 48450 + }, + { + "epoch": 13.948806442335346, + "grad_norm": 0.12169869244098663, + "learning_rate": 7.831440806152667e-07, + "loss": 0.0033, + "step": 48500 + }, + { + "epoch": 13.963186655162497, + "grad_norm": 0.36550626158714294, + "learning_rate": 7.724919576471592e-07, + "loss": 0.0126, + "step": 48550 + }, + { + "epoch": 13.977566867989646, + "grad_norm": 0.23332859575748444, + "learning_rate": 7.618398346790516e-07, + "loss": 0.0052, + "step": 48600 + }, + { + "epoch": 13.991947080816797, + "grad_norm": 0.3605318069458008, + "learning_rate": 7.511877117109441e-07, + "loss": 0.0062, + "step": 48650 + }, + { + "epoch": 14.0, + "eval_cer": 13.924237994974856, + "eval_exact_match": 29.602070527337432, + "eval_loss": 0.6003495454788208, + "eval_runtime": 557.0774, + "eval_samples_per_second": 5.549, + "eval_steps_per_second": 0.695, + "eval_wer": 31.061908131971826, + "step": 48678 + }, + { + "epoch": 14.006327293643945, + "grad_norm": 0.058249905705451965, + "learning_rate": 7.405355887428365e-07, + "loss": 0.008, + "step": 48700 + }, + { + "epoch": 14.020707506471096, + "grad_norm": 0.1586432307958603, + "learning_rate": 7.298834657747289e-07, + "loss": 0.0029, + "step": 48750 + }, + { + "epoch": 14.035087719298245, + "grad_norm": 0.30648553371429443, + "learning_rate": 7.192313428066213e-07, + "loss": 0.0057, + "step": 48800 + }, + { + "epoch": 14.049467932125395, + "grad_norm": 0.6734074354171753, + "learning_rate": 7.085792198385139e-07, + "loss": 0.0042, + "step": 48850 + }, + { + "epoch": 14.063848144952546, + "grad_norm": 0.473695307970047, + "learning_rate": 6.979270968704063e-07, + "loss": 0.004, + "step": 48900 + }, + { + "epoch": 14.078228357779695, + "grad_norm": 0.1658676266670227, + "learning_rate": 6.872749739022988e-07, + "loss": 0.0052, + "step": 48950 + }, + { + "epoch": 14.092608570606846, + "grad_norm": 0.2595652639865875, + "learning_rate": 6.766228509341913e-07, + "loss": 0.0054, + "step": 49000 + }, + { + "epoch": 14.106988783433994, + "grad_norm": 0.23910076916217804, + "learning_rate": 6.659707279660837e-07, + "loss": 0.0038, + "step": 49050 + }, + { + "epoch": 14.121368996261145, + "grad_norm": 0.2196033000946045, + "learning_rate": 6.553186049979761e-07, + "loss": 0.0024, + "step": 49100 + }, + { + "epoch": 14.135749209088294, + "grad_norm": 0.4591156244277954, + "learning_rate": 6.446664820298685e-07, + "loss": 0.0121, + "step": 49150 + }, + { + "epoch": 14.150129421915445, + "grad_norm": 0.15816880762577057, + "learning_rate": 6.340143590617611e-07, + "loss": 0.003, + "step": 49200 + }, + { + "epoch": 14.164509634742593, + "grad_norm": 0.2226686328649521, + "learning_rate": 6.233622360936535e-07, + "loss": 0.0048, + "step": 49250 + }, + { + "epoch": 14.178889847569744, + "grad_norm": 0.23074030876159668, + "learning_rate": 6.12710113125546e-07, + "loss": 0.0023, + "step": 49300 + }, + { + "epoch": 14.193270060396895, + "grad_norm": 0.23567402362823486, + "learning_rate": 6.020579901574384e-07, + "loss": 0.0054, + "step": 49350 + }, + { + "epoch": 14.207650273224044, + "grad_norm": 1.8261570930480957, + "learning_rate": 5.914058671893309e-07, + "loss": 0.0054, + "step": 49400 + }, + { + "epoch": 14.222030486051194, + "grad_norm": 0.061844538897275925, + "learning_rate": 5.807537442212233e-07, + "loss": 0.0036, + "step": 49450 + }, + { + "epoch": 14.236410698878343, + "grad_norm": 0.20559872686862946, + "learning_rate": 5.701016212531158e-07, + "loss": 0.0029, + "step": 49500 + }, + { + "epoch": 14.250790911705494, + "grad_norm": 0.2641324996948242, + "learning_rate": 5.594494982850082e-07, + "loss": 0.0028, + "step": 49550 + }, + { + "epoch": 14.265171124532642, + "grad_norm": 0.06046381965279579, + "learning_rate": 5.487973753169007e-07, + "loss": 0.0023, + "step": 49600 + }, + { + "epoch": 14.279551337359793, + "grad_norm": 0.11998997628688812, + "learning_rate": 5.381452523487932e-07, + "loss": 0.0053, + "step": 49650 + }, + { + "epoch": 14.293931550186942, + "grad_norm": 0.9233036041259766, + "learning_rate": 5.274931293806856e-07, + "loss": 0.0031, + "step": 49700 + }, + { + "epoch": 14.308311763014093, + "grad_norm": 0.34108608961105347, + "learning_rate": 5.168410064125781e-07, + "loss": 0.0111, + "step": 49750 + }, + { + "epoch": 14.322691975841243, + "grad_norm": 1.6962041854858398, + "learning_rate": 5.061888834444705e-07, + "loss": 0.0023, + "step": 49800 + }, + { + "epoch": 14.337072188668392, + "grad_norm": 0.2750893235206604, + "learning_rate": 4.95536760476363e-07, + "loss": 0.0042, + "step": 49850 + }, + { + "epoch": 14.351452401495543, + "grad_norm": 0.15498597919940948, + "learning_rate": 4.848846375082554e-07, + "loss": 0.0043, + "step": 49900 + }, + { + "epoch": 14.365832614322692, + "grad_norm": 1.3390775918960571, + "learning_rate": 4.742325145401479e-07, + "loss": 0.003, + "step": 49950 + }, + { + "epoch": 14.380212827149842, + "grad_norm": 0.1733628660440445, + "learning_rate": 4.6358039157204033e-07, + "loss": 0.0023, + "step": 50000 + }, + { + "epoch": 14.394593039976991, + "grad_norm": 0.22354567050933838, + "learning_rate": 4.5292826860393283e-07, + "loss": 0.0025, + "step": 50050 + }, + { + "epoch": 14.408973252804142, + "grad_norm": 0.3741397261619568, + "learning_rate": 4.4227614563582523e-07, + "loss": 0.004, + "step": 50100 + }, + { + "epoch": 14.42335346563129, + "grad_norm": 0.5939965844154358, + "learning_rate": 4.316240226677177e-07, + "loss": 0.0026, + "step": 50150 + }, + { + "epoch": 14.437733678458441, + "grad_norm": 0.12893730401992798, + "learning_rate": 4.209718996996102e-07, + "loss": 0.0056, + "step": 50200 + }, + { + "epoch": 14.452113891285592, + "grad_norm": 0.3216068744659424, + "learning_rate": 4.1031977673150265e-07, + "loss": 0.0024, + "step": 50250 + }, + { + "epoch": 14.46649410411274, + "grad_norm": 0.24595089256763458, + "learning_rate": 3.9966765376339505e-07, + "loss": 0.0033, + "step": 50300 + }, + { + "epoch": 14.480874316939891, + "grad_norm": 0.23311227560043335, + "learning_rate": 3.8901553079528756e-07, + "loss": 0.0072, + "step": 50350 + }, + { + "epoch": 14.49525452976704, + "grad_norm": 1.1418853998184204, + "learning_rate": 3.7836340782718e-07, + "loss": 0.0019, + "step": 50400 + }, + { + "epoch": 14.50963474259419, + "grad_norm": 0.1964561641216278, + "learning_rate": 3.677112848590724e-07, + "loss": 0.0022, + "step": 50450 + }, + { + "epoch": 14.52401495542134, + "grad_norm": 0.25227129459381104, + "learning_rate": 3.5705916189096486e-07, + "loss": 0.0024, + "step": 50500 + }, + { + "epoch": 14.53839516824849, + "grad_norm": 1.3018821477890015, + "learning_rate": 3.4640703892285737e-07, + "loss": 0.0036, + "step": 50550 + }, + { + "epoch": 14.55277538107564, + "grad_norm": 0.0698321983218193, + "learning_rate": 3.357549159547498e-07, + "loss": 0.0024, + "step": 50600 + }, + { + "epoch": 14.56715559390279, + "grad_norm": 0.261180579662323, + "learning_rate": 3.251027929866422e-07, + "loss": 0.0037, + "step": 50650 + }, + { + "epoch": 14.58153580672994, + "grad_norm": 0.31282150745391846, + "learning_rate": 3.1445067001853473e-07, + "loss": 0.0036, + "step": 50700 + }, + { + "epoch": 14.59591601955709, + "grad_norm": 0.2965436577796936, + "learning_rate": 3.037985470504272e-07, + "loss": 0.0053, + "step": 50750 + }, + { + "epoch": 14.61029623238424, + "grad_norm": 0.2643774747848511, + "learning_rate": 2.9314642408231964e-07, + "loss": 0.0032, + "step": 50800 + }, + { + "epoch": 14.624676445211389, + "grad_norm": 0.39520788192749023, + "learning_rate": 2.824943011142121e-07, + "loss": 0.0027, + "step": 50850 + }, + { + "epoch": 14.63905665803854, + "grad_norm": 0.160085991024971, + "learning_rate": 2.7184217814610454e-07, + "loss": 0.0029, + "step": 50900 + }, + { + "epoch": 14.653436870865688, + "grad_norm": 0.40090009570121765, + "learning_rate": 2.61190055177997e-07, + "loss": 0.0031, + "step": 50950 + }, + { + "epoch": 14.667817083692839, + "grad_norm": 0.21048611402511597, + "learning_rate": 2.5053793220988945e-07, + "loss": 0.01, + "step": 51000 + }, + { + "epoch": 14.682197296519988, + "grad_norm": 0.13856516778469086, + "learning_rate": 2.398858092417819e-07, + "loss": 0.0038, + "step": 51050 + }, + { + "epoch": 14.696577509347138, + "grad_norm": 0.6128761768341064, + "learning_rate": 2.2923368627367436e-07, + "loss": 0.0068, + "step": 51100 + }, + { + "epoch": 14.710957722174289, + "grad_norm": 0.1999235451221466, + "learning_rate": 2.185815633055668e-07, + "loss": 0.0036, + "step": 51150 + }, + { + "epoch": 14.725337935001438, + "grad_norm": 0.19555184245109558, + "learning_rate": 2.079294403374593e-07, + "loss": 0.0052, + "step": 51200 + }, + { + "epoch": 14.739718147828588, + "grad_norm": 0.2652062475681305, + "learning_rate": 1.9727731736935172e-07, + "loss": 0.0056, + "step": 51250 + }, + { + "epoch": 14.754098360655737, + "grad_norm": 0.36996254324913025, + "learning_rate": 1.866251944012442e-07, + "loss": 0.0022, + "step": 51300 + }, + { + "epoch": 14.768478573482888, + "grad_norm": 0.32936665415763855, + "learning_rate": 1.761861138924988e-07, + "loss": 0.0058, + "step": 51350 + }, + { + "epoch": 14.782858786310037, + "grad_norm": 0.17573754489421844, + "learning_rate": 1.6553399092439124e-07, + "loss": 0.0041, + "step": 51400 + }, + { + "epoch": 14.797238999137187, + "grad_norm": 0.4205591082572937, + "learning_rate": 1.548818679562837e-07, + "loss": 0.0023, + "step": 51450 + }, + { + "epoch": 14.811619211964338, + "grad_norm": 0.6410862803459167, + "learning_rate": 1.4422974498817615e-07, + "loss": 0.0024, + "step": 51500 + }, + { + "epoch": 14.825999424791487, + "grad_norm": 0.3451874554157257, + "learning_rate": 1.335776220200686e-07, + "loss": 0.0031, + "step": 51550 + }, + { + "epoch": 14.840379637618637, + "grad_norm": 0.38606250286102295, + "learning_rate": 1.2292549905196106e-07, + "loss": 0.0118, + "step": 51600 + }, + { + "epoch": 14.854759850445786, + "grad_norm": 0.15578801929950714, + "learning_rate": 1.1227337608385353e-07, + "loss": 0.0025, + "step": 51650 + }, + { + "epoch": 14.869140063272937, + "grad_norm": 3.025273084640503, + "learning_rate": 1.0162125311574598e-07, + "loss": 0.0049, + "step": 51700 + }, + { + "epoch": 14.883520276100086, + "grad_norm": 0.24030663073062897, + "learning_rate": 9.096913014763843e-08, + "loss": 0.0041, + "step": 51750 + }, + { + "epoch": 14.897900488927236, + "grad_norm": 0.10516488552093506, + "learning_rate": 8.03170071795309e-08, + "loss": 0.0085, + "step": 51800 + }, + { + "epoch": 14.912280701754385, + "grad_norm": 0.2681816816329956, + "learning_rate": 6.966488421142334e-08, + "loss": 0.0051, + "step": 51850 + }, + { + "epoch": 14.926660914581536, + "grad_norm": 0.33148375153541565, + "learning_rate": 5.90127612433158e-08, + "loss": 0.0035, + "step": 51900 + }, + { + "epoch": 14.941041127408685, + "grad_norm": 0.2675136923789978, + "learning_rate": 4.836063827520825e-08, + "loss": 0.0039, + "step": 51950 + }, + { + "epoch": 14.955421340235835, + "grad_norm": 0.220230832695961, + "learning_rate": 3.770851530710071e-08, + "loss": 0.0037, + "step": 52000 + }, + { + "epoch": 14.969801553062986, + "grad_norm": 0.11165965348482132, + "learning_rate": 2.7056392338993164e-08, + "loss": 0.0078, + "step": 52050 + }, + { + "epoch": 14.984181765890135, + "grad_norm": 0.27582016587257385, + "learning_rate": 1.6404269370885617e-08, + "loss": 0.0022, + "step": 52100 + }, + { + "epoch": 14.998561978717285, + "grad_norm": 0.5561297535896301, + "learning_rate": 5.752146402778074e-09, + "loss": 0.0046, + "step": 52150 + }, + { + "epoch": 15.0, + "eval_cer": 13.645967858319056, + "eval_exact_match": 29.50501455839534, + "eval_loss": 0.6109118461608887, + "eval_runtime": 557.2778, + "eval_samples_per_second": 5.547, + "eval_steps_per_second": 0.694, + "eval_wer": 30.72490142553837, + "step": 52155 } ], "logging_steps": 50, - "max_steps": 38630, + "max_steps": 52155, "num_input_tokens_seen": 0, - "num_train_epochs": 10, + "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { @@ -5521,7 +7493,7 @@ "attributes": {} } }, - "total_flos": 2.00436846133248e+19, + "total_flos": 2.70582931980288e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null