{ "best_global_step": 38630, "best_metric": 0.8082292432035268, "best_model_checkpoint": "./whisper-urdu-base-finetuned/checkpoint-38630", "epoch": 10.0, "eval_steps": 500, "global_step": 38630, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012943308309603935, "grad_norm": 2.339423418045044, "learning_rate": 1.2684442143411856e-07, "loss": 0.0468, "step": 50 }, { "epoch": 0.02588661661920787, "grad_norm": 4.33876371383667, "learning_rate": 2.5627750453015794e-07, "loss": 0.0428, "step": 100 }, { "epoch": 0.03882992492881181, "grad_norm": 4.59096622467041, "learning_rate": 3.8571058762619726e-07, "loss": 0.054, "step": 150 }, { "epoch": 0.05177323323841574, "grad_norm": 5.390439033508301, "learning_rate": 5.151436707222367e-07, "loss": 0.0454, "step": 200 }, { "epoch": 0.06471654154801967, "grad_norm": 3.9699249267578125, "learning_rate": 6.44576753818276e-07, "loss": 0.03, "step": 250 }, { "epoch": 0.07765984985762361, "grad_norm": 5.007837295532227, "learning_rate": 7.740098369143153e-07, "loss": 0.0364, "step": 300 }, { "epoch": 0.09060315816722754, "grad_norm": 1.7915890216827393, "learning_rate": 9.034429200103548e-07, "loss": 0.033, "step": 350 }, { "epoch": 0.10354646647683148, "grad_norm": 1.8916817903518677, "learning_rate": 1.032876003106394e-06, "loss": 0.0453, "step": 400 }, { "epoch": 0.11648977478643541, "grad_norm": 5.6407318115234375, "learning_rate": 1.1623090862024335e-06, "loss": 0.0493, "step": 450 }, { "epoch": 0.12943308309603935, "grad_norm": 1.5208466053009033, "learning_rate": 1.2917421692984728e-06, "loss": 0.0457, "step": 500 }, { "epoch": 0.1423763914056433, "grad_norm": 0.9354443550109863, "learning_rate": 1.4211752523945122e-06, "loss": 0.0486, "step": 550 }, { "epoch": 0.15531969971524723, "grad_norm": 3.7830538749694824, "learning_rate": 1.5506083354905516e-06, "loss": 0.0377, "step": 600 }, { "epoch": 0.16826300802485114, "grad_norm": 1.2487603425979614, "learning_rate": 1.67745275692467e-06, "loss": 0.069, "step": 650 }, { "epoch": 0.18120631633445508, "grad_norm": 0.8733393549919128, "learning_rate": 1.8068858400207096e-06, "loss": 0.0457, "step": 700 }, { "epoch": 0.19414962464405902, "grad_norm": 3.48146390914917, "learning_rate": 1.936318923116749e-06, "loss": 0.0372, "step": 750 }, { "epoch": 0.20709293295366296, "grad_norm": 3.9726881980895996, "learning_rate": 2.065752006212788e-06, "loss": 0.0493, "step": 800 }, { "epoch": 0.2200362412632669, "grad_norm": 1.6841055154800415, "learning_rate": 2.1951850893088276e-06, "loss": 0.046, "step": 850 }, { "epoch": 0.23297954957287081, "grad_norm": 1.1303517818450928, "learning_rate": 2.324618172404867e-06, "loss": 0.0484, "step": 900 }, { "epoch": 0.24592285788247475, "grad_norm": 1.6165255308151245, "learning_rate": 2.4540512555009063e-06, "loss": 0.0617, "step": 950 }, { "epoch": 0.2588661661920787, "grad_norm": 4.751931667327881, "learning_rate": 2.5834843385969457e-06, "loss": 0.0516, "step": 1000 }, { "epoch": 0.27180947450168264, "grad_norm": 2.619171142578125, "learning_rate": 2.712917421692985e-06, "loss": 0.0491, "step": 1050 }, { "epoch": 0.2847527828112866, "grad_norm": 3.5204107761383057, "learning_rate": 2.8423505047890244e-06, "loss": 0.0451, "step": 1100 }, { "epoch": 0.2976960911208905, "grad_norm": 1.6330580711364746, "learning_rate": 2.971783587885064e-06, "loss": 0.0329, "step": 1150 }, { "epoch": 0.31063939943049446, "grad_norm": 9.043530464172363, "learning_rate": 3.101216670981103e-06, "loss": 0.0528, "step": 1200 }, { "epoch": 0.3235827077400984, "grad_norm": 4.8967604637146, "learning_rate": 3.2306497540771426e-06, "loss": 0.052, "step": 1250 }, { "epoch": 0.3365260160497023, "grad_norm": 2.9386487007141113, "learning_rate": 3.360082837173182e-06, "loss": 0.0431, "step": 1300 }, { "epoch": 0.3494693243593062, "grad_norm": 3.271523952484131, "learning_rate": 3.4895159202692213e-06, "loss": 0.0435, "step": 1350 }, { "epoch": 0.36241263266891016, "grad_norm": 3.5081090927124023, "learning_rate": 3.6189490033652603e-06, "loss": 0.0571, "step": 1400 }, { "epoch": 0.3753559409785141, "grad_norm": 5.796409606933594, "learning_rate": 3.7483820864612997e-06, "loss": 0.0529, "step": 1450 }, { "epoch": 0.38829924928811804, "grad_norm": 3.4982869625091553, "learning_rate": 3.877815169557339e-06, "loss": 0.0566, "step": 1500 }, { "epoch": 0.401242557597722, "grad_norm": 4.461122035980225, "learning_rate": 4.007248252653379e-06, "loss": 0.0562, "step": 1550 }, { "epoch": 0.4141858659073259, "grad_norm": 3.034180164337158, "learning_rate": 4.136681335749418e-06, "loss": 0.0466, "step": 1600 }, { "epoch": 0.42712917421692986, "grad_norm": 5.0971598625183105, "learning_rate": 4.266114418845458e-06, "loss": 0.0501, "step": 1650 }, { "epoch": 0.4400724825265338, "grad_norm": 5.404353618621826, "learning_rate": 4.3955475019414965e-06, "loss": 0.0424, "step": 1700 }, { "epoch": 0.45301579083613774, "grad_norm": 2.975407838821411, "learning_rate": 4.524980585037536e-06, "loss": 0.0562, "step": 1750 }, { "epoch": 0.46595909914574163, "grad_norm": 8.076371192932129, "learning_rate": 4.654413668133575e-06, "loss": 0.0377, "step": 1800 }, { "epoch": 0.47890240745534557, "grad_norm": 5.4830451011657715, "learning_rate": 4.781258089567694e-06, "loss": 0.0436, "step": 1850 }, { "epoch": 0.4918457157649495, "grad_norm": 2.6184751987457275, "learning_rate": 4.910691172663733e-06, "loss": 0.0414, "step": 1900 }, { "epoch": 0.5047890240745535, "grad_norm": 4.777491092681885, "learning_rate": 5.040124255759773e-06, "loss": 0.0469, "step": 1950 }, { "epoch": 0.5177323323841574, "grad_norm": 8.820015907287598, "learning_rate": 5.169557338855812e-06, "loss": 0.0654, "step": 2000 }, { "epoch": 0.5306756406937613, "grad_norm": 4.339293003082275, "learning_rate": 5.298990421951852e-06, "loss": 0.0473, "step": 2050 }, { "epoch": 0.5436189490033653, "grad_norm": 7.162559509277344, "learning_rate": 5.428423505047891e-06, "loss": 0.0473, "step": 2100 }, { "epoch": 0.5565622573129692, "grad_norm": 7.811385154724121, "learning_rate": 5.557856588143931e-06, "loss": 0.0516, "step": 2150 }, { "epoch": 0.5695055656225732, "grad_norm": 2.8982834815979004, "learning_rate": 5.68728967123997e-06, "loss": 0.0497, "step": 2200 }, { "epoch": 0.582448873932177, "grad_norm": 4.9137749671936035, "learning_rate": 5.8167227543360085e-06, "loss": 0.0613, "step": 2250 }, { "epoch": 0.595392182241781, "grad_norm": 4.650545597076416, "learning_rate": 5.946155837432048e-06, "loss": 0.0475, "step": 2300 }, { "epoch": 0.6083354905513849, "grad_norm": 6.333342552185059, "learning_rate": 6.075588920528087e-06, "loss": 0.0793, "step": 2350 }, { "epoch": 0.6212787988609889, "grad_norm": 5.444154739379883, "learning_rate": 6.205022003624127e-06, "loss": 0.0573, "step": 2400 }, { "epoch": 0.6342221071705928, "grad_norm": 1.8880912065505981, "learning_rate": 6.334455086720166e-06, "loss": 0.0409, "step": 2450 }, { "epoch": 0.6471654154801968, "grad_norm": 5.30678653717041, "learning_rate": 6.463888169816206e-06, "loss": 0.0546, "step": 2500 }, { "epoch": 0.6601087237898007, "grad_norm": 11.33090591430664, "learning_rate": 6.593321252912245e-06, "loss": 0.055, "step": 2550 }, { "epoch": 0.6730520320994046, "grad_norm": 2.6539409160614014, "learning_rate": 6.722754336008285e-06, "loss": 0.0466, "step": 2600 }, { "epoch": 0.6859953404090086, "grad_norm": 5.582194805145264, "learning_rate": 6.8521874191043236e-06, "loss": 0.0559, "step": 2650 }, { "epoch": 0.6989386487186124, "grad_norm": 4.70722770690918, "learning_rate": 6.9816205022003625e-06, "loss": 0.0505, "step": 2700 }, { "epoch": 0.7118819570282164, "grad_norm": 2.8403987884521484, "learning_rate": 7.111053585296402e-06, "loss": 0.0508, "step": 2750 }, { "epoch": 0.7248252653378203, "grad_norm": 9.0087890625, "learning_rate": 7.240486668392441e-06, "loss": 0.0793, "step": 2800 }, { "epoch": 0.7377685736474243, "grad_norm": 5.515995025634766, "learning_rate": 7.369919751488481e-06, "loss": 0.0635, "step": 2850 }, { "epoch": 0.7507118819570282, "grad_norm": 3.9393441677093506, "learning_rate": 7.49935283458452e-06, "loss": 0.0509, "step": 2900 }, { "epoch": 0.7636551902666322, "grad_norm": 5.0234599113464355, "learning_rate": 7.62878591768056e-06, "loss": 0.0528, "step": 2950 }, { "epoch": 0.7765984985762361, "grad_norm": 2.9625911712646484, "learning_rate": 7.758219000776599e-06, "loss": 0.0435, "step": 3000 }, { "epoch": 0.78954180688584, "grad_norm": 5.779876708984375, "learning_rate": 7.88765208387264e-06, "loss": 0.0522, "step": 3050 }, { "epoch": 0.802485115195444, "grad_norm": 1.6520557403564453, "learning_rate": 8.017085166968678e-06, "loss": 0.0481, "step": 3100 }, { "epoch": 0.8154284235050479, "grad_norm": 10.486522674560547, "learning_rate": 8.146518250064717e-06, "loss": 0.0571, "step": 3150 }, { "epoch": 0.8283717318146518, "grad_norm": 5.2048139572143555, "learning_rate": 8.275951333160756e-06, "loss": 0.0652, "step": 3200 }, { "epoch": 0.8413150401242557, "grad_norm": 6.738128185272217, "learning_rate": 8.405384416256795e-06, "loss": 0.0732, "step": 3250 }, { "epoch": 0.8542583484338597, "grad_norm": 10.937406539916992, "learning_rate": 8.534817499352836e-06, "loss": 0.0928, "step": 3300 }, { "epoch": 0.8672016567434636, "grad_norm": 6.5531182289123535, "learning_rate": 8.664250582448875e-06, "loss": 0.0716, "step": 3350 }, { "epoch": 0.8801449650530676, "grad_norm": 9.179738998413086, "learning_rate": 8.793683665544914e-06, "loss": 0.0589, "step": 3400 }, { "epoch": 0.8930882733626715, "grad_norm": 3.9006974697113037, "learning_rate": 8.923116748640953e-06, "loss": 0.0626, "step": 3450 }, { "epoch": 0.9060315816722755, "grad_norm": 1.9278124570846558, "learning_rate": 9.052549831736993e-06, "loss": 0.0668, "step": 3500 }, { "epoch": 0.9189748899818794, "grad_norm": 1.9330800771713257, "learning_rate": 9.181982914833032e-06, "loss": 0.0595, "step": 3550 }, { "epoch": 0.9319181982914833, "grad_norm": 6.718048572540283, "learning_rate": 9.311415997929071e-06, "loss": 0.0804, "step": 3600 }, { "epoch": 0.9448615066010873, "grad_norm": 6.918898582458496, "learning_rate": 9.44084908102511e-06, "loss": 0.0716, "step": 3650 }, { "epoch": 0.9578048149106911, "grad_norm": 4.3917131423950195, "learning_rate": 9.57028216412115e-06, "loss": 0.0694, "step": 3700 }, { "epoch": 0.9707481232202951, "grad_norm": 11.285859107971191, "learning_rate": 9.69971524721719e-06, "loss": 0.0715, "step": 3750 }, { "epoch": 0.983691431529899, "grad_norm": 4.574735641479492, "learning_rate": 9.829148330313229e-06, "loss": 0.0601, "step": 3800 }, { "epoch": 0.996634739839503, "grad_norm": 4.355649948120117, "learning_rate": 9.955992751747347e-06, "loss": 0.0735, "step": 3850 }, { "epoch": 1.0, "eval_loss": 0.07532492280006409, "eval_runtime": 88.308, "eval_samples_per_second": 5.662, "eval_steps_per_second": 0.713, "eval_wer": 9.278891571323607, "step": 3863 }, { "epoch": 1.009578048149107, "grad_norm": 6.590569019317627, "learning_rate": 9.990508240572958e-06, "loss": 0.0535, "step": 3900 }, { "epoch": 1.0225213564587108, "grad_norm": 1.8211477994918823, "learning_rate": 9.976126786895621e-06, "loss": 0.0632, "step": 3950 }, { "epoch": 1.0354646647683148, "grad_norm": 6.469516754150391, "learning_rate": 9.961745333218282e-06, "loss": 0.0686, "step": 4000 }, { "epoch": 1.0484079730779188, "grad_norm": 8.549825668334961, "learning_rate": 9.947363879540945e-06, "loss": 0.0613, "step": 4050 }, { "epoch": 1.0613512813875225, "grad_norm": 6.740059852600098, "learning_rate": 9.933270054937154e-06, "loss": 0.0629, "step": 4100 }, { "epoch": 1.0742945896971265, "grad_norm": 6.783405780792236, "learning_rate": 9.918888601259817e-06, "loss": 0.1005, "step": 4150 }, { "epoch": 1.0872378980067305, "grad_norm": 3.628119707107544, "learning_rate": 9.904507147582478e-06, "loss": 0.0599, "step": 4200 }, { "epoch": 1.1001812063163345, "grad_norm": 10.390827178955078, "learning_rate": 9.89012569390514e-06, "loss": 0.0713, "step": 4250 }, { "epoch": 1.1131245146259383, "grad_norm": 4.781597137451172, "learning_rate": 9.875744240227803e-06, "loss": 0.0758, "step": 4300 }, { "epoch": 1.1260678229355423, "grad_norm": 2.381582498550415, "learning_rate": 9.861362786550466e-06, "loss": 0.0587, "step": 4350 }, { "epoch": 1.1390111312451463, "grad_norm": 3.9035913944244385, "learning_rate": 9.846981332873127e-06, "loss": 0.0482, "step": 4400 }, { "epoch": 1.1519544395547503, "grad_norm": 3.4371120929718018, "learning_rate": 9.83259987919579e-06, "loss": 0.0584, "step": 4450 }, { "epoch": 1.164897747864354, "grad_norm": 5.601978302001953, "learning_rate": 9.818218425518452e-06, "loss": 0.0567, "step": 4500 }, { "epoch": 1.177841056173958, "grad_norm": 5.46005916595459, "learning_rate": 9.803836971841113e-06, "loss": 0.0696, "step": 4550 }, { "epoch": 1.190784364483562, "grad_norm": 6.306366443634033, "learning_rate": 9.789455518163776e-06, "loss": 0.0628, "step": 4600 }, { "epoch": 1.2037276727931658, "grad_norm": 6.448446273803711, "learning_rate": 9.775074064486439e-06, "loss": 0.0712, "step": 4650 }, { "epoch": 1.2166709811027698, "grad_norm": 5.446472644805908, "learning_rate": 9.760692610809102e-06, "loss": 0.0549, "step": 4700 }, { "epoch": 1.2296142894123738, "grad_norm": 8.122842788696289, "learning_rate": 9.746311157131764e-06, "loss": 0.0804, "step": 4750 }, { "epoch": 1.2425575977219778, "grad_norm": 17.312549591064453, "learning_rate": 9.731929703454425e-06, "loss": 0.0598, "step": 4800 }, { "epoch": 1.2555009060315816, "grad_norm": 8.815750122070312, "learning_rate": 9.717548249777088e-06, "loss": 0.086, "step": 4850 }, { "epoch": 1.2684442143411856, "grad_norm": 7.290472030639648, "learning_rate": 9.70316679609975e-06, "loss": 0.0646, "step": 4900 }, { "epoch": 1.2813875226507896, "grad_norm": 6.4685258865356445, "learning_rate": 9.688785342422414e-06, "loss": 0.0717, "step": 4950 }, { "epoch": 1.2943308309603934, "grad_norm": 6.5981974601745605, "learning_rate": 9.674403888745075e-06, "loss": 0.0605, "step": 5000 }, { "epoch": 1.3072741392699974, "grad_norm": 4.253500461578369, "learning_rate": 9.660022435067737e-06, "loss": 0.0657, "step": 5050 }, { "epoch": 1.3202174475796014, "grad_norm": 14.457869529724121, "learning_rate": 9.6456409813904e-06, "loss": 0.0701, "step": 5100 }, { "epoch": 1.3331607558892054, "grad_norm": 10.046952247619629, "learning_rate": 9.631259527713061e-06, "loss": 0.0636, "step": 5150 }, { "epoch": 1.3461040641988093, "grad_norm": 0.7242924571037292, "learning_rate": 9.616878074035725e-06, "loss": 0.0731, "step": 5200 }, { "epoch": 1.3590473725084131, "grad_norm": 5.0954155921936035, "learning_rate": 9.602496620358386e-06, "loss": 0.0768, "step": 5250 }, { "epoch": 1.3719906808180171, "grad_norm": 3.536829710006714, "learning_rate": 9.588115166681049e-06, "loss": 0.0676, "step": 5300 }, { "epoch": 1.384933989127621, "grad_norm": 16.024459838867188, "learning_rate": 9.573733713003712e-06, "loss": 0.0778, "step": 5350 }, { "epoch": 1.3978772974372249, "grad_norm": 8.860788345336914, "learning_rate": 9.559352259326373e-06, "loss": 0.0618, "step": 5400 }, { "epoch": 1.4108206057468289, "grad_norm": 8.013275146484375, "learning_rate": 9.544970805649036e-06, "loss": 0.0682, "step": 5450 }, { "epoch": 1.4237639140564329, "grad_norm": 1.6208149194717407, "learning_rate": 9.530589351971698e-06, "loss": 0.0658, "step": 5500 }, { "epoch": 1.4367072223660369, "grad_norm": 3.301064968109131, "learning_rate": 9.516207898294361e-06, "loss": 0.0578, "step": 5550 }, { "epoch": 1.4496505306756406, "grad_norm": 8.253787994384766, "learning_rate": 9.501826444617022e-06, "loss": 0.0613, "step": 5600 }, { "epoch": 1.4625938389852446, "grad_norm": 5.881521224975586, "learning_rate": 9.487444990939685e-06, "loss": 0.0652, "step": 5650 }, { "epoch": 1.4755371472948486, "grad_norm": 5.812432289123535, "learning_rate": 9.473063537262347e-06, "loss": 0.0632, "step": 5700 }, { "epoch": 1.4884804556044524, "grad_norm": 7.349687576293945, "learning_rate": 9.458682083585008e-06, "loss": 0.0633, "step": 5750 }, { "epoch": 1.5014237639140564, "grad_norm": 6.8907694816589355, "learning_rate": 9.444300629907673e-06, "loss": 0.0621, "step": 5800 }, { "epoch": 1.5143670722236604, "grad_norm": 6.606383800506592, "learning_rate": 9.429919176230334e-06, "loss": 0.0827, "step": 5850 }, { "epoch": 1.5273103805332644, "grad_norm": 7.548374176025391, "learning_rate": 9.415537722552997e-06, "loss": 0.0616, "step": 5900 }, { "epoch": 1.5402536888428684, "grad_norm": 2.861259698867798, "learning_rate": 9.40115626887566e-06, "loss": 0.0681, "step": 5950 }, { "epoch": 1.5531969971524722, "grad_norm": 1.8648054599761963, "learning_rate": 9.38677481519832e-06, "loss": 0.0716, "step": 6000 }, { "epoch": 1.566140305462076, "grad_norm": 8.61605167388916, "learning_rate": 9.372393361520983e-06, "loss": 0.0632, "step": 6050 }, { "epoch": 1.57908361377168, "grad_norm": 7.054296493530273, "learning_rate": 9.358011907843646e-06, "loss": 0.067, "step": 6100 }, { "epoch": 1.592026922081284, "grad_norm": 16.833148956298828, "learning_rate": 9.343630454166308e-06, "loss": 0.0657, "step": 6150 }, { "epoch": 1.604970230390888, "grad_norm": 8.74778938293457, "learning_rate": 9.32924900048897e-06, "loss": 0.0824, "step": 6200 }, { "epoch": 1.617913538700492, "grad_norm": 7.7294440269470215, "learning_rate": 9.314867546811632e-06, "loss": 0.0756, "step": 6250 }, { "epoch": 1.630856847010096, "grad_norm": 5.917527675628662, "learning_rate": 9.300486093134295e-06, "loss": 0.0625, "step": 6300 }, { "epoch": 1.6438001553196997, "grad_norm": 6.116061687469482, "learning_rate": 9.286104639456958e-06, "loss": 0.0733, "step": 6350 }, { "epoch": 1.6567434636293037, "grad_norm": 5.308782577514648, "learning_rate": 9.27172318577962e-06, "loss": 0.0614, "step": 6400 }, { "epoch": 1.6696867719389075, "grad_norm": 4.262322902679443, "learning_rate": 9.257341732102281e-06, "loss": 0.0695, "step": 6450 }, { "epoch": 1.6826300802485115, "grad_norm": 10.316951751708984, "learning_rate": 9.242960278424944e-06, "loss": 0.0608, "step": 6500 }, { "epoch": 1.6955733885581155, "grad_norm": 10.672906875610352, "learning_rate": 9.228578824747607e-06, "loss": 0.0497, "step": 6550 }, { "epoch": 1.7085166968677195, "grad_norm": 9.800777435302734, "learning_rate": 9.214197371070268e-06, "loss": 0.0869, "step": 6600 }, { "epoch": 1.7214600051773234, "grad_norm": 0.5741531848907471, "learning_rate": 9.19981591739293e-06, "loss": 0.0719, "step": 6650 }, { "epoch": 1.7344033134869272, "grad_norm": 1.716304063796997, "learning_rate": 9.185434463715593e-06, "loss": 0.0819, "step": 6700 }, { "epoch": 1.7473466217965312, "grad_norm": 5.458923816680908, "learning_rate": 9.171053010038256e-06, "loss": 0.0911, "step": 6750 }, { "epoch": 1.760289930106135, "grad_norm": 5.717566967010498, "learning_rate": 9.156671556360917e-06, "loss": 0.0728, "step": 6800 }, { "epoch": 1.773233238415739, "grad_norm": 2.0101475715637207, "learning_rate": 9.14229010268358e-06, "loss": 0.0688, "step": 6850 }, { "epoch": 1.786176546725343, "grad_norm": 4.805422306060791, "learning_rate": 9.127908649006242e-06, "loss": 0.0722, "step": 6900 }, { "epoch": 1.799119855034947, "grad_norm": 8.908441543579102, "learning_rate": 9.113527195328905e-06, "loss": 0.0742, "step": 6950 }, { "epoch": 1.812063163344551, "grad_norm": 5.328374862670898, "learning_rate": 9.099145741651568e-06, "loss": 0.0705, "step": 7000 }, { "epoch": 1.8250064716541547, "grad_norm": 5.544122219085693, "learning_rate": 9.084764287974229e-06, "loss": 0.0803, "step": 7050 }, { "epoch": 1.8379497799637587, "grad_norm": 12.470900535583496, "learning_rate": 9.070382834296892e-06, "loss": 0.0687, "step": 7100 }, { "epoch": 1.8508930882733625, "grad_norm": 6.980189323425293, "learning_rate": 9.056001380619553e-06, "loss": 0.0738, "step": 7150 }, { "epoch": 1.8638363965829665, "grad_norm": 5.287197589874268, "learning_rate": 9.041619926942215e-06, "loss": 0.0723, "step": 7200 }, { "epoch": 1.8767797048925705, "grad_norm": 7.992635250091553, "learning_rate": 9.027238473264878e-06, "loss": 0.0766, "step": 7250 }, { "epoch": 1.8897230132021745, "grad_norm": 13.58613395690918, "learning_rate": 9.01285701958754e-06, "loss": 0.0672, "step": 7300 }, { "epoch": 1.9026663215117785, "grad_norm": 6.183566093444824, "learning_rate": 8.998475565910203e-06, "loss": 0.0636, "step": 7350 }, { "epoch": 1.9156096298213825, "grad_norm": 4.56492805480957, "learning_rate": 8.984094112232865e-06, "loss": 0.0823, "step": 7400 }, { "epoch": 1.9285529381309863, "grad_norm": 3.8052022457122803, "learning_rate": 8.969712658555527e-06, "loss": 0.094, "step": 7450 }, { "epoch": 1.9414962464405903, "grad_norm": 4.8918843269348145, "learning_rate": 8.95533120487819e-06, "loss": 0.0707, "step": 7500 }, { "epoch": 1.954439554750194, "grad_norm": 8.336883544921875, "learning_rate": 8.940949751200853e-06, "loss": 0.082, "step": 7550 }, { "epoch": 1.967382863059798, "grad_norm": 11.596981048583984, "learning_rate": 8.926568297523515e-06, "loss": 0.0914, "step": 7600 }, { "epoch": 1.980326171369402, "grad_norm": 3.7584636211395264, "learning_rate": 8.912186843846176e-06, "loss": 0.0697, "step": 7650 }, { "epoch": 1.993269479679006, "grad_norm": 1.8307932615280151, "learning_rate": 8.898093019242386e-06, "loss": 0.0597, "step": 7700 }, { "epoch": 2.0, "eval_loss": 0.0625610426068306, "eval_runtime": 88.6808, "eval_samples_per_second": 5.638, "eval_steps_per_second": 0.71, "eval_wer": 7.819880340086072, "step": 7726 }, { "epoch": 2.00621278798861, "grad_norm": 5.234423637390137, "learning_rate": 8.883711565565049e-06, "loss": 0.0493, "step": 7750 }, { "epoch": 2.019156096298214, "grad_norm": 6.8441386222839355, "learning_rate": 8.869330111887711e-06, "loss": 0.0531, "step": 7800 }, { "epoch": 2.0320994046078176, "grad_norm": 11.543621063232422, "learning_rate": 8.854948658210372e-06, "loss": 0.0447, "step": 7850 }, { "epoch": 2.0450427129174216, "grad_norm": 8.563196182250977, "learning_rate": 8.840567204533035e-06, "loss": 0.0466, "step": 7900 }, { "epoch": 2.0579860212270256, "grad_norm": 2.31270432472229, "learning_rate": 8.826185750855698e-06, "loss": 0.0487, "step": 7950 }, { "epoch": 2.0709293295366296, "grad_norm": 2.1112735271453857, "learning_rate": 8.811804297178359e-06, "loss": 0.0424, "step": 8000 }, { "epoch": 2.0838726378462336, "grad_norm": 2.2178971767425537, "learning_rate": 8.797422843501021e-06, "loss": 0.0472, "step": 8050 }, { "epoch": 2.0968159461558376, "grad_norm": 7.748543739318848, "learning_rate": 8.783041389823684e-06, "loss": 0.0467, "step": 8100 }, { "epoch": 2.1097592544654415, "grad_norm": 13.174030303955078, "learning_rate": 8.768659936146347e-06, "loss": 0.0582, "step": 8150 }, { "epoch": 2.122702562775045, "grad_norm": 8.9285306930542, "learning_rate": 8.754278482469008e-06, "loss": 0.0388, "step": 8200 }, { "epoch": 2.135645871084649, "grad_norm": 7.205421447753906, "learning_rate": 8.73989702879167e-06, "loss": 0.049, "step": 8250 }, { "epoch": 2.148589179394253, "grad_norm": 13.037263870239258, "learning_rate": 8.725515575114333e-06, "loss": 0.0463, "step": 8300 }, { "epoch": 2.161532487703857, "grad_norm": 7.653538703918457, "learning_rate": 8.711134121436996e-06, "loss": 0.047, "step": 8350 }, { "epoch": 2.174475796013461, "grad_norm": 3.7179276943206787, "learning_rate": 8.696752667759659e-06, "loss": 0.0455, "step": 8400 }, { "epoch": 2.187419104323065, "grad_norm": 5.476615905761719, "learning_rate": 8.68237121408232e-06, "loss": 0.0439, "step": 8450 }, { "epoch": 2.200362412632669, "grad_norm": 2.110492467880249, "learning_rate": 8.667989760404983e-06, "loss": 0.0411, "step": 8500 }, { "epoch": 2.213305720942273, "grad_norm": 5.301363468170166, "learning_rate": 8.653608306727645e-06, "loss": 0.0517, "step": 8550 }, { "epoch": 2.2262490292518766, "grad_norm": 9.795352935791016, "learning_rate": 8.639226853050306e-06, "loss": 0.0564, "step": 8600 }, { "epoch": 2.2391923375614806, "grad_norm": 1.353123426437378, "learning_rate": 8.624845399372969e-06, "loss": 0.0485, "step": 8650 }, { "epoch": 2.2521356458710846, "grad_norm": 9.473326683044434, "learning_rate": 8.610463945695632e-06, "loss": 0.0645, "step": 8700 }, { "epoch": 2.2650789541806886, "grad_norm": 4.406058311462402, "learning_rate": 8.596082492018294e-06, "loss": 0.0542, "step": 8750 }, { "epoch": 2.2780222624902926, "grad_norm": 17.06141471862793, "learning_rate": 8.581701038340955e-06, "loss": 0.0502, "step": 8800 }, { "epoch": 2.2909655707998966, "grad_norm": 8.37076187133789, "learning_rate": 8.567319584663618e-06, "loss": 0.0554, "step": 8850 }, { "epoch": 2.3039088791095006, "grad_norm": 10.135491371154785, "learning_rate": 8.552938130986281e-06, "loss": 0.0469, "step": 8900 }, { "epoch": 2.316852187419104, "grad_norm": 7.701286315917969, "learning_rate": 8.538556677308944e-06, "loss": 0.0468, "step": 8950 }, { "epoch": 2.329795495728708, "grad_norm": 0.7516931891441345, "learning_rate": 8.524175223631606e-06, "loss": 0.0443, "step": 9000 }, { "epoch": 2.342738804038312, "grad_norm": 8.913509368896484, "learning_rate": 8.509793769954267e-06, "loss": 0.0619, "step": 9050 }, { "epoch": 2.355682112347916, "grad_norm": 10.149544715881348, "learning_rate": 8.49541231627693e-06, "loss": 0.051, "step": 9100 }, { "epoch": 2.36862542065752, "grad_norm": 8.469508171081543, "learning_rate": 8.481030862599591e-06, "loss": 0.0481, "step": 9150 }, { "epoch": 2.381568728967124, "grad_norm": 2.6739773750305176, "learning_rate": 8.466649408922255e-06, "loss": 0.0565, "step": 9200 }, { "epoch": 2.394512037276728, "grad_norm": 7.850672245025635, "learning_rate": 8.452267955244916e-06, "loss": 0.0469, "step": 9250 }, { "epoch": 2.4074553455863317, "grad_norm": 4.025714874267578, "learning_rate": 8.43788650156758e-06, "loss": 0.0554, "step": 9300 }, { "epoch": 2.4203986538959357, "grad_norm": 8.857705116271973, "learning_rate": 8.423505047890242e-06, "loss": 0.0449, "step": 9350 }, { "epoch": 2.4333419622055397, "grad_norm": 7.978151321411133, "learning_rate": 8.409123594212903e-06, "loss": 0.0768, "step": 9400 }, { "epoch": 2.4462852705151437, "grad_norm": 7.44215726852417, "learning_rate": 8.394742140535566e-06, "loss": 0.0612, "step": 9450 }, { "epoch": 2.4592285788247477, "grad_norm": 3.768834352493286, "learning_rate": 8.380360686858228e-06, "loss": 0.0405, "step": 9500 }, { "epoch": 2.4721718871343517, "grad_norm": 4.792430877685547, "learning_rate": 8.365979233180891e-06, "loss": 0.049, "step": 9550 }, { "epoch": 2.4851151954439556, "grad_norm": 7.07329797744751, "learning_rate": 8.351597779503554e-06, "loss": 0.0516, "step": 9600 }, { "epoch": 2.498058503753559, "grad_norm": 7.043036937713623, "learning_rate": 8.337216325826215e-06, "loss": 0.0589, "step": 9650 }, { "epoch": 2.511001812063163, "grad_norm": 8.471428871154785, "learning_rate": 8.322834872148878e-06, "loss": 0.0433, "step": 9700 }, { "epoch": 2.523945120372767, "grad_norm": 5.081391334533691, "learning_rate": 8.308453418471539e-06, "loss": 0.0603, "step": 9750 }, { "epoch": 2.536888428682371, "grad_norm": 6.474493980407715, "learning_rate": 8.294071964794203e-06, "loss": 0.0529, "step": 9800 }, { "epoch": 2.549831736991975, "grad_norm": 6.623453617095947, "learning_rate": 8.279690511116864e-06, "loss": 0.0491, "step": 9850 }, { "epoch": 2.562775045301579, "grad_norm": 5.389576435089111, "learning_rate": 8.265309057439527e-06, "loss": 0.0491, "step": 9900 }, { "epoch": 2.575718353611183, "grad_norm": 8.212656021118164, "learning_rate": 8.25092760376219e-06, "loss": 0.0544, "step": 9950 }, { "epoch": 2.5886616619207867, "grad_norm": 0.44550031423568726, "learning_rate": 8.23654615008485e-06, "loss": 0.0578, "step": 10000 }, { "epoch": 2.6016049702303907, "grad_norm": 1.0146311521530151, "learning_rate": 8.222164696407513e-06, "loss": 0.0363, "step": 10050 }, { "epoch": 2.6145482785399947, "grad_norm": 2.9840240478515625, "learning_rate": 8.207783242730176e-06, "loss": 0.0464, "step": 10100 }, { "epoch": 2.6274915868495987, "grad_norm": 9.97850513458252, "learning_rate": 8.193401789052839e-06, "loss": 0.0486, "step": 10150 }, { "epoch": 2.6404348951592027, "grad_norm": 7.187142372131348, "learning_rate": 8.179020335375501e-06, "loss": 0.0621, "step": 10200 }, { "epoch": 2.6533782034688067, "grad_norm": 8.673319816589355, "learning_rate": 8.164638881698162e-06, "loss": 0.0495, "step": 10250 }, { "epoch": 2.6663215117784107, "grad_norm": 7.366682529449463, "learning_rate": 8.150257428020825e-06, "loss": 0.0426, "step": 10300 }, { "epoch": 2.6792648200880143, "grad_norm": 4.433532238006592, "learning_rate": 8.135875974343488e-06, "loss": 0.0489, "step": 10350 }, { "epoch": 2.6922081283976187, "grad_norm": 1.0996958017349243, "learning_rate": 8.12149452066615e-06, "loss": 0.0534, "step": 10400 }, { "epoch": 2.7051514367072222, "grad_norm": 4.040085792541504, "learning_rate": 8.107113066988811e-06, "loss": 0.0456, "step": 10450 }, { "epoch": 2.7180947450168262, "grad_norm": 6.1070685386657715, "learning_rate": 8.092731613311474e-06, "loss": 0.0685, "step": 10500 }, { "epoch": 2.7310380533264302, "grad_norm": 5.145371913909912, "learning_rate": 8.078350159634137e-06, "loss": 0.0537, "step": 10550 }, { "epoch": 2.7439813616360342, "grad_norm": 5.4013190269470215, "learning_rate": 8.063968705956798e-06, "loss": 0.051, "step": 10600 }, { "epoch": 2.7569246699456382, "grad_norm": 0.9301003217697144, "learning_rate": 8.049587252279462e-06, "loss": 0.0469, "step": 10650 }, { "epoch": 2.769867978255242, "grad_norm": 2.0756452083587646, "learning_rate": 8.035205798602123e-06, "loss": 0.0508, "step": 10700 }, { "epoch": 2.782811286564846, "grad_norm": 5.310762882232666, "learning_rate": 8.020824344924786e-06, "loss": 0.0559, "step": 10750 }, { "epoch": 2.7957545948744498, "grad_norm": 10.922019004821777, "learning_rate": 8.006442891247447e-06, "loss": 0.0558, "step": 10800 }, { "epoch": 2.8086979031840538, "grad_norm": 7.071091651916504, "learning_rate": 7.99206143757011e-06, "loss": 0.059, "step": 10850 }, { "epoch": 2.8216412114936578, "grad_norm": 8.605588912963867, "learning_rate": 7.977679983892773e-06, "loss": 0.0502, "step": 10900 }, { "epoch": 2.8345845198032618, "grad_norm": 5.918191909790039, "learning_rate": 7.963298530215435e-06, "loss": 0.044, "step": 10950 }, { "epoch": 2.8475278281128658, "grad_norm": 10.315163612365723, "learning_rate": 7.948917076538098e-06, "loss": 0.0639, "step": 11000 }, { "epoch": 2.8604711364224693, "grad_norm": 8.651927947998047, "learning_rate": 7.934535622860759e-06, "loss": 0.0632, "step": 11050 }, { "epoch": 2.8734144447320737, "grad_norm": 6.735222816467285, "learning_rate": 7.920154169183422e-06, "loss": 0.0594, "step": 11100 }, { "epoch": 2.8863577530416773, "grad_norm": 8.701018333435059, "learning_rate": 7.905772715506084e-06, "loss": 0.067, "step": 11150 }, { "epoch": 2.8993010613512813, "grad_norm": 7.249449729919434, "learning_rate": 7.891391261828745e-06, "loss": 0.0562, "step": 11200 }, { "epoch": 2.9122443696608853, "grad_norm": 9.386366844177246, "learning_rate": 7.87700980815141e-06, "loss": 0.0443, "step": 11250 }, { "epoch": 2.9251876779704893, "grad_norm": 5.82024621963501, "learning_rate": 7.86262835447407e-06, "loss": 0.0435, "step": 11300 }, { "epoch": 2.9381309862800933, "grad_norm": 9.714224815368652, "learning_rate": 7.848246900796734e-06, "loss": 0.0584, "step": 11350 }, { "epoch": 2.9510742945896973, "grad_norm": 10.820244789123535, "learning_rate": 7.833865447119395e-06, "loss": 0.051, "step": 11400 }, { "epoch": 2.9640176028993013, "grad_norm": 9.436473846435547, "learning_rate": 7.819483993442057e-06, "loss": 0.0762, "step": 11450 }, { "epoch": 2.976960911208905, "grad_norm": 4.0043745040893555, "learning_rate": 7.80510253976472e-06, "loss": 0.0542, "step": 11500 }, { "epoch": 2.989904219518509, "grad_norm": 6.734024524688721, "learning_rate": 7.790721086087383e-06, "loss": 0.0678, "step": 11550 }, { "epoch": 3.0, "eval_loss": 0.047223061323165894, "eval_runtime": 88.8982, "eval_samples_per_second": 5.624, "eval_steps_per_second": 0.709, "eval_wer": 7.588957699170777, "step": 11589 }, { "epoch": 3.002847527828113, "grad_norm": 7.159914970397949, "learning_rate": 7.776339632410045e-06, "loss": 0.0545, "step": 11600 }, { "epoch": 3.015790836137717, "grad_norm": 8.731955528259277, "learning_rate": 7.761958178732706e-06, "loss": 0.0283, "step": 11650 }, { "epoch": 3.028734144447321, "grad_norm": 9.880719184875488, "learning_rate": 7.74757672505537e-06, "loss": 0.0372, "step": 11700 }, { "epoch": 3.041677452756925, "grad_norm": 4.863366603851318, "learning_rate": 7.733195271378032e-06, "loss": 0.0301, "step": 11750 }, { "epoch": 3.054620761066529, "grad_norm": 3.5925040245056152, "learning_rate": 7.718813817700693e-06, "loss": 0.0319, "step": 11800 }, { "epoch": 3.0675640693761324, "grad_norm": 5.193197727203369, "learning_rate": 7.704432364023357e-06, "loss": 0.0284, "step": 11850 }, { "epoch": 3.0805073776857363, "grad_norm": 7.325606822967529, "learning_rate": 7.690050910346018e-06, "loss": 0.0221, "step": 11900 }, { "epoch": 3.0934506859953403, "grad_norm": 1.1931557655334473, "learning_rate": 7.675669456668681e-06, "loss": 0.0318, "step": 11950 }, { "epoch": 3.1063939943049443, "grad_norm": 3.640929698944092, "learning_rate": 7.661288002991342e-06, "loss": 0.0339, "step": 12000 }, { "epoch": 3.1193373026145483, "grad_norm": 5.155591011047363, "learning_rate": 7.647194178387553e-06, "loss": 0.0294, "step": 12050 }, { "epoch": 3.1322806109241523, "grad_norm": 4.072420120239258, "learning_rate": 7.632812724710214e-06, "loss": 0.0371, "step": 12100 }, { "epoch": 3.1452239192337563, "grad_norm": 4.188466548919678, "learning_rate": 7.618431271032877e-06, "loss": 0.0253, "step": 12150 }, { "epoch": 3.15816722754336, "grad_norm": 7.71252965927124, "learning_rate": 7.604049817355539e-06, "loss": 0.0291, "step": 12200 }, { "epoch": 3.171110535852964, "grad_norm": 1.7614275217056274, "learning_rate": 7.589668363678201e-06, "loss": 0.0477, "step": 12250 }, { "epoch": 3.184053844162568, "grad_norm": 6.398949146270752, "learning_rate": 7.575286910000863e-06, "loss": 0.0281, "step": 12300 }, { "epoch": 3.196997152472172, "grad_norm": 5.648083686828613, "learning_rate": 7.560905456323526e-06, "loss": 0.0385, "step": 12350 }, { "epoch": 3.209940460781776, "grad_norm": 1.6101973056793213, "learning_rate": 7.546524002646188e-06, "loss": 0.0291, "step": 12400 }, { "epoch": 3.22288376909138, "grad_norm": 1.436286449432373, "learning_rate": 7.532142548968851e-06, "loss": 0.0443, "step": 12450 }, { "epoch": 3.235827077400984, "grad_norm": 1.658963680267334, "learning_rate": 7.517761095291513e-06, "loss": 0.0371, "step": 12500 }, { "epoch": 3.2487703857105874, "grad_norm": 8.697310447692871, "learning_rate": 7.5033796416141745e-06, "loss": 0.0299, "step": 12550 }, { "epoch": 3.2617136940201914, "grad_norm": 6.439472198486328, "learning_rate": 7.488998187936837e-06, "loss": 0.0308, "step": 12600 }, { "epoch": 3.2746570023297954, "grad_norm": 3.5517160892486572, "learning_rate": 7.4746167342595e-06, "loss": 0.0357, "step": 12650 }, { "epoch": 3.2876003106393994, "grad_norm": 2.6896841526031494, "learning_rate": 7.460235280582162e-06, "loss": 0.0298, "step": 12700 }, { "epoch": 3.3005436189490034, "grad_norm": 3.5411911010742188, "learning_rate": 7.4458538269048245e-06, "loss": 0.0249, "step": 12750 }, { "epoch": 3.3134869272586074, "grad_norm": 0.766302227973938, "learning_rate": 7.431472373227486e-06, "loss": 0.0207, "step": 12800 }, { "epoch": 3.3264302355682114, "grad_norm": 7.974555969238281, "learning_rate": 7.417090919550148e-06, "loss": 0.0258, "step": 12850 }, { "epoch": 3.339373543877815, "grad_norm": 8.336533546447754, "learning_rate": 7.40270946587281e-06, "loss": 0.0335, "step": 12900 }, { "epoch": 3.352316852187419, "grad_norm": 4.762045383453369, "learning_rate": 7.38861564126902e-06, "loss": 0.0391, "step": 12950 }, { "epoch": 3.365260160497023, "grad_norm": 3.297501564025879, "learning_rate": 7.374234187591682e-06, "loss": 0.0427, "step": 13000 }, { "epoch": 3.378203468806627, "grad_norm": 5.205377578735352, "learning_rate": 7.359852733914344e-06, "loss": 0.0517, "step": 13050 }, { "epoch": 3.391146777116231, "grad_norm": 5.596180438995361, "learning_rate": 7.345471280237007e-06, "loss": 0.0422, "step": 13100 }, { "epoch": 3.404090085425835, "grad_norm": 5.4441657066345215, "learning_rate": 7.3310898265596695e-06, "loss": 0.0367, "step": 13150 }, { "epoch": 3.417033393735439, "grad_norm": 5.369819641113281, "learning_rate": 7.316708372882331e-06, "loss": 0.0245, "step": 13200 }, { "epoch": 3.429976702045043, "grad_norm": 1.0381672382354736, "learning_rate": 7.302326919204994e-06, "loss": 0.0377, "step": 13250 }, { "epoch": 3.4429200103546465, "grad_norm": 2.6047282218933105, "learning_rate": 7.287945465527656e-06, "loss": 0.0556, "step": 13300 }, { "epoch": 3.4558633186642504, "grad_norm": 3.447537899017334, "learning_rate": 7.273564011850318e-06, "loss": 0.0331, "step": 13350 }, { "epoch": 3.4688066269738544, "grad_norm": 12.265509605407715, "learning_rate": 7.259182558172981e-06, "loss": 0.0458, "step": 13400 }, { "epoch": 3.4817499352834584, "grad_norm": 6.993318557739258, "learning_rate": 7.244801104495643e-06, "loss": 0.0328, "step": 13450 }, { "epoch": 3.4946932435930624, "grad_norm": 1.927647590637207, "learning_rate": 7.230419650818305e-06, "loss": 0.036, "step": 13500 }, { "epoch": 3.5076365519026664, "grad_norm": 4.639932632446289, "learning_rate": 7.216038197140968e-06, "loss": 0.0359, "step": 13550 }, { "epoch": 3.52057986021227, "grad_norm": 6.088189125061035, "learning_rate": 7.20165674346363e-06, "loss": 0.0505, "step": 13600 }, { "epoch": 3.5335231685218744, "grad_norm": 3.452139377593994, "learning_rate": 7.187275289786292e-06, "loss": 0.0334, "step": 13650 }, { "epoch": 3.546466476831478, "grad_norm": 3.6713337898254395, "learning_rate": 7.172893836108954e-06, "loss": 0.0305, "step": 13700 }, { "epoch": 3.559409785141082, "grad_norm": 4.258627891540527, "learning_rate": 7.158512382431617e-06, "loss": 0.0257, "step": 13750 }, { "epoch": 3.572353093450686, "grad_norm": 7.825601577758789, "learning_rate": 7.144130928754279e-06, "loss": 0.0261, "step": 13800 }, { "epoch": 3.58529640176029, "grad_norm": 4.783618927001953, "learning_rate": 7.129749475076942e-06, "loss": 0.0347, "step": 13850 }, { "epoch": 3.598239710069894, "grad_norm": 4.300550937652588, "learning_rate": 7.1153680213996035e-06, "loss": 0.029, "step": 13900 }, { "epoch": 3.611183018379498, "grad_norm": 10.43149471282959, "learning_rate": 7.100986567722265e-06, "loss": 0.0345, "step": 13950 }, { "epoch": 3.624126326689102, "grad_norm": 5.455187797546387, "learning_rate": 7.086605114044929e-06, "loss": 0.0362, "step": 14000 }, { "epoch": 3.6370696349987055, "grad_norm": 7.492135047912598, "learning_rate": 7.072223660367591e-06, "loss": 0.0295, "step": 14050 }, { "epoch": 3.6500129433083095, "grad_norm": 5.982784271240234, "learning_rate": 7.057842206690253e-06, "loss": 0.0458, "step": 14100 }, { "epoch": 3.6629562516179135, "grad_norm": 13.213232040405273, "learning_rate": 7.043460753012915e-06, "loss": 0.0299, "step": 14150 }, { "epoch": 3.6758995599275175, "grad_norm": 0.33053404092788696, "learning_rate": 7.029079299335577e-06, "loss": 0.0272, "step": 14200 }, { "epoch": 3.6888428682371215, "grad_norm": 2.8715531826019287, "learning_rate": 7.014697845658239e-06, "loss": 0.0406, "step": 14250 }, { "epoch": 3.7017861765467255, "grad_norm": 7.417051315307617, "learning_rate": 7.000316391980903e-06, "loss": 0.0484, "step": 14300 }, { "epoch": 3.7147294848563295, "grad_norm": 7.245136737823486, "learning_rate": 6.9859349383035645e-06, "loss": 0.0392, "step": 14350 }, { "epoch": 3.727672793165933, "grad_norm": 6.493204116821289, "learning_rate": 6.971553484626226e-06, "loss": 0.0301, "step": 14400 }, { "epoch": 3.740616101475537, "grad_norm": 7.734640121459961, "learning_rate": 6.957172030948889e-06, "loss": 0.0317, "step": 14450 }, { "epoch": 3.753559409785141, "grad_norm": 2.9053096771240234, "learning_rate": 6.942790577271551e-06, "loss": 0.0381, "step": 14500 }, { "epoch": 3.766502718094745, "grad_norm": 8.95727825164795, "learning_rate": 6.928409123594213e-06, "loss": 0.0301, "step": 14550 }, { "epoch": 3.779446026404349, "grad_norm": 3.024991512298584, "learning_rate": 6.914027669916876e-06, "loss": 0.0371, "step": 14600 }, { "epoch": 3.792389334713953, "grad_norm": 2.2586114406585693, "learning_rate": 6.899646216239538e-06, "loss": 0.0331, "step": 14650 }, { "epoch": 3.805332643023557, "grad_norm": 2.5965662002563477, "learning_rate": 6.8852647625622e-06, "loss": 0.0266, "step": 14700 }, { "epoch": 3.8182759513331606, "grad_norm": 14.025388717651367, "learning_rate": 6.870883308884863e-06, "loss": 0.0325, "step": 14750 }, { "epoch": 3.8312192596427646, "grad_norm": 12.150018692016602, "learning_rate": 6.856501855207525e-06, "loss": 0.0371, "step": 14800 }, { "epoch": 3.8441625679523685, "grad_norm": 5.943263530731201, "learning_rate": 6.842120401530187e-06, "loss": 0.029, "step": 14850 }, { "epoch": 3.8571058762619725, "grad_norm": 3.0075082778930664, "learning_rate": 6.82773894785285e-06, "loss": 0.0423, "step": 14900 }, { "epoch": 3.8700491845715765, "grad_norm": 7.091434001922607, "learning_rate": 6.813357494175512e-06, "loss": 0.0333, "step": 14950 }, { "epoch": 3.8829924928811805, "grad_norm": 9.822648048400879, "learning_rate": 6.798976040498174e-06, "loss": 0.0623, "step": 15000 }, { "epoch": 3.8959358011907845, "grad_norm": 4.8575663566589355, "learning_rate": 6.784594586820837e-06, "loss": 0.0334, "step": 15050 }, { "epoch": 3.908879109500388, "grad_norm": 6.463123321533203, "learning_rate": 6.7702131331434985e-06, "loss": 0.0309, "step": 15100 }, { "epoch": 3.921822417809992, "grad_norm": 5.0641374588012695, "learning_rate": 6.75583167946616e-06, "loss": 0.0332, "step": 15150 }, { "epoch": 3.934765726119596, "grad_norm": 2.091432809829712, "learning_rate": 6.741450225788824e-06, "loss": 0.0383, "step": 15200 }, { "epoch": 3.9477090344292, "grad_norm": 2.943432092666626, "learning_rate": 6.727068772111486e-06, "loss": 0.0541, "step": 15250 }, { "epoch": 3.960652342738804, "grad_norm": 7.013586044311523, "learning_rate": 6.712687318434148e-06, "loss": 0.0281, "step": 15300 }, { "epoch": 3.973595651048408, "grad_norm": 5.849566459655762, "learning_rate": 6.69830586475681e-06, "loss": 0.0343, "step": 15350 }, { "epoch": 3.986538959358012, "grad_norm": 8.706452369689941, "learning_rate": 6.683924411079472e-06, "loss": 0.0488, "step": 15400 }, { "epoch": 3.9994822676676156, "grad_norm": 3.453444004058838, "learning_rate": 6.669542957402134e-06, "loss": 0.0232, "step": 15450 }, { "epoch": 4.0, "eval_loss": 0.03156248852610588, "eval_runtime": 89.3284, "eval_samples_per_second": 5.597, "eval_steps_per_second": 0.705, "eval_wer": 4.15660753647528, "step": 15452 }, { "epoch": 4.01242557597722, "grad_norm": 2.5590574741363525, "learning_rate": 6.655161503724798e-06, "loss": 0.0184, "step": 15500 }, { "epoch": 4.025368884286824, "grad_norm": 0.7780801057815552, "learning_rate": 6.6407800500474595e-06, "loss": 0.0192, "step": 15550 }, { "epoch": 4.038312192596428, "grad_norm": 10.055984497070312, "learning_rate": 6.626398596370121e-06, "loss": 0.0238, "step": 15600 }, { "epoch": 4.051255500906032, "grad_norm": 1.114243745803833, "learning_rate": 6.612017142692784e-06, "loss": 0.025, "step": 15650 }, { "epoch": 4.064198809215635, "grad_norm": 3.681232452392578, "learning_rate": 6.597635689015446e-06, "loss": 0.02, "step": 15700 }, { "epoch": 4.07714211752524, "grad_norm": 8.334362983703613, "learning_rate": 6.583254235338109e-06, "loss": 0.028, "step": 15750 }, { "epoch": 4.090085425834843, "grad_norm": 12.538928985595703, "learning_rate": 6.568872781660771e-06, "loss": 0.0223, "step": 15800 }, { "epoch": 4.103028734144448, "grad_norm": 4.050449371337891, "learning_rate": 6.554491327983433e-06, "loss": 0.0189, "step": 15850 }, { "epoch": 4.115972042454051, "grad_norm": 1.1800466775894165, "learning_rate": 6.540109874306095e-06, "loss": 0.0205, "step": 15900 }, { "epoch": 4.128915350763656, "grad_norm": 5.886932373046875, "learning_rate": 6.525728420628758e-06, "loss": 0.0184, "step": 15950 }, { "epoch": 4.141858659073259, "grad_norm": 4.0201497077941895, "learning_rate": 6.51134696695142e-06, "loss": 0.0188, "step": 16000 }, { "epoch": 4.154801967382863, "grad_norm": 1.844307780265808, "learning_rate": 6.4969655132740824e-06, "loss": 0.0175, "step": 16050 }, { "epoch": 4.167745275692467, "grad_norm": 3.237921953201294, "learning_rate": 6.482584059596745e-06, "loss": 0.0243, "step": 16100 }, { "epoch": 4.180688584002071, "grad_norm": 1.648651123046875, "learning_rate": 6.468202605919407e-06, "loss": 0.0166, "step": 16150 }, { "epoch": 4.193631892311675, "grad_norm": 5.503207206726074, "learning_rate": 6.453821152242069e-06, "loss": 0.0216, "step": 16200 }, { "epoch": 4.206575200621279, "grad_norm": 5.58413553237915, "learning_rate": 6.439439698564731e-06, "loss": 0.0269, "step": 16250 }, { "epoch": 4.219518508930883, "grad_norm": 3.219493865966797, "learning_rate": 6.4250582448873935e-06, "loss": 0.0154, "step": 16300 }, { "epoch": 4.232461817240487, "grad_norm": 7.937684059143066, "learning_rate": 6.410676791210056e-06, "loss": 0.0177, "step": 16350 }, { "epoch": 4.24540512555009, "grad_norm": 4.213293552398682, "learning_rate": 6.396295337532719e-06, "loss": 0.0202, "step": 16400 }, { "epoch": 4.258348433859695, "grad_norm": 3.651660680770874, "learning_rate": 6.381913883855381e-06, "loss": 0.0213, "step": 16450 }, { "epoch": 4.271291742169298, "grad_norm": 5.593703746795654, "learning_rate": 6.367532430178043e-06, "loss": 0.0158, "step": 16500 }, { "epoch": 4.284235050478903, "grad_norm": 8.395854949951172, "learning_rate": 6.3531509765007045e-06, "loss": 0.0203, "step": 16550 }, { "epoch": 4.297178358788506, "grad_norm": 5.291663646697998, "learning_rate": 6.338769522823367e-06, "loss": 0.0182, "step": 16600 }, { "epoch": 4.310121667098111, "grad_norm": 2.6728503704071045, "learning_rate": 6.32438806914603e-06, "loss": 0.0147, "step": 16650 }, { "epoch": 4.323064975407714, "grad_norm": 2.701340675354004, "learning_rate": 6.310006615468693e-06, "loss": 0.032, "step": 16700 }, { "epoch": 4.336008283717318, "grad_norm": 5.171128273010254, "learning_rate": 6.2956251617913545e-06, "loss": 0.0182, "step": 16750 }, { "epoch": 4.348951592026922, "grad_norm": 8.16347885131836, "learning_rate": 6.281243708114016e-06, "loss": 0.0359, "step": 16800 }, { "epoch": 4.361894900336526, "grad_norm": 6.198201656341553, "learning_rate": 6.266862254436678e-06, "loss": 0.0261, "step": 16850 }, { "epoch": 4.37483820864613, "grad_norm": 5.718491077423096, "learning_rate": 6.252480800759341e-06, "loss": 0.0193, "step": 16900 }, { "epoch": 4.387781516955734, "grad_norm": 8.977401733398438, "learning_rate": 6.238099347082004e-06, "loss": 0.021, "step": 16950 }, { "epoch": 4.400724825265338, "grad_norm": 2.3729536533355713, "learning_rate": 6.223717893404666e-06, "loss": 0.0207, "step": 17000 }, { "epoch": 4.413668133574942, "grad_norm": 3.5691659450531006, "learning_rate": 6.209336439727328e-06, "loss": 0.0256, "step": 17050 }, { "epoch": 4.426611441884546, "grad_norm": 3.9508790969848633, "learning_rate": 6.195242615123537e-06, "loss": 0.0198, "step": 17100 }, { "epoch": 4.43955475019415, "grad_norm": 6.141160488128662, "learning_rate": 6.1808611614462e-06, "loss": 0.0187, "step": 17150 }, { "epoch": 4.452498058503753, "grad_norm": 0.2836528420448303, "learning_rate": 6.166479707768862e-06, "loss": 0.0187, "step": 17200 }, { "epoch": 4.465441366813358, "grad_norm": 6.005315780639648, "learning_rate": 6.152098254091524e-06, "loss": 0.0261, "step": 17250 }, { "epoch": 4.478384675122961, "grad_norm": 2.222322702407837, "learning_rate": 6.137716800414186e-06, "loss": 0.0183, "step": 17300 }, { "epoch": 4.491327983432566, "grad_norm": 4.63626766204834, "learning_rate": 6.123335346736849e-06, "loss": 0.0209, "step": 17350 }, { "epoch": 4.504271291742169, "grad_norm": 1.04603111743927, "learning_rate": 6.108953893059511e-06, "loss": 0.0343, "step": 17400 }, { "epoch": 4.517214600051773, "grad_norm": 0.18591836094856262, "learning_rate": 6.094572439382173e-06, "loss": 0.0205, "step": 17450 }, { "epoch": 4.530157908361377, "grad_norm": 4.46800422668457, "learning_rate": 6.080190985704836e-06, "loss": 0.0307, "step": 17500 }, { "epoch": 4.543101216670981, "grad_norm": 2.5701541900634766, "learning_rate": 6.065809532027498e-06, "loss": 0.0201, "step": 17550 }, { "epoch": 4.556044524980585, "grad_norm": 3.805527448654175, "learning_rate": 6.05142807835016e-06, "loss": 0.0247, "step": 17600 }, { "epoch": 4.568987833290189, "grad_norm": 5.005966663360596, "learning_rate": 6.037046624672822e-06, "loss": 0.0166, "step": 17650 }, { "epoch": 4.581931141599793, "grad_norm": 2.1261184215545654, "learning_rate": 6.022665170995484e-06, "loss": 0.0192, "step": 17700 }, { "epoch": 4.594874449909397, "grad_norm": 3.359769582748413, "learning_rate": 6.008283717318147e-06, "loss": 0.0186, "step": 17750 }, { "epoch": 4.607817758219001, "grad_norm": 4.658329010009766, "learning_rate": 5.99390226364081e-06, "loss": 0.0156, "step": 17800 }, { "epoch": 4.620761066528605, "grad_norm": 1.4093743562698364, "learning_rate": 5.979520809963472e-06, "loss": 0.0249, "step": 17850 }, { "epoch": 4.633704374838208, "grad_norm": 7.107546806335449, "learning_rate": 5.9651393562861336e-06, "loss": 0.019, "step": 17900 }, { "epoch": 4.646647683147813, "grad_norm": 3.7134788036346436, "learning_rate": 5.9507579026087954e-06, "loss": 0.0316, "step": 17950 }, { "epoch": 4.659590991457416, "grad_norm": 9.994954109191895, "learning_rate": 5.936376448931458e-06, "loss": 0.021, "step": 18000 }, { "epoch": 4.672534299767021, "grad_norm": 5.7871527671813965, "learning_rate": 5.921994995254121e-06, "loss": 0.0201, "step": 18050 }, { "epoch": 4.685477608076624, "grad_norm": 4.141567230224609, "learning_rate": 5.9076135415767836e-06, "loss": 0.0341, "step": 18100 }, { "epoch": 4.698420916386228, "grad_norm": 3.703082799911499, "learning_rate": 5.8932320878994454e-06, "loss": 0.0209, "step": 18150 }, { "epoch": 4.711364224695832, "grad_norm": 1.5418035984039307, "learning_rate": 5.878850634222107e-06, "loss": 0.0205, "step": 18200 }, { "epoch": 4.724307533005436, "grad_norm": 2.696366310119629, "learning_rate": 5.864469180544769e-06, "loss": 0.0322, "step": 18250 }, { "epoch": 4.73725084131504, "grad_norm": 12.889842987060547, "learning_rate": 5.850087726867433e-06, "loss": 0.0299, "step": 18300 }, { "epoch": 4.750194149624644, "grad_norm": 14.697345733642578, "learning_rate": 5.835706273190095e-06, "loss": 0.021, "step": 18350 }, { "epoch": 4.763137457934248, "grad_norm": 14.185206413269043, "learning_rate": 5.821324819512757e-06, "loss": 0.028, "step": 18400 }, { "epoch": 4.776080766243852, "grad_norm": 1.6946377754211426, "learning_rate": 5.806943365835419e-06, "loss": 0.0407, "step": 18450 }, { "epoch": 4.789024074553456, "grad_norm": 0.4647742509841919, "learning_rate": 5.792561912158081e-06, "loss": 0.0245, "step": 18500 }, { "epoch": 4.80196738286306, "grad_norm": 0.9601898193359375, "learning_rate": 5.778180458480743e-06, "loss": 0.023, "step": 18550 }, { "epoch": 4.814910691172663, "grad_norm": 8.700115203857422, "learning_rate": 5.7637990048034065e-06, "loss": 0.0197, "step": 18600 }, { "epoch": 4.827853999482268, "grad_norm": 0.46070945262908936, "learning_rate": 5.749417551126068e-06, "loss": 0.0207, "step": 18650 }, { "epoch": 4.840797307791871, "grad_norm": 3.087283134460449, "learning_rate": 5.735036097448731e-06, "loss": 0.0228, "step": 18700 }, { "epoch": 4.853740616101476, "grad_norm": 2.9574930667877197, "learning_rate": 5.720654643771393e-06, "loss": 0.0251, "step": 18750 }, { "epoch": 4.866683924411079, "grad_norm": 0.7309446334838867, "learning_rate": 5.706273190094055e-06, "loss": 0.0209, "step": 18800 }, { "epoch": 4.879627232720684, "grad_norm": 3.813610553741455, "learning_rate": 5.691891736416717e-06, "loss": 0.0179, "step": 18850 }, { "epoch": 4.892570541030287, "grad_norm": 6.62191104888916, "learning_rate": 5.67751028273938e-06, "loss": 0.0229, "step": 18900 }, { "epoch": 4.905513849339892, "grad_norm": 1.3516626358032227, "learning_rate": 5.663128829062042e-06, "loss": 0.0202, "step": 18950 }, { "epoch": 4.918457157649495, "grad_norm": 8.537408828735352, "learning_rate": 5.648747375384705e-06, "loss": 0.0219, "step": 19000 }, { "epoch": 4.931400465959099, "grad_norm": 7.586127758026123, "learning_rate": 5.634365921707367e-06, "loss": 0.0263, "step": 19050 }, { "epoch": 4.944343774268703, "grad_norm": 2.9053454399108887, "learning_rate": 5.6199844680300286e-06, "loss": 0.0173, "step": 19100 }, { "epoch": 4.957287082578307, "grad_norm": 3.9602510929107666, "learning_rate": 5.60560301435269e-06, "loss": 0.0216, "step": 19150 }, { "epoch": 4.970230390887911, "grad_norm": 8.381765365600586, "learning_rate": 5.591221560675354e-06, "loss": 0.0213, "step": 19200 }, { "epoch": 4.983173699197515, "grad_norm": 3.1814215183258057, "learning_rate": 5.577127736071563e-06, "loss": 0.028, "step": 19250 }, { "epoch": 4.996117007507118, "grad_norm": 9.668580055236816, "learning_rate": 5.5627462823942245e-06, "loss": 0.0346, "step": 19300 }, { "epoch": 5.0, "eval_loss": 0.019977210089564323, "eval_runtime": 88.6374, "eval_samples_per_second": 5.641, "eval_steps_per_second": 0.711, "eval_wer": 3.1069591686784928, "step": 19315 }, { "epoch": 5.009060315816723, "grad_norm": 0.802038848400116, "learning_rate": 5.548364828716886e-06, "loss": 0.0186, "step": 19350 }, { "epoch": 5.022003624126326, "grad_norm": 3.1706273555755615, "learning_rate": 5.53398337503955e-06, "loss": 0.0236, "step": 19400 }, { "epoch": 5.034946932435931, "grad_norm": 3.518181562423706, "learning_rate": 5.519601921362212e-06, "loss": 0.0136, "step": 19450 }, { "epoch": 5.047890240745534, "grad_norm": 4.777484893798828, "learning_rate": 5.50550809675842e-06, "loss": 0.012, "step": 19500 }, { "epoch": 5.060833549055139, "grad_norm": 16.247772216796875, "learning_rate": 5.491126643081083e-06, "loss": 0.0107, "step": 19550 }, { "epoch": 5.073776857364742, "grad_norm": 1.9312938451766968, "learning_rate": 5.476745189403745e-06, "loss": 0.0086, "step": 19600 }, { "epoch": 5.086720165674346, "grad_norm": 0.5916788578033447, "learning_rate": 5.4623637357264085e-06, "loss": 0.0128, "step": 19650 }, { "epoch": 5.09966347398395, "grad_norm": 0.7647702097892761, "learning_rate": 5.44798228204907e-06, "loss": 0.0127, "step": 19700 }, { "epoch": 5.112606782293554, "grad_norm": 1.958817481994629, "learning_rate": 5.433600828371732e-06, "loss": 0.0166, "step": 19750 }, { "epoch": 5.125550090603158, "grad_norm": 5.420085430145264, "learning_rate": 5.419219374694394e-06, "loss": 0.0115, "step": 19800 }, { "epoch": 5.138493398912762, "grad_norm": 4.039155960083008, "learning_rate": 5.404837921017057e-06, "loss": 0.0099, "step": 19850 }, { "epoch": 5.151436707222366, "grad_norm": 3.975069999694824, "learning_rate": 5.390456467339719e-06, "loss": 0.0211, "step": 19900 }, { "epoch": 5.16438001553197, "grad_norm": 2.953425168991089, "learning_rate": 5.376075013662381e-06, "loss": 0.0131, "step": 19950 }, { "epoch": 5.1773233238415735, "grad_norm": 0.4892643392086029, "learning_rate": 5.361693559985044e-06, "loss": 0.0107, "step": 20000 }, { "epoch": 5.190266632151178, "grad_norm": 0.9816207885742188, "learning_rate": 5.347312106307706e-06, "loss": 0.0119, "step": 20050 }, { "epoch": 5.2032099404607814, "grad_norm": 0.6787395477294922, "learning_rate": 5.332930652630368e-06, "loss": 0.0107, "step": 20100 }, { "epoch": 5.216153248770386, "grad_norm": 2.689342737197876, "learning_rate": 5.318549198953031e-06, "loss": 0.0118, "step": 20150 }, { "epoch": 5.229096557079989, "grad_norm": 1.7226523160934448, "learning_rate": 5.304167745275693e-06, "loss": 0.0147, "step": 20200 }, { "epoch": 5.242039865389594, "grad_norm": 0.6867370009422302, "learning_rate": 5.289786291598355e-06, "loss": 0.0091, "step": 20250 }, { "epoch": 5.254983173699197, "grad_norm": 2.8953654766082764, "learning_rate": 5.275404837921018e-06, "loss": 0.0105, "step": 20300 }, { "epoch": 5.267926482008802, "grad_norm": 6.105691432952881, "learning_rate": 5.26102338424368e-06, "loss": 0.0114, "step": 20350 }, { "epoch": 5.280869790318405, "grad_norm": 2.8763232231140137, "learning_rate": 5.246641930566342e-06, "loss": 0.0152, "step": 20400 }, { "epoch": 5.293813098628009, "grad_norm": 0.7701404094696045, "learning_rate": 5.232260476889004e-06, "loss": 0.0136, "step": 20450 }, { "epoch": 5.306756406937613, "grad_norm": 5.06765604019165, "learning_rate": 5.217879023211667e-06, "loss": 0.0118, "step": 20500 }, { "epoch": 5.319699715247217, "grad_norm": 0.8552833795547485, "learning_rate": 5.203497569534329e-06, "loss": 0.0152, "step": 20550 }, { "epoch": 5.332643023556821, "grad_norm": 8.169344902038574, "learning_rate": 5.189116115856992e-06, "loss": 0.0137, "step": 20600 }, { "epoch": 5.345586331866425, "grad_norm": 2.8536713123321533, "learning_rate": 5.1747346621796535e-06, "loss": 0.0179, "step": 20650 }, { "epoch": 5.358529640176029, "grad_norm": 4.006629943847656, "learning_rate": 5.160353208502315e-06, "loss": 0.0148, "step": 20700 }, { "epoch": 5.371472948485633, "grad_norm": 2.0308613777160645, "learning_rate": 5.145971754824978e-06, "loss": 0.0096, "step": 20750 }, { "epoch": 5.3844162567952365, "grad_norm": 0.8264743089675903, "learning_rate": 5.131590301147641e-06, "loss": 0.012, "step": 20800 }, { "epoch": 5.397359565104841, "grad_norm": 1.9350370168685913, "learning_rate": 5.117208847470303e-06, "loss": 0.021, "step": 20850 }, { "epoch": 5.4103028734144445, "grad_norm": 3.08841872215271, "learning_rate": 5.102827393792965e-06, "loss": 0.01, "step": 20900 }, { "epoch": 5.423246181724049, "grad_norm": 3.0373833179473877, "learning_rate": 5.088445940115627e-06, "loss": 0.0123, "step": 20950 }, { "epoch": 5.4361894900336525, "grad_norm": 0.12145110964775085, "learning_rate": 5.074064486438289e-06, "loss": 0.0119, "step": 21000 }, { "epoch": 5.449132798343257, "grad_norm": 2.9393069744110107, "learning_rate": 5.059683032760951e-06, "loss": 0.0115, "step": 21050 }, { "epoch": 5.4620761066528605, "grad_norm": 0.9441426992416382, "learning_rate": 5.0453015790836146e-06, "loss": 0.0127, "step": 21100 }, { "epoch": 5.475019414962464, "grad_norm": 1.0003094673156738, "learning_rate": 5.030920125406276e-06, "loss": 0.0166, "step": 21150 }, { "epoch": 5.4879627232720685, "grad_norm": 3.650327682495117, "learning_rate": 5.016538671728939e-06, "loss": 0.012, "step": 21200 }, { "epoch": 5.500906031581672, "grad_norm": 2.2948007583618164, "learning_rate": 5.002157218051601e-06, "loss": 0.0204, "step": 21250 }, { "epoch": 5.5138493398912765, "grad_norm": 1.089821219444275, "learning_rate": 4.987775764374264e-06, "loss": 0.0087, "step": 21300 }, { "epoch": 5.52679264820088, "grad_norm": 2.964750051498413, "learning_rate": 4.973394310696926e-06, "loss": 0.009, "step": 21350 }, { "epoch": 5.5397359565104844, "grad_norm": 2.492793083190918, "learning_rate": 4.9590128570195875e-06, "loss": 0.013, "step": 21400 }, { "epoch": 5.552679264820088, "grad_norm": 18.94529151916504, "learning_rate": 4.94463140334225e-06, "loss": 0.0217, "step": 21450 }, { "epoch": 5.565622573129692, "grad_norm": 0.21670395135879517, "learning_rate": 4.930249949664913e-06, "loss": 0.0289, "step": 21500 }, { "epoch": 5.578565881439296, "grad_norm": 1.121397852897644, "learning_rate": 4.915868495987575e-06, "loss": 0.0089, "step": 21550 }, { "epoch": 5.5915091897488995, "grad_norm": 2.1545515060424805, "learning_rate": 4.9014870423102375e-06, "loss": 0.015, "step": 21600 }, { "epoch": 5.604452498058504, "grad_norm": 13.737982749938965, "learning_rate": 4.887105588632899e-06, "loss": 0.0217, "step": 21650 }, { "epoch": 5.6173958063681075, "grad_norm": 3.22420072555542, "learning_rate": 4.872724134955561e-06, "loss": 0.0095, "step": 21700 }, { "epoch": 5.630339114677712, "grad_norm": 2.609224557876587, "learning_rate": 4.858342681278224e-06, "loss": 0.0156, "step": 21750 }, { "epoch": 5.6432824229873155, "grad_norm": 4.490423679351807, "learning_rate": 4.843961227600887e-06, "loss": 0.0101, "step": 21800 }, { "epoch": 5.656225731296919, "grad_norm": 3.0184295177459717, "learning_rate": 4.8295797739235485e-06, "loss": 0.0126, "step": 21850 }, { "epoch": 5.6691690396065235, "grad_norm": 4.014559745788574, "learning_rate": 4.815198320246211e-06, "loss": 0.0126, "step": 21900 }, { "epoch": 5.682112347916127, "grad_norm": 7.090182304382324, "learning_rate": 4.800816866568873e-06, "loss": 0.0182, "step": 21950 }, { "epoch": 5.6950556562257315, "grad_norm": 3.3935253620147705, "learning_rate": 4.786435412891535e-06, "loss": 0.0155, "step": 22000 }, { "epoch": 5.707998964535335, "grad_norm": 3.5761795043945312, "learning_rate": 4.772053959214198e-06, "loss": 0.0102, "step": 22050 }, { "epoch": 5.7209422728449395, "grad_norm": 2.370244026184082, "learning_rate": 4.75767250553686e-06, "loss": 0.0206, "step": 22100 }, { "epoch": 5.733885581154543, "grad_norm": 0.3268122971057892, "learning_rate": 4.743291051859522e-06, "loss": 0.0143, "step": 22150 }, { "epoch": 5.7468288894641475, "grad_norm": 1.703277587890625, "learning_rate": 4.728909598182185e-06, "loss": 0.0143, "step": 22200 }, { "epoch": 5.759772197773751, "grad_norm": 2.766359329223633, "learning_rate": 4.714528144504847e-06, "loss": 0.0114, "step": 22250 }, { "epoch": 5.772715506083355, "grad_norm": 0.3259863555431366, "learning_rate": 4.700146690827509e-06, "loss": 0.0114, "step": 22300 }, { "epoch": 5.785658814392959, "grad_norm": 12.3453369140625, "learning_rate": 4.685765237150171e-06, "loss": 0.0109, "step": 22350 }, { "epoch": 5.798602122702563, "grad_norm": 4.0501179695129395, "learning_rate": 4.671383783472834e-06, "loss": 0.0134, "step": 22400 }, { "epoch": 5.811545431012167, "grad_norm": 3.24855637550354, "learning_rate": 4.657002329795496e-06, "loss": 0.015, "step": 22450 }, { "epoch": 5.824488739321771, "grad_norm": 4.302082061767578, "learning_rate": 4.642620876118159e-06, "loss": 0.0272, "step": 22500 }, { "epoch": 5.837432047631374, "grad_norm": 6.086068630218506, "learning_rate": 4.628239422440821e-06, "loss": 0.0134, "step": 22550 }, { "epoch": 5.850375355940979, "grad_norm": 0.44153615832328796, "learning_rate": 4.613857968763483e-06, "loss": 0.0168, "step": 22600 }, { "epoch": 5.863318664250582, "grad_norm": 1.0860470533370972, "learning_rate": 4.599476515086145e-06, "loss": 0.0118, "step": 22650 }, { "epoch": 5.876261972560187, "grad_norm": 3.069711923599243, "learning_rate": 4.585095061408807e-06, "loss": 0.0111, "step": 22700 }, { "epoch": 5.88920528086979, "grad_norm": 4.1225504875183105, "learning_rate": 4.57071360773147e-06, "loss": 0.0118, "step": 22750 }, { "epoch": 5.9021485891793946, "grad_norm": 1.6695607900619507, "learning_rate": 4.5563321540541325e-06, "loss": 0.0087, "step": 22800 }, { "epoch": 5.915091897488998, "grad_norm": 3.2536139488220215, "learning_rate": 4.541950700376794e-06, "loss": 0.0143, "step": 22850 }, { "epoch": 5.9280352057986025, "grad_norm": 1.4655452966690063, "learning_rate": 4.527569246699457e-06, "loss": 0.0152, "step": 22900 }, { "epoch": 5.940978514108206, "grad_norm": 4.390911102294922, "learning_rate": 4.513187793022119e-06, "loss": 0.0117, "step": 22950 }, { "epoch": 5.95392182241781, "grad_norm": 0.6409261226654053, "learning_rate": 4.498806339344781e-06, "loss": 0.014, "step": 23000 }, { "epoch": 5.966865130727414, "grad_norm": 9.11255931854248, "learning_rate": 4.4844248856674435e-06, "loss": 0.0135, "step": 23050 }, { "epoch": 5.979808439037018, "grad_norm": 3.790682554244995, "learning_rate": 4.470043431990106e-06, "loss": 0.0109, "step": 23100 }, { "epoch": 5.992751747346622, "grad_norm": 6.534693241119385, "learning_rate": 4.455661978312768e-06, "loss": 0.0138, "step": 23150 }, { "epoch": 6.0, "eval_loss": 0.013288498856127262, "eval_runtime": 88.7687, "eval_samples_per_second": 5.633, "eval_steps_per_second": 0.71, "eval_wer": 2.2567439907630944, "step": 23178 }, { "epoch": 6.005695055656226, "grad_norm": 0.7158689498901367, "learning_rate": 4.441280524635431e-06, "loss": 0.0119, "step": 23200 }, { "epoch": 6.01863836396583, "grad_norm": 2.5632243156433105, "learning_rate": 4.426899070958093e-06, "loss": 0.008, "step": 23250 }, { "epoch": 6.031581672275434, "grad_norm": 1.491152286529541, "learning_rate": 4.4125176172807545e-06, "loss": 0.0084, "step": 23300 }, { "epoch": 6.044524980585037, "grad_norm": 0.8352173566818237, "learning_rate": 4.398136163603417e-06, "loss": 0.0121, "step": 23350 }, { "epoch": 6.057468288894642, "grad_norm": 0.7601485848426819, "learning_rate": 4.38375470992608e-06, "loss": 0.0084, "step": 23400 }, { "epoch": 6.070411597204245, "grad_norm": 1.012165904045105, "learning_rate": 4.369373256248742e-06, "loss": 0.0149, "step": 23450 }, { "epoch": 6.08335490551385, "grad_norm": 0.7509778141975403, "learning_rate": 4.3549918025714045e-06, "loss": 0.0045, "step": 23500 }, { "epoch": 6.096298213823453, "grad_norm": 6.754235744476318, "learning_rate": 4.340610348894066e-06, "loss": 0.0057, "step": 23550 }, { "epoch": 6.109241522133058, "grad_norm": 0.37281331419944763, "learning_rate": 4.326228895216728e-06, "loss": 0.0057, "step": 23600 }, { "epoch": 6.122184830442661, "grad_norm": 0.06306509673595428, "learning_rate": 4.311847441539391e-06, "loss": 0.0069, "step": 23650 }, { "epoch": 6.135128138752265, "grad_norm": 0.4581661522388458, "learning_rate": 4.297465987862054e-06, "loss": 0.0055, "step": 23700 }, { "epoch": 6.148071447061869, "grad_norm": 0.8070671558380127, "learning_rate": 4.2830845341847156e-06, "loss": 0.0087, "step": 23750 }, { "epoch": 6.161014755371473, "grad_norm": 0.4274216294288635, "learning_rate": 4.268703080507378e-06, "loss": 0.016, "step": 23800 }, { "epoch": 6.173958063681077, "grad_norm": 0.6105465292930603, "learning_rate": 4.25432162683004e-06, "loss": 0.0063, "step": 23850 }, { "epoch": 6.186901371990681, "grad_norm": 0.4398168921470642, "learning_rate": 4.239940173152703e-06, "loss": 0.0052, "step": 23900 }, { "epoch": 6.199844680300285, "grad_norm": 2.373279333114624, "learning_rate": 4.225558719475365e-06, "loss": 0.008, "step": 23950 }, { "epoch": 6.212787988609889, "grad_norm": 2.742097854614258, "learning_rate": 4.2111772657980275e-06, "loss": 0.0062, "step": 24000 }, { "epoch": 6.225731296919492, "grad_norm": 1.6418052911758423, "learning_rate": 4.19679581212069e-06, "loss": 0.0056, "step": 24050 }, { "epoch": 6.238674605229097, "grad_norm": 1.0858538150787354, "learning_rate": 4.182414358443352e-06, "loss": 0.0047, "step": 24100 }, { "epoch": 6.2516179135387, "grad_norm": 1.6831880807876587, "learning_rate": 4.168032904766014e-06, "loss": 0.0045, "step": 24150 }, { "epoch": 6.264561221848305, "grad_norm": 0.5000109076499939, "learning_rate": 4.153651451088677e-06, "loss": 0.01, "step": 24200 }, { "epoch": 6.277504530157908, "grad_norm": 0.6169405579566956, "learning_rate": 4.1392699974113385e-06, "loss": 0.008, "step": 24250 }, { "epoch": 6.290447838467513, "grad_norm": 1.9843913316726685, "learning_rate": 4.124888543734001e-06, "loss": 0.0095, "step": 24300 }, { "epoch": 6.303391146777116, "grad_norm": 1.331559419631958, "learning_rate": 4.110507090056663e-06, "loss": 0.0073, "step": 24350 }, { "epoch": 6.31633445508672, "grad_norm": 0.6855106949806213, "learning_rate": 4.096125636379326e-06, "loss": 0.0054, "step": 24400 }, { "epoch": 6.329277763396324, "grad_norm": 0.35360315442085266, "learning_rate": 4.081744182701988e-06, "loss": 0.006, "step": 24450 }, { "epoch": 6.342221071705928, "grad_norm": 1.6724082231521606, "learning_rate": 4.06736272902465e-06, "loss": 0.0032, "step": 24500 }, { "epoch": 6.355164380015532, "grad_norm": 1.0881156921386719, "learning_rate": 4.052981275347312e-06, "loss": 0.0052, "step": 24550 }, { "epoch": 6.368107688325136, "grad_norm": 0.6318166255950928, "learning_rate": 4.038599821669975e-06, "loss": 0.0081, "step": 24600 }, { "epoch": 6.38105099663474, "grad_norm": 3.3334782123565674, "learning_rate": 4.024218367992637e-06, "loss": 0.0121, "step": 24650 }, { "epoch": 6.393994304944344, "grad_norm": 0.2339646816253662, "learning_rate": 4.0098369143152995e-06, "loss": 0.0072, "step": 24700 }, { "epoch": 6.406937613253948, "grad_norm": 1.0727429389953613, "learning_rate": 3.995743089711508e-06, "loss": 0.0074, "step": 24750 }, { "epoch": 6.419880921563552, "grad_norm": 5.548860549926758, "learning_rate": 3.981649265107718e-06, "loss": 0.0072, "step": 24800 }, { "epoch": 6.432824229873155, "grad_norm": 0.37892910838127136, "learning_rate": 3.9672678114303795e-06, "loss": 0.0105, "step": 24850 }, { "epoch": 6.44576753818276, "grad_norm": 0.9245821237564087, "learning_rate": 3.952886357753042e-06, "loss": 0.0059, "step": 24900 }, { "epoch": 6.458710846492363, "grad_norm": 1.4176559448242188, "learning_rate": 3.938504904075705e-06, "loss": 0.005, "step": 24950 }, { "epoch": 6.471654154801968, "grad_norm": 2.894819736480713, "learning_rate": 3.924123450398367e-06, "loss": 0.0068, "step": 25000 }, { "epoch": 6.484597463111571, "grad_norm": 1.3159211874008179, "learning_rate": 3.909741996721029e-06, "loss": 0.0068, "step": 25050 }, { "epoch": 6.497540771421175, "grad_norm": 1.8089011907577515, "learning_rate": 3.895360543043691e-06, "loss": 0.0067, "step": 25100 }, { "epoch": 6.510484079730779, "grad_norm": 1.673920750617981, "learning_rate": 3.880979089366353e-06, "loss": 0.0109, "step": 25150 }, { "epoch": 6.523427388040383, "grad_norm": 0.7830101251602173, "learning_rate": 3.866597635689016e-06, "loss": 0.0055, "step": 25200 }, { "epoch": 6.536370696349987, "grad_norm": 1.6252669095993042, "learning_rate": 3.852216182011679e-06, "loss": 0.0066, "step": 25250 }, { "epoch": 6.549314004659591, "grad_norm": 2.7717552185058594, "learning_rate": 3.8378347283343405e-06, "loss": 0.0059, "step": 25300 }, { "epoch": 6.562257312969195, "grad_norm": 7.495051383972168, "learning_rate": 3.823453274657002e-06, "loss": 0.015, "step": 25350 }, { "epoch": 6.575200621278799, "grad_norm": 0.43886011838912964, "learning_rate": 3.809071820979665e-06, "loss": 0.0046, "step": 25400 }, { "epoch": 6.588143929588403, "grad_norm": 0.8297833204269409, "learning_rate": 3.7946903673023274e-06, "loss": 0.0113, "step": 25450 }, { "epoch": 6.601087237898007, "grad_norm": 3.7396538257598877, "learning_rate": 3.7803089136249893e-06, "loss": 0.0063, "step": 25500 }, { "epoch": 6.61403054620761, "grad_norm": 3.463552236557007, "learning_rate": 3.765927459947652e-06, "loss": 0.0065, "step": 25550 }, { "epoch": 6.626973854517215, "grad_norm": 6.3341450691223145, "learning_rate": 3.7515460062703143e-06, "loss": 0.0076, "step": 25600 }, { "epoch": 6.639917162826818, "grad_norm": 1.700925350189209, "learning_rate": 3.737164552592976e-06, "loss": 0.0054, "step": 25650 }, { "epoch": 6.652860471136423, "grad_norm": 6.3853044509887695, "learning_rate": 3.722783098915639e-06, "loss": 0.0097, "step": 25700 }, { "epoch": 6.665803779446026, "grad_norm": 6.10149621963501, "learning_rate": 3.7084016452383007e-06, "loss": 0.0082, "step": 25750 }, { "epoch": 6.67874708775563, "grad_norm": 1.308225393295288, "learning_rate": 3.694020191560963e-06, "loss": 0.0099, "step": 25800 }, { "epoch": 6.691690396065234, "grad_norm": 4.188955307006836, "learning_rate": 3.6796387378836257e-06, "loss": 0.0112, "step": 25850 }, { "epoch": 6.704633704374838, "grad_norm": 1.8746811151504517, "learning_rate": 3.6652572842062876e-06, "loss": 0.0118, "step": 25900 }, { "epoch": 6.717577012684442, "grad_norm": 0.3944805860519409, "learning_rate": 3.65087583052895e-06, "loss": 0.0063, "step": 25950 }, { "epoch": 6.730520320994046, "grad_norm": 1.3446645736694336, "learning_rate": 3.6364943768516126e-06, "loss": 0.0292, "step": 26000 }, { "epoch": 6.74346362930365, "grad_norm": 10.001498222351074, "learning_rate": 3.6221129231742745e-06, "loss": 0.0078, "step": 26050 }, { "epoch": 6.756406937613254, "grad_norm": 3.632220983505249, "learning_rate": 3.607731469496937e-06, "loss": 0.0044, "step": 26100 }, { "epoch": 6.769350245922858, "grad_norm": 4.4222259521484375, "learning_rate": 3.5933500158195995e-06, "loss": 0.0093, "step": 26150 }, { "epoch": 6.782293554232462, "grad_norm": 1.0133709907531738, "learning_rate": 3.5789685621422614e-06, "loss": 0.0072, "step": 26200 }, { "epoch": 6.795236862542065, "grad_norm": 0.1933288425207138, "learning_rate": 3.564587108464924e-06, "loss": 0.0063, "step": 26250 }, { "epoch": 6.80818017085167, "grad_norm": 1.596628189086914, "learning_rate": 3.5502056547875864e-06, "loss": 0.0055, "step": 26300 }, { "epoch": 6.821123479161273, "grad_norm": 0.2668837010860443, "learning_rate": 3.5358242011102482e-06, "loss": 0.0059, "step": 26350 }, { "epoch": 6.834066787470878, "grad_norm": 1.4524122476577759, "learning_rate": 3.521442747432911e-06, "loss": 0.0144, "step": 26400 }, { "epoch": 6.847010095780481, "grad_norm": 0.7154669761657715, "learning_rate": 3.5070612937555732e-06, "loss": 0.0083, "step": 26450 }, { "epoch": 6.859953404090086, "grad_norm": 3.9259557723999023, "learning_rate": 3.492679840078235e-06, "loss": 0.0054, "step": 26500 }, { "epoch": 6.872896712399689, "grad_norm": 2.91253924369812, "learning_rate": 3.478298386400898e-06, "loss": 0.0058, "step": 26550 }, { "epoch": 6.885840020709293, "grad_norm": 0.6866968870162964, "learning_rate": 3.46391693272356e-06, "loss": 0.0089, "step": 26600 }, { "epoch": 6.898783329018897, "grad_norm": 0.4900106191635132, "learning_rate": 3.449535479046222e-06, "loss": 0.015, "step": 26650 }, { "epoch": 6.911726637328501, "grad_norm": 0.8514009118080139, "learning_rate": 3.4351540253688847e-06, "loss": 0.0071, "step": 26700 }, { "epoch": 6.924669945638105, "grad_norm": 1.1547664403915405, "learning_rate": 3.420772571691547e-06, "loss": 0.007, "step": 26750 }, { "epoch": 6.937613253947709, "grad_norm": 1.6039056777954102, "learning_rate": 3.406391118014209e-06, "loss": 0.0049, "step": 26800 }, { "epoch": 6.950556562257313, "grad_norm": 0.2261231243610382, "learning_rate": 3.3920096643368716e-06, "loss": 0.006, "step": 26850 }, { "epoch": 6.963499870566917, "grad_norm": 2.033464193344116, "learning_rate": 3.377628210659534e-06, "loss": 0.0076, "step": 26900 }, { "epoch": 6.97644317887652, "grad_norm": 2.289121150970459, "learning_rate": 3.3632467569821957e-06, "loss": 0.0114, "step": 26950 }, { "epoch": 6.989386487186125, "grad_norm": 1.8845149278640747, "learning_rate": 3.3488653033048584e-06, "loss": 0.0067, "step": 27000 }, { "epoch": 7.0, "eval_loss": 0.0072191799990832806, "eval_runtime": 89.4574, "eval_samples_per_second": 5.589, "eval_steps_per_second": 0.704, "eval_wer": 2.004828382491865, "step": 27041 }, { "epoch": 7.002329795495728, "grad_norm": 0.09408234804868698, "learning_rate": 3.3344838496275207e-06, "loss": 0.0059, "step": 27050 }, { "epoch": 7.015273103805333, "grad_norm": 0.22423428297042847, "learning_rate": 3.3201023959501826e-06, "loss": 0.0033, "step": 27100 }, { "epoch": 7.028216412114936, "grad_norm": 0.37666457891464233, "learning_rate": 3.3057209422728453e-06, "loss": 0.0047, "step": 27150 }, { "epoch": 7.041159720424541, "grad_norm": 0.46821069717407227, "learning_rate": 3.2913394885955076e-06, "loss": 0.0023, "step": 27200 }, { "epoch": 7.054103028734144, "grad_norm": 0.20124687254428864, "learning_rate": 3.2769580349181695e-06, "loss": 0.0042, "step": 27250 }, { "epoch": 7.067046337043748, "grad_norm": 0.15090468525886536, "learning_rate": 3.262576581240832e-06, "loss": 0.0026, "step": 27300 }, { "epoch": 7.079989645353352, "grad_norm": 0.24649310111999512, "learning_rate": 3.2481951275634945e-06, "loss": 0.0104, "step": 27350 }, { "epoch": 7.092932953662956, "grad_norm": 0.5062503814697266, "learning_rate": 3.2338136738861563e-06, "loss": 0.0044, "step": 27400 }, { "epoch": 7.10587626197256, "grad_norm": 1.3508222103118896, "learning_rate": 3.219432220208819e-06, "loss": 0.0038, "step": 27450 }, { "epoch": 7.118819570282164, "grad_norm": 0.41208532452583313, "learning_rate": 3.2050507665314814e-06, "loss": 0.0039, "step": 27500 }, { "epoch": 7.131762878591768, "grad_norm": 0.5694621801376343, "learning_rate": 3.1906693128541436e-06, "loss": 0.0051, "step": 27550 }, { "epoch": 7.144706186901372, "grad_norm": 0.21710887551307678, "learning_rate": 3.176287859176806e-06, "loss": 0.003, "step": 27600 }, { "epoch": 7.1576494952109755, "grad_norm": 18.37504005432129, "learning_rate": 3.1619064054994682e-06, "loss": 0.0074, "step": 27650 }, { "epoch": 7.17059280352058, "grad_norm": 0.10800693929195404, "learning_rate": 3.1475249518221305e-06, "loss": 0.0098, "step": 27700 }, { "epoch": 7.1835361118301835, "grad_norm": 0.9992019534111023, "learning_rate": 3.133143498144793e-06, "loss": 0.0037, "step": 27750 }, { "epoch": 7.196479420139788, "grad_norm": 0.5260515213012695, "learning_rate": 3.118762044467455e-06, "loss": 0.0059, "step": 27800 }, { "epoch": 7.2094227284493915, "grad_norm": 0.16195982694625854, "learning_rate": 3.1043805907901174e-06, "loss": 0.0033, "step": 27850 }, { "epoch": 7.222366036758996, "grad_norm": 0.0777490958571434, "learning_rate": 3.0899991371127797e-06, "loss": 0.0031, "step": 27900 }, { "epoch": 7.2353093450685995, "grad_norm": 2.360994577407837, "learning_rate": 3.0756176834354416e-06, "loss": 0.0042, "step": 27950 }, { "epoch": 7.248252653378204, "grad_norm": 0.7095078825950623, "learning_rate": 3.0612362297581043e-06, "loss": 0.0024, "step": 28000 }, { "epoch": 7.2611959616878075, "grad_norm": 0.8493836522102356, "learning_rate": 3.0468547760807666e-06, "loss": 0.0061, "step": 28050 }, { "epoch": 7.274139269997411, "grad_norm": 3.106424570083618, "learning_rate": 3.0324733224034284e-06, "loss": 0.0025, "step": 28100 }, { "epoch": 7.2870825783070154, "grad_norm": 0.9348524808883667, "learning_rate": 3.018091868726091e-06, "loss": 0.003, "step": 28150 }, { "epoch": 7.300025886616619, "grad_norm": 4.065819263458252, "learning_rate": 3.0037104150487534e-06, "loss": 0.0026, "step": 28200 }, { "epoch": 7.312969194926223, "grad_norm": 0.16182902455329895, "learning_rate": 2.9893289613714153e-06, "loss": 0.0035, "step": 28250 }, { "epoch": 7.325912503235827, "grad_norm": 0.36588823795318604, "learning_rate": 2.974947507694078e-06, "loss": 0.0042, "step": 28300 }, { "epoch": 7.3388558115454305, "grad_norm": 0.47314420342445374, "learning_rate": 2.9605660540167403e-06, "loss": 0.0033, "step": 28350 }, { "epoch": 7.351799119855035, "grad_norm": 7.304609298706055, "learning_rate": 2.946184600339402e-06, "loss": 0.0028, "step": 28400 }, { "epoch": 7.3647424281646385, "grad_norm": 0.591309130191803, "learning_rate": 2.931803146662065e-06, "loss": 0.0057, "step": 28450 }, { "epoch": 7.377685736474243, "grad_norm": 0.7642752528190613, "learning_rate": 2.917421692984727e-06, "loss": 0.0038, "step": 28500 }, { "epoch": 7.3906290447838465, "grad_norm": 0.9871138334274292, "learning_rate": 2.903040239307389e-06, "loss": 0.0038, "step": 28550 }, { "epoch": 7.403572353093451, "grad_norm": 0.7224917411804199, "learning_rate": 2.8886587856300518e-06, "loss": 0.0057, "step": 28600 }, { "epoch": 7.4165156614030545, "grad_norm": 0.7096822261810303, "learning_rate": 2.874277331952714e-06, "loss": 0.0031, "step": 28650 }, { "epoch": 7.429458969712659, "grad_norm": 1.5942363739013672, "learning_rate": 2.859895878275376e-06, "loss": 0.0041, "step": 28700 }, { "epoch": 7.4424022780222625, "grad_norm": 0.6390734910964966, "learning_rate": 2.8455144245980386e-06, "loss": 0.0031, "step": 28750 }, { "epoch": 7.455345586331866, "grad_norm": 0.8184775114059448, "learning_rate": 2.831132970920701e-06, "loss": 0.0088, "step": 28800 }, { "epoch": 7.4682888946414705, "grad_norm": 0.07598695158958435, "learning_rate": 2.816751517243363e-06, "loss": 0.0033, "step": 28850 }, { "epoch": 7.481232202951074, "grad_norm": 0.4833034873008728, "learning_rate": 2.8023700635660255e-06, "loss": 0.0045, "step": 28900 }, { "epoch": 7.4941755112606785, "grad_norm": 0.30186623334884644, "learning_rate": 2.787988609888688e-06, "loss": 0.0053, "step": 28950 }, { "epoch": 7.507118819570282, "grad_norm": 0.9921897053718567, "learning_rate": 2.7736071562113497e-06, "loss": 0.0026, "step": 29000 }, { "epoch": 7.5200621278798865, "grad_norm": 2.7632157802581787, "learning_rate": 2.7592257025340124e-06, "loss": 0.0123, "step": 29050 }, { "epoch": 7.53300543618949, "grad_norm": 0.11869651824235916, "learning_rate": 2.7448442488566747e-06, "loss": 0.0027, "step": 29100 }, { "epoch": 7.545948744499094, "grad_norm": 0.14628329873085022, "learning_rate": 2.7304627951793374e-06, "loss": 0.003, "step": 29150 }, { "epoch": 7.558892052808698, "grad_norm": 0.4393390715122223, "learning_rate": 2.7160813415019993e-06, "loss": 0.0026, "step": 29200 }, { "epoch": 7.571835361118302, "grad_norm": 2.097261428833008, "learning_rate": 2.7016998878246616e-06, "loss": 0.0026, "step": 29250 }, { "epoch": 7.584778669427906, "grad_norm": 0.23214209079742432, "learning_rate": 2.6873184341473243e-06, "loss": 0.0152, "step": 29300 }, { "epoch": 7.59772197773751, "grad_norm": 6.099156379699707, "learning_rate": 2.672936980469986e-06, "loss": 0.0061, "step": 29350 }, { "epoch": 7.610665286047114, "grad_norm": 1.0887069702148438, "learning_rate": 2.6585555267926484e-06, "loss": 0.0119, "step": 29400 }, { "epoch": 7.623608594356718, "grad_norm": 1.0223588943481445, "learning_rate": 2.644174073115311e-06, "loss": 0.0027, "step": 29450 }, { "epoch": 7.636551902666321, "grad_norm": 6.282520771026611, "learning_rate": 2.629792619437973e-06, "loss": 0.0035, "step": 29500 }, { "epoch": 7.6494952109759256, "grad_norm": 0.21501053869724274, "learning_rate": 2.6154111657606353e-06, "loss": 0.009, "step": 29550 }, { "epoch": 7.662438519285529, "grad_norm": 1.1203105449676514, "learning_rate": 2.6013173411568443e-06, "loss": 0.006, "step": 29600 }, { "epoch": 7.6753818275951335, "grad_norm": 0.24988429248332977, "learning_rate": 2.586935887479506e-06, "loss": 0.0026, "step": 29650 }, { "epoch": 7.688325135904737, "grad_norm": 0.8392144441604614, "learning_rate": 2.572554433802169e-06, "loss": 0.0047, "step": 29700 }, { "epoch": 7.7012684442143415, "grad_norm": 0.7785108089447021, "learning_rate": 2.5581729801248312e-06, "loss": 0.0037, "step": 29750 }, { "epoch": 7.714211752523945, "grad_norm": 0.2849646806716919, "learning_rate": 2.543791526447493e-06, "loss": 0.0027, "step": 29800 }, { "epoch": 7.7271550608335495, "grad_norm": 0.3449094593524933, "learning_rate": 2.529410072770156e-06, "loss": 0.0114, "step": 29850 }, { "epoch": 7.740098369143153, "grad_norm": 0.7601585984230042, "learning_rate": 2.515028619092818e-06, "loss": 0.0024, "step": 29900 }, { "epoch": 7.753041677452757, "grad_norm": 0.6022003889083862, "learning_rate": 2.50064716541548e-06, "loss": 0.005, "step": 29950 }, { "epoch": 7.765984985762361, "grad_norm": 0.08920400589704514, "learning_rate": 2.4862657117381427e-06, "loss": 0.0025, "step": 30000 }, { "epoch": 7.778928294071965, "grad_norm": 0.5146584510803223, "learning_rate": 2.471884258060805e-06, "loss": 0.0035, "step": 30050 }, { "epoch": 7.791871602381569, "grad_norm": 0.6136813759803772, "learning_rate": 2.4575028043834673e-06, "loss": 0.0033, "step": 30100 }, { "epoch": 7.804814910691173, "grad_norm": 5.361100673675537, "learning_rate": 2.4431213507061295e-06, "loss": 0.0039, "step": 30150 }, { "epoch": 7.817758219000776, "grad_norm": 0.617695152759552, "learning_rate": 2.428739897028792e-06, "loss": 0.0026, "step": 30200 }, { "epoch": 7.830701527310381, "grad_norm": 0.42767393589019775, "learning_rate": 2.414358443351454e-06, "loss": 0.0022, "step": 30250 }, { "epoch": 7.843644835619984, "grad_norm": 0.09423399716615677, "learning_rate": 2.3999769896741164e-06, "loss": 0.0038, "step": 30300 }, { "epoch": 7.856588143929589, "grad_norm": 0.18421663343906403, "learning_rate": 2.3855955359967787e-06, "loss": 0.0048, "step": 30350 }, { "epoch": 7.869531452239192, "grad_norm": 0.05506595969200134, "learning_rate": 2.371214082319441e-06, "loss": 0.0034, "step": 30400 }, { "epoch": 7.882474760548797, "grad_norm": 1.0411120653152466, "learning_rate": 2.3568326286421033e-06, "loss": 0.0065, "step": 30450 }, { "epoch": 7.8954180688584, "grad_norm": 0.05043673887848854, "learning_rate": 2.3424511749647656e-06, "loss": 0.0018, "step": 30500 }, { "epoch": 7.908361377168005, "grad_norm": 0.8202661275863647, "learning_rate": 2.328069721287428e-06, "loss": 0.0107, "step": 30550 }, { "epoch": 7.921304685477608, "grad_norm": 1.809882640838623, "learning_rate": 2.31368826761009e-06, "loss": 0.0038, "step": 30600 }, { "epoch": 7.934247993787212, "grad_norm": 0.9416866898536682, "learning_rate": 2.2993068139327525e-06, "loss": 0.0031, "step": 30650 }, { "epoch": 7.947191302096816, "grad_norm": 0.5891124606132507, "learning_rate": 2.2849253602554148e-06, "loss": 0.0023, "step": 30700 }, { "epoch": 7.96013461040642, "grad_norm": 0.15361438691616058, "learning_rate": 2.270543906578077e-06, "loss": 0.0021, "step": 30750 }, { "epoch": 7.973077918716024, "grad_norm": 1.8306443691253662, "learning_rate": 2.2561624529007393e-06, "loss": 0.0049, "step": 30800 }, { "epoch": 7.986021227025628, "grad_norm": 0.06569012254476547, "learning_rate": 2.2417809992234016e-06, "loss": 0.0042, "step": 30850 }, { "epoch": 7.998964535335231, "grad_norm": 0.14215713739395142, "learning_rate": 2.227399545546064e-06, "loss": 0.0021, "step": 30900 }, { "epoch": 8.0, "eval_loss": 0.0042533595114946365, "eval_runtime": 89.1255, "eval_samples_per_second": 5.61, "eval_steps_per_second": 0.707, "eval_wer": 1.490500682271439, "step": 30904 }, { "epoch": 8.011907843644835, "grad_norm": 0.9032062292098999, "learning_rate": 2.2130180918687262e-06, "loss": 0.0022, "step": 30950 }, { "epoch": 8.02485115195444, "grad_norm": 0.09088978916406631, "learning_rate": 2.1986366381913885e-06, "loss": 0.0016, "step": 31000 }, { "epoch": 8.037794460264044, "grad_norm": 0.1776304394006729, "learning_rate": 2.184255184514051e-06, "loss": 0.0063, "step": 31050 }, { "epoch": 8.050737768573647, "grad_norm": 1.3590532541275024, "learning_rate": 2.169873730836713e-06, "loss": 0.0018, "step": 31100 }, { "epoch": 8.06368107688325, "grad_norm": 2.1883018016815186, "learning_rate": 2.1554922771593754e-06, "loss": 0.0028, "step": 31150 }, { "epoch": 8.076624385192856, "grad_norm": 3.1580357551574707, "learning_rate": 2.1411108234820377e-06, "loss": 0.0089, "step": 31200 }, { "epoch": 8.08956769350246, "grad_norm": 0.19922618567943573, "learning_rate": 2.1267293698047e-06, "loss": 0.0016, "step": 31250 }, { "epoch": 8.102511001812063, "grad_norm": 0.3133656084537506, "learning_rate": 2.1123479161273623e-06, "loss": 0.001, "step": 31300 }, { "epoch": 8.115454310121667, "grad_norm": 0.1543108969926834, "learning_rate": 2.0979664624500245e-06, "loss": 0.0071, "step": 31350 }, { "epoch": 8.12839761843127, "grad_norm": 0.06812497228384018, "learning_rate": 2.083585008772687e-06, "loss": 0.0057, "step": 31400 }, { "epoch": 8.141340926740876, "grad_norm": 0.7921668887138367, "learning_rate": 2.069203555095349e-06, "loss": 0.0019, "step": 31450 }, { "epoch": 8.15428423505048, "grad_norm": 0.7293940782546997, "learning_rate": 2.0548221014180114e-06, "loss": 0.0024, "step": 31500 }, { "epoch": 8.167227543360083, "grad_norm": 0.2699018120765686, "learning_rate": 2.0407282768142205e-06, "loss": 0.0036, "step": 31550 }, { "epoch": 8.180170851669686, "grad_norm": 1.0701220035552979, "learning_rate": 2.026346823136883e-06, "loss": 0.0021, "step": 31600 }, { "epoch": 8.193114159979292, "grad_norm": 0.35062670707702637, "learning_rate": 2.011965369459545e-06, "loss": 0.0013, "step": 31650 }, { "epoch": 8.206057468288895, "grad_norm": 2.343193531036377, "learning_rate": 1.9975839157822073e-06, "loss": 0.005, "step": 31700 }, { "epoch": 8.219000776598499, "grad_norm": 0.1934152990579605, "learning_rate": 1.9832024621048696e-06, "loss": 0.0042, "step": 31750 }, { "epoch": 8.231944084908102, "grad_norm": 1.2814443111419678, "learning_rate": 1.968821008427532e-06, "loss": 0.0031, "step": 31800 }, { "epoch": 8.244887393217706, "grad_norm": 0.23100686073303223, "learning_rate": 1.954439554750194e-06, "loss": 0.001, "step": 31850 }, { "epoch": 8.257830701527311, "grad_norm": 1.0474891662597656, "learning_rate": 1.9400581010728565e-06, "loss": 0.0017, "step": 31900 }, { "epoch": 8.270774009836915, "grad_norm": 0.1752719134092331, "learning_rate": 1.9256766473955188e-06, "loss": 0.0025, "step": 31950 }, { "epoch": 8.283717318146518, "grad_norm": 0.07388792932033539, "learning_rate": 1.911295193718181e-06, "loss": 0.0012, "step": 32000 }, { "epoch": 8.296660626456122, "grad_norm": 0.2670608460903168, "learning_rate": 1.8969137400408436e-06, "loss": 0.0027, "step": 32050 }, { "epoch": 8.309603934765725, "grad_norm": 0.19774726033210754, "learning_rate": 1.8825322863635057e-06, "loss": 0.0016, "step": 32100 }, { "epoch": 8.32254724307533, "grad_norm": 0.5155441761016846, "learning_rate": 1.868150832686168e-06, "loss": 0.0028, "step": 32150 }, { "epoch": 8.335490551384934, "grad_norm": 0.2909785509109497, "learning_rate": 1.8537693790088305e-06, "loss": 0.0013, "step": 32200 }, { "epoch": 8.348433859694538, "grad_norm": 0.20075345039367676, "learning_rate": 1.8393879253314927e-06, "loss": 0.0017, "step": 32250 }, { "epoch": 8.361377168004141, "grad_norm": 0.44085758924484253, "learning_rate": 1.8250064716541548e-06, "loss": 0.0022, "step": 32300 }, { "epoch": 8.374320476313747, "grad_norm": 1.4371730089187622, "learning_rate": 1.8106250179768173e-06, "loss": 0.0013, "step": 32350 }, { "epoch": 8.38726378462335, "grad_norm": 0.15547557175159454, "learning_rate": 1.7962435642994796e-06, "loss": 0.0016, "step": 32400 }, { "epoch": 8.400207092932954, "grad_norm": 0.3929384648799896, "learning_rate": 1.7818621106221417e-06, "loss": 0.0013, "step": 32450 }, { "epoch": 8.413150401242557, "grad_norm": 0.329222172498703, "learning_rate": 1.7674806569448042e-06, "loss": 0.0016, "step": 32500 }, { "epoch": 8.42609370955216, "grad_norm": 0.24657496809959412, "learning_rate": 1.7530992032674665e-06, "loss": 0.0033, "step": 32550 }, { "epoch": 8.439037017861766, "grad_norm": 0.1905100792646408, "learning_rate": 1.7387177495901286e-06, "loss": 0.0011, "step": 32600 }, { "epoch": 8.45198032617137, "grad_norm": 0.06774311512708664, "learning_rate": 1.724336295912791e-06, "loss": 0.0028, "step": 32650 }, { "epoch": 8.464923634480973, "grad_norm": 0.20226095616817474, "learning_rate": 1.7099548422354534e-06, "loss": 0.0014, "step": 32700 }, { "epoch": 8.477866942790577, "grad_norm": 0.5388538241386414, "learning_rate": 1.6955733885581155e-06, "loss": 0.0085, "step": 32750 }, { "epoch": 8.49081025110018, "grad_norm": 0.04724876210093498, "learning_rate": 1.681191934880778e-06, "loss": 0.0015, "step": 32800 }, { "epoch": 8.503753559409786, "grad_norm": 0.1351761817932129, "learning_rate": 1.6668104812034402e-06, "loss": 0.002, "step": 32850 }, { "epoch": 8.51669686771939, "grad_norm": 0.16377896070480347, "learning_rate": 1.6524290275261023e-06, "loss": 0.0014, "step": 32900 }, { "epoch": 8.529640176028993, "grad_norm": 0.0653943344950676, "learning_rate": 1.6380475738487648e-06, "loss": 0.0016, "step": 32950 }, { "epoch": 8.542583484338596, "grad_norm": 0.4834960699081421, "learning_rate": 1.6236661201714271e-06, "loss": 0.0034, "step": 33000 }, { "epoch": 8.555526792648202, "grad_norm": 0.437788188457489, "learning_rate": 1.6092846664940894e-06, "loss": 0.0052, "step": 33050 }, { "epoch": 8.568470100957805, "grad_norm": 2.195469617843628, "learning_rate": 1.5949032128167515e-06, "loss": 0.0036, "step": 33100 }, { "epoch": 8.581413409267409, "grad_norm": 0.12040536105632782, "learning_rate": 1.580521759139414e-06, "loss": 0.0031, "step": 33150 }, { "epoch": 8.594356717577012, "grad_norm": 0.2154337614774704, "learning_rate": 1.5661403054620763e-06, "loss": 0.0025, "step": 33200 }, { "epoch": 8.607300025886616, "grad_norm": 0.1478249430656433, "learning_rate": 1.5517588517847384e-06, "loss": 0.0022, "step": 33250 }, { "epoch": 8.620243334196221, "grad_norm": 0.16750039160251617, "learning_rate": 1.5373773981074009e-06, "loss": 0.0021, "step": 33300 }, { "epoch": 8.633186642505825, "grad_norm": 0.38158321380615234, "learning_rate": 1.5229959444300632e-06, "loss": 0.001, "step": 33350 }, { "epoch": 8.646129950815428, "grad_norm": 1.1184005737304688, "learning_rate": 1.5086144907527252e-06, "loss": 0.007, "step": 33400 }, { "epoch": 8.659073259125032, "grad_norm": 1.4656065702438354, "learning_rate": 1.4942330370753877e-06, "loss": 0.0013, "step": 33450 }, { "epoch": 8.672016567434635, "grad_norm": 0.3482512831687927, "learning_rate": 1.47985158339805e-06, "loss": 0.0024, "step": 33500 }, { "epoch": 8.68495987574424, "grad_norm": 0.04078083485364914, "learning_rate": 1.4654701297207121e-06, "loss": 0.0021, "step": 33550 }, { "epoch": 8.697903184053844, "grad_norm": 0.1383834183216095, "learning_rate": 1.4510886760433746e-06, "loss": 0.0025, "step": 33600 }, { "epoch": 8.710846492363448, "grad_norm": 0.1986149251461029, "learning_rate": 1.436707222366037e-06, "loss": 0.0011, "step": 33650 }, { "epoch": 8.723789800673051, "grad_norm": 0.5224486589431763, "learning_rate": 1.422325768688699e-06, "loss": 0.002, "step": 33700 }, { "epoch": 8.736733108982657, "grad_norm": 0.14714999496936798, "learning_rate": 1.4079443150113615e-06, "loss": 0.0024, "step": 33750 }, { "epoch": 8.74967641729226, "grad_norm": 0.07352601736783981, "learning_rate": 1.3935628613340238e-06, "loss": 0.0025, "step": 33800 }, { "epoch": 8.762619725601864, "grad_norm": 0.04641982167959213, "learning_rate": 1.379181407656686e-06, "loss": 0.0011, "step": 33850 }, { "epoch": 8.775563033911467, "grad_norm": 0.20494569838047028, "learning_rate": 1.3647999539793484e-06, "loss": 0.0014, "step": 33900 }, { "epoch": 8.788506342221071, "grad_norm": 0.3108866214752197, "learning_rate": 1.3504185003020107e-06, "loss": 0.0031, "step": 33950 }, { "epoch": 8.801449650530676, "grad_norm": 1.0901867151260376, "learning_rate": 1.336037046624673e-06, "loss": 0.006, "step": 34000 }, { "epoch": 8.81439295884028, "grad_norm": 0.1557256281375885, "learning_rate": 1.3216555929473352e-06, "loss": 0.002, "step": 34050 }, { "epoch": 8.827336267149883, "grad_norm": 0.32064932584762573, "learning_rate": 1.3072741392699975e-06, "loss": 0.0016, "step": 34100 }, { "epoch": 8.840279575459487, "grad_norm": 0.035750892013311386, "learning_rate": 1.2928926855926598e-06, "loss": 0.0016, "step": 34150 }, { "epoch": 8.853222883769092, "grad_norm": 0.11652498692274094, "learning_rate": 1.278511231915322e-06, "loss": 0.0017, "step": 34200 }, { "epoch": 8.866166192078696, "grad_norm": 0.22541067004203796, "learning_rate": 1.2641297782379844e-06, "loss": 0.002, "step": 34250 }, { "epoch": 8.8791095003883, "grad_norm": 0.09035801142454147, "learning_rate": 1.2497483245606467e-06, "loss": 0.0024, "step": 34300 }, { "epoch": 8.892052808697903, "grad_norm": 0.8051474690437317, "learning_rate": 1.235366870883309e-06, "loss": 0.0015, "step": 34350 }, { "epoch": 8.904996117007506, "grad_norm": 0.12439941614866257, "learning_rate": 1.2209854172059713e-06, "loss": 0.0014, "step": 34400 }, { "epoch": 8.917939425317112, "grad_norm": 0.0911746621131897, "learning_rate": 1.2066039635286336e-06, "loss": 0.0016, "step": 34450 }, { "epoch": 8.930882733626715, "grad_norm": 0.10455431789159775, "learning_rate": 1.1922225098512959e-06, "loss": 0.0035, "step": 34500 }, { "epoch": 8.943826041936319, "grad_norm": 0.0844273790717125, "learning_rate": 1.1778410561739582e-06, "loss": 0.0012, "step": 34550 }, { "epoch": 8.956769350245922, "grad_norm": 0.1838880330324173, "learning_rate": 1.1634596024966204e-06, "loss": 0.003, "step": 34600 }, { "epoch": 8.969712658555526, "grad_norm": 1.8350099325180054, "learning_rate": 1.1490781488192827e-06, "loss": 0.0018, "step": 34650 }, { "epoch": 8.982655966865131, "grad_norm": 0.18555675446987152, "learning_rate": 1.134696695141945e-06, "loss": 0.0015, "step": 34700 }, { "epoch": 8.995599275174735, "grad_norm": 0.14013200998306274, "learning_rate": 1.1203152414646073e-06, "loss": 0.0011, "step": 34750 }, { "epoch": 9.0, "eval_loss": 0.0022843414917588234, "eval_runtime": 89.3025, "eval_samples_per_second": 5.599, "eval_steps_per_second": 0.705, "eval_wer": 1.1861026556103704, "step": 34767 }, { "epoch": 9.008542583484338, "grad_norm": 0.13073372840881348, "learning_rate": 1.1059337877872696e-06, "loss": 0.0014, "step": 34800 }, { "epoch": 9.021485891793942, "grad_norm": 0.13120625913143158, "learning_rate": 1.091552334109932e-06, "loss": 0.0007, "step": 34850 }, { "epoch": 9.034429200103547, "grad_norm": 0.15788908302783966, "learning_rate": 1.0771708804325942e-06, "loss": 0.0012, "step": 34900 }, { "epoch": 9.04737250841315, "grad_norm": 0.1373605579137802, "learning_rate": 1.0627894267552565e-06, "loss": 0.004, "step": 34950 }, { "epoch": 9.060315816722754, "grad_norm": 0.09479701519012451, "learning_rate": 1.0484079730779188e-06, "loss": 0.0021, "step": 35000 }, { "epoch": 9.073259125032358, "grad_norm": 0.1045953705906868, "learning_rate": 1.034026519400581e-06, "loss": 0.0014, "step": 35050 }, { "epoch": 9.086202433341962, "grad_norm": 0.07537753134965897, "learning_rate": 1.0196450657232434e-06, "loss": 0.0008, "step": 35100 }, { "epoch": 9.099145741651567, "grad_norm": 0.5165144801139832, "learning_rate": 1.0052636120459057e-06, "loss": 0.002, "step": 35150 }, { "epoch": 9.11208904996117, "grad_norm": 0.13497541844844818, "learning_rate": 9.90882158368568e-07, "loss": 0.0009, "step": 35200 }, { "epoch": 9.125032358270774, "grad_norm": 0.06942334771156311, "learning_rate": 9.765007046912302e-07, "loss": 0.0012, "step": 35250 }, { "epoch": 9.137975666580378, "grad_norm": 0.19452160596847534, "learning_rate": 9.621192510138925e-07, "loss": 0.0014, "step": 35300 }, { "epoch": 9.150918974889981, "grad_norm": 0.12441800534725189, "learning_rate": 9.477377973365548e-07, "loss": 0.0009, "step": 35350 }, { "epoch": 9.163862283199586, "grad_norm": 0.1729976236820221, "learning_rate": 9.333563436592172e-07, "loss": 0.0008, "step": 35400 }, { "epoch": 9.17680559150919, "grad_norm": 0.12662172317504883, "learning_rate": 9.189748899818794e-07, "loss": 0.0015, "step": 35450 }, { "epoch": 9.189748899818794, "grad_norm": 0.07149961590766907, "learning_rate": 9.048810653780884e-07, "loss": 0.0017, "step": 35500 }, { "epoch": 9.202692208128397, "grad_norm": 0.09453389793634415, "learning_rate": 8.904996117007508e-07, "loss": 0.0008, "step": 35550 }, { "epoch": 9.215635516438002, "grad_norm": 0.06293300539255142, "learning_rate": 8.76118158023413e-07, "loss": 0.0009, "step": 35600 }, { "epoch": 9.228578824747606, "grad_norm": 0.09109367430210114, "learning_rate": 8.617367043460753e-07, "loss": 0.0007, "step": 35650 }, { "epoch": 9.24152213305721, "grad_norm": 0.12085200101137161, "learning_rate": 8.473552506687377e-07, "loss": 0.0018, "step": 35700 }, { "epoch": 9.254465441366813, "grad_norm": 0.11523808538913727, "learning_rate": 8.329737969913999e-07, "loss": 0.0032, "step": 35750 }, { "epoch": 9.267408749676417, "grad_norm": 0.28072428703308105, "learning_rate": 8.185923433140623e-07, "loss": 0.0008, "step": 35800 }, { "epoch": 9.280352057986022, "grad_norm": 0.08222024142742157, "learning_rate": 8.042108896367246e-07, "loss": 0.0007, "step": 35850 }, { "epoch": 9.293295366295625, "grad_norm": 0.29703882336616516, "learning_rate": 7.898294359593868e-07, "loss": 0.0011, "step": 35900 }, { "epoch": 9.306238674605229, "grad_norm": 0.19992585480213165, "learning_rate": 7.754479822820492e-07, "loss": 0.0007, "step": 35950 }, { "epoch": 9.319181982914833, "grad_norm": 0.04905041307210922, "learning_rate": 7.610665286047115e-07, "loss": 0.0009, "step": 36000 }, { "epoch": 9.332125291224436, "grad_norm": 0.15756992995738983, "learning_rate": 7.466850749273736e-07, "loss": 0.005, "step": 36050 }, { "epoch": 9.345068599534041, "grad_norm": 0.18442897498607635, "learning_rate": 7.32303621250036e-07, "loss": 0.0009, "step": 36100 }, { "epoch": 9.358011907843645, "grad_norm": 0.06329531967639923, "learning_rate": 7.179221675726982e-07, "loss": 0.0011, "step": 36150 }, { "epoch": 9.370955216153249, "grad_norm": 0.11927127093076706, "learning_rate": 7.035407138953606e-07, "loss": 0.0007, "step": 36200 }, { "epoch": 9.383898524462852, "grad_norm": 0.06844917684793472, "learning_rate": 6.891592602180229e-07, "loss": 0.0018, "step": 36250 }, { "epoch": 9.396841832772457, "grad_norm": 0.08744735270738602, "learning_rate": 6.747778065406851e-07, "loss": 0.0043, "step": 36300 }, { "epoch": 9.409785141082061, "grad_norm": 0.18723489344120026, "learning_rate": 6.603963528633475e-07, "loss": 0.0007, "step": 36350 }, { "epoch": 9.422728449391665, "grad_norm": 0.086359903216362, "learning_rate": 6.460148991860098e-07, "loss": 0.0039, "step": 36400 }, { "epoch": 9.435671757701268, "grad_norm": 0.10816863179206848, "learning_rate": 6.31633445508672e-07, "loss": 0.0015, "step": 36450 }, { "epoch": 9.448615066010872, "grad_norm": 0.09978567808866501, "learning_rate": 6.172519918313344e-07, "loss": 0.0054, "step": 36500 }, { "epoch": 9.461558374320477, "grad_norm": 0.07749635726213455, "learning_rate": 6.028705381539967e-07, "loss": 0.0013, "step": 36550 }, { "epoch": 9.47450168263008, "grad_norm": 0.18575559556484222, "learning_rate": 5.88489084476659e-07, "loss": 0.0007, "step": 36600 }, { "epoch": 9.487444990939684, "grad_norm": 0.3773919641971588, "learning_rate": 5.741076307993213e-07, "loss": 0.0019, "step": 36650 }, { "epoch": 9.500388299249288, "grad_norm": 0.13482239842414856, "learning_rate": 5.597261771219835e-07, "loss": 0.0006, "step": 36700 }, { "epoch": 9.513331607558893, "grad_norm": 0.16534963250160217, "learning_rate": 5.453447234446458e-07, "loss": 0.0007, "step": 36750 }, { "epoch": 9.526274915868497, "grad_norm": 0.22458544373512268, "learning_rate": 5.309632697673081e-07, "loss": 0.0013, "step": 36800 }, { "epoch": 9.5392182241781, "grad_norm": 0.07351688295602798, "learning_rate": 5.165818160899704e-07, "loss": 0.0019, "step": 36850 }, { "epoch": 9.552161532487704, "grad_norm": 0.7084305286407471, "learning_rate": 5.022003624126327e-07, "loss": 0.0024, "step": 36900 }, { "epoch": 9.565104840797307, "grad_norm": 0.09942048788070679, "learning_rate": 4.87818908735295e-07, "loss": 0.0019, "step": 36950 }, { "epoch": 9.578048149106912, "grad_norm": 1.1455363035202026, "learning_rate": 4.734374550579573e-07, "loss": 0.0013, "step": 37000 }, { "epoch": 9.590991457416516, "grad_norm": 0.07915141433477402, "learning_rate": 4.590560013806196e-07, "loss": 0.0081, "step": 37050 }, { "epoch": 9.60393476572612, "grad_norm": 0.1777876317501068, "learning_rate": 4.4467454770328193e-07, "loss": 0.0014, "step": 37100 }, { "epoch": 9.616878074035723, "grad_norm": 0.10249053686857224, "learning_rate": 4.3029309402594417e-07, "loss": 0.0007, "step": 37150 }, { "epoch": 9.629821382345327, "grad_norm": 0.10651733726263046, "learning_rate": 4.1591164034860646e-07, "loss": 0.0007, "step": 37200 }, { "epoch": 9.642764690654932, "grad_norm": 0.03686549514532089, "learning_rate": 4.0153018667126875e-07, "loss": 0.0046, "step": 37250 }, { "epoch": 9.655707998964536, "grad_norm": 0.09571921825408936, "learning_rate": 3.871487329939311e-07, "loss": 0.0009, "step": 37300 }, { "epoch": 9.66865130727414, "grad_norm": 0.08373486995697021, "learning_rate": 3.727672793165934e-07, "loss": 0.0013, "step": 37350 }, { "epoch": 9.681594615583743, "grad_norm": 3.18973708152771, "learning_rate": 3.583858256392556e-07, "loss": 0.0013, "step": 37400 }, { "epoch": 9.694537923893346, "grad_norm": 0.06650124490261078, "learning_rate": 3.440043719619179e-07, "loss": 0.0014, "step": 37450 }, { "epoch": 9.707481232202952, "grad_norm": 0.04685758426785469, "learning_rate": 3.2962291828458026e-07, "loss": 0.0011, "step": 37500 }, { "epoch": 9.720424540512555, "grad_norm": 0.23590688407421112, "learning_rate": 3.1524146460724255e-07, "loss": 0.0009, "step": 37550 }, { "epoch": 9.733367848822159, "grad_norm": 0.6385647058486938, "learning_rate": 3.0086001092990484e-07, "loss": 0.0007, "step": 37600 }, { "epoch": 9.746311157131762, "grad_norm": 0.9988199472427368, "learning_rate": 2.8647855725256713e-07, "loss": 0.0011, "step": 37650 }, { "epoch": 9.759254465441368, "grad_norm": 0.09181234240531921, "learning_rate": 2.720971035752294e-07, "loss": 0.001, "step": 37700 }, { "epoch": 9.772197773750971, "grad_norm": 0.12787938117980957, "learning_rate": 2.577156498978917e-07, "loss": 0.0007, "step": 37750 }, { "epoch": 9.785141082060575, "grad_norm": 0.24333082139492035, "learning_rate": 2.43334196220554e-07, "loss": 0.0017, "step": 37800 }, { "epoch": 9.798084390370178, "grad_norm": 0.11896287649869919, "learning_rate": 2.2895274254321627e-07, "loss": 0.0038, "step": 37850 }, { "epoch": 9.811027698679782, "grad_norm": 0.9559854865074158, "learning_rate": 2.145712888658786e-07, "loss": 0.0008, "step": 37900 }, { "epoch": 9.823971006989387, "grad_norm": 0.10168687999248505, "learning_rate": 2.0018983518854086e-07, "loss": 0.0009, "step": 37950 }, { "epoch": 9.83691431529899, "grad_norm": 0.5217211246490479, "learning_rate": 1.8580838151120317e-07, "loss": 0.0008, "step": 38000 }, { "epoch": 9.849857623608594, "grad_norm": 0.03806522488594055, "learning_rate": 1.7142692783386547e-07, "loss": 0.0022, "step": 38050 }, { "epoch": 9.862800931918198, "grad_norm": 0.05464790016412735, "learning_rate": 1.5704547415652776e-07, "loss": 0.0007, "step": 38100 }, { "epoch": 9.875744240227803, "grad_norm": 0.16794097423553467, "learning_rate": 1.4266402047919005e-07, "loss": 0.0008, "step": 38150 }, { "epoch": 9.888687548537407, "grad_norm": 0.4726152718067169, "learning_rate": 1.2828256680185234e-07, "loss": 0.0008, "step": 38200 }, { "epoch": 9.90163085684701, "grad_norm": 0.09927275031805038, "learning_rate": 1.1390111312451463e-07, "loss": 0.0009, "step": 38250 }, { "epoch": 9.914574165156614, "grad_norm": 0.07914838194847107, "learning_rate": 9.951965944717692e-08, "loss": 0.0013, "step": 38300 }, { "epoch": 9.927517473466217, "grad_norm": 2.0464367866516113, "learning_rate": 8.513820576983922e-08, "loss": 0.0014, "step": 38350 }, { "epoch": 9.940460781775823, "grad_norm": 0.0418660007417202, "learning_rate": 7.075675209250152e-08, "loss": 0.0008, "step": 38400 }, { "epoch": 9.953404090085426, "grad_norm": 0.08231345564126968, "learning_rate": 5.637529841516381e-08, "loss": 0.0007, "step": 38450 }, { "epoch": 9.96634739839503, "grad_norm": 0.05043479800224304, "learning_rate": 4.19938447378261e-08, "loss": 0.0009, "step": 38500 }, { "epoch": 9.979290706704633, "grad_norm": 0.11412689834833145, "learning_rate": 2.7612391060488395e-08, "loss": 0.0008, "step": 38550 }, { "epoch": 9.992234015014237, "grad_norm": 0.06143497675657272, "learning_rate": 1.323093738315069e-08, "loss": 0.0013, "step": 38600 }, { "epoch": 10.0, "eval_loss": 0.0013781202724203467, "eval_runtime": 89.6083, "eval_samples_per_second": 5.58, "eval_steps_per_second": 0.703, "eval_wer": 0.8082292432035268, "step": 38630 } ], "logging_steps": 50, "max_steps": 38630, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.00436846133248e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }