whisper-base-urdufinetuned / trainer_state.json
m-aliabbas1's picture
Upload trainer_state.json with huggingface_hub
c48df43 verified
raw
history blame
137 kB
{
"best_global_step": 38630,
"best_metric": 0.8082292432035268,
"best_model_checkpoint": "./whisper-urdu-base-finetuned/checkpoint-38630",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 38630,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012943308309603935,
"grad_norm": 2.339423418045044,
"learning_rate": 1.2684442143411856e-07,
"loss": 0.0468,
"step": 50
},
{
"epoch": 0.02588661661920787,
"grad_norm": 4.33876371383667,
"learning_rate": 2.5627750453015794e-07,
"loss": 0.0428,
"step": 100
},
{
"epoch": 0.03882992492881181,
"grad_norm": 4.59096622467041,
"learning_rate": 3.8571058762619726e-07,
"loss": 0.054,
"step": 150
},
{
"epoch": 0.05177323323841574,
"grad_norm": 5.390439033508301,
"learning_rate": 5.151436707222367e-07,
"loss": 0.0454,
"step": 200
},
{
"epoch": 0.06471654154801967,
"grad_norm": 3.9699249267578125,
"learning_rate": 6.44576753818276e-07,
"loss": 0.03,
"step": 250
},
{
"epoch": 0.07765984985762361,
"grad_norm": 5.007837295532227,
"learning_rate": 7.740098369143153e-07,
"loss": 0.0364,
"step": 300
},
{
"epoch": 0.09060315816722754,
"grad_norm": 1.7915890216827393,
"learning_rate": 9.034429200103548e-07,
"loss": 0.033,
"step": 350
},
{
"epoch": 0.10354646647683148,
"grad_norm": 1.8916817903518677,
"learning_rate": 1.032876003106394e-06,
"loss": 0.0453,
"step": 400
},
{
"epoch": 0.11648977478643541,
"grad_norm": 5.6407318115234375,
"learning_rate": 1.1623090862024335e-06,
"loss": 0.0493,
"step": 450
},
{
"epoch": 0.12943308309603935,
"grad_norm": 1.5208466053009033,
"learning_rate": 1.2917421692984728e-06,
"loss": 0.0457,
"step": 500
},
{
"epoch": 0.1423763914056433,
"grad_norm": 0.9354443550109863,
"learning_rate": 1.4211752523945122e-06,
"loss": 0.0486,
"step": 550
},
{
"epoch": 0.15531969971524723,
"grad_norm": 3.7830538749694824,
"learning_rate": 1.5506083354905516e-06,
"loss": 0.0377,
"step": 600
},
{
"epoch": 0.16826300802485114,
"grad_norm": 1.2487603425979614,
"learning_rate": 1.67745275692467e-06,
"loss": 0.069,
"step": 650
},
{
"epoch": 0.18120631633445508,
"grad_norm": 0.8733393549919128,
"learning_rate": 1.8068858400207096e-06,
"loss": 0.0457,
"step": 700
},
{
"epoch": 0.19414962464405902,
"grad_norm": 3.48146390914917,
"learning_rate": 1.936318923116749e-06,
"loss": 0.0372,
"step": 750
},
{
"epoch": 0.20709293295366296,
"grad_norm": 3.9726881980895996,
"learning_rate": 2.065752006212788e-06,
"loss": 0.0493,
"step": 800
},
{
"epoch": 0.2200362412632669,
"grad_norm": 1.6841055154800415,
"learning_rate": 2.1951850893088276e-06,
"loss": 0.046,
"step": 850
},
{
"epoch": 0.23297954957287081,
"grad_norm": 1.1303517818450928,
"learning_rate": 2.324618172404867e-06,
"loss": 0.0484,
"step": 900
},
{
"epoch": 0.24592285788247475,
"grad_norm": 1.6165255308151245,
"learning_rate": 2.4540512555009063e-06,
"loss": 0.0617,
"step": 950
},
{
"epoch": 0.2588661661920787,
"grad_norm": 4.751931667327881,
"learning_rate": 2.5834843385969457e-06,
"loss": 0.0516,
"step": 1000
},
{
"epoch": 0.27180947450168264,
"grad_norm": 2.619171142578125,
"learning_rate": 2.712917421692985e-06,
"loss": 0.0491,
"step": 1050
},
{
"epoch": 0.2847527828112866,
"grad_norm": 3.5204107761383057,
"learning_rate": 2.8423505047890244e-06,
"loss": 0.0451,
"step": 1100
},
{
"epoch": 0.2976960911208905,
"grad_norm": 1.6330580711364746,
"learning_rate": 2.971783587885064e-06,
"loss": 0.0329,
"step": 1150
},
{
"epoch": 0.31063939943049446,
"grad_norm": 9.043530464172363,
"learning_rate": 3.101216670981103e-06,
"loss": 0.0528,
"step": 1200
},
{
"epoch": 0.3235827077400984,
"grad_norm": 4.8967604637146,
"learning_rate": 3.2306497540771426e-06,
"loss": 0.052,
"step": 1250
},
{
"epoch": 0.3365260160497023,
"grad_norm": 2.9386487007141113,
"learning_rate": 3.360082837173182e-06,
"loss": 0.0431,
"step": 1300
},
{
"epoch": 0.3494693243593062,
"grad_norm": 3.271523952484131,
"learning_rate": 3.4895159202692213e-06,
"loss": 0.0435,
"step": 1350
},
{
"epoch": 0.36241263266891016,
"grad_norm": 3.5081090927124023,
"learning_rate": 3.6189490033652603e-06,
"loss": 0.0571,
"step": 1400
},
{
"epoch": 0.3753559409785141,
"grad_norm": 5.796409606933594,
"learning_rate": 3.7483820864612997e-06,
"loss": 0.0529,
"step": 1450
},
{
"epoch": 0.38829924928811804,
"grad_norm": 3.4982869625091553,
"learning_rate": 3.877815169557339e-06,
"loss": 0.0566,
"step": 1500
},
{
"epoch": 0.401242557597722,
"grad_norm": 4.461122035980225,
"learning_rate": 4.007248252653379e-06,
"loss": 0.0562,
"step": 1550
},
{
"epoch": 0.4141858659073259,
"grad_norm": 3.034180164337158,
"learning_rate": 4.136681335749418e-06,
"loss": 0.0466,
"step": 1600
},
{
"epoch": 0.42712917421692986,
"grad_norm": 5.0971598625183105,
"learning_rate": 4.266114418845458e-06,
"loss": 0.0501,
"step": 1650
},
{
"epoch": 0.4400724825265338,
"grad_norm": 5.404353618621826,
"learning_rate": 4.3955475019414965e-06,
"loss": 0.0424,
"step": 1700
},
{
"epoch": 0.45301579083613774,
"grad_norm": 2.975407838821411,
"learning_rate": 4.524980585037536e-06,
"loss": 0.0562,
"step": 1750
},
{
"epoch": 0.46595909914574163,
"grad_norm": 8.076371192932129,
"learning_rate": 4.654413668133575e-06,
"loss": 0.0377,
"step": 1800
},
{
"epoch": 0.47890240745534557,
"grad_norm": 5.4830451011657715,
"learning_rate": 4.781258089567694e-06,
"loss": 0.0436,
"step": 1850
},
{
"epoch": 0.4918457157649495,
"grad_norm": 2.6184751987457275,
"learning_rate": 4.910691172663733e-06,
"loss": 0.0414,
"step": 1900
},
{
"epoch": 0.5047890240745535,
"grad_norm": 4.777491092681885,
"learning_rate": 5.040124255759773e-06,
"loss": 0.0469,
"step": 1950
},
{
"epoch": 0.5177323323841574,
"grad_norm": 8.820015907287598,
"learning_rate": 5.169557338855812e-06,
"loss": 0.0654,
"step": 2000
},
{
"epoch": 0.5306756406937613,
"grad_norm": 4.339293003082275,
"learning_rate": 5.298990421951852e-06,
"loss": 0.0473,
"step": 2050
},
{
"epoch": 0.5436189490033653,
"grad_norm": 7.162559509277344,
"learning_rate": 5.428423505047891e-06,
"loss": 0.0473,
"step": 2100
},
{
"epoch": 0.5565622573129692,
"grad_norm": 7.811385154724121,
"learning_rate": 5.557856588143931e-06,
"loss": 0.0516,
"step": 2150
},
{
"epoch": 0.5695055656225732,
"grad_norm": 2.8982834815979004,
"learning_rate": 5.68728967123997e-06,
"loss": 0.0497,
"step": 2200
},
{
"epoch": 0.582448873932177,
"grad_norm": 4.9137749671936035,
"learning_rate": 5.8167227543360085e-06,
"loss": 0.0613,
"step": 2250
},
{
"epoch": 0.595392182241781,
"grad_norm": 4.650545597076416,
"learning_rate": 5.946155837432048e-06,
"loss": 0.0475,
"step": 2300
},
{
"epoch": 0.6083354905513849,
"grad_norm": 6.333342552185059,
"learning_rate": 6.075588920528087e-06,
"loss": 0.0793,
"step": 2350
},
{
"epoch": 0.6212787988609889,
"grad_norm": 5.444154739379883,
"learning_rate": 6.205022003624127e-06,
"loss": 0.0573,
"step": 2400
},
{
"epoch": 0.6342221071705928,
"grad_norm": 1.8880912065505981,
"learning_rate": 6.334455086720166e-06,
"loss": 0.0409,
"step": 2450
},
{
"epoch": 0.6471654154801968,
"grad_norm": 5.30678653717041,
"learning_rate": 6.463888169816206e-06,
"loss": 0.0546,
"step": 2500
},
{
"epoch": 0.6601087237898007,
"grad_norm": 11.33090591430664,
"learning_rate": 6.593321252912245e-06,
"loss": 0.055,
"step": 2550
},
{
"epoch": 0.6730520320994046,
"grad_norm": 2.6539409160614014,
"learning_rate": 6.722754336008285e-06,
"loss": 0.0466,
"step": 2600
},
{
"epoch": 0.6859953404090086,
"grad_norm": 5.582194805145264,
"learning_rate": 6.8521874191043236e-06,
"loss": 0.0559,
"step": 2650
},
{
"epoch": 0.6989386487186124,
"grad_norm": 4.70722770690918,
"learning_rate": 6.9816205022003625e-06,
"loss": 0.0505,
"step": 2700
},
{
"epoch": 0.7118819570282164,
"grad_norm": 2.8403987884521484,
"learning_rate": 7.111053585296402e-06,
"loss": 0.0508,
"step": 2750
},
{
"epoch": 0.7248252653378203,
"grad_norm": 9.0087890625,
"learning_rate": 7.240486668392441e-06,
"loss": 0.0793,
"step": 2800
},
{
"epoch": 0.7377685736474243,
"grad_norm": 5.515995025634766,
"learning_rate": 7.369919751488481e-06,
"loss": 0.0635,
"step": 2850
},
{
"epoch": 0.7507118819570282,
"grad_norm": 3.9393441677093506,
"learning_rate": 7.49935283458452e-06,
"loss": 0.0509,
"step": 2900
},
{
"epoch": 0.7636551902666322,
"grad_norm": 5.0234599113464355,
"learning_rate": 7.62878591768056e-06,
"loss": 0.0528,
"step": 2950
},
{
"epoch": 0.7765984985762361,
"grad_norm": 2.9625911712646484,
"learning_rate": 7.758219000776599e-06,
"loss": 0.0435,
"step": 3000
},
{
"epoch": 0.78954180688584,
"grad_norm": 5.779876708984375,
"learning_rate": 7.88765208387264e-06,
"loss": 0.0522,
"step": 3050
},
{
"epoch": 0.802485115195444,
"grad_norm": 1.6520557403564453,
"learning_rate": 8.017085166968678e-06,
"loss": 0.0481,
"step": 3100
},
{
"epoch": 0.8154284235050479,
"grad_norm": 10.486522674560547,
"learning_rate": 8.146518250064717e-06,
"loss": 0.0571,
"step": 3150
},
{
"epoch": 0.8283717318146518,
"grad_norm": 5.2048139572143555,
"learning_rate": 8.275951333160756e-06,
"loss": 0.0652,
"step": 3200
},
{
"epoch": 0.8413150401242557,
"grad_norm": 6.738128185272217,
"learning_rate": 8.405384416256795e-06,
"loss": 0.0732,
"step": 3250
},
{
"epoch": 0.8542583484338597,
"grad_norm": 10.937406539916992,
"learning_rate": 8.534817499352836e-06,
"loss": 0.0928,
"step": 3300
},
{
"epoch": 0.8672016567434636,
"grad_norm": 6.5531182289123535,
"learning_rate": 8.664250582448875e-06,
"loss": 0.0716,
"step": 3350
},
{
"epoch": 0.8801449650530676,
"grad_norm": 9.179738998413086,
"learning_rate": 8.793683665544914e-06,
"loss": 0.0589,
"step": 3400
},
{
"epoch": 0.8930882733626715,
"grad_norm": 3.9006974697113037,
"learning_rate": 8.923116748640953e-06,
"loss": 0.0626,
"step": 3450
},
{
"epoch": 0.9060315816722755,
"grad_norm": 1.9278124570846558,
"learning_rate": 9.052549831736993e-06,
"loss": 0.0668,
"step": 3500
},
{
"epoch": 0.9189748899818794,
"grad_norm": 1.9330800771713257,
"learning_rate": 9.181982914833032e-06,
"loss": 0.0595,
"step": 3550
},
{
"epoch": 0.9319181982914833,
"grad_norm": 6.718048572540283,
"learning_rate": 9.311415997929071e-06,
"loss": 0.0804,
"step": 3600
},
{
"epoch": 0.9448615066010873,
"grad_norm": 6.918898582458496,
"learning_rate": 9.44084908102511e-06,
"loss": 0.0716,
"step": 3650
},
{
"epoch": 0.9578048149106911,
"grad_norm": 4.3917131423950195,
"learning_rate": 9.57028216412115e-06,
"loss": 0.0694,
"step": 3700
},
{
"epoch": 0.9707481232202951,
"grad_norm": 11.285859107971191,
"learning_rate": 9.69971524721719e-06,
"loss": 0.0715,
"step": 3750
},
{
"epoch": 0.983691431529899,
"grad_norm": 4.574735641479492,
"learning_rate": 9.829148330313229e-06,
"loss": 0.0601,
"step": 3800
},
{
"epoch": 0.996634739839503,
"grad_norm": 4.355649948120117,
"learning_rate": 9.955992751747347e-06,
"loss": 0.0735,
"step": 3850
},
{
"epoch": 1.0,
"eval_loss": 0.07532492280006409,
"eval_runtime": 88.308,
"eval_samples_per_second": 5.662,
"eval_steps_per_second": 0.713,
"eval_wer": 9.278891571323607,
"step": 3863
},
{
"epoch": 1.009578048149107,
"grad_norm": 6.590569019317627,
"learning_rate": 9.990508240572958e-06,
"loss": 0.0535,
"step": 3900
},
{
"epoch": 1.0225213564587108,
"grad_norm": 1.8211477994918823,
"learning_rate": 9.976126786895621e-06,
"loss": 0.0632,
"step": 3950
},
{
"epoch": 1.0354646647683148,
"grad_norm": 6.469516754150391,
"learning_rate": 9.961745333218282e-06,
"loss": 0.0686,
"step": 4000
},
{
"epoch": 1.0484079730779188,
"grad_norm": 8.549825668334961,
"learning_rate": 9.947363879540945e-06,
"loss": 0.0613,
"step": 4050
},
{
"epoch": 1.0613512813875225,
"grad_norm": 6.740059852600098,
"learning_rate": 9.933270054937154e-06,
"loss": 0.0629,
"step": 4100
},
{
"epoch": 1.0742945896971265,
"grad_norm": 6.783405780792236,
"learning_rate": 9.918888601259817e-06,
"loss": 0.1005,
"step": 4150
},
{
"epoch": 1.0872378980067305,
"grad_norm": 3.628119707107544,
"learning_rate": 9.904507147582478e-06,
"loss": 0.0599,
"step": 4200
},
{
"epoch": 1.1001812063163345,
"grad_norm": 10.390827178955078,
"learning_rate": 9.89012569390514e-06,
"loss": 0.0713,
"step": 4250
},
{
"epoch": 1.1131245146259383,
"grad_norm": 4.781597137451172,
"learning_rate": 9.875744240227803e-06,
"loss": 0.0758,
"step": 4300
},
{
"epoch": 1.1260678229355423,
"grad_norm": 2.381582498550415,
"learning_rate": 9.861362786550466e-06,
"loss": 0.0587,
"step": 4350
},
{
"epoch": 1.1390111312451463,
"grad_norm": 3.9035913944244385,
"learning_rate": 9.846981332873127e-06,
"loss": 0.0482,
"step": 4400
},
{
"epoch": 1.1519544395547503,
"grad_norm": 3.4371120929718018,
"learning_rate": 9.83259987919579e-06,
"loss": 0.0584,
"step": 4450
},
{
"epoch": 1.164897747864354,
"grad_norm": 5.601978302001953,
"learning_rate": 9.818218425518452e-06,
"loss": 0.0567,
"step": 4500
},
{
"epoch": 1.177841056173958,
"grad_norm": 5.46005916595459,
"learning_rate": 9.803836971841113e-06,
"loss": 0.0696,
"step": 4550
},
{
"epoch": 1.190784364483562,
"grad_norm": 6.306366443634033,
"learning_rate": 9.789455518163776e-06,
"loss": 0.0628,
"step": 4600
},
{
"epoch": 1.2037276727931658,
"grad_norm": 6.448446273803711,
"learning_rate": 9.775074064486439e-06,
"loss": 0.0712,
"step": 4650
},
{
"epoch": 1.2166709811027698,
"grad_norm": 5.446472644805908,
"learning_rate": 9.760692610809102e-06,
"loss": 0.0549,
"step": 4700
},
{
"epoch": 1.2296142894123738,
"grad_norm": 8.122842788696289,
"learning_rate": 9.746311157131764e-06,
"loss": 0.0804,
"step": 4750
},
{
"epoch": 1.2425575977219778,
"grad_norm": 17.312549591064453,
"learning_rate": 9.731929703454425e-06,
"loss": 0.0598,
"step": 4800
},
{
"epoch": 1.2555009060315816,
"grad_norm": 8.815750122070312,
"learning_rate": 9.717548249777088e-06,
"loss": 0.086,
"step": 4850
},
{
"epoch": 1.2684442143411856,
"grad_norm": 7.290472030639648,
"learning_rate": 9.70316679609975e-06,
"loss": 0.0646,
"step": 4900
},
{
"epoch": 1.2813875226507896,
"grad_norm": 6.4685258865356445,
"learning_rate": 9.688785342422414e-06,
"loss": 0.0717,
"step": 4950
},
{
"epoch": 1.2943308309603934,
"grad_norm": 6.5981974601745605,
"learning_rate": 9.674403888745075e-06,
"loss": 0.0605,
"step": 5000
},
{
"epoch": 1.3072741392699974,
"grad_norm": 4.253500461578369,
"learning_rate": 9.660022435067737e-06,
"loss": 0.0657,
"step": 5050
},
{
"epoch": 1.3202174475796014,
"grad_norm": 14.457869529724121,
"learning_rate": 9.6456409813904e-06,
"loss": 0.0701,
"step": 5100
},
{
"epoch": 1.3331607558892054,
"grad_norm": 10.046952247619629,
"learning_rate": 9.631259527713061e-06,
"loss": 0.0636,
"step": 5150
},
{
"epoch": 1.3461040641988093,
"grad_norm": 0.7242924571037292,
"learning_rate": 9.616878074035725e-06,
"loss": 0.0731,
"step": 5200
},
{
"epoch": 1.3590473725084131,
"grad_norm": 5.0954155921936035,
"learning_rate": 9.602496620358386e-06,
"loss": 0.0768,
"step": 5250
},
{
"epoch": 1.3719906808180171,
"grad_norm": 3.536829710006714,
"learning_rate": 9.588115166681049e-06,
"loss": 0.0676,
"step": 5300
},
{
"epoch": 1.384933989127621,
"grad_norm": 16.024459838867188,
"learning_rate": 9.573733713003712e-06,
"loss": 0.0778,
"step": 5350
},
{
"epoch": 1.3978772974372249,
"grad_norm": 8.860788345336914,
"learning_rate": 9.559352259326373e-06,
"loss": 0.0618,
"step": 5400
},
{
"epoch": 1.4108206057468289,
"grad_norm": 8.013275146484375,
"learning_rate": 9.544970805649036e-06,
"loss": 0.0682,
"step": 5450
},
{
"epoch": 1.4237639140564329,
"grad_norm": 1.6208149194717407,
"learning_rate": 9.530589351971698e-06,
"loss": 0.0658,
"step": 5500
},
{
"epoch": 1.4367072223660369,
"grad_norm": 3.301064968109131,
"learning_rate": 9.516207898294361e-06,
"loss": 0.0578,
"step": 5550
},
{
"epoch": 1.4496505306756406,
"grad_norm": 8.253787994384766,
"learning_rate": 9.501826444617022e-06,
"loss": 0.0613,
"step": 5600
},
{
"epoch": 1.4625938389852446,
"grad_norm": 5.881521224975586,
"learning_rate": 9.487444990939685e-06,
"loss": 0.0652,
"step": 5650
},
{
"epoch": 1.4755371472948486,
"grad_norm": 5.812432289123535,
"learning_rate": 9.473063537262347e-06,
"loss": 0.0632,
"step": 5700
},
{
"epoch": 1.4884804556044524,
"grad_norm": 7.349687576293945,
"learning_rate": 9.458682083585008e-06,
"loss": 0.0633,
"step": 5750
},
{
"epoch": 1.5014237639140564,
"grad_norm": 6.8907694816589355,
"learning_rate": 9.444300629907673e-06,
"loss": 0.0621,
"step": 5800
},
{
"epoch": 1.5143670722236604,
"grad_norm": 6.606383800506592,
"learning_rate": 9.429919176230334e-06,
"loss": 0.0827,
"step": 5850
},
{
"epoch": 1.5273103805332644,
"grad_norm": 7.548374176025391,
"learning_rate": 9.415537722552997e-06,
"loss": 0.0616,
"step": 5900
},
{
"epoch": 1.5402536888428684,
"grad_norm": 2.861259698867798,
"learning_rate": 9.40115626887566e-06,
"loss": 0.0681,
"step": 5950
},
{
"epoch": 1.5531969971524722,
"grad_norm": 1.8648054599761963,
"learning_rate": 9.38677481519832e-06,
"loss": 0.0716,
"step": 6000
},
{
"epoch": 1.566140305462076,
"grad_norm": 8.61605167388916,
"learning_rate": 9.372393361520983e-06,
"loss": 0.0632,
"step": 6050
},
{
"epoch": 1.57908361377168,
"grad_norm": 7.054296493530273,
"learning_rate": 9.358011907843646e-06,
"loss": 0.067,
"step": 6100
},
{
"epoch": 1.592026922081284,
"grad_norm": 16.833148956298828,
"learning_rate": 9.343630454166308e-06,
"loss": 0.0657,
"step": 6150
},
{
"epoch": 1.604970230390888,
"grad_norm": 8.74778938293457,
"learning_rate": 9.32924900048897e-06,
"loss": 0.0824,
"step": 6200
},
{
"epoch": 1.617913538700492,
"grad_norm": 7.7294440269470215,
"learning_rate": 9.314867546811632e-06,
"loss": 0.0756,
"step": 6250
},
{
"epoch": 1.630856847010096,
"grad_norm": 5.917527675628662,
"learning_rate": 9.300486093134295e-06,
"loss": 0.0625,
"step": 6300
},
{
"epoch": 1.6438001553196997,
"grad_norm": 6.116061687469482,
"learning_rate": 9.286104639456958e-06,
"loss": 0.0733,
"step": 6350
},
{
"epoch": 1.6567434636293037,
"grad_norm": 5.308782577514648,
"learning_rate": 9.27172318577962e-06,
"loss": 0.0614,
"step": 6400
},
{
"epoch": 1.6696867719389075,
"grad_norm": 4.262322902679443,
"learning_rate": 9.257341732102281e-06,
"loss": 0.0695,
"step": 6450
},
{
"epoch": 1.6826300802485115,
"grad_norm": 10.316951751708984,
"learning_rate": 9.242960278424944e-06,
"loss": 0.0608,
"step": 6500
},
{
"epoch": 1.6955733885581155,
"grad_norm": 10.672906875610352,
"learning_rate": 9.228578824747607e-06,
"loss": 0.0497,
"step": 6550
},
{
"epoch": 1.7085166968677195,
"grad_norm": 9.800777435302734,
"learning_rate": 9.214197371070268e-06,
"loss": 0.0869,
"step": 6600
},
{
"epoch": 1.7214600051773234,
"grad_norm": 0.5741531848907471,
"learning_rate": 9.19981591739293e-06,
"loss": 0.0719,
"step": 6650
},
{
"epoch": 1.7344033134869272,
"grad_norm": 1.716304063796997,
"learning_rate": 9.185434463715593e-06,
"loss": 0.0819,
"step": 6700
},
{
"epoch": 1.7473466217965312,
"grad_norm": 5.458923816680908,
"learning_rate": 9.171053010038256e-06,
"loss": 0.0911,
"step": 6750
},
{
"epoch": 1.760289930106135,
"grad_norm": 5.717566967010498,
"learning_rate": 9.156671556360917e-06,
"loss": 0.0728,
"step": 6800
},
{
"epoch": 1.773233238415739,
"grad_norm": 2.0101475715637207,
"learning_rate": 9.14229010268358e-06,
"loss": 0.0688,
"step": 6850
},
{
"epoch": 1.786176546725343,
"grad_norm": 4.805422306060791,
"learning_rate": 9.127908649006242e-06,
"loss": 0.0722,
"step": 6900
},
{
"epoch": 1.799119855034947,
"grad_norm": 8.908441543579102,
"learning_rate": 9.113527195328905e-06,
"loss": 0.0742,
"step": 6950
},
{
"epoch": 1.812063163344551,
"grad_norm": 5.328374862670898,
"learning_rate": 9.099145741651568e-06,
"loss": 0.0705,
"step": 7000
},
{
"epoch": 1.8250064716541547,
"grad_norm": 5.544122219085693,
"learning_rate": 9.084764287974229e-06,
"loss": 0.0803,
"step": 7050
},
{
"epoch": 1.8379497799637587,
"grad_norm": 12.470900535583496,
"learning_rate": 9.070382834296892e-06,
"loss": 0.0687,
"step": 7100
},
{
"epoch": 1.8508930882733625,
"grad_norm": 6.980189323425293,
"learning_rate": 9.056001380619553e-06,
"loss": 0.0738,
"step": 7150
},
{
"epoch": 1.8638363965829665,
"grad_norm": 5.287197589874268,
"learning_rate": 9.041619926942215e-06,
"loss": 0.0723,
"step": 7200
},
{
"epoch": 1.8767797048925705,
"grad_norm": 7.992635250091553,
"learning_rate": 9.027238473264878e-06,
"loss": 0.0766,
"step": 7250
},
{
"epoch": 1.8897230132021745,
"grad_norm": 13.58613395690918,
"learning_rate": 9.01285701958754e-06,
"loss": 0.0672,
"step": 7300
},
{
"epoch": 1.9026663215117785,
"grad_norm": 6.183566093444824,
"learning_rate": 8.998475565910203e-06,
"loss": 0.0636,
"step": 7350
},
{
"epoch": 1.9156096298213825,
"grad_norm": 4.56492805480957,
"learning_rate": 8.984094112232865e-06,
"loss": 0.0823,
"step": 7400
},
{
"epoch": 1.9285529381309863,
"grad_norm": 3.8052022457122803,
"learning_rate": 8.969712658555527e-06,
"loss": 0.094,
"step": 7450
},
{
"epoch": 1.9414962464405903,
"grad_norm": 4.8918843269348145,
"learning_rate": 8.95533120487819e-06,
"loss": 0.0707,
"step": 7500
},
{
"epoch": 1.954439554750194,
"grad_norm": 8.336883544921875,
"learning_rate": 8.940949751200853e-06,
"loss": 0.082,
"step": 7550
},
{
"epoch": 1.967382863059798,
"grad_norm": 11.596981048583984,
"learning_rate": 8.926568297523515e-06,
"loss": 0.0914,
"step": 7600
},
{
"epoch": 1.980326171369402,
"grad_norm": 3.7584636211395264,
"learning_rate": 8.912186843846176e-06,
"loss": 0.0697,
"step": 7650
},
{
"epoch": 1.993269479679006,
"grad_norm": 1.8307932615280151,
"learning_rate": 8.898093019242386e-06,
"loss": 0.0597,
"step": 7700
},
{
"epoch": 2.0,
"eval_loss": 0.0625610426068306,
"eval_runtime": 88.6808,
"eval_samples_per_second": 5.638,
"eval_steps_per_second": 0.71,
"eval_wer": 7.819880340086072,
"step": 7726
},
{
"epoch": 2.00621278798861,
"grad_norm": 5.234423637390137,
"learning_rate": 8.883711565565049e-06,
"loss": 0.0493,
"step": 7750
},
{
"epoch": 2.019156096298214,
"grad_norm": 6.8441386222839355,
"learning_rate": 8.869330111887711e-06,
"loss": 0.0531,
"step": 7800
},
{
"epoch": 2.0320994046078176,
"grad_norm": 11.543621063232422,
"learning_rate": 8.854948658210372e-06,
"loss": 0.0447,
"step": 7850
},
{
"epoch": 2.0450427129174216,
"grad_norm": 8.563196182250977,
"learning_rate": 8.840567204533035e-06,
"loss": 0.0466,
"step": 7900
},
{
"epoch": 2.0579860212270256,
"grad_norm": 2.31270432472229,
"learning_rate": 8.826185750855698e-06,
"loss": 0.0487,
"step": 7950
},
{
"epoch": 2.0709293295366296,
"grad_norm": 2.1112735271453857,
"learning_rate": 8.811804297178359e-06,
"loss": 0.0424,
"step": 8000
},
{
"epoch": 2.0838726378462336,
"grad_norm": 2.2178971767425537,
"learning_rate": 8.797422843501021e-06,
"loss": 0.0472,
"step": 8050
},
{
"epoch": 2.0968159461558376,
"grad_norm": 7.748543739318848,
"learning_rate": 8.783041389823684e-06,
"loss": 0.0467,
"step": 8100
},
{
"epoch": 2.1097592544654415,
"grad_norm": 13.174030303955078,
"learning_rate": 8.768659936146347e-06,
"loss": 0.0582,
"step": 8150
},
{
"epoch": 2.122702562775045,
"grad_norm": 8.9285306930542,
"learning_rate": 8.754278482469008e-06,
"loss": 0.0388,
"step": 8200
},
{
"epoch": 2.135645871084649,
"grad_norm": 7.205421447753906,
"learning_rate": 8.73989702879167e-06,
"loss": 0.049,
"step": 8250
},
{
"epoch": 2.148589179394253,
"grad_norm": 13.037263870239258,
"learning_rate": 8.725515575114333e-06,
"loss": 0.0463,
"step": 8300
},
{
"epoch": 2.161532487703857,
"grad_norm": 7.653538703918457,
"learning_rate": 8.711134121436996e-06,
"loss": 0.047,
"step": 8350
},
{
"epoch": 2.174475796013461,
"grad_norm": 3.7179276943206787,
"learning_rate": 8.696752667759659e-06,
"loss": 0.0455,
"step": 8400
},
{
"epoch": 2.187419104323065,
"grad_norm": 5.476615905761719,
"learning_rate": 8.68237121408232e-06,
"loss": 0.0439,
"step": 8450
},
{
"epoch": 2.200362412632669,
"grad_norm": 2.110492467880249,
"learning_rate": 8.667989760404983e-06,
"loss": 0.0411,
"step": 8500
},
{
"epoch": 2.213305720942273,
"grad_norm": 5.301363468170166,
"learning_rate": 8.653608306727645e-06,
"loss": 0.0517,
"step": 8550
},
{
"epoch": 2.2262490292518766,
"grad_norm": 9.795352935791016,
"learning_rate": 8.639226853050306e-06,
"loss": 0.0564,
"step": 8600
},
{
"epoch": 2.2391923375614806,
"grad_norm": 1.353123426437378,
"learning_rate": 8.624845399372969e-06,
"loss": 0.0485,
"step": 8650
},
{
"epoch": 2.2521356458710846,
"grad_norm": 9.473326683044434,
"learning_rate": 8.610463945695632e-06,
"loss": 0.0645,
"step": 8700
},
{
"epoch": 2.2650789541806886,
"grad_norm": 4.406058311462402,
"learning_rate": 8.596082492018294e-06,
"loss": 0.0542,
"step": 8750
},
{
"epoch": 2.2780222624902926,
"grad_norm": 17.06141471862793,
"learning_rate": 8.581701038340955e-06,
"loss": 0.0502,
"step": 8800
},
{
"epoch": 2.2909655707998966,
"grad_norm": 8.37076187133789,
"learning_rate": 8.567319584663618e-06,
"loss": 0.0554,
"step": 8850
},
{
"epoch": 2.3039088791095006,
"grad_norm": 10.135491371154785,
"learning_rate": 8.552938130986281e-06,
"loss": 0.0469,
"step": 8900
},
{
"epoch": 2.316852187419104,
"grad_norm": 7.701286315917969,
"learning_rate": 8.538556677308944e-06,
"loss": 0.0468,
"step": 8950
},
{
"epoch": 2.329795495728708,
"grad_norm": 0.7516931891441345,
"learning_rate": 8.524175223631606e-06,
"loss": 0.0443,
"step": 9000
},
{
"epoch": 2.342738804038312,
"grad_norm": 8.913509368896484,
"learning_rate": 8.509793769954267e-06,
"loss": 0.0619,
"step": 9050
},
{
"epoch": 2.355682112347916,
"grad_norm": 10.149544715881348,
"learning_rate": 8.49541231627693e-06,
"loss": 0.051,
"step": 9100
},
{
"epoch": 2.36862542065752,
"grad_norm": 8.469508171081543,
"learning_rate": 8.481030862599591e-06,
"loss": 0.0481,
"step": 9150
},
{
"epoch": 2.381568728967124,
"grad_norm": 2.6739773750305176,
"learning_rate": 8.466649408922255e-06,
"loss": 0.0565,
"step": 9200
},
{
"epoch": 2.394512037276728,
"grad_norm": 7.850672245025635,
"learning_rate": 8.452267955244916e-06,
"loss": 0.0469,
"step": 9250
},
{
"epoch": 2.4074553455863317,
"grad_norm": 4.025714874267578,
"learning_rate": 8.43788650156758e-06,
"loss": 0.0554,
"step": 9300
},
{
"epoch": 2.4203986538959357,
"grad_norm": 8.857705116271973,
"learning_rate": 8.423505047890242e-06,
"loss": 0.0449,
"step": 9350
},
{
"epoch": 2.4333419622055397,
"grad_norm": 7.978151321411133,
"learning_rate": 8.409123594212903e-06,
"loss": 0.0768,
"step": 9400
},
{
"epoch": 2.4462852705151437,
"grad_norm": 7.44215726852417,
"learning_rate": 8.394742140535566e-06,
"loss": 0.0612,
"step": 9450
},
{
"epoch": 2.4592285788247477,
"grad_norm": 3.768834352493286,
"learning_rate": 8.380360686858228e-06,
"loss": 0.0405,
"step": 9500
},
{
"epoch": 2.4721718871343517,
"grad_norm": 4.792430877685547,
"learning_rate": 8.365979233180891e-06,
"loss": 0.049,
"step": 9550
},
{
"epoch": 2.4851151954439556,
"grad_norm": 7.07329797744751,
"learning_rate": 8.351597779503554e-06,
"loss": 0.0516,
"step": 9600
},
{
"epoch": 2.498058503753559,
"grad_norm": 7.043036937713623,
"learning_rate": 8.337216325826215e-06,
"loss": 0.0589,
"step": 9650
},
{
"epoch": 2.511001812063163,
"grad_norm": 8.471428871154785,
"learning_rate": 8.322834872148878e-06,
"loss": 0.0433,
"step": 9700
},
{
"epoch": 2.523945120372767,
"grad_norm": 5.081391334533691,
"learning_rate": 8.308453418471539e-06,
"loss": 0.0603,
"step": 9750
},
{
"epoch": 2.536888428682371,
"grad_norm": 6.474493980407715,
"learning_rate": 8.294071964794203e-06,
"loss": 0.0529,
"step": 9800
},
{
"epoch": 2.549831736991975,
"grad_norm": 6.623453617095947,
"learning_rate": 8.279690511116864e-06,
"loss": 0.0491,
"step": 9850
},
{
"epoch": 2.562775045301579,
"grad_norm": 5.389576435089111,
"learning_rate": 8.265309057439527e-06,
"loss": 0.0491,
"step": 9900
},
{
"epoch": 2.575718353611183,
"grad_norm": 8.212656021118164,
"learning_rate": 8.25092760376219e-06,
"loss": 0.0544,
"step": 9950
},
{
"epoch": 2.5886616619207867,
"grad_norm": 0.44550031423568726,
"learning_rate": 8.23654615008485e-06,
"loss": 0.0578,
"step": 10000
},
{
"epoch": 2.6016049702303907,
"grad_norm": 1.0146311521530151,
"learning_rate": 8.222164696407513e-06,
"loss": 0.0363,
"step": 10050
},
{
"epoch": 2.6145482785399947,
"grad_norm": 2.9840240478515625,
"learning_rate": 8.207783242730176e-06,
"loss": 0.0464,
"step": 10100
},
{
"epoch": 2.6274915868495987,
"grad_norm": 9.97850513458252,
"learning_rate": 8.193401789052839e-06,
"loss": 0.0486,
"step": 10150
},
{
"epoch": 2.6404348951592027,
"grad_norm": 7.187142372131348,
"learning_rate": 8.179020335375501e-06,
"loss": 0.0621,
"step": 10200
},
{
"epoch": 2.6533782034688067,
"grad_norm": 8.673319816589355,
"learning_rate": 8.164638881698162e-06,
"loss": 0.0495,
"step": 10250
},
{
"epoch": 2.6663215117784107,
"grad_norm": 7.366682529449463,
"learning_rate": 8.150257428020825e-06,
"loss": 0.0426,
"step": 10300
},
{
"epoch": 2.6792648200880143,
"grad_norm": 4.433532238006592,
"learning_rate": 8.135875974343488e-06,
"loss": 0.0489,
"step": 10350
},
{
"epoch": 2.6922081283976187,
"grad_norm": 1.0996958017349243,
"learning_rate": 8.12149452066615e-06,
"loss": 0.0534,
"step": 10400
},
{
"epoch": 2.7051514367072222,
"grad_norm": 4.040085792541504,
"learning_rate": 8.107113066988811e-06,
"loss": 0.0456,
"step": 10450
},
{
"epoch": 2.7180947450168262,
"grad_norm": 6.1070685386657715,
"learning_rate": 8.092731613311474e-06,
"loss": 0.0685,
"step": 10500
},
{
"epoch": 2.7310380533264302,
"grad_norm": 5.145371913909912,
"learning_rate": 8.078350159634137e-06,
"loss": 0.0537,
"step": 10550
},
{
"epoch": 2.7439813616360342,
"grad_norm": 5.4013190269470215,
"learning_rate": 8.063968705956798e-06,
"loss": 0.051,
"step": 10600
},
{
"epoch": 2.7569246699456382,
"grad_norm": 0.9301003217697144,
"learning_rate": 8.049587252279462e-06,
"loss": 0.0469,
"step": 10650
},
{
"epoch": 2.769867978255242,
"grad_norm": 2.0756452083587646,
"learning_rate": 8.035205798602123e-06,
"loss": 0.0508,
"step": 10700
},
{
"epoch": 2.782811286564846,
"grad_norm": 5.310762882232666,
"learning_rate": 8.020824344924786e-06,
"loss": 0.0559,
"step": 10750
},
{
"epoch": 2.7957545948744498,
"grad_norm": 10.922019004821777,
"learning_rate": 8.006442891247447e-06,
"loss": 0.0558,
"step": 10800
},
{
"epoch": 2.8086979031840538,
"grad_norm": 7.071091651916504,
"learning_rate": 7.99206143757011e-06,
"loss": 0.059,
"step": 10850
},
{
"epoch": 2.8216412114936578,
"grad_norm": 8.605588912963867,
"learning_rate": 7.977679983892773e-06,
"loss": 0.0502,
"step": 10900
},
{
"epoch": 2.8345845198032618,
"grad_norm": 5.918191909790039,
"learning_rate": 7.963298530215435e-06,
"loss": 0.044,
"step": 10950
},
{
"epoch": 2.8475278281128658,
"grad_norm": 10.315163612365723,
"learning_rate": 7.948917076538098e-06,
"loss": 0.0639,
"step": 11000
},
{
"epoch": 2.8604711364224693,
"grad_norm": 8.651927947998047,
"learning_rate": 7.934535622860759e-06,
"loss": 0.0632,
"step": 11050
},
{
"epoch": 2.8734144447320737,
"grad_norm": 6.735222816467285,
"learning_rate": 7.920154169183422e-06,
"loss": 0.0594,
"step": 11100
},
{
"epoch": 2.8863577530416773,
"grad_norm": 8.701018333435059,
"learning_rate": 7.905772715506084e-06,
"loss": 0.067,
"step": 11150
},
{
"epoch": 2.8993010613512813,
"grad_norm": 7.249449729919434,
"learning_rate": 7.891391261828745e-06,
"loss": 0.0562,
"step": 11200
},
{
"epoch": 2.9122443696608853,
"grad_norm": 9.386366844177246,
"learning_rate": 7.87700980815141e-06,
"loss": 0.0443,
"step": 11250
},
{
"epoch": 2.9251876779704893,
"grad_norm": 5.82024621963501,
"learning_rate": 7.86262835447407e-06,
"loss": 0.0435,
"step": 11300
},
{
"epoch": 2.9381309862800933,
"grad_norm": 9.714224815368652,
"learning_rate": 7.848246900796734e-06,
"loss": 0.0584,
"step": 11350
},
{
"epoch": 2.9510742945896973,
"grad_norm": 10.820244789123535,
"learning_rate": 7.833865447119395e-06,
"loss": 0.051,
"step": 11400
},
{
"epoch": 2.9640176028993013,
"grad_norm": 9.436473846435547,
"learning_rate": 7.819483993442057e-06,
"loss": 0.0762,
"step": 11450
},
{
"epoch": 2.976960911208905,
"grad_norm": 4.0043745040893555,
"learning_rate": 7.80510253976472e-06,
"loss": 0.0542,
"step": 11500
},
{
"epoch": 2.989904219518509,
"grad_norm": 6.734024524688721,
"learning_rate": 7.790721086087383e-06,
"loss": 0.0678,
"step": 11550
},
{
"epoch": 3.0,
"eval_loss": 0.047223061323165894,
"eval_runtime": 88.8982,
"eval_samples_per_second": 5.624,
"eval_steps_per_second": 0.709,
"eval_wer": 7.588957699170777,
"step": 11589
},
{
"epoch": 3.002847527828113,
"grad_norm": 7.159914970397949,
"learning_rate": 7.776339632410045e-06,
"loss": 0.0545,
"step": 11600
},
{
"epoch": 3.015790836137717,
"grad_norm": 8.731955528259277,
"learning_rate": 7.761958178732706e-06,
"loss": 0.0283,
"step": 11650
},
{
"epoch": 3.028734144447321,
"grad_norm": 9.880719184875488,
"learning_rate": 7.74757672505537e-06,
"loss": 0.0372,
"step": 11700
},
{
"epoch": 3.041677452756925,
"grad_norm": 4.863366603851318,
"learning_rate": 7.733195271378032e-06,
"loss": 0.0301,
"step": 11750
},
{
"epoch": 3.054620761066529,
"grad_norm": 3.5925040245056152,
"learning_rate": 7.718813817700693e-06,
"loss": 0.0319,
"step": 11800
},
{
"epoch": 3.0675640693761324,
"grad_norm": 5.193197727203369,
"learning_rate": 7.704432364023357e-06,
"loss": 0.0284,
"step": 11850
},
{
"epoch": 3.0805073776857363,
"grad_norm": 7.325606822967529,
"learning_rate": 7.690050910346018e-06,
"loss": 0.0221,
"step": 11900
},
{
"epoch": 3.0934506859953403,
"grad_norm": 1.1931557655334473,
"learning_rate": 7.675669456668681e-06,
"loss": 0.0318,
"step": 11950
},
{
"epoch": 3.1063939943049443,
"grad_norm": 3.640929698944092,
"learning_rate": 7.661288002991342e-06,
"loss": 0.0339,
"step": 12000
},
{
"epoch": 3.1193373026145483,
"grad_norm": 5.155591011047363,
"learning_rate": 7.647194178387553e-06,
"loss": 0.0294,
"step": 12050
},
{
"epoch": 3.1322806109241523,
"grad_norm": 4.072420120239258,
"learning_rate": 7.632812724710214e-06,
"loss": 0.0371,
"step": 12100
},
{
"epoch": 3.1452239192337563,
"grad_norm": 4.188466548919678,
"learning_rate": 7.618431271032877e-06,
"loss": 0.0253,
"step": 12150
},
{
"epoch": 3.15816722754336,
"grad_norm": 7.71252965927124,
"learning_rate": 7.604049817355539e-06,
"loss": 0.0291,
"step": 12200
},
{
"epoch": 3.171110535852964,
"grad_norm": 1.7614275217056274,
"learning_rate": 7.589668363678201e-06,
"loss": 0.0477,
"step": 12250
},
{
"epoch": 3.184053844162568,
"grad_norm": 6.398949146270752,
"learning_rate": 7.575286910000863e-06,
"loss": 0.0281,
"step": 12300
},
{
"epoch": 3.196997152472172,
"grad_norm": 5.648083686828613,
"learning_rate": 7.560905456323526e-06,
"loss": 0.0385,
"step": 12350
},
{
"epoch": 3.209940460781776,
"grad_norm": 1.6101973056793213,
"learning_rate": 7.546524002646188e-06,
"loss": 0.0291,
"step": 12400
},
{
"epoch": 3.22288376909138,
"grad_norm": 1.436286449432373,
"learning_rate": 7.532142548968851e-06,
"loss": 0.0443,
"step": 12450
},
{
"epoch": 3.235827077400984,
"grad_norm": 1.658963680267334,
"learning_rate": 7.517761095291513e-06,
"loss": 0.0371,
"step": 12500
},
{
"epoch": 3.2487703857105874,
"grad_norm": 8.697310447692871,
"learning_rate": 7.5033796416141745e-06,
"loss": 0.0299,
"step": 12550
},
{
"epoch": 3.2617136940201914,
"grad_norm": 6.439472198486328,
"learning_rate": 7.488998187936837e-06,
"loss": 0.0308,
"step": 12600
},
{
"epoch": 3.2746570023297954,
"grad_norm": 3.5517160892486572,
"learning_rate": 7.4746167342595e-06,
"loss": 0.0357,
"step": 12650
},
{
"epoch": 3.2876003106393994,
"grad_norm": 2.6896841526031494,
"learning_rate": 7.460235280582162e-06,
"loss": 0.0298,
"step": 12700
},
{
"epoch": 3.3005436189490034,
"grad_norm": 3.5411911010742188,
"learning_rate": 7.4458538269048245e-06,
"loss": 0.0249,
"step": 12750
},
{
"epoch": 3.3134869272586074,
"grad_norm": 0.766302227973938,
"learning_rate": 7.431472373227486e-06,
"loss": 0.0207,
"step": 12800
},
{
"epoch": 3.3264302355682114,
"grad_norm": 7.974555969238281,
"learning_rate": 7.417090919550148e-06,
"loss": 0.0258,
"step": 12850
},
{
"epoch": 3.339373543877815,
"grad_norm": 8.336533546447754,
"learning_rate": 7.40270946587281e-06,
"loss": 0.0335,
"step": 12900
},
{
"epoch": 3.352316852187419,
"grad_norm": 4.762045383453369,
"learning_rate": 7.38861564126902e-06,
"loss": 0.0391,
"step": 12950
},
{
"epoch": 3.365260160497023,
"grad_norm": 3.297501564025879,
"learning_rate": 7.374234187591682e-06,
"loss": 0.0427,
"step": 13000
},
{
"epoch": 3.378203468806627,
"grad_norm": 5.205377578735352,
"learning_rate": 7.359852733914344e-06,
"loss": 0.0517,
"step": 13050
},
{
"epoch": 3.391146777116231,
"grad_norm": 5.596180438995361,
"learning_rate": 7.345471280237007e-06,
"loss": 0.0422,
"step": 13100
},
{
"epoch": 3.404090085425835,
"grad_norm": 5.4441657066345215,
"learning_rate": 7.3310898265596695e-06,
"loss": 0.0367,
"step": 13150
},
{
"epoch": 3.417033393735439,
"grad_norm": 5.369819641113281,
"learning_rate": 7.316708372882331e-06,
"loss": 0.0245,
"step": 13200
},
{
"epoch": 3.429976702045043,
"grad_norm": 1.0381672382354736,
"learning_rate": 7.302326919204994e-06,
"loss": 0.0377,
"step": 13250
},
{
"epoch": 3.4429200103546465,
"grad_norm": 2.6047282218933105,
"learning_rate": 7.287945465527656e-06,
"loss": 0.0556,
"step": 13300
},
{
"epoch": 3.4558633186642504,
"grad_norm": 3.447537899017334,
"learning_rate": 7.273564011850318e-06,
"loss": 0.0331,
"step": 13350
},
{
"epoch": 3.4688066269738544,
"grad_norm": 12.265509605407715,
"learning_rate": 7.259182558172981e-06,
"loss": 0.0458,
"step": 13400
},
{
"epoch": 3.4817499352834584,
"grad_norm": 6.993318557739258,
"learning_rate": 7.244801104495643e-06,
"loss": 0.0328,
"step": 13450
},
{
"epoch": 3.4946932435930624,
"grad_norm": 1.927647590637207,
"learning_rate": 7.230419650818305e-06,
"loss": 0.036,
"step": 13500
},
{
"epoch": 3.5076365519026664,
"grad_norm": 4.639932632446289,
"learning_rate": 7.216038197140968e-06,
"loss": 0.0359,
"step": 13550
},
{
"epoch": 3.52057986021227,
"grad_norm": 6.088189125061035,
"learning_rate": 7.20165674346363e-06,
"loss": 0.0505,
"step": 13600
},
{
"epoch": 3.5335231685218744,
"grad_norm": 3.452139377593994,
"learning_rate": 7.187275289786292e-06,
"loss": 0.0334,
"step": 13650
},
{
"epoch": 3.546466476831478,
"grad_norm": 3.6713337898254395,
"learning_rate": 7.172893836108954e-06,
"loss": 0.0305,
"step": 13700
},
{
"epoch": 3.559409785141082,
"grad_norm": 4.258627891540527,
"learning_rate": 7.158512382431617e-06,
"loss": 0.0257,
"step": 13750
},
{
"epoch": 3.572353093450686,
"grad_norm": 7.825601577758789,
"learning_rate": 7.144130928754279e-06,
"loss": 0.0261,
"step": 13800
},
{
"epoch": 3.58529640176029,
"grad_norm": 4.783618927001953,
"learning_rate": 7.129749475076942e-06,
"loss": 0.0347,
"step": 13850
},
{
"epoch": 3.598239710069894,
"grad_norm": 4.300550937652588,
"learning_rate": 7.1153680213996035e-06,
"loss": 0.029,
"step": 13900
},
{
"epoch": 3.611183018379498,
"grad_norm": 10.43149471282959,
"learning_rate": 7.100986567722265e-06,
"loss": 0.0345,
"step": 13950
},
{
"epoch": 3.624126326689102,
"grad_norm": 5.455187797546387,
"learning_rate": 7.086605114044929e-06,
"loss": 0.0362,
"step": 14000
},
{
"epoch": 3.6370696349987055,
"grad_norm": 7.492135047912598,
"learning_rate": 7.072223660367591e-06,
"loss": 0.0295,
"step": 14050
},
{
"epoch": 3.6500129433083095,
"grad_norm": 5.982784271240234,
"learning_rate": 7.057842206690253e-06,
"loss": 0.0458,
"step": 14100
},
{
"epoch": 3.6629562516179135,
"grad_norm": 13.213232040405273,
"learning_rate": 7.043460753012915e-06,
"loss": 0.0299,
"step": 14150
},
{
"epoch": 3.6758995599275175,
"grad_norm": 0.33053404092788696,
"learning_rate": 7.029079299335577e-06,
"loss": 0.0272,
"step": 14200
},
{
"epoch": 3.6888428682371215,
"grad_norm": 2.8715531826019287,
"learning_rate": 7.014697845658239e-06,
"loss": 0.0406,
"step": 14250
},
{
"epoch": 3.7017861765467255,
"grad_norm": 7.417051315307617,
"learning_rate": 7.000316391980903e-06,
"loss": 0.0484,
"step": 14300
},
{
"epoch": 3.7147294848563295,
"grad_norm": 7.245136737823486,
"learning_rate": 6.9859349383035645e-06,
"loss": 0.0392,
"step": 14350
},
{
"epoch": 3.727672793165933,
"grad_norm": 6.493204116821289,
"learning_rate": 6.971553484626226e-06,
"loss": 0.0301,
"step": 14400
},
{
"epoch": 3.740616101475537,
"grad_norm": 7.734640121459961,
"learning_rate": 6.957172030948889e-06,
"loss": 0.0317,
"step": 14450
},
{
"epoch": 3.753559409785141,
"grad_norm": 2.9053096771240234,
"learning_rate": 6.942790577271551e-06,
"loss": 0.0381,
"step": 14500
},
{
"epoch": 3.766502718094745,
"grad_norm": 8.95727825164795,
"learning_rate": 6.928409123594213e-06,
"loss": 0.0301,
"step": 14550
},
{
"epoch": 3.779446026404349,
"grad_norm": 3.024991512298584,
"learning_rate": 6.914027669916876e-06,
"loss": 0.0371,
"step": 14600
},
{
"epoch": 3.792389334713953,
"grad_norm": 2.2586114406585693,
"learning_rate": 6.899646216239538e-06,
"loss": 0.0331,
"step": 14650
},
{
"epoch": 3.805332643023557,
"grad_norm": 2.5965662002563477,
"learning_rate": 6.8852647625622e-06,
"loss": 0.0266,
"step": 14700
},
{
"epoch": 3.8182759513331606,
"grad_norm": 14.025388717651367,
"learning_rate": 6.870883308884863e-06,
"loss": 0.0325,
"step": 14750
},
{
"epoch": 3.8312192596427646,
"grad_norm": 12.150018692016602,
"learning_rate": 6.856501855207525e-06,
"loss": 0.0371,
"step": 14800
},
{
"epoch": 3.8441625679523685,
"grad_norm": 5.943263530731201,
"learning_rate": 6.842120401530187e-06,
"loss": 0.029,
"step": 14850
},
{
"epoch": 3.8571058762619725,
"grad_norm": 3.0075082778930664,
"learning_rate": 6.82773894785285e-06,
"loss": 0.0423,
"step": 14900
},
{
"epoch": 3.8700491845715765,
"grad_norm": 7.091434001922607,
"learning_rate": 6.813357494175512e-06,
"loss": 0.0333,
"step": 14950
},
{
"epoch": 3.8829924928811805,
"grad_norm": 9.822648048400879,
"learning_rate": 6.798976040498174e-06,
"loss": 0.0623,
"step": 15000
},
{
"epoch": 3.8959358011907845,
"grad_norm": 4.8575663566589355,
"learning_rate": 6.784594586820837e-06,
"loss": 0.0334,
"step": 15050
},
{
"epoch": 3.908879109500388,
"grad_norm": 6.463123321533203,
"learning_rate": 6.7702131331434985e-06,
"loss": 0.0309,
"step": 15100
},
{
"epoch": 3.921822417809992,
"grad_norm": 5.0641374588012695,
"learning_rate": 6.75583167946616e-06,
"loss": 0.0332,
"step": 15150
},
{
"epoch": 3.934765726119596,
"grad_norm": 2.091432809829712,
"learning_rate": 6.741450225788824e-06,
"loss": 0.0383,
"step": 15200
},
{
"epoch": 3.9477090344292,
"grad_norm": 2.943432092666626,
"learning_rate": 6.727068772111486e-06,
"loss": 0.0541,
"step": 15250
},
{
"epoch": 3.960652342738804,
"grad_norm": 7.013586044311523,
"learning_rate": 6.712687318434148e-06,
"loss": 0.0281,
"step": 15300
},
{
"epoch": 3.973595651048408,
"grad_norm": 5.849566459655762,
"learning_rate": 6.69830586475681e-06,
"loss": 0.0343,
"step": 15350
},
{
"epoch": 3.986538959358012,
"grad_norm": 8.706452369689941,
"learning_rate": 6.683924411079472e-06,
"loss": 0.0488,
"step": 15400
},
{
"epoch": 3.9994822676676156,
"grad_norm": 3.453444004058838,
"learning_rate": 6.669542957402134e-06,
"loss": 0.0232,
"step": 15450
},
{
"epoch": 4.0,
"eval_loss": 0.03156248852610588,
"eval_runtime": 89.3284,
"eval_samples_per_second": 5.597,
"eval_steps_per_second": 0.705,
"eval_wer": 4.15660753647528,
"step": 15452
},
{
"epoch": 4.01242557597722,
"grad_norm": 2.5590574741363525,
"learning_rate": 6.655161503724798e-06,
"loss": 0.0184,
"step": 15500
},
{
"epoch": 4.025368884286824,
"grad_norm": 0.7780801057815552,
"learning_rate": 6.6407800500474595e-06,
"loss": 0.0192,
"step": 15550
},
{
"epoch": 4.038312192596428,
"grad_norm": 10.055984497070312,
"learning_rate": 6.626398596370121e-06,
"loss": 0.0238,
"step": 15600
},
{
"epoch": 4.051255500906032,
"grad_norm": 1.114243745803833,
"learning_rate": 6.612017142692784e-06,
"loss": 0.025,
"step": 15650
},
{
"epoch": 4.064198809215635,
"grad_norm": 3.681232452392578,
"learning_rate": 6.597635689015446e-06,
"loss": 0.02,
"step": 15700
},
{
"epoch": 4.07714211752524,
"grad_norm": 8.334362983703613,
"learning_rate": 6.583254235338109e-06,
"loss": 0.028,
"step": 15750
},
{
"epoch": 4.090085425834843,
"grad_norm": 12.538928985595703,
"learning_rate": 6.568872781660771e-06,
"loss": 0.0223,
"step": 15800
},
{
"epoch": 4.103028734144448,
"grad_norm": 4.050449371337891,
"learning_rate": 6.554491327983433e-06,
"loss": 0.0189,
"step": 15850
},
{
"epoch": 4.115972042454051,
"grad_norm": 1.1800466775894165,
"learning_rate": 6.540109874306095e-06,
"loss": 0.0205,
"step": 15900
},
{
"epoch": 4.128915350763656,
"grad_norm": 5.886932373046875,
"learning_rate": 6.525728420628758e-06,
"loss": 0.0184,
"step": 15950
},
{
"epoch": 4.141858659073259,
"grad_norm": 4.0201497077941895,
"learning_rate": 6.51134696695142e-06,
"loss": 0.0188,
"step": 16000
},
{
"epoch": 4.154801967382863,
"grad_norm": 1.844307780265808,
"learning_rate": 6.4969655132740824e-06,
"loss": 0.0175,
"step": 16050
},
{
"epoch": 4.167745275692467,
"grad_norm": 3.237921953201294,
"learning_rate": 6.482584059596745e-06,
"loss": 0.0243,
"step": 16100
},
{
"epoch": 4.180688584002071,
"grad_norm": 1.648651123046875,
"learning_rate": 6.468202605919407e-06,
"loss": 0.0166,
"step": 16150
},
{
"epoch": 4.193631892311675,
"grad_norm": 5.503207206726074,
"learning_rate": 6.453821152242069e-06,
"loss": 0.0216,
"step": 16200
},
{
"epoch": 4.206575200621279,
"grad_norm": 5.58413553237915,
"learning_rate": 6.439439698564731e-06,
"loss": 0.0269,
"step": 16250
},
{
"epoch": 4.219518508930883,
"grad_norm": 3.219493865966797,
"learning_rate": 6.4250582448873935e-06,
"loss": 0.0154,
"step": 16300
},
{
"epoch": 4.232461817240487,
"grad_norm": 7.937684059143066,
"learning_rate": 6.410676791210056e-06,
"loss": 0.0177,
"step": 16350
},
{
"epoch": 4.24540512555009,
"grad_norm": 4.213293552398682,
"learning_rate": 6.396295337532719e-06,
"loss": 0.0202,
"step": 16400
},
{
"epoch": 4.258348433859695,
"grad_norm": 3.651660680770874,
"learning_rate": 6.381913883855381e-06,
"loss": 0.0213,
"step": 16450
},
{
"epoch": 4.271291742169298,
"grad_norm": 5.593703746795654,
"learning_rate": 6.367532430178043e-06,
"loss": 0.0158,
"step": 16500
},
{
"epoch": 4.284235050478903,
"grad_norm": 8.395854949951172,
"learning_rate": 6.3531509765007045e-06,
"loss": 0.0203,
"step": 16550
},
{
"epoch": 4.297178358788506,
"grad_norm": 5.291663646697998,
"learning_rate": 6.338769522823367e-06,
"loss": 0.0182,
"step": 16600
},
{
"epoch": 4.310121667098111,
"grad_norm": 2.6728503704071045,
"learning_rate": 6.32438806914603e-06,
"loss": 0.0147,
"step": 16650
},
{
"epoch": 4.323064975407714,
"grad_norm": 2.701340675354004,
"learning_rate": 6.310006615468693e-06,
"loss": 0.032,
"step": 16700
},
{
"epoch": 4.336008283717318,
"grad_norm": 5.171128273010254,
"learning_rate": 6.2956251617913545e-06,
"loss": 0.0182,
"step": 16750
},
{
"epoch": 4.348951592026922,
"grad_norm": 8.16347885131836,
"learning_rate": 6.281243708114016e-06,
"loss": 0.0359,
"step": 16800
},
{
"epoch": 4.361894900336526,
"grad_norm": 6.198201656341553,
"learning_rate": 6.266862254436678e-06,
"loss": 0.0261,
"step": 16850
},
{
"epoch": 4.37483820864613,
"grad_norm": 5.718491077423096,
"learning_rate": 6.252480800759341e-06,
"loss": 0.0193,
"step": 16900
},
{
"epoch": 4.387781516955734,
"grad_norm": 8.977401733398438,
"learning_rate": 6.238099347082004e-06,
"loss": 0.021,
"step": 16950
},
{
"epoch": 4.400724825265338,
"grad_norm": 2.3729536533355713,
"learning_rate": 6.223717893404666e-06,
"loss": 0.0207,
"step": 17000
},
{
"epoch": 4.413668133574942,
"grad_norm": 3.5691659450531006,
"learning_rate": 6.209336439727328e-06,
"loss": 0.0256,
"step": 17050
},
{
"epoch": 4.426611441884546,
"grad_norm": 3.9508790969848633,
"learning_rate": 6.195242615123537e-06,
"loss": 0.0198,
"step": 17100
},
{
"epoch": 4.43955475019415,
"grad_norm": 6.141160488128662,
"learning_rate": 6.1808611614462e-06,
"loss": 0.0187,
"step": 17150
},
{
"epoch": 4.452498058503753,
"grad_norm": 0.2836528420448303,
"learning_rate": 6.166479707768862e-06,
"loss": 0.0187,
"step": 17200
},
{
"epoch": 4.465441366813358,
"grad_norm": 6.005315780639648,
"learning_rate": 6.152098254091524e-06,
"loss": 0.0261,
"step": 17250
},
{
"epoch": 4.478384675122961,
"grad_norm": 2.222322702407837,
"learning_rate": 6.137716800414186e-06,
"loss": 0.0183,
"step": 17300
},
{
"epoch": 4.491327983432566,
"grad_norm": 4.63626766204834,
"learning_rate": 6.123335346736849e-06,
"loss": 0.0209,
"step": 17350
},
{
"epoch": 4.504271291742169,
"grad_norm": 1.04603111743927,
"learning_rate": 6.108953893059511e-06,
"loss": 0.0343,
"step": 17400
},
{
"epoch": 4.517214600051773,
"grad_norm": 0.18591836094856262,
"learning_rate": 6.094572439382173e-06,
"loss": 0.0205,
"step": 17450
},
{
"epoch": 4.530157908361377,
"grad_norm": 4.46800422668457,
"learning_rate": 6.080190985704836e-06,
"loss": 0.0307,
"step": 17500
},
{
"epoch": 4.543101216670981,
"grad_norm": 2.5701541900634766,
"learning_rate": 6.065809532027498e-06,
"loss": 0.0201,
"step": 17550
},
{
"epoch": 4.556044524980585,
"grad_norm": 3.805527448654175,
"learning_rate": 6.05142807835016e-06,
"loss": 0.0247,
"step": 17600
},
{
"epoch": 4.568987833290189,
"grad_norm": 5.005966663360596,
"learning_rate": 6.037046624672822e-06,
"loss": 0.0166,
"step": 17650
},
{
"epoch": 4.581931141599793,
"grad_norm": 2.1261184215545654,
"learning_rate": 6.022665170995484e-06,
"loss": 0.0192,
"step": 17700
},
{
"epoch": 4.594874449909397,
"grad_norm": 3.359769582748413,
"learning_rate": 6.008283717318147e-06,
"loss": 0.0186,
"step": 17750
},
{
"epoch": 4.607817758219001,
"grad_norm": 4.658329010009766,
"learning_rate": 5.99390226364081e-06,
"loss": 0.0156,
"step": 17800
},
{
"epoch": 4.620761066528605,
"grad_norm": 1.4093743562698364,
"learning_rate": 5.979520809963472e-06,
"loss": 0.0249,
"step": 17850
},
{
"epoch": 4.633704374838208,
"grad_norm": 7.107546806335449,
"learning_rate": 5.9651393562861336e-06,
"loss": 0.019,
"step": 17900
},
{
"epoch": 4.646647683147813,
"grad_norm": 3.7134788036346436,
"learning_rate": 5.9507579026087954e-06,
"loss": 0.0316,
"step": 17950
},
{
"epoch": 4.659590991457416,
"grad_norm": 9.994954109191895,
"learning_rate": 5.936376448931458e-06,
"loss": 0.021,
"step": 18000
},
{
"epoch": 4.672534299767021,
"grad_norm": 5.7871527671813965,
"learning_rate": 5.921994995254121e-06,
"loss": 0.0201,
"step": 18050
},
{
"epoch": 4.685477608076624,
"grad_norm": 4.141567230224609,
"learning_rate": 5.9076135415767836e-06,
"loss": 0.0341,
"step": 18100
},
{
"epoch": 4.698420916386228,
"grad_norm": 3.703082799911499,
"learning_rate": 5.8932320878994454e-06,
"loss": 0.0209,
"step": 18150
},
{
"epoch": 4.711364224695832,
"grad_norm": 1.5418035984039307,
"learning_rate": 5.878850634222107e-06,
"loss": 0.0205,
"step": 18200
},
{
"epoch": 4.724307533005436,
"grad_norm": 2.696366310119629,
"learning_rate": 5.864469180544769e-06,
"loss": 0.0322,
"step": 18250
},
{
"epoch": 4.73725084131504,
"grad_norm": 12.889842987060547,
"learning_rate": 5.850087726867433e-06,
"loss": 0.0299,
"step": 18300
},
{
"epoch": 4.750194149624644,
"grad_norm": 14.697345733642578,
"learning_rate": 5.835706273190095e-06,
"loss": 0.021,
"step": 18350
},
{
"epoch": 4.763137457934248,
"grad_norm": 14.185206413269043,
"learning_rate": 5.821324819512757e-06,
"loss": 0.028,
"step": 18400
},
{
"epoch": 4.776080766243852,
"grad_norm": 1.6946377754211426,
"learning_rate": 5.806943365835419e-06,
"loss": 0.0407,
"step": 18450
},
{
"epoch": 4.789024074553456,
"grad_norm": 0.4647742509841919,
"learning_rate": 5.792561912158081e-06,
"loss": 0.0245,
"step": 18500
},
{
"epoch": 4.80196738286306,
"grad_norm": 0.9601898193359375,
"learning_rate": 5.778180458480743e-06,
"loss": 0.023,
"step": 18550
},
{
"epoch": 4.814910691172663,
"grad_norm": 8.700115203857422,
"learning_rate": 5.7637990048034065e-06,
"loss": 0.0197,
"step": 18600
},
{
"epoch": 4.827853999482268,
"grad_norm": 0.46070945262908936,
"learning_rate": 5.749417551126068e-06,
"loss": 0.0207,
"step": 18650
},
{
"epoch": 4.840797307791871,
"grad_norm": 3.087283134460449,
"learning_rate": 5.735036097448731e-06,
"loss": 0.0228,
"step": 18700
},
{
"epoch": 4.853740616101476,
"grad_norm": 2.9574930667877197,
"learning_rate": 5.720654643771393e-06,
"loss": 0.0251,
"step": 18750
},
{
"epoch": 4.866683924411079,
"grad_norm": 0.7309446334838867,
"learning_rate": 5.706273190094055e-06,
"loss": 0.0209,
"step": 18800
},
{
"epoch": 4.879627232720684,
"grad_norm": 3.813610553741455,
"learning_rate": 5.691891736416717e-06,
"loss": 0.0179,
"step": 18850
},
{
"epoch": 4.892570541030287,
"grad_norm": 6.62191104888916,
"learning_rate": 5.67751028273938e-06,
"loss": 0.0229,
"step": 18900
},
{
"epoch": 4.905513849339892,
"grad_norm": 1.3516626358032227,
"learning_rate": 5.663128829062042e-06,
"loss": 0.0202,
"step": 18950
},
{
"epoch": 4.918457157649495,
"grad_norm": 8.537408828735352,
"learning_rate": 5.648747375384705e-06,
"loss": 0.0219,
"step": 19000
},
{
"epoch": 4.931400465959099,
"grad_norm": 7.586127758026123,
"learning_rate": 5.634365921707367e-06,
"loss": 0.0263,
"step": 19050
},
{
"epoch": 4.944343774268703,
"grad_norm": 2.9053454399108887,
"learning_rate": 5.6199844680300286e-06,
"loss": 0.0173,
"step": 19100
},
{
"epoch": 4.957287082578307,
"grad_norm": 3.9602510929107666,
"learning_rate": 5.60560301435269e-06,
"loss": 0.0216,
"step": 19150
},
{
"epoch": 4.970230390887911,
"grad_norm": 8.381765365600586,
"learning_rate": 5.591221560675354e-06,
"loss": 0.0213,
"step": 19200
},
{
"epoch": 4.983173699197515,
"grad_norm": 3.1814215183258057,
"learning_rate": 5.577127736071563e-06,
"loss": 0.028,
"step": 19250
},
{
"epoch": 4.996117007507118,
"grad_norm": 9.668580055236816,
"learning_rate": 5.5627462823942245e-06,
"loss": 0.0346,
"step": 19300
},
{
"epoch": 5.0,
"eval_loss": 0.019977210089564323,
"eval_runtime": 88.6374,
"eval_samples_per_second": 5.641,
"eval_steps_per_second": 0.711,
"eval_wer": 3.1069591686784928,
"step": 19315
},
{
"epoch": 5.009060315816723,
"grad_norm": 0.802038848400116,
"learning_rate": 5.548364828716886e-06,
"loss": 0.0186,
"step": 19350
},
{
"epoch": 5.022003624126326,
"grad_norm": 3.1706273555755615,
"learning_rate": 5.53398337503955e-06,
"loss": 0.0236,
"step": 19400
},
{
"epoch": 5.034946932435931,
"grad_norm": 3.518181562423706,
"learning_rate": 5.519601921362212e-06,
"loss": 0.0136,
"step": 19450
},
{
"epoch": 5.047890240745534,
"grad_norm": 4.777484893798828,
"learning_rate": 5.50550809675842e-06,
"loss": 0.012,
"step": 19500
},
{
"epoch": 5.060833549055139,
"grad_norm": 16.247772216796875,
"learning_rate": 5.491126643081083e-06,
"loss": 0.0107,
"step": 19550
},
{
"epoch": 5.073776857364742,
"grad_norm": 1.9312938451766968,
"learning_rate": 5.476745189403745e-06,
"loss": 0.0086,
"step": 19600
},
{
"epoch": 5.086720165674346,
"grad_norm": 0.5916788578033447,
"learning_rate": 5.4623637357264085e-06,
"loss": 0.0128,
"step": 19650
},
{
"epoch": 5.09966347398395,
"grad_norm": 0.7647702097892761,
"learning_rate": 5.44798228204907e-06,
"loss": 0.0127,
"step": 19700
},
{
"epoch": 5.112606782293554,
"grad_norm": 1.958817481994629,
"learning_rate": 5.433600828371732e-06,
"loss": 0.0166,
"step": 19750
},
{
"epoch": 5.125550090603158,
"grad_norm": 5.420085430145264,
"learning_rate": 5.419219374694394e-06,
"loss": 0.0115,
"step": 19800
},
{
"epoch": 5.138493398912762,
"grad_norm": 4.039155960083008,
"learning_rate": 5.404837921017057e-06,
"loss": 0.0099,
"step": 19850
},
{
"epoch": 5.151436707222366,
"grad_norm": 3.975069999694824,
"learning_rate": 5.390456467339719e-06,
"loss": 0.0211,
"step": 19900
},
{
"epoch": 5.16438001553197,
"grad_norm": 2.953425168991089,
"learning_rate": 5.376075013662381e-06,
"loss": 0.0131,
"step": 19950
},
{
"epoch": 5.1773233238415735,
"grad_norm": 0.4892643392086029,
"learning_rate": 5.361693559985044e-06,
"loss": 0.0107,
"step": 20000
},
{
"epoch": 5.190266632151178,
"grad_norm": 0.9816207885742188,
"learning_rate": 5.347312106307706e-06,
"loss": 0.0119,
"step": 20050
},
{
"epoch": 5.2032099404607814,
"grad_norm": 0.6787395477294922,
"learning_rate": 5.332930652630368e-06,
"loss": 0.0107,
"step": 20100
},
{
"epoch": 5.216153248770386,
"grad_norm": 2.689342737197876,
"learning_rate": 5.318549198953031e-06,
"loss": 0.0118,
"step": 20150
},
{
"epoch": 5.229096557079989,
"grad_norm": 1.7226523160934448,
"learning_rate": 5.304167745275693e-06,
"loss": 0.0147,
"step": 20200
},
{
"epoch": 5.242039865389594,
"grad_norm": 0.6867370009422302,
"learning_rate": 5.289786291598355e-06,
"loss": 0.0091,
"step": 20250
},
{
"epoch": 5.254983173699197,
"grad_norm": 2.8953654766082764,
"learning_rate": 5.275404837921018e-06,
"loss": 0.0105,
"step": 20300
},
{
"epoch": 5.267926482008802,
"grad_norm": 6.105691432952881,
"learning_rate": 5.26102338424368e-06,
"loss": 0.0114,
"step": 20350
},
{
"epoch": 5.280869790318405,
"grad_norm": 2.8763232231140137,
"learning_rate": 5.246641930566342e-06,
"loss": 0.0152,
"step": 20400
},
{
"epoch": 5.293813098628009,
"grad_norm": 0.7701404094696045,
"learning_rate": 5.232260476889004e-06,
"loss": 0.0136,
"step": 20450
},
{
"epoch": 5.306756406937613,
"grad_norm": 5.06765604019165,
"learning_rate": 5.217879023211667e-06,
"loss": 0.0118,
"step": 20500
},
{
"epoch": 5.319699715247217,
"grad_norm": 0.8552833795547485,
"learning_rate": 5.203497569534329e-06,
"loss": 0.0152,
"step": 20550
},
{
"epoch": 5.332643023556821,
"grad_norm": 8.169344902038574,
"learning_rate": 5.189116115856992e-06,
"loss": 0.0137,
"step": 20600
},
{
"epoch": 5.345586331866425,
"grad_norm": 2.8536713123321533,
"learning_rate": 5.1747346621796535e-06,
"loss": 0.0179,
"step": 20650
},
{
"epoch": 5.358529640176029,
"grad_norm": 4.006629943847656,
"learning_rate": 5.160353208502315e-06,
"loss": 0.0148,
"step": 20700
},
{
"epoch": 5.371472948485633,
"grad_norm": 2.0308613777160645,
"learning_rate": 5.145971754824978e-06,
"loss": 0.0096,
"step": 20750
},
{
"epoch": 5.3844162567952365,
"grad_norm": 0.8264743089675903,
"learning_rate": 5.131590301147641e-06,
"loss": 0.012,
"step": 20800
},
{
"epoch": 5.397359565104841,
"grad_norm": 1.9350370168685913,
"learning_rate": 5.117208847470303e-06,
"loss": 0.021,
"step": 20850
},
{
"epoch": 5.4103028734144445,
"grad_norm": 3.08841872215271,
"learning_rate": 5.102827393792965e-06,
"loss": 0.01,
"step": 20900
},
{
"epoch": 5.423246181724049,
"grad_norm": 3.0373833179473877,
"learning_rate": 5.088445940115627e-06,
"loss": 0.0123,
"step": 20950
},
{
"epoch": 5.4361894900336525,
"grad_norm": 0.12145110964775085,
"learning_rate": 5.074064486438289e-06,
"loss": 0.0119,
"step": 21000
},
{
"epoch": 5.449132798343257,
"grad_norm": 2.9393069744110107,
"learning_rate": 5.059683032760951e-06,
"loss": 0.0115,
"step": 21050
},
{
"epoch": 5.4620761066528605,
"grad_norm": 0.9441426992416382,
"learning_rate": 5.0453015790836146e-06,
"loss": 0.0127,
"step": 21100
},
{
"epoch": 5.475019414962464,
"grad_norm": 1.0003094673156738,
"learning_rate": 5.030920125406276e-06,
"loss": 0.0166,
"step": 21150
},
{
"epoch": 5.4879627232720685,
"grad_norm": 3.650327682495117,
"learning_rate": 5.016538671728939e-06,
"loss": 0.012,
"step": 21200
},
{
"epoch": 5.500906031581672,
"grad_norm": 2.2948007583618164,
"learning_rate": 5.002157218051601e-06,
"loss": 0.0204,
"step": 21250
},
{
"epoch": 5.5138493398912765,
"grad_norm": 1.089821219444275,
"learning_rate": 4.987775764374264e-06,
"loss": 0.0087,
"step": 21300
},
{
"epoch": 5.52679264820088,
"grad_norm": 2.964750051498413,
"learning_rate": 4.973394310696926e-06,
"loss": 0.009,
"step": 21350
},
{
"epoch": 5.5397359565104844,
"grad_norm": 2.492793083190918,
"learning_rate": 4.9590128570195875e-06,
"loss": 0.013,
"step": 21400
},
{
"epoch": 5.552679264820088,
"grad_norm": 18.94529151916504,
"learning_rate": 4.94463140334225e-06,
"loss": 0.0217,
"step": 21450
},
{
"epoch": 5.565622573129692,
"grad_norm": 0.21670395135879517,
"learning_rate": 4.930249949664913e-06,
"loss": 0.0289,
"step": 21500
},
{
"epoch": 5.578565881439296,
"grad_norm": 1.121397852897644,
"learning_rate": 4.915868495987575e-06,
"loss": 0.0089,
"step": 21550
},
{
"epoch": 5.5915091897488995,
"grad_norm": 2.1545515060424805,
"learning_rate": 4.9014870423102375e-06,
"loss": 0.015,
"step": 21600
},
{
"epoch": 5.604452498058504,
"grad_norm": 13.737982749938965,
"learning_rate": 4.887105588632899e-06,
"loss": 0.0217,
"step": 21650
},
{
"epoch": 5.6173958063681075,
"grad_norm": 3.22420072555542,
"learning_rate": 4.872724134955561e-06,
"loss": 0.0095,
"step": 21700
},
{
"epoch": 5.630339114677712,
"grad_norm": 2.609224557876587,
"learning_rate": 4.858342681278224e-06,
"loss": 0.0156,
"step": 21750
},
{
"epoch": 5.6432824229873155,
"grad_norm": 4.490423679351807,
"learning_rate": 4.843961227600887e-06,
"loss": 0.0101,
"step": 21800
},
{
"epoch": 5.656225731296919,
"grad_norm": 3.0184295177459717,
"learning_rate": 4.8295797739235485e-06,
"loss": 0.0126,
"step": 21850
},
{
"epoch": 5.6691690396065235,
"grad_norm": 4.014559745788574,
"learning_rate": 4.815198320246211e-06,
"loss": 0.0126,
"step": 21900
},
{
"epoch": 5.682112347916127,
"grad_norm": 7.090182304382324,
"learning_rate": 4.800816866568873e-06,
"loss": 0.0182,
"step": 21950
},
{
"epoch": 5.6950556562257315,
"grad_norm": 3.3935253620147705,
"learning_rate": 4.786435412891535e-06,
"loss": 0.0155,
"step": 22000
},
{
"epoch": 5.707998964535335,
"grad_norm": 3.5761795043945312,
"learning_rate": 4.772053959214198e-06,
"loss": 0.0102,
"step": 22050
},
{
"epoch": 5.7209422728449395,
"grad_norm": 2.370244026184082,
"learning_rate": 4.75767250553686e-06,
"loss": 0.0206,
"step": 22100
},
{
"epoch": 5.733885581154543,
"grad_norm": 0.3268122971057892,
"learning_rate": 4.743291051859522e-06,
"loss": 0.0143,
"step": 22150
},
{
"epoch": 5.7468288894641475,
"grad_norm": 1.703277587890625,
"learning_rate": 4.728909598182185e-06,
"loss": 0.0143,
"step": 22200
},
{
"epoch": 5.759772197773751,
"grad_norm": 2.766359329223633,
"learning_rate": 4.714528144504847e-06,
"loss": 0.0114,
"step": 22250
},
{
"epoch": 5.772715506083355,
"grad_norm": 0.3259863555431366,
"learning_rate": 4.700146690827509e-06,
"loss": 0.0114,
"step": 22300
},
{
"epoch": 5.785658814392959,
"grad_norm": 12.3453369140625,
"learning_rate": 4.685765237150171e-06,
"loss": 0.0109,
"step": 22350
},
{
"epoch": 5.798602122702563,
"grad_norm": 4.0501179695129395,
"learning_rate": 4.671383783472834e-06,
"loss": 0.0134,
"step": 22400
},
{
"epoch": 5.811545431012167,
"grad_norm": 3.24855637550354,
"learning_rate": 4.657002329795496e-06,
"loss": 0.015,
"step": 22450
},
{
"epoch": 5.824488739321771,
"grad_norm": 4.302082061767578,
"learning_rate": 4.642620876118159e-06,
"loss": 0.0272,
"step": 22500
},
{
"epoch": 5.837432047631374,
"grad_norm": 6.086068630218506,
"learning_rate": 4.628239422440821e-06,
"loss": 0.0134,
"step": 22550
},
{
"epoch": 5.850375355940979,
"grad_norm": 0.44153615832328796,
"learning_rate": 4.613857968763483e-06,
"loss": 0.0168,
"step": 22600
},
{
"epoch": 5.863318664250582,
"grad_norm": 1.0860470533370972,
"learning_rate": 4.599476515086145e-06,
"loss": 0.0118,
"step": 22650
},
{
"epoch": 5.876261972560187,
"grad_norm": 3.069711923599243,
"learning_rate": 4.585095061408807e-06,
"loss": 0.0111,
"step": 22700
},
{
"epoch": 5.88920528086979,
"grad_norm": 4.1225504875183105,
"learning_rate": 4.57071360773147e-06,
"loss": 0.0118,
"step": 22750
},
{
"epoch": 5.9021485891793946,
"grad_norm": 1.6695607900619507,
"learning_rate": 4.5563321540541325e-06,
"loss": 0.0087,
"step": 22800
},
{
"epoch": 5.915091897488998,
"grad_norm": 3.2536139488220215,
"learning_rate": 4.541950700376794e-06,
"loss": 0.0143,
"step": 22850
},
{
"epoch": 5.9280352057986025,
"grad_norm": 1.4655452966690063,
"learning_rate": 4.527569246699457e-06,
"loss": 0.0152,
"step": 22900
},
{
"epoch": 5.940978514108206,
"grad_norm": 4.390911102294922,
"learning_rate": 4.513187793022119e-06,
"loss": 0.0117,
"step": 22950
},
{
"epoch": 5.95392182241781,
"grad_norm": 0.6409261226654053,
"learning_rate": 4.498806339344781e-06,
"loss": 0.014,
"step": 23000
},
{
"epoch": 5.966865130727414,
"grad_norm": 9.11255931854248,
"learning_rate": 4.4844248856674435e-06,
"loss": 0.0135,
"step": 23050
},
{
"epoch": 5.979808439037018,
"grad_norm": 3.790682554244995,
"learning_rate": 4.470043431990106e-06,
"loss": 0.0109,
"step": 23100
},
{
"epoch": 5.992751747346622,
"grad_norm": 6.534693241119385,
"learning_rate": 4.455661978312768e-06,
"loss": 0.0138,
"step": 23150
},
{
"epoch": 6.0,
"eval_loss": 0.013288498856127262,
"eval_runtime": 88.7687,
"eval_samples_per_second": 5.633,
"eval_steps_per_second": 0.71,
"eval_wer": 2.2567439907630944,
"step": 23178
},
{
"epoch": 6.005695055656226,
"grad_norm": 0.7158689498901367,
"learning_rate": 4.441280524635431e-06,
"loss": 0.0119,
"step": 23200
},
{
"epoch": 6.01863836396583,
"grad_norm": 2.5632243156433105,
"learning_rate": 4.426899070958093e-06,
"loss": 0.008,
"step": 23250
},
{
"epoch": 6.031581672275434,
"grad_norm": 1.491152286529541,
"learning_rate": 4.4125176172807545e-06,
"loss": 0.0084,
"step": 23300
},
{
"epoch": 6.044524980585037,
"grad_norm": 0.8352173566818237,
"learning_rate": 4.398136163603417e-06,
"loss": 0.0121,
"step": 23350
},
{
"epoch": 6.057468288894642,
"grad_norm": 0.7601485848426819,
"learning_rate": 4.38375470992608e-06,
"loss": 0.0084,
"step": 23400
},
{
"epoch": 6.070411597204245,
"grad_norm": 1.012165904045105,
"learning_rate": 4.369373256248742e-06,
"loss": 0.0149,
"step": 23450
},
{
"epoch": 6.08335490551385,
"grad_norm": 0.7509778141975403,
"learning_rate": 4.3549918025714045e-06,
"loss": 0.0045,
"step": 23500
},
{
"epoch": 6.096298213823453,
"grad_norm": 6.754235744476318,
"learning_rate": 4.340610348894066e-06,
"loss": 0.0057,
"step": 23550
},
{
"epoch": 6.109241522133058,
"grad_norm": 0.37281331419944763,
"learning_rate": 4.326228895216728e-06,
"loss": 0.0057,
"step": 23600
},
{
"epoch": 6.122184830442661,
"grad_norm": 0.06306509673595428,
"learning_rate": 4.311847441539391e-06,
"loss": 0.0069,
"step": 23650
},
{
"epoch": 6.135128138752265,
"grad_norm": 0.4581661522388458,
"learning_rate": 4.297465987862054e-06,
"loss": 0.0055,
"step": 23700
},
{
"epoch": 6.148071447061869,
"grad_norm": 0.8070671558380127,
"learning_rate": 4.2830845341847156e-06,
"loss": 0.0087,
"step": 23750
},
{
"epoch": 6.161014755371473,
"grad_norm": 0.4274216294288635,
"learning_rate": 4.268703080507378e-06,
"loss": 0.016,
"step": 23800
},
{
"epoch": 6.173958063681077,
"grad_norm": 0.6105465292930603,
"learning_rate": 4.25432162683004e-06,
"loss": 0.0063,
"step": 23850
},
{
"epoch": 6.186901371990681,
"grad_norm": 0.4398168921470642,
"learning_rate": 4.239940173152703e-06,
"loss": 0.0052,
"step": 23900
},
{
"epoch": 6.199844680300285,
"grad_norm": 2.373279333114624,
"learning_rate": 4.225558719475365e-06,
"loss": 0.008,
"step": 23950
},
{
"epoch": 6.212787988609889,
"grad_norm": 2.742097854614258,
"learning_rate": 4.2111772657980275e-06,
"loss": 0.0062,
"step": 24000
},
{
"epoch": 6.225731296919492,
"grad_norm": 1.6418052911758423,
"learning_rate": 4.19679581212069e-06,
"loss": 0.0056,
"step": 24050
},
{
"epoch": 6.238674605229097,
"grad_norm": 1.0858538150787354,
"learning_rate": 4.182414358443352e-06,
"loss": 0.0047,
"step": 24100
},
{
"epoch": 6.2516179135387,
"grad_norm": 1.6831880807876587,
"learning_rate": 4.168032904766014e-06,
"loss": 0.0045,
"step": 24150
},
{
"epoch": 6.264561221848305,
"grad_norm": 0.5000109076499939,
"learning_rate": 4.153651451088677e-06,
"loss": 0.01,
"step": 24200
},
{
"epoch": 6.277504530157908,
"grad_norm": 0.6169405579566956,
"learning_rate": 4.1392699974113385e-06,
"loss": 0.008,
"step": 24250
},
{
"epoch": 6.290447838467513,
"grad_norm": 1.9843913316726685,
"learning_rate": 4.124888543734001e-06,
"loss": 0.0095,
"step": 24300
},
{
"epoch": 6.303391146777116,
"grad_norm": 1.331559419631958,
"learning_rate": 4.110507090056663e-06,
"loss": 0.0073,
"step": 24350
},
{
"epoch": 6.31633445508672,
"grad_norm": 0.6855106949806213,
"learning_rate": 4.096125636379326e-06,
"loss": 0.0054,
"step": 24400
},
{
"epoch": 6.329277763396324,
"grad_norm": 0.35360315442085266,
"learning_rate": 4.081744182701988e-06,
"loss": 0.006,
"step": 24450
},
{
"epoch": 6.342221071705928,
"grad_norm": 1.6724082231521606,
"learning_rate": 4.06736272902465e-06,
"loss": 0.0032,
"step": 24500
},
{
"epoch": 6.355164380015532,
"grad_norm": 1.0881156921386719,
"learning_rate": 4.052981275347312e-06,
"loss": 0.0052,
"step": 24550
},
{
"epoch": 6.368107688325136,
"grad_norm": 0.6318166255950928,
"learning_rate": 4.038599821669975e-06,
"loss": 0.0081,
"step": 24600
},
{
"epoch": 6.38105099663474,
"grad_norm": 3.3334782123565674,
"learning_rate": 4.024218367992637e-06,
"loss": 0.0121,
"step": 24650
},
{
"epoch": 6.393994304944344,
"grad_norm": 0.2339646816253662,
"learning_rate": 4.0098369143152995e-06,
"loss": 0.0072,
"step": 24700
},
{
"epoch": 6.406937613253948,
"grad_norm": 1.0727429389953613,
"learning_rate": 3.995743089711508e-06,
"loss": 0.0074,
"step": 24750
},
{
"epoch": 6.419880921563552,
"grad_norm": 5.548860549926758,
"learning_rate": 3.981649265107718e-06,
"loss": 0.0072,
"step": 24800
},
{
"epoch": 6.432824229873155,
"grad_norm": 0.37892910838127136,
"learning_rate": 3.9672678114303795e-06,
"loss": 0.0105,
"step": 24850
},
{
"epoch": 6.44576753818276,
"grad_norm": 0.9245821237564087,
"learning_rate": 3.952886357753042e-06,
"loss": 0.0059,
"step": 24900
},
{
"epoch": 6.458710846492363,
"grad_norm": 1.4176559448242188,
"learning_rate": 3.938504904075705e-06,
"loss": 0.005,
"step": 24950
},
{
"epoch": 6.471654154801968,
"grad_norm": 2.894819736480713,
"learning_rate": 3.924123450398367e-06,
"loss": 0.0068,
"step": 25000
},
{
"epoch": 6.484597463111571,
"grad_norm": 1.3159211874008179,
"learning_rate": 3.909741996721029e-06,
"loss": 0.0068,
"step": 25050
},
{
"epoch": 6.497540771421175,
"grad_norm": 1.8089011907577515,
"learning_rate": 3.895360543043691e-06,
"loss": 0.0067,
"step": 25100
},
{
"epoch": 6.510484079730779,
"grad_norm": 1.673920750617981,
"learning_rate": 3.880979089366353e-06,
"loss": 0.0109,
"step": 25150
},
{
"epoch": 6.523427388040383,
"grad_norm": 0.7830101251602173,
"learning_rate": 3.866597635689016e-06,
"loss": 0.0055,
"step": 25200
},
{
"epoch": 6.536370696349987,
"grad_norm": 1.6252669095993042,
"learning_rate": 3.852216182011679e-06,
"loss": 0.0066,
"step": 25250
},
{
"epoch": 6.549314004659591,
"grad_norm": 2.7717552185058594,
"learning_rate": 3.8378347283343405e-06,
"loss": 0.0059,
"step": 25300
},
{
"epoch": 6.562257312969195,
"grad_norm": 7.495051383972168,
"learning_rate": 3.823453274657002e-06,
"loss": 0.015,
"step": 25350
},
{
"epoch": 6.575200621278799,
"grad_norm": 0.43886011838912964,
"learning_rate": 3.809071820979665e-06,
"loss": 0.0046,
"step": 25400
},
{
"epoch": 6.588143929588403,
"grad_norm": 0.8297833204269409,
"learning_rate": 3.7946903673023274e-06,
"loss": 0.0113,
"step": 25450
},
{
"epoch": 6.601087237898007,
"grad_norm": 3.7396538257598877,
"learning_rate": 3.7803089136249893e-06,
"loss": 0.0063,
"step": 25500
},
{
"epoch": 6.61403054620761,
"grad_norm": 3.463552236557007,
"learning_rate": 3.765927459947652e-06,
"loss": 0.0065,
"step": 25550
},
{
"epoch": 6.626973854517215,
"grad_norm": 6.3341450691223145,
"learning_rate": 3.7515460062703143e-06,
"loss": 0.0076,
"step": 25600
},
{
"epoch": 6.639917162826818,
"grad_norm": 1.700925350189209,
"learning_rate": 3.737164552592976e-06,
"loss": 0.0054,
"step": 25650
},
{
"epoch": 6.652860471136423,
"grad_norm": 6.3853044509887695,
"learning_rate": 3.722783098915639e-06,
"loss": 0.0097,
"step": 25700
},
{
"epoch": 6.665803779446026,
"grad_norm": 6.10149621963501,
"learning_rate": 3.7084016452383007e-06,
"loss": 0.0082,
"step": 25750
},
{
"epoch": 6.67874708775563,
"grad_norm": 1.308225393295288,
"learning_rate": 3.694020191560963e-06,
"loss": 0.0099,
"step": 25800
},
{
"epoch": 6.691690396065234,
"grad_norm": 4.188955307006836,
"learning_rate": 3.6796387378836257e-06,
"loss": 0.0112,
"step": 25850
},
{
"epoch": 6.704633704374838,
"grad_norm": 1.8746811151504517,
"learning_rate": 3.6652572842062876e-06,
"loss": 0.0118,
"step": 25900
},
{
"epoch": 6.717577012684442,
"grad_norm": 0.3944805860519409,
"learning_rate": 3.65087583052895e-06,
"loss": 0.0063,
"step": 25950
},
{
"epoch": 6.730520320994046,
"grad_norm": 1.3446645736694336,
"learning_rate": 3.6364943768516126e-06,
"loss": 0.0292,
"step": 26000
},
{
"epoch": 6.74346362930365,
"grad_norm": 10.001498222351074,
"learning_rate": 3.6221129231742745e-06,
"loss": 0.0078,
"step": 26050
},
{
"epoch": 6.756406937613254,
"grad_norm": 3.632220983505249,
"learning_rate": 3.607731469496937e-06,
"loss": 0.0044,
"step": 26100
},
{
"epoch": 6.769350245922858,
"grad_norm": 4.4222259521484375,
"learning_rate": 3.5933500158195995e-06,
"loss": 0.0093,
"step": 26150
},
{
"epoch": 6.782293554232462,
"grad_norm": 1.0133709907531738,
"learning_rate": 3.5789685621422614e-06,
"loss": 0.0072,
"step": 26200
},
{
"epoch": 6.795236862542065,
"grad_norm": 0.1933288425207138,
"learning_rate": 3.564587108464924e-06,
"loss": 0.0063,
"step": 26250
},
{
"epoch": 6.80818017085167,
"grad_norm": 1.596628189086914,
"learning_rate": 3.5502056547875864e-06,
"loss": 0.0055,
"step": 26300
},
{
"epoch": 6.821123479161273,
"grad_norm": 0.2668837010860443,
"learning_rate": 3.5358242011102482e-06,
"loss": 0.0059,
"step": 26350
},
{
"epoch": 6.834066787470878,
"grad_norm": 1.4524122476577759,
"learning_rate": 3.521442747432911e-06,
"loss": 0.0144,
"step": 26400
},
{
"epoch": 6.847010095780481,
"grad_norm": 0.7154669761657715,
"learning_rate": 3.5070612937555732e-06,
"loss": 0.0083,
"step": 26450
},
{
"epoch": 6.859953404090086,
"grad_norm": 3.9259557723999023,
"learning_rate": 3.492679840078235e-06,
"loss": 0.0054,
"step": 26500
},
{
"epoch": 6.872896712399689,
"grad_norm": 2.91253924369812,
"learning_rate": 3.478298386400898e-06,
"loss": 0.0058,
"step": 26550
},
{
"epoch": 6.885840020709293,
"grad_norm": 0.6866968870162964,
"learning_rate": 3.46391693272356e-06,
"loss": 0.0089,
"step": 26600
},
{
"epoch": 6.898783329018897,
"grad_norm": 0.4900106191635132,
"learning_rate": 3.449535479046222e-06,
"loss": 0.015,
"step": 26650
},
{
"epoch": 6.911726637328501,
"grad_norm": 0.8514009118080139,
"learning_rate": 3.4351540253688847e-06,
"loss": 0.0071,
"step": 26700
},
{
"epoch": 6.924669945638105,
"grad_norm": 1.1547664403915405,
"learning_rate": 3.420772571691547e-06,
"loss": 0.007,
"step": 26750
},
{
"epoch": 6.937613253947709,
"grad_norm": 1.6039056777954102,
"learning_rate": 3.406391118014209e-06,
"loss": 0.0049,
"step": 26800
},
{
"epoch": 6.950556562257313,
"grad_norm": 0.2261231243610382,
"learning_rate": 3.3920096643368716e-06,
"loss": 0.006,
"step": 26850
},
{
"epoch": 6.963499870566917,
"grad_norm": 2.033464193344116,
"learning_rate": 3.377628210659534e-06,
"loss": 0.0076,
"step": 26900
},
{
"epoch": 6.97644317887652,
"grad_norm": 2.289121150970459,
"learning_rate": 3.3632467569821957e-06,
"loss": 0.0114,
"step": 26950
},
{
"epoch": 6.989386487186125,
"grad_norm": 1.8845149278640747,
"learning_rate": 3.3488653033048584e-06,
"loss": 0.0067,
"step": 27000
},
{
"epoch": 7.0,
"eval_loss": 0.0072191799990832806,
"eval_runtime": 89.4574,
"eval_samples_per_second": 5.589,
"eval_steps_per_second": 0.704,
"eval_wer": 2.004828382491865,
"step": 27041
},
{
"epoch": 7.002329795495728,
"grad_norm": 0.09408234804868698,
"learning_rate": 3.3344838496275207e-06,
"loss": 0.0059,
"step": 27050
},
{
"epoch": 7.015273103805333,
"grad_norm": 0.22423428297042847,
"learning_rate": 3.3201023959501826e-06,
"loss": 0.0033,
"step": 27100
},
{
"epoch": 7.028216412114936,
"grad_norm": 0.37666457891464233,
"learning_rate": 3.3057209422728453e-06,
"loss": 0.0047,
"step": 27150
},
{
"epoch": 7.041159720424541,
"grad_norm": 0.46821069717407227,
"learning_rate": 3.2913394885955076e-06,
"loss": 0.0023,
"step": 27200
},
{
"epoch": 7.054103028734144,
"grad_norm": 0.20124687254428864,
"learning_rate": 3.2769580349181695e-06,
"loss": 0.0042,
"step": 27250
},
{
"epoch": 7.067046337043748,
"grad_norm": 0.15090468525886536,
"learning_rate": 3.262576581240832e-06,
"loss": 0.0026,
"step": 27300
},
{
"epoch": 7.079989645353352,
"grad_norm": 0.24649310111999512,
"learning_rate": 3.2481951275634945e-06,
"loss": 0.0104,
"step": 27350
},
{
"epoch": 7.092932953662956,
"grad_norm": 0.5062503814697266,
"learning_rate": 3.2338136738861563e-06,
"loss": 0.0044,
"step": 27400
},
{
"epoch": 7.10587626197256,
"grad_norm": 1.3508222103118896,
"learning_rate": 3.219432220208819e-06,
"loss": 0.0038,
"step": 27450
},
{
"epoch": 7.118819570282164,
"grad_norm": 0.41208532452583313,
"learning_rate": 3.2050507665314814e-06,
"loss": 0.0039,
"step": 27500
},
{
"epoch": 7.131762878591768,
"grad_norm": 0.5694621801376343,
"learning_rate": 3.1906693128541436e-06,
"loss": 0.0051,
"step": 27550
},
{
"epoch": 7.144706186901372,
"grad_norm": 0.21710887551307678,
"learning_rate": 3.176287859176806e-06,
"loss": 0.003,
"step": 27600
},
{
"epoch": 7.1576494952109755,
"grad_norm": 18.37504005432129,
"learning_rate": 3.1619064054994682e-06,
"loss": 0.0074,
"step": 27650
},
{
"epoch": 7.17059280352058,
"grad_norm": 0.10800693929195404,
"learning_rate": 3.1475249518221305e-06,
"loss": 0.0098,
"step": 27700
},
{
"epoch": 7.1835361118301835,
"grad_norm": 0.9992019534111023,
"learning_rate": 3.133143498144793e-06,
"loss": 0.0037,
"step": 27750
},
{
"epoch": 7.196479420139788,
"grad_norm": 0.5260515213012695,
"learning_rate": 3.118762044467455e-06,
"loss": 0.0059,
"step": 27800
},
{
"epoch": 7.2094227284493915,
"grad_norm": 0.16195982694625854,
"learning_rate": 3.1043805907901174e-06,
"loss": 0.0033,
"step": 27850
},
{
"epoch": 7.222366036758996,
"grad_norm": 0.0777490958571434,
"learning_rate": 3.0899991371127797e-06,
"loss": 0.0031,
"step": 27900
},
{
"epoch": 7.2353093450685995,
"grad_norm": 2.360994577407837,
"learning_rate": 3.0756176834354416e-06,
"loss": 0.0042,
"step": 27950
},
{
"epoch": 7.248252653378204,
"grad_norm": 0.7095078825950623,
"learning_rate": 3.0612362297581043e-06,
"loss": 0.0024,
"step": 28000
},
{
"epoch": 7.2611959616878075,
"grad_norm": 0.8493836522102356,
"learning_rate": 3.0468547760807666e-06,
"loss": 0.0061,
"step": 28050
},
{
"epoch": 7.274139269997411,
"grad_norm": 3.106424570083618,
"learning_rate": 3.0324733224034284e-06,
"loss": 0.0025,
"step": 28100
},
{
"epoch": 7.2870825783070154,
"grad_norm": 0.9348524808883667,
"learning_rate": 3.018091868726091e-06,
"loss": 0.003,
"step": 28150
},
{
"epoch": 7.300025886616619,
"grad_norm": 4.065819263458252,
"learning_rate": 3.0037104150487534e-06,
"loss": 0.0026,
"step": 28200
},
{
"epoch": 7.312969194926223,
"grad_norm": 0.16182902455329895,
"learning_rate": 2.9893289613714153e-06,
"loss": 0.0035,
"step": 28250
},
{
"epoch": 7.325912503235827,
"grad_norm": 0.36588823795318604,
"learning_rate": 2.974947507694078e-06,
"loss": 0.0042,
"step": 28300
},
{
"epoch": 7.3388558115454305,
"grad_norm": 0.47314420342445374,
"learning_rate": 2.9605660540167403e-06,
"loss": 0.0033,
"step": 28350
},
{
"epoch": 7.351799119855035,
"grad_norm": 7.304609298706055,
"learning_rate": 2.946184600339402e-06,
"loss": 0.0028,
"step": 28400
},
{
"epoch": 7.3647424281646385,
"grad_norm": 0.591309130191803,
"learning_rate": 2.931803146662065e-06,
"loss": 0.0057,
"step": 28450
},
{
"epoch": 7.377685736474243,
"grad_norm": 0.7642752528190613,
"learning_rate": 2.917421692984727e-06,
"loss": 0.0038,
"step": 28500
},
{
"epoch": 7.3906290447838465,
"grad_norm": 0.9871138334274292,
"learning_rate": 2.903040239307389e-06,
"loss": 0.0038,
"step": 28550
},
{
"epoch": 7.403572353093451,
"grad_norm": 0.7224917411804199,
"learning_rate": 2.8886587856300518e-06,
"loss": 0.0057,
"step": 28600
},
{
"epoch": 7.4165156614030545,
"grad_norm": 0.7096822261810303,
"learning_rate": 2.874277331952714e-06,
"loss": 0.0031,
"step": 28650
},
{
"epoch": 7.429458969712659,
"grad_norm": 1.5942363739013672,
"learning_rate": 2.859895878275376e-06,
"loss": 0.0041,
"step": 28700
},
{
"epoch": 7.4424022780222625,
"grad_norm": 0.6390734910964966,
"learning_rate": 2.8455144245980386e-06,
"loss": 0.0031,
"step": 28750
},
{
"epoch": 7.455345586331866,
"grad_norm": 0.8184775114059448,
"learning_rate": 2.831132970920701e-06,
"loss": 0.0088,
"step": 28800
},
{
"epoch": 7.4682888946414705,
"grad_norm": 0.07598695158958435,
"learning_rate": 2.816751517243363e-06,
"loss": 0.0033,
"step": 28850
},
{
"epoch": 7.481232202951074,
"grad_norm": 0.4833034873008728,
"learning_rate": 2.8023700635660255e-06,
"loss": 0.0045,
"step": 28900
},
{
"epoch": 7.4941755112606785,
"grad_norm": 0.30186623334884644,
"learning_rate": 2.787988609888688e-06,
"loss": 0.0053,
"step": 28950
},
{
"epoch": 7.507118819570282,
"grad_norm": 0.9921897053718567,
"learning_rate": 2.7736071562113497e-06,
"loss": 0.0026,
"step": 29000
},
{
"epoch": 7.5200621278798865,
"grad_norm": 2.7632157802581787,
"learning_rate": 2.7592257025340124e-06,
"loss": 0.0123,
"step": 29050
},
{
"epoch": 7.53300543618949,
"grad_norm": 0.11869651824235916,
"learning_rate": 2.7448442488566747e-06,
"loss": 0.0027,
"step": 29100
},
{
"epoch": 7.545948744499094,
"grad_norm": 0.14628329873085022,
"learning_rate": 2.7304627951793374e-06,
"loss": 0.003,
"step": 29150
},
{
"epoch": 7.558892052808698,
"grad_norm": 0.4393390715122223,
"learning_rate": 2.7160813415019993e-06,
"loss": 0.0026,
"step": 29200
},
{
"epoch": 7.571835361118302,
"grad_norm": 2.097261428833008,
"learning_rate": 2.7016998878246616e-06,
"loss": 0.0026,
"step": 29250
},
{
"epoch": 7.584778669427906,
"grad_norm": 0.23214209079742432,
"learning_rate": 2.6873184341473243e-06,
"loss": 0.0152,
"step": 29300
},
{
"epoch": 7.59772197773751,
"grad_norm": 6.099156379699707,
"learning_rate": 2.672936980469986e-06,
"loss": 0.0061,
"step": 29350
},
{
"epoch": 7.610665286047114,
"grad_norm": 1.0887069702148438,
"learning_rate": 2.6585555267926484e-06,
"loss": 0.0119,
"step": 29400
},
{
"epoch": 7.623608594356718,
"grad_norm": 1.0223588943481445,
"learning_rate": 2.644174073115311e-06,
"loss": 0.0027,
"step": 29450
},
{
"epoch": 7.636551902666321,
"grad_norm": 6.282520771026611,
"learning_rate": 2.629792619437973e-06,
"loss": 0.0035,
"step": 29500
},
{
"epoch": 7.6494952109759256,
"grad_norm": 0.21501053869724274,
"learning_rate": 2.6154111657606353e-06,
"loss": 0.009,
"step": 29550
},
{
"epoch": 7.662438519285529,
"grad_norm": 1.1203105449676514,
"learning_rate": 2.6013173411568443e-06,
"loss": 0.006,
"step": 29600
},
{
"epoch": 7.6753818275951335,
"grad_norm": 0.24988429248332977,
"learning_rate": 2.586935887479506e-06,
"loss": 0.0026,
"step": 29650
},
{
"epoch": 7.688325135904737,
"grad_norm": 0.8392144441604614,
"learning_rate": 2.572554433802169e-06,
"loss": 0.0047,
"step": 29700
},
{
"epoch": 7.7012684442143415,
"grad_norm": 0.7785108089447021,
"learning_rate": 2.5581729801248312e-06,
"loss": 0.0037,
"step": 29750
},
{
"epoch": 7.714211752523945,
"grad_norm": 0.2849646806716919,
"learning_rate": 2.543791526447493e-06,
"loss": 0.0027,
"step": 29800
},
{
"epoch": 7.7271550608335495,
"grad_norm": 0.3449094593524933,
"learning_rate": 2.529410072770156e-06,
"loss": 0.0114,
"step": 29850
},
{
"epoch": 7.740098369143153,
"grad_norm": 0.7601585984230042,
"learning_rate": 2.515028619092818e-06,
"loss": 0.0024,
"step": 29900
},
{
"epoch": 7.753041677452757,
"grad_norm": 0.6022003889083862,
"learning_rate": 2.50064716541548e-06,
"loss": 0.005,
"step": 29950
},
{
"epoch": 7.765984985762361,
"grad_norm": 0.08920400589704514,
"learning_rate": 2.4862657117381427e-06,
"loss": 0.0025,
"step": 30000
},
{
"epoch": 7.778928294071965,
"grad_norm": 0.5146584510803223,
"learning_rate": 2.471884258060805e-06,
"loss": 0.0035,
"step": 30050
},
{
"epoch": 7.791871602381569,
"grad_norm": 0.6136813759803772,
"learning_rate": 2.4575028043834673e-06,
"loss": 0.0033,
"step": 30100
},
{
"epoch": 7.804814910691173,
"grad_norm": 5.361100673675537,
"learning_rate": 2.4431213507061295e-06,
"loss": 0.0039,
"step": 30150
},
{
"epoch": 7.817758219000776,
"grad_norm": 0.617695152759552,
"learning_rate": 2.428739897028792e-06,
"loss": 0.0026,
"step": 30200
},
{
"epoch": 7.830701527310381,
"grad_norm": 0.42767393589019775,
"learning_rate": 2.414358443351454e-06,
"loss": 0.0022,
"step": 30250
},
{
"epoch": 7.843644835619984,
"grad_norm": 0.09423399716615677,
"learning_rate": 2.3999769896741164e-06,
"loss": 0.0038,
"step": 30300
},
{
"epoch": 7.856588143929589,
"grad_norm": 0.18421663343906403,
"learning_rate": 2.3855955359967787e-06,
"loss": 0.0048,
"step": 30350
},
{
"epoch": 7.869531452239192,
"grad_norm": 0.05506595969200134,
"learning_rate": 2.371214082319441e-06,
"loss": 0.0034,
"step": 30400
},
{
"epoch": 7.882474760548797,
"grad_norm": 1.0411120653152466,
"learning_rate": 2.3568326286421033e-06,
"loss": 0.0065,
"step": 30450
},
{
"epoch": 7.8954180688584,
"grad_norm": 0.05043673887848854,
"learning_rate": 2.3424511749647656e-06,
"loss": 0.0018,
"step": 30500
},
{
"epoch": 7.908361377168005,
"grad_norm": 0.8202661275863647,
"learning_rate": 2.328069721287428e-06,
"loss": 0.0107,
"step": 30550
},
{
"epoch": 7.921304685477608,
"grad_norm": 1.809882640838623,
"learning_rate": 2.31368826761009e-06,
"loss": 0.0038,
"step": 30600
},
{
"epoch": 7.934247993787212,
"grad_norm": 0.9416866898536682,
"learning_rate": 2.2993068139327525e-06,
"loss": 0.0031,
"step": 30650
},
{
"epoch": 7.947191302096816,
"grad_norm": 0.5891124606132507,
"learning_rate": 2.2849253602554148e-06,
"loss": 0.0023,
"step": 30700
},
{
"epoch": 7.96013461040642,
"grad_norm": 0.15361438691616058,
"learning_rate": 2.270543906578077e-06,
"loss": 0.0021,
"step": 30750
},
{
"epoch": 7.973077918716024,
"grad_norm": 1.8306443691253662,
"learning_rate": 2.2561624529007393e-06,
"loss": 0.0049,
"step": 30800
},
{
"epoch": 7.986021227025628,
"grad_norm": 0.06569012254476547,
"learning_rate": 2.2417809992234016e-06,
"loss": 0.0042,
"step": 30850
},
{
"epoch": 7.998964535335231,
"grad_norm": 0.14215713739395142,
"learning_rate": 2.227399545546064e-06,
"loss": 0.0021,
"step": 30900
},
{
"epoch": 8.0,
"eval_loss": 0.0042533595114946365,
"eval_runtime": 89.1255,
"eval_samples_per_second": 5.61,
"eval_steps_per_second": 0.707,
"eval_wer": 1.490500682271439,
"step": 30904
},
{
"epoch": 8.011907843644835,
"grad_norm": 0.9032062292098999,
"learning_rate": 2.2130180918687262e-06,
"loss": 0.0022,
"step": 30950
},
{
"epoch": 8.02485115195444,
"grad_norm": 0.09088978916406631,
"learning_rate": 2.1986366381913885e-06,
"loss": 0.0016,
"step": 31000
},
{
"epoch": 8.037794460264044,
"grad_norm": 0.1776304394006729,
"learning_rate": 2.184255184514051e-06,
"loss": 0.0063,
"step": 31050
},
{
"epoch": 8.050737768573647,
"grad_norm": 1.3590532541275024,
"learning_rate": 2.169873730836713e-06,
"loss": 0.0018,
"step": 31100
},
{
"epoch": 8.06368107688325,
"grad_norm": 2.1883018016815186,
"learning_rate": 2.1554922771593754e-06,
"loss": 0.0028,
"step": 31150
},
{
"epoch": 8.076624385192856,
"grad_norm": 3.1580357551574707,
"learning_rate": 2.1411108234820377e-06,
"loss": 0.0089,
"step": 31200
},
{
"epoch": 8.08956769350246,
"grad_norm": 0.19922618567943573,
"learning_rate": 2.1267293698047e-06,
"loss": 0.0016,
"step": 31250
},
{
"epoch": 8.102511001812063,
"grad_norm": 0.3133656084537506,
"learning_rate": 2.1123479161273623e-06,
"loss": 0.001,
"step": 31300
},
{
"epoch": 8.115454310121667,
"grad_norm": 0.1543108969926834,
"learning_rate": 2.0979664624500245e-06,
"loss": 0.0071,
"step": 31350
},
{
"epoch": 8.12839761843127,
"grad_norm": 0.06812497228384018,
"learning_rate": 2.083585008772687e-06,
"loss": 0.0057,
"step": 31400
},
{
"epoch": 8.141340926740876,
"grad_norm": 0.7921668887138367,
"learning_rate": 2.069203555095349e-06,
"loss": 0.0019,
"step": 31450
},
{
"epoch": 8.15428423505048,
"grad_norm": 0.7293940782546997,
"learning_rate": 2.0548221014180114e-06,
"loss": 0.0024,
"step": 31500
},
{
"epoch": 8.167227543360083,
"grad_norm": 0.2699018120765686,
"learning_rate": 2.0407282768142205e-06,
"loss": 0.0036,
"step": 31550
},
{
"epoch": 8.180170851669686,
"grad_norm": 1.0701220035552979,
"learning_rate": 2.026346823136883e-06,
"loss": 0.0021,
"step": 31600
},
{
"epoch": 8.193114159979292,
"grad_norm": 0.35062670707702637,
"learning_rate": 2.011965369459545e-06,
"loss": 0.0013,
"step": 31650
},
{
"epoch": 8.206057468288895,
"grad_norm": 2.343193531036377,
"learning_rate": 1.9975839157822073e-06,
"loss": 0.005,
"step": 31700
},
{
"epoch": 8.219000776598499,
"grad_norm": 0.1934152990579605,
"learning_rate": 1.9832024621048696e-06,
"loss": 0.0042,
"step": 31750
},
{
"epoch": 8.231944084908102,
"grad_norm": 1.2814443111419678,
"learning_rate": 1.968821008427532e-06,
"loss": 0.0031,
"step": 31800
},
{
"epoch": 8.244887393217706,
"grad_norm": 0.23100686073303223,
"learning_rate": 1.954439554750194e-06,
"loss": 0.001,
"step": 31850
},
{
"epoch": 8.257830701527311,
"grad_norm": 1.0474891662597656,
"learning_rate": 1.9400581010728565e-06,
"loss": 0.0017,
"step": 31900
},
{
"epoch": 8.270774009836915,
"grad_norm": 0.1752719134092331,
"learning_rate": 1.9256766473955188e-06,
"loss": 0.0025,
"step": 31950
},
{
"epoch": 8.283717318146518,
"grad_norm": 0.07388792932033539,
"learning_rate": 1.911295193718181e-06,
"loss": 0.0012,
"step": 32000
},
{
"epoch": 8.296660626456122,
"grad_norm": 0.2670608460903168,
"learning_rate": 1.8969137400408436e-06,
"loss": 0.0027,
"step": 32050
},
{
"epoch": 8.309603934765725,
"grad_norm": 0.19774726033210754,
"learning_rate": 1.8825322863635057e-06,
"loss": 0.0016,
"step": 32100
},
{
"epoch": 8.32254724307533,
"grad_norm": 0.5155441761016846,
"learning_rate": 1.868150832686168e-06,
"loss": 0.0028,
"step": 32150
},
{
"epoch": 8.335490551384934,
"grad_norm": 0.2909785509109497,
"learning_rate": 1.8537693790088305e-06,
"loss": 0.0013,
"step": 32200
},
{
"epoch": 8.348433859694538,
"grad_norm": 0.20075345039367676,
"learning_rate": 1.8393879253314927e-06,
"loss": 0.0017,
"step": 32250
},
{
"epoch": 8.361377168004141,
"grad_norm": 0.44085758924484253,
"learning_rate": 1.8250064716541548e-06,
"loss": 0.0022,
"step": 32300
},
{
"epoch": 8.374320476313747,
"grad_norm": 1.4371730089187622,
"learning_rate": 1.8106250179768173e-06,
"loss": 0.0013,
"step": 32350
},
{
"epoch": 8.38726378462335,
"grad_norm": 0.15547557175159454,
"learning_rate": 1.7962435642994796e-06,
"loss": 0.0016,
"step": 32400
},
{
"epoch": 8.400207092932954,
"grad_norm": 0.3929384648799896,
"learning_rate": 1.7818621106221417e-06,
"loss": 0.0013,
"step": 32450
},
{
"epoch": 8.413150401242557,
"grad_norm": 0.329222172498703,
"learning_rate": 1.7674806569448042e-06,
"loss": 0.0016,
"step": 32500
},
{
"epoch": 8.42609370955216,
"grad_norm": 0.24657496809959412,
"learning_rate": 1.7530992032674665e-06,
"loss": 0.0033,
"step": 32550
},
{
"epoch": 8.439037017861766,
"grad_norm": 0.1905100792646408,
"learning_rate": 1.7387177495901286e-06,
"loss": 0.0011,
"step": 32600
},
{
"epoch": 8.45198032617137,
"grad_norm": 0.06774311512708664,
"learning_rate": 1.724336295912791e-06,
"loss": 0.0028,
"step": 32650
},
{
"epoch": 8.464923634480973,
"grad_norm": 0.20226095616817474,
"learning_rate": 1.7099548422354534e-06,
"loss": 0.0014,
"step": 32700
},
{
"epoch": 8.477866942790577,
"grad_norm": 0.5388538241386414,
"learning_rate": 1.6955733885581155e-06,
"loss": 0.0085,
"step": 32750
},
{
"epoch": 8.49081025110018,
"grad_norm": 0.04724876210093498,
"learning_rate": 1.681191934880778e-06,
"loss": 0.0015,
"step": 32800
},
{
"epoch": 8.503753559409786,
"grad_norm": 0.1351761817932129,
"learning_rate": 1.6668104812034402e-06,
"loss": 0.002,
"step": 32850
},
{
"epoch": 8.51669686771939,
"grad_norm": 0.16377896070480347,
"learning_rate": 1.6524290275261023e-06,
"loss": 0.0014,
"step": 32900
},
{
"epoch": 8.529640176028993,
"grad_norm": 0.0653943344950676,
"learning_rate": 1.6380475738487648e-06,
"loss": 0.0016,
"step": 32950
},
{
"epoch": 8.542583484338596,
"grad_norm": 0.4834960699081421,
"learning_rate": 1.6236661201714271e-06,
"loss": 0.0034,
"step": 33000
},
{
"epoch": 8.555526792648202,
"grad_norm": 0.437788188457489,
"learning_rate": 1.6092846664940894e-06,
"loss": 0.0052,
"step": 33050
},
{
"epoch": 8.568470100957805,
"grad_norm": 2.195469617843628,
"learning_rate": 1.5949032128167515e-06,
"loss": 0.0036,
"step": 33100
},
{
"epoch": 8.581413409267409,
"grad_norm": 0.12040536105632782,
"learning_rate": 1.580521759139414e-06,
"loss": 0.0031,
"step": 33150
},
{
"epoch": 8.594356717577012,
"grad_norm": 0.2154337614774704,
"learning_rate": 1.5661403054620763e-06,
"loss": 0.0025,
"step": 33200
},
{
"epoch": 8.607300025886616,
"grad_norm": 0.1478249430656433,
"learning_rate": 1.5517588517847384e-06,
"loss": 0.0022,
"step": 33250
},
{
"epoch": 8.620243334196221,
"grad_norm": 0.16750039160251617,
"learning_rate": 1.5373773981074009e-06,
"loss": 0.0021,
"step": 33300
},
{
"epoch": 8.633186642505825,
"grad_norm": 0.38158321380615234,
"learning_rate": 1.5229959444300632e-06,
"loss": 0.001,
"step": 33350
},
{
"epoch": 8.646129950815428,
"grad_norm": 1.1184005737304688,
"learning_rate": 1.5086144907527252e-06,
"loss": 0.007,
"step": 33400
},
{
"epoch": 8.659073259125032,
"grad_norm": 1.4656065702438354,
"learning_rate": 1.4942330370753877e-06,
"loss": 0.0013,
"step": 33450
},
{
"epoch": 8.672016567434635,
"grad_norm": 0.3482512831687927,
"learning_rate": 1.47985158339805e-06,
"loss": 0.0024,
"step": 33500
},
{
"epoch": 8.68495987574424,
"grad_norm": 0.04078083485364914,
"learning_rate": 1.4654701297207121e-06,
"loss": 0.0021,
"step": 33550
},
{
"epoch": 8.697903184053844,
"grad_norm": 0.1383834183216095,
"learning_rate": 1.4510886760433746e-06,
"loss": 0.0025,
"step": 33600
},
{
"epoch": 8.710846492363448,
"grad_norm": 0.1986149251461029,
"learning_rate": 1.436707222366037e-06,
"loss": 0.0011,
"step": 33650
},
{
"epoch": 8.723789800673051,
"grad_norm": 0.5224486589431763,
"learning_rate": 1.422325768688699e-06,
"loss": 0.002,
"step": 33700
},
{
"epoch": 8.736733108982657,
"grad_norm": 0.14714999496936798,
"learning_rate": 1.4079443150113615e-06,
"loss": 0.0024,
"step": 33750
},
{
"epoch": 8.74967641729226,
"grad_norm": 0.07352601736783981,
"learning_rate": 1.3935628613340238e-06,
"loss": 0.0025,
"step": 33800
},
{
"epoch": 8.762619725601864,
"grad_norm": 0.04641982167959213,
"learning_rate": 1.379181407656686e-06,
"loss": 0.0011,
"step": 33850
},
{
"epoch": 8.775563033911467,
"grad_norm": 0.20494569838047028,
"learning_rate": 1.3647999539793484e-06,
"loss": 0.0014,
"step": 33900
},
{
"epoch": 8.788506342221071,
"grad_norm": 0.3108866214752197,
"learning_rate": 1.3504185003020107e-06,
"loss": 0.0031,
"step": 33950
},
{
"epoch": 8.801449650530676,
"grad_norm": 1.0901867151260376,
"learning_rate": 1.336037046624673e-06,
"loss": 0.006,
"step": 34000
},
{
"epoch": 8.81439295884028,
"grad_norm": 0.1557256281375885,
"learning_rate": 1.3216555929473352e-06,
"loss": 0.002,
"step": 34050
},
{
"epoch": 8.827336267149883,
"grad_norm": 0.32064932584762573,
"learning_rate": 1.3072741392699975e-06,
"loss": 0.0016,
"step": 34100
},
{
"epoch": 8.840279575459487,
"grad_norm": 0.035750892013311386,
"learning_rate": 1.2928926855926598e-06,
"loss": 0.0016,
"step": 34150
},
{
"epoch": 8.853222883769092,
"grad_norm": 0.11652498692274094,
"learning_rate": 1.278511231915322e-06,
"loss": 0.0017,
"step": 34200
},
{
"epoch": 8.866166192078696,
"grad_norm": 0.22541067004203796,
"learning_rate": 1.2641297782379844e-06,
"loss": 0.002,
"step": 34250
},
{
"epoch": 8.8791095003883,
"grad_norm": 0.09035801142454147,
"learning_rate": 1.2497483245606467e-06,
"loss": 0.0024,
"step": 34300
},
{
"epoch": 8.892052808697903,
"grad_norm": 0.8051474690437317,
"learning_rate": 1.235366870883309e-06,
"loss": 0.0015,
"step": 34350
},
{
"epoch": 8.904996117007506,
"grad_norm": 0.12439941614866257,
"learning_rate": 1.2209854172059713e-06,
"loss": 0.0014,
"step": 34400
},
{
"epoch": 8.917939425317112,
"grad_norm": 0.0911746621131897,
"learning_rate": 1.2066039635286336e-06,
"loss": 0.0016,
"step": 34450
},
{
"epoch": 8.930882733626715,
"grad_norm": 0.10455431789159775,
"learning_rate": 1.1922225098512959e-06,
"loss": 0.0035,
"step": 34500
},
{
"epoch": 8.943826041936319,
"grad_norm": 0.0844273790717125,
"learning_rate": 1.1778410561739582e-06,
"loss": 0.0012,
"step": 34550
},
{
"epoch": 8.956769350245922,
"grad_norm": 0.1838880330324173,
"learning_rate": 1.1634596024966204e-06,
"loss": 0.003,
"step": 34600
},
{
"epoch": 8.969712658555526,
"grad_norm": 1.8350099325180054,
"learning_rate": 1.1490781488192827e-06,
"loss": 0.0018,
"step": 34650
},
{
"epoch": 8.982655966865131,
"grad_norm": 0.18555675446987152,
"learning_rate": 1.134696695141945e-06,
"loss": 0.0015,
"step": 34700
},
{
"epoch": 8.995599275174735,
"grad_norm": 0.14013200998306274,
"learning_rate": 1.1203152414646073e-06,
"loss": 0.0011,
"step": 34750
},
{
"epoch": 9.0,
"eval_loss": 0.0022843414917588234,
"eval_runtime": 89.3025,
"eval_samples_per_second": 5.599,
"eval_steps_per_second": 0.705,
"eval_wer": 1.1861026556103704,
"step": 34767
},
{
"epoch": 9.008542583484338,
"grad_norm": 0.13073372840881348,
"learning_rate": 1.1059337877872696e-06,
"loss": 0.0014,
"step": 34800
},
{
"epoch": 9.021485891793942,
"grad_norm": 0.13120625913143158,
"learning_rate": 1.091552334109932e-06,
"loss": 0.0007,
"step": 34850
},
{
"epoch": 9.034429200103547,
"grad_norm": 0.15788908302783966,
"learning_rate": 1.0771708804325942e-06,
"loss": 0.0012,
"step": 34900
},
{
"epoch": 9.04737250841315,
"grad_norm": 0.1373605579137802,
"learning_rate": 1.0627894267552565e-06,
"loss": 0.004,
"step": 34950
},
{
"epoch": 9.060315816722754,
"grad_norm": 0.09479701519012451,
"learning_rate": 1.0484079730779188e-06,
"loss": 0.0021,
"step": 35000
},
{
"epoch": 9.073259125032358,
"grad_norm": 0.1045953705906868,
"learning_rate": 1.034026519400581e-06,
"loss": 0.0014,
"step": 35050
},
{
"epoch": 9.086202433341962,
"grad_norm": 0.07537753134965897,
"learning_rate": 1.0196450657232434e-06,
"loss": 0.0008,
"step": 35100
},
{
"epoch": 9.099145741651567,
"grad_norm": 0.5165144801139832,
"learning_rate": 1.0052636120459057e-06,
"loss": 0.002,
"step": 35150
},
{
"epoch": 9.11208904996117,
"grad_norm": 0.13497541844844818,
"learning_rate": 9.90882158368568e-07,
"loss": 0.0009,
"step": 35200
},
{
"epoch": 9.125032358270774,
"grad_norm": 0.06942334771156311,
"learning_rate": 9.765007046912302e-07,
"loss": 0.0012,
"step": 35250
},
{
"epoch": 9.137975666580378,
"grad_norm": 0.19452160596847534,
"learning_rate": 9.621192510138925e-07,
"loss": 0.0014,
"step": 35300
},
{
"epoch": 9.150918974889981,
"grad_norm": 0.12441800534725189,
"learning_rate": 9.477377973365548e-07,
"loss": 0.0009,
"step": 35350
},
{
"epoch": 9.163862283199586,
"grad_norm": 0.1729976236820221,
"learning_rate": 9.333563436592172e-07,
"loss": 0.0008,
"step": 35400
},
{
"epoch": 9.17680559150919,
"grad_norm": 0.12662172317504883,
"learning_rate": 9.189748899818794e-07,
"loss": 0.0015,
"step": 35450
},
{
"epoch": 9.189748899818794,
"grad_norm": 0.07149961590766907,
"learning_rate": 9.048810653780884e-07,
"loss": 0.0017,
"step": 35500
},
{
"epoch": 9.202692208128397,
"grad_norm": 0.09453389793634415,
"learning_rate": 8.904996117007508e-07,
"loss": 0.0008,
"step": 35550
},
{
"epoch": 9.215635516438002,
"grad_norm": 0.06293300539255142,
"learning_rate": 8.76118158023413e-07,
"loss": 0.0009,
"step": 35600
},
{
"epoch": 9.228578824747606,
"grad_norm": 0.09109367430210114,
"learning_rate": 8.617367043460753e-07,
"loss": 0.0007,
"step": 35650
},
{
"epoch": 9.24152213305721,
"grad_norm": 0.12085200101137161,
"learning_rate": 8.473552506687377e-07,
"loss": 0.0018,
"step": 35700
},
{
"epoch": 9.254465441366813,
"grad_norm": 0.11523808538913727,
"learning_rate": 8.329737969913999e-07,
"loss": 0.0032,
"step": 35750
},
{
"epoch": 9.267408749676417,
"grad_norm": 0.28072428703308105,
"learning_rate": 8.185923433140623e-07,
"loss": 0.0008,
"step": 35800
},
{
"epoch": 9.280352057986022,
"grad_norm": 0.08222024142742157,
"learning_rate": 8.042108896367246e-07,
"loss": 0.0007,
"step": 35850
},
{
"epoch": 9.293295366295625,
"grad_norm": 0.29703882336616516,
"learning_rate": 7.898294359593868e-07,
"loss": 0.0011,
"step": 35900
},
{
"epoch": 9.306238674605229,
"grad_norm": 0.19992585480213165,
"learning_rate": 7.754479822820492e-07,
"loss": 0.0007,
"step": 35950
},
{
"epoch": 9.319181982914833,
"grad_norm": 0.04905041307210922,
"learning_rate": 7.610665286047115e-07,
"loss": 0.0009,
"step": 36000
},
{
"epoch": 9.332125291224436,
"grad_norm": 0.15756992995738983,
"learning_rate": 7.466850749273736e-07,
"loss": 0.005,
"step": 36050
},
{
"epoch": 9.345068599534041,
"grad_norm": 0.18442897498607635,
"learning_rate": 7.32303621250036e-07,
"loss": 0.0009,
"step": 36100
},
{
"epoch": 9.358011907843645,
"grad_norm": 0.06329531967639923,
"learning_rate": 7.179221675726982e-07,
"loss": 0.0011,
"step": 36150
},
{
"epoch": 9.370955216153249,
"grad_norm": 0.11927127093076706,
"learning_rate": 7.035407138953606e-07,
"loss": 0.0007,
"step": 36200
},
{
"epoch": 9.383898524462852,
"grad_norm": 0.06844917684793472,
"learning_rate": 6.891592602180229e-07,
"loss": 0.0018,
"step": 36250
},
{
"epoch": 9.396841832772457,
"grad_norm": 0.08744735270738602,
"learning_rate": 6.747778065406851e-07,
"loss": 0.0043,
"step": 36300
},
{
"epoch": 9.409785141082061,
"grad_norm": 0.18723489344120026,
"learning_rate": 6.603963528633475e-07,
"loss": 0.0007,
"step": 36350
},
{
"epoch": 9.422728449391665,
"grad_norm": 0.086359903216362,
"learning_rate": 6.460148991860098e-07,
"loss": 0.0039,
"step": 36400
},
{
"epoch": 9.435671757701268,
"grad_norm": 0.10816863179206848,
"learning_rate": 6.31633445508672e-07,
"loss": 0.0015,
"step": 36450
},
{
"epoch": 9.448615066010872,
"grad_norm": 0.09978567808866501,
"learning_rate": 6.172519918313344e-07,
"loss": 0.0054,
"step": 36500
},
{
"epoch": 9.461558374320477,
"grad_norm": 0.07749635726213455,
"learning_rate": 6.028705381539967e-07,
"loss": 0.0013,
"step": 36550
},
{
"epoch": 9.47450168263008,
"grad_norm": 0.18575559556484222,
"learning_rate": 5.88489084476659e-07,
"loss": 0.0007,
"step": 36600
},
{
"epoch": 9.487444990939684,
"grad_norm": 0.3773919641971588,
"learning_rate": 5.741076307993213e-07,
"loss": 0.0019,
"step": 36650
},
{
"epoch": 9.500388299249288,
"grad_norm": 0.13482239842414856,
"learning_rate": 5.597261771219835e-07,
"loss": 0.0006,
"step": 36700
},
{
"epoch": 9.513331607558893,
"grad_norm": 0.16534963250160217,
"learning_rate": 5.453447234446458e-07,
"loss": 0.0007,
"step": 36750
},
{
"epoch": 9.526274915868497,
"grad_norm": 0.22458544373512268,
"learning_rate": 5.309632697673081e-07,
"loss": 0.0013,
"step": 36800
},
{
"epoch": 9.5392182241781,
"grad_norm": 0.07351688295602798,
"learning_rate": 5.165818160899704e-07,
"loss": 0.0019,
"step": 36850
},
{
"epoch": 9.552161532487704,
"grad_norm": 0.7084305286407471,
"learning_rate": 5.022003624126327e-07,
"loss": 0.0024,
"step": 36900
},
{
"epoch": 9.565104840797307,
"grad_norm": 0.09942048788070679,
"learning_rate": 4.87818908735295e-07,
"loss": 0.0019,
"step": 36950
},
{
"epoch": 9.578048149106912,
"grad_norm": 1.1455363035202026,
"learning_rate": 4.734374550579573e-07,
"loss": 0.0013,
"step": 37000
},
{
"epoch": 9.590991457416516,
"grad_norm": 0.07915141433477402,
"learning_rate": 4.590560013806196e-07,
"loss": 0.0081,
"step": 37050
},
{
"epoch": 9.60393476572612,
"grad_norm": 0.1777876317501068,
"learning_rate": 4.4467454770328193e-07,
"loss": 0.0014,
"step": 37100
},
{
"epoch": 9.616878074035723,
"grad_norm": 0.10249053686857224,
"learning_rate": 4.3029309402594417e-07,
"loss": 0.0007,
"step": 37150
},
{
"epoch": 9.629821382345327,
"grad_norm": 0.10651733726263046,
"learning_rate": 4.1591164034860646e-07,
"loss": 0.0007,
"step": 37200
},
{
"epoch": 9.642764690654932,
"grad_norm": 0.03686549514532089,
"learning_rate": 4.0153018667126875e-07,
"loss": 0.0046,
"step": 37250
},
{
"epoch": 9.655707998964536,
"grad_norm": 0.09571921825408936,
"learning_rate": 3.871487329939311e-07,
"loss": 0.0009,
"step": 37300
},
{
"epoch": 9.66865130727414,
"grad_norm": 0.08373486995697021,
"learning_rate": 3.727672793165934e-07,
"loss": 0.0013,
"step": 37350
},
{
"epoch": 9.681594615583743,
"grad_norm": 3.18973708152771,
"learning_rate": 3.583858256392556e-07,
"loss": 0.0013,
"step": 37400
},
{
"epoch": 9.694537923893346,
"grad_norm": 0.06650124490261078,
"learning_rate": 3.440043719619179e-07,
"loss": 0.0014,
"step": 37450
},
{
"epoch": 9.707481232202952,
"grad_norm": 0.04685758426785469,
"learning_rate": 3.2962291828458026e-07,
"loss": 0.0011,
"step": 37500
},
{
"epoch": 9.720424540512555,
"grad_norm": 0.23590688407421112,
"learning_rate": 3.1524146460724255e-07,
"loss": 0.0009,
"step": 37550
},
{
"epoch": 9.733367848822159,
"grad_norm": 0.6385647058486938,
"learning_rate": 3.0086001092990484e-07,
"loss": 0.0007,
"step": 37600
},
{
"epoch": 9.746311157131762,
"grad_norm": 0.9988199472427368,
"learning_rate": 2.8647855725256713e-07,
"loss": 0.0011,
"step": 37650
},
{
"epoch": 9.759254465441368,
"grad_norm": 0.09181234240531921,
"learning_rate": 2.720971035752294e-07,
"loss": 0.001,
"step": 37700
},
{
"epoch": 9.772197773750971,
"grad_norm": 0.12787938117980957,
"learning_rate": 2.577156498978917e-07,
"loss": 0.0007,
"step": 37750
},
{
"epoch": 9.785141082060575,
"grad_norm": 0.24333082139492035,
"learning_rate": 2.43334196220554e-07,
"loss": 0.0017,
"step": 37800
},
{
"epoch": 9.798084390370178,
"grad_norm": 0.11896287649869919,
"learning_rate": 2.2895274254321627e-07,
"loss": 0.0038,
"step": 37850
},
{
"epoch": 9.811027698679782,
"grad_norm": 0.9559854865074158,
"learning_rate": 2.145712888658786e-07,
"loss": 0.0008,
"step": 37900
},
{
"epoch": 9.823971006989387,
"grad_norm": 0.10168687999248505,
"learning_rate": 2.0018983518854086e-07,
"loss": 0.0009,
"step": 37950
},
{
"epoch": 9.83691431529899,
"grad_norm": 0.5217211246490479,
"learning_rate": 1.8580838151120317e-07,
"loss": 0.0008,
"step": 38000
},
{
"epoch": 9.849857623608594,
"grad_norm": 0.03806522488594055,
"learning_rate": 1.7142692783386547e-07,
"loss": 0.0022,
"step": 38050
},
{
"epoch": 9.862800931918198,
"grad_norm": 0.05464790016412735,
"learning_rate": 1.5704547415652776e-07,
"loss": 0.0007,
"step": 38100
},
{
"epoch": 9.875744240227803,
"grad_norm": 0.16794097423553467,
"learning_rate": 1.4266402047919005e-07,
"loss": 0.0008,
"step": 38150
},
{
"epoch": 9.888687548537407,
"grad_norm": 0.4726152718067169,
"learning_rate": 1.2828256680185234e-07,
"loss": 0.0008,
"step": 38200
},
{
"epoch": 9.90163085684701,
"grad_norm": 0.09927275031805038,
"learning_rate": 1.1390111312451463e-07,
"loss": 0.0009,
"step": 38250
},
{
"epoch": 9.914574165156614,
"grad_norm": 0.07914838194847107,
"learning_rate": 9.951965944717692e-08,
"loss": 0.0013,
"step": 38300
},
{
"epoch": 9.927517473466217,
"grad_norm": 2.0464367866516113,
"learning_rate": 8.513820576983922e-08,
"loss": 0.0014,
"step": 38350
},
{
"epoch": 9.940460781775823,
"grad_norm": 0.0418660007417202,
"learning_rate": 7.075675209250152e-08,
"loss": 0.0008,
"step": 38400
},
{
"epoch": 9.953404090085426,
"grad_norm": 0.08231345564126968,
"learning_rate": 5.637529841516381e-08,
"loss": 0.0007,
"step": 38450
},
{
"epoch": 9.96634739839503,
"grad_norm": 0.05043479800224304,
"learning_rate": 4.19938447378261e-08,
"loss": 0.0009,
"step": 38500
},
{
"epoch": 9.979290706704633,
"grad_norm": 0.11412689834833145,
"learning_rate": 2.7612391060488395e-08,
"loss": 0.0008,
"step": 38550
},
{
"epoch": 9.992234015014237,
"grad_norm": 0.06143497675657272,
"learning_rate": 1.323093738315069e-08,
"loss": 0.0013,
"step": 38600
},
{
"epoch": 10.0,
"eval_loss": 0.0013781202724203467,
"eval_runtime": 89.6083,
"eval_samples_per_second": 5.58,
"eval_steps_per_second": 0.703,
"eval_wer": 0.8082292432035268,
"step": 38630
}
],
"logging_steps": 50,
"max_steps": 38630,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.00436846133248e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}