|
{ |
|
"best_global_step": 38630, |
|
"best_metric": 0.8082292432035268, |
|
"best_model_checkpoint": "./whisper-urdu-base-finetuned/checkpoint-38630", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 38630, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012943308309603935, |
|
"grad_norm": 2.339423418045044, |
|
"learning_rate": 1.2684442143411856e-07, |
|
"loss": 0.0468, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02588661661920787, |
|
"grad_norm": 4.33876371383667, |
|
"learning_rate": 2.5627750453015794e-07, |
|
"loss": 0.0428, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03882992492881181, |
|
"grad_norm": 4.59096622467041, |
|
"learning_rate": 3.8571058762619726e-07, |
|
"loss": 0.054, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05177323323841574, |
|
"grad_norm": 5.390439033508301, |
|
"learning_rate": 5.151436707222367e-07, |
|
"loss": 0.0454, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06471654154801967, |
|
"grad_norm": 3.9699249267578125, |
|
"learning_rate": 6.44576753818276e-07, |
|
"loss": 0.03, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07765984985762361, |
|
"grad_norm": 5.007837295532227, |
|
"learning_rate": 7.740098369143153e-07, |
|
"loss": 0.0364, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09060315816722754, |
|
"grad_norm": 1.7915890216827393, |
|
"learning_rate": 9.034429200103548e-07, |
|
"loss": 0.033, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.10354646647683148, |
|
"grad_norm": 1.8916817903518677, |
|
"learning_rate": 1.032876003106394e-06, |
|
"loss": 0.0453, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11648977478643541, |
|
"grad_norm": 5.6407318115234375, |
|
"learning_rate": 1.1623090862024335e-06, |
|
"loss": 0.0493, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12943308309603935, |
|
"grad_norm": 1.5208466053009033, |
|
"learning_rate": 1.2917421692984728e-06, |
|
"loss": 0.0457, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1423763914056433, |
|
"grad_norm": 0.9354443550109863, |
|
"learning_rate": 1.4211752523945122e-06, |
|
"loss": 0.0486, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15531969971524723, |
|
"grad_norm": 3.7830538749694824, |
|
"learning_rate": 1.5506083354905516e-06, |
|
"loss": 0.0377, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16826300802485114, |
|
"grad_norm": 1.2487603425979614, |
|
"learning_rate": 1.67745275692467e-06, |
|
"loss": 0.069, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.18120631633445508, |
|
"grad_norm": 0.8733393549919128, |
|
"learning_rate": 1.8068858400207096e-06, |
|
"loss": 0.0457, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19414962464405902, |
|
"grad_norm": 3.48146390914917, |
|
"learning_rate": 1.936318923116749e-06, |
|
"loss": 0.0372, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.20709293295366296, |
|
"grad_norm": 3.9726881980895996, |
|
"learning_rate": 2.065752006212788e-06, |
|
"loss": 0.0493, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2200362412632669, |
|
"grad_norm": 1.6841055154800415, |
|
"learning_rate": 2.1951850893088276e-06, |
|
"loss": 0.046, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23297954957287081, |
|
"grad_norm": 1.1303517818450928, |
|
"learning_rate": 2.324618172404867e-06, |
|
"loss": 0.0484, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24592285788247475, |
|
"grad_norm": 1.6165255308151245, |
|
"learning_rate": 2.4540512555009063e-06, |
|
"loss": 0.0617, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2588661661920787, |
|
"grad_norm": 4.751931667327881, |
|
"learning_rate": 2.5834843385969457e-06, |
|
"loss": 0.0516, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27180947450168264, |
|
"grad_norm": 2.619171142578125, |
|
"learning_rate": 2.712917421692985e-06, |
|
"loss": 0.0491, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2847527828112866, |
|
"grad_norm": 3.5204107761383057, |
|
"learning_rate": 2.8423505047890244e-06, |
|
"loss": 0.0451, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2976960911208905, |
|
"grad_norm": 1.6330580711364746, |
|
"learning_rate": 2.971783587885064e-06, |
|
"loss": 0.0329, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.31063939943049446, |
|
"grad_norm": 9.043530464172363, |
|
"learning_rate": 3.101216670981103e-06, |
|
"loss": 0.0528, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3235827077400984, |
|
"grad_norm": 4.8967604637146, |
|
"learning_rate": 3.2306497540771426e-06, |
|
"loss": 0.052, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.3365260160497023, |
|
"grad_norm": 2.9386487007141113, |
|
"learning_rate": 3.360082837173182e-06, |
|
"loss": 0.0431, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3494693243593062, |
|
"grad_norm": 3.271523952484131, |
|
"learning_rate": 3.4895159202692213e-06, |
|
"loss": 0.0435, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36241263266891016, |
|
"grad_norm": 3.5081090927124023, |
|
"learning_rate": 3.6189490033652603e-06, |
|
"loss": 0.0571, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.3753559409785141, |
|
"grad_norm": 5.796409606933594, |
|
"learning_rate": 3.7483820864612997e-06, |
|
"loss": 0.0529, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38829924928811804, |
|
"grad_norm": 3.4982869625091553, |
|
"learning_rate": 3.877815169557339e-06, |
|
"loss": 0.0566, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.401242557597722, |
|
"grad_norm": 4.461122035980225, |
|
"learning_rate": 4.007248252653379e-06, |
|
"loss": 0.0562, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.4141858659073259, |
|
"grad_norm": 3.034180164337158, |
|
"learning_rate": 4.136681335749418e-06, |
|
"loss": 0.0466, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42712917421692986, |
|
"grad_norm": 5.0971598625183105, |
|
"learning_rate": 4.266114418845458e-06, |
|
"loss": 0.0501, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.4400724825265338, |
|
"grad_norm": 5.404353618621826, |
|
"learning_rate": 4.3955475019414965e-06, |
|
"loss": 0.0424, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.45301579083613774, |
|
"grad_norm": 2.975407838821411, |
|
"learning_rate": 4.524980585037536e-06, |
|
"loss": 0.0562, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46595909914574163, |
|
"grad_norm": 8.076371192932129, |
|
"learning_rate": 4.654413668133575e-06, |
|
"loss": 0.0377, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47890240745534557, |
|
"grad_norm": 5.4830451011657715, |
|
"learning_rate": 4.781258089567694e-06, |
|
"loss": 0.0436, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.4918457157649495, |
|
"grad_norm": 2.6184751987457275, |
|
"learning_rate": 4.910691172663733e-06, |
|
"loss": 0.0414, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5047890240745535, |
|
"grad_norm": 4.777491092681885, |
|
"learning_rate": 5.040124255759773e-06, |
|
"loss": 0.0469, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.5177323323841574, |
|
"grad_norm": 8.820015907287598, |
|
"learning_rate": 5.169557338855812e-06, |
|
"loss": 0.0654, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5306756406937613, |
|
"grad_norm": 4.339293003082275, |
|
"learning_rate": 5.298990421951852e-06, |
|
"loss": 0.0473, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5436189490033653, |
|
"grad_norm": 7.162559509277344, |
|
"learning_rate": 5.428423505047891e-06, |
|
"loss": 0.0473, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5565622573129692, |
|
"grad_norm": 7.811385154724121, |
|
"learning_rate": 5.557856588143931e-06, |
|
"loss": 0.0516, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5695055656225732, |
|
"grad_norm": 2.8982834815979004, |
|
"learning_rate": 5.68728967123997e-06, |
|
"loss": 0.0497, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.582448873932177, |
|
"grad_norm": 4.9137749671936035, |
|
"learning_rate": 5.8167227543360085e-06, |
|
"loss": 0.0613, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.595392182241781, |
|
"grad_norm": 4.650545597076416, |
|
"learning_rate": 5.946155837432048e-06, |
|
"loss": 0.0475, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6083354905513849, |
|
"grad_norm": 6.333342552185059, |
|
"learning_rate": 6.075588920528087e-06, |
|
"loss": 0.0793, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.6212787988609889, |
|
"grad_norm": 5.444154739379883, |
|
"learning_rate": 6.205022003624127e-06, |
|
"loss": 0.0573, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6342221071705928, |
|
"grad_norm": 1.8880912065505981, |
|
"learning_rate": 6.334455086720166e-06, |
|
"loss": 0.0409, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6471654154801968, |
|
"grad_norm": 5.30678653717041, |
|
"learning_rate": 6.463888169816206e-06, |
|
"loss": 0.0546, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6601087237898007, |
|
"grad_norm": 11.33090591430664, |
|
"learning_rate": 6.593321252912245e-06, |
|
"loss": 0.055, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6730520320994046, |
|
"grad_norm": 2.6539409160614014, |
|
"learning_rate": 6.722754336008285e-06, |
|
"loss": 0.0466, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6859953404090086, |
|
"grad_norm": 5.582194805145264, |
|
"learning_rate": 6.8521874191043236e-06, |
|
"loss": 0.0559, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6989386487186124, |
|
"grad_norm": 4.70722770690918, |
|
"learning_rate": 6.9816205022003625e-06, |
|
"loss": 0.0505, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7118819570282164, |
|
"grad_norm": 2.8403987884521484, |
|
"learning_rate": 7.111053585296402e-06, |
|
"loss": 0.0508, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.7248252653378203, |
|
"grad_norm": 9.0087890625, |
|
"learning_rate": 7.240486668392441e-06, |
|
"loss": 0.0793, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.7377685736474243, |
|
"grad_norm": 5.515995025634766, |
|
"learning_rate": 7.369919751488481e-06, |
|
"loss": 0.0635, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.7507118819570282, |
|
"grad_norm": 3.9393441677093506, |
|
"learning_rate": 7.49935283458452e-06, |
|
"loss": 0.0509, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7636551902666322, |
|
"grad_norm": 5.0234599113464355, |
|
"learning_rate": 7.62878591768056e-06, |
|
"loss": 0.0528, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7765984985762361, |
|
"grad_norm": 2.9625911712646484, |
|
"learning_rate": 7.758219000776599e-06, |
|
"loss": 0.0435, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.78954180688584, |
|
"grad_norm": 5.779876708984375, |
|
"learning_rate": 7.88765208387264e-06, |
|
"loss": 0.0522, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.802485115195444, |
|
"grad_norm": 1.6520557403564453, |
|
"learning_rate": 8.017085166968678e-06, |
|
"loss": 0.0481, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8154284235050479, |
|
"grad_norm": 10.486522674560547, |
|
"learning_rate": 8.146518250064717e-06, |
|
"loss": 0.0571, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.8283717318146518, |
|
"grad_norm": 5.2048139572143555, |
|
"learning_rate": 8.275951333160756e-06, |
|
"loss": 0.0652, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8413150401242557, |
|
"grad_norm": 6.738128185272217, |
|
"learning_rate": 8.405384416256795e-06, |
|
"loss": 0.0732, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8542583484338597, |
|
"grad_norm": 10.937406539916992, |
|
"learning_rate": 8.534817499352836e-06, |
|
"loss": 0.0928, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.8672016567434636, |
|
"grad_norm": 6.5531182289123535, |
|
"learning_rate": 8.664250582448875e-06, |
|
"loss": 0.0716, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8801449650530676, |
|
"grad_norm": 9.179738998413086, |
|
"learning_rate": 8.793683665544914e-06, |
|
"loss": 0.0589, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8930882733626715, |
|
"grad_norm": 3.9006974697113037, |
|
"learning_rate": 8.923116748640953e-06, |
|
"loss": 0.0626, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.9060315816722755, |
|
"grad_norm": 1.9278124570846558, |
|
"learning_rate": 9.052549831736993e-06, |
|
"loss": 0.0668, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9189748899818794, |
|
"grad_norm": 1.9330800771713257, |
|
"learning_rate": 9.181982914833032e-06, |
|
"loss": 0.0595, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.9319181982914833, |
|
"grad_norm": 6.718048572540283, |
|
"learning_rate": 9.311415997929071e-06, |
|
"loss": 0.0804, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.9448615066010873, |
|
"grad_norm": 6.918898582458496, |
|
"learning_rate": 9.44084908102511e-06, |
|
"loss": 0.0716, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.9578048149106911, |
|
"grad_norm": 4.3917131423950195, |
|
"learning_rate": 9.57028216412115e-06, |
|
"loss": 0.0694, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9707481232202951, |
|
"grad_norm": 11.285859107971191, |
|
"learning_rate": 9.69971524721719e-06, |
|
"loss": 0.0715, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.983691431529899, |
|
"grad_norm": 4.574735641479492, |
|
"learning_rate": 9.829148330313229e-06, |
|
"loss": 0.0601, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.996634739839503, |
|
"grad_norm": 4.355649948120117, |
|
"learning_rate": 9.955992751747347e-06, |
|
"loss": 0.0735, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.07532492280006409, |
|
"eval_runtime": 88.308, |
|
"eval_samples_per_second": 5.662, |
|
"eval_steps_per_second": 0.713, |
|
"eval_wer": 9.278891571323607, |
|
"step": 3863 |
|
}, |
|
{ |
|
"epoch": 1.009578048149107, |
|
"grad_norm": 6.590569019317627, |
|
"learning_rate": 9.990508240572958e-06, |
|
"loss": 0.0535, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.0225213564587108, |
|
"grad_norm": 1.8211477994918823, |
|
"learning_rate": 9.976126786895621e-06, |
|
"loss": 0.0632, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.0354646647683148, |
|
"grad_norm": 6.469516754150391, |
|
"learning_rate": 9.961745333218282e-06, |
|
"loss": 0.0686, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.0484079730779188, |
|
"grad_norm": 8.549825668334961, |
|
"learning_rate": 9.947363879540945e-06, |
|
"loss": 0.0613, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.0613512813875225, |
|
"grad_norm": 6.740059852600098, |
|
"learning_rate": 9.933270054937154e-06, |
|
"loss": 0.0629, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.0742945896971265, |
|
"grad_norm": 6.783405780792236, |
|
"learning_rate": 9.918888601259817e-06, |
|
"loss": 0.1005, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.0872378980067305, |
|
"grad_norm": 3.628119707107544, |
|
"learning_rate": 9.904507147582478e-06, |
|
"loss": 0.0599, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.1001812063163345, |
|
"grad_norm": 10.390827178955078, |
|
"learning_rate": 9.89012569390514e-06, |
|
"loss": 0.0713, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.1131245146259383, |
|
"grad_norm": 4.781597137451172, |
|
"learning_rate": 9.875744240227803e-06, |
|
"loss": 0.0758, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.1260678229355423, |
|
"grad_norm": 2.381582498550415, |
|
"learning_rate": 9.861362786550466e-06, |
|
"loss": 0.0587, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.1390111312451463, |
|
"grad_norm": 3.9035913944244385, |
|
"learning_rate": 9.846981332873127e-06, |
|
"loss": 0.0482, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.1519544395547503, |
|
"grad_norm": 3.4371120929718018, |
|
"learning_rate": 9.83259987919579e-06, |
|
"loss": 0.0584, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.164897747864354, |
|
"grad_norm": 5.601978302001953, |
|
"learning_rate": 9.818218425518452e-06, |
|
"loss": 0.0567, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.177841056173958, |
|
"grad_norm": 5.46005916595459, |
|
"learning_rate": 9.803836971841113e-06, |
|
"loss": 0.0696, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.190784364483562, |
|
"grad_norm": 6.306366443634033, |
|
"learning_rate": 9.789455518163776e-06, |
|
"loss": 0.0628, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.2037276727931658, |
|
"grad_norm": 6.448446273803711, |
|
"learning_rate": 9.775074064486439e-06, |
|
"loss": 0.0712, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.2166709811027698, |
|
"grad_norm": 5.446472644805908, |
|
"learning_rate": 9.760692610809102e-06, |
|
"loss": 0.0549, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.2296142894123738, |
|
"grad_norm": 8.122842788696289, |
|
"learning_rate": 9.746311157131764e-06, |
|
"loss": 0.0804, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.2425575977219778, |
|
"grad_norm": 17.312549591064453, |
|
"learning_rate": 9.731929703454425e-06, |
|
"loss": 0.0598, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.2555009060315816, |
|
"grad_norm": 8.815750122070312, |
|
"learning_rate": 9.717548249777088e-06, |
|
"loss": 0.086, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.2684442143411856, |
|
"grad_norm": 7.290472030639648, |
|
"learning_rate": 9.70316679609975e-06, |
|
"loss": 0.0646, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.2813875226507896, |
|
"grad_norm": 6.4685258865356445, |
|
"learning_rate": 9.688785342422414e-06, |
|
"loss": 0.0717, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.2943308309603934, |
|
"grad_norm": 6.5981974601745605, |
|
"learning_rate": 9.674403888745075e-06, |
|
"loss": 0.0605, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.3072741392699974, |
|
"grad_norm": 4.253500461578369, |
|
"learning_rate": 9.660022435067737e-06, |
|
"loss": 0.0657, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.3202174475796014, |
|
"grad_norm": 14.457869529724121, |
|
"learning_rate": 9.6456409813904e-06, |
|
"loss": 0.0701, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.3331607558892054, |
|
"grad_norm": 10.046952247619629, |
|
"learning_rate": 9.631259527713061e-06, |
|
"loss": 0.0636, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.3461040641988093, |
|
"grad_norm": 0.7242924571037292, |
|
"learning_rate": 9.616878074035725e-06, |
|
"loss": 0.0731, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.3590473725084131, |
|
"grad_norm": 5.0954155921936035, |
|
"learning_rate": 9.602496620358386e-06, |
|
"loss": 0.0768, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.3719906808180171, |
|
"grad_norm": 3.536829710006714, |
|
"learning_rate": 9.588115166681049e-06, |
|
"loss": 0.0676, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.384933989127621, |
|
"grad_norm": 16.024459838867188, |
|
"learning_rate": 9.573733713003712e-06, |
|
"loss": 0.0778, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.3978772974372249, |
|
"grad_norm": 8.860788345336914, |
|
"learning_rate": 9.559352259326373e-06, |
|
"loss": 0.0618, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.4108206057468289, |
|
"grad_norm": 8.013275146484375, |
|
"learning_rate": 9.544970805649036e-06, |
|
"loss": 0.0682, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.4237639140564329, |
|
"grad_norm": 1.6208149194717407, |
|
"learning_rate": 9.530589351971698e-06, |
|
"loss": 0.0658, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.4367072223660369, |
|
"grad_norm": 3.301064968109131, |
|
"learning_rate": 9.516207898294361e-06, |
|
"loss": 0.0578, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.4496505306756406, |
|
"grad_norm": 8.253787994384766, |
|
"learning_rate": 9.501826444617022e-06, |
|
"loss": 0.0613, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.4625938389852446, |
|
"grad_norm": 5.881521224975586, |
|
"learning_rate": 9.487444990939685e-06, |
|
"loss": 0.0652, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.4755371472948486, |
|
"grad_norm": 5.812432289123535, |
|
"learning_rate": 9.473063537262347e-06, |
|
"loss": 0.0632, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.4884804556044524, |
|
"grad_norm": 7.349687576293945, |
|
"learning_rate": 9.458682083585008e-06, |
|
"loss": 0.0633, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.5014237639140564, |
|
"grad_norm": 6.8907694816589355, |
|
"learning_rate": 9.444300629907673e-06, |
|
"loss": 0.0621, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.5143670722236604, |
|
"grad_norm": 6.606383800506592, |
|
"learning_rate": 9.429919176230334e-06, |
|
"loss": 0.0827, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.5273103805332644, |
|
"grad_norm": 7.548374176025391, |
|
"learning_rate": 9.415537722552997e-06, |
|
"loss": 0.0616, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.5402536888428684, |
|
"grad_norm": 2.861259698867798, |
|
"learning_rate": 9.40115626887566e-06, |
|
"loss": 0.0681, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.5531969971524722, |
|
"grad_norm": 1.8648054599761963, |
|
"learning_rate": 9.38677481519832e-06, |
|
"loss": 0.0716, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.566140305462076, |
|
"grad_norm": 8.61605167388916, |
|
"learning_rate": 9.372393361520983e-06, |
|
"loss": 0.0632, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.57908361377168, |
|
"grad_norm": 7.054296493530273, |
|
"learning_rate": 9.358011907843646e-06, |
|
"loss": 0.067, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.592026922081284, |
|
"grad_norm": 16.833148956298828, |
|
"learning_rate": 9.343630454166308e-06, |
|
"loss": 0.0657, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.604970230390888, |
|
"grad_norm": 8.74778938293457, |
|
"learning_rate": 9.32924900048897e-06, |
|
"loss": 0.0824, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.617913538700492, |
|
"grad_norm": 7.7294440269470215, |
|
"learning_rate": 9.314867546811632e-06, |
|
"loss": 0.0756, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.630856847010096, |
|
"grad_norm": 5.917527675628662, |
|
"learning_rate": 9.300486093134295e-06, |
|
"loss": 0.0625, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.6438001553196997, |
|
"grad_norm": 6.116061687469482, |
|
"learning_rate": 9.286104639456958e-06, |
|
"loss": 0.0733, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.6567434636293037, |
|
"grad_norm": 5.308782577514648, |
|
"learning_rate": 9.27172318577962e-06, |
|
"loss": 0.0614, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.6696867719389075, |
|
"grad_norm": 4.262322902679443, |
|
"learning_rate": 9.257341732102281e-06, |
|
"loss": 0.0695, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.6826300802485115, |
|
"grad_norm": 10.316951751708984, |
|
"learning_rate": 9.242960278424944e-06, |
|
"loss": 0.0608, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.6955733885581155, |
|
"grad_norm": 10.672906875610352, |
|
"learning_rate": 9.228578824747607e-06, |
|
"loss": 0.0497, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.7085166968677195, |
|
"grad_norm": 9.800777435302734, |
|
"learning_rate": 9.214197371070268e-06, |
|
"loss": 0.0869, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.7214600051773234, |
|
"grad_norm": 0.5741531848907471, |
|
"learning_rate": 9.19981591739293e-06, |
|
"loss": 0.0719, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.7344033134869272, |
|
"grad_norm": 1.716304063796997, |
|
"learning_rate": 9.185434463715593e-06, |
|
"loss": 0.0819, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.7473466217965312, |
|
"grad_norm": 5.458923816680908, |
|
"learning_rate": 9.171053010038256e-06, |
|
"loss": 0.0911, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.760289930106135, |
|
"grad_norm": 5.717566967010498, |
|
"learning_rate": 9.156671556360917e-06, |
|
"loss": 0.0728, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.773233238415739, |
|
"grad_norm": 2.0101475715637207, |
|
"learning_rate": 9.14229010268358e-06, |
|
"loss": 0.0688, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.786176546725343, |
|
"grad_norm": 4.805422306060791, |
|
"learning_rate": 9.127908649006242e-06, |
|
"loss": 0.0722, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.799119855034947, |
|
"grad_norm": 8.908441543579102, |
|
"learning_rate": 9.113527195328905e-06, |
|
"loss": 0.0742, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.812063163344551, |
|
"grad_norm": 5.328374862670898, |
|
"learning_rate": 9.099145741651568e-06, |
|
"loss": 0.0705, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.8250064716541547, |
|
"grad_norm": 5.544122219085693, |
|
"learning_rate": 9.084764287974229e-06, |
|
"loss": 0.0803, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.8379497799637587, |
|
"grad_norm": 12.470900535583496, |
|
"learning_rate": 9.070382834296892e-06, |
|
"loss": 0.0687, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.8508930882733625, |
|
"grad_norm": 6.980189323425293, |
|
"learning_rate": 9.056001380619553e-06, |
|
"loss": 0.0738, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.8638363965829665, |
|
"grad_norm": 5.287197589874268, |
|
"learning_rate": 9.041619926942215e-06, |
|
"loss": 0.0723, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.8767797048925705, |
|
"grad_norm": 7.992635250091553, |
|
"learning_rate": 9.027238473264878e-06, |
|
"loss": 0.0766, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.8897230132021745, |
|
"grad_norm": 13.58613395690918, |
|
"learning_rate": 9.01285701958754e-06, |
|
"loss": 0.0672, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.9026663215117785, |
|
"grad_norm": 6.183566093444824, |
|
"learning_rate": 8.998475565910203e-06, |
|
"loss": 0.0636, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.9156096298213825, |
|
"grad_norm": 4.56492805480957, |
|
"learning_rate": 8.984094112232865e-06, |
|
"loss": 0.0823, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.9285529381309863, |
|
"grad_norm": 3.8052022457122803, |
|
"learning_rate": 8.969712658555527e-06, |
|
"loss": 0.094, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.9414962464405903, |
|
"grad_norm": 4.8918843269348145, |
|
"learning_rate": 8.95533120487819e-06, |
|
"loss": 0.0707, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.954439554750194, |
|
"grad_norm": 8.336883544921875, |
|
"learning_rate": 8.940949751200853e-06, |
|
"loss": 0.082, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.967382863059798, |
|
"grad_norm": 11.596981048583984, |
|
"learning_rate": 8.926568297523515e-06, |
|
"loss": 0.0914, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.980326171369402, |
|
"grad_norm": 3.7584636211395264, |
|
"learning_rate": 8.912186843846176e-06, |
|
"loss": 0.0697, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.993269479679006, |
|
"grad_norm": 1.8307932615280151, |
|
"learning_rate": 8.898093019242386e-06, |
|
"loss": 0.0597, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.0625610426068306, |
|
"eval_runtime": 88.6808, |
|
"eval_samples_per_second": 5.638, |
|
"eval_steps_per_second": 0.71, |
|
"eval_wer": 7.819880340086072, |
|
"step": 7726 |
|
}, |
|
{ |
|
"epoch": 2.00621278798861, |
|
"grad_norm": 5.234423637390137, |
|
"learning_rate": 8.883711565565049e-06, |
|
"loss": 0.0493, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 2.019156096298214, |
|
"grad_norm": 6.8441386222839355, |
|
"learning_rate": 8.869330111887711e-06, |
|
"loss": 0.0531, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.0320994046078176, |
|
"grad_norm": 11.543621063232422, |
|
"learning_rate": 8.854948658210372e-06, |
|
"loss": 0.0447, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 2.0450427129174216, |
|
"grad_norm": 8.563196182250977, |
|
"learning_rate": 8.840567204533035e-06, |
|
"loss": 0.0466, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.0579860212270256, |
|
"grad_norm": 2.31270432472229, |
|
"learning_rate": 8.826185750855698e-06, |
|
"loss": 0.0487, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 2.0709293295366296, |
|
"grad_norm": 2.1112735271453857, |
|
"learning_rate": 8.811804297178359e-06, |
|
"loss": 0.0424, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.0838726378462336, |
|
"grad_norm": 2.2178971767425537, |
|
"learning_rate": 8.797422843501021e-06, |
|
"loss": 0.0472, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 2.0968159461558376, |
|
"grad_norm": 7.748543739318848, |
|
"learning_rate": 8.783041389823684e-06, |
|
"loss": 0.0467, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.1097592544654415, |
|
"grad_norm": 13.174030303955078, |
|
"learning_rate": 8.768659936146347e-06, |
|
"loss": 0.0582, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 2.122702562775045, |
|
"grad_norm": 8.9285306930542, |
|
"learning_rate": 8.754278482469008e-06, |
|
"loss": 0.0388, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.135645871084649, |
|
"grad_norm": 7.205421447753906, |
|
"learning_rate": 8.73989702879167e-06, |
|
"loss": 0.049, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 2.148589179394253, |
|
"grad_norm": 13.037263870239258, |
|
"learning_rate": 8.725515575114333e-06, |
|
"loss": 0.0463, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.161532487703857, |
|
"grad_norm": 7.653538703918457, |
|
"learning_rate": 8.711134121436996e-06, |
|
"loss": 0.047, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 2.174475796013461, |
|
"grad_norm": 3.7179276943206787, |
|
"learning_rate": 8.696752667759659e-06, |
|
"loss": 0.0455, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.187419104323065, |
|
"grad_norm": 5.476615905761719, |
|
"learning_rate": 8.68237121408232e-06, |
|
"loss": 0.0439, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 2.200362412632669, |
|
"grad_norm": 2.110492467880249, |
|
"learning_rate": 8.667989760404983e-06, |
|
"loss": 0.0411, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.213305720942273, |
|
"grad_norm": 5.301363468170166, |
|
"learning_rate": 8.653608306727645e-06, |
|
"loss": 0.0517, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 2.2262490292518766, |
|
"grad_norm": 9.795352935791016, |
|
"learning_rate": 8.639226853050306e-06, |
|
"loss": 0.0564, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.2391923375614806, |
|
"grad_norm": 1.353123426437378, |
|
"learning_rate": 8.624845399372969e-06, |
|
"loss": 0.0485, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 2.2521356458710846, |
|
"grad_norm": 9.473326683044434, |
|
"learning_rate": 8.610463945695632e-06, |
|
"loss": 0.0645, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.2650789541806886, |
|
"grad_norm": 4.406058311462402, |
|
"learning_rate": 8.596082492018294e-06, |
|
"loss": 0.0542, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 2.2780222624902926, |
|
"grad_norm": 17.06141471862793, |
|
"learning_rate": 8.581701038340955e-06, |
|
"loss": 0.0502, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.2909655707998966, |
|
"grad_norm": 8.37076187133789, |
|
"learning_rate": 8.567319584663618e-06, |
|
"loss": 0.0554, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 2.3039088791095006, |
|
"grad_norm": 10.135491371154785, |
|
"learning_rate": 8.552938130986281e-06, |
|
"loss": 0.0469, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.316852187419104, |
|
"grad_norm": 7.701286315917969, |
|
"learning_rate": 8.538556677308944e-06, |
|
"loss": 0.0468, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 2.329795495728708, |
|
"grad_norm": 0.7516931891441345, |
|
"learning_rate": 8.524175223631606e-06, |
|
"loss": 0.0443, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.342738804038312, |
|
"grad_norm": 8.913509368896484, |
|
"learning_rate": 8.509793769954267e-06, |
|
"loss": 0.0619, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 2.355682112347916, |
|
"grad_norm": 10.149544715881348, |
|
"learning_rate": 8.49541231627693e-06, |
|
"loss": 0.051, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.36862542065752, |
|
"grad_norm": 8.469508171081543, |
|
"learning_rate": 8.481030862599591e-06, |
|
"loss": 0.0481, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 2.381568728967124, |
|
"grad_norm": 2.6739773750305176, |
|
"learning_rate": 8.466649408922255e-06, |
|
"loss": 0.0565, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.394512037276728, |
|
"grad_norm": 7.850672245025635, |
|
"learning_rate": 8.452267955244916e-06, |
|
"loss": 0.0469, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 2.4074553455863317, |
|
"grad_norm": 4.025714874267578, |
|
"learning_rate": 8.43788650156758e-06, |
|
"loss": 0.0554, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.4203986538959357, |
|
"grad_norm": 8.857705116271973, |
|
"learning_rate": 8.423505047890242e-06, |
|
"loss": 0.0449, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 2.4333419622055397, |
|
"grad_norm": 7.978151321411133, |
|
"learning_rate": 8.409123594212903e-06, |
|
"loss": 0.0768, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.4462852705151437, |
|
"grad_norm": 7.44215726852417, |
|
"learning_rate": 8.394742140535566e-06, |
|
"loss": 0.0612, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 2.4592285788247477, |
|
"grad_norm": 3.768834352493286, |
|
"learning_rate": 8.380360686858228e-06, |
|
"loss": 0.0405, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.4721718871343517, |
|
"grad_norm": 4.792430877685547, |
|
"learning_rate": 8.365979233180891e-06, |
|
"loss": 0.049, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 2.4851151954439556, |
|
"grad_norm": 7.07329797744751, |
|
"learning_rate": 8.351597779503554e-06, |
|
"loss": 0.0516, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.498058503753559, |
|
"grad_norm": 7.043036937713623, |
|
"learning_rate": 8.337216325826215e-06, |
|
"loss": 0.0589, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 2.511001812063163, |
|
"grad_norm": 8.471428871154785, |
|
"learning_rate": 8.322834872148878e-06, |
|
"loss": 0.0433, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.523945120372767, |
|
"grad_norm": 5.081391334533691, |
|
"learning_rate": 8.308453418471539e-06, |
|
"loss": 0.0603, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 2.536888428682371, |
|
"grad_norm": 6.474493980407715, |
|
"learning_rate": 8.294071964794203e-06, |
|
"loss": 0.0529, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.549831736991975, |
|
"grad_norm": 6.623453617095947, |
|
"learning_rate": 8.279690511116864e-06, |
|
"loss": 0.0491, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 2.562775045301579, |
|
"grad_norm": 5.389576435089111, |
|
"learning_rate": 8.265309057439527e-06, |
|
"loss": 0.0491, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.575718353611183, |
|
"grad_norm": 8.212656021118164, |
|
"learning_rate": 8.25092760376219e-06, |
|
"loss": 0.0544, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 2.5886616619207867, |
|
"grad_norm": 0.44550031423568726, |
|
"learning_rate": 8.23654615008485e-06, |
|
"loss": 0.0578, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.6016049702303907, |
|
"grad_norm": 1.0146311521530151, |
|
"learning_rate": 8.222164696407513e-06, |
|
"loss": 0.0363, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 2.6145482785399947, |
|
"grad_norm": 2.9840240478515625, |
|
"learning_rate": 8.207783242730176e-06, |
|
"loss": 0.0464, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.6274915868495987, |
|
"grad_norm": 9.97850513458252, |
|
"learning_rate": 8.193401789052839e-06, |
|
"loss": 0.0486, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 2.6404348951592027, |
|
"grad_norm": 7.187142372131348, |
|
"learning_rate": 8.179020335375501e-06, |
|
"loss": 0.0621, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.6533782034688067, |
|
"grad_norm": 8.673319816589355, |
|
"learning_rate": 8.164638881698162e-06, |
|
"loss": 0.0495, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 2.6663215117784107, |
|
"grad_norm": 7.366682529449463, |
|
"learning_rate": 8.150257428020825e-06, |
|
"loss": 0.0426, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.6792648200880143, |
|
"grad_norm": 4.433532238006592, |
|
"learning_rate": 8.135875974343488e-06, |
|
"loss": 0.0489, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 2.6922081283976187, |
|
"grad_norm": 1.0996958017349243, |
|
"learning_rate": 8.12149452066615e-06, |
|
"loss": 0.0534, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.7051514367072222, |
|
"grad_norm": 4.040085792541504, |
|
"learning_rate": 8.107113066988811e-06, |
|
"loss": 0.0456, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 2.7180947450168262, |
|
"grad_norm": 6.1070685386657715, |
|
"learning_rate": 8.092731613311474e-06, |
|
"loss": 0.0685, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.7310380533264302, |
|
"grad_norm": 5.145371913909912, |
|
"learning_rate": 8.078350159634137e-06, |
|
"loss": 0.0537, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 2.7439813616360342, |
|
"grad_norm": 5.4013190269470215, |
|
"learning_rate": 8.063968705956798e-06, |
|
"loss": 0.051, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.7569246699456382, |
|
"grad_norm": 0.9301003217697144, |
|
"learning_rate": 8.049587252279462e-06, |
|
"loss": 0.0469, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 2.769867978255242, |
|
"grad_norm": 2.0756452083587646, |
|
"learning_rate": 8.035205798602123e-06, |
|
"loss": 0.0508, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.782811286564846, |
|
"grad_norm": 5.310762882232666, |
|
"learning_rate": 8.020824344924786e-06, |
|
"loss": 0.0559, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 2.7957545948744498, |
|
"grad_norm": 10.922019004821777, |
|
"learning_rate": 8.006442891247447e-06, |
|
"loss": 0.0558, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.8086979031840538, |
|
"grad_norm": 7.071091651916504, |
|
"learning_rate": 7.99206143757011e-06, |
|
"loss": 0.059, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 2.8216412114936578, |
|
"grad_norm": 8.605588912963867, |
|
"learning_rate": 7.977679983892773e-06, |
|
"loss": 0.0502, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.8345845198032618, |
|
"grad_norm": 5.918191909790039, |
|
"learning_rate": 7.963298530215435e-06, |
|
"loss": 0.044, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 2.8475278281128658, |
|
"grad_norm": 10.315163612365723, |
|
"learning_rate": 7.948917076538098e-06, |
|
"loss": 0.0639, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.8604711364224693, |
|
"grad_norm": 8.651927947998047, |
|
"learning_rate": 7.934535622860759e-06, |
|
"loss": 0.0632, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 2.8734144447320737, |
|
"grad_norm": 6.735222816467285, |
|
"learning_rate": 7.920154169183422e-06, |
|
"loss": 0.0594, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.8863577530416773, |
|
"grad_norm": 8.701018333435059, |
|
"learning_rate": 7.905772715506084e-06, |
|
"loss": 0.067, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 2.8993010613512813, |
|
"grad_norm": 7.249449729919434, |
|
"learning_rate": 7.891391261828745e-06, |
|
"loss": 0.0562, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.9122443696608853, |
|
"grad_norm": 9.386366844177246, |
|
"learning_rate": 7.87700980815141e-06, |
|
"loss": 0.0443, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.9251876779704893, |
|
"grad_norm": 5.82024621963501, |
|
"learning_rate": 7.86262835447407e-06, |
|
"loss": 0.0435, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.9381309862800933, |
|
"grad_norm": 9.714224815368652, |
|
"learning_rate": 7.848246900796734e-06, |
|
"loss": 0.0584, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 2.9510742945896973, |
|
"grad_norm": 10.820244789123535, |
|
"learning_rate": 7.833865447119395e-06, |
|
"loss": 0.051, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.9640176028993013, |
|
"grad_norm": 9.436473846435547, |
|
"learning_rate": 7.819483993442057e-06, |
|
"loss": 0.0762, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 2.976960911208905, |
|
"grad_norm": 4.0043745040893555, |
|
"learning_rate": 7.80510253976472e-06, |
|
"loss": 0.0542, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.989904219518509, |
|
"grad_norm": 6.734024524688721, |
|
"learning_rate": 7.790721086087383e-06, |
|
"loss": 0.0678, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.047223061323165894, |
|
"eval_runtime": 88.8982, |
|
"eval_samples_per_second": 5.624, |
|
"eval_steps_per_second": 0.709, |
|
"eval_wer": 7.588957699170777, |
|
"step": 11589 |
|
}, |
|
{ |
|
"epoch": 3.002847527828113, |
|
"grad_norm": 7.159914970397949, |
|
"learning_rate": 7.776339632410045e-06, |
|
"loss": 0.0545, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 3.015790836137717, |
|
"grad_norm": 8.731955528259277, |
|
"learning_rate": 7.761958178732706e-06, |
|
"loss": 0.0283, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 3.028734144447321, |
|
"grad_norm": 9.880719184875488, |
|
"learning_rate": 7.74757672505537e-06, |
|
"loss": 0.0372, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 3.041677452756925, |
|
"grad_norm": 4.863366603851318, |
|
"learning_rate": 7.733195271378032e-06, |
|
"loss": 0.0301, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 3.054620761066529, |
|
"grad_norm": 3.5925040245056152, |
|
"learning_rate": 7.718813817700693e-06, |
|
"loss": 0.0319, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 3.0675640693761324, |
|
"grad_norm": 5.193197727203369, |
|
"learning_rate": 7.704432364023357e-06, |
|
"loss": 0.0284, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 3.0805073776857363, |
|
"grad_norm": 7.325606822967529, |
|
"learning_rate": 7.690050910346018e-06, |
|
"loss": 0.0221, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 3.0934506859953403, |
|
"grad_norm": 1.1931557655334473, |
|
"learning_rate": 7.675669456668681e-06, |
|
"loss": 0.0318, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 3.1063939943049443, |
|
"grad_norm": 3.640929698944092, |
|
"learning_rate": 7.661288002991342e-06, |
|
"loss": 0.0339, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.1193373026145483, |
|
"grad_norm": 5.155591011047363, |
|
"learning_rate": 7.647194178387553e-06, |
|
"loss": 0.0294, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 3.1322806109241523, |
|
"grad_norm": 4.072420120239258, |
|
"learning_rate": 7.632812724710214e-06, |
|
"loss": 0.0371, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 3.1452239192337563, |
|
"grad_norm": 4.188466548919678, |
|
"learning_rate": 7.618431271032877e-06, |
|
"loss": 0.0253, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 3.15816722754336, |
|
"grad_norm": 7.71252965927124, |
|
"learning_rate": 7.604049817355539e-06, |
|
"loss": 0.0291, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 3.171110535852964, |
|
"grad_norm": 1.7614275217056274, |
|
"learning_rate": 7.589668363678201e-06, |
|
"loss": 0.0477, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 3.184053844162568, |
|
"grad_norm": 6.398949146270752, |
|
"learning_rate": 7.575286910000863e-06, |
|
"loss": 0.0281, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 3.196997152472172, |
|
"grad_norm": 5.648083686828613, |
|
"learning_rate": 7.560905456323526e-06, |
|
"loss": 0.0385, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 3.209940460781776, |
|
"grad_norm": 1.6101973056793213, |
|
"learning_rate": 7.546524002646188e-06, |
|
"loss": 0.0291, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 3.22288376909138, |
|
"grad_norm": 1.436286449432373, |
|
"learning_rate": 7.532142548968851e-06, |
|
"loss": 0.0443, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 3.235827077400984, |
|
"grad_norm": 1.658963680267334, |
|
"learning_rate": 7.517761095291513e-06, |
|
"loss": 0.0371, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.2487703857105874, |
|
"grad_norm": 8.697310447692871, |
|
"learning_rate": 7.5033796416141745e-06, |
|
"loss": 0.0299, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 3.2617136940201914, |
|
"grad_norm": 6.439472198486328, |
|
"learning_rate": 7.488998187936837e-06, |
|
"loss": 0.0308, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 3.2746570023297954, |
|
"grad_norm": 3.5517160892486572, |
|
"learning_rate": 7.4746167342595e-06, |
|
"loss": 0.0357, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 3.2876003106393994, |
|
"grad_norm": 2.6896841526031494, |
|
"learning_rate": 7.460235280582162e-06, |
|
"loss": 0.0298, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 3.3005436189490034, |
|
"grad_norm": 3.5411911010742188, |
|
"learning_rate": 7.4458538269048245e-06, |
|
"loss": 0.0249, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 3.3134869272586074, |
|
"grad_norm": 0.766302227973938, |
|
"learning_rate": 7.431472373227486e-06, |
|
"loss": 0.0207, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 3.3264302355682114, |
|
"grad_norm": 7.974555969238281, |
|
"learning_rate": 7.417090919550148e-06, |
|
"loss": 0.0258, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 3.339373543877815, |
|
"grad_norm": 8.336533546447754, |
|
"learning_rate": 7.40270946587281e-06, |
|
"loss": 0.0335, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 3.352316852187419, |
|
"grad_norm": 4.762045383453369, |
|
"learning_rate": 7.38861564126902e-06, |
|
"loss": 0.0391, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 3.365260160497023, |
|
"grad_norm": 3.297501564025879, |
|
"learning_rate": 7.374234187591682e-06, |
|
"loss": 0.0427, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.378203468806627, |
|
"grad_norm": 5.205377578735352, |
|
"learning_rate": 7.359852733914344e-06, |
|
"loss": 0.0517, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 3.391146777116231, |
|
"grad_norm": 5.596180438995361, |
|
"learning_rate": 7.345471280237007e-06, |
|
"loss": 0.0422, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 3.404090085425835, |
|
"grad_norm": 5.4441657066345215, |
|
"learning_rate": 7.3310898265596695e-06, |
|
"loss": 0.0367, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 3.417033393735439, |
|
"grad_norm": 5.369819641113281, |
|
"learning_rate": 7.316708372882331e-06, |
|
"loss": 0.0245, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 3.429976702045043, |
|
"grad_norm": 1.0381672382354736, |
|
"learning_rate": 7.302326919204994e-06, |
|
"loss": 0.0377, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 3.4429200103546465, |
|
"grad_norm": 2.6047282218933105, |
|
"learning_rate": 7.287945465527656e-06, |
|
"loss": 0.0556, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 3.4558633186642504, |
|
"grad_norm": 3.447537899017334, |
|
"learning_rate": 7.273564011850318e-06, |
|
"loss": 0.0331, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 3.4688066269738544, |
|
"grad_norm": 12.265509605407715, |
|
"learning_rate": 7.259182558172981e-06, |
|
"loss": 0.0458, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 3.4817499352834584, |
|
"grad_norm": 6.993318557739258, |
|
"learning_rate": 7.244801104495643e-06, |
|
"loss": 0.0328, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 3.4946932435930624, |
|
"grad_norm": 1.927647590637207, |
|
"learning_rate": 7.230419650818305e-06, |
|
"loss": 0.036, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.5076365519026664, |
|
"grad_norm": 4.639932632446289, |
|
"learning_rate": 7.216038197140968e-06, |
|
"loss": 0.0359, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 3.52057986021227, |
|
"grad_norm": 6.088189125061035, |
|
"learning_rate": 7.20165674346363e-06, |
|
"loss": 0.0505, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 3.5335231685218744, |
|
"grad_norm": 3.452139377593994, |
|
"learning_rate": 7.187275289786292e-06, |
|
"loss": 0.0334, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 3.546466476831478, |
|
"grad_norm": 3.6713337898254395, |
|
"learning_rate": 7.172893836108954e-06, |
|
"loss": 0.0305, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 3.559409785141082, |
|
"grad_norm": 4.258627891540527, |
|
"learning_rate": 7.158512382431617e-06, |
|
"loss": 0.0257, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 3.572353093450686, |
|
"grad_norm": 7.825601577758789, |
|
"learning_rate": 7.144130928754279e-06, |
|
"loss": 0.0261, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 3.58529640176029, |
|
"grad_norm": 4.783618927001953, |
|
"learning_rate": 7.129749475076942e-06, |
|
"loss": 0.0347, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 3.598239710069894, |
|
"grad_norm": 4.300550937652588, |
|
"learning_rate": 7.1153680213996035e-06, |
|
"loss": 0.029, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 3.611183018379498, |
|
"grad_norm": 10.43149471282959, |
|
"learning_rate": 7.100986567722265e-06, |
|
"loss": 0.0345, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 3.624126326689102, |
|
"grad_norm": 5.455187797546387, |
|
"learning_rate": 7.086605114044929e-06, |
|
"loss": 0.0362, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.6370696349987055, |
|
"grad_norm": 7.492135047912598, |
|
"learning_rate": 7.072223660367591e-06, |
|
"loss": 0.0295, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 3.6500129433083095, |
|
"grad_norm": 5.982784271240234, |
|
"learning_rate": 7.057842206690253e-06, |
|
"loss": 0.0458, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 3.6629562516179135, |
|
"grad_norm": 13.213232040405273, |
|
"learning_rate": 7.043460753012915e-06, |
|
"loss": 0.0299, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 3.6758995599275175, |
|
"grad_norm": 0.33053404092788696, |
|
"learning_rate": 7.029079299335577e-06, |
|
"loss": 0.0272, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 3.6888428682371215, |
|
"grad_norm": 2.8715531826019287, |
|
"learning_rate": 7.014697845658239e-06, |
|
"loss": 0.0406, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 3.7017861765467255, |
|
"grad_norm": 7.417051315307617, |
|
"learning_rate": 7.000316391980903e-06, |
|
"loss": 0.0484, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 3.7147294848563295, |
|
"grad_norm": 7.245136737823486, |
|
"learning_rate": 6.9859349383035645e-06, |
|
"loss": 0.0392, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 3.727672793165933, |
|
"grad_norm": 6.493204116821289, |
|
"learning_rate": 6.971553484626226e-06, |
|
"loss": 0.0301, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 3.740616101475537, |
|
"grad_norm": 7.734640121459961, |
|
"learning_rate": 6.957172030948889e-06, |
|
"loss": 0.0317, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 3.753559409785141, |
|
"grad_norm": 2.9053096771240234, |
|
"learning_rate": 6.942790577271551e-06, |
|
"loss": 0.0381, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.766502718094745, |
|
"grad_norm": 8.95727825164795, |
|
"learning_rate": 6.928409123594213e-06, |
|
"loss": 0.0301, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 3.779446026404349, |
|
"grad_norm": 3.024991512298584, |
|
"learning_rate": 6.914027669916876e-06, |
|
"loss": 0.0371, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 3.792389334713953, |
|
"grad_norm": 2.2586114406585693, |
|
"learning_rate": 6.899646216239538e-06, |
|
"loss": 0.0331, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 3.805332643023557, |
|
"grad_norm": 2.5965662002563477, |
|
"learning_rate": 6.8852647625622e-06, |
|
"loss": 0.0266, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 3.8182759513331606, |
|
"grad_norm": 14.025388717651367, |
|
"learning_rate": 6.870883308884863e-06, |
|
"loss": 0.0325, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 3.8312192596427646, |
|
"grad_norm": 12.150018692016602, |
|
"learning_rate": 6.856501855207525e-06, |
|
"loss": 0.0371, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 3.8441625679523685, |
|
"grad_norm": 5.943263530731201, |
|
"learning_rate": 6.842120401530187e-06, |
|
"loss": 0.029, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 3.8571058762619725, |
|
"grad_norm": 3.0075082778930664, |
|
"learning_rate": 6.82773894785285e-06, |
|
"loss": 0.0423, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 3.8700491845715765, |
|
"grad_norm": 7.091434001922607, |
|
"learning_rate": 6.813357494175512e-06, |
|
"loss": 0.0333, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 3.8829924928811805, |
|
"grad_norm": 9.822648048400879, |
|
"learning_rate": 6.798976040498174e-06, |
|
"loss": 0.0623, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.8959358011907845, |
|
"grad_norm": 4.8575663566589355, |
|
"learning_rate": 6.784594586820837e-06, |
|
"loss": 0.0334, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 3.908879109500388, |
|
"grad_norm": 6.463123321533203, |
|
"learning_rate": 6.7702131331434985e-06, |
|
"loss": 0.0309, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 3.921822417809992, |
|
"grad_norm": 5.0641374588012695, |
|
"learning_rate": 6.75583167946616e-06, |
|
"loss": 0.0332, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 3.934765726119596, |
|
"grad_norm": 2.091432809829712, |
|
"learning_rate": 6.741450225788824e-06, |
|
"loss": 0.0383, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 3.9477090344292, |
|
"grad_norm": 2.943432092666626, |
|
"learning_rate": 6.727068772111486e-06, |
|
"loss": 0.0541, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 3.960652342738804, |
|
"grad_norm": 7.013586044311523, |
|
"learning_rate": 6.712687318434148e-06, |
|
"loss": 0.0281, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 3.973595651048408, |
|
"grad_norm": 5.849566459655762, |
|
"learning_rate": 6.69830586475681e-06, |
|
"loss": 0.0343, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 3.986538959358012, |
|
"grad_norm": 8.706452369689941, |
|
"learning_rate": 6.683924411079472e-06, |
|
"loss": 0.0488, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 3.9994822676676156, |
|
"grad_norm": 3.453444004058838, |
|
"learning_rate": 6.669542957402134e-06, |
|
"loss": 0.0232, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.03156248852610588, |
|
"eval_runtime": 89.3284, |
|
"eval_samples_per_second": 5.597, |
|
"eval_steps_per_second": 0.705, |
|
"eval_wer": 4.15660753647528, |
|
"step": 15452 |
|
}, |
|
{ |
|
"epoch": 4.01242557597722, |
|
"grad_norm": 2.5590574741363525, |
|
"learning_rate": 6.655161503724798e-06, |
|
"loss": 0.0184, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.025368884286824, |
|
"grad_norm": 0.7780801057815552, |
|
"learning_rate": 6.6407800500474595e-06, |
|
"loss": 0.0192, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 4.038312192596428, |
|
"grad_norm": 10.055984497070312, |
|
"learning_rate": 6.626398596370121e-06, |
|
"loss": 0.0238, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 4.051255500906032, |
|
"grad_norm": 1.114243745803833, |
|
"learning_rate": 6.612017142692784e-06, |
|
"loss": 0.025, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 4.064198809215635, |
|
"grad_norm": 3.681232452392578, |
|
"learning_rate": 6.597635689015446e-06, |
|
"loss": 0.02, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 4.07714211752524, |
|
"grad_norm": 8.334362983703613, |
|
"learning_rate": 6.583254235338109e-06, |
|
"loss": 0.028, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 4.090085425834843, |
|
"grad_norm": 12.538928985595703, |
|
"learning_rate": 6.568872781660771e-06, |
|
"loss": 0.0223, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 4.103028734144448, |
|
"grad_norm": 4.050449371337891, |
|
"learning_rate": 6.554491327983433e-06, |
|
"loss": 0.0189, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 4.115972042454051, |
|
"grad_norm": 1.1800466775894165, |
|
"learning_rate": 6.540109874306095e-06, |
|
"loss": 0.0205, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 4.128915350763656, |
|
"grad_norm": 5.886932373046875, |
|
"learning_rate": 6.525728420628758e-06, |
|
"loss": 0.0184, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 4.141858659073259, |
|
"grad_norm": 4.0201497077941895, |
|
"learning_rate": 6.51134696695142e-06, |
|
"loss": 0.0188, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.154801967382863, |
|
"grad_norm": 1.844307780265808, |
|
"learning_rate": 6.4969655132740824e-06, |
|
"loss": 0.0175, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 4.167745275692467, |
|
"grad_norm": 3.237921953201294, |
|
"learning_rate": 6.482584059596745e-06, |
|
"loss": 0.0243, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 4.180688584002071, |
|
"grad_norm": 1.648651123046875, |
|
"learning_rate": 6.468202605919407e-06, |
|
"loss": 0.0166, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 4.193631892311675, |
|
"grad_norm": 5.503207206726074, |
|
"learning_rate": 6.453821152242069e-06, |
|
"loss": 0.0216, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 4.206575200621279, |
|
"grad_norm": 5.58413553237915, |
|
"learning_rate": 6.439439698564731e-06, |
|
"loss": 0.0269, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 4.219518508930883, |
|
"grad_norm": 3.219493865966797, |
|
"learning_rate": 6.4250582448873935e-06, |
|
"loss": 0.0154, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 4.232461817240487, |
|
"grad_norm": 7.937684059143066, |
|
"learning_rate": 6.410676791210056e-06, |
|
"loss": 0.0177, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 4.24540512555009, |
|
"grad_norm": 4.213293552398682, |
|
"learning_rate": 6.396295337532719e-06, |
|
"loss": 0.0202, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 4.258348433859695, |
|
"grad_norm": 3.651660680770874, |
|
"learning_rate": 6.381913883855381e-06, |
|
"loss": 0.0213, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 4.271291742169298, |
|
"grad_norm": 5.593703746795654, |
|
"learning_rate": 6.367532430178043e-06, |
|
"loss": 0.0158, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.284235050478903, |
|
"grad_norm": 8.395854949951172, |
|
"learning_rate": 6.3531509765007045e-06, |
|
"loss": 0.0203, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 4.297178358788506, |
|
"grad_norm": 5.291663646697998, |
|
"learning_rate": 6.338769522823367e-06, |
|
"loss": 0.0182, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 4.310121667098111, |
|
"grad_norm": 2.6728503704071045, |
|
"learning_rate": 6.32438806914603e-06, |
|
"loss": 0.0147, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 4.323064975407714, |
|
"grad_norm": 2.701340675354004, |
|
"learning_rate": 6.310006615468693e-06, |
|
"loss": 0.032, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 4.336008283717318, |
|
"grad_norm": 5.171128273010254, |
|
"learning_rate": 6.2956251617913545e-06, |
|
"loss": 0.0182, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 4.348951592026922, |
|
"grad_norm": 8.16347885131836, |
|
"learning_rate": 6.281243708114016e-06, |
|
"loss": 0.0359, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 4.361894900336526, |
|
"grad_norm": 6.198201656341553, |
|
"learning_rate": 6.266862254436678e-06, |
|
"loss": 0.0261, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 4.37483820864613, |
|
"grad_norm": 5.718491077423096, |
|
"learning_rate": 6.252480800759341e-06, |
|
"loss": 0.0193, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 4.387781516955734, |
|
"grad_norm": 8.977401733398438, |
|
"learning_rate": 6.238099347082004e-06, |
|
"loss": 0.021, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 4.400724825265338, |
|
"grad_norm": 2.3729536533355713, |
|
"learning_rate": 6.223717893404666e-06, |
|
"loss": 0.0207, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.413668133574942, |
|
"grad_norm": 3.5691659450531006, |
|
"learning_rate": 6.209336439727328e-06, |
|
"loss": 0.0256, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 4.426611441884546, |
|
"grad_norm": 3.9508790969848633, |
|
"learning_rate": 6.195242615123537e-06, |
|
"loss": 0.0198, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 4.43955475019415, |
|
"grad_norm": 6.141160488128662, |
|
"learning_rate": 6.1808611614462e-06, |
|
"loss": 0.0187, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 4.452498058503753, |
|
"grad_norm": 0.2836528420448303, |
|
"learning_rate": 6.166479707768862e-06, |
|
"loss": 0.0187, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 4.465441366813358, |
|
"grad_norm": 6.005315780639648, |
|
"learning_rate": 6.152098254091524e-06, |
|
"loss": 0.0261, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 4.478384675122961, |
|
"grad_norm": 2.222322702407837, |
|
"learning_rate": 6.137716800414186e-06, |
|
"loss": 0.0183, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 4.491327983432566, |
|
"grad_norm": 4.63626766204834, |
|
"learning_rate": 6.123335346736849e-06, |
|
"loss": 0.0209, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 4.504271291742169, |
|
"grad_norm": 1.04603111743927, |
|
"learning_rate": 6.108953893059511e-06, |
|
"loss": 0.0343, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 4.517214600051773, |
|
"grad_norm": 0.18591836094856262, |
|
"learning_rate": 6.094572439382173e-06, |
|
"loss": 0.0205, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 4.530157908361377, |
|
"grad_norm": 4.46800422668457, |
|
"learning_rate": 6.080190985704836e-06, |
|
"loss": 0.0307, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.543101216670981, |
|
"grad_norm": 2.5701541900634766, |
|
"learning_rate": 6.065809532027498e-06, |
|
"loss": 0.0201, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 4.556044524980585, |
|
"grad_norm": 3.805527448654175, |
|
"learning_rate": 6.05142807835016e-06, |
|
"loss": 0.0247, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 4.568987833290189, |
|
"grad_norm": 5.005966663360596, |
|
"learning_rate": 6.037046624672822e-06, |
|
"loss": 0.0166, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 4.581931141599793, |
|
"grad_norm": 2.1261184215545654, |
|
"learning_rate": 6.022665170995484e-06, |
|
"loss": 0.0192, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 4.594874449909397, |
|
"grad_norm": 3.359769582748413, |
|
"learning_rate": 6.008283717318147e-06, |
|
"loss": 0.0186, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 4.607817758219001, |
|
"grad_norm": 4.658329010009766, |
|
"learning_rate": 5.99390226364081e-06, |
|
"loss": 0.0156, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 4.620761066528605, |
|
"grad_norm": 1.4093743562698364, |
|
"learning_rate": 5.979520809963472e-06, |
|
"loss": 0.0249, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 4.633704374838208, |
|
"grad_norm": 7.107546806335449, |
|
"learning_rate": 5.9651393562861336e-06, |
|
"loss": 0.019, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 4.646647683147813, |
|
"grad_norm": 3.7134788036346436, |
|
"learning_rate": 5.9507579026087954e-06, |
|
"loss": 0.0316, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 4.659590991457416, |
|
"grad_norm": 9.994954109191895, |
|
"learning_rate": 5.936376448931458e-06, |
|
"loss": 0.021, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.672534299767021, |
|
"grad_norm": 5.7871527671813965, |
|
"learning_rate": 5.921994995254121e-06, |
|
"loss": 0.0201, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 4.685477608076624, |
|
"grad_norm": 4.141567230224609, |
|
"learning_rate": 5.9076135415767836e-06, |
|
"loss": 0.0341, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 4.698420916386228, |
|
"grad_norm": 3.703082799911499, |
|
"learning_rate": 5.8932320878994454e-06, |
|
"loss": 0.0209, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 4.711364224695832, |
|
"grad_norm": 1.5418035984039307, |
|
"learning_rate": 5.878850634222107e-06, |
|
"loss": 0.0205, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 4.724307533005436, |
|
"grad_norm": 2.696366310119629, |
|
"learning_rate": 5.864469180544769e-06, |
|
"loss": 0.0322, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 4.73725084131504, |
|
"grad_norm": 12.889842987060547, |
|
"learning_rate": 5.850087726867433e-06, |
|
"loss": 0.0299, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 4.750194149624644, |
|
"grad_norm": 14.697345733642578, |
|
"learning_rate": 5.835706273190095e-06, |
|
"loss": 0.021, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 4.763137457934248, |
|
"grad_norm": 14.185206413269043, |
|
"learning_rate": 5.821324819512757e-06, |
|
"loss": 0.028, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 4.776080766243852, |
|
"grad_norm": 1.6946377754211426, |
|
"learning_rate": 5.806943365835419e-06, |
|
"loss": 0.0407, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 4.789024074553456, |
|
"grad_norm": 0.4647742509841919, |
|
"learning_rate": 5.792561912158081e-06, |
|
"loss": 0.0245, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.80196738286306, |
|
"grad_norm": 0.9601898193359375, |
|
"learning_rate": 5.778180458480743e-06, |
|
"loss": 0.023, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 4.814910691172663, |
|
"grad_norm": 8.700115203857422, |
|
"learning_rate": 5.7637990048034065e-06, |
|
"loss": 0.0197, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 4.827853999482268, |
|
"grad_norm": 0.46070945262908936, |
|
"learning_rate": 5.749417551126068e-06, |
|
"loss": 0.0207, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 4.840797307791871, |
|
"grad_norm": 3.087283134460449, |
|
"learning_rate": 5.735036097448731e-06, |
|
"loss": 0.0228, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 4.853740616101476, |
|
"grad_norm": 2.9574930667877197, |
|
"learning_rate": 5.720654643771393e-06, |
|
"loss": 0.0251, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 4.866683924411079, |
|
"grad_norm": 0.7309446334838867, |
|
"learning_rate": 5.706273190094055e-06, |
|
"loss": 0.0209, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 4.879627232720684, |
|
"grad_norm": 3.813610553741455, |
|
"learning_rate": 5.691891736416717e-06, |
|
"loss": 0.0179, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 4.892570541030287, |
|
"grad_norm": 6.62191104888916, |
|
"learning_rate": 5.67751028273938e-06, |
|
"loss": 0.0229, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 4.905513849339892, |
|
"grad_norm": 1.3516626358032227, |
|
"learning_rate": 5.663128829062042e-06, |
|
"loss": 0.0202, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 4.918457157649495, |
|
"grad_norm": 8.537408828735352, |
|
"learning_rate": 5.648747375384705e-06, |
|
"loss": 0.0219, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.931400465959099, |
|
"grad_norm": 7.586127758026123, |
|
"learning_rate": 5.634365921707367e-06, |
|
"loss": 0.0263, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 4.944343774268703, |
|
"grad_norm": 2.9053454399108887, |
|
"learning_rate": 5.6199844680300286e-06, |
|
"loss": 0.0173, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 4.957287082578307, |
|
"grad_norm": 3.9602510929107666, |
|
"learning_rate": 5.60560301435269e-06, |
|
"loss": 0.0216, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 4.970230390887911, |
|
"grad_norm": 8.381765365600586, |
|
"learning_rate": 5.591221560675354e-06, |
|
"loss": 0.0213, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 4.983173699197515, |
|
"grad_norm": 3.1814215183258057, |
|
"learning_rate": 5.577127736071563e-06, |
|
"loss": 0.028, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 4.996117007507118, |
|
"grad_norm": 9.668580055236816, |
|
"learning_rate": 5.5627462823942245e-06, |
|
"loss": 0.0346, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.019977210089564323, |
|
"eval_runtime": 88.6374, |
|
"eval_samples_per_second": 5.641, |
|
"eval_steps_per_second": 0.711, |
|
"eval_wer": 3.1069591686784928, |
|
"step": 19315 |
|
}, |
|
{ |
|
"epoch": 5.009060315816723, |
|
"grad_norm": 0.802038848400116, |
|
"learning_rate": 5.548364828716886e-06, |
|
"loss": 0.0186, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 5.022003624126326, |
|
"grad_norm": 3.1706273555755615, |
|
"learning_rate": 5.53398337503955e-06, |
|
"loss": 0.0236, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 5.034946932435931, |
|
"grad_norm": 3.518181562423706, |
|
"learning_rate": 5.519601921362212e-06, |
|
"loss": 0.0136, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 5.047890240745534, |
|
"grad_norm": 4.777484893798828, |
|
"learning_rate": 5.50550809675842e-06, |
|
"loss": 0.012, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 5.060833549055139, |
|
"grad_norm": 16.247772216796875, |
|
"learning_rate": 5.491126643081083e-06, |
|
"loss": 0.0107, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 5.073776857364742, |
|
"grad_norm": 1.9312938451766968, |
|
"learning_rate": 5.476745189403745e-06, |
|
"loss": 0.0086, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 5.086720165674346, |
|
"grad_norm": 0.5916788578033447, |
|
"learning_rate": 5.4623637357264085e-06, |
|
"loss": 0.0128, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 5.09966347398395, |
|
"grad_norm": 0.7647702097892761, |
|
"learning_rate": 5.44798228204907e-06, |
|
"loss": 0.0127, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 5.112606782293554, |
|
"grad_norm": 1.958817481994629, |
|
"learning_rate": 5.433600828371732e-06, |
|
"loss": 0.0166, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 5.125550090603158, |
|
"grad_norm": 5.420085430145264, |
|
"learning_rate": 5.419219374694394e-06, |
|
"loss": 0.0115, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 5.138493398912762, |
|
"grad_norm": 4.039155960083008, |
|
"learning_rate": 5.404837921017057e-06, |
|
"loss": 0.0099, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 5.151436707222366, |
|
"grad_norm": 3.975069999694824, |
|
"learning_rate": 5.390456467339719e-06, |
|
"loss": 0.0211, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 5.16438001553197, |
|
"grad_norm": 2.953425168991089, |
|
"learning_rate": 5.376075013662381e-06, |
|
"loss": 0.0131, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 5.1773233238415735, |
|
"grad_norm": 0.4892643392086029, |
|
"learning_rate": 5.361693559985044e-06, |
|
"loss": 0.0107, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.190266632151178, |
|
"grad_norm": 0.9816207885742188, |
|
"learning_rate": 5.347312106307706e-06, |
|
"loss": 0.0119, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 5.2032099404607814, |
|
"grad_norm": 0.6787395477294922, |
|
"learning_rate": 5.332930652630368e-06, |
|
"loss": 0.0107, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 5.216153248770386, |
|
"grad_norm": 2.689342737197876, |
|
"learning_rate": 5.318549198953031e-06, |
|
"loss": 0.0118, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 5.229096557079989, |
|
"grad_norm": 1.7226523160934448, |
|
"learning_rate": 5.304167745275693e-06, |
|
"loss": 0.0147, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 5.242039865389594, |
|
"grad_norm": 0.6867370009422302, |
|
"learning_rate": 5.289786291598355e-06, |
|
"loss": 0.0091, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 5.254983173699197, |
|
"grad_norm": 2.8953654766082764, |
|
"learning_rate": 5.275404837921018e-06, |
|
"loss": 0.0105, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 5.267926482008802, |
|
"grad_norm": 6.105691432952881, |
|
"learning_rate": 5.26102338424368e-06, |
|
"loss": 0.0114, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 5.280869790318405, |
|
"grad_norm": 2.8763232231140137, |
|
"learning_rate": 5.246641930566342e-06, |
|
"loss": 0.0152, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 5.293813098628009, |
|
"grad_norm": 0.7701404094696045, |
|
"learning_rate": 5.232260476889004e-06, |
|
"loss": 0.0136, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 5.306756406937613, |
|
"grad_norm": 5.06765604019165, |
|
"learning_rate": 5.217879023211667e-06, |
|
"loss": 0.0118, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 5.319699715247217, |
|
"grad_norm": 0.8552833795547485, |
|
"learning_rate": 5.203497569534329e-06, |
|
"loss": 0.0152, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 5.332643023556821, |
|
"grad_norm": 8.169344902038574, |
|
"learning_rate": 5.189116115856992e-06, |
|
"loss": 0.0137, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 5.345586331866425, |
|
"grad_norm": 2.8536713123321533, |
|
"learning_rate": 5.1747346621796535e-06, |
|
"loss": 0.0179, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 5.358529640176029, |
|
"grad_norm": 4.006629943847656, |
|
"learning_rate": 5.160353208502315e-06, |
|
"loss": 0.0148, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 5.371472948485633, |
|
"grad_norm": 2.0308613777160645, |
|
"learning_rate": 5.145971754824978e-06, |
|
"loss": 0.0096, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 5.3844162567952365, |
|
"grad_norm": 0.8264743089675903, |
|
"learning_rate": 5.131590301147641e-06, |
|
"loss": 0.012, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 5.397359565104841, |
|
"grad_norm": 1.9350370168685913, |
|
"learning_rate": 5.117208847470303e-06, |
|
"loss": 0.021, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 5.4103028734144445, |
|
"grad_norm": 3.08841872215271, |
|
"learning_rate": 5.102827393792965e-06, |
|
"loss": 0.01, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 5.423246181724049, |
|
"grad_norm": 3.0373833179473877, |
|
"learning_rate": 5.088445940115627e-06, |
|
"loss": 0.0123, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 5.4361894900336525, |
|
"grad_norm": 0.12145110964775085, |
|
"learning_rate": 5.074064486438289e-06, |
|
"loss": 0.0119, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.449132798343257, |
|
"grad_norm": 2.9393069744110107, |
|
"learning_rate": 5.059683032760951e-06, |
|
"loss": 0.0115, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 5.4620761066528605, |
|
"grad_norm": 0.9441426992416382, |
|
"learning_rate": 5.0453015790836146e-06, |
|
"loss": 0.0127, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 5.475019414962464, |
|
"grad_norm": 1.0003094673156738, |
|
"learning_rate": 5.030920125406276e-06, |
|
"loss": 0.0166, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 5.4879627232720685, |
|
"grad_norm": 3.650327682495117, |
|
"learning_rate": 5.016538671728939e-06, |
|
"loss": 0.012, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 5.500906031581672, |
|
"grad_norm": 2.2948007583618164, |
|
"learning_rate": 5.002157218051601e-06, |
|
"loss": 0.0204, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 5.5138493398912765, |
|
"grad_norm": 1.089821219444275, |
|
"learning_rate": 4.987775764374264e-06, |
|
"loss": 0.0087, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 5.52679264820088, |
|
"grad_norm": 2.964750051498413, |
|
"learning_rate": 4.973394310696926e-06, |
|
"loss": 0.009, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 5.5397359565104844, |
|
"grad_norm": 2.492793083190918, |
|
"learning_rate": 4.9590128570195875e-06, |
|
"loss": 0.013, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 5.552679264820088, |
|
"grad_norm": 18.94529151916504, |
|
"learning_rate": 4.94463140334225e-06, |
|
"loss": 0.0217, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 5.565622573129692, |
|
"grad_norm": 0.21670395135879517, |
|
"learning_rate": 4.930249949664913e-06, |
|
"loss": 0.0289, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 5.578565881439296, |
|
"grad_norm": 1.121397852897644, |
|
"learning_rate": 4.915868495987575e-06, |
|
"loss": 0.0089, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 5.5915091897488995, |
|
"grad_norm": 2.1545515060424805, |
|
"learning_rate": 4.9014870423102375e-06, |
|
"loss": 0.015, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 5.604452498058504, |
|
"grad_norm": 13.737982749938965, |
|
"learning_rate": 4.887105588632899e-06, |
|
"loss": 0.0217, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 5.6173958063681075, |
|
"grad_norm": 3.22420072555542, |
|
"learning_rate": 4.872724134955561e-06, |
|
"loss": 0.0095, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 5.630339114677712, |
|
"grad_norm": 2.609224557876587, |
|
"learning_rate": 4.858342681278224e-06, |
|
"loss": 0.0156, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 5.6432824229873155, |
|
"grad_norm": 4.490423679351807, |
|
"learning_rate": 4.843961227600887e-06, |
|
"loss": 0.0101, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 5.656225731296919, |
|
"grad_norm": 3.0184295177459717, |
|
"learning_rate": 4.8295797739235485e-06, |
|
"loss": 0.0126, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 5.6691690396065235, |
|
"grad_norm": 4.014559745788574, |
|
"learning_rate": 4.815198320246211e-06, |
|
"loss": 0.0126, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 5.682112347916127, |
|
"grad_norm": 7.090182304382324, |
|
"learning_rate": 4.800816866568873e-06, |
|
"loss": 0.0182, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 5.6950556562257315, |
|
"grad_norm": 3.3935253620147705, |
|
"learning_rate": 4.786435412891535e-06, |
|
"loss": 0.0155, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 5.707998964535335, |
|
"grad_norm": 3.5761795043945312, |
|
"learning_rate": 4.772053959214198e-06, |
|
"loss": 0.0102, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 5.7209422728449395, |
|
"grad_norm": 2.370244026184082, |
|
"learning_rate": 4.75767250553686e-06, |
|
"loss": 0.0206, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 5.733885581154543, |
|
"grad_norm": 0.3268122971057892, |
|
"learning_rate": 4.743291051859522e-06, |
|
"loss": 0.0143, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 5.7468288894641475, |
|
"grad_norm": 1.703277587890625, |
|
"learning_rate": 4.728909598182185e-06, |
|
"loss": 0.0143, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 5.759772197773751, |
|
"grad_norm": 2.766359329223633, |
|
"learning_rate": 4.714528144504847e-06, |
|
"loss": 0.0114, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 5.772715506083355, |
|
"grad_norm": 0.3259863555431366, |
|
"learning_rate": 4.700146690827509e-06, |
|
"loss": 0.0114, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 5.785658814392959, |
|
"grad_norm": 12.3453369140625, |
|
"learning_rate": 4.685765237150171e-06, |
|
"loss": 0.0109, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 5.798602122702563, |
|
"grad_norm": 4.0501179695129395, |
|
"learning_rate": 4.671383783472834e-06, |
|
"loss": 0.0134, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 5.811545431012167, |
|
"grad_norm": 3.24855637550354, |
|
"learning_rate": 4.657002329795496e-06, |
|
"loss": 0.015, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 5.824488739321771, |
|
"grad_norm": 4.302082061767578, |
|
"learning_rate": 4.642620876118159e-06, |
|
"loss": 0.0272, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 5.837432047631374, |
|
"grad_norm": 6.086068630218506, |
|
"learning_rate": 4.628239422440821e-06, |
|
"loss": 0.0134, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 5.850375355940979, |
|
"grad_norm": 0.44153615832328796, |
|
"learning_rate": 4.613857968763483e-06, |
|
"loss": 0.0168, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 5.863318664250582, |
|
"grad_norm": 1.0860470533370972, |
|
"learning_rate": 4.599476515086145e-06, |
|
"loss": 0.0118, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 5.876261972560187, |
|
"grad_norm": 3.069711923599243, |
|
"learning_rate": 4.585095061408807e-06, |
|
"loss": 0.0111, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 5.88920528086979, |
|
"grad_norm": 4.1225504875183105, |
|
"learning_rate": 4.57071360773147e-06, |
|
"loss": 0.0118, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 5.9021485891793946, |
|
"grad_norm": 1.6695607900619507, |
|
"learning_rate": 4.5563321540541325e-06, |
|
"loss": 0.0087, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 5.915091897488998, |
|
"grad_norm": 3.2536139488220215, |
|
"learning_rate": 4.541950700376794e-06, |
|
"loss": 0.0143, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 5.9280352057986025, |
|
"grad_norm": 1.4655452966690063, |
|
"learning_rate": 4.527569246699457e-06, |
|
"loss": 0.0152, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 5.940978514108206, |
|
"grad_norm": 4.390911102294922, |
|
"learning_rate": 4.513187793022119e-06, |
|
"loss": 0.0117, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 5.95392182241781, |
|
"grad_norm": 0.6409261226654053, |
|
"learning_rate": 4.498806339344781e-06, |
|
"loss": 0.014, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 5.966865130727414, |
|
"grad_norm": 9.11255931854248, |
|
"learning_rate": 4.4844248856674435e-06, |
|
"loss": 0.0135, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 5.979808439037018, |
|
"grad_norm": 3.790682554244995, |
|
"learning_rate": 4.470043431990106e-06, |
|
"loss": 0.0109, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 5.992751747346622, |
|
"grad_norm": 6.534693241119385, |
|
"learning_rate": 4.455661978312768e-06, |
|
"loss": 0.0138, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.013288498856127262, |
|
"eval_runtime": 88.7687, |
|
"eval_samples_per_second": 5.633, |
|
"eval_steps_per_second": 0.71, |
|
"eval_wer": 2.2567439907630944, |
|
"step": 23178 |
|
}, |
|
{ |
|
"epoch": 6.005695055656226, |
|
"grad_norm": 0.7158689498901367, |
|
"learning_rate": 4.441280524635431e-06, |
|
"loss": 0.0119, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 6.01863836396583, |
|
"grad_norm": 2.5632243156433105, |
|
"learning_rate": 4.426899070958093e-06, |
|
"loss": 0.008, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 6.031581672275434, |
|
"grad_norm": 1.491152286529541, |
|
"learning_rate": 4.4125176172807545e-06, |
|
"loss": 0.0084, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 6.044524980585037, |
|
"grad_norm": 0.8352173566818237, |
|
"learning_rate": 4.398136163603417e-06, |
|
"loss": 0.0121, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 6.057468288894642, |
|
"grad_norm": 0.7601485848426819, |
|
"learning_rate": 4.38375470992608e-06, |
|
"loss": 0.0084, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 6.070411597204245, |
|
"grad_norm": 1.012165904045105, |
|
"learning_rate": 4.369373256248742e-06, |
|
"loss": 0.0149, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 6.08335490551385, |
|
"grad_norm": 0.7509778141975403, |
|
"learning_rate": 4.3549918025714045e-06, |
|
"loss": 0.0045, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 6.096298213823453, |
|
"grad_norm": 6.754235744476318, |
|
"learning_rate": 4.340610348894066e-06, |
|
"loss": 0.0057, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 6.109241522133058, |
|
"grad_norm": 0.37281331419944763, |
|
"learning_rate": 4.326228895216728e-06, |
|
"loss": 0.0057, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 6.122184830442661, |
|
"grad_norm": 0.06306509673595428, |
|
"learning_rate": 4.311847441539391e-06, |
|
"loss": 0.0069, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 6.135128138752265, |
|
"grad_norm": 0.4581661522388458, |
|
"learning_rate": 4.297465987862054e-06, |
|
"loss": 0.0055, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 6.148071447061869, |
|
"grad_norm": 0.8070671558380127, |
|
"learning_rate": 4.2830845341847156e-06, |
|
"loss": 0.0087, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 6.161014755371473, |
|
"grad_norm": 0.4274216294288635, |
|
"learning_rate": 4.268703080507378e-06, |
|
"loss": 0.016, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 6.173958063681077, |
|
"grad_norm": 0.6105465292930603, |
|
"learning_rate": 4.25432162683004e-06, |
|
"loss": 0.0063, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 6.186901371990681, |
|
"grad_norm": 0.4398168921470642, |
|
"learning_rate": 4.239940173152703e-06, |
|
"loss": 0.0052, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 6.199844680300285, |
|
"grad_norm": 2.373279333114624, |
|
"learning_rate": 4.225558719475365e-06, |
|
"loss": 0.008, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 6.212787988609889, |
|
"grad_norm": 2.742097854614258, |
|
"learning_rate": 4.2111772657980275e-06, |
|
"loss": 0.0062, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 6.225731296919492, |
|
"grad_norm": 1.6418052911758423, |
|
"learning_rate": 4.19679581212069e-06, |
|
"loss": 0.0056, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 6.238674605229097, |
|
"grad_norm": 1.0858538150787354, |
|
"learning_rate": 4.182414358443352e-06, |
|
"loss": 0.0047, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 6.2516179135387, |
|
"grad_norm": 1.6831880807876587, |
|
"learning_rate": 4.168032904766014e-06, |
|
"loss": 0.0045, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 6.264561221848305, |
|
"grad_norm": 0.5000109076499939, |
|
"learning_rate": 4.153651451088677e-06, |
|
"loss": 0.01, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 6.277504530157908, |
|
"grad_norm": 0.6169405579566956, |
|
"learning_rate": 4.1392699974113385e-06, |
|
"loss": 0.008, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 6.290447838467513, |
|
"grad_norm": 1.9843913316726685, |
|
"learning_rate": 4.124888543734001e-06, |
|
"loss": 0.0095, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 6.303391146777116, |
|
"grad_norm": 1.331559419631958, |
|
"learning_rate": 4.110507090056663e-06, |
|
"loss": 0.0073, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 6.31633445508672, |
|
"grad_norm": 0.6855106949806213, |
|
"learning_rate": 4.096125636379326e-06, |
|
"loss": 0.0054, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 6.329277763396324, |
|
"grad_norm": 0.35360315442085266, |
|
"learning_rate": 4.081744182701988e-06, |
|
"loss": 0.006, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 6.342221071705928, |
|
"grad_norm": 1.6724082231521606, |
|
"learning_rate": 4.06736272902465e-06, |
|
"loss": 0.0032, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 6.355164380015532, |
|
"grad_norm": 1.0881156921386719, |
|
"learning_rate": 4.052981275347312e-06, |
|
"loss": 0.0052, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 6.368107688325136, |
|
"grad_norm": 0.6318166255950928, |
|
"learning_rate": 4.038599821669975e-06, |
|
"loss": 0.0081, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 6.38105099663474, |
|
"grad_norm": 3.3334782123565674, |
|
"learning_rate": 4.024218367992637e-06, |
|
"loss": 0.0121, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 6.393994304944344, |
|
"grad_norm": 0.2339646816253662, |
|
"learning_rate": 4.0098369143152995e-06, |
|
"loss": 0.0072, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 6.406937613253948, |
|
"grad_norm": 1.0727429389953613, |
|
"learning_rate": 3.995743089711508e-06, |
|
"loss": 0.0074, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 6.419880921563552, |
|
"grad_norm": 5.548860549926758, |
|
"learning_rate": 3.981649265107718e-06, |
|
"loss": 0.0072, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 6.432824229873155, |
|
"grad_norm": 0.37892910838127136, |
|
"learning_rate": 3.9672678114303795e-06, |
|
"loss": 0.0105, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 6.44576753818276, |
|
"grad_norm": 0.9245821237564087, |
|
"learning_rate": 3.952886357753042e-06, |
|
"loss": 0.0059, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 6.458710846492363, |
|
"grad_norm": 1.4176559448242188, |
|
"learning_rate": 3.938504904075705e-06, |
|
"loss": 0.005, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 6.471654154801968, |
|
"grad_norm": 2.894819736480713, |
|
"learning_rate": 3.924123450398367e-06, |
|
"loss": 0.0068, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 6.484597463111571, |
|
"grad_norm": 1.3159211874008179, |
|
"learning_rate": 3.909741996721029e-06, |
|
"loss": 0.0068, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 6.497540771421175, |
|
"grad_norm": 1.8089011907577515, |
|
"learning_rate": 3.895360543043691e-06, |
|
"loss": 0.0067, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 6.510484079730779, |
|
"grad_norm": 1.673920750617981, |
|
"learning_rate": 3.880979089366353e-06, |
|
"loss": 0.0109, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 6.523427388040383, |
|
"grad_norm": 0.7830101251602173, |
|
"learning_rate": 3.866597635689016e-06, |
|
"loss": 0.0055, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 6.536370696349987, |
|
"grad_norm": 1.6252669095993042, |
|
"learning_rate": 3.852216182011679e-06, |
|
"loss": 0.0066, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 6.549314004659591, |
|
"grad_norm": 2.7717552185058594, |
|
"learning_rate": 3.8378347283343405e-06, |
|
"loss": 0.0059, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 6.562257312969195, |
|
"grad_norm": 7.495051383972168, |
|
"learning_rate": 3.823453274657002e-06, |
|
"loss": 0.015, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 6.575200621278799, |
|
"grad_norm": 0.43886011838912964, |
|
"learning_rate": 3.809071820979665e-06, |
|
"loss": 0.0046, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 6.588143929588403, |
|
"grad_norm": 0.8297833204269409, |
|
"learning_rate": 3.7946903673023274e-06, |
|
"loss": 0.0113, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 6.601087237898007, |
|
"grad_norm": 3.7396538257598877, |
|
"learning_rate": 3.7803089136249893e-06, |
|
"loss": 0.0063, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 6.61403054620761, |
|
"grad_norm": 3.463552236557007, |
|
"learning_rate": 3.765927459947652e-06, |
|
"loss": 0.0065, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 6.626973854517215, |
|
"grad_norm": 6.3341450691223145, |
|
"learning_rate": 3.7515460062703143e-06, |
|
"loss": 0.0076, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 6.639917162826818, |
|
"grad_norm": 1.700925350189209, |
|
"learning_rate": 3.737164552592976e-06, |
|
"loss": 0.0054, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 6.652860471136423, |
|
"grad_norm": 6.3853044509887695, |
|
"learning_rate": 3.722783098915639e-06, |
|
"loss": 0.0097, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 6.665803779446026, |
|
"grad_norm": 6.10149621963501, |
|
"learning_rate": 3.7084016452383007e-06, |
|
"loss": 0.0082, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 6.67874708775563, |
|
"grad_norm": 1.308225393295288, |
|
"learning_rate": 3.694020191560963e-06, |
|
"loss": 0.0099, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 6.691690396065234, |
|
"grad_norm": 4.188955307006836, |
|
"learning_rate": 3.6796387378836257e-06, |
|
"loss": 0.0112, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 6.704633704374838, |
|
"grad_norm": 1.8746811151504517, |
|
"learning_rate": 3.6652572842062876e-06, |
|
"loss": 0.0118, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 6.717577012684442, |
|
"grad_norm": 0.3944805860519409, |
|
"learning_rate": 3.65087583052895e-06, |
|
"loss": 0.0063, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 6.730520320994046, |
|
"grad_norm": 1.3446645736694336, |
|
"learning_rate": 3.6364943768516126e-06, |
|
"loss": 0.0292, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 6.74346362930365, |
|
"grad_norm": 10.001498222351074, |
|
"learning_rate": 3.6221129231742745e-06, |
|
"loss": 0.0078, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 6.756406937613254, |
|
"grad_norm": 3.632220983505249, |
|
"learning_rate": 3.607731469496937e-06, |
|
"loss": 0.0044, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 6.769350245922858, |
|
"grad_norm": 4.4222259521484375, |
|
"learning_rate": 3.5933500158195995e-06, |
|
"loss": 0.0093, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 6.782293554232462, |
|
"grad_norm": 1.0133709907531738, |
|
"learning_rate": 3.5789685621422614e-06, |
|
"loss": 0.0072, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 6.795236862542065, |
|
"grad_norm": 0.1933288425207138, |
|
"learning_rate": 3.564587108464924e-06, |
|
"loss": 0.0063, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 6.80818017085167, |
|
"grad_norm": 1.596628189086914, |
|
"learning_rate": 3.5502056547875864e-06, |
|
"loss": 0.0055, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 6.821123479161273, |
|
"grad_norm": 0.2668837010860443, |
|
"learning_rate": 3.5358242011102482e-06, |
|
"loss": 0.0059, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 6.834066787470878, |
|
"grad_norm": 1.4524122476577759, |
|
"learning_rate": 3.521442747432911e-06, |
|
"loss": 0.0144, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 6.847010095780481, |
|
"grad_norm": 0.7154669761657715, |
|
"learning_rate": 3.5070612937555732e-06, |
|
"loss": 0.0083, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 6.859953404090086, |
|
"grad_norm": 3.9259557723999023, |
|
"learning_rate": 3.492679840078235e-06, |
|
"loss": 0.0054, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 6.872896712399689, |
|
"grad_norm": 2.91253924369812, |
|
"learning_rate": 3.478298386400898e-06, |
|
"loss": 0.0058, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 6.885840020709293, |
|
"grad_norm": 0.6866968870162964, |
|
"learning_rate": 3.46391693272356e-06, |
|
"loss": 0.0089, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 6.898783329018897, |
|
"grad_norm": 0.4900106191635132, |
|
"learning_rate": 3.449535479046222e-06, |
|
"loss": 0.015, |
|
"step": 26650 |
|
}, |
|
{ |
|
"epoch": 6.911726637328501, |
|
"grad_norm": 0.8514009118080139, |
|
"learning_rate": 3.4351540253688847e-06, |
|
"loss": 0.0071, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 6.924669945638105, |
|
"grad_norm": 1.1547664403915405, |
|
"learning_rate": 3.420772571691547e-06, |
|
"loss": 0.007, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 6.937613253947709, |
|
"grad_norm": 1.6039056777954102, |
|
"learning_rate": 3.406391118014209e-06, |
|
"loss": 0.0049, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 6.950556562257313, |
|
"grad_norm": 0.2261231243610382, |
|
"learning_rate": 3.3920096643368716e-06, |
|
"loss": 0.006, |
|
"step": 26850 |
|
}, |
|
{ |
|
"epoch": 6.963499870566917, |
|
"grad_norm": 2.033464193344116, |
|
"learning_rate": 3.377628210659534e-06, |
|
"loss": 0.0076, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 6.97644317887652, |
|
"grad_norm": 2.289121150970459, |
|
"learning_rate": 3.3632467569821957e-06, |
|
"loss": 0.0114, |
|
"step": 26950 |
|
}, |
|
{ |
|
"epoch": 6.989386487186125, |
|
"grad_norm": 1.8845149278640747, |
|
"learning_rate": 3.3488653033048584e-06, |
|
"loss": 0.0067, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.0072191799990832806, |
|
"eval_runtime": 89.4574, |
|
"eval_samples_per_second": 5.589, |
|
"eval_steps_per_second": 0.704, |
|
"eval_wer": 2.004828382491865, |
|
"step": 27041 |
|
}, |
|
{ |
|
"epoch": 7.002329795495728, |
|
"grad_norm": 0.09408234804868698, |
|
"learning_rate": 3.3344838496275207e-06, |
|
"loss": 0.0059, |
|
"step": 27050 |
|
}, |
|
{ |
|
"epoch": 7.015273103805333, |
|
"grad_norm": 0.22423428297042847, |
|
"learning_rate": 3.3201023959501826e-06, |
|
"loss": 0.0033, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 7.028216412114936, |
|
"grad_norm": 0.37666457891464233, |
|
"learning_rate": 3.3057209422728453e-06, |
|
"loss": 0.0047, |
|
"step": 27150 |
|
}, |
|
{ |
|
"epoch": 7.041159720424541, |
|
"grad_norm": 0.46821069717407227, |
|
"learning_rate": 3.2913394885955076e-06, |
|
"loss": 0.0023, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 7.054103028734144, |
|
"grad_norm": 0.20124687254428864, |
|
"learning_rate": 3.2769580349181695e-06, |
|
"loss": 0.0042, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 7.067046337043748, |
|
"grad_norm": 0.15090468525886536, |
|
"learning_rate": 3.262576581240832e-06, |
|
"loss": 0.0026, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 7.079989645353352, |
|
"grad_norm": 0.24649310111999512, |
|
"learning_rate": 3.2481951275634945e-06, |
|
"loss": 0.0104, |
|
"step": 27350 |
|
}, |
|
{ |
|
"epoch": 7.092932953662956, |
|
"grad_norm": 0.5062503814697266, |
|
"learning_rate": 3.2338136738861563e-06, |
|
"loss": 0.0044, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 7.10587626197256, |
|
"grad_norm": 1.3508222103118896, |
|
"learning_rate": 3.219432220208819e-06, |
|
"loss": 0.0038, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 7.118819570282164, |
|
"grad_norm": 0.41208532452583313, |
|
"learning_rate": 3.2050507665314814e-06, |
|
"loss": 0.0039, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 7.131762878591768, |
|
"grad_norm": 0.5694621801376343, |
|
"learning_rate": 3.1906693128541436e-06, |
|
"loss": 0.0051, |
|
"step": 27550 |
|
}, |
|
{ |
|
"epoch": 7.144706186901372, |
|
"grad_norm": 0.21710887551307678, |
|
"learning_rate": 3.176287859176806e-06, |
|
"loss": 0.003, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 7.1576494952109755, |
|
"grad_norm": 18.37504005432129, |
|
"learning_rate": 3.1619064054994682e-06, |
|
"loss": 0.0074, |
|
"step": 27650 |
|
}, |
|
{ |
|
"epoch": 7.17059280352058, |
|
"grad_norm": 0.10800693929195404, |
|
"learning_rate": 3.1475249518221305e-06, |
|
"loss": 0.0098, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 7.1835361118301835, |
|
"grad_norm": 0.9992019534111023, |
|
"learning_rate": 3.133143498144793e-06, |
|
"loss": 0.0037, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 7.196479420139788, |
|
"grad_norm": 0.5260515213012695, |
|
"learning_rate": 3.118762044467455e-06, |
|
"loss": 0.0059, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 7.2094227284493915, |
|
"grad_norm": 0.16195982694625854, |
|
"learning_rate": 3.1043805907901174e-06, |
|
"loss": 0.0033, |
|
"step": 27850 |
|
}, |
|
{ |
|
"epoch": 7.222366036758996, |
|
"grad_norm": 0.0777490958571434, |
|
"learning_rate": 3.0899991371127797e-06, |
|
"loss": 0.0031, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 7.2353093450685995, |
|
"grad_norm": 2.360994577407837, |
|
"learning_rate": 3.0756176834354416e-06, |
|
"loss": 0.0042, |
|
"step": 27950 |
|
}, |
|
{ |
|
"epoch": 7.248252653378204, |
|
"grad_norm": 0.7095078825950623, |
|
"learning_rate": 3.0612362297581043e-06, |
|
"loss": 0.0024, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 7.2611959616878075, |
|
"grad_norm": 0.8493836522102356, |
|
"learning_rate": 3.0468547760807666e-06, |
|
"loss": 0.0061, |
|
"step": 28050 |
|
}, |
|
{ |
|
"epoch": 7.274139269997411, |
|
"grad_norm": 3.106424570083618, |
|
"learning_rate": 3.0324733224034284e-06, |
|
"loss": 0.0025, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 7.2870825783070154, |
|
"grad_norm": 0.9348524808883667, |
|
"learning_rate": 3.018091868726091e-06, |
|
"loss": 0.003, |
|
"step": 28150 |
|
}, |
|
{ |
|
"epoch": 7.300025886616619, |
|
"grad_norm": 4.065819263458252, |
|
"learning_rate": 3.0037104150487534e-06, |
|
"loss": 0.0026, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 7.312969194926223, |
|
"grad_norm": 0.16182902455329895, |
|
"learning_rate": 2.9893289613714153e-06, |
|
"loss": 0.0035, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 7.325912503235827, |
|
"grad_norm": 0.36588823795318604, |
|
"learning_rate": 2.974947507694078e-06, |
|
"loss": 0.0042, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 7.3388558115454305, |
|
"grad_norm": 0.47314420342445374, |
|
"learning_rate": 2.9605660540167403e-06, |
|
"loss": 0.0033, |
|
"step": 28350 |
|
}, |
|
{ |
|
"epoch": 7.351799119855035, |
|
"grad_norm": 7.304609298706055, |
|
"learning_rate": 2.946184600339402e-06, |
|
"loss": 0.0028, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 7.3647424281646385, |
|
"grad_norm": 0.591309130191803, |
|
"learning_rate": 2.931803146662065e-06, |
|
"loss": 0.0057, |
|
"step": 28450 |
|
}, |
|
{ |
|
"epoch": 7.377685736474243, |
|
"grad_norm": 0.7642752528190613, |
|
"learning_rate": 2.917421692984727e-06, |
|
"loss": 0.0038, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 7.3906290447838465, |
|
"grad_norm": 0.9871138334274292, |
|
"learning_rate": 2.903040239307389e-06, |
|
"loss": 0.0038, |
|
"step": 28550 |
|
}, |
|
{ |
|
"epoch": 7.403572353093451, |
|
"grad_norm": 0.7224917411804199, |
|
"learning_rate": 2.8886587856300518e-06, |
|
"loss": 0.0057, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 7.4165156614030545, |
|
"grad_norm": 0.7096822261810303, |
|
"learning_rate": 2.874277331952714e-06, |
|
"loss": 0.0031, |
|
"step": 28650 |
|
}, |
|
{ |
|
"epoch": 7.429458969712659, |
|
"grad_norm": 1.5942363739013672, |
|
"learning_rate": 2.859895878275376e-06, |
|
"loss": 0.0041, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 7.4424022780222625, |
|
"grad_norm": 0.6390734910964966, |
|
"learning_rate": 2.8455144245980386e-06, |
|
"loss": 0.0031, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 7.455345586331866, |
|
"grad_norm": 0.8184775114059448, |
|
"learning_rate": 2.831132970920701e-06, |
|
"loss": 0.0088, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 7.4682888946414705, |
|
"grad_norm": 0.07598695158958435, |
|
"learning_rate": 2.816751517243363e-06, |
|
"loss": 0.0033, |
|
"step": 28850 |
|
}, |
|
{ |
|
"epoch": 7.481232202951074, |
|
"grad_norm": 0.4833034873008728, |
|
"learning_rate": 2.8023700635660255e-06, |
|
"loss": 0.0045, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 7.4941755112606785, |
|
"grad_norm": 0.30186623334884644, |
|
"learning_rate": 2.787988609888688e-06, |
|
"loss": 0.0053, |
|
"step": 28950 |
|
}, |
|
{ |
|
"epoch": 7.507118819570282, |
|
"grad_norm": 0.9921897053718567, |
|
"learning_rate": 2.7736071562113497e-06, |
|
"loss": 0.0026, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 7.5200621278798865, |
|
"grad_norm": 2.7632157802581787, |
|
"learning_rate": 2.7592257025340124e-06, |
|
"loss": 0.0123, |
|
"step": 29050 |
|
}, |
|
{ |
|
"epoch": 7.53300543618949, |
|
"grad_norm": 0.11869651824235916, |
|
"learning_rate": 2.7448442488566747e-06, |
|
"loss": 0.0027, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 7.545948744499094, |
|
"grad_norm": 0.14628329873085022, |
|
"learning_rate": 2.7304627951793374e-06, |
|
"loss": 0.003, |
|
"step": 29150 |
|
}, |
|
{ |
|
"epoch": 7.558892052808698, |
|
"grad_norm": 0.4393390715122223, |
|
"learning_rate": 2.7160813415019993e-06, |
|
"loss": 0.0026, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 7.571835361118302, |
|
"grad_norm": 2.097261428833008, |
|
"learning_rate": 2.7016998878246616e-06, |
|
"loss": 0.0026, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 7.584778669427906, |
|
"grad_norm": 0.23214209079742432, |
|
"learning_rate": 2.6873184341473243e-06, |
|
"loss": 0.0152, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 7.59772197773751, |
|
"grad_norm": 6.099156379699707, |
|
"learning_rate": 2.672936980469986e-06, |
|
"loss": 0.0061, |
|
"step": 29350 |
|
}, |
|
{ |
|
"epoch": 7.610665286047114, |
|
"grad_norm": 1.0887069702148438, |
|
"learning_rate": 2.6585555267926484e-06, |
|
"loss": 0.0119, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 7.623608594356718, |
|
"grad_norm": 1.0223588943481445, |
|
"learning_rate": 2.644174073115311e-06, |
|
"loss": 0.0027, |
|
"step": 29450 |
|
}, |
|
{ |
|
"epoch": 7.636551902666321, |
|
"grad_norm": 6.282520771026611, |
|
"learning_rate": 2.629792619437973e-06, |
|
"loss": 0.0035, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 7.6494952109759256, |
|
"grad_norm": 0.21501053869724274, |
|
"learning_rate": 2.6154111657606353e-06, |
|
"loss": 0.009, |
|
"step": 29550 |
|
}, |
|
{ |
|
"epoch": 7.662438519285529, |
|
"grad_norm": 1.1203105449676514, |
|
"learning_rate": 2.6013173411568443e-06, |
|
"loss": 0.006, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 7.6753818275951335, |
|
"grad_norm": 0.24988429248332977, |
|
"learning_rate": 2.586935887479506e-06, |
|
"loss": 0.0026, |
|
"step": 29650 |
|
}, |
|
{ |
|
"epoch": 7.688325135904737, |
|
"grad_norm": 0.8392144441604614, |
|
"learning_rate": 2.572554433802169e-06, |
|
"loss": 0.0047, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 7.7012684442143415, |
|
"grad_norm": 0.7785108089447021, |
|
"learning_rate": 2.5581729801248312e-06, |
|
"loss": 0.0037, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 7.714211752523945, |
|
"grad_norm": 0.2849646806716919, |
|
"learning_rate": 2.543791526447493e-06, |
|
"loss": 0.0027, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 7.7271550608335495, |
|
"grad_norm": 0.3449094593524933, |
|
"learning_rate": 2.529410072770156e-06, |
|
"loss": 0.0114, |
|
"step": 29850 |
|
}, |
|
{ |
|
"epoch": 7.740098369143153, |
|
"grad_norm": 0.7601585984230042, |
|
"learning_rate": 2.515028619092818e-06, |
|
"loss": 0.0024, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 7.753041677452757, |
|
"grad_norm": 0.6022003889083862, |
|
"learning_rate": 2.50064716541548e-06, |
|
"loss": 0.005, |
|
"step": 29950 |
|
}, |
|
{ |
|
"epoch": 7.765984985762361, |
|
"grad_norm": 0.08920400589704514, |
|
"learning_rate": 2.4862657117381427e-06, |
|
"loss": 0.0025, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 7.778928294071965, |
|
"grad_norm": 0.5146584510803223, |
|
"learning_rate": 2.471884258060805e-06, |
|
"loss": 0.0035, |
|
"step": 30050 |
|
}, |
|
{ |
|
"epoch": 7.791871602381569, |
|
"grad_norm": 0.6136813759803772, |
|
"learning_rate": 2.4575028043834673e-06, |
|
"loss": 0.0033, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 7.804814910691173, |
|
"grad_norm": 5.361100673675537, |
|
"learning_rate": 2.4431213507061295e-06, |
|
"loss": 0.0039, |
|
"step": 30150 |
|
}, |
|
{ |
|
"epoch": 7.817758219000776, |
|
"grad_norm": 0.617695152759552, |
|
"learning_rate": 2.428739897028792e-06, |
|
"loss": 0.0026, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 7.830701527310381, |
|
"grad_norm": 0.42767393589019775, |
|
"learning_rate": 2.414358443351454e-06, |
|
"loss": 0.0022, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 7.843644835619984, |
|
"grad_norm": 0.09423399716615677, |
|
"learning_rate": 2.3999769896741164e-06, |
|
"loss": 0.0038, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 7.856588143929589, |
|
"grad_norm": 0.18421663343906403, |
|
"learning_rate": 2.3855955359967787e-06, |
|
"loss": 0.0048, |
|
"step": 30350 |
|
}, |
|
{ |
|
"epoch": 7.869531452239192, |
|
"grad_norm": 0.05506595969200134, |
|
"learning_rate": 2.371214082319441e-06, |
|
"loss": 0.0034, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 7.882474760548797, |
|
"grad_norm": 1.0411120653152466, |
|
"learning_rate": 2.3568326286421033e-06, |
|
"loss": 0.0065, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 7.8954180688584, |
|
"grad_norm": 0.05043673887848854, |
|
"learning_rate": 2.3424511749647656e-06, |
|
"loss": 0.0018, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 7.908361377168005, |
|
"grad_norm": 0.8202661275863647, |
|
"learning_rate": 2.328069721287428e-06, |
|
"loss": 0.0107, |
|
"step": 30550 |
|
}, |
|
{ |
|
"epoch": 7.921304685477608, |
|
"grad_norm": 1.809882640838623, |
|
"learning_rate": 2.31368826761009e-06, |
|
"loss": 0.0038, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 7.934247993787212, |
|
"grad_norm": 0.9416866898536682, |
|
"learning_rate": 2.2993068139327525e-06, |
|
"loss": 0.0031, |
|
"step": 30650 |
|
}, |
|
{ |
|
"epoch": 7.947191302096816, |
|
"grad_norm": 0.5891124606132507, |
|
"learning_rate": 2.2849253602554148e-06, |
|
"loss": 0.0023, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 7.96013461040642, |
|
"grad_norm": 0.15361438691616058, |
|
"learning_rate": 2.270543906578077e-06, |
|
"loss": 0.0021, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 7.973077918716024, |
|
"grad_norm": 1.8306443691253662, |
|
"learning_rate": 2.2561624529007393e-06, |
|
"loss": 0.0049, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 7.986021227025628, |
|
"grad_norm": 0.06569012254476547, |
|
"learning_rate": 2.2417809992234016e-06, |
|
"loss": 0.0042, |
|
"step": 30850 |
|
}, |
|
{ |
|
"epoch": 7.998964535335231, |
|
"grad_norm": 0.14215713739395142, |
|
"learning_rate": 2.227399545546064e-06, |
|
"loss": 0.0021, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.0042533595114946365, |
|
"eval_runtime": 89.1255, |
|
"eval_samples_per_second": 5.61, |
|
"eval_steps_per_second": 0.707, |
|
"eval_wer": 1.490500682271439, |
|
"step": 30904 |
|
}, |
|
{ |
|
"epoch": 8.011907843644835, |
|
"grad_norm": 0.9032062292098999, |
|
"learning_rate": 2.2130180918687262e-06, |
|
"loss": 0.0022, |
|
"step": 30950 |
|
}, |
|
{ |
|
"epoch": 8.02485115195444, |
|
"grad_norm": 0.09088978916406631, |
|
"learning_rate": 2.1986366381913885e-06, |
|
"loss": 0.0016, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 8.037794460264044, |
|
"grad_norm": 0.1776304394006729, |
|
"learning_rate": 2.184255184514051e-06, |
|
"loss": 0.0063, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 8.050737768573647, |
|
"grad_norm": 1.3590532541275024, |
|
"learning_rate": 2.169873730836713e-06, |
|
"loss": 0.0018, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 8.06368107688325, |
|
"grad_norm": 2.1883018016815186, |
|
"learning_rate": 2.1554922771593754e-06, |
|
"loss": 0.0028, |
|
"step": 31150 |
|
}, |
|
{ |
|
"epoch": 8.076624385192856, |
|
"grad_norm": 3.1580357551574707, |
|
"learning_rate": 2.1411108234820377e-06, |
|
"loss": 0.0089, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 8.08956769350246, |
|
"grad_norm": 0.19922618567943573, |
|
"learning_rate": 2.1267293698047e-06, |
|
"loss": 0.0016, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 8.102511001812063, |
|
"grad_norm": 0.3133656084537506, |
|
"learning_rate": 2.1123479161273623e-06, |
|
"loss": 0.001, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 8.115454310121667, |
|
"grad_norm": 0.1543108969926834, |
|
"learning_rate": 2.0979664624500245e-06, |
|
"loss": 0.0071, |
|
"step": 31350 |
|
}, |
|
{ |
|
"epoch": 8.12839761843127, |
|
"grad_norm": 0.06812497228384018, |
|
"learning_rate": 2.083585008772687e-06, |
|
"loss": 0.0057, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 8.141340926740876, |
|
"grad_norm": 0.7921668887138367, |
|
"learning_rate": 2.069203555095349e-06, |
|
"loss": 0.0019, |
|
"step": 31450 |
|
}, |
|
{ |
|
"epoch": 8.15428423505048, |
|
"grad_norm": 0.7293940782546997, |
|
"learning_rate": 2.0548221014180114e-06, |
|
"loss": 0.0024, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 8.167227543360083, |
|
"grad_norm": 0.2699018120765686, |
|
"learning_rate": 2.0407282768142205e-06, |
|
"loss": 0.0036, |
|
"step": 31550 |
|
}, |
|
{ |
|
"epoch": 8.180170851669686, |
|
"grad_norm": 1.0701220035552979, |
|
"learning_rate": 2.026346823136883e-06, |
|
"loss": 0.0021, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 8.193114159979292, |
|
"grad_norm": 0.35062670707702637, |
|
"learning_rate": 2.011965369459545e-06, |
|
"loss": 0.0013, |
|
"step": 31650 |
|
}, |
|
{ |
|
"epoch": 8.206057468288895, |
|
"grad_norm": 2.343193531036377, |
|
"learning_rate": 1.9975839157822073e-06, |
|
"loss": 0.005, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 8.219000776598499, |
|
"grad_norm": 0.1934152990579605, |
|
"learning_rate": 1.9832024621048696e-06, |
|
"loss": 0.0042, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 8.231944084908102, |
|
"grad_norm": 1.2814443111419678, |
|
"learning_rate": 1.968821008427532e-06, |
|
"loss": 0.0031, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 8.244887393217706, |
|
"grad_norm": 0.23100686073303223, |
|
"learning_rate": 1.954439554750194e-06, |
|
"loss": 0.001, |
|
"step": 31850 |
|
}, |
|
{ |
|
"epoch": 8.257830701527311, |
|
"grad_norm": 1.0474891662597656, |
|
"learning_rate": 1.9400581010728565e-06, |
|
"loss": 0.0017, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 8.270774009836915, |
|
"grad_norm": 0.1752719134092331, |
|
"learning_rate": 1.9256766473955188e-06, |
|
"loss": 0.0025, |
|
"step": 31950 |
|
}, |
|
{ |
|
"epoch": 8.283717318146518, |
|
"grad_norm": 0.07388792932033539, |
|
"learning_rate": 1.911295193718181e-06, |
|
"loss": 0.0012, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 8.296660626456122, |
|
"grad_norm": 0.2670608460903168, |
|
"learning_rate": 1.8969137400408436e-06, |
|
"loss": 0.0027, |
|
"step": 32050 |
|
}, |
|
{ |
|
"epoch": 8.309603934765725, |
|
"grad_norm": 0.19774726033210754, |
|
"learning_rate": 1.8825322863635057e-06, |
|
"loss": 0.0016, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 8.32254724307533, |
|
"grad_norm": 0.5155441761016846, |
|
"learning_rate": 1.868150832686168e-06, |
|
"loss": 0.0028, |
|
"step": 32150 |
|
}, |
|
{ |
|
"epoch": 8.335490551384934, |
|
"grad_norm": 0.2909785509109497, |
|
"learning_rate": 1.8537693790088305e-06, |
|
"loss": 0.0013, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 8.348433859694538, |
|
"grad_norm": 0.20075345039367676, |
|
"learning_rate": 1.8393879253314927e-06, |
|
"loss": 0.0017, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 8.361377168004141, |
|
"grad_norm": 0.44085758924484253, |
|
"learning_rate": 1.8250064716541548e-06, |
|
"loss": 0.0022, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 8.374320476313747, |
|
"grad_norm": 1.4371730089187622, |
|
"learning_rate": 1.8106250179768173e-06, |
|
"loss": 0.0013, |
|
"step": 32350 |
|
}, |
|
{ |
|
"epoch": 8.38726378462335, |
|
"grad_norm": 0.15547557175159454, |
|
"learning_rate": 1.7962435642994796e-06, |
|
"loss": 0.0016, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 8.400207092932954, |
|
"grad_norm": 0.3929384648799896, |
|
"learning_rate": 1.7818621106221417e-06, |
|
"loss": 0.0013, |
|
"step": 32450 |
|
}, |
|
{ |
|
"epoch": 8.413150401242557, |
|
"grad_norm": 0.329222172498703, |
|
"learning_rate": 1.7674806569448042e-06, |
|
"loss": 0.0016, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 8.42609370955216, |
|
"grad_norm": 0.24657496809959412, |
|
"learning_rate": 1.7530992032674665e-06, |
|
"loss": 0.0033, |
|
"step": 32550 |
|
}, |
|
{ |
|
"epoch": 8.439037017861766, |
|
"grad_norm": 0.1905100792646408, |
|
"learning_rate": 1.7387177495901286e-06, |
|
"loss": 0.0011, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 8.45198032617137, |
|
"grad_norm": 0.06774311512708664, |
|
"learning_rate": 1.724336295912791e-06, |
|
"loss": 0.0028, |
|
"step": 32650 |
|
}, |
|
{ |
|
"epoch": 8.464923634480973, |
|
"grad_norm": 0.20226095616817474, |
|
"learning_rate": 1.7099548422354534e-06, |
|
"loss": 0.0014, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 8.477866942790577, |
|
"grad_norm": 0.5388538241386414, |
|
"learning_rate": 1.6955733885581155e-06, |
|
"loss": 0.0085, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 8.49081025110018, |
|
"grad_norm": 0.04724876210093498, |
|
"learning_rate": 1.681191934880778e-06, |
|
"loss": 0.0015, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 8.503753559409786, |
|
"grad_norm": 0.1351761817932129, |
|
"learning_rate": 1.6668104812034402e-06, |
|
"loss": 0.002, |
|
"step": 32850 |
|
}, |
|
{ |
|
"epoch": 8.51669686771939, |
|
"grad_norm": 0.16377896070480347, |
|
"learning_rate": 1.6524290275261023e-06, |
|
"loss": 0.0014, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 8.529640176028993, |
|
"grad_norm": 0.0653943344950676, |
|
"learning_rate": 1.6380475738487648e-06, |
|
"loss": 0.0016, |
|
"step": 32950 |
|
}, |
|
{ |
|
"epoch": 8.542583484338596, |
|
"grad_norm": 0.4834960699081421, |
|
"learning_rate": 1.6236661201714271e-06, |
|
"loss": 0.0034, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 8.555526792648202, |
|
"grad_norm": 0.437788188457489, |
|
"learning_rate": 1.6092846664940894e-06, |
|
"loss": 0.0052, |
|
"step": 33050 |
|
}, |
|
{ |
|
"epoch": 8.568470100957805, |
|
"grad_norm": 2.195469617843628, |
|
"learning_rate": 1.5949032128167515e-06, |
|
"loss": 0.0036, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 8.581413409267409, |
|
"grad_norm": 0.12040536105632782, |
|
"learning_rate": 1.580521759139414e-06, |
|
"loss": 0.0031, |
|
"step": 33150 |
|
}, |
|
{ |
|
"epoch": 8.594356717577012, |
|
"grad_norm": 0.2154337614774704, |
|
"learning_rate": 1.5661403054620763e-06, |
|
"loss": 0.0025, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 8.607300025886616, |
|
"grad_norm": 0.1478249430656433, |
|
"learning_rate": 1.5517588517847384e-06, |
|
"loss": 0.0022, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 8.620243334196221, |
|
"grad_norm": 0.16750039160251617, |
|
"learning_rate": 1.5373773981074009e-06, |
|
"loss": 0.0021, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 8.633186642505825, |
|
"grad_norm": 0.38158321380615234, |
|
"learning_rate": 1.5229959444300632e-06, |
|
"loss": 0.001, |
|
"step": 33350 |
|
}, |
|
{ |
|
"epoch": 8.646129950815428, |
|
"grad_norm": 1.1184005737304688, |
|
"learning_rate": 1.5086144907527252e-06, |
|
"loss": 0.007, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 8.659073259125032, |
|
"grad_norm": 1.4656065702438354, |
|
"learning_rate": 1.4942330370753877e-06, |
|
"loss": 0.0013, |
|
"step": 33450 |
|
}, |
|
{ |
|
"epoch": 8.672016567434635, |
|
"grad_norm": 0.3482512831687927, |
|
"learning_rate": 1.47985158339805e-06, |
|
"loss": 0.0024, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 8.68495987574424, |
|
"grad_norm": 0.04078083485364914, |
|
"learning_rate": 1.4654701297207121e-06, |
|
"loss": 0.0021, |
|
"step": 33550 |
|
}, |
|
{ |
|
"epoch": 8.697903184053844, |
|
"grad_norm": 0.1383834183216095, |
|
"learning_rate": 1.4510886760433746e-06, |
|
"loss": 0.0025, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 8.710846492363448, |
|
"grad_norm": 0.1986149251461029, |
|
"learning_rate": 1.436707222366037e-06, |
|
"loss": 0.0011, |
|
"step": 33650 |
|
}, |
|
{ |
|
"epoch": 8.723789800673051, |
|
"grad_norm": 0.5224486589431763, |
|
"learning_rate": 1.422325768688699e-06, |
|
"loss": 0.002, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 8.736733108982657, |
|
"grad_norm": 0.14714999496936798, |
|
"learning_rate": 1.4079443150113615e-06, |
|
"loss": 0.0024, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 8.74967641729226, |
|
"grad_norm": 0.07352601736783981, |
|
"learning_rate": 1.3935628613340238e-06, |
|
"loss": 0.0025, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 8.762619725601864, |
|
"grad_norm": 0.04641982167959213, |
|
"learning_rate": 1.379181407656686e-06, |
|
"loss": 0.0011, |
|
"step": 33850 |
|
}, |
|
{ |
|
"epoch": 8.775563033911467, |
|
"grad_norm": 0.20494569838047028, |
|
"learning_rate": 1.3647999539793484e-06, |
|
"loss": 0.0014, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 8.788506342221071, |
|
"grad_norm": 0.3108866214752197, |
|
"learning_rate": 1.3504185003020107e-06, |
|
"loss": 0.0031, |
|
"step": 33950 |
|
}, |
|
{ |
|
"epoch": 8.801449650530676, |
|
"grad_norm": 1.0901867151260376, |
|
"learning_rate": 1.336037046624673e-06, |
|
"loss": 0.006, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 8.81439295884028, |
|
"grad_norm": 0.1557256281375885, |
|
"learning_rate": 1.3216555929473352e-06, |
|
"loss": 0.002, |
|
"step": 34050 |
|
}, |
|
{ |
|
"epoch": 8.827336267149883, |
|
"grad_norm": 0.32064932584762573, |
|
"learning_rate": 1.3072741392699975e-06, |
|
"loss": 0.0016, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 8.840279575459487, |
|
"grad_norm": 0.035750892013311386, |
|
"learning_rate": 1.2928926855926598e-06, |
|
"loss": 0.0016, |
|
"step": 34150 |
|
}, |
|
{ |
|
"epoch": 8.853222883769092, |
|
"grad_norm": 0.11652498692274094, |
|
"learning_rate": 1.278511231915322e-06, |
|
"loss": 0.0017, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 8.866166192078696, |
|
"grad_norm": 0.22541067004203796, |
|
"learning_rate": 1.2641297782379844e-06, |
|
"loss": 0.002, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 8.8791095003883, |
|
"grad_norm": 0.09035801142454147, |
|
"learning_rate": 1.2497483245606467e-06, |
|
"loss": 0.0024, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 8.892052808697903, |
|
"grad_norm": 0.8051474690437317, |
|
"learning_rate": 1.235366870883309e-06, |
|
"loss": 0.0015, |
|
"step": 34350 |
|
}, |
|
{ |
|
"epoch": 8.904996117007506, |
|
"grad_norm": 0.12439941614866257, |
|
"learning_rate": 1.2209854172059713e-06, |
|
"loss": 0.0014, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 8.917939425317112, |
|
"grad_norm": 0.0911746621131897, |
|
"learning_rate": 1.2066039635286336e-06, |
|
"loss": 0.0016, |
|
"step": 34450 |
|
}, |
|
{ |
|
"epoch": 8.930882733626715, |
|
"grad_norm": 0.10455431789159775, |
|
"learning_rate": 1.1922225098512959e-06, |
|
"loss": 0.0035, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 8.943826041936319, |
|
"grad_norm": 0.0844273790717125, |
|
"learning_rate": 1.1778410561739582e-06, |
|
"loss": 0.0012, |
|
"step": 34550 |
|
}, |
|
{ |
|
"epoch": 8.956769350245922, |
|
"grad_norm": 0.1838880330324173, |
|
"learning_rate": 1.1634596024966204e-06, |
|
"loss": 0.003, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 8.969712658555526, |
|
"grad_norm": 1.8350099325180054, |
|
"learning_rate": 1.1490781488192827e-06, |
|
"loss": 0.0018, |
|
"step": 34650 |
|
}, |
|
{ |
|
"epoch": 8.982655966865131, |
|
"grad_norm": 0.18555675446987152, |
|
"learning_rate": 1.134696695141945e-06, |
|
"loss": 0.0015, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 8.995599275174735, |
|
"grad_norm": 0.14013200998306274, |
|
"learning_rate": 1.1203152414646073e-06, |
|
"loss": 0.0011, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.0022843414917588234, |
|
"eval_runtime": 89.3025, |
|
"eval_samples_per_second": 5.599, |
|
"eval_steps_per_second": 0.705, |
|
"eval_wer": 1.1861026556103704, |
|
"step": 34767 |
|
}, |
|
{ |
|
"epoch": 9.008542583484338, |
|
"grad_norm": 0.13073372840881348, |
|
"learning_rate": 1.1059337877872696e-06, |
|
"loss": 0.0014, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 9.021485891793942, |
|
"grad_norm": 0.13120625913143158, |
|
"learning_rate": 1.091552334109932e-06, |
|
"loss": 0.0007, |
|
"step": 34850 |
|
}, |
|
{ |
|
"epoch": 9.034429200103547, |
|
"grad_norm": 0.15788908302783966, |
|
"learning_rate": 1.0771708804325942e-06, |
|
"loss": 0.0012, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 9.04737250841315, |
|
"grad_norm": 0.1373605579137802, |
|
"learning_rate": 1.0627894267552565e-06, |
|
"loss": 0.004, |
|
"step": 34950 |
|
}, |
|
{ |
|
"epoch": 9.060315816722754, |
|
"grad_norm": 0.09479701519012451, |
|
"learning_rate": 1.0484079730779188e-06, |
|
"loss": 0.0021, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 9.073259125032358, |
|
"grad_norm": 0.1045953705906868, |
|
"learning_rate": 1.034026519400581e-06, |
|
"loss": 0.0014, |
|
"step": 35050 |
|
}, |
|
{ |
|
"epoch": 9.086202433341962, |
|
"grad_norm": 0.07537753134965897, |
|
"learning_rate": 1.0196450657232434e-06, |
|
"loss": 0.0008, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 9.099145741651567, |
|
"grad_norm": 0.5165144801139832, |
|
"learning_rate": 1.0052636120459057e-06, |
|
"loss": 0.002, |
|
"step": 35150 |
|
}, |
|
{ |
|
"epoch": 9.11208904996117, |
|
"grad_norm": 0.13497541844844818, |
|
"learning_rate": 9.90882158368568e-07, |
|
"loss": 0.0009, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 9.125032358270774, |
|
"grad_norm": 0.06942334771156311, |
|
"learning_rate": 9.765007046912302e-07, |
|
"loss": 0.0012, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 9.137975666580378, |
|
"grad_norm": 0.19452160596847534, |
|
"learning_rate": 9.621192510138925e-07, |
|
"loss": 0.0014, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 9.150918974889981, |
|
"grad_norm": 0.12441800534725189, |
|
"learning_rate": 9.477377973365548e-07, |
|
"loss": 0.0009, |
|
"step": 35350 |
|
}, |
|
{ |
|
"epoch": 9.163862283199586, |
|
"grad_norm": 0.1729976236820221, |
|
"learning_rate": 9.333563436592172e-07, |
|
"loss": 0.0008, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 9.17680559150919, |
|
"grad_norm": 0.12662172317504883, |
|
"learning_rate": 9.189748899818794e-07, |
|
"loss": 0.0015, |
|
"step": 35450 |
|
}, |
|
{ |
|
"epoch": 9.189748899818794, |
|
"grad_norm": 0.07149961590766907, |
|
"learning_rate": 9.048810653780884e-07, |
|
"loss": 0.0017, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 9.202692208128397, |
|
"grad_norm": 0.09453389793634415, |
|
"learning_rate": 8.904996117007508e-07, |
|
"loss": 0.0008, |
|
"step": 35550 |
|
}, |
|
{ |
|
"epoch": 9.215635516438002, |
|
"grad_norm": 0.06293300539255142, |
|
"learning_rate": 8.76118158023413e-07, |
|
"loss": 0.0009, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 9.228578824747606, |
|
"grad_norm": 0.09109367430210114, |
|
"learning_rate": 8.617367043460753e-07, |
|
"loss": 0.0007, |
|
"step": 35650 |
|
}, |
|
{ |
|
"epoch": 9.24152213305721, |
|
"grad_norm": 0.12085200101137161, |
|
"learning_rate": 8.473552506687377e-07, |
|
"loss": 0.0018, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 9.254465441366813, |
|
"grad_norm": 0.11523808538913727, |
|
"learning_rate": 8.329737969913999e-07, |
|
"loss": 0.0032, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 9.267408749676417, |
|
"grad_norm": 0.28072428703308105, |
|
"learning_rate": 8.185923433140623e-07, |
|
"loss": 0.0008, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 9.280352057986022, |
|
"grad_norm": 0.08222024142742157, |
|
"learning_rate": 8.042108896367246e-07, |
|
"loss": 0.0007, |
|
"step": 35850 |
|
}, |
|
{ |
|
"epoch": 9.293295366295625, |
|
"grad_norm": 0.29703882336616516, |
|
"learning_rate": 7.898294359593868e-07, |
|
"loss": 0.0011, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 9.306238674605229, |
|
"grad_norm": 0.19992585480213165, |
|
"learning_rate": 7.754479822820492e-07, |
|
"loss": 0.0007, |
|
"step": 35950 |
|
}, |
|
{ |
|
"epoch": 9.319181982914833, |
|
"grad_norm": 0.04905041307210922, |
|
"learning_rate": 7.610665286047115e-07, |
|
"loss": 0.0009, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 9.332125291224436, |
|
"grad_norm": 0.15756992995738983, |
|
"learning_rate": 7.466850749273736e-07, |
|
"loss": 0.005, |
|
"step": 36050 |
|
}, |
|
{ |
|
"epoch": 9.345068599534041, |
|
"grad_norm": 0.18442897498607635, |
|
"learning_rate": 7.32303621250036e-07, |
|
"loss": 0.0009, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 9.358011907843645, |
|
"grad_norm": 0.06329531967639923, |
|
"learning_rate": 7.179221675726982e-07, |
|
"loss": 0.0011, |
|
"step": 36150 |
|
}, |
|
{ |
|
"epoch": 9.370955216153249, |
|
"grad_norm": 0.11927127093076706, |
|
"learning_rate": 7.035407138953606e-07, |
|
"loss": 0.0007, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 9.383898524462852, |
|
"grad_norm": 0.06844917684793472, |
|
"learning_rate": 6.891592602180229e-07, |
|
"loss": 0.0018, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 9.396841832772457, |
|
"grad_norm": 0.08744735270738602, |
|
"learning_rate": 6.747778065406851e-07, |
|
"loss": 0.0043, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 9.409785141082061, |
|
"grad_norm": 0.18723489344120026, |
|
"learning_rate": 6.603963528633475e-07, |
|
"loss": 0.0007, |
|
"step": 36350 |
|
}, |
|
{ |
|
"epoch": 9.422728449391665, |
|
"grad_norm": 0.086359903216362, |
|
"learning_rate": 6.460148991860098e-07, |
|
"loss": 0.0039, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 9.435671757701268, |
|
"grad_norm": 0.10816863179206848, |
|
"learning_rate": 6.31633445508672e-07, |
|
"loss": 0.0015, |
|
"step": 36450 |
|
}, |
|
{ |
|
"epoch": 9.448615066010872, |
|
"grad_norm": 0.09978567808866501, |
|
"learning_rate": 6.172519918313344e-07, |
|
"loss": 0.0054, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 9.461558374320477, |
|
"grad_norm": 0.07749635726213455, |
|
"learning_rate": 6.028705381539967e-07, |
|
"loss": 0.0013, |
|
"step": 36550 |
|
}, |
|
{ |
|
"epoch": 9.47450168263008, |
|
"grad_norm": 0.18575559556484222, |
|
"learning_rate": 5.88489084476659e-07, |
|
"loss": 0.0007, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 9.487444990939684, |
|
"grad_norm": 0.3773919641971588, |
|
"learning_rate": 5.741076307993213e-07, |
|
"loss": 0.0019, |
|
"step": 36650 |
|
}, |
|
{ |
|
"epoch": 9.500388299249288, |
|
"grad_norm": 0.13482239842414856, |
|
"learning_rate": 5.597261771219835e-07, |
|
"loss": 0.0006, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 9.513331607558893, |
|
"grad_norm": 0.16534963250160217, |
|
"learning_rate": 5.453447234446458e-07, |
|
"loss": 0.0007, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 9.526274915868497, |
|
"grad_norm": 0.22458544373512268, |
|
"learning_rate": 5.309632697673081e-07, |
|
"loss": 0.0013, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 9.5392182241781, |
|
"grad_norm": 0.07351688295602798, |
|
"learning_rate": 5.165818160899704e-07, |
|
"loss": 0.0019, |
|
"step": 36850 |
|
}, |
|
{ |
|
"epoch": 9.552161532487704, |
|
"grad_norm": 0.7084305286407471, |
|
"learning_rate": 5.022003624126327e-07, |
|
"loss": 0.0024, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 9.565104840797307, |
|
"grad_norm": 0.09942048788070679, |
|
"learning_rate": 4.87818908735295e-07, |
|
"loss": 0.0019, |
|
"step": 36950 |
|
}, |
|
{ |
|
"epoch": 9.578048149106912, |
|
"grad_norm": 1.1455363035202026, |
|
"learning_rate": 4.734374550579573e-07, |
|
"loss": 0.0013, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 9.590991457416516, |
|
"grad_norm": 0.07915141433477402, |
|
"learning_rate": 4.590560013806196e-07, |
|
"loss": 0.0081, |
|
"step": 37050 |
|
}, |
|
{ |
|
"epoch": 9.60393476572612, |
|
"grad_norm": 0.1777876317501068, |
|
"learning_rate": 4.4467454770328193e-07, |
|
"loss": 0.0014, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 9.616878074035723, |
|
"grad_norm": 0.10249053686857224, |
|
"learning_rate": 4.3029309402594417e-07, |
|
"loss": 0.0007, |
|
"step": 37150 |
|
}, |
|
{ |
|
"epoch": 9.629821382345327, |
|
"grad_norm": 0.10651733726263046, |
|
"learning_rate": 4.1591164034860646e-07, |
|
"loss": 0.0007, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 9.642764690654932, |
|
"grad_norm": 0.03686549514532089, |
|
"learning_rate": 4.0153018667126875e-07, |
|
"loss": 0.0046, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 9.655707998964536, |
|
"grad_norm": 0.09571921825408936, |
|
"learning_rate": 3.871487329939311e-07, |
|
"loss": 0.0009, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 9.66865130727414, |
|
"grad_norm": 0.08373486995697021, |
|
"learning_rate": 3.727672793165934e-07, |
|
"loss": 0.0013, |
|
"step": 37350 |
|
}, |
|
{ |
|
"epoch": 9.681594615583743, |
|
"grad_norm": 3.18973708152771, |
|
"learning_rate": 3.583858256392556e-07, |
|
"loss": 0.0013, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 9.694537923893346, |
|
"grad_norm": 0.06650124490261078, |
|
"learning_rate": 3.440043719619179e-07, |
|
"loss": 0.0014, |
|
"step": 37450 |
|
}, |
|
{ |
|
"epoch": 9.707481232202952, |
|
"grad_norm": 0.04685758426785469, |
|
"learning_rate": 3.2962291828458026e-07, |
|
"loss": 0.0011, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 9.720424540512555, |
|
"grad_norm": 0.23590688407421112, |
|
"learning_rate": 3.1524146460724255e-07, |
|
"loss": 0.0009, |
|
"step": 37550 |
|
}, |
|
{ |
|
"epoch": 9.733367848822159, |
|
"grad_norm": 0.6385647058486938, |
|
"learning_rate": 3.0086001092990484e-07, |
|
"loss": 0.0007, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 9.746311157131762, |
|
"grad_norm": 0.9988199472427368, |
|
"learning_rate": 2.8647855725256713e-07, |
|
"loss": 0.0011, |
|
"step": 37650 |
|
}, |
|
{ |
|
"epoch": 9.759254465441368, |
|
"grad_norm": 0.09181234240531921, |
|
"learning_rate": 2.720971035752294e-07, |
|
"loss": 0.001, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 9.772197773750971, |
|
"grad_norm": 0.12787938117980957, |
|
"learning_rate": 2.577156498978917e-07, |
|
"loss": 0.0007, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 9.785141082060575, |
|
"grad_norm": 0.24333082139492035, |
|
"learning_rate": 2.43334196220554e-07, |
|
"loss": 0.0017, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 9.798084390370178, |
|
"grad_norm": 0.11896287649869919, |
|
"learning_rate": 2.2895274254321627e-07, |
|
"loss": 0.0038, |
|
"step": 37850 |
|
}, |
|
{ |
|
"epoch": 9.811027698679782, |
|
"grad_norm": 0.9559854865074158, |
|
"learning_rate": 2.145712888658786e-07, |
|
"loss": 0.0008, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 9.823971006989387, |
|
"grad_norm": 0.10168687999248505, |
|
"learning_rate": 2.0018983518854086e-07, |
|
"loss": 0.0009, |
|
"step": 37950 |
|
}, |
|
{ |
|
"epoch": 9.83691431529899, |
|
"grad_norm": 0.5217211246490479, |
|
"learning_rate": 1.8580838151120317e-07, |
|
"loss": 0.0008, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 9.849857623608594, |
|
"grad_norm": 0.03806522488594055, |
|
"learning_rate": 1.7142692783386547e-07, |
|
"loss": 0.0022, |
|
"step": 38050 |
|
}, |
|
{ |
|
"epoch": 9.862800931918198, |
|
"grad_norm": 0.05464790016412735, |
|
"learning_rate": 1.5704547415652776e-07, |
|
"loss": 0.0007, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 9.875744240227803, |
|
"grad_norm": 0.16794097423553467, |
|
"learning_rate": 1.4266402047919005e-07, |
|
"loss": 0.0008, |
|
"step": 38150 |
|
}, |
|
{ |
|
"epoch": 9.888687548537407, |
|
"grad_norm": 0.4726152718067169, |
|
"learning_rate": 1.2828256680185234e-07, |
|
"loss": 0.0008, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 9.90163085684701, |
|
"grad_norm": 0.09927275031805038, |
|
"learning_rate": 1.1390111312451463e-07, |
|
"loss": 0.0009, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 9.914574165156614, |
|
"grad_norm": 0.07914838194847107, |
|
"learning_rate": 9.951965944717692e-08, |
|
"loss": 0.0013, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 9.927517473466217, |
|
"grad_norm": 2.0464367866516113, |
|
"learning_rate": 8.513820576983922e-08, |
|
"loss": 0.0014, |
|
"step": 38350 |
|
}, |
|
{ |
|
"epoch": 9.940460781775823, |
|
"grad_norm": 0.0418660007417202, |
|
"learning_rate": 7.075675209250152e-08, |
|
"loss": 0.0008, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 9.953404090085426, |
|
"grad_norm": 0.08231345564126968, |
|
"learning_rate": 5.637529841516381e-08, |
|
"loss": 0.0007, |
|
"step": 38450 |
|
}, |
|
{ |
|
"epoch": 9.96634739839503, |
|
"grad_norm": 0.05043479800224304, |
|
"learning_rate": 4.19938447378261e-08, |
|
"loss": 0.0009, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 9.979290706704633, |
|
"grad_norm": 0.11412689834833145, |
|
"learning_rate": 2.7612391060488395e-08, |
|
"loss": 0.0008, |
|
"step": 38550 |
|
}, |
|
{ |
|
"epoch": 9.992234015014237, |
|
"grad_norm": 0.06143497675657272, |
|
"learning_rate": 1.323093738315069e-08, |
|
"loss": 0.0013, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.0013781202724203467, |
|
"eval_runtime": 89.6083, |
|
"eval_samples_per_second": 5.58, |
|
"eval_steps_per_second": 0.703, |
|
"eval_wer": 0.8082292432035268, |
|
"step": 38630 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 38630, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.00436846133248e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|