{
"best_metric": 27.064476999160213,
"best_model_checkpoint": "./whisper-large-v2/second/checkpoint-3000",
"epoch": 1.7164520743919884,
"eval_steps": 500,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002861230329041488,
"grad_norm": 21.954660415649414,
"learning_rate": 2.666666666666667e-07,
"loss": 3.1748,
"step": 5
},
{
"epoch": 0.005722460658082976,
"grad_norm": 21.253908157348633,
"learning_rate": 5.333333333333335e-07,
"loss": 3.0924,
"step": 10
},
{
"epoch": 0.008583690987124463,
"grad_norm": 12.657620429992676,
"learning_rate": 8.666666666666668e-07,
"loss": 3.0297,
"step": 15
},
{
"epoch": 0.011444921316165951,
"grad_norm": 9.762269973754883,
"learning_rate": 1.2000000000000002e-06,
"loss": 2.8025,
"step": 20
},
{
"epoch": 0.01430615164520744,
"grad_norm": 9.680177688598633,
"learning_rate": 1.5333333333333334e-06,
"loss": 2.3626,
"step": 25
},
{
"epoch": 0.017167381974248927,
"grad_norm": 15.449712753295898,
"learning_rate": 1.8666666666666669e-06,
"loss": 2.0648,
"step": 30
},
{
"epoch": 0.020028612303290415,
"grad_norm": 5.520791530609131,
"learning_rate": 2.2e-06,
"loss": 1.8108,
"step": 35
},
{
"epoch": 0.022889842632331903,
"grad_norm": 4.177562713623047,
"learning_rate": 2.5333333333333338e-06,
"loss": 1.7586,
"step": 40
},
{
"epoch": 0.02575107296137339,
"grad_norm": 4.502848148345947,
"learning_rate": 2.866666666666667e-06,
"loss": 1.6625,
"step": 45
},
{
"epoch": 0.02861230329041488,
"grad_norm": 4.344760417938232,
"learning_rate": 3.2000000000000003e-06,
"loss": 1.5478,
"step": 50
},
{
"epoch": 0.031473533619456366,
"grad_norm": 4.028677940368652,
"learning_rate": 3.5333333333333335e-06,
"loss": 1.5475,
"step": 55
},
{
"epoch": 0.034334763948497854,
"grad_norm": 4.139718532562256,
"learning_rate": 3.866666666666667e-06,
"loss": 1.3069,
"step": 60
},
{
"epoch": 0.03719599427753934,
"grad_norm": 5.028609752655029,
"learning_rate": 4.2000000000000004e-06,
"loss": 1.2144,
"step": 65
},
{
"epoch": 0.04005722460658083,
"grad_norm": 4.420126438140869,
"learning_rate": 4.533333333333334e-06,
"loss": 0.9845,
"step": 70
},
{
"epoch": 0.04291845493562232,
"grad_norm": 4.999370574951172,
"learning_rate": 4.7333333333333335e-06,
"loss": 1.0552,
"step": 75
},
{
"epoch": 0.045779685264663805,
"grad_norm": 3.8054537773132324,
"learning_rate": 5.0666666666666676e-06,
"loss": 0.9627,
"step": 80
},
{
"epoch": 0.04864091559370529,
"grad_norm": 4.616196632385254,
"learning_rate": 5.400000000000001e-06,
"loss": 0.954,
"step": 85
},
{
"epoch": 0.05150214592274678,
"grad_norm": 3.5308120250701904,
"learning_rate": 5.733333333333334e-06,
"loss": 0.8436,
"step": 90
},
{
"epoch": 0.05436337625178827,
"grad_norm": 3.5140163898468018,
"learning_rate": 6.066666666666667e-06,
"loss": 0.8272,
"step": 95
},
{
"epoch": 0.05722460658082976,
"grad_norm": 4.183732986450195,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.8312,
"step": 100
},
{
"epoch": 0.060085836909871244,
"grad_norm": 3.4372739791870117,
"learning_rate": 6.733333333333334e-06,
"loss": 0.8222,
"step": 105
},
{
"epoch": 0.06294706723891273,
"grad_norm": 4.268640041351318,
"learning_rate": 7.066666666666667e-06,
"loss": 0.722,
"step": 110
},
{
"epoch": 0.06580829756795423,
"grad_norm": 3.489219903945923,
"learning_rate": 7.4e-06,
"loss": 0.7458,
"step": 115
},
{
"epoch": 0.06866952789699571,
"grad_norm": 3.189199447631836,
"learning_rate": 7.733333333333334e-06,
"loss": 0.7707,
"step": 120
},
{
"epoch": 0.0715307582260372,
"grad_norm": 2.9029526710510254,
"learning_rate": 8.066666666666667e-06,
"loss": 0.7396,
"step": 125
},
{
"epoch": 0.07439198855507868,
"grad_norm": 3.8515231609344482,
"learning_rate": 8.400000000000001e-06,
"loss": 0.7496,
"step": 130
},
{
"epoch": 0.07725321888412018,
"grad_norm": 3.910144805908203,
"learning_rate": 8.733333333333333e-06,
"loss": 0.7224,
"step": 135
},
{
"epoch": 0.08011444921316166,
"grad_norm": 3.474600076675415,
"learning_rate": 9.066666666666667e-06,
"loss": 0.7432,
"step": 140
},
{
"epoch": 0.08297567954220315,
"grad_norm": 4.4429779052734375,
"learning_rate": 9.4e-06,
"loss": 0.7369,
"step": 145
},
{
"epoch": 0.08583690987124463,
"grad_norm": 2.9862446784973145,
"learning_rate": 9.733333333333334e-06,
"loss": 0.764,
"step": 150
},
{
"epoch": 0.08869814020028613,
"grad_norm": 2.7896616458892822,
"learning_rate": 9.997701149425289e-06,
"loss": 0.6994,
"step": 155
},
{
"epoch": 0.09155937052932761,
"grad_norm": 3.3134992122650146,
"learning_rate": 9.986206896551724e-06,
"loss": 0.7356,
"step": 160
},
{
"epoch": 0.0944206008583691,
"grad_norm": 3.047471046447754,
"learning_rate": 9.974712643678162e-06,
"loss": 0.7674,
"step": 165
},
{
"epoch": 0.09728183118741059,
"grad_norm": 3.043182373046875,
"learning_rate": 9.963218390804599e-06,
"loss": 0.6429,
"step": 170
},
{
"epoch": 0.10014306151645208,
"grad_norm": 3.04833722114563,
"learning_rate": 9.951724137931035e-06,
"loss": 0.6888,
"step": 175
},
{
"epoch": 0.10300429184549356,
"grad_norm": 3.2061524391174316,
"learning_rate": 9.940229885057472e-06,
"loss": 0.6523,
"step": 180
},
{
"epoch": 0.10586552217453506,
"grad_norm": 2.497347593307495,
"learning_rate": 9.928735632183909e-06,
"loss": 0.6607,
"step": 185
},
{
"epoch": 0.10872675250357654,
"grad_norm": 3.134495973587036,
"learning_rate": 9.917241379310347e-06,
"loss": 0.6465,
"step": 190
},
{
"epoch": 0.11158798283261803,
"grad_norm": 2.9670844078063965,
"learning_rate": 9.905747126436782e-06,
"loss": 0.7079,
"step": 195
},
{
"epoch": 0.11444921316165951,
"grad_norm": 2.81243896484375,
"learning_rate": 9.89425287356322e-06,
"loss": 0.6617,
"step": 200
},
{
"epoch": 0.11731044349070101,
"grad_norm": 3.66934871673584,
"learning_rate": 9.882758620689657e-06,
"loss": 0.696,
"step": 205
},
{
"epoch": 0.12017167381974249,
"grad_norm": 3.769137144088745,
"learning_rate": 9.871264367816093e-06,
"loss": 0.6105,
"step": 210
},
{
"epoch": 0.12303290414878398,
"grad_norm": 3.402257204055786,
"learning_rate": 9.85977011494253e-06,
"loss": 0.6433,
"step": 215
},
{
"epoch": 0.12589413447782546,
"grad_norm": 2.8765928745269775,
"learning_rate": 9.848275862068966e-06,
"loss": 0.6646,
"step": 220
},
{
"epoch": 0.12875536480686695,
"grad_norm": 2.7113330364227295,
"learning_rate": 9.836781609195403e-06,
"loss": 0.5717,
"step": 225
},
{
"epoch": 0.13161659513590845,
"grad_norm": 3.1563119888305664,
"learning_rate": 9.82528735632184e-06,
"loss": 0.579,
"step": 230
},
{
"epoch": 0.13447782546494993,
"grad_norm": 2.4642252922058105,
"learning_rate": 9.813793103448276e-06,
"loss": 0.6197,
"step": 235
},
{
"epoch": 0.13733905579399142,
"grad_norm": 2.8252813816070557,
"learning_rate": 9.802298850574713e-06,
"loss": 0.5913,
"step": 240
},
{
"epoch": 0.1402002861230329,
"grad_norm": 4.408820629119873,
"learning_rate": 9.79080459770115e-06,
"loss": 0.6163,
"step": 245
},
{
"epoch": 0.1430615164520744,
"grad_norm": 2.8997461795806885,
"learning_rate": 9.779310344827588e-06,
"loss": 0.5658,
"step": 250
},
{
"epoch": 0.1459227467811159,
"grad_norm": 2.968632459640503,
"learning_rate": 9.767816091954022e-06,
"loss": 0.6076,
"step": 255
},
{
"epoch": 0.14878397711015737,
"grad_norm": 2.8237318992614746,
"learning_rate": 9.75632183908046e-06,
"loss": 0.5988,
"step": 260
},
{
"epoch": 0.15164520743919885,
"grad_norm": 2.6587376594543457,
"learning_rate": 9.744827586206897e-06,
"loss": 0.6651,
"step": 265
},
{
"epoch": 0.15450643776824036,
"grad_norm": 2.794654369354248,
"learning_rate": 9.733333333333334e-06,
"loss": 0.6383,
"step": 270
},
{
"epoch": 0.15736766809728184,
"grad_norm": 2.70902419090271,
"learning_rate": 9.72183908045977e-06,
"loss": 0.6082,
"step": 275
},
{
"epoch": 0.16022889842632332,
"grad_norm": 2.6028928756713867,
"learning_rate": 9.710344827586207e-06,
"loss": 0.6708,
"step": 280
},
{
"epoch": 0.1630901287553648,
"grad_norm": 2.982057571411133,
"learning_rate": 9.698850574712645e-06,
"loss": 0.5788,
"step": 285
},
{
"epoch": 0.1659513590844063,
"grad_norm": 2.9549922943115234,
"learning_rate": 9.68735632183908e-06,
"loss": 0.5164,
"step": 290
},
{
"epoch": 0.1688125894134478,
"grad_norm": 5.054598808288574,
"learning_rate": 9.675862068965518e-06,
"loss": 0.5822,
"step": 295
},
{
"epoch": 0.17167381974248927,
"grad_norm": 2.511012315750122,
"learning_rate": 9.664367816091955e-06,
"loss": 0.5995,
"step": 300
},
{
"epoch": 0.17453505007153075,
"grad_norm": 3.233830451965332,
"learning_rate": 9.652873563218392e-06,
"loss": 0.5666,
"step": 305
},
{
"epoch": 0.17739628040057226,
"grad_norm": 2.620699167251587,
"learning_rate": 9.641379310344828e-06,
"loss": 0.5093,
"step": 310
},
{
"epoch": 0.18025751072961374,
"grad_norm": 5.6488728523254395,
"learning_rate": 9.629885057471265e-06,
"loss": 0.4977,
"step": 315
},
{
"epoch": 0.18311874105865522,
"grad_norm": 2.7786080837249756,
"learning_rate": 9.618390804597701e-06,
"loss": 0.5563,
"step": 320
},
{
"epoch": 0.1859799713876967,
"grad_norm": 2.7111008167266846,
"learning_rate": 9.606896551724138e-06,
"loss": 0.5267,
"step": 325
},
{
"epoch": 0.1888412017167382,
"grad_norm": 2.415985584259033,
"learning_rate": 9.595402298850576e-06,
"loss": 0.5335,
"step": 330
},
{
"epoch": 0.1917024320457797,
"grad_norm": 2.8600497245788574,
"learning_rate": 9.583908045977011e-06,
"loss": 0.5402,
"step": 335
},
{
"epoch": 0.19456366237482117,
"grad_norm": 2.4760890007019043,
"learning_rate": 9.57241379310345e-06,
"loss": 0.5985,
"step": 340
},
{
"epoch": 0.19742489270386265,
"grad_norm": 3.655806541442871,
"learning_rate": 9.560919540229886e-06,
"loss": 0.5025,
"step": 345
},
{
"epoch": 0.20028612303290416,
"grad_norm": 2.7408816814422607,
"learning_rate": 9.549425287356323e-06,
"loss": 0.5415,
"step": 350
},
{
"epoch": 0.20314735336194564,
"grad_norm": 9.877788543701172,
"learning_rate": 9.537931034482759e-06,
"loss": 0.4862,
"step": 355
},
{
"epoch": 0.20600858369098712,
"grad_norm": 2.2370998859405518,
"learning_rate": 9.526436781609196e-06,
"loss": 0.5054,
"step": 360
},
{
"epoch": 0.2088698140200286,
"grad_norm": 3.0609054565429688,
"learning_rate": 9.514942528735634e-06,
"loss": 0.5602,
"step": 365
},
{
"epoch": 0.2117310443490701,
"grad_norm": 2.412261486053467,
"learning_rate": 9.503448275862069e-06,
"loss": 0.536,
"step": 370
},
{
"epoch": 0.2145922746781116,
"grad_norm": 2.7460951805114746,
"learning_rate": 9.491954022988507e-06,
"loss": 0.525,
"step": 375
},
{
"epoch": 0.21745350500715308,
"grad_norm": 2.7391223907470703,
"learning_rate": 9.480459770114944e-06,
"loss": 0.5462,
"step": 380
},
{
"epoch": 0.22031473533619456,
"grad_norm": 2.774522066116333,
"learning_rate": 9.46896551724138e-06,
"loss": 0.5564,
"step": 385
},
{
"epoch": 0.22317596566523606,
"grad_norm": 2.564851999282837,
"learning_rate": 9.457471264367817e-06,
"loss": 0.5193,
"step": 390
},
{
"epoch": 0.22603719599427755,
"grad_norm": 2.2967424392700195,
"learning_rate": 9.445977011494253e-06,
"loss": 0.4689,
"step": 395
},
{
"epoch": 0.22889842632331903,
"grad_norm": 2.590604782104492,
"learning_rate": 9.43448275862069e-06,
"loss": 0.5028,
"step": 400
},
{
"epoch": 0.2317596566523605,
"grad_norm": 2.7005531787872314,
"learning_rate": 9.422988505747127e-06,
"loss": 0.5287,
"step": 405
},
{
"epoch": 0.23462088698140202,
"grad_norm": 2.4030439853668213,
"learning_rate": 9.411494252873565e-06,
"loss": 0.5581,
"step": 410
},
{
"epoch": 0.2374821173104435,
"grad_norm": 2.6434333324432373,
"learning_rate": 9.4e-06,
"loss": 0.4987,
"step": 415
},
{
"epoch": 0.24034334763948498,
"grad_norm": 2.167104959487915,
"learning_rate": 9.388505747126438e-06,
"loss": 0.4725,
"step": 420
},
{
"epoch": 0.24320457796852646,
"grad_norm": 2.4801626205444336,
"learning_rate": 9.377011494252875e-06,
"loss": 0.5301,
"step": 425
},
{
"epoch": 0.24606580829756797,
"grad_norm": 2.600285291671753,
"learning_rate": 9.365517241379311e-06,
"loss": 0.5267,
"step": 430
},
{
"epoch": 0.24892703862660945,
"grad_norm": 2.541861057281494,
"learning_rate": 9.354022988505748e-06,
"loss": 0.5006,
"step": 435
},
{
"epoch": 0.25178826895565093,
"grad_norm": 2.667675018310547,
"learning_rate": 9.342528735632184e-06,
"loss": 0.4852,
"step": 440
},
{
"epoch": 0.25464949928469244,
"grad_norm": 3.0567309856414795,
"learning_rate": 9.331034482758623e-06,
"loss": 0.4525,
"step": 445
},
{
"epoch": 0.2575107296137339,
"grad_norm": 2.588550090789795,
"learning_rate": 9.319540229885058e-06,
"loss": 0.5356,
"step": 450
},
{
"epoch": 0.2603719599427754,
"grad_norm": 2.520589828491211,
"learning_rate": 9.308045977011496e-06,
"loss": 0.5632,
"step": 455
},
{
"epoch": 0.2632331902718169,
"grad_norm": 2.466790199279785,
"learning_rate": 9.296551724137932e-06,
"loss": 0.5452,
"step": 460
},
{
"epoch": 0.26609442060085836,
"grad_norm": 2.561587333679199,
"learning_rate": 9.285057471264369e-06,
"loss": 0.5066,
"step": 465
},
{
"epoch": 0.26895565092989987,
"grad_norm": 2.183382511138916,
"learning_rate": 9.273563218390806e-06,
"loss": 0.4582,
"step": 470
},
{
"epoch": 0.2718168812589413,
"grad_norm": 3.0747621059417725,
"learning_rate": 9.262068965517242e-06,
"loss": 0.4871,
"step": 475
},
{
"epoch": 0.27467811158798283,
"grad_norm": 3.8696835041046143,
"learning_rate": 9.250574712643679e-06,
"loss": 0.4493,
"step": 480
},
{
"epoch": 0.27753934191702434,
"grad_norm": 2.3706977367401123,
"learning_rate": 9.239080459770115e-06,
"loss": 0.4401,
"step": 485
},
{
"epoch": 0.2804005722460658,
"grad_norm": 4.646365642547607,
"learning_rate": 9.227586206896552e-06,
"loss": 0.4801,
"step": 490
},
{
"epoch": 0.2832618025751073,
"grad_norm": 2.519423723220825,
"learning_rate": 9.216091954022988e-06,
"loss": 0.4543,
"step": 495
},
{
"epoch": 0.2861230329041488,
"grad_norm": 2.541934013366699,
"learning_rate": 9.204597701149425e-06,
"loss": 0.5319,
"step": 500
},
{
"epoch": 0.2861230329041488,
"eval_bleu": 0.480183300780465,
"eval_cer": 23.119512122029427,
"eval_loss": 0.7426198720932007,
"eval_runtime": 501.4653,
"eval_samples_per_second": 2.638,
"eval_steps_per_second": 0.331,
"eval_wer": 40.98161799010917,
"step": 500
},
{
"epoch": 0.28898426323319026,
"grad_norm": 4.23681116104126,
"learning_rate": 9.193103448275863e-06,
"loss": 0.4907,
"step": 505
},
{
"epoch": 0.2918454935622318,
"grad_norm": 2.405881404876709,
"learning_rate": 9.1816091954023e-06,
"loss": 0.4101,
"step": 510
},
{
"epoch": 0.2947067238912732,
"grad_norm": 2.5162203311920166,
"learning_rate": 9.170114942528736e-06,
"loss": 0.4606,
"step": 515
},
{
"epoch": 0.29756795422031473,
"grad_norm": 2.335982084274292,
"learning_rate": 9.158620689655173e-06,
"loss": 0.4662,
"step": 520
},
{
"epoch": 0.30042918454935624,
"grad_norm": 2.8497443199157715,
"learning_rate": 9.14712643678161e-06,
"loss": 0.4826,
"step": 525
},
{
"epoch": 0.3032904148783977,
"grad_norm": 2.6003220081329346,
"learning_rate": 9.135632183908046e-06,
"loss": 0.4986,
"step": 530
},
{
"epoch": 0.3061516452074392,
"grad_norm": 2.5119142532348633,
"learning_rate": 9.124137931034483e-06,
"loss": 0.4771,
"step": 535
},
{
"epoch": 0.3090128755364807,
"grad_norm": 8.402257919311523,
"learning_rate": 9.112643678160921e-06,
"loss": 0.4317,
"step": 540
},
{
"epoch": 0.31187410586552217,
"grad_norm": 2.056044816970825,
"learning_rate": 9.101149425287356e-06,
"loss": 0.4711,
"step": 545
},
{
"epoch": 0.3147353361945637,
"grad_norm": 2.1775403022766113,
"learning_rate": 9.089655172413794e-06,
"loss": 0.4298,
"step": 550
},
{
"epoch": 0.31759656652360513,
"grad_norm": 2.420010805130005,
"learning_rate": 9.07816091954023e-06,
"loss": 0.4389,
"step": 555
},
{
"epoch": 0.32045779685264664,
"grad_norm": 2.5747554302215576,
"learning_rate": 9.066666666666667e-06,
"loss": 0.4092,
"step": 560
},
{
"epoch": 0.32331902718168815,
"grad_norm": 2.8916008472442627,
"learning_rate": 9.055172413793104e-06,
"loss": 0.5179,
"step": 565
},
{
"epoch": 0.3261802575107296,
"grad_norm": 2.4451074600219727,
"learning_rate": 9.04367816091954e-06,
"loss": 0.4783,
"step": 570
},
{
"epoch": 0.3290414878397711,
"grad_norm": 2.4466512203216553,
"learning_rate": 9.032183908045977e-06,
"loss": 0.4781,
"step": 575
},
{
"epoch": 0.3319027181688126,
"grad_norm": 2.4510862827301025,
"learning_rate": 9.020689655172414e-06,
"loss": 0.475,
"step": 580
},
{
"epoch": 0.33476394849785407,
"grad_norm": 2.7609736919403076,
"learning_rate": 9.009195402298852e-06,
"loss": 0.4245,
"step": 585
},
{
"epoch": 0.3376251788268956,
"grad_norm": 2.5262796878814697,
"learning_rate": 8.997701149425289e-06,
"loss": 0.4323,
"step": 590
},
{
"epoch": 0.34048640915593703,
"grad_norm": 2.2738754749298096,
"learning_rate": 8.986206896551725e-06,
"loss": 0.4612,
"step": 595
},
{
"epoch": 0.34334763948497854,
"grad_norm": 2.770024538040161,
"learning_rate": 8.974712643678162e-06,
"loss": 0.4388,
"step": 600
},
{
"epoch": 0.34620886981402005,
"grad_norm": 2.2855265140533447,
"learning_rate": 8.963218390804598e-06,
"loss": 0.4388,
"step": 605
},
{
"epoch": 0.3490701001430615,
"grad_norm": 2.3497042655944824,
"learning_rate": 8.951724137931035e-06,
"loss": 0.4515,
"step": 610
},
{
"epoch": 0.351931330472103,
"grad_norm": 2.3851680755615234,
"learning_rate": 8.940229885057471e-06,
"loss": 0.4598,
"step": 615
},
{
"epoch": 0.3547925608011445,
"grad_norm": 2.254589319229126,
"learning_rate": 8.92873563218391e-06,
"loss": 0.4421,
"step": 620
},
{
"epoch": 0.35765379113018597,
"grad_norm": 2.236403703689575,
"learning_rate": 8.917241379310345e-06,
"loss": 0.4465,
"step": 625
},
{
"epoch": 0.3605150214592275,
"grad_norm": 2.3994998931884766,
"learning_rate": 8.905747126436783e-06,
"loss": 0.4199,
"step": 630
},
{
"epoch": 0.36337625178826893,
"grad_norm": 2.434783458709717,
"learning_rate": 8.89425287356322e-06,
"loss": 0.4444,
"step": 635
},
{
"epoch": 0.36623748211731044,
"grad_norm": 2.076225757598877,
"learning_rate": 8.882758620689656e-06,
"loss": 0.4563,
"step": 640
},
{
"epoch": 0.36909871244635195,
"grad_norm": 2.2288596630096436,
"learning_rate": 8.871264367816093e-06,
"loss": 0.451,
"step": 645
},
{
"epoch": 0.3719599427753934,
"grad_norm": 2.42144775390625,
"learning_rate": 8.85977011494253e-06,
"loss": 0.4545,
"step": 650
},
{
"epoch": 0.3748211731044349,
"grad_norm": 2.624284267425537,
"learning_rate": 8.848275862068966e-06,
"loss": 0.4045,
"step": 655
},
{
"epoch": 0.3776824034334764,
"grad_norm": 9.528400421142578,
"learning_rate": 8.836781609195402e-06,
"loss": 0.4278,
"step": 660
},
{
"epoch": 0.3805436337625179,
"grad_norm": 2.683384656906128,
"learning_rate": 8.82528735632184e-06,
"loss": 0.4331,
"step": 665
},
{
"epoch": 0.3834048640915594,
"grad_norm": 2.5828654766082764,
"learning_rate": 8.813793103448277e-06,
"loss": 0.4128,
"step": 670
},
{
"epoch": 0.38626609442060084,
"grad_norm": 2.038062334060669,
"learning_rate": 8.802298850574714e-06,
"loss": 0.4491,
"step": 675
},
{
"epoch": 0.38912732474964234,
"grad_norm": 2.5008745193481445,
"learning_rate": 8.79080459770115e-06,
"loss": 0.4472,
"step": 680
},
{
"epoch": 0.39198855507868385,
"grad_norm": 1.8770477771759033,
"learning_rate": 8.779310344827587e-06,
"loss": 0.3688,
"step": 685
},
{
"epoch": 0.3948497854077253,
"grad_norm": 6.977114200592041,
"learning_rate": 8.767816091954024e-06,
"loss": 0.4046,
"step": 690
},
{
"epoch": 0.3977110157367668,
"grad_norm": 4.948948383331299,
"learning_rate": 8.75632183908046e-06,
"loss": 0.4068,
"step": 695
},
{
"epoch": 0.4005722460658083,
"grad_norm": 2.363247871398926,
"learning_rate": 8.744827586206898e-06,
"loss": 0.4461,
"step": 700
},
{
"epoch": 0.4034334763948498,
"grad_norm": 2.1947221755981445,
"learning_rate": 8.733333333333333e-06,
"loss": 0.4199,
"step": 705
},
{
"epoch": 0.4062947067238913,
"grad_norm": 2.2790868282318115,
"learning_rate": 8.721839080459772e-06,
"loss": 0.4671,
"step": 710
},
{
"epoch": 0.40915593705293274,
"grad_norm": 2.0782546997070312,
"learning_rate": 8.710344827586208e-06,
"loss": 0.4433,
"step": 715
},
{
"epoch": 0.41201716738197425,
"grad_norm": 2.1484241485595703,
"learning_rate": 8.698850574712645e-06,
"loss": 0.4461,
"step": 720
},
{
"epoch": 0.41487839771101576,
"grad_norm": 3.469759225845337,
"learning_rate": 8.687356321839081e-06,
"loss": 0.4165,
"step": 725
},
{
"epoch": 0.4177396280400572,
"grad_norm": 2.101419687271118,
"learning_rate": 8.675862068965518e-06,
"loss": 0.4002,
"step": 730
},
{
"epoch": 0.4206008583690987,
"grad_norm": 2.3812413215637207,
"learning_rate": 8.664367816091954e-06,
"loss": 0.4196,
"step": 735
},
{
"epoch": 0.4234620886981402,
"grad_norm": 2.1074626445770264,
"learning_rate": 8.652873563218391e-06,
"loss": 0.4543,
"step": 740
},
{
"epoch": 0.4263233190271817,
"grad_norm": 2.31530499458313,
"learning_rate": 8.641379310344828e-06,
"loss": 0.4452,
"step": 745
},
{
"epoch": 0.4291845493562232,
"grad_norm": 2.038053035736084,
"learning_rate": 8.629885057471266e-06,
"loss": 0.3887,
"step": 750
},
{
"epoch": 0.43204577968526464,
"grad_norm": 2.248845100402832,
"learning_rate": 8.6183908045977e-06,
"loss": 0.3911,
"step": 755
},
{
"epoch": 0.43490701001430615,
"grad_norm": 2.2125308513641357,
"learning_rate": 8.606896551724139e-06,
"loss": 0.3985,
"step": 760
},
{
"epoch": 0.43776824034334766,
"grad_norm": 2.6718173027038574,
"learning_rate": 8.595402298850576e-06,
"loss": 0.4349,
"step": 765
},
{
"epoch": 0.4406294706723891,
"grad_norm": 2.338291645050049,
"learning_rate": 8.583908045977012e-06,
"loss": 0.4338,
"step": 770
},
{
"epoch": 0.4434907010014306,
"grad_norm": 2.2783362865448,
"learning_rate": 8.572413793103449e-06,
"loss": 0.4223,
"step": 775
},
{
"epoch": 0.44635193133047213,
"grad_norm": 5.572329521179199,
"learning_rate": 8.560919540229885e-06,
"loss": 0.373,
"step": 780
},
{
"epoch": 0.4492131616595136,
"grad_norm": 2.154139518737793,
"learning_rate": 8.549425287356322e-06,
"loss": 0.3849,
"step": 785
},
{
"epoch": 0.4520743919885551,
"grad_norm": 2.2320711612701416,
"learning_rate": 8.537931034482759e-06,
"loss": 0.398,
"step": 790
},
{
"epoch": 0.45493562231759654,
"grad_norm": 1.9622141122817993,
"learning_rate": 8.526436781609197e-06,
"loss": 0.4432,
"step": 795
},
{
"epoch": 0.45779685264663805,
"grad_norm": 2.268714189529419,
"learning_rate": 8.514942528735632e-06,
"loss": 0.4021,
"step": 800
},
{
"epoch": 0.46065808297567956,
"grad_norm": 2.613569974899292,
"learning_rate": 8.50344827586207e-06,
"loss": 0.4017,
"step": 805
},
{
"epoch": 0.463519313304721,
"grad_norm": 2.3678150177001953,
"learning_rate": 8.491954022988507e-06,
"loss": 0.3759,
"step": 810
},
{
"epoch": 0.4663805436337625,
"grad_norm": 2.136413335800171,
"learning_rate": 8.480459770114943e-06,
"loss": 0.4278,
"step": 815
},
{
"epoch": 0.46924177396280403,
"grad_norm": 2.0563809871673584,
"learning_rate": 8.46896551724138e-06,
"loss": 0.4112,
"step": 820
},
{
"epoch": 0.4721030042918455,
"grad_norm": 3.3801417350769043,
"learning_rate": 8.457471264367816e-06,
"loss": 0.3994,
"step": 825
},
{
"epoch": 0.474964234620887,
"grad_norm": 2.1917874813079834,
"learning_rate": 8.445977011494255e-06,
"loss": 0.3993,
"step": 830
},
{
"epoch": 0.47782546494992845,
"grad_norm": 2.640014410018921,
"learning_rate": 8.43448275862069e-06,
"loss": 0.4242,
"step": 835
},
{
"epoch": 0.48068669527896996,
"grad_norm": 2.17958664894104,
"learning_rate": 8.422988505747128e-06,
"loss": 0.3786,
"step": 840
},
{
"epoch": 0.48354792560801146,
"grad_norm": 2.140859603881836,
"learning_rate": 8.411494252873564e-06,
"loss": 0.3993,
"step": 845
},
{
"epoch": 0.4864091559370529,
"grad_norm": 2.2732794284820557,
"learning_rate": 8.400000000000001e-06,
"loss": 0.3761,
"step": 850
},
{
"epoch": 0.4892703862660944,
"grad_norm": 2.2494969367980957,
"learning_rate": 8.388505747126437e-06,
"loss": 0.403,
"step": 855
},
{
"epoch": 0.49213161659513593,
"grad_norm": 2.0700440406799316,
"learning_rate": 8.377011494252874e-06,
"loss": 0.4249,
"step": 860
},
{
"epoch": 0.4949928469241774,
"grad_norm": 2.3763673305511475,
"learning_rate": 8.36551724137931e-06,
"loss": 0.3646,
"step": 865
},
{
"epoch": 0.4978540772532189,
"grad_norm": 2.0693488121032715,
"learning_rate": 8.354022988505747e-06,
"loss": 0.3974,
"step": 870
},
{
"epoch": 0.5007153075822603,
"grad_norm": 2.108673095703125,
"learning_rate": 8.342528735632185e-06,
"loss": 0.3871,
"step": 875
},
{
"epoch": 0.5035765379113019,
"grad_norm": 4.603137493133545,
"learning_rate": 8.33103448275862e-06,
"loss": 0.392,
"step": 880
},
{
"epoch": 0.5064377682403434,
"grad_norm": 2.015547275543213,
"learning_rate": 8.319540229885059e-06,
"loss": 0.4202,
"step": 885
},
{
"epoch": 0.5092989985693849,
"grad_norm": 2.3268239498138428,
"learning_rate": 8.308045977011495e-06,
"loss": 0.3995,
"step": 890
},
{
"epoch": 0.5121602288984263,
"grad_norm": 2.2089991569519043,
"learning_rate": 8.296551724137932e-06,
"loss": 0.4013,
"step": 895
},
{
"epoch": 0.5150214592274678,
"grad_norm": 2.1193923950195312,
"learning_rate": 8.285057471264368e-06,
"loss": 0.3817,
"step": 900
},
{
"epoch": 0.5178826895565093,
"grad_norm": 1.7973988056182861,
"learning_rate": 8.273563218390805e-06,
"loss": 0.3724,
"step": 905
},
{
"epoch": 0.5207439198855508,
"grad_norm": 3.1105146408081055,
"learning_rate": 8.262068965517243e-06,
"loss": 0.3775,
"step": 910
},
{
"epoch": 0.5236051502145923,
"grad_norm": 2.1949639320373535,
"learning_rate": 8.250574712643678e-06,
"loss": 0.4222,
"step": 915
},
{
"epoch": 0.5264663805436338,
"grad_norm": 9.827264785766602,
"learning_rate": 8.239080459770116e-06,
"loss": 0.399,
"step": 920
},
{
"epoch": 0.5293276108726752,
"grad_norm": 2.282693862915039,
"learning_rate": 8.227586206896553e-06,
"loss": 0.3708,
"step": 925
},
{
"epoch": 0.5321888412017167,
"grad_norm": 2.296790599822998,
"learning_rate": 8.21609195402299e-06,
"loss": 0.4161,
"step": 930
},
{
"epoch": 0.5350500715307582,
"grad_norm": 1.8870588541030884,
"learning_rate": 8.204597701149426e-06,
"loss": 0.3556,
"step": 935
},
{
"epoch": 0.5379113018597997,
"grad_norm": 2.1730587482452393,
"learning_rate": 8.193103448275863e-06,
"loss": 0.3998,
"step": 940
},
{
"epoch": 0.5407725321888412,
"grad_norm": 2.2204318046569824,
"learning_rate": 8.1816091954023e-06,
"loss": 0.3458,
"step": 945
},
{
"epoch": 0.5436337625178826,
"grad_norm": 1.9323971271514893,
"learning_rate": 8.170114942528736e-06,
"loss": 0.359,
"step": 950
},
{
"epoch": 0.5464949928469242,
"grad_norm": 13.680197715759277,
"learning_rate": 8.158620689655174e-06,
"loss": 0.4351,
"step": 955
},
{
"epoch": 0.5493562231759657,
"grad_norm": 2.302557945251465,
"learning_rate": 8.147126436781609e-06,
"loss": 0.3962,
"step": 960
},
{
"epoch": 0.5522174535050072,
"grad_norm": 2.17879056930542,
"learning_rate": 8.135632183908047e-06,
"loss": 0.4163,
"step": 965
},
{
"epoch": 0.5550786838340487,
"grad_norm": 1.8171741962432861,
"learning_rate": 8.124137931034484e-06,
"loss": 0.4154,
"step": 970
},
{
"epoch": 0.5579399141630901,
"grad_norm": 2.161654233932495,
"learning_rate": 8.11264367816092e-06,
"loss": 0.4062,
"step": 975
},
{
"epoch": 0.5608011444921316,
"grad_norm": 2.201740264892578,
"learning_rate": 8.101149425287357e-06,
"loss": 0.3761,
"step": 980
},
{
"epoch": 0.5636623748211731,
"grad_norm": 1.8697162866592407,
"learning_rate": 8.089655172413794e-06,
"loss": 0.3507,
"step": 985
},
{
"epoch": 0.5665236051502146,
"grad_norm": 2.4330639839172363,
"learning_rate": 8.078160919540232e-06,
"loss": 0.3872,
"step": 990
},
{
"epoch": 0.5693848354792561,
"grad_norm": 2.6203877925872803,
"learning_rate": 8.066666666666667e-06,
"loss": 0.4102,
"step": 995
},
{
"epoch": 0.5722460658082976,
"grad_norm": 1.927872896194458,
"learning_rate": 8.055172413793103e-06,
"loss": 0.3354,
"step": 1000
},
{
"epoch": 0.5722460658082976,
"eval_bleu": 0.5331656154016544,
"eval_cer": 20.120471095927357,
"eval_loss": 0.6430336833000183,
"eval_runtime": 484.3877,
"eval_samples_per_second": 2.731,
"eval_steps_per_second": 0.343,
"eval_wer": 36.26947839880564,
"step": 1000
},
{
"epoch": 0.575107296137339,
"grad_norm": 2.234710216522217,
"learning_rate": 8.043678160919542e-06,
"loss": 0.3395,
"step": 1005
},
{
"epoch": 0.5779685264663805,
"grad_norm": 1.911469578742981,
"learning_rate": 8.032183908045977e-06,
"loss": 0.3965,
"step": 1010
},
{
"epoch": 0.580829756795422,
"grad_norm": 2.7243082523345947,
"learning_rate": 8.020689655172415e-06,
"loss": 0.381,
"step": 1015
},
{
"epoch": 0.5836909871244635,
"grad_norm": 2.4552667140960693,
"learning_rate": 8.009195402298851e-06,
"loss": 0.357,
"step": 1020
},
{
"epoch": 0.586552217453505,
"grad_norm": 2.055844306945801,
"learning_rate": 7.997701149425288e-06,
"loss": 0.3691,
"step": 1025
},
{
"epoch": 0.5894134477825465,
"grad_norm": 2.588245391845703,
"learning_rate": 7.986206896551725e-06,
"loss": 0.3515,
"step": 1030
},
{
"epoch": 0.592274678111588,
"grad_norm": 2.321615695953369,
"learning_rate": 7.974712643678161e-06,
"loss": 0.3567,
"step": 1035
},
{
"epoch": 0.5951359084406295,
"grad_norm": 2.2714169025421143,
"learning_rate": 7.963218390804598e-06,
"loss": 0.3676,
"step": 1040
},
{
"epoch": 0.597997138769671,
"grad_norm": 2.0706753730773926,
"learning_rate": 7.951724137931034e-06,
"loss": 0.39,
"step": 1045
},
{
"epoch": 0.6008583690987125,
"grad_norm": 1.8779950141906738,
"learning_rate": 7.940229885057473e-06,
"loss": 0.3361,
"step": 1050
},
{
"epoch": 0.6037195994277539,
"grad_norm": 3.2601895332336426,
"learning_rate": 7.928735632183907e-06,
"loss": 0.3611,
"step": 1055
},
{
"epoch": 0.6065808297567954,
"grad_norm": 1.9203985929489136,
"learning_rate": 7.917241379310346e-06,
"loss": 0.3906,
"step": 1060
},
{
"epoch": 0.6094420600858369,
"grad_norm": 2.388303518295288,
"learning_rate": 7.905747126436782e-06,
"loss": 0.3871,
"step": 1065
},
{
"epoch": 0.6123032904148784,
"grad_norm": 1.9799396991729736,
"learning_rate": 7.894252873563219e-06,
"loss": 0.3654,
"step": 1070
},
{
"epoch": 0.6151645207439199,
"grad_norm": 2.21216082572937,
"learning_rate": 7.882758620689655e-06,
"loss": 0.4078,
"step": 1075
},
{
"epoch": 0.6180257510729614,
"grad_norm": 1.9444260597229004,
"learning_rate": 7.871264367816092e-06,
"loss": 0.3693,
"step": 1080
},
{
"epoch": 0.6208869814020028,
"grad_norm": 1.9393128156661987,
"learning_rate": 7.85977011494253e-06,
"loss": 0.3599,
"step": 1085
},
{
"epoch": 0.6237482117310443,
"grad_norm": 1.9323508739471436,
"learning_rate": 7.848275862068965e-06,
"loss": 0.3322,
"step": 1090
},
{
"epoch": 0.6266094420600858,
"grad_norm": 2.9762513637542725,
"learning_rate": 7.836781609195403e-06,
"loss": 0.3957,
"step": 1095
},
{
"epoch": 0.6294706723891274,
"grad_norm": 2.230358362197876,
"learning_rate": 7.82528735632184e-06,
"loss": 0.3563,
"step": 1100
},
{
"epoch": 0.6323319027181689,
"grad_norm": 2.114469051361084,
"learning_rate": 7.813793103448277e-06,
"loss": 0.3902,
"step": 1105
},
{
"epoch": 0.6351931330472103,
"grad_norm": 3.268624782562256,
"learning_rate": 7.802298850574713e-06,
"loss": 0.3582,
"step": 1110
},
{
"epoch": 0.6380543633762518,
"grad_norm": 2.0319249629974365,
"learning_rate": 7.79080459770115e-06,
"loss": 0.3368,
"step": 1115
},
{
"epoch": 0.6409155937052933,
"grad_norm": 2.1834099292755127,
"learning_rate": 7.779310344827586e-06,
"loss": 0.4234,
"step": 1120
},
{
"epoch": 0.6437768240343348,
"grad_norm": 2.5935869216918945,
"learning_rate": 7.767816091954023e-06,
"loss": 0.356,
"step": 1125
},
{
"epoch": 0.6466380543633763,
"grad_norm": 2.1476664543151855,
"learning_rate": 7.756321839080461e-06,
"loss": 0.3941,
"step": 1130
},
{
"epoch": 0.6494992846924177,
"grad_norm": 2.005326986312866,
"learning_rate": 7.744827586206896e-06,
"loss": 0.3448,
"step": 1135
},
{
"epoch": 0.6523605150214592,
"grad_norm": 2.055753469467163,
"learning_rate": 7.733333333333334e-06,
"loss": 0.3169,
"step": 1140
},
{
"epoch": 0.6552217453505007,
"grad_norm": 2.293834924697876,
"learning_rate": 7.721839080459771e-06,
"loss": 0.3939,
"step": 1145
},
{
"epoch": 0.6580829756795422,
"grad_norm": 2.178842782974243,
"learning_rate": 7.710344827586208e-06,
"loss": 0.3473,
"step": 1150
},
{
"epoch": 0.6609442060085837,
"grad_norm": 2.022002935409546,
"learning_rate": 7.698850574712644e-06,
"loss": 0.3884,
"step": 1155
},
{
"epoch": 0.6638054363376252,
"grad_norm": 2.30517840385437,
"learning_rate": 7.68735632183908e-06,
"loss": 0.3516,
"step": 1160
},
{
"epoch": 0.6666666666666666,
"grad_norm": 2.340341806411743,
"learning_rate": 7.675862068965519e-06,
"loss": 0.3469,
"step": 1165
},
{
"epoch": 0.6695278969957081,
"grad_norm": 1.8878148794174194,
"learning_rate": 7.664367816091954e-06,
"loss": 0.3484,
"step": 1170
},
{
"epoch": 0.6723891273247496,
"grad_norm": 2.0944101810455322,
"learning_rate": 7.652873563218392e-06,
"loss": 0.3395,
"step": 1175
},
{
"epoch": 0.6752503576537912,
"grad_norm": 3.42524790763855,
"learning_rate": 7.641379310344829e-06,
"loss": 0.2986,
"step": 1180
},
{
"epoch": 0.6781115879828327,
"grad_norm": 2.1868836879730225,
"learning_rate": 7.629885057471265e-06,
"loss": 0.3645,
"step": 1185
},
{
"epoch": 0.6809728183118741,
"grad_norm": 2.6077160835266113,
"learning_rate": 7.618390804597702e-06,
"loss": 0.3482,
"step": 1190
},
{
"epoch": 0.6838340486409156,
"grad_norm": 2.132552146911621,
"learning_rate": 7.6068965517241385e-06,
"loss": 0.3358,
"step": 1195
},
{
"epoch": 0.6866952789699571,
"grad_norm": 1.9296846389770508,
"learning_rate": 7.595402298850575e-06,
"loss": 0.4169,
"step": 1200
},
{
"epoch": 0.6895565092989986,
"grad_norm": 1.8717005252838135,
"learning_rate": 7.583908045977012e-06,
"loss": 0.4115,
"step": 1205
},
{
"epoch": 0.6924177396280401,
"grad_norm": 1.8066362142562866,
"learning_rate": 7.572413793103449e-06,
"loss": 0.3517,
"step": 1210
},
{
"epoch": 0.6952789699570815,
"grad_norm": 2.3206546306610107,
"learning_rate": 7.560919540229885e-06,
"loss": 0.3548,
"step": 1215
},
{
"epoch": 0.698140200286123,
"grad_norm": 3.7949299812316895,
"learning_rate": 7.549425287356322e-06,
"loss": 0.393,
"step": 1220
},
{
"epoch": 0.7010014306151645,
"grad_norm": 2.03859281539917,
"learning_rate": 7.53793103448276e-06,
"loss": 0.4031,
"step": 1225
},
{
"epoch": 0.703862660944206,
"grad_norm": 1.8809409141540527,
"learning_rate": 7.526436781609196e-06,
"loss": 0.3643,
"step": 1230
},
{
"epoch": 0.7067238912732475,
"grad_norm": 2.139955759048462,
"learning_rate": 7.514942528735633e-06,
"loss": 0.3614,
"step": 1235
},
{
"epoch": 0.709585121602289,
"grad_norm": 5.799084186553955,
"learning_rate": 7.503448275862069e-06,
"loss": 0.3257,
"step": 1240
},
{
"epoch": 0.7124463519313304,
"grad_norm": 2.0690135955810547,
"learning_rate": 7.491954022988507e-06,
"loss": 0.3545,
"step": 1245
},
{
"epoch": 0.7153075822603719,
"grad_norm": 1.9870096445083618,
"learning_rate": 7.4804597701149425e-06,
"loss": 0.3142,
"step": 1250
},
{
"epoch": 0.7181688125894135,
"grad_norm": 51.02791976928711,
"learning_rate": 7.46896551724138e-06,
"loss": 0.3382,
"step": 1255
},
{
"epoch": 0.721030042918455,
"grad_norm": 2.2177982330322266,
"learning_rate": 7.457471264367817e-06,
"loss": 0.3917,
"step": 1260
},
{
"epoch": 0.7238912732474965,
"grad_norm": 2.1543567180633545,
"learning_rate": 7.445977011494253e-06,
"loss": 0.3522,
"step": 1265
},
{
"epoch": 0.7267525035765379,
"grad_norm": 2.217419147491455,
"learning_rate": 7.4344827586206906e-06,
"loss": 0.3539,
"step": 1270
},
{
"epoch": 0.7296137339055794,
"grad_norm": 2.348618984222412,
"learning_rate": 7.422988505747127e-06,
"loss": 0.336,
"step": 1275
},
{
"epoch": 0.7324749642346209,
"grad_norm": 2.39912486076355,
"learning_rate": 7.411494252873564e-06,
"loss": 0.2934,
"step": 1280
},
{
"epoch": 0.7353361945636624,
"grad_norm": 2.20755934715271,
"learning_rate": 7.4e-06,
"loss": 0.3641,
"step": 1285
},
{
"epoch": 0.7381974248927039,
"grad_norm": 2.0606160163879395,
"learning_rate": 7.388505747126438e-06,
"loss": 0.3378,
"step": 1290
},
{
"epoch": 0.7410586552217453,
"grad_norm": 2.7238168716430664,
"learning_rate": 7.3770114942528735e-06,
"loss": 0.3276,
"step": 1295
},
{
"epoch": 0.7439198855507868,
"grad_norm": 1.9976911544799805,
"learning_rate": 7.365517241379311e-06,
"loss": 0.3222,
"step": 1300
},
{
"epoch": 0.7467811158798283,
"grad_norm": 1.986830711364746,
"learning_rate": 7.354022988505748e-06,
"loss": 0.3396,
"step": 1305
},
{
"epoch": 0.7496423462088698,
"grad_norm": 2.4798574447631836,
"learning_rate": 7.342528735632185e-06,
"loss": 0.371,
"step": 1310
},
{
"epoch": 0.7525035765379113,
"grad_norm": 2.417635917663574,
"learning_rate": 7.3310344827586215e-06,
"loss": 0.306,
"step": 1315
},
{
"epoch": 0.7553648068669528,
"grad_norm": 2.8017005920410156,
"learning_rate": 7.319540229885058e-06,
"loss": 0.3713,
"step": 1320
},
{
"epoch": 0.7582260371959942,
"grad_norm": 2.679738759994507,
"learning_rate": 7.3080459770114955e-06,
"loss": 0.3295,
"step": 1325
},
{
"epoch": 0.7610872675250357,
"grad_norm": 2.0234341621398926,
"learning_rate": 7.296551724137931e-06,
"loss": 0.3433,
"step": 1330
},
{
"epoch": 0.7639484978540773,
"grad_norm": 2.098039150238037,
"learning_rate": 7.285057471264369e-06,
"loss": 0.2951,
"step": 1335
},
{
"epoch": 0.7668097281831188,
"grad_norm": 2.076972723007202,
"learning_rate": 7.273563218390805e-06,
"loss": 0.3264,
"step": 1340
},
{
"epoch": 0.7696709585121603,
"grad_norm": 1.5598034858703613,
"learning_rate": 7.262068965517242e-06,
"loss": 0.3109,
"step": 1345
},
{
"epoch": 0.7725321888412017,
"grad_norm": 2.146202564239502,
"learning_rate": 7.250574712643678e-06,
"loss": 0.3736,
"step": 1350
},
{
"epoch": 0.7753934191702432,
"grad_norm": 1.838539958000183,
"learning_rate": 7.239080459770116e-06,
"loss": 0.3024,
"step": 1355
},
{
"epoch": 0.7782546494992847,
"grad_norm": 2.216581344604492,
"learning_rate": 7.2275862068965515e-06,
"loss": 0.3306,
"step": 1360
},
{
"epoch": 0.7811158798283262,
"grad_norm": 2.171466112136841,
"learning_rate": 7.216091954022989e-06,
"loss": 0.303,
"step": 1365
},
{
"epoch": 0.7839771101573677,
"grad_norm": 2.2364814281463623,
"learning_rate": 7.204597701149426e-06,
"loss": 0.321,
"step": 1370
},
{
"epoch": 0.7868383404864091,
"grad_norm": 1.6637369394302368,
"learning_rate": 7.193103448275862e-06,
"loss": 0.3252,
"step": 1375
},
{
"epoch": 0.7896995708154506,
"grad_norm": 1.8782521486282349,
"learning_rate": 7.1816091954022996e-06,
"loss": 0.3786,
"step": 1380
},
{
"epoch": 0.7925608011444921,
"grad_norm": 2.16534423828125,
"learning_rate": 7.170114942528736e-06,
"loss": 0.3426,
"step": 1385
},
{
"epoch": 0.7954220314735336,
"grad_norm": 2.1053144931793213,
"learning_rate": 7.1586206896551736e-06,
"loss": 0.3505,
"step": 1390
},
{
"epoch": 0.7982832618025751,
"grad_norm": 6.17349100112915,
"learning_rate": 7.147126436781609e-06,
"loss": 0.2968,
"step": 1395
},
{
"epoch": 0.8011444921316166,
"grad_norm": 2.3340353965759277,
"learning_rate": 7.135632183908047e-06,
"loss": 0.3762,
"step": 1400
},
{
"epoch": 0.804005722460658,
"grad_norm": 2.3221354484558105,
"learning_rate": 7.124137931034484e-06,
"loss": 0.3955,
"step": 1405
},
{
"epoch": 0.8068669527896996,
"grad_norm": 2.350771427154541,
"learning_rate": 7.11264367816092e-06,
"loss": 0.3191,
"step": 1410
},
{
"epoch": 0.8097281831187411,
"grad_norm": 1.8787072896957397,
"learning_rate": 7.101149425287357e-06,
"loss": 0.3006,
"step": 1415
},
{
"epoch": 0.8125894134477826,
"grad_norm": 1.9597926139831543,
"learning_rate": 7.089655172413794e-06,
"loss": 0.2888,
"step": 1420
},
{
"epoch": 0.8154506437768241,
"grad_norm": 1.9055655002593994,
"learning_rate": 7.0781609195402305e-06,
"loss": 0.3297,
"step": 1425
},
{
"epoch": 0.8183118741058655,
"grad_norm": 2.2036375999450684,
"learning_rate": 7.066666666666667e-06,
"loss": 0.3717,
"step": 1430
},
{
"epoch": 0.821173104434907,
"grad_norm": 2.1537721157073975,
"learning_rate": 7.0551724137931045e-06,
"loss": 0.3656,
"step": 1435
},
{
"epoch": 0.8240343347639485,
"grad_norm": 2.1095426082611084,
"learning_rate": 7.04367816091954e-06,
"loss": 0.3491,
"step": 1440
},
{
"epoch": 0.82689556509299,
"grad_norm": 1.9522277116775513,
"learning_rate": 7.032183908045978e-06,
"loss": 0.3656,
"step": 1445
},
{
"epoch": 0.8297567954220315,
"grad_norm": 2.347623109817505,
"learning_rate": 7.020689655172414e-06,
"loss": 0.3174,
"step": 1450
},
{
"epoch": 0.8326180257510729,
"grad_norm": 2.204845428466797,
"learning_rate": 7.009195402298851e-06,
"loss": 0.3624,
"step": 1455
},
{
"epoch": 0.8354792560801144,
"grad_norm": 2.0607683658599854,
"learning_rate": 6.997701149425287e-06,
"loss": 0.3131,
"step": 1460
},
{
"epoch": 0.8383404864091559,
"grad_norm": 2.110132932662964,
"learning_rate": 6.986206896551725e-06,
"loss": 0.3632,
"step": 1465
},
{
"epoch": 0.8412017167381974,
"grad_norm": 1.96920645236969,
"learning_rate": 6.974712643678162e-06,
"loss": 0.3401,
"step": 1470
},
{
"epoch": 0.844062947067239,
"grad_norm": 3.4651010036468506,
"learning_rate": 6.963218390804598e-06,
"loss": 0.2866,
"step": 1475
},
{
"epoch": 0.8469241773962805,
"grad_norm": 2.317070484161377,
"learning_rate": 6.951724137931035e-06,
"loss": 0.3004,
"step": 1480
},
{
"epoch": 0.8497854077253219,
"grad_norm": 26.246292114257812,
"learning_rate": 6.940229885057472e-06,
"loss": 0.3586,
"step": 1485
},
{
"epoch": 0.8526466380543634,
"grad_norm": 1.979914665222168,
"learning_rate": 6.9287356321839086e-06,
"loss": 0.3052,
"step": 1490
},
{
"epoch": 0.8555078683834049,
"grad_norm": 2.253858804702759,
"learning_rate": 6.917241379310345e-06,
"loss": 0.3112,
"step": 1495
},
{
"epoch": 0.8583690987124464,
"grad_norm": 2.219684362411499,
"learning_rate": 6.9057471264367826e-06,
"loss": 0.2922,
"step": 1500
},
{
"epoch": 0.8583690987124464,
"eval_bleu": 0.5704059005886749,
"eval_cer": 17.589679043423537,
"eval_loss": 0.5921686291694641,
"eval_runtime": 472.2636,
"eval_samples_per_second": 2.801,
"eval_steps_per_second": 0.351,
"eval_wer": 31.27740972287021,
"step": 1500
},
{
"epoch": 0.8612303290414879,
"grad_norm": 1.9186010360717773,
"learning_rate": 6.894252873563218e-06,
"loss": 0.2853,
"step": 1505
},
{
"epoch": 0.8640915593705293,
"grad_norm": 2.261216402053833,
"learning_rate": 6.882758620689656e-06,
"loss": 0.3318,
"step": 1510
},
{
"epoch": 0.8669527896995708,
"grad_norm": 3.1048824787139893,
"learning_rate": 6.871264367816093e-06,
"loss": 0.3328,
"step": 1515
},
{
"epoch": 0.8698140200286123,
"grad_norm": 2.310605049133301,
"learning_rate": 6.859770114942529e-06,
"loss": 0.3264,
"step": 1520
},
{
"epoch": 0.8726752503576538,
"grad_norm": 1.8471814393997192,
"learning_rate": 6.848275862068966e-06,
"loss": 0.2892,
"step": 1525
},
{
"epoch": 0.8755364806866953,
"grad_norm": 2.1484620571136475,
"learning_rate": 6.836781609195403e-06,
"loss": 0.2602,
"step": 1530
},
{
"epoch": 0.8783977110157367,
"grad_norm": 1.8682204484939575,
"learning_rate": 6.8252873563218395e-06,
"loss": 0.303,
"step": 1535
},
{
"epoch": 0.8812589413447782,
"grad_norm": 3.347831964492798,
"learning_rate": 6.813793103448276e-06,
"loss": 0.2832,
"step": 1540
},
{
"epoch": 0.8841201716738197,
"grad_norm": 1.732176661491394,
"learning_rate": 6.8022988505747135e-06,
"loss": 0.2986,
"step": 1545
},
{
"epoch": 0.8869814020028612,
"grad_norm": 2.360858917236328,
"learning_rate": 6.790804597701151e-06,
"loss": 0.3109,
"step": 1550
},
{
"epoch": 0.8898426323319027,
"grad_norm": 1.6071327924728394,
"learning_rate": 6.779310344827587e-06,
"loss": 0.3077,
"step": 1555
},
{
"epoch": 0.8927038626609443,
"grad_norm": 1.632338047027588,
"learning_rate": 6.767816091954024e-06,
"loss": 0.3101,
"step": 1560
},
{
"epoch": 0.8955650929899857,
"grad_norm": 2.3832032680511475,
"learning_rate": 6.756321839080461e-06,
"loss": 0.3123,
"step": 1565
},
{
"epoch": 0.8984263233190272,
"grad_norm": 2.1065351963043213,
"learning_rate": 6.744827586206897e-06,
"loss": 0.3644,
"step": 1570
},
{
"epoch": 0.9012875536480687,
"grad_norm": 2.0931053161621094,
"learning_rate": 6.733333333333334e-06,
"loss": 0.3104,
"step": 1575
},
{
"epoch": 0.9041487839771102,
"grad_norm": 4.35342264175415,
"learning_rate": 6.721839080459771e-06,
"loss": 0.3217,
"step": 1580
},
{
"epoch": 0.9070100143061517,
"grad_norm": 1.878527283668518,
"learning_rate": 6.710344827586207e-06,
"loss": 0.3229,
"step": 1585
},
{
"epoch": 0.9098712446351931,
"grad_norm": 8.411993980407715,
"learning_rate": 6.698850574712644e-06,
"loss": 0.2923,
"step": 1590
},
{
"epoch": 0.9127324749642346,
"grad_norm": 1.510301113128662,
"learning_rate": 6.687356321839081e-06,
"loss": 0.336,
"step": 1595
},
{
"epoch": 0.9155937052932761,
"grad_norm": 2.0813026428222656,
"learning_rate": 6.6758620689655176e-06,
"loss": 0.3407,
"step": 1600
},
{
"epoch": 0.9184549356223176,
"grad_norm": 2.687464952468872,
"learning_rate": 6.664367816091954e-06,
"loss": 0.332,
"step": 1605
},
{
"epoch": 0.9213161659513591,
"grad_norm": 2.227113723754883,
"learning_rate": 6.6528735632183916e-06,
"loss": 0.3387,
"step": 1610
},
{
"epoch": 0.9241773962804005,
"grad_norm": 2.1991989612579346,
"learning_rate": 6.641379310344827e-06,
"loss": 0.35,
"step": 1615
},
{
"epoch": 0.927038626609442,
"grad_norm": 1.8104325532913208,
"learning_rate": 6.629885057471265e-06,
"loss": 0.298,
"step": 1620
},
{
"epoch": 0.9298998569384835,
"grad_norm": 1.8303800821304321,
"learning_rate": 6.618390804597702e-06,
"loss": 0.3369,
"step": 1625
},
{
"epoch": 0.932761087267525,
"grad_norm": 2.31982159614563,
"learning_rate": 6.606896551724139e-06,
"loss": 0.3661,
"step": 1630
},
{
"epoch": 0.9356223175965666,
"grad_norm": 1.976562261581421,
"learning_rate": 6.595402298850575e-06,
"loss": 0.3207,
"step": 1635
},
{
"epoch": 0.9384835479256081,
"grad_norm": 1.7467138767242432,
"learning_rate": 6.583908045977012e-06,
"loss": 0.3236,
"step": 1640
},
{
"epoch": 0.9413447782546495,
"grad_norm": 1.8210949897766113,
"learning_rate": 6.572413793103449e-06,
"loss": 0.3603,
"step": 1645
},
{
"epoch": 0.944206008583691,
"grad_norm": 1.9193000793457031,
"learning_rate": 6.560919540229885e-06,
"loss": 0.2887,
"step": 1650
},
{
"epoch": 0.9470672389127325,
"grad_norm": 2.1192359924316406,
"learning_rate": 6.5494252873563225e-06,
"loss": 0.2915,
"step": 1655
},
{
"epoch": 0.949928469241774,
"grad_norm": 1.9616992473602295,
"learning_rate": 6.53793103448276e-06,
"loss": 0.3274,
"step": 1660
},
{
"epoch": 0.9527896995708155,
"grad_norm": 2.124767780303955,
"learning_rate": 6.526436781609196e-06,
"loss": 0.2469,
"step": 1665
},
{
"epoch": 0.9556509298998569,
"grad_norm": 3.6808688640594482,
"learning_rate": 6.514942528735633e-06,
"loss": 0.3146,
"step": 1670
},
{
"epoch": 0.9585121602288984,
"grad_norm": 1.803134560585022,
"learning_rate": 6.50344827586207e-06,
"loss": 0.3169,
"step": 1675
},
{
"epoch": 0.9613733905579399,
"grad_norm": 2.043998956680298,
"learning_rate": 6.491954022988506e-06,
"loss": 0.2779,
"step": 1680
},
{
"epoch": 0.9642346208869814,
"grad_norm": 2.008289098739624,
"learning_rate": 6.480459770114943e-06,
"loss": 0.305,
"step": 1685
},
{
"epoch": 0.9670958512160229,
"grad_norm": 2.0637102127075195,
"learning_rate": 6.46896551724138e-06,
"loss": 0.3521,
"step": 1690
},
{
"epoch": 0.9699570815450643,
"grad_norm": 2.0706608295440674,
"learning_rate": 6.457471264367816e-06,
"loss": 0.3179,
"step": 1695
},
{
"epoch": 0.9728183118741058,
"grad_norm": 2.867837905883789,
"learning_rate": 6.445977011494253e-06,
"loss": 0.3405,
"step": 1700
},
{
"epoch": 0.9756795422031473,
"grad_norm": 2.0019872188568115,
"learning_rate": 6.43448275862069e-06,
"loss": 0.3149,
"step": 1705
},
{
"epoch": 0.9785407725321889,
"grad_norm": 2.19998836517334,
"learning_rate": 6.422988505747127e-06,
"loss": 0.2983,
"step": 1710
},
{
"epoch": 0.9814020028612304,
"grad_norm": 2.0203206539154053,
"learning_rate": 6.411494252873563e-06,
"loss": 0.3399,
"step": 1715
},
{
"epoch": 0.9842632331902719,
"grad_norm": 1.9604451656341553,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.2917,
"step": 1720
},
{
"epoch": 0.9871244635193133,
"grad_norm": 2.5613560676574707,
"learning_rate": 6.388505747126438e-06,
"loss": 0.3283,
"step": 1725
},
{
"epoch": 0.9899856938483548,
"grad_norm": 2.0251967906951904,
"learning_rate": 6.377011494252874e-06,
"loss": 0.3311,
"step": 1730
},
{
"epoch": 0.9928469241773963,
"grad_norm": 2.370732545852661,
"learning_rate": 6.365517241379311e-06,
"loss": 0.3488,
"step": 1735
},
{
"epoch": 0.9957081545064378,
"grad_norm": 2.151982545852661,
"learning_rate": 6.354022988505748e-06,
"loss": 0.322,
"step": 1740
},
{
"epoch": 0.9985693848354793,
"grad_norm": 5.340651988983154,
"learning_rate": 6.342528735632184e-06,
"loss": 0.3054,
"step": 1745
},
{
"epoch": 1.0011444921316166,
"grad_norm": 2.277435302734375,
"learning_rate": 6.331034482758621e-06,
"loss": 0.2416,
"step": 1750
},
{
"epoch": 1.004005722460658,
"grad_norm": 1.7808513641357422,
"learning_rate": 6.319540229885058e-06,
"loss": 0.2649,
"step": 1755
},
{
"epoch": 1.0068669527896996,
"grad_norm": 1.7711721658706665,
"learning_rate": 6.308045977011494e-06,
"loss": 0.2364,
"step": 1760
},
{
"epoch": 1.009728183118741,
"grad_norm": 1.7829340696334839,
"learning_rate": 6.2965517241379315e-06,
"loss": 0.2855,
"step": 1765
},
{
"epoch": 1.0125894134477826,
"grad_norm": 1.5565894842147827,
"learning_rate": 6.285057471264369e-06,
"loss": 0.2562,
"step": 1770
},
{
"epoch": 1.015450643776824,
"grad_norm": 1.6067863702774048,
"learning_rate": 6.273563218390805e-06,
"loss": 0.2253,
"step": 1775
},
{
"epoch": 1.0183118741058654,
"grad_norm": 2.1315195560455322,
"learning_rate": 6.262068965517242e-06,
"loss": 0.2312,
"step": 1780
},
{
"epoch": 1.021173104434907,
"grad_norm": 1.8657792806625366,
"learning_rate": 6.250574712643679e-06,
"loss": 0.247,
"step": 1785
},
{
"epoch": 1.0240343347639485,
"grad_norm": 3.063485860824585,
"learning_rate": 6.239080459770116e-06,
"loss": 0.2215,
"step": 1790
},
{
"epoch": 1.02689556509299,
"grad_norm": 1.955806016921997,
"learning_rate": 6.227586206896552e-06,
"loss": 0.2666,
"step": 1795
},
{
"epoch": 1.0297567954220315,
"grad_norm": 1.8338463306427002,
"learning_rate": 6.216091954022989e-06,
"loss": 0.2201,
"step": 1800
},
{
"epoch": 1.0326180257510729,
"grad_norm": 1.8974248170852661,
"learning_rate": 6.204597701149427e-06,
"loss": 0.2229,
"step": 1805
},
{
"epoch": 1.0354792560801145,
"grad_norm": 2.2553300857543945,
"learning_rate": 6.193103448275862e-06,
"loss": 0.2259,
"step": 1810
},
{
"epoch": 1.0383404864091559,
"grad_norm": 2.0794475078582764,
"learning_rate": 6.1816091954023e-06,
"loss": 0.2728,
"step": 1815
},
{
"epoch": 1.0412017167381975,
"grad_norm": 2.247518301010132,
"learning_rate": 6.170114942528736e-06,
"loss": 0.2729,
"step": 1820
},
{
"epoch": 1.044062947067239,
"grad_norm": 1.6274186372756958,
"learning_rate": 6.158620689655173e-06,
"loss": 0.2038,
"step": 1825
},
{
"epoch": 1.0469241773962803,
"grad_norm": 2.1243278980255127,
"learning_rate": 6.1471264367816096e-06,
"loss": 0.274,
"step": 1830
},
{
"epoch": 1.049785407725322,
"grad_norm": 1.9605655670166016,
"learning_rate": 6.135632183908047e-06,
"loss": 0.2541,
"step": 1835
},
{
"epoch": 1.0526466380543633,
"grad_norm": 1.710223913192749,
"learning_rate": 6.124137931034483e-06,
"loss": 0.21,
"step": 1840
},
{
"epoch": 1.055507868383405,
"grad_norm": 1.8919130563735962,
"learning_rate": 6.11264367816092e-06,
"loss": 0.2507,
"step": 1845
},
{
"epoch": 1.0583690987124463,
"grad_norm": 1.8515825271606445,
"learning_rate": 6.101149425287357e-06,
"loss": 0.2385,
"step": 1850
},
{
"epoch": 1.061230329041488,
"grad_norm": 1.883626103401184,
"learning_rate": 6.089655172413793e-06,
"loss": 0.2451,
"step": 1855
},
{
"epoch": 1.0640915593705293,
"grad_norm": 1.564466953277588,
"learning_rate": 6.07816091954023e-06,
"loss": 0.2471,
"step": 1860
},
{
"epoch": 1.0669527896995707,
"grad_norm": 1.9687753915786743,
"learning_rate": 6.066666666666667e-06,
"loss": 0.2778,
"step": 1865
},
{
"epoch": 1.0698140200286124,
"grad_norm": 2.0385849475860596,
"learning_rate": 6.055172413793105e-06,
"loss": 0.2365,
"step": 1870
},
{
"epoch": 1.0726752503576538,
"grad_norm": 2.087181568145752,
"learning_rate": 6.0436781609195405e-06,
"loss": 0.2367,
"step": 1875
},
{
"epoch": 1.0755364806866954,
"grad_norm": 1.9057412147521973,
"learning_rate": 6.032183908045978e-06,
"loss": 0.2579,
"step": 1880
},
{
"epoch": 1.0783977110157368,
"grad_norm": 1.5374144315719604,
"learning_rate": 6.0206896551724145e-06,
"loss": 0.2296,
"step": 1885
},
{
"epoch": 1.0812589413447782,
"grad_norm": 1.5945782661437988,
"learning_rate": 6.009195402298851e-06,
"loss": 0.2564,
"step": 1890
},
{
"epoch": 1.0841201716738198,
"grad_norm": 2.265857219696045,
"learning_rate": 5.997701149425288e-06,
"loss": 0.2523,
"step": 1895
},
{
"epoch": 1.0869814020028612,
"grad_norm": 1.6930688619613647,
"learning_rate": 5.986206896551725e-06,
"loss": 0.2054,
"step": 1900
},
{
"epoch": 1.0898426323319028,
"grad_norm": 2.441913604736328,
"learning_rate": 5.974712643678161e-06,
"loss": 0.2328,
"step": 1905
},
{
"epoch": 1.0927038626609442,
"grad_norm": 1.6199525594711304,
"learning_rate": 5.963218390804598e-06,
"loss": 0.2068,
"step": 1910
},
{
"epoch": 1.0955650929899856,
"grad_norm": 1.6580902338027954,
"learning_rate": 5.951724137931036e-06,
"loss": 0.2232,
"step": 1915
},
{
"epoch": 1.0984263233190272,
"grad_norm": 1.8525216579437256,
"learning_rate": 5.940229885057471e-06,
"loss": 0.2504,
"step": 1920
},
{
"epoch": 1.1012875536480686,
"grad_norm": 15.594454765319824,
"learning_rate": 5.928735632183909e-06,
"loss": 0.2506,
"step": 1925
},
{
"epoch": 1.1041487839771102,
"grad_norm": 1.6737009286880493,
"learning_rate": 5.917241379310345e-06,
"loss": 0.2715,
"step": 1930
},
{
"epoch": 1.1070100143061516,
"grad_norm": 1.895065426826477,
"learning_rate": 5.905747126436782e-06,
"loss": 0.2166,
"step": 1935
},
{
"epoch": 1.109871244635193,
"grad_norm": 1.8196299076080322,
"learning_rate": 5.8942528735632186e-06,
"loss": 0.2583,
"step": 1940
},
{
"epoch": 1.1127324749642347,
"grad_norm": 2.042278289794922,
"learning_rate": 5.882758620689656e-06,
"loss": 0.2847,
"step": 1945
},
{
"epoch": 1.115593705293276,
"grad_norm": 2.381350517272949,
"learning_rate": 5.871264367816092e-06,
"loss": 0.2275,
"step": 1950
},
{
"epoch": 1.1184549356223177,
"grad_norm": 1.5981552600860596,
"learning_rate": 5.859770114942529e-06,
"loss": 0.2241,
"step": 1955
},
{
"epoch": 1.121316165951359,
"grad_norm": 1.9198013544082642,
"learning_rate": 5.848275862068966e-06,
"loss": 0.2101,
"step": 1960
},
{
"epoch": 1.1241773962804005,
"grad_norm": 11.413874626159668,
"learning_rate": 5.836781609195403e-06,
"loss": 0.3174,
"step": 1965
},
{
"epoch": 1.127038626609442,
"grad_norm": 2.361424446105957,
"learning_rate": 5.825287356321839e-06,
"loss": 0.2478,
"step": 1970
},
{
"epoch": 1.1298998569384835,
"grad_norm": 2.057995080947876,
"learning_rate": 5.813793103448276e-06,
"loss": 0.2354,
"step": 1975
},
{
"epoch": 1.1327610872675251,
"grad_norm": 1.7415863275527954,
"learning_rate": 5.802298850574714e-06,
"loss": 0.2048,
"step": 1980
},
{
"epoch": 1.1356223175965665,
"grad_norm": 1.927140235900879,
"learning_rate": 5.7908045977011495e-06,
"loss": 0.2467,
"step": 1985
},
{
"epoch": 1.138483547925608,
"grad_norm": 1.8381935358047485,
"learning_rate": 5.779310344827587e-06,
"loss": 0.2603,
"step": 1990
},
{
"epoch": 1.1413447782546495,
"grad_norm": 2.509054660797119,
"learning_rate": 5.7678160919540235e-06,
"loss": 0.2359,
"step": 1995
},
{
"epoch": 1.144206008583691,
"grad_norm": 2.898055076599121,
"learning_rate": 5.75632183908046e-06,
"loss": 0.2451,
"step": 2000
},
{
"epoch": 1.144206008583691,
"eval_bleu": 0.5963992770681681,
"eval_cer": 15.110581677604962,
"eval_loss": 0.5831637382507324,
"eval_runtime": 451.8627,
"eval_samples_per_second": 2.928,
"eval_steps_per_second": 0.367,
"eval_wer": 28.156200429224597,
"step": 2000
},
{
"epoch": 1.1470672389127325,
"grad_norm": 3.072053909301758,
"learning_rate": 5.744827586206897e-06,
"loss": 0.2566,
"step": 2005
},
{
"epoch": 1.149928469241774,
"grad_norm": 1.6033082008361816,
"learning_rate": 5.733333333333334e-06,
"loss": 0.2582,
"step": 2010
},
{
"epoch": 1.1527896995708153,
"grad_norm": 13.557411193847656,
"learning_rate": 5.72183908045977e-06,
"loss": 0.2502,
"step": 2015
},
{
"epoch": 1.155650929899857,
"grad_norm": 1.9101053476333618,
"learning_rate": 5.710344827586207e-06,
"loss": 0.2492,
"step": 2020
},
{
"epoch": 1.1585121602288984,
"grad_norm": 1.8975669145584106,
"learning_rate": 5.698850574712645e-06,
"loss": 0.2205,
"step": 2025
},
{
"epoch": 1.16137339055794,
"grad_norm": 7.335193157196045,
"learning_rate": 5.68735632183908e-06,
"loss": 0.2087,
"step": 2030
},
{
"epoch": 1.1642346208869814,
"grad_norm": 2.010817050933838,
"learning_rate": 5.675862068965518e-06,
"loss": 0.2717,
"step": 2035
},
{
"epoch": 1.1670958512160228,
"grad_norm": 1.9336493015289307,
"learning_rate": 5.664367816091954e-06,
"loss": 0.202,
"step": 2040
},
{
"epoch": 1.1699570815450644,
"grad_norm": 1.725829839706421,
"learning_rate": 5.652873563218392e-06,
"loss": 0.2232,
"step": 2045
},
{
"epoch": 1.1728183118741058,
"grad_norm": 1.7198960781097412,
"learning_rate": 5.6413793103448275e-06,
"loss": 0.2283,
"step": 2050
},
{
"epoch": 1.1756795422031474,
"grad_norm": 2.4473893642425537,
"learning_rate": 5.629885057471265e-06,
"loss": 0.2511,
"step": 2055
},
{
"epoch": 1.1785407725321888,
"grad_norm": 1.8747438192367554,
"learning_rate": 5.618390804597702e-06,
"loss": 0.2217,
"step": 2060
},
{
"epoch": 1.1814020028612302,
"grad_norm": 1.8291605710983276,
"learning_rate": 5.606896551724138e-06,
"loss": 0.2128,
"step": 2065
},
{
"epoch": 1.1842632331902718,
"grad_norm": 1.9345383644104004,
"learning_rate": 5.5954022988505756e-06,
"loss": 0.2282,
"step": 2070
},
{
"epoch": 1.1871244635193132,
"grad_norm": 1.9912608861923218,
"learning_rate": 5.583908045977012e-06,
"loss": 0.2305,
"step": 2075
},
{
"epoch": 1.1899856938483548,
"grad_norm": 1.7613855600357056,
"learning_rate": 5.572413793103449e-06,
"loss": 0.2152,
"step": 2080
},
{
"epoch": 1.1928469241773962,
"grad_norm": 1.9164098501205444,
"learning_rate": 5.560919540229885e-06,
"loss": 0.2385,
"step": 2085
},
{
"epoch": 1.1957081545064379,
"grad_norm": 1.8758496046066284,
"learning_rate": 5.549425287356323e-06,
"loss": 0.2197,
"step": 2090
},
{
"epoch": 1.1985693848354793,
"grad_norm": 1.8170535564422607,
"learning_rate": 5.5379310344827585e-06,
"loss": 0.2273,
"step": 2095
},
{
"epoch": 1.2014306151645207,
"grad_norm": 4.734244346618652,
"learning_rate": 5.526436781609196e-06,
"loss": 0.2379,
"step": 2100
},
{
"epoch": 1.2042918454935623,
"grad_norm": 1.7855401039123535,
"learning_rate": 5.5149425287356325e-06,
"loss": 0.2144,
"step": 2105
},
{
"epoch": 1.2071530758226037,
"grad_norm": 1.7985947132110596,
"learning_rate": 5.503448275862069e-06,
"loss": 0.2322,
"step": 2110
},
{
"epoch": 1.2100143061516453,
"grad_norm": 2.8456921577453613,
"learning_rate": 5.491954022988506e-06,
"loss": 0.2486,
"step": 2115
},
{
"epoch": 1.2128755364806867,
"grad_norm": 1.7646148204803467,
"learning_rate": 5.480459770114943e-06,
"loss": 0.2119,
"step": 2120
},
{
"epoch": 1.215736766809728,
"grad_norm": 1.7737618684768677,
"learning_rate": 5.4689655172413805e-06,
"loss": 0.2084,
"step": 2125
},
{
"epoch": 1.2185979971387697,
"grad_norm": 2.054755926132202,
"learning_rate": 5.457471264367816e-06,
"loss": 0.2324,
"step": 2130
},
{
"epoch": 1.221459227467811,
"grad_norm": 2.3212740421295166,
"learning_rate": 5.445977011494254e-06,
"loss": 0.2808,
"step": 2135
},
{
"epoch": 1.2243204577968527,
"grad_norm": 2.056720495223999,
"learning_rate": 5.43448275862069e-06,
"loss": 0.2789,
"step": 2140
},
{
"epoch": 1.2271816881258941,
"grad_norm": 2.018855333328247,
"learning_rate": 5.422988505747127e-06,
"loss": 0.2229,
"step": 2145
},
{
"epoch": 1.2300429184549357,
"grad_norm": 1.7383620738983154,
"learning_rate": 5.411494252873563e-06,
"loss": 0.2371,
"step": 2150
},
{
"epoch": 1.2329041487839771,
"grad_norm": 6.553476333618164,
"learning_rate": 5.400000000000001e-06,
"loss": 0.264,
"step": 2155
},
{
"epoch": 1.2357653791130185,
"grad_norm": 6.118597984313965,
"learning_rate": 5.3885057471264365e-06,
"loss": 0.2769,
"step": 2160
},
{
"epoch": 1.2386266094420602,
"grad_norm": 1.9519344568252563,
"learning_rate": 5.377011494252874e-06,
"loss": 0.2316,
"step": 2165
},
{
"epoch": 1.2414878397711016,
"grad_norm": 2.161031723022461,
"learning_rate": 5.365517241379311e-06,
"loss": 0.2351,
"step": 2170
},
{
"epoch": 1.2443490701001432,
"grad_norm": 1.8503310680389404,
"learning_rate": 5.354022988505747e-06,
"loss": 0.2303,
"step": 2175
},
{
"epoch": 1.2472103004291846,
"grad_norm": 2.138127088546753,
"learning_rate": 5.3425287356321846e-06,
"loss": 0.2353,
"step": 2180
},
{
"epoch": 1.250071530758226,
"grad_norm": 1.747727632522583,
"learning_rate": 5.331034482758621e-06,
"loss": 0.25,
"step": 2185
},
{
"epoch": 1.2529327610872676,
"grad_norm": 2.1394238471984863,
"learning_rate": 5.319540229885058e-06,
"loss": 0.2618,
"step": 2190
},
{
"epoch": 1.255793991416309,
"grad_norm": 2.055069923400879,
"learning_rate": 5.308045977011494e-06,
"loss": 0.28,
"step": 2195
},
{
"epoch": 1.2586552217453506,
"grad_norm": 2.0998175144195557,
"learning_rate": 5.296551724137932e-06,
"loss": 0.2549,
"step": 2200
},
{
"epoch": 1.261516452074392,
"grad_norm": 3.816950798034668,
"learning_rate": 5.285057471264369e-06,
"loss": 0.2376,
"step": 2205
},
{
"epoch": 1.2643776824034334,
"grad_norm": 2.042818307876587,
"learning_rate": 5.273563218390805e-06,
"loss": 0.2322,
"step": 2210
},
{
"epoch": 1.267238912732475,
"grad_norm": 1.8918198347091675,
"learning_rate": 5.2620689655172415e-06,
"loss": 0.2178,
"step": 2215
},
{
"epoch": 1.2701001430615164,
"grad_norm": 1.6626828908920288,
"learning_rate": 5.250574712643679e-06,
"loss": 0.1998,
"step": 2220
},
{
"epoch": 1.272961373390558,
"grad_norm": 1.6923973560333252,
"learning_rate": 5.239080459770115e-06,
"loss": 0.2257,
"step": 2225
},
{
"epoch": 1.2758226037195994,
"grad_norm": 1.5752869844436646,
"learning_rate": 5.227586206896552e-06,
"loss": 0.1856,
"step": 2230
},
{
"epoch": 1.2786838340486408,
"grad_norm": 2.132702350616455,
"learning_rate": 5.2160919540229895e-06,
"loss": 0.2622,
"step": 2235
},
{
"epoch": 1.2815450643776825,
"grad_norm": 1.4981218576431274,
"learning_rate": 5.204597701149425e-06,
"loss": 0.2241,
"step": 2240
},
{
"epoch": 1.2844062947067239,
"grad_norm": 2.2513628005981445,
"learning_rate": 5.193103448275863e-06,
"loss": 0.2307,
"step": 2245
},
{
"epoch": 1.2872675250357655,
"grad_norm": 1.824265718460083,
"learning_rate": 5.181609195402299e-06,
"loss": 0.2252,
"step": 2250
},
{
"epoch": 1.2901287553648069,
"grad_norm": 1.9756693840026855,
"learning_rate": 5.170114942528736e-06,
"loss": 0.2284,
"step": 2255
},
{
"epoch": 1.2929899856938483,
"grad_norm": 1.7013108730316162,
"learning_rate": 5.158620689655172e-06,
"loss": 0.2399,
"step": 2260
},
{
"epoch": 1.2958512160228899,
"grad_norm": 1.9803868532180786,
"learning_rate": 5.14712643678161e-06,
"loss": 0.2376,
"step": 2265
},
{
"epoch": 1.2987124463519313,
"grad_norm": 1.7983365058898926,
"learning_rate": 5.1356321839080455e-06,
"loss": 0.2461,
"step": 2270
},
{
"epoch": 1.301573676680973,
"grad_norm": 1.6878979206085205,
"learning_rate": 5.124137931034483e-06,
"loss": 0.2415,
"step": 2275
},
{
"epoch": 1.3044349070100143,
"grad_norm": 1.693176507949829,
"learning_rate": 5.11264367816092e-06,
"loss": 0.2007,
"step": 2280
},
{
"epoch": 1.3072961373390557,
"grad_norm": 2.423611640930176,
"learning_rate": 5.101149425287357e-06,
"loss": 0.2614,
"step": 2285
},
{
"epoch": 1.3101573676680973,
"grad_norm": 1.6612671613693237,
"learning_rate": 5.0896551724137936e-06,
"loss": 0.206,
"step": 2290
},
{
"epoch": 1.3130185979971387,
"grad_norm": 1.9878997802734375,
"learning_rate": 5.07816091954023e-06,
"loss": 0.214,
"step": 2295
},
{
"epoch": 1.3158798283261803,
"grad_norm": 2.2919623851776123,
"learning_rate": 5.0666666666666676e-06,
"loss": 0.212,
"step": 2300
},
{
"epoch": 1.3187410586552217,
"grad_norm": 1.8216506242752075,
"learning_rate": 5.055172413793103e-06,
"loss": 0.2002,
"step": 2305
},
{
"epoch": 1.3216022889842631,
"grad_norm": 2.561490058898926,
"learning_rate": 5.043678160919541e-06,
"loss": 0.2258,
"step": 2310
},
{
"epoch": 1.3244635193133047,
"grad_norm": 1.7542258501052856,
"learning_rate": 5.032183908045978e-06,
"loss": 0.2214,
"step": 2315
},
{
"epoch": 1.3273247496423461,
"grad_norm": 13.024736404418945,
"learning_rate": 5.020689655172414e-06,
"loss": 0.2181,
"step": 2320
},
{
"epoch": 1.3301859799713878,
"grad_norm": 1.8888355493545532,
"learning_rate": 5.009195402298851e-06,
"loss": 0.213,
"step": 2325
},
{
"epoch": 1.3330472103004292,
"grad_norm": 1.8508597612380981,
"learning_rate": 4.997701149425288e-06,
"loss": 0.2143,
"step": 2330
},
{
"epoch": 1.3359084406294706,
"grad_norm": 2.070064067840576,
"learning_rate": 4.9862068965517245e-06,
"loss": 0.2511,
"step": 2335
},
{
"epoch": 1.3387696709585122,
"grad_norm": 2.605440378189087,
"learning_rate": 4.974712643678161e-06,
"loss": 0.2259,
"step": 2340
},
{
"epoch": 1.3416309012875536,
"grad_norm": 1.8583369255065918,
"learning_rate": 4.9632183908045985e-06,
"loss": 0.2394,
"step": 2345
},
{
"epoch": 1.3444921316165952,
"grad_norm": 2.5312728881835938,
"learning_rate": 4.951724137931035e-06,
"loss": 0.2218,
"step": 2350
},
{
"epoch": 1.3473533619456366,
"grad_norm": 2.2665977478027344,
"learning_rate": 4.940229885057472e-06,
"loss": 0.2255,
"step": 2355
},
{
"epoch": 1.350214592274678,
"grad_norm": 1.9536534547805786,
"learning_rate": 4.928735632183908e-06,
"loss": 0.2055,
"step": 2360
},
{
"epoch": 1.3530758226037196,
"grad_norm": 2.3409595489501953,
"learning_rate": 4.917241379310345e-06,
"loss": 0.234,
"step": 2365
},
{
"epoch": 1.355937052932761,
"grad_norm": 1.9503809213638306,
"learning_rate": 4.905747126436781e-06,
"loss": 0.221,
"step": 2370
},
{
"epoch": 1.3587982832618026,
"grad_norm": 1.5935649871826172,
"learning_rate": 4.894252873563219e-06,
"loss": 0.2168,
"step": 2375
},
{
"epoch": 1.361659513590844,
"grad_norm": 1.8054594993591309,
"learning_rate": 4.882758620689655e-06,
"loss": 0.2136,
"step": 2380
},
{
"epoch": 1.3645207439198854,
"grad_norm": 2.6980879306793213,
"learning_rate": 4.871264367816093e-06,
"loss": 0.2095,
"step": 2385
},
{
"epoch": 1.367381974248927,
"grad_norm": 1.9824919700622559,
"learning_rate": 4.859770114942529e-06,
"loss": 0.2201,
"step": 2390
},
{
"epoch": 1.3702432045779684,
"grad_norm": 1.7837703227996826,
"learning_rate": 4.848275862068966e-06,
"loss": 0.2373,
"step": 2395
},
{
"epoch": 1.37310443490701,
"grad_norm": 3.7595748901367188,
"learning_rate": 4.8367816091954026e-06,
"loss": 0.206,
"step": 2400
},
{
"epoch": 1.3759656652360515,
"grad_norm": 2.127598762512207,
"learning_rate": 4.825287356321839e-06,
"loss": 0.1929,
"step": 2405
},
{
"epoch": 1.3788268955650929,
"grad_norm": 2.089553117752075,
"learning_rate": 4.813793103448276e-06,
"loss": 0.2033,
"step": 2410
},
{
"epoch": 1.3816881258941345,
"grad_norm": 1.8560612201690674,
"learning_rate": 4.802298850574713e-06,
"loss": 0.1716,
"step": 2415
},
{
"epoch": 1.384549356223176,
"grad_norm": 1.9387139081954956,
"learning_rate": 4.79080459770115e-06,
"loss": 0.1935,
"step": 2420
},
{
"epoch": 1.3874105865522175,
"grad_norm": 1.860589623451233,
"learning_rate": 4.779310344827587e-06,
"loss": 0.2101,
"step": 2425
},
{
"epoch": 1.390271816881259,
"grad_norm": 1.9807707071304321,
"learning_rate": 4.767816091954024e-06,
"loss": 0.2143,
"step": 2430
},
{
"epoch": 1.3931330472103003,
"grad_norm": 1.8010809421539307,
"learning_rate": 4.75632183908046e-06,
"loss": 0.1802,
"step": 2435
},
{
"epoch": 1.395994277539342,
"grad_norm": 2.161853790283203,
"learning_rate": 4.744827586206897e-06,
"loss": 0.1936,
"step": 2440
},
{
"epoch": 1.3988555078683835,
"grad_norm": 2.5427753925323486,
"learning_rate": 4.7333333333333335e-06,
"loss": 0.2381,
"step": 2445
},
{
"epoch": 1.401716738197425,
"grad_norm": 1.8699232339859009,
"learning_rate": 4.72183908045977e-06,
"loss": 0.1974,
"step": 2450
},
{
"epoch": 1.4045779685264663,
"grad_norm": 2.080120086669922,
"learning_rate": 4.7103448275862075e-06,
"loss": 0.2287,
"step": 2455
},
{
"epoch": 1.407439198855508,
"grad_norm": 4.648638725280762,
"learning_rate": 4.698850574712644e-06,
"loss": 0.1988,
"step": 2460
},
{
"epoch": 1.4103004291845493,
"grad_norm": 1.942683458328247,
"learning_rate": 4.6873563218390815e-06,
"loss": 0.2285,
"step": 2465
},
{
"epoch": 1.413161659513591,
"grad_norm": 1.915105938911438,
"learning_rate": 4.675862068965517e-06,
"loss": 0.2113,
"step": 2470
},
{
"epoch": 1.4160228898426324,
"grad_norm": 1.8002665042877197,
"learning_rate": 4.664367816091954e-06,
"loss": 0.2011,
"step": 2475
},
{
"epoch": 1.4188841201716738,
"grad_norm": 1.9071236848831177,
"learning_rate": 4.652873563218391e-06,
"loss": 0.1968,
"step": 2480
},
{
"epoch": 1.4217453505007154,
"grad_norm": 1.786940097808838,
"learning_rate": 4.641379310344828e-06,
"loss": 0.2006,
"step": 2485
},
{
"epoch": 1.4246065808297568,
"grad_norm": 2.5510807037353516,
"learning_rate": 4.629885057471264e-06,
"loss": 0.2088,
"step": 2490
},
{
"epoch": 1.4274678111587984,
"grad_norm": 1.7373242378234863,
"learning_rate": 4.618390804597702e-06,
"loss": 0.2007,
"step": 2495
},
{
"epoch": 1.4303290414878398,
"grad_norm": 1.6989120244979858,
"learning_rate": 4.606896551724138e-06,
"loss": 0.2401,
"step": 2500
},
{
"epoch": 1.4303290414878398,
"eval_bleu": 0.5879103462796057,
"eval_cer": 16.617219575054694,
"eval_loss": 0.5721695423126221,
"eval_runtime": 484.1918,
"eval_samples_per_second": 2.732,
"eval_steps_per_second": 0.343,
"eval_wer": 29.817112998040496,
"step": 2500
},
{
"epoch": 1.4331902718168812,
"grad_norm": 1.815650463104248,
"learning_rate": 4.595402298850575e-06,
"loss": 0.1995,
"step": 2505
},
{
"epoch": 1.4360515021459228,
"grad_norm": 1.6936380863189697,
"learning_rate": 4.5839080459770116e-06,
"loss": 0.2073,
"step": 2510
},
{
"epoch": 1.4389127324749642,
"grad_norm": 2.0940747261047363,
"learning_rate": 4.572413793103448e-06,
"loss": 0.2513,
"step": 2515
},
{
"epoch": 1.4417739628040058,
"grad_norm": 1.7273896932601929,
"learning_rate": 4.5609195402298856e-06,
"loss": 0.2056,
"step": 2520
},
{
"epoch": 1.4446351931330472,
"grad_norm": 1.7717649936676025,
"learning_rate": 4.549425287356322e-06,
"loss": 0.2089,
"step": 2525
},
{
"epoch": 1.4474964234620886,
"grad_norm": 2.0581321716308594,
"learning_rate": 4.537931034482759e-06,
"loss": 0.2207,
"step": 2530
},
{
"epoch": 1.4503576537911302,
"grad_norm": 2.054208517074585,
"learning_rate": 4.526436781609196e-06,
"loss": 0.2087,
"step": 2535
},
{
"epoch": 1.4532188841201716,
"grad_norm": 2.047858953475952,
"learning_rate": 4.514942528735633e-06,
"loss": 0.2405,
"step": 2540
},
{
"epoch": 1.4560801144492133,
"grad_norm": 1.9581420421600342,
"learning_rate": 4.503448275862069e-06,
"loss": 0.1779,
"step": 2545
},
{
"epoch": 1.4589413447782547,
"grad_norm": 2.9552524089813232,
"learning_rate": 4.491954022988506e-06,
"loss": 0.2099,
"step": 2550
},
{
"epoch": 1.461802575107296,
"grad_norm": 18.079795837402344,
"learning_rate": 4.4804597701149425e-06,
"loss": 0.2165,
"step": 2555
},
{
"epoch": 1.4646638054363377,
"grad_norm": 1.990879774093628,
"learning_rate": 4.46896551724138e-06,
"loss": 0.2078,
"step": 2560
},
{
"epoch": 1.467525035765379,
"grad_norm": 1.542114019393921,
"learning_rate": 4.4574712643678165e-06,
"loss": 0.1974,
"step": 2565
},
{
"epoch": 1.4703862660944207,
"grad_norm": 1.6736189126968384,
"learning_rate": 4.445977011494253e-06,
"loss": 0.1994,
"step": 2570
},
{
"epoch": 1.473247496423462,
"grad_norm": 1.9050010442733765,
"learning_rate": 4.4344827586206905e-06,
"loss": 0.1968,
"step": 2575
},
{
"epoch": 1.4761087267525035,
"grad_norm": 2.1282589435577393,
"learning_rate": 4.422988505747127e-06,
"loss": 0.2174,
"step": 2580
},
{
"epoch": 1.478969957081545,
"grad_norm": 2.1793580055236816,
"learning_rate": 4.411494252873564e-06,
"loss": 0.2112,
"step": 2585
},
{
"epoch": 1.4818311874105865,
"grad_norm": 1.7508175373077393,
"learning_rate": 4.4e-06,
"loss": 0.2173,
"step": 2590
},
{
"epoch": 1.4846924177396281,
"grad_norm": 1.8711589574813843,
"learning_rate": 4.388505747126437e-06,
"loss": 0.1944,
"step": 2595
},
{
"epoch": 1.4875536480686695,
"grad_norm": 1.7508400678634644,
"learning_rate": 4.377011494252874e-06,
"loss": 0.2124,
"step": 2600
},
{
"epoch": 1.490414878397711,
"grad_norm": 1.943958044052124,
"learning_rate": 4.365517241379311e-06,
"loss": 0.1899,
"step": 2605
},
{
"epoch": 1.4932761087267525,
"grad_norm": 1.783746361732483,
"learning_rate": 4.354022988505747e-06,
"loss": 0.2267,
"step": 2610
},
{
"epoch": 1.496137339055794,
"grad_norm": 2.324143171310425,
"learning_rate": 4.342528735632184e-06,
"loss": 0.2066,
"step": 2615
},
{
"epoch": 1.4989985693848356,
"grad_norm": 6.195375442504883,
"learning_rate": 4.3310344827586206e-06,
"loss": 0.2285,
"step": 2620
},
{
"epoch": 1.501859799713877,
"grad_norm": 1.9747071266174316,
"learning_rate": 4.319540229885058e-06,
"loss": 0.2448,
"step": 2625
},
{
"epoch": 1.5047210300429184,
"grad_norm": 2.495802640914917,
"learning_rate": 4.3080459770114946e-06,
"loss": 0.1911,
"step": 2630
},
{
"epoch": 1.50758226037196,
"grad_norm": 2.073364019393921,
"learning_rate": 4.296551724137931e-06,
"loss": 0.2217,
"step": 2635
},
{
"epoch": 1.5104434907010016,
"grad_norm": 1.9022804498672485,
"learning_rate": 4.2850574712643686e-06,
"loss": 0.198,
"step": 2640
},
{
"epoch": 1.513304721030043,
"grad_norm": 1.475517988204956,
"learning_rate": 4.273563218390805e-06,
"loss": 0.2037,
"step": 2645
},
{
"epoch": 1.5161659513590844,
"grad_norm": 1.7946468591690063,
"learning_rate": 4.262068965517242e-06,
"loss": 0.199,
"step": 2650
},
{
"epoch": 1.5190271816881258,
"grad_norm": 13.506173133850098,
"learning_rate": 4.250574712643678e-06,
"loss": 0.236,
"step": 2655
},
{
"epoch": 1.5218884120171674,
"grad_norm": 2.0159194469451904,
"learning_rate": 4.239080459770115e-06,
"loss": 0.2094,
"step": 2660
},
{
"epoch": 1.524749642346209,
"grad_norm": 1.893707036972046,
"learning_rate": 4.227586206896552e-06,
"loss": 0.2002,
"step": 2665
},
{
"epoch": 1.5276108726752504,
"grad_norm": 1.8341419696807861,
"learning_rate": 4.216091954022989e-06,
"loss": 0.2072,
"step": 2670
},
{
"epoch": 1.5304721030042918,
"grad_norm": 1.940000057220459,
"learning_rate": 4.2045977011494255e-06,
"loss": 0.2211,
"step": 2675
},
{
"epoch": 1.5333333333333332,
"grad_norm": 2.36838698387146,
"learning_rate": 4.193103448275863e-06,
"loss": 0.2139,
"step": 2680
},
{
"epoch": 1.5361945636623748,
"grad_norm": 1.875691294670105,
"learning_rate": 4.1816091954022995e-06,
"loss": 0.1754,
"step": 2685
},
{
"epoch": 1.5390557939914165,
"grad_norm": 1.5981868505477905,
"learning_rate": 4.170114942528736e-06,
"loss": 0.1998,
"step": 2690
},
{
"epoch": 1.5419170243204579,
"grad_norm": 1.8597708940505981,
"learning_rate": 4.158620689655173e-06,
"loss": 0.2081,
"step": 2695
},
{
"epoch": 1.5447782546494992,
"grad_norm": 2.2451958656311035,
"learning_rate": 4.147126436781609e-06,
"loss": 0.2084,
"step": 2700
},
{
"epoch": 1.5476394849785406,
"grad_norm": 1.8167076110839844,
"learning_rate": 4.135632183908047e-06,
"loss": 0.193,
"step": 2705
},
{
"epoch": 1.5505007153075823,
"grad_norm": 2.029426097869873,
"learning_rate": 4.124137931034483e-06,
"loss": 0.2223,
"step": 2710
},
{
"epoch": 1.5533619456366239,
"grad_norm": 1.903867483139038,
"learning_rate": 4.11264367816092e-06,
"loss": 0.2066,
"step": 2715
},
{
"epoch": 1.5562231759656653,
"grad_norm": 2.1051104068756104,
"learning_rate": 4.101149425287357e-06,
"loss": 0.2131,
"step": 2720
},
{
"epoch": 1.5590844062947067,
"grad_norm": 1.787436842918396,
"learning_rate": 4.089655172413794e-06,
"loss": 0.2099,
"step": 2725
},
{
"epoch": 1.561945636623748,
"grad_norm": 1.9237557649612427,
"learning_rate": 4.0781609195402295e-06,
"loss": 0.1873,
"step": 2730
},
{
"epoch": 1.5648068669527897,
"grad_norm": 1.709716558456421,
"learning_rate": 4.066666666666667e-06,
"loss": 0.1762,
"step": 2735
},
{
"epoch": 1.5676680972818313,
"grad_norm": 2.1726202964782715,
"learning_rate": 4.0551724137931036e-06,
"loss": 0.1982,
"step": 2740
},
{
"epoch": 1.5705293276108727,
"grad_norm": 1.8287854194641113,
"learning_rate": 4.043678160919541e-06,
"loss": 0.168,
"step": 2745
},
{
"epoch": 1.5733905579399141,
"grad_norm": 2.111295223236084,
"learning_rate": 4.0321839080459776e-06,
"loss": 0.1783,
"step": 2750
},
{
"epoch": 1.5762517882689555,
"grad_norm": 3.8943326473236084,
"learning_rate": 4.020689655172414e-06,
"loss": 0.2125,
"step": 2755
},
{
"epoch": 1.5791130185979971,
"grad_norm": 1.7108508348464966,
"learning_rate": 4.009195402298851e-06,
"loss": 0.1765,
"step": 2760
},
{
"epoch": 1.5819742489270388,
"grad_norm": 1.863000750541687,
"learning_rate": 3.997701149425287e-06,
"loss": 0.2175,
"step": 2765
},
{
"epoch": 1.5848354792560801,
"grad_norm": 2.054055690765381,
"learning_rate": 3.986206896551724e-06,
"loss": 0.2081,
"step": 2770
},
{
"epoch": 1.5876967095851215,
"grad_norm": 7.339261054992676,
"learning_rate": 3.974712643678161e-06,
"loss": 0.1797,
"step": 2775
},
{
"epoch": 1.590557939914163,
"grad_norm": 1.821616530418396,
"learning_rate": 3.963218390804598e-06,
"loss": 0.1712,
"step": 2780
},
{
"epoch": 1.5934191702432046,
"grad_norm": 4.537174701690674,
"learning_rate": 3.951724137931035e-06,
"loss": 0.2074,
"step": 2785
},
{
"epoch": 1.5962804005722462,
"grad_norm": 1.9394702911376953,
"learning_rate": 3.940229885057472e-06,
"loss": 0.1899,
"step": 2790
},
{
"epoch": 1.5991416309012876,
"grad_norm": 2.021707773208618,
"learning_rate": 3.9287356321839085e-06,
"loss": 0.2004,
"step": 2795
},
{
"epoch": 1.602002861230329,
"grad_norm": 2.337878942489624,
"learning_rate": 3.917241379310345e-06,
"loss": 0.2254,
"step": 2800
},
{
"epoch": 1.6048640915593704,
"grad_norm": 2.3269996643066406,
"learning_rate": 3.905747126436782e-06,
"loss": 0.218,
"step": 2805
},
{
"epoch": 1.607725321888412,
"grad_norm": 1.7878772020339966,
"learning_rate": 3.894252873563218e-06,
"loss": 0.1816,
"step": 2810
},
{
"epoch": 1.6105865522174536,
"grad_norm": 1.7744303941726685,
"learning_rate": 3.882758620689656e-06,
"loss": 0.1694,
"step": 2815
},
{
"epoch": 1.613447782546495,
"grad_norm": 1.849387288093567,
"learning_rate": 3.871264367816092e-06,
"loss": 0.1892,
"step": 2820
},
{
"epoch": 1.6163090128755364,
"grad_norm": 2.10662841796875,
"learning_rate": 3.85977011494253e-06,
"loss": 0.1889,
"step": 2825
},
{
"epoch": 1.6191702432045778,
"grad_norm": 1.9940298795700073,
"learning_rate": 3.848275862068966e-06,
"loss": 0.2051,
"step": 2830
},
{
"epoch": 1.6220314735336194,
"grad_norm": 1.9454128742218018,
"learning_rate": 3.836781609195403e-06,
"loss": 0.204,
"step": 2835
},
{
"epoch": 1.624892703862661,
"grad_norm": 1.6831291913986206,
"learning_rate": 3.825287356321839e-06,
"loss": 0.1882,
"step": 2840
},
{
"epoch": 1.6277539341917024,
"grad_norm": 2.420912265777588,
"learning_rate": 3.813793103448276e-06,
"loss": 0.1869,
"step": 2845
},
{
"epoch": 1.6306151645207438,
"grad_norm": 1.7464041709899902,
"learning_rate": 3.8022988505747126e-06,
"loss": 0.1921,
"step": 2850
},
{
"epoch": 1.6334763948497855,
"grad_norm": 1.9783605337142944,
"learning_rate": 3.79080459770115e-06,
"loss": 0.1892,
"step": 2855
},
{
"epoch": 1.6363376251788269,
"grad_norm": 1.7680976390838623,
"learning_rate": 3.7793103448275866e-06,
"loss": 0.1851,
"step": 2860
},
{
"epoch": 1.6391988555078685,
"grad_norm": 2.4723477363586426,
"learning_rate": 3.7678160919540236e-06,
"loss": 0.2011,
"step": 2865
},
{
"epoch": 1.6420600858369099,
"grad_norm": 2.6624374389648438,
"learning_rate": 3.75632183908046e-06,
"loss": 0.191,
"step": 2870
},
{
"epoch": 1.6449213161659513,
"grad_norm": 2.0972347259521484,
"learning_rate": 3.7448275862068967e-06,
"loss": 0.2108,
"step": 2875
},
{
"epoch": 1.647782546494993,
"grad_norm": 2.13672137260437,
"learning_rate": 3.7333333333333337e-06,
"loss": 0.2003,
"step": 2880
},
{
"epoch": 1.6506437768240343,
"grad_norm": 1.685014009475708,
"learning_rate": 3.7218390804597703e-06,
"loss": 0.1727,
"step": 2885
},
{
"epoch": 1.653505007153076,
"grad_norm": 2.269775390625,
"learning_rate": 3.710344827586207e-06,
"loss": 0.2263,
"step": 2890
},
{
"epoch": 1.6563662374821173,
"grad_norm": 2.918632745742798,
"learning_rate": 3.698850574712644e-06,
"loss": 0.199,
"step": 2895
},
{
"epoch": 1.6592274678111587,
"grad_norm": 1.939340353012085,
"learning_rate": 3.6873563218390805e-06,
"loss": 0.1753,
"step": 2900
},
{
"epoch": 1.6620886981402003,
"grad_norm": 2.1699063777923584,
"learning_rate": 3.675862068965518e-06,
"loss": 0.2076,
"step": 2905
},
{
"epoch": 1.6649499284692417,
"grad_norm": 5.3711466789245605,
"learning_rate": 3.6643678160919545e-06,
"loss": 0.1819,
"step": 2910
},
{
"epoch": 1.6678111587982833,
"grad_norm": 12.513273239135742,
"learning_rate": 3.652873563218391e-06,
"loss": 0.1815,
"step": 2915
},
{
"epoch": 1.6706723891273247,
"grad_norm": 1.9002474546432495,
"learning_rate": 3.641379310344828e-06,
"loss": 0.2064,
"step": 2920
},
{
"epoch": 1.6735336194563661,
"grad_norm": 2.12951922416687,
"learning_rate": 3.6298850574712646e-06,
"loss": 0.2099,
"step": 2925
},
{
"epoch": 1.6763948497854078,
"grad_norm": 2.566774845123291,
"learning_rate": 3.6183908045977012e-06,
"loss": 0.1969,
"step": 2930
},
{
"epoch": 1.6792560801144494,
"grad_norm": 1.7637192010879517,
"learning_rate": 3.6068965517241382e-06,
"loss": 0.1889,
"step": 2935
},
{
"epoch": 1.6821173104434908,
"grad_norm": 1.8705408573150635,
"learning_rate": 3.595402298850575e-06,
"loss": 0.1916,
"step": 2940
},
{
"epoch": 1.6849785407725322,
"grad_norm": 3.1661245822906494,
"learning_rate": 3.5839080459770122e-06,
"loss": 0.1919,
"step": 2945
},
{
"epoch": 1.6878397711015736,
"grad_norm": 2.1932051181793213,
"learning_rate": 3.5724137931034484e-06,
"loss": 0.1978,
"step": 2950
},
{
"epoch": 1.6907010014306152,
"grad_norm": 1.9052879810333252,
"learning_rate": 3.560919540229885e-06,
"loss": 0.1745,
"step": 2955
},
{
"epoch": 1.6935622317596568,
"grad_norm": 2.185124397277832,
"learning_rate": 3.5494252873563224e-06,
"loss": 0.2084,
"step": 2960
},
{
"epoch": 1.6964234620886982,
"grad_norm": 1.7581264972686768,
"learning_rate": 3.537931034482759e-06,
"loss": 0.1929,
"step": 2965
},
{
"epoch": 1.6992846924177396,
"grad_norm": 1.7022705078125,
"learning_rate": 3.5264367816091956e-06,
"loss": 0.201,
"step": 2970
},
{
"epoch": 1.702145922746781,
"grad_norm": 2.636112928390503,
"learning_rate": 3.5149425287356326e-06,
"loss": 0.2293,
"step": 2975
},
{
"epoch": 1.7050071530758226,
"grad_norm": 1.6399792432785034,
"learning_rate": 3.503448275862069e-06,
"loss": 0.2029,
"step": 2980
},
{
"epoch": 1.7078683834048642,
"grad_norm": 1.7805216312408447,
"learning_rate": 3.491954022988506e-06,
"loss": 0.1911,
"step": 2985
},
{
"epoch": 1.7107296137339056,
"grad_norm": 3.3004565238952637,
"learning_rate": 3.4804597701149427e-06,
"loss": 0.1818,
"step": 2990
},
{
"epoch": 1.713590844062947,
"grad_norm": 2.374055862426758,
"learning_rate": 3.4689655172413793e-06,
"loss": 0.1959,
"step": 2995
},
{
"epoch": 1.7164520743919884,
"grad_norm": 1.8845436573028564,
"learning_rate": 3.4574712643678167e-06,
"loss": 0.17,
"step": 3000
},
{
"epoch": 1.7164520743919884,
"eval_bleu": 0.6118896028399405,
"eval_cer": 14.497737420959572,
"eval_loss": 0.5691251754760742,
"eval_runtime": 456.0743,
"eval_samples_per_second": 2.901,
"eval_steps_per_second": 0.364,
"eval_wer": 27.064476999160213,
"step": 3000
}
],
"logging_steps": 5,
"max_steps": 4500,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.07563909152768e+20,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}