new_model1 / trainer_state.json
SR
up1
ca00a9f verified
raw
history blame
97.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 83265,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005404431633939831,
"grad_norm": 0.9054504632949829,
"learning_rate": 3.602738080941516e-06,
"loss": 1.2514,
"step": 150
},
{
"epoch": 0.010808863267879661,
"grad_norm": 0.6858287453651428,
"learning_rate": 7.205476161883032e-06,
"loss": 1.1276,
"step": 300
},
{
"epoch": 0.016213294901819494,
"grad_norm": 0.6737526655197144,
"learning_rate": 1.0808214242824548e-05,
"loss": 1.0866,
"step": 450
},
{
"epoch": 0.021617726535759323,
"grad_norm": 0.6372509002685547,
"learning_rate": 1.4410952323766064e-05,
"loss": 1.0705,
"step": 600
},
{
"epoch": 0.027022158169699155,
"grad_norm": 0.5196628570556641,
"learning_rate": 1.8013690404707578e-05,
"loss": 1.0525,
"step": 750
},
{
"epoch": 0.03242658980363899,
"grad_norm": 0.4922332167625427,
"learning_rate": 2.1616428485649097e-05,
"loss": 1.0418,
"step": 900
},
{
"epoch": 0.03783102143757881,
"grad_norm": 0.5018568634986877,
"learning_rate": 2.521916656659061e-05,
"loss": 1.0359,
"step": 1050
},
{
"epoch": 0.043235453071518645,
"grad_norm": 0.4413062334060669,
"learning_rate": 2.8821904647532128e-05,
"loss": 1.037,
"step": 1200
},
{
"epoch": 0.04863988470545848,
"grad_norm": 0.3888317048549652,
"learning_rate": 3.242464272847364e-05,
"loss": 1.0232,
"step": 1350
},
{
"epoch": 0.05404431633939831,
"grad_norm": 0.43577057123184204,
"learning_rate": 3.6027380809415156e-05,
"loss": 1.0124,
"step": 1500
},
{
"epoch": 0.059448747973338135,
"grad_norm": 0.41379234194755554,
"learning_rate": 3.963011889035667e-05,
"loss": 1.0225,
"step": 1650
},
{
"epoch": 0.06485317960727797,
"grad_norm": 0.48700177669525146,
"learning_rate": 4.3232856971298193e-05,
"loss": 1.0138,
"step": 1800
},
{
"epoch": 0.0702576112412178,
"grad_norm": 0.40877047181129456,
"learning_rate": 4.683559505223971e-05,
"loss": 1.0068,
"step": 1950
},
{
"epoch": 0.07566204287515763,
"grad_norm": 0.37194114923477173,
"learning_rate": 5.043833313318122e-05,
"loss": 1.0007,
"step": 2100
},
{
"epoch": 0.08106647450909746,
"grad_norm": 0.49839073419570923,
"learning_rate": 5.404107121412274e-05,
"loss": 1.0038,
"step": 2250
},
{
"epoch": 0.08647090614303729,
"grad_norm": 0.3880678117275238,
"learning_rate": 5.7643809295064256e-05,
"loss": 0.9996,
"step": 2400
},
{
"epoch": 0.09187533777697712,
"grad_norm": 0.4280707538127899,
"learning_rate": 6.124654737600577e-05,
"loss": 1.0049,
"step": 2550
},
{
"epoch": 0.09727976941091696,
"grad_norm": 0.4451320469379425,
"learning_rate": 6.484928545694728e-05,
"loss": 1.0057,
"step": 2700
},
{
"epoch": 0.10268420104485679,
"grad_norm": 0.38181596994400024,
"learning_rate": 6.84520235378888e-05,
"loss": 1.0019,
"step": 2850
},
{
"epoch": 0.10808863267879662,
"grad_norm": 0.38614770770072937,
"learning_rate": 7.205476161883031e-05,
"loss": 1.0045,
"step": 3000
},
{
"epoch": 0.11349306431273644,
"grad_norm": 0.3148934543132782,
"learning_rate": 7.565749969977183e-05,
"loss": 1.0041,
"step": 3150
},
{
"epoch": 0.11889749594667627,
"grad_norm": 0.41060400009155273,
"learning_rate": 7.926023778071334e-05,
"loss": 1.0001,
"step": 3300
},
{
"epoch": 0.1243019275806161,
"grad_norm": 0.40537866950035095,
"learning_rate": 8.286297586165485e-05,
"loss": 1.0014,
"step": 3450
},
{
"epoch": 0.12970635921455595,
"grad_norm": 0.3297308683395386,
"learning_rate": 8.646571394259639e-05,
"loss": 1.0055,
"step": 3600
},
{
"epoch": 0.13511079084849575,
"grad_norm": 0.39976179599761963,
"learning_rate": 9.00684520235379e-05,
"loss": 0.9993,
"step": 3750
},
{
"epoch": 0.1405152224824356,
"grad_norm": 0.39322683215141296,
"learning_rate": 9.367119010447942e-05,
"loss": 0.9965,
"step": 3900
},
{
"epoch": 0.14591965411637542,
"grad_norm": 0.45231467485427856,
"learning_rate": 9.727392818542093e-05,
"loss": 0.9978,
"step": 4050
},
{
"epoch": 0.15132408575031525,
"grad_norm": 0.41241922974586487,
"learning_rate": 0.00010087666626636244,
"loss": 1.0051,
"step": 4200
},
{
"epoch": 0.15672851738425508,
"grad_norm": 0.5085678100585938,
"learning_rate": 0.00010447940434730397,
"loss": 0.9971,
"step": 4350
},
{
"epoch": 0.16213294901819492,
"grad_norm": 0.4659586548805237,
"learning_rate": 0.00010808214242824548,
"loss": 1.0083,
"step": 4500
},
{
"epoch": 0.16753738065213475,
"grad_norm": 0.330456018447876,
"learning_rate": 0.00011168488050918699,
"loss": 1.0013,
"step": 4650
},
{
"epoch": 0.17294181228607458,
"grad_norm": 0.4083492159843445,
"learning_rate": 0.00011528761859012851,
"loss": 1.0107,
"step": 4800
},
{
"epoch": 0.1783462439200144,
"grad_norm": 0.5598177909851074,
"learning_rate": 0.00011889035667107002,
"loss": 0.9992,
"step": 4950
},
{
"epoch": 0.18375067555395425,
"grad_norm": 0.4554787576198578,
"learning_rate": 0.00012249309475201154,
"loss": 0.9972,
"step": 5100
},
{
"epoch": 0.18915510718789408,
"grad_norm": 0.5599480271339417,
"learning_rate": 0.00012609583283295305,
"loss": 1.0017,
"step": 5250
},
{
"epoch": 0.1945595388218339,
"grad_norm": 0.4103052318096161,
"learning_rate": 0.00012969857091389456,
"loss": 1.0075,
"step": 5400
},
{
"epoch": 0.19996397045577374,
"grad_norm": 0.5033989548683167,
"learning_rate": 0.0001333013089948361,
"loss": 0.9998,
"step": 5550
},
{
"epoch": 0.20536840208971358,
"grad_norm": 0.41184836626052856,
"learning_rate": 0.0001369040470757776,
"loss": 1.0116,
"step": 5700
},
{
"epoch": 0.2107728337236534,
"grad_norm": 0.4604012370109558,
"learning_rate": 0.0001405067851567191,
"loss": 1.0144,
"step": 5850
},
{
"epoch": 0.21617726535759324,
"grad_norm": 0.5769256949424744,
"learning_rate": 0.00014410952323766062,
"loss": 1.0142,
"step": 6000
},
{
"epoch": 0.22158169699153304,
"grad_norm": 0.49323058128356934,
"learning_rate": 0.00014771226131860213,
"loss": 1.0224,
"step": 6150
},
{
"epoch": 0.22698612862547288,
"grad_norm": 0.4065729081630707,
"learning_rate": 0.00015131499939954367,
"loss": 1.011,
"step": 6300
},
{
"epoch": 0.2323905602594127,
"grad_norm": 0.4484567642211914,
"learning_rate": 0.00015491773748048518,
"loss": 1.0135,
"step": 6450
},
{
"epoch": 0.23779499189335254,
"grad_norm": 0.5265558958053589,
"learning_rate": 0.00015852047556142668,
"loss": 1.0266,
"step": 6600
},
{
"epoch": 0.24319942352729237,
"grad_norm": 0.43009766936302185,
"learning_rate": 0.0001621232136423682,
"loss": 1.025,
"step": 6750
},
{
"epoch": 0.2486038551612322,
"grad_norm": 0.45328229665756226,
"learning_rate": 0.0001657259517233097,
"loss": 1.0256,
"step": 6900
},
{
"epoch": 0.25400828679517207,
"grad_norm": 0.4880930781364441,
"learning_rate": 0.00016932868980425124,
"loss": 1.0268,
"step": 7050
},
{
"epoch": 0.2594127184291119,
"grad_norm": 0.4783656597137451,
"learning_rate": 0.00017293142788519277,
"loss": 1.0281,
"step": 7200
},
{
"epoch": 0.2648171500630517,
"grad_norm": 0.40857091546058655,
"learning_rate": 0.00017653416596613428,
"loss": 1.0436,
"step": 7350
},
{
"epoch": 0.2702215816969915,
"grad_norm": 0.5468364953994751,
"learning_rate": 0.0001801369040470758,
"loss": 1.0431,
"step": 7500
},
{
"epoch": 0.27562601333093134,
"grad_norm": 0.4680778384208679,
"learning_rate": 0.0001837396421280173,
"loss": 1.0449,
"step": 7650
},
{
"epoch": 0.2810304449648712,
"grad_norm": 0.5532673001289368,
"learning_rate": 0.00018734238020895884,
"loss": 1.0453,
"step": 7800
},
{
"epoch": 0.286434876598811,
"grad_norm": 0.5404918789863586,
"learning_rate": 0.00019094511828990034,
"loss": 1.0592,
"step": 7950
},
{
"epoch": 0.29183930823275084,
"grad_norm": 0.5416702628135681,
"learning_rate": 0.00019454785637084185,
"loss": 1.0541,
"step": 8100
},
{
"epoch": 0.29724373986669067,
"grad_norm": 0.5036255121231079,
"learning_rate": 0.00019815059445178336,
"loss": 1.0544,
"step": 8250
},
{
"epoch": 0.3026481715006305,
"grad_norm": 0.564854621887207,
"learning_rate": 0.00019999953171425823,
"loss": 1.0528,
"step": 8400
},
{
"epoch": 0.30805260313457034,
"grad_norm": 0.5236982107162476,
"learning_rate": 0.00019999563009378472,
"loss": 1.0595,
"step": 8550
},
{
"epoch": 0.31345703476851017,
"grad_norm": 0.5642319917678833,
"learning_rate": 0.00019998777428218277,
"loss": 1.0733,
"step": 8700
},
{
"epoch": 0.31886146640245,
"grad_norm": 0.5522397756576538,
"learning_rate": 0.00019997596459009974,
"loss": 1.0685,
"step": 8850
},
{
"epoch": 0.32426589803638983,
"grad_norm": 0.5239744782447815,
"learning_rate": 0.00019996020148453384,
"loss": 1.068,
"step": 9000
},
{
"epoch": 0.32967032967032966,
"grad_norm": 0.5960803627967834,
"learning_rate": 0.00019994048558881562,
"loss": 1.0681,
"step": 9150
},
{
"epoch": 0.3350747613042695,
"grad_norm": 0.5771428942680359,
"learning_rate": 0.00019991681768258336,
"loss": 1.0649,
"step": 9300
},
{
"epoch": 0.34047919293820933,
"grad_norm": 0.5502661466598511,
"learning_rate": 0.00019988919870175223,
"loss": 1.0632,
"step": 9450
},
{
"epoch": 0.34588362457214916,
"grad_norm": 0.5481303930282593,
"learning_rate": 0.0001998576297384772,
"loss": 1.0604,
"step": 9600
},
{
"epoch": 0.351288056206089,
"grad_norm": 0.520757257938385,
"learning_rate": 0.00019982211204111,
"loss": 1.0703,
"step": 9750
},
{
"epoch": 0.3566924878400288,
"grad_norm": 0.5234895348548889,
"learning_rate": 0.00019978264701414963,
"loss": 1.0693,
"step": 9900
},
{
"epoch": 0.36209691947396866,
"grad_norm": 0.669703483581543,
"learning_rate": 0.0001997392362181869,
"loss": 1.0706,
"step": 10050
},
{
"epoch": 0.3675013511079085,
"grad_norm": 0.5472550392150879,
"learning_rate": 0.00019969188136984267,
"loss": 1.0743,
"step": 10200
},
{
"epoch": 0.3729057827418483,
"grad_norm": 0.5862524509429932,
"learning_rate": 0.00019964058434169995,
"loss": 1.069,
"step": 10350
},
{
"epoch": 0.37831021437578816,
"grad_norm": 0.5793502330780029,
"learning_rate": 0.0001995853471622299,
"loss": 1.0686,
"step": 10500
},
{
"epoch": 0.383714646009728,
"grad_norm": 0.670881986618042,
"learning_rate": 0.0001995261720157117,
"loss": 1.0749,
"step": 10650
},
{
"epoch": 0.3891190776436678,
"grad_norm": 0.698593258857727,
"learning_rate": 0.00019946306124214594,
"loss": 1.0678,
"step": 10800
},
{
"epoch": 0.39452350927760765,
"grad_norm": 0.5866215229034424,
"learning_rate": 0.00019939601733716232,
"loss": 1.0605,
"step": 10950
},
{
"epoch": 0.3999279409115475,
"grad_norm": 0.5571088790893555,
"learning_rate": 0.0001993250429519208,
"loss": 1.0732,
"step": 11100
},
{
"epoch": 0.4053323725454873,
"grad_norm": 0.6108280420303345,
"learning_rate": 0.0001992501408930069,
"loss": 1.0717,
"step": 11250
},
{
"epoch": 0.41073680417942715,
"grad_norm": 0.5834035873413086,
"learning_rate": 0.00019917131412232057,
"loss": 1.0767,
"step": 11400
},
{
"epoch": 0.416141235813367,
"grad_norm": 0.6449561715126038,
"learning_rate": 0.00019908856575695925,
"loss": 1.0679,
"step": 11550
},
{
"epoch": 0.4215456674473068,
"grad_norm": 0.6005063652992249,
"learning_rate": 0.00019900189906909446,
"loss": 1.0697,
"step": 11700
},
{
"epoch": 0.42695009908124665,
"grad_norm": 0.48533475399017334,
"learning_rate": 0.0001989113174858424,
"loss": 1.0759,
"step": 11850
},
{
"epoch": 0.4323545307151865,
"grad_norm": 0.6543179154396057,
"learning_rate": 0.00019881682458912855,
"loss": 1.068,
"step": 12000
},
{
"epoch": 0.43775896234912626,
"grad_norm": 0.6233469843864441,
"learning_rate": 0.00019871842411554598,
"loss": 1.0665,
"step": 12150
},
{
"epoch": 0.4431633939830661,
"grad_norm": 0.5530846118927002,
"learning_rate": 0.0001986161199562074,
"loss": 1.0759,
"step": 12300
},
{
"epoch": 0.4485678256170059,
"grad_norm": 0.6484875679016113,
"learning_rate": 0.00019850991615659173,
"loss": 1.0799,
"step": 12450
},
{
"epoch": 0.45397225725094575,
"grad_norm": 0.5916330814361572,
"learning_rate": 0.00019839981691638364,
"loss": 1.0732,
"step": 12600
},
{
"epoch": 0.4593766888848856,
"grad_norm": 0.6168014407157898,
"learning_rate": 0.00019828582658930777,
"loss": 1.063,
"step": 12750
},
{
"epoch": 0.4647811205188254,
"grad_norm": 0.7302340269088745,
"learning_rate": 0.00019816794968295648,
"loss": 1.0694,
"step": 12900
},
{
"epoch": 0.47018555215276525,
"grad_norm": 0.7804449200630188,
"learning_rate": 0.00019804619085861172,
"loss": 1.0681,
"step": 13050
},
{
"epoch": 0.4755899837867051,
"grad_norm": 0.690500020980835,
"learning_rate": 0.00019792055493106042,
"loss": 1.0662,
"step": 13200
},
{
"epoch": 0.4809944154206449,
"grad_norm": 0.6514592170715332,
"learning_rate": 0.00019779104686840445,
"loss": 1.0682,
"step": 13350
},
{
"epoch": 0.48639884705458475,
"grad_norm": 0.7182182669639587,
"learning_rate": 0.00019765767179186393,
"loss": 1.0761,
"step": 13500
},
{
"epoch": 0.4918032786885246,
"grad_norm": 0.6194586157798767,
"learning_rate": 0.00019752043497557473,
"loss": 1.0637,
"step": 13650
},
{
"epoch": 0.4972077103224644,
"grad_norm": 0.5965324640274048,
"learning_rate": 0.00019737934184638006,
"loss": 1.0658,
"step": 13800
},
{
"epoch": 0.5026121419564042,
"grad_norm": 0.6684099435806274,
"learning_rate": 0.0001972343979836157,
"loss": 1.0788,
"step": 13950
},
{
"epoch": 0.5080165735903441,
"grad_norm": 0.6042500734329224,
"learning_rate": 0.00019708560911888947,
"loss": 1.0748,
"step": 14100
},
{
"epoch": 0.5134210052242839,
"grad_norm": 0.6769179701805115,
"learning_rate": 0.0001969329811358546,
"loss": 1.08,
"step": 14250
},
{
"epoch": 0.5188254368582238,
"grad_norm": 0.6137043237686157,
"learning_rate": 0.000196776520069977,
"loss": 1.0752,
"step": 14400
},
{
"epoch": 0.5242298684921636,
"grad_norm": 0.5905526280403137,
"learning_rate": 0.00019661623210829657,
"loss": 1.0711,
"step": 14550
},
{
"epoch": 0.5296343001261034,
"grad_norm": 0.5724222660064697,
"learning_rate": 0.00019645212358918273,
"loss": 1.0665,
"step": 14700
},
{
"epoch": 0.5350387317600432,
"grad_norm": 0.6485213041305542,
"learning_rate": 0.00019628420100208354,
"loss": 1.075,
"step": 14850
},
{
"epoch": 0.540443163393983,
"grad_norm": 0.6828542351722717,
"learning_rate": 0.00019611247098726917,
"loss": 1.0742,
"step": 15000
},
{
"epoch": 0.5458475950279229,
"grad_norm": 0.7089459300041199,
"learning_rate": 0.00019593694033556944,
"loss": 1.0717,
"step": 15150
},
{
"epoch": 0.5512520266618627,
"grad_norm": 0.6180184483528137,
"learning_rate": 0.00019575761598810508,
"loss": 1.0701,
"step": 15300
},
{
"epoch": 0.5566564582958026,
"grad_norm": 0.6298936605453491,
"learning_rate": 0.00019557450503601345,
"loss": 1.0693,
"step": 15450
},
{
"epoch": 0.5620608899297423,
"grad_norm": 0.7352581024169922,
"learning_rate": 0.00019538761472016796,
"loss": 1.0773,
"step": 15600
},
{
"epoch": 0.5674653215636822,
"grad_norm": 0.5634006857872009,
"learning_rate": 0.00019519695243089188,
"loss": 1.0747,
"step": 15750
},
{
"epoch": 0.572869753197622,
"grad_norm": 0.6061451435089111,
"learning_rate": 0.00019500252570766599,
"loss": 1.0659,
"step": 15900
},
{
"epoch": 0.5782741848315619,
"grad_norm": 0.7047978043556213,
"learning_rate": 0.00019480434223883046,
"loss": 1.0695,
"step": 16050
},
{
"epoch": 0.5836786164655017,
"grad_norm": 0.7310365438461304,
"learning_rate": 0.00019460240986128095,
"loss": 1.074,
"step": 16200
},
{
"epoch": 0.5890830480994416,
"grad_norm": 0.7517262697219849,
"learning_rate": 0.00019439673656015857,
"loss": 1.0675,
"step": 16350
},
{
"epoch": 0.5944874797333813,
"grad_norm": 0.6441323757171631,
"learning_rate": 0.00019418733046853412,
"loss": 1.0832,
"step": 16500
},
{
"epoch": 0.5998919113673212,
"grad_norm": 0.7108227014541626,
"learning_rate": 0.00019397419986708658,
"loss": 1.0702,
"step": 16650
},
{
"epoch": 0.605296343001261,
"grad_norm": 0.7227650284767151,
"learning_rate": 0.00019375735318377557,
"loss": 1.0676,
"step": 16800
},
{
"epoch": 0.6107007746352009,
"grad_norm": 0.7566308975219727,
"learning_rate": 0.00019353679899350814,
"loss": 1.076,
"step": 16950
},
{
"epoch": 0.6161052062691407,
"grad_norm": 0.5554959177970886,
"learning_rate": 0.00019331254601779959,
"loss": 1.0758,
"step": 17100
},
{
"epoch": 0.6215096379030806,
"grad_norm": 0.6587594747543335,
"learning_rate": 0.0001930846031244287,
"loss": 1.0671,
"step": 17250
},
{
"epoch": 0.6269140695370203,
"grad_norm": 0.7100338339805603,
"learning_rate": 0.0001928529793270871,
"loss": 1.067,
"step": 17400
},
{
"epoch": 0.6323185011709602,
"grad_norm": 0.6286484003067017,
"learning_rate": 0.00019261768378502262,
"loss": 1.0668,
"step": 17550
},
{
"epoch": 0.6377229328049,
"grad_norm": 0.7707709670066833,
"learning_rate": 0.00019237872580267734,
"loss": 1.0672,
"step": 17700
},
{
"epoch": 0.6431273644388399,
"grad_norm": 0.7858836054801941,
"learning_rate": 0.00019213611482931953,
"loss": 1.0736,
"step": 17850
},
{
"epoch": 0.6485317960727797,
"grad_norm": 0.6796938180923462,
"learning_rate": 0.00019188986045866997,
"loss": 1.0759,
"step": 18000
},
{
"epoch": 0.6539362277067196,
"grad_norm": 0.6615278124809265,
"learning_rate": 0.0001916399724285227,
"loss": 1.0713,
"step": 18150
},
{
"epoch": 0.6593406593406593,
"grad_norm": 0.6353105306625366,
"learning_rate": 0.00019138646062035982,
"loss": 1.0769,
"step": 18300
},
{
"epoch": 0.6647450909745992,
"grad_norm": 0.6170017123222351,
"learning_rate": 0.0001911293350589609,
"loss": 1.07,
"step": 18450
},
{
"epoch": 0.670149522608539,
"grad_norm": 0.6368488073348999,
"learning_rate": 0.00019086860591200632,
"loss": 1.0774,
"step": 18600
},
{
"epoch": 0.6755539542424789,
"grad_norm": 0.5853469371795654,
"learning_rate": 0.00019060428348967548,
"loss": 1.0732,
"step": 18750
},
{
"epoch": 0.6809583858764187,
"grad_norm": 0.7817432880401611,
"learning_rate": 0.00019033637824423884,
"loss": 1.0732,
"step": 18900
},
{
"epoch": 0.6863628175103585,
"grad_norm": 0.6566998362541199,
"learning_rate": 0.00019006490076964487,
"loss": 1.0671,
"step": 19050
},
{
"epoch": 0.6917672491442983,
"grad_norm": 0.5824844837188721,
"learning_rate": 0.00018978986180110088,
"loss": 1.0656,
"step": 19200
},
{
"epoch": 0.6971716807782381,
"grad_norm": 0.5842050909996033,
"learning_rate": 0.0001895112722146486,
"loss": 1.0646,
"step": 19350
},
{
"epoch": 0.702576112412178,
"grad_norm": 0.6520604491233826,
"learning_rate": 0.00018922914302673421,
"loss": 1.0745,
"step": 19500
},
{
"epoch": 0.7079805440461178,
"grad_norm": 0.648113489151001,
"learning_rate": 0.0001889434853937725,
"loss": 1.0711,
"step": 19650
},
{
"epoch": 0.7133849756800577,
"grad_norm": 1.0153329372406006,
"learning_rate": 0.00018865431061170588,
"loss": 1.0643,
"step": 19800
},
{
"epoch": 0.7187894073139974,
"grad_norm": 0.6522130370140076,
"learning_rate": 0.00018836163011555764,
"loss": 1.0629,
"step": 19950
},
{
"epoch": 0.7241938389479373,
"grad_norm": 0.6235710978507996,
"learning_rate": 0.0001880654554789798,
"loss": 1.0637,
"step": 20100
},
{
"epoch": 0.7295982705818771,
"grad_norm": 0.6486189365386963,
"learning_rate": 0.00018776579841379528,
"loss": 1.0679,
"step": 20250
},
{
"epoch": 0.735002702215817,
"grad_norm": 0.7326012849807739,
"learning_rate": 0.00018746267076953505,
"loss": 1.0624,
"step": 20400
},
{
"epoch": 0.7404071338497568,
"grad_norm": 0.7451658248901367,
"learning_rate": 0.00018715608453296926,
"loss": 1.0799,
"step": 20550
},
{
"epoch": 0.7458115654836966,
"grad_norm": 0.5677480101585388,
"learning_rate": 0.00018684605182763355,
"loss": 1.0665,
"step": 20700
},
{
"epoch": 0.7512159971176364,
"grad_norm": 0.6265568137168884,
"learning_rate": 0.00018653258491334933,
"loss": 1.0562,
"step": 20850
},
{
"epoch": 0.7566204287515763,
"grad_norm": 0.5560349225997925,
"learning_rate": 0.0001862156961857392,
"loss": 1.0696,
"step": 21000
},
{
"epoch": 0.7620248603855161,
"grad_norm": 0.7811048626899719,
"learning_rate": 0.0001858953981757367,
"loss": 1.0713,
"step": 21150
},
{
"epoch": 0.767429292019456,
"grad_norm": 0.8111995458602905,
"learning_rate": 0.00018557170354909088,
"loss": 1.0641,
"step": 21300
},
{
"epoch": 0.7728337236533958,
"grad_norm": 0.6084979176521301,
"learning_rate": 0.0001852446251058652,
"loss": 1.0609,
"step": 21450
},
{
"epoch": 0.7782381552873356,
"grad_norm": 0.6472198963165283,
"learning_rate": 0.0001849141757799317,
"loss": 1.0659,
"step": 21600
},
{
"epoch": 0.7836425869212754,
"grad_norm": 0.6767707467079163,
"learning_rate": 0.00018458036863845933,
"loss": 1.0687,
"step": 21750
},
{
"epoch": 0.7890470185552153,
"grad_norm": 0.6994395852088928,
"learning_rate": 0.00018424321688139729,
"loss": 1.0634,
"step": 21900
},
{
"epoch": 0.7944514501891551,
"grad_norm": 0.6968779563903809,
"learning_rate": 0.000183902733840953,
"loss": 1.0552,
"step": 22050
},
{
"epoch": 0.799855881823095,
"grad_norm": 0.6974983215332031,
"learning_rate": 0.0001835589329810651,
"loss": 1.0722,
"step": 22200
},
{
"epoch": 0.8052603134570347,
"grad_norm": 0.6921077966690063,
"learning_rate": 0.00018321182789687068,
"loss": 1.0557,
"step": 22350
},
{
"epoch": 0.8106647450909746,
"grad_norm": 0.6887233257293701,
"learning_rate": 0.00018286143231416806,
"loss": 1.0633,
"step": 22500
},
{
"epoch": 0.8160691767249144,
"grad_norm": 0.6151506900787354,
"learning_rate": 0.00018250776008887375,
"loss": 1.0694,
"step": 22650
},
{
"epoch": 0.8214736083588543,
"grad_norm": 0.682551383972168,
"learning_rate": 0.00018215082520647467,
"loss": 1.0677,
"step": 22800
},
{
"epoch": 0.8268780399927941,
"grad_norm": 0.6813539862632751,
"learning_rate": 0.00018179064178147506,
"loss": 1.0628,
"step": 22950
},
{
"epoch": 0.832282471626734,
"grad_norm": 0.583910346031189,
"learning_rate": 0.00018142722405683839,
"loss": 1.0605,
"step": 23100
},
{
"epoch": 0.8376869032606737,
"grad_norm": 0.6265426278114319,
"learning_rate": 0.000181060586403424,
"loss": 1.0709,
"step": 23250
},
{
"epoch": 0.8430913348946136,
"grad_norm": 0.5985749959945679,
"learning_rate": 0.0001806907433194191,
"loss": 1.0521,
"step": 23400
},
{
"epoch": 0.8484957665285534,
"grad_norm": 0.6286662220954895,
"learning_rate": 0.00018031770942976514,
"loss": 1.0648,
"step": 23550
},
{
"epoch": 0.8539001981624933,
"grad_norm": 0.6208794713020325,
"learning_rate": 0.00017994149948557975,
"loss": 1.0565,
"step": 23700
},
{
"epoch": 0.8593046297964331,
"grad_norm": 0.7522740960121155,
"learning_rate": 0.00017956212836357324,
"loss": 1.0583,
"step": 23850
},
{
"epoch": 0.864709061430373,
"grad_norm": 0.791959285736084,
"learning_rate": 0.0001791796110654604,
"loss": 1.0663,
"step": 24000
},
{
"epoch": 0.8701134930643127,
"grad_norm": 0.5950735211372375,
"learning_rate": 0.0001787939627173673,
"loss": 1.0652,
"step": 24150
},
{
"epoch": 0.8755179246982525,
"grad_norm": 0.6595513820648193,
"learning_rate": 0.0001784051985692332,
"loss": 1.051,
"step": 24300
},
{
"epoch": 0.8809223563321924,
"grad_norm": 0.6468363404273987,
"learning_rate": 0.00017801333399420724,
"loss": 1.0465,
"step": 24450
},
{
"epoch": 0.8863267879661322,
"grad_norm": 3.451094150543213,
"learning_rate": 0.0001776183844880409,
"loss": 1.0534,
"step": 24600
},
{
"epoch": 0.8917312196000721,
"grad_norm": 0.6846780180931091,
"learning_rate": 0.00017722036566847495,
"loss": 1.0554,
"step": 24750
},
{
"epoch": 0.8971356512340118,
"grad_norm": 0.7100343704223633,
"learning_rate": 0.00017681929327462205,
"loss": 1.0524,
"step": 24900
},
{
"epoch": 0.9025400828679517,
"grad_norm": 0.5465316772460938,
"learning_rate": 0.00017641518316634426,
"loss": 1.046,
"step": 25050
},
{
"epoch": 0.9079445145018915,
"grad_norm": 0.7278814911842346,
"learning_rate": 0.000176008051323626,
"loss": 1.0543,
"step": 25200
},
{
"epoch": 0.9133489461358314,
"grad_norm": 0.6412672996520996,
"learning_rate": 0.00017559791384594192,
"loss": 1.0477,
"step": 25350
},
{
"epoch": 0.9187533777697712,
"grad_norm": 0.6557443141937256,
"learning_rate": 0.00017518478695162056,
"loss": 1.0638,
"step": 25500
},
{
"epoch": 0.9241578094037111,
"grad_norm": 0.7106101512908936,
"learning_rate": 0.00017476868697720278,
"loss": 1.0588,
"step": 25650
},
{
"epoch": 0.9295622410376508,
"grad_norm": 0.6246557235717773,
"learning_rate": 0.00017434963037679592,
"loss": 1.054,
"step": 25800
},
{
"epoch": 0.9349666726715907,
"grad_norm": 0.6114718914031982,
"learning_rate": 0.000173927633721423,
"loss": 1.0504,
"step": 25950
},
{
"epoch": 0.9403711043055305,
"grad_norm": 0.7704567909240723,
"learning_rate": 0.0001735027136983676,
"loss": 1.0537,
"step": 26100
},
{
"epoch": 0.9457755359394704,
"grad_norm": 0.6341020464897156,
"learning_rate": 0.0001730748871105138,
"loss": 1.0493,
"step": 26250
},
{
"epoch": 0.9511799675734102,
"grad_norm": 0.5861644148826599,
"learning_rate": 0.00017264417087568189,
"loss": 1.052,
"step": 26400
},
{
"epoch": 0.9565843992073501,
"grad_norm": 0.5983610153198242,
"learning_rate": 0.00017221058202595928,
"loss": 1.052,
"step": 26550
},
{
"epoch": 0.9619888308412898,
"grad_norm": 0.6839273571968079,
"learning_rate": 0.0001717741377070271,
"loss": 1.0632,
"step": 26700
},
{
"epoch": 0.9673932624752297,
"grad_norm": 0.7345322966575623,
"learning_rate": 0.000171334855177482,
"loss": 1.0416,
"step": 26850
},
{
"epoch": 0.9727976941091695,
"grad_norm": 0.6669878363609314,
"learning_rate": 0.00017089275180815394,
"loss": 1.0499,
"step": 27000
},
{
"epoch": 0.9782021257431094,
"grad_norm": 0.5807615518569946,
"learning_rate": 0.0001704478450814191,
"loss": 1.0469,
"step": 27150
},
{
"epoch": 0.9836065573770492,
"grad_norm": 0.6089076399803162,
"learning_rate": 0.00017000015259050855,
"loss": 1.0403,
"step": 27300
},
{
"epoch": 0.989010989010989,
"grad_norm": 0.6615424156188965,
"learning_rate": 0.00016954969203881272,
"loss": 1.0492,
"step": 27450
},
{
"epoch": 0.9944154206449288,
"grad_norm": 0.660163164138794,
"learning_rate": 0.00016909648123918116,
"loss": 1.0543,
"step": 27600
},
{
"epoch": 0.9998198522788687,
"grad_norm": 0.631686806678772,
"learning_rate": 0.0001686405381132183,
"loss": 1.0474,
"step": 27750
},
{
"epoch": 1.0052242839128085,
"grad_norm": 0.7013711333274841,
"learning_rate": 0.00016818188069057458,
"loss": 0.9965,
"step": 27900
},
{
"epoch": 1.0106287155467484,
"grad_norm": 0.76506507396698,
"learning_rate": 0.00016772052710823374,
"loss": 0.9981,
"step": 28050
},
{
"epoch": 1.0160331471806883,
"grad_norm": 0.8097601532936096,
"learning_rate": 0.00016725649560979546,
"loss": 0.9995,
"step": 28200
},
{
"epoch": 1.021437578814628,
"grad_norm": 0.795626163482666,
"learning_rate": 0.00016678980454475385,
"loss": 0.9983,
"step": 28350
},
{
"epoch": 1.0268420104485678,
"grad_norm": 0.6494497060775757,
"learning_rate": 0.00016632047236777214,
"loss": 1.0075,
"step": 28500
},
{
"epoch": 1.0322464420825077,
"grad_norm": 0.7171606421470642,
"learning_rate": 0.00016584851763795262,
"loss": 0.9972,
"step": 28650
},
{
"epoch": 1.0376508737164474,
"grad_norm": 0.604192316532135,
"learning_rate": 0.00016537395901810288,
"loss": 0.9943,
"step": 28800
},
{
"epoch": 1.0430553053503873,
"grad_norm": 0.6858931183815002,
"learning_rate": 0.0001648968152739978,
"loss": 1.0092,
"step": 28950
},
{
"epoch": 1.0484597369843272,
"grad_norm": 0.685265839099884,
"learning_rate": 0.00016441710527363753,
"loss": 0.9936,
"step": 29100
},
{
"epoch": 1.053864168618267,
"grad_norm": 0.6720730066299438,
"learning_rate": 0.00016393484798650132,
"loss": 0.993,
"step": 29250
},
{
"epoch": 1.0592686002522067,
"grad_norm": 0.7085748314857483,
"learning_rate": 0.0001634500624827973,
"loss": 1.0083,
"step": 29400
},
{
"epoch": 1.0646730318861466,
"grad_norm": 0.6460698843002319,
"learning_rate": 0.00016296276793270864,
"loss": 0.9952,
"step": 29550
},
{
"epoch": 1.0700774635200865,
"grad_norm": 0.6689881086349487,
"learning_rate": 0.0001624729836056352,
"loss": 0.9958,
"step": 29700
},
{
"epoch": 1.0754818951540264,
"grad_norm": 0.7271780967712402,
"learning_rate": 0.00016198072886943181,
"loss": 0.9954,
"step": 29850
},
{
"epoch": 1.080886326787966,
"grad_norm": 0.5559628009796143,
"learning_rate": 0.0001614860231896422,
"loss": 0.9984,
"step": 30000
},
{
"epoch": 1.086290758421906,
"grad_norm": 0.6752548813819885,
"learning_rate": 0.0001609888861287293,
"loss": 1.0019,
"step": 30150
},
{
"epoch": 1.0916951900558458,
"grad_norm": 0.7046670913696289,
"learning_rate": 0.0001604893373453017,
"loss": 0.9936,
"step": 30300
},
{
"epoch": 1.0970996216897857,
"grad_norm": 0.6102576851844788,
"learning_rate": 0.00015998739659333638,
"loss": 1.0061,
"step": 30450
},
{
"epoch": 1.1025040533237254,
"grad_norm": 0.7669439911842346,
"learning_rate": 0.00015948308372139739,
"loss": 1.0017,
"step": 30600
},
{
"epoch": 1.1079084849576653,
"grad_norm": 0.7437514662742615,
"learning_rate": 0.00015897641867185092,
"loss": 0.9947,
"step": 30750
},
{
"epoch": 1.1133129165916051,
"grad_norm": 0.7851073741912842,
"learning_rate": 0.0001584674214800771,
"loss": 1.0026,
"step": 30900
},
{
"epoch": 1.118717348225545,
"grad_norm": 0.7046276926994324,
"learning_rate": 0.0001579561122736772,
"loss": 0.9893,
"step": 31050
},
{
"epoch": 1.1241217798594847,
"grad_norm": 0.8143602013587952,
"learning_rate": 0.000157442511271678,
"loss": 1.0013,
"step": 31200
},
{
"epoch": 1.1295262114934246,
"grad_norm": 1.2338451147079468,
"learning_rate": 0.0001569266387837324,
"loss": 1.002,
"step": 31350
},
{
"epoch": 1.1349306431273645,
"grad_norm": 0.7588093876838684,
"learning_rate": 0.00015640851520931588,
"loss": 1.0064,
"step": 31500
},
{
"epoch": 1.1403350747613044,
"grad_norm": 0.7656028270721436,
"learning_rate": 0.00015588816103692023,
"loss": 0.9963,
"step": 31650
},
{
"epoch": 1.145739506395244,
"grad_norm": 0.82599937915802,
"learning_rate": 0.00015536559684324315,
"loss": 0.9961,
"step": 31800
},
{
"epoch": 1.151143938029184,
"grad_norm": 0.6491279006004333,
"learning_rate": 0.0001548408432923746,
"loss": 0.9946,
"step": 31950
},
{
"epoch": 1.1565483696631238,
"grad_norm": 0.49154847860336304,
"learning_rate": 0.00015431392113497979,
"loss": 1.0035,
"step": 32100
},
{
"epoch": 1.1619528012970637,
"grad_norm": 0.5830157399177551,
"learning_rate": 0.00015378485120747835,
"loss": 0.9978,
"step": 32250
},
{
"epoch": 1.1673572329310034,
"grad_norm": 0.6672685146331787,
"learning_rate": 0.00015325365443122078,
"loss": 1.0079,
"step": 32400
},
{
"epoch": 1.1727616645649432,
"grad_norm": 0.7243463397026062,
"learning_rate": 0.00015272035181166066,
"loss": 1.0023,
"step": 32550
},
{
"epoch": 1.1781660961988831,
"grad_norm": 0.6492652893066406,
"learning_rate": 0.00015218496443752456,
"loss": 0.9972,
"step": 32700
},
{
"epoch": 1.1835705278328228,
"grad_norm": 0.6047407388687134,
"learning_rate": 0.00015164751347997762,
"loss": 0.9864,
"step": 32850
},
{
"epoch": 1.1889749594667627,
"grad_norm": 0.6448661088943481,
"learning_rate": 0.00015110802019178661,
"loss": 1.0046,
"step": 33000
},
{
"epoch": 1.1943793911007026,
"grad_norm": 0.7006458044052124,
"learning_rate": 0.0001505665059064796,
"loss": 1.0018,
"step": 33150
},
{
"epoch": 1.1997838227346425,
"grad_norm": 0.6918825507164001,
"learning_rate": 0.00015002299203750212,
"loss": 0.991,
"step": 33300
},
{
"epoch": 1.2051882543685823,
"grad_norm": 0.6090679168701172,
"learning_rate": 0.00014947750007737062,
"loss": 0.9939,
"step": 33450
},
{
"epoch": 1.210592686002522,
"grad_norm": 0.718387246131897,
"learning_rate": 0.00014893005159682233,
"loss": 0.9873,
"step": 33600
},
{
"epoch": 1.215997117636462,
"grad_norm": 0.6664546132087708,
"learning_rate": 0.00014838066824396256,
"loss": 0.9926,
"step": 33750
},
{
"epoch": 1.2214015492704018,
"grad_norm": 0.6758761405944824,
"learning_rate": 0.00014782937174340845,
"loss": 0.9924,
"step": 33900
},
{
"epoch": 1.2268059809043415,
"grad_norm": 0.5241803526878357,
"learning_rate": 0.00014727618389542995,
"loss": 0.9935,
"step": 34050
},
{
"epoch": 1.2322104125382813,
"grad_norm": 0.6897122859954834,
"learning_rate": 0.00014672112657508778,
"loss": 0.9859,
"step": 34200
},
{
"epoch": 1.2376148441722212,
"grad_norm": 0.6511486172676086,
"learning_rate": 0.00014616422173136846,
"loss": 0.9905,
"step": 34350
},
{
"epoch": 1.2430192758061611,
"grad_norm": 0.8631020784378052,
"learning_rate": 0.00014560549138631617,
"loss": 0.9996,
"step": 34500
},
{
"epoch": 1.248423707440101,
"grad_norm": 0.5925600528717041,
"learning_rate": 0.00014504495763416225,
"loss": 0.9961,
"step": 34650
},
{
"epoch": 1.2538281390740407,
"grad_norm": 0.6121050715446472,
"learning_rate": 0.00014448264264045114,
"loss": 1.0039,
"step": 34800
},
{
"epoch": 1.2592325707079806,
"grad_norm": 0.628056526184082,
"learning_rate": 0.00014391856864116414,
"loss": 1.0004,
"step": 34950
},
{
"epoch": 1.2646370023419204,
"grad_norm": 0.6576303243637085,
"learning_rate": 0.00014335275794184003,
"loss": 0.9978,
"step": 35100
},
{
"epoch": 1.2700414339758601,
"grad_norm": 0.5684065222740173,
"learning_rate": 0.00014278523291669302,
"loss": 0.9874,
"step": 35250
},
{
"epoch": 1.2754458656098,
"grad_norm": 0.8131369352340698,
"learning_rate": 0.000142216016007728,
"loss": 1.0006,
"step": 35400
},
{
"epoch": 1.2808502972437399,
"grad_norm": 0.6513379216194153,
"learning_rate": 0.00014164512972385306,
"loss": 0.9817,
"step": 35550
},
{
"epoch": 1.2862547288776798,
"grad_norm": 0.6244243383407593,
"learning_rate": 0.0001410725966399896,
"loss": 0.9805,
"step": 35700
},
{
"epoch": 1.2916591605116197,
"grad_norm": 0.760666012763977,
"learning_rate": 0.00014049843939617924,
"loss": 0.9889,
"step": 35850
},
{
"epoch": 1.2970635921455593,
"grad_norm": 0.7188459634780884,
"learning_rate": 0.00013992268069668904,
"loss": 0.9895,
"step": 36000
},
{
"epoch": 1.3024680237794992,
"grad_norm": 0.6034685969352722,
"learning_rate": 0.0001393453433091133,
"loss": 0.9882,
"step": 36150
},
{
"epoch": 1.307872455413439,
"grad_norm": 0.6076464653015137,
"learning_rate": 0.0001387664500634734,
"loss": 0.9823,
"step": 36300
},
{
"epoch": 1.3132768870473788,
"grad_norm": 0.6652275323867798,
"learning_rate": 0.00013818602385131512,
"loss": 0.9784,
"step": 36450
},
{
"epoch": 1.3186813186813187,
"grad_norm": 0.6014280319213867,
"learning_rate": 0.00013760408762480316,
"loss": 0.9812,
"step": 36600
},
{
"epoch": 1.3240857503152585,
"grad_norm": 0.6998510360717773,
"learning_rate": 0.00013702066439581382,
"loss": 0.9886,
"step": 36750
},
{
"epoch": 1.3294901819491982,
"grad_norm": 0.5891895294189453,
"learning_rate": 0.00013643577723502476,
"loss": 0.9873,
"step": 36900
},
{
"epoch": 1.334894613583138,
"grad_norm": 0.7246126532554626,
"learning_rate": 0.00013584944927100298,
"loss": 0.9859,
"step": 37050
},
{
"epoch": 1.340299045217078,
"grad_norm": 0.664380669593811,
"learning_rate": 0.00013526170368928993,
"loss": 0.9793,
"step": 37200
},
{
"epoch": 1.3457034768510179,
"grad_norm": 0.6437602639198303,
"learning_rate": 0.00013467256373148496,
"loss": 0.9853,
"step": 37350
},
{
"epoch": 1.3511079084849578,
"grad_norm": 0.6728150844573975,
"learning_rate": 0.000134082052694326,
"loss": 0.9792,
"step": 37500
},
{
"epoch": 1.3565123401188974,
"grad_norm": 0.8101018071174622,
"learning_rate": 0.00013349019392876858,
"loss": 0.9791,
"step": 37650
},
{
"epoch": 1.3619167717528373,
"grad_norm": 0.6081525683403015,
"learning_rate": 0.00013289701083906214,
"loss": 0.9825,
"step": 37800
},
{
"epoch": 1.3673212033867772,
"grad_norm": 0.6776862740516663,
"learning_rate": 0.00013230252688182497,
"loss": 0.9693,
"step": 37950
},
{
"epoch": 1.3727256350207169,
"grad_norm": 0.6200093030929565,
"learning_rate": 0.0001317067655651161,
"loss": 0.9677,
"step": 38100
},
{
"epoch": 1.3781300666546568,
"grad_norm": 0.7349710464477539,
"learning_rate": 0.00013110975044750621,
"loss": 0.9714,
"step": 38250
},
{
"epoch": 1.3835344982885966,
"grad_norm": 0.5907526612281799,
"learning_rate": 0.0001305115051371458,
"loss": 0.9779,
"step": 38400
},
{
"epoch": 1.3889389299225365,
"grad_norm": 0.6219062805175781,
"learning_rate": 0.0001299120532908316,
"loss": 0.9647,
"step": 38550
},
{
"epoch": 1.3943433615564764,
"grad_norm": 0.777947723865509,
"learning_rate": 0.0001293114186130712,
"loss": 0.97,
"step": 38700
},
{
"epoch": 1.399747793190416,
"grad_norm": 0.686892569065094,
"learning_rate": 0.00012870962485514567,
"loss": 0.9683,
"step": 38850
},
{
"epoch": 1.405152224824356,
"grad_norm": 0.6655575633049011,
"learning_rate": 0.00012810669581417032,
"loss": 0.9674,
"step": 39000
},
{
"epoch": 1.4105566564582959,
"grad_norm": 0.679595947265625,
"learning_rate": 0.0001275026553321536,
"loss": 0.9725,
"step": 39150
},
{
"epoch": 1.4159610880922355,
"grad_norm": 0.6671122312545776,
"learning_rate": 0.00012689752729505457,
"loss": 0.9677,
"step": 39300
},
{
"epoch": 1.4213655197261754,
"grad_norm": 0.6357312202453613,
"learning_rate": 0.00012629133563183797,
"loss": 0.9651,
"step": 39450
},
{
"epoch": 1.4267699513601153,
"grad_norm": 0.7441504001617432,
"learning_rate": 0.0001256841043135283,
"loss": 0.9704,
"step": 39600
},
{
"epoch": 1.4321743829940552,
"grad_norm": 0.5487176179885864,
"learning_rate": 0.00012507585735226185,
"loss": 0.9714,
"step": 39750
},
{
"epoch": 1.437578814627995,
"grad_norm": 0.6709308624267578,
"learning_rate": 0.00012446661880033698,
"loss": 0.9587,
"step": 39900
},
{
"epoch": 1.4429832462619347,
"grad_norm": 0.638081431388855,
"learning_rate": 0.00012385641274926328,
"loss": 0.9631,
"step": 40050
},
{
"epoch": 1.4483876778958746,
"grad_norm": 0.6448566913604736,
"learning_rate": 0.00012324526332880867,
"loss": 0.9634,
"step": 40200
},
{
"epoch": 1.4537921095298145,
"grad_norm": 0.7188845872879028,
"learning_rate": 0.0001226331947060455,
"loss": 0.9669,
"step": 40350
},
{
"epoch": 1.4591965411637542,
"grad_norm": 0.5700541138648987,
"learning_rate": 0.00012202023108439455,
"loss": 0.9598,
"step": 40500
},
{
"epoch": 1.464600972797694,
"grad_norm": 0.6200810670852661,
"learning_rate": 0.0001214063967026682,
"loss": 0.9651,
"step": 40650
},
{
"epoch": 1.470005404431634,
"grad_norm": 0.6882332563400269,
"learning_rate": 0.00012079171583411184,
"loss": 0.9649,
"step": 40800
},
{
"epoch": 1.4754098360655736,
"grad_norm": 0.6133975982666016,
"learning_rate": 0.00012017621278544402,
"loss": 0.9495,
"step": 40950
},
{
"epoch": 1.4808142676995135,
"grad_norm": 0.8365902304649353,
"learning_rate": 0.00011955991189589526,
"loss": 0.95,
"step": 41100
},
{
"epoch": 1.4862186993334534,
"grad_norm": 0.5351865887641907,
"learning_rate": 0.0001189428375362457,
"loss": 0.9579,
"step": 41250
},
{
"epoch": 1.4916231309673933,
"grad_norm": 0.6488143801689148,
"learning_rate": 0.00011832501410786116,
"loss": 0.9513,
"step": 41400
},
{
"epoch": 1.4970275626013332,
"grad_norm": 0.6101202964782715,
"learning_rate": 0.0001177064660417285,
"loss": 0.9573,
"step": 41550
},
{
"epoch": 1.5024319942352728,
"grad_norm": 0.7013749480247498,
"learning_rate": 0.00011708721779748933,
"loss": 0.9508,
"step": 41700
},
{
"epoch": 1.5078364258692127,
"grad_norm": 0.5707131028175354,
"learning_rate": 0.00011646729386247286,
"loss": 0.9486,
"step": 41850
},
{
"epoch": 1.5132408575031526,
"grad_norm": 0.6973045468330383,
"learning_rate": 0.00011584671875072757,
"loss": 0.962,
"step": 42000
},
{
"epoch": 1.5186452891370923,
"grad_norm": 0.6686086654663086,
"learning_rate": 0.00011522551700205184,
"loss": 0.9606,
"step": 42150
},
{
"epoch": 1.5240497207710324,
"grad_norm": 0.5340304970741272,
"learning_rate": 0.00011460371318102358,
"loss": 0.9584,
"step": 42300
},
{
"epoch": 1.529454152404972,
"grad_norm": 0.6170547008514404,
"learning_rate": 0.00011398133187602873,
"loss": 0.947,
"step": 42450
},
{
"epoch": 1.534858584038912,
"grad_norm": 0.5485740900039673,
"learning_rate": 0.00011335839769828924,
"loss": 0.961,
"step": 42600
},
{
"epoch": 1.5402630156728518,
"grad_norm": 0.6151200532913208,
"learning_rate": 0.00011273493528088945,
"loss": 0.9531,
"step": 42750
},
{
"epoch": 1.5456674473067915,
"grad_norm": 0.6902984976768494,
"learning_rate": 0.00011211096927780236,
"loss": 0.9418,
"step": 42900
},
{
"epoch": 1.5510718789407314,
"grad_norm": 0.7150260806083679,
"learning_rate": 0.00011148652436291451,
"loss": 0.948,
"step": 43050
},
{
"epoch": 1.5564763105746713,
"grad_norm": 0.6931044459342957,
"learning_rate": 0.0001108616252290504,
"loss": 0.9571,
"step": 43200
},
{
"epoch": 1.561880742208611,
"grad_norm": 0.641190230846405,
"learning_rate": 0.00011023629658699596,
"loss": 0.9412,
"step": 43350
},
{
"epoch": 1.5672851738425508,
"grad_norm": 0.6901960968971252,
"learning_rate": 0.00010961056316452145,
"loss": 0.954,
"step": 43500
},
{
"epoch": 1.5726896054764907,
"grad_norm": 0.6115658283233643,
"learning_rate": 0.00010898444970540372,
"loss": 0.952,
"step": 43650
},
{
"epoch": 1.5780940371104304,
"grad_norm": 0.7072962522506714,
"learning_rate": 0.00010835798096844743,
"loss": 0.9484,
"step": 43800
},
{
"epoch": 1.5834984687443705,
"grad_norm": 0.5898342728614807,
"learning_rate": 0.00010773118172650643,
"loss": 0.9421,
"step": 43950
},
{
"epoch": 1.5889029003783102,
"grad_norm": 0.503633439540863,
"learning_rate": 0.00010710407676550382,
"loss": 0.935,
"step": 44100
},
{
"epoch": 1.59430733201225,
"grad_norm": 0.5756278038024902,
"learning_rate": 0.00010647669088345204,
"loss": 0.9514,
"step": 44250
},
{
"epoch": 1.59971176364619,
"grad_norm": 0.6327024102210999,
"learning_rate": 0.00010584904888947204,
"loss": 0.9398,
"step": 44400
},
{
"epoch": 1.6051161952801296,
"grad_norm": 0.6922555565834045,
"learning_rate": 0.00010522117560281251,
"loss": 0.9411,
"step": 44550
},
{
"epoch": 1.6105206269140695,
"grad_norm": 0.7153000235557556,
"learning_rate": 0.00010459309585186818,
"loss": 0.9437,
"step": 44700
},
{
"epoch": 1.6159250585480094,
"grad_norm": 0.7171802520751953,
"learning_rate": 0.0001039648344731982,
"loss": 0.9305,
"step": 44850
},
{
"epoch": 1.621329490181949,
"grad_norm": 0.5943671464920044,
"learning_rate": 0.00010333641631054391,
"loss": 0.938,
"step": 45000
},
{
"epoch": 1.6267339218158892,
"grad_norm": 0.7467085123062134,
"learning_rate": 0.00010270786621384645,
"loss": 0.9416,
"step": 45150
},
{
"epoch": 1.6321383534498288,
"grad_norm": 0.6827779412269592,
"learning_rate": 0.00010207920903826415,
"loss": 0.9381,
"step": 45300
},
{
"epoch": 1.6375427850837687,
"grad_norm": 0.6708967089653015,
"learning_rate": 0.00010145046964318963,
"loss": 0.9495,
"step": 45450
},
{
"epoch": 1.6429472167177086,
"grad_norm": 0.6415010094642639,
"learning_rate": 0.00010082167289126672,
"loss": 0.9312,
"step": 45600
},
{
"epoch": 1.6483516483516483,
"grad_norm": 0.695865273475647,
"learning_rate": 0.00010019284364740731,
"loss": 0.9309,
"step": 45750
},
{
"epoch": 1.6537560799855882,
"grad_norm": 0.6317395567893982,
"learning_rate": 9.956400677780833e-05,
"loss": 0.941,
"step": 45900
},
{
"epoch": 1.659160511619528,
"grad_norm": 0.6181449294090271,
"learning_rate": 9.893518714896805e-05,
"loss": 0.9295,
"step": 46050
},
{
"epoch": 1.6645649432534677,
"grad_norm": 0.5777118802070618,
"learning_rate": 9.830640962670306e-05,
"loss": 0.9264,
"step": 46200
},
{
"epoch": 1.6699693748874078,
"grad_norm": 0.6352208852767944,
"learning_rate": 9.767769907516495e-05,
"loss": 0.9311,
"step": 46350
},
{
"epoch": 1.6753738065213475,
"grad_norm": 0.6197606325149536,
"learning_rate": 9.704908035585692e-05,
"loss": 0.9302,
"step": 46500
},
{
"epoch": 1.6807782381552874,
"grad_norm": 0.6172420382499695,
"learning_rate": 9.642057832665095e-05,
"loss": 0.9253,
"step": 46650
},
{
"epoch": 1.6861826697892273,
"grad_norm": 0.6538959741592407,
"learning_rate": 9.579221784080455e-05,
"loss": 0.9376,
"step": 46800
},
{
"epoch": 1.691587101423167,
"grad_norm": 0.6067585945129395,
"learning_rate": 9.516402374597812e-05,
"loss": 0.927,
"step": 46950
},
{
"epoch": 1.6969915330571068,
"grad_norm": 0.5777443647384644,
"learning_rate": 9.453602088325234e-05,
"loss": 0.9289,
"step": 47100
},
{
"epoch": 1.7023959646910467,
"grad_norm": 0.5103596448898315,
"learning_rate": 9.390823408614598e-05,
"loss": 0.9137,
"step": 47250
},
{
"epoch": 1.7078003963249864,
"grad_norm": 0.624183714389801,
"learning_rate": 9.328068817963359e-05,
"loss": 0.9236,
"step": 47400
},
{
"epoch": 1.7132048279589265,
"grad_norm": 0.5513512492179871,
"learning_rate": 9.265340797916421e-05,
"loss": 0.918,
"step": 47550
},
{
"epoch": 1.7186092595928661,
"grad_norm": 0.7002034187316895,
"learning_rate": 9.202641828967985e-05,
"loss": 0.9149,
"step": 47700
},
{
"epoch": 1.724013691226806,
"grad_norm": 0.5479480028152466,
"learning_rate": 9.139974390463459e-05,
"loss": 0.9265,
"step": 47850
},
{
"epoch": 1.729418122860746,
"grad_norm": 0.570182204246521,
"learning_rate": 9.077340960501425e-05,
"loss": 0.9079,
"step": 48000
},
{
"epoch": 1.7348225544946856,
"grad_norm": 0.6392347812652588,
"learning_rate": 9.014744015835656e-05,
"loss": 0.911,
"step": 48150
},
{
"epoch": 1.7402269861286255,
"grad_norm": 0.6063001751899719,
"learning_rate": 8.952186031777144e-05,
"loss": 0.9113,
"step": 48300
},
{
"epoch": 1.7456314177625654,
"grad_norm": 0.6585242748260498,
"learning_rate": 8.88966948209625e-05,
"loss": 0.9137,
"step": 48450
},
{
"epoch": 1.751035849396505,
"grad_norm": 0.5171977281570435,
"learning_rate": 8.827196838924867e-05,
"loss": 0.9211,
"step": 48600
},
{
"epoch": 1.756440281030445,
"grad_norm": 0.6493880152702332,
"learning_rate": 8.764770572658655e-05,
"loss": 0.9056,
"step": 48750
},
{
"epoch": 1.7618447126643848,
"grad_norm": 0.8104442954063416,
"learning_rate": 8.70239315185938e-05,
"loss": 0.9045,
"step": 48900
},
{
"epoch": 1.7672491442983245,
"grad_norm": 0.5967045426368713,
"learning_rate": 8.64006704315727e-05,
"loss": 0.9164,
"step": 49050
},
{
"epoch": 1.7726535759322646,
"grad_norm": 0.6888705492019653,
"learning_rate": 8.577794711153479e-05,
"loss": 0.9111,
"step": 49200
},
{
"epoch": 1.7780580075662042,
"grad_norm": 0.5948097705841064,
"learning_rate": 8.515578618322648e-05,
"loss": 0.9095,
"step": 49350
},
{
"epoch": 1.7834624392001441,
"grad_norm": 0.6458430886268616,
"learning_rate": 8.453421224915511e-05,
"loss": 0.9029,
"step": 49500
},
{
"epoch": 1.788866870834084,
"grad_norm": 0.8202154040336609,
"learning_rate": 8.391324988861611e-05,
"loss": 0.9168,
"step": 49650
},
{
"epoch": 1.7942713024680237,
"grad_norm": 0.5799959897994995,
"learning_rate": 8.32929236567211e-05,
"loss": 0.9005,
"step": 49800
},
{
"epoch": 1.7996757341019636,
"grad_norm": 0.7229143381118774,
"learning_rate": 8.267325808342685e-05,
"loss": 0.897,
"step": 49950
},
{
"epoch": 1.8050801657359035,
"grad_norm": 0.5912762880325317,
"learning_rate": 8.205427767256524e-05,
"loss": 0.9015,
"step": 50100
},
{
"epoch": 1.8104845973698431,
"grad_norm": 0.6438339352607727,
"learning_rate": 8.143600690087443e-05,
"loss": 0.9137,
"step": 50250
},
{
"epoch": 1.8158890290037832,
"grad_norm": 0.5374941229820251,
"learning_rate": 8.08184702170308e-05,
"loss": 0.9008,
"step": 50400
},
{
"epoch": 1.821293460637723,
"grad_norm": 0.5253046751022339,
"learning_rate": 8.020169204068219e-05,
"loss": 0.9015,
"step": 50550
},
{
"epoch": 1.8266978922716628,
"grad_norm": 0.6589975357055664,
"learning_rate": 7.958569676148234e-05,
"loss": 0.9117,
"step": 50700
},
{
"epoch": 1.8321023239056027,
"grad_norm": 0.5939854979515076,
"learning_rate": 7.897050873812647e-05,
"loss": 0.9024,
"step": 50850
},
{
"epoch": 1.8375067555395423,
"grad_norm": 0.6179183721542358,
"learning_rate": 7.835615229738775e-05,
"loss": 0.9111,
"step": 51000
},
{
"epoch": 1.8429111871734822,
"grad_norm": 0.6526548266410828,
"learning_rate": 7.774265173315581e-05,
"loss": 0.9002,
"step": 51150
},
{
"epoch": 1.8483156188074221,
"grad_norm": 0.5846490263938904,
"learning_rate": 7.713003130547556e-05,
"loss": 0.8889,
"step": 51300
},
{
"epoch": 1.8537200504413618,
"grad_norm": 0.5639694333076477,
"learning_rate": 7.651831523958827e-05,
"loss": 0.896,
"step": 51450
},
{
"epoch": 1.859124482075302,
"grad_norm": 0.5969030857086182,
"learning_rate": 7.590752772497345e-05,
"loss": 0.8899,
"step": 51600
},
{
"epoch": 1.8645289137092416,
"grad_norm": 0.57610023021698,
"learning_rate": 7.529769291439216e-05,
"loss": 0.8908,
"step": 51750
},
{
"epoch": 1.8699333453431815,
"grad_norm": 0.7263045907020569,
"learning_rate": 7.468883492293228e-05,
"loss": 0.8956,
"step": 51900
},
{
"epoch": 1.8753377769771213,
"grad_norm": 0.5964723825454712,
"learning_rate": 7.40809778270546e-05,
"loss": 0.8944,
"step": 52050
},
{
"epoch": 1.880742208611061,
"grad_norm": 0.6026207804679871,
"learning_rate": 7.347414566364085e-05,
"loss": 0.8892,
"step": 52200
},
{
"epoch": 1.886146640245001,
"grad_norm": 0.6354103684425354,
"learning_rate": 7.28683624290432e-05,
"loss": 0.8972,
"step": 52350
},
{
"epoch": 1.8915510718789408,
"grad_norm": 0.6123978495597839,
"learning_rate": 7.226365207813542e-05,
"loss": 0.8951,
"step": 52500
},
{
"epoch": 1.8969555035128804,
"grad_norm": 0.7344669699668884,
"learning_rate": 7.166003852336548e-05,
"loss": 0.8825,
"step": 52650
},
{
"epoch": 1.9023599351468206,
"grad_norm": 0.5727975368499756,
"learning_rate": 7.105754563381006e-05,
"loss": 0.8815,
"step": 52800
},
{
"epoch": 1.9077643667807602,
"grad_norm": 0.5696874856948853,
"learning_rate": 7.045619723423072e-05,
"loss": 0.8868,
"step": 52950
},
{
"epoch": 1.9131687984147,
"grad_norm": 0.6967275142669678,
"learning_rate": 6.985601710413158e-05,
"loss": 0.8845,
"step": 53100
},
{
"epoch": 1.91857323004864,
"grad_norm": 0.64991295337677,
"learning_rate": 6.92570289768193e-05,
"loss": 0.8824,
"step": 53250
},
{
"epoch": 1.9239776616825797,
"grad_norm": 0.6261005997657776,
"learning_rate": 6.865925653846432e-05,
"loss": 0.881,
"step": 53400
},
{
"epoch": 1.9293820933165196,
"grad_norm": 0.6127173900604248,
"learning_rate": 6.806272342716431e-05,
"loss": 0.8878,
"step": 53550
},
{
"epoch": 1.9347865249504594,
"grad_norm": 0.552493691444397,
"learning_rate": 6.746745323200943e-05,
"loss": 0.888,
"step": 53700
},
{
"epoch": 1.940190956584399,
"grad_norm": 0.641351580619812,
"learning_rate": 6.687346949214966e-05,
"loss": 0.8834,
"step": 53850
},
{
"epoch": 1.945595388218339,
"grad_norm": 0.5708601474761963,
"learning_rate": 6.628079569586365e-05,
"loss": 0.8901,
"step": 54000
},
{
"epoch": 1.9509998198522789,
"grad_norm": 0.5919014811515808,
"learning_rate": 6.56894552796303e-05,
"loss": 0.8833,
"step": 54150
},
{
"epoch": 1.9564042514862185,
"grad_norm": 0.5352922677993774,
"learning_rate": 6.509947162720172e-05,
"loss": 0.8762,
"step": 54300
},
{
"epoch": 1.9618086831201587,
"grad_norm": 0.5126431584358215,
"learning_rate": 6.451086806867864e-05,
"loss": 0.8719,
"step": 54450
},
{
"epoch": 1.9672131147540983,
"grad_norm": 0.6120204329490662,
"learning_rate": 6.392366787958786e-05,
"loss": 0.882,
"step": 54600
},
{
"epoch": 1.9726175463880382,
"grad_norm": 0.641154408454895,
"learning_rate": 6.333789427996191e-05,
"loss": 0.8743,
"step": 54750
},
{
"epoch": 1.978021978021978,
"grad_norm": 0.648558497428894,
"learning_rate": 6.275357043342069e-05,
"loss": 0.8645,
"step": 54900
},
{
"epoch": 1.9834264096559178,
"grad_norm": 0.6066434979438782,
"learning_rate": 6.217071944625562e-05,
"loss": 0.8622,
"step": 55050
},
{
"epoch": 1.9888308412898577,
"grad_norm": 0.5739848613739014,
"learning_rate": 6.158936436651593e-05,
"loss": 0.8718,
"step": 55200
},
{
"epoch": 1.9942352729237975,
"grad_norm": 0.5929279923439026,
"learning_rate": 6.100952818309715e-05,
"loss": 0.8686,
"step": 55350
},
{
"epoch": 1.9996397045577372,
"grad_norm": 0.5922086238861084,
"learning_rate": 6.043123382483224e-05,
"loss": 0.8753,
"step": 55500
},
{
"epoch": 2.0050441361916773,
"grad_norm": 0.6458303332328796,
"learning_rate": 5.98545041595847e-05,
"loss": 0.791,
"step": 55650
},
{
"epoch": 2.010448567825617,
"grad_norm": 0.5965596437454224,
"learning_rate": 5.927936199334435e-05,
"loss": 0.7904,
"step": 55800
},
{
"epoch": 2.0158529994595566,
"grad_norm": 0.523539125919342,
"learning_rate": 5.8705830069325566e-05,
"loss": 0.7859,
"step": 55950
},
{
"epoch": 2.0212574310934968,
"grad_norm": 0.5941675305366516,
"learning_rate": 5.813393106706795e-05,
"loss": 0.7907,
"step": 56100
},
{
"epoch": 2.0266618627274364,
"grad_norm": 0.5710470080375671,
"learning_rate": 5.7563687601539276e-05,
"loss": 0.787,
"step": 56250
},
{
"epoch": 2.0320662943613765,
"grad_norm": 0.7543295621871948,
"learning_rate": 5.699512222224148e-05,
"loss": 0.7925,
"step": 56400
},
{
"epoch": 2.037470725995316,
"grad_norm": 0.7011525630950928,
"learning_rate": 5.642825741231889e-05,
"loss": 0.7863,
"step": 56550
},
{
"epoch": 2.042875157629256,
"grad_norm": 0.7366952300071716,
"learning_rate": 5.586311558766908e-05,
"loss": 0.7845,
"step": 56700
},
{
"epoch": 2.048279589263196,
"grad_norm": 0.5936063528060913,
"learning_rate": 5.5299719096056444e-05,
"loss": 0.7878,
"step": 56850
},
{
"epoch": 2.0536840208971356,
"grad_norm": 0.6049606800079346,
"learning_rate": 5.4738090216228724e-05,
"loss": 0.7856,
"step": 57000
},
{
"epoch": 2.0590884525310753,
"grad_norm": 0.6939170360565186,
"learning_rate": 5.4178251157035675e-05,
"loss": 0.7886,
"step": 57150
},
{
"epoch": 2.0644928841650154,
"grad_norm": 0.5444577932357788,
"learning_rate": 5.3620224056551224e-05,
"loss": 0.7806,
"step": 57300
},
{
"epoch": 2.069897315798955,
"grad_norm": 0.6011742949485779,
"learning_rate": 5.30640309811977e-05,
"loss": 0.7852,
"step": 57450
},
{
"epoch": 2.0753017474328948,
"grad_norm": 0.6152522563934326,
"learning_rate": 5.250969392487343e-05,
"loss": 0.7777,
"step": 57600
},
{
"epoch": 2.080706179066835,
"grad_norm": 0.4750346839427948,
"learning_rate": 5.195723480808309e-05,
"loss": 0.7735,
"step": 57750
},
{
"epoch": 2.0861106107007745,
"grad_norm": 0.5713702440261841,
"learning_rate": 5.140667547707064e-05,
"loss": 0.7874,
"step": 57900
},
{
"epoch": 2.0915150423347146,
"grad_norm": 0.5541932582855225,
"learning_rate": 5.085803770295579e-05,
"loss": 0.789,
"step": 58050
},
{
"epoch": 2.0969194739686543,
"grad_norm": 0.571283221244812,
"learning_rate": 5.03113431808727e-05,
"loss": 0.789,
"step": 58200
},
{
"epoch": 2.102323905602594,
"grad_norm": 0.6038793325424194,
"learning_rate": 4.976661352911237e-05,
"loss": 0.7887,
"step": 58350
},
{
"epoch": 2.107728337236534,
"grad_norm": 0.6276759505271912,
"learning_rate": 4.922387028826768e-05,
"loss": 0.7858,
"step": 58500
},
{
"epoch": 2.1131327688704737,
"grad_norm": 0.6171843409538269,
"learning_rate": 4.8683134920381665e-05,
"loss": 0.7813,
"step": 58650
},
{
"epoch": 2.1185372005044134,
"grad_norm": 0.6076928973197937,
"learning_rate": 4.814442880809853e-05,
"loss": 0.7871,
"step": 58800
},
{
"epoch": 2.1239416321383535,
"grad_norm": 0.6066181063652039,
"learning_rate": 4.760777325381852e-05,
"loss": 0.7793,
"step": 58950
},
{
"epoch": 2.129346063772293,
"grad_norm": 0.6619130373001099,
"learning_rate": 4.707318947885537e-05,
"loss": 0.7842,
"step": 59100
},
{
"epoch": 2.1347504954062333,
"grad_norm": 0.6103502511978149,
"learning_rate": 4.6540698622597e-05,
"loss": 0.7858,
"step": 59250
},
{
"epoch": 2.140154927040173,
"grad_norm": 0.6459470391273499,
"learning_rate": 4.6010321741669726e-05,
"loss": 0.7817,
"step": 59400
},
{
"epoch": 2.1455593586741126,
"grad_norm": 0.643363356590271,
"learning_rate": 4.5482079809105704e-05,
"loss": 0.7743,
"step": 59550
},
{
"epoch": 2.1509637903080527,
"grad_norm": 0.518678605556488,
"learning_rate": 4.495599371351331e-05,
"loss": 0.7826,
"step": 59700
},
{
"epoch": 2.1563682219419924,
"grad_norm": 0.5462015867233276,
"learning_rate": 4.4432084258251415e-05,
"loss": 0.7729,
"step": 59850
},
{
"epoch": 2.161772653575932,
"grad_norm": 0.5519649982452393,
"learning_rate": 4.39103721606065e-05,
"loss": 0.7765,
"step": 60000
},
{
"epoch": 2.167177085209872,
"grad_norm": 0.672087550163269,
"learning_rate": 4.3390878050973573e-05,
"loss": 0.7808,
"step": 60150
},
{
"epoch": 2.172581516843812,
"grad_norm": 0.5825379490852356,
"learning_rate": 4.287362247204033e-05,
"loss": 0.7711,
"step": 60300
},
{
"epoch": 2.177985948477752,
"grad_norm": 0.6448932886123657,
"learning_rate": 4.2358625877974864e-05,
"loss": 0.7767,
"step": 60450
},
{
"epoch": 2.1833903801116916,
"grad_norm": 0.60658860206604,
"learning_rate": 4.1845908633616695e-05,
"loss": 0.772,
"step": 60600
},
{
"epoch": 2.1887948117456313,
"grad_norm": 0.6476044058799744,
"learning_rate": 4.1335491013671565e-05,
"loss": 0.7784,
"step": 60750
},
{
"epoch": 2.1941992433795714,
"grad_norm": 0.7101139426231384,
"learning_rate": 4.0827393201909794e-05,
"loss": 0.7727,
"step": 60900
},
{
"epoch": 2.199603675013511,
"grad_norm": 0.7003293633460999,
"learning_rate": 4.032163529036792e-05,
"loss": 0.7806,
"step": 61050
},
{
"epoch": 2.2050081066474507,
"grad_norm": 0.5855246782302856,
"learning_rate": 3.981823727855444e-05,
"loss": 0.7814,
"step": 61200
},
{
"epoch": 2.210412538281391,
"grad_norm": 0.5075130462646484,
"learning_rate": 3.9317219072658726e-05,
"loss": 0.7689,
"step": 61350
},
{
"epoch": 2.2158169699153305,
"grad_norm": 0.5855611562728882,
"learning_rate": 3.881860048476396e-05,
"loss": 0.7777,
"step": 61500
},
{
"epoch": 2.22122140154927,
"grad_norm": 0.5581937432289124,
"learning_rate": 3.8322401232063765e-05,
"loss": 0.7845,
"step": 61650
},
{
"epoch": 2.2266258331832103,
"grad_norm": 0.5910426378250122,
"learning_rate": 3.782864093608245e-05,
"loss": 0.7792,
"step": 61800
},
{
"epoch": 2.23203026481715,
"grad_norm": 0.5566779971122742,
"learning_rate": 3.733733912189903e-05,
"loss": 0.7711,
"step": 61950
},
{
"epoch": 2.23743469645109,
"grad_norm": 0.5984916090965271,
"learning_rate": 3.68485152173752e-05,
"loss": 0.7675,
"step": 62100
},
{
"epoch": 2.2428391280850297,
"grad_norm": 0.5687974095344543,
"learning_rate": 3.6362188552387186e-05,
"loss": 0.7752,
"step": 62250
},
{
"epoch": 2.2482435597189694,
"grad_norm": 0.5997481942176819,
"learning_rate": 3.587837835806116e-05,
"loss": 0.7762,
"step": 62400
},
{
"epoch": 2.2536479913529095,
"grad_norm": 0.6333452463150024,
"learning_rate": 3.539710376601299e-05,
"loss": 0.776,
"step": 62550
},
{
"epoch": 2.259052422986849,
"grad_norm": 0.49814724922180176,
"learning_rate": 3.4918383807591516e-05,
"loss": 0.7704,
"step": 62700
},
{
"epoch": 2.2644568546207893,
"grad_norm": 0.6359221935272217,
"learning_rate": 3.444223741312608e-05,
"loss": 0.7749,
"step": 62850
},
{
"epoch": 2.269861286254729,
"grad_norm": 0.5802394151687622,
"learning_rate": 3.396868341117798e-05,
"loss": 0.7755,
"step": 63000
},
{
"epoch": 2.2752657178886686,
"grad_norm": 0.6383761763572693,
"learning_rate": 3.3497740527795905e-05,
"loss": 0.775,
"step": 63150
},
{
"epoch": 2.2806701495226087,
"grad_norm": 0.5394207835197449,
"learning_rate": 3.3029427385775335e-05,
"loss": 0.7755,
"step": 63300
},
{
"epoch": 2.2860745811565484,
"grad_norm": 0.5275822877883911,
"learning_rate": 3.25637625039222e-05,
"loss": 0.7728,
"step": 63450
},
{
"epoch": 2.291479012790488,
"grad_norm": 0.5123447775840759,
"learning_rate": 3.21007642963207e-05,
"loss": 0.7721,
"step": 63600
},
{
"epoch": 2.296883444424428,
"grad_norm": 0.586459755897522,
"learning_rate": 3.164045107160487e-05,
"loss": 0.7708,
"step": 63750
},
{
"epoch": 2.302287876058368,
"grad_norm": 0.6412725448608398,
"learning_rate": 3.1182841032234924e-05,
"loss": 0.7695,
"step": 63900
},
{
"epoch": 2.3076923076923075,
"grad_norm": 0.5762320160865784,
"learning_rate": 3.072795227377716e-05,
"loss": 0.7602,
"step": 64050
},
{
"epoch": 2.3130967393262476,
"grad_norm": 0.5541566014289856,
"learning_rate": 3.027580278418852e-05,
"loss": 0.7649,
"step": 64200
},
{
"epoch": 2.3185011709601873,
"grad_norm": 0.5710071921348572,
"learning_rate": 2.9826410443105422e-05,
"loss": 0.7643,
"step": 64350
},
{
"epoch": 2.3239056025941274,
"grad_norm": 0.6665874719619751,
"learning_rate": 2.9379793021136427e-05,
"loss": 0.7619,
"step": 64500
},
{
"epoch": 2.329310034228067,
"grad_norm": 0.5459585189819336,
"learning_rate": 2.8935968179159843e-05,
"loss": 0.7503,
"step": 64650
},
{
"epoch": 2.3347144658620067,
"grad_norm": 0.6013796925544739,
"learning_rate": 2.8494953467625107e-05,
"loss": 0.7616,
"step": 64800
},
{
"epoch": 2.340118897495947,
"grad_norm": 0.6519309282302856,
"learning_rate": 2.8056766325858863e-05,
"loss": 0.7582,
"step": 64950
},
{
"epoch": 2.3455233291298865,
"grad_norm": 0.6198135614395142,
"learning_rate": 2.7621424081375423e-05,
"loss": 0.7538,
"step": 65100
},
{
"epoch": 2.350927760763826,
"grad_norm": 0.580227792263031,
"learning_rate": 2.718894394919155e-05,
"loss": 0.7604,
"step": 65250
},
{
"epoch": 2.3563321923977663,
"grad_norm": 0.5496440529823303,
"learning_rate": 2.6759343031145467e-05,
"loss": 0.7629,
"step": 65400
},
{
"epoch": 2.361736624031706,
"grad_norm": 0.6118148565292358,
"learning_rate": 2.633263831522098e-05,
"loss": 0.7543,
"step": 65550
},
{
"epoch": 2.3671410556656456,
"grad_norm": 0.5903668403625488,
"learning_rate": 2.5908846674875497e-05,
"loss": 0.7626,
"step": 65700
},
{
"epoch": 2.3725454872995857,
"grad_norm": 0.5964175462722778,
"learning_rate": 2.548798486837276e-05,
"loss": 0.7584,
"step": 65850
},
{
"epoch": 2.3779499189335254,
"grad_norm": 0.6447151899337769,
"learning_rate": 2.5070069538120212e-05,
"loss": 0.7659,
"step": 66000
},
{
"epoch": 2.3833543505674655,
"grad_norm": 0.5526403188705444,
"learning_rate": 2.465511721001098e-05,
"loss": 0.7528,
"step": 66150
},
{
"epoch": 2.388758782201405,
"grad_norm": 0.6118183732032776,
"learning_rate": 2.4243144292770215e-05,
"loss": 0.7447,
"step": 66300
},
{
"epoch": 2.394163213835345,
"grad_norm": 0.5308869481086731,
"learning_rate": 2.383416707730637e-05,
"loss": 0.7593,
"step": 66450
},
{
"epoch": 2.399567645469285,
"grad_norm": 0.6109766364097595,
"learning_rate": 2.3428201736067003e-05,
"loss": 0.761,
"step": 66600
},
{
"epoch": 2.4049720771032246,
"grad_norm": 0.6102012991905212,
"learning_rate": 2.302526432239902e-05,
"loss": 0.7533,
"step": 66750
},
{
"epoch": 2.4103765087371647,
"grad_norm": 0.5869913697242737,
"learning_rate": 2.2625370769914233e-05,
"loss": 0.7514,
"step": 66900
},
{
"epoch": 2.4157809403711044,
"grad_norm": 0.5591433644294739,
"learning_rate": 2.2228536891859063e-05,
"loss": 0.7608,
"step": 67050
},
{
"epoch": 2.421185372005044,
"grad_norm": 0.48755505681037903,
"learning_rate": 2.183477838048923e-05,
"loss": 0.7581,
"step": 67200
},
{
"epoch": 2.426589803638984,
"grad_norm": 0.5120564103126526,
"learning_rate": 2.144411080644925e-05,
"loss": 0.7609,
"step": 67350
},
{
"epoch": 2.431994235272924,
"grad_norm": 0.5482677221298218,
"learning_rate": 2.1056549618156796e-05,
"loss": 0.7618,
"step": 67500
},
{
"epoch": 2.4373986669068635,
"grad_norm": 0.6918262243270874,
"learning_rate": 2.067211014119168e-05,
"loss": 0.757,
"step": 67650
},
{
"epoch": 2.4428030985408036,
"grad_norm": 0.455586701631546,
"learning_rate": 2.029080757768994e-05,
"loss": 0.7446,
"step": 67800
},
{
"epoch": 2.4482075301747432,
"grad_norm": 0.5845438838005066,
"learning_rate": 1.9912657005742608e-05,
"loss": 0.7558,
"step": 67950
},
{
"epoch": 2.453611961808683,
"grad_norm": 0.6255479454994202,
"learning_rate": 1.953767337879947e-05,
"loss": 0.7426,
"step": 68100
},
{
"epoch": 2.459016393442623,
"grad_norm": 0.5470909476280212,
"learning_rate": 1.9165871525077828e-05,
"loss": 0.7597,
"step": 68250
},
{
"epoch": 2.4644208250765627,
"grad_norm": 0.5875541567802429,
"learning_rate": 1.879726614697612e-05,
"loss": 0.7491,
"step": 68400
},
{
"epoch": 2.469825256710503,
"grad_norm": 0.6186181306838989,
"learning_rate": 1.843187182049244e-05,
"loss": 0.7556,
"step": 68550
},
{
"epoch": 2.4752296883444425,
"grad_norm": 0.6414260268211365,
"learning_rate": 1.8069702994648208e-05,
"loss": 0.7534,
"step": 68700
},
{
"epoch": 2.480634119978382,
"grad_norm": 0.5647196173667908,
"learning_rate": 1.7710773990916885e-05,
"loss": 0.7467,
"step": 68850
},
{
"epoch": 2.4860385516123222,
"grad_norm": 0.5534460544586182,
"learning_rate": 1.7355099002657495e-05,
"loss": 0.7591,
"step": 69000
},
{
"epoch": 2.491442983246262,
"grad_norm": 0.5535364151000977,
"learning_rate": 1.7002692094553506e-05,
"loss": 0.7497,
"step": 69150
},
{
"epoch": 2.496847414880202,
"grad_norm": 0.5928584337234497,
"learning_rate": 1.6653567202056585e-05,
"loss": 0.7496,
"step": 69300
},
{
"epoch": 2.5022518465141417,
"grad_norm": 0.5369604825973511,
"learning_rate": 1.6307738130835515e-05,
"loss": 0.761,
"step": 69450
},
{
"epoch": 2.5076562781480813,
"grad_norm": 0.6959002614021301,
"learning_rate": 1.5965218556230375e-05,
"loss": 0.7461,
"step": 69600
},
{
"epoch": 2.513060709782021,
"grad_norm": 0.6277987360954285,
"learning_rate": 1.5626022022711694e-05,
"loss": 0.7467,
"step": 69750
},
{
"epoch": 2.518465141415961,
"grad_norm": 0.6087015867233276,
"learning_rate": 1.529016194334484e-05,
"loss": 0.7556,
"step": 69900
},
{
"epoch": 2.523869573049901,
"grad_norm": 0.5043054819107056,
"learning_rate": 1.4957651599259615e-05,
"loss": 0.7397,
"step": 70050
},
{
"epoch": 2.529274004683841,
"grad_norm": 0.6836428642272949,
"learning_rate": 1.4628504139125177e-05,
"loss": 0.741,
"step": 70200
},
{
"epoch": 2.5346784363177806,
"grad_norm": 0.5704199075698853,
"learning_rate": 1.4302732578629918e-05,
"loss": 0.7513,
"step": 70350
},
{
"epoch": 2.5400828679517202,
"grad_norm": 0.5928525328636169,
"learning_rate": 1.3980349799966985e-05,
"loss": 0.7485,
"step": 70500
},
{
"epoch": 2.5454872995856603,
"grad_norm": 0.6592413783073425,
"learning_rate": 1.3661368551324648e-05,
"loss": 0.7452,
"step": 70650
},
{
"epoch": 2.5508917312196,
"grad_norm": 0.5700178146362305,
"learning_rate": 1.3345801446382344e-05,
"loss": 0.7496,
"step": 70800
},
{
"epoch": 2.55629616285354,
"grad_norm": 0.5675559043884277,
"learning_rate": 1.3033660963811878e-05,
"loss": 0.7488,
"step": 70950
},
{
"epoch": 2.5617005944874798,
"grad_norm": 0.5796085596084595,
"learning_rate": 1.2724959446783868e-05,
"loss": 0.7454,
"step": 71100
},
{
"epoch": 2.5671050261214194,
"grad_norm": 0.6384360194206238,
"learning_rate": 1.2419709102479804e-05,
"loss": 0.7387,
"step": 71250
},
{
"epoch": 2.5725094577553596,
"grad_norm": 0.5239229798316956,
"learning_rate": 1.2117922001609173e-05,
"loss": 0.7371,
"step": 71400
},
{
"epoch": 2.577913889389299,
"grad_norm": 0.5770368576049805,
"learning_rate": 1.181961007793222e-05,
"loss": 0.7451,
"step": 71550
},
{
"epoch": 2.5833183210232393,
"grad_norm": 0.5493025779724121,
"learning_rate": 1.1524785127788074e-05,
"loss": 0.7396,
"step": 71700
},
{
"epoch": 2.588722752657179,
"grad_norm": 0.5658043622970581,
"learning_rate": 1.123345880962826e-05,
"loss": 0.7448,
"step": 71850
},
{
"epoch": 2.5941271842911187,
"grad_norm": 0.5434427857398987,
"learning_rate": 1.0945642643555542e-05,
"loss": 0.7471,
"step": 72000
},
{
"epoch": 2.5995316159250583,
"grad_norm": 0.5109556913375854,
"learning_rate": 1.066134801086862e-05,
"loss": 0.7434,
"step": 72150
},
{
"epoch": 2.6049360475589984,
"grad_norm": 0.5859112739562988,
"learning_rate": 1.0380586153611926e-05,
"loss": 0.7391,
"step": 72300
},
{
"epoch": 2.610340479192938,
"grad_norm": 0.5381293296813965,
"learning_rate": 1.0103368174131044e-05,
"loss": 0.7402,
"step": 72450
},
{
"epoch": 2.615744910826878,
"grad_norm": 0.5799181461334229,
"learning_rate": 9.829705034633763e-06,
"loss": 0.746,
"step": 72600
},
{
"epoch": 2.621149342460818,
"grad_norm": 0.5245427489280701,
"learning_rate": 9.559607556756589e-06,
"loss": 0.7374,
"step": 72750
},
{
"epoch": 2.6265537740947575,
"grad_norm": 0.5755253434181213,
"learning_rate": 9.29308642113672e-06,
"loss": 0.7335,
"step": 72900
},
{
"epoch": 2.6319582057286977,
"grad_norm": 0.5702092051506042,
"learning_rate": 9.030152166989848e-06,
"loss": 0.7441,
"step": 73050
},
{
"epoch": 2.6373626373626373,
"grad_norm": 0.5722294449806213,
"learning_rate": 8.770815191693294e-06,
"loss": 0.745,
"step": 73200
},
{
"epoch": 2.6427670689965774,
"grad_norm": 0.5095585584640503,
"learning_rate": 8.515085750374819e-06,
"loss": 0.7399,
"step": 73350
},
{
"epoch": 2.648171500630517,
"grad_norm": 0.7061243057250977,
"learning_rate": 8.262973955507213e-06,
"loss": 0.7317,
"step": 73500
},
{
"epoch": 2.6535759322644568,
"grad_norm": 0.6071792244911194,
"learning_rate": 8.014489776508406e-06,
"loss": 0.7457,
"step": 73650
},
{
"epoch": 2.6589803638983964,
"grad_norm": 0.6209822297096252,
"learning_rate": 7.769643039347118e-06,
"loss": 0.7304,
"step": 73800
},
{
"epoch": 2.6643847955323365,
"grad_norm": 0.5465585589408875,
"learning_rate": 7.528443426154386e-06,
"loss": 0.7348,
"step": 73950
},
{
"epoch": 2.669789227166276,
"grad_norm": 0.5735740661621094,
"learning_rate": 7.290900474840745e-06,
"loss": 0.7509,
"step": 74100
},
{
"epoch": 2.6751936588002163,
"grad_norm": 0.5864896178245544,
"learning_rate": 7.0570235787189575e-06,
"loss": 0.7422,
"step": 74250
},
{
"epoch": 2.680598090434156,
"grad_norm": 0.5019831657409668,
"learning_rate": 6.82682198613267e-06,
"loss": 0.74,
"step": 74400
},
{
"epoch": 2.6860025220680956,
"grad_norm": 0.4947664141654968,
"learning_rate": 6.600304800090629e-06,
"loss": 0.7424,
"step": 74550
},
{
"epoch": 2.6914069537020358,
"grad_norm": 0.5284778475761414,
"learning_rate": 6.3774809779066914e-06,
"loss": 0.741,
"step": 74700
},
{
"epoch": 2.6968113853359754,
"grad_norm": 0.5382539629936218,
"learning_rate": 6.158359330845742e-06,
"loss": 0.7384,
"step": 74850
},
{
"epoch": 2.7022158169699155,
"grad_norm": 0.6098785996437073,
"learning_rate": 5.942948523775172e-06,
"loss": 0.732,
"step": 75000
},
{
"epoch": 2.707620248603855,
"grad_norm": 0.5111733675003052,
"learning_rate": 5.731257074822227e-06,
"loss": 0.7401,
"step": 75150
},
{
"epoch": 2.713024680237795,
"grad_norm": 0.563735842704773,
"learning_rate": 5.523293355037174e-06,
"loss": 0.7373,
"step": 75300
},
{
"epoch": 2.718429111871735,
"grad_norm": 0.48581522703170776,
"learning_rate": 5.319065588062389e-06,
"loss": 0.7355,
"step": 75450
},
{
"epoch": 2.7238335435056746,
"grad_norm": 0.6022956371307373,
"learning_rate": 5.118581849806991e-06,
"loss": 0.752,
"step": 75600
},
{
"epoch": 2.7292379751396147,
"grad_norm": 0.5350160002708435,
"learning_rate": 4.92185006812762e-06,
"loss": 0.7302,
"step": 75750
},
{
"epoch": 2.7346424067735544,
"grad_norm": 0.5559709668159485,
"learning_rate": 4.728878022514904e-06,
"loss": 0.7258,
"step": 75900
},
{
"epoch": 2.740046838407494,
"grad_norm": 0.5401473045349121,
"learning_rate": 4.5396733437857885e-06,
"loss": 0.7485,
"step": 76050
},
{
"epoch": 2.7454512700414337,
"grad_norm": 0.5016641020774841,
"learning_rate": 4.354243513781841e-06,
"loss": 0.7257,
"step": 76200
},
{
"epoch": 2.750855701675374,
"grad_norm": 0.5274752974510193,
"learning_rate": 4.172595865073414e-06,
"loss": 0.7307,
"step": 76350
},
{
"epoch": 2.7562601333093135,
"grad_norm": 0.5795451402664185,
"learning_rate": 3.994737580669572e-06,
"loss": 0.7431,
"step": 76500
},
{
"epoch": 2.7616645649432536,
"grad_norm": 0.584701418876648,
"learning_rate": 3.820675693734166e-06,
"loss": 0.7333,
"step": 76650
},
{
"epoch": 2.7670689965771933,
"grad_norm": 0.5679466724395752,
"learning_rate": 3.6504170873076894e-06,
"loss": 0.7457,
"step": 76800
},
{
"epoch": 2.772473428211133,
"grad_norm": 0.5592213869094849,
"learning_rate": 3.483968494035039e-06,
"loss": 0.7438,
"step": 76950
},
{
"epoch": 2.777877859845073,
"grad_norm": 0.6507932543754578,
"learning_rate": 3.3213364958993633e-06,
"loss": 0.7332,
"step": 77100
},
{
"epoch": 2.7832822914790127,
"grad_norm": 0.5836296081542969,
"learning_rate": 3.1625275239617447e-06,
"loss": 0.7341,
"step": 77250
},
{
"epoch": 2.788686723112953,
"grad_norm": 0.6291818618774414,
"learning_rate": 3.0075478581068517e-06,
"loss": 0.7391,
"step": 77400
},
{
"epoch": 2.7940911547468925,
"grad_norm": 0.59623783826828,
"learning_rate": 2.8564036267947347e-06,
"loss": 0.7281,
"step": 77550
},
{
"epoch": 2.799495586380832,
"grad_norm": 0.5835798978805542,
"learning_rate": 2.7091008068183323e-06,
"loss": 0.7385,
"step": 77700
},
{
"epoch": 2.804900018014772,
"grad_norm": 0.5502892732620239,
"learning_rate": 2.565645223067237e-06,
"loss": 0.7441,
"step": 77850
},
{
"epoch": 2.810304449648712,
"grad_norm": 0.5453166365623474,
"learning_rate": 2.4260425482973025e-06,
"loss": 0.7338,
"step": 78000
},
{
"epoch": 2.8157088812826516,
"grad_norm": 0.5541927814483643,
"learning_rate": 2.2902983029063463e-06,
"loss": 0.7325,
"step": 78150
},
{
"epoch": 2.8211133129165917,
"grad_norm": 0.5624451041221619,
"learning_rate": 2.158417854715844e-06,
"loss": 0.7311,
"step": 78300
},
{
"epoch": 2.8265177445505314,
"grad_norm": 0.6407118439674377,
"learning_rate": 2.0304064187587012e-06,
"loss": 0.7343,
"step": 78450
},
{
"epoch": 2.831922176184471,
"grad_norm": 0.6349582076072693,
"learning_rate": 1.906269057072918e-06,
"loss": 0.7289,
"step": 78600
},
{
"epoch": 2.837326607818411,
"grad_norm": 0.511360764503479,
"learning_rate": 1.7860106785015707e-06,
"loss": 0.7362,
"step": 78750
},
{
"epoch": 2.842731039452351,
"grad_norm": 0.6116952300071716,
"learning_rate": 1.669636038498612e-06,
"loss": 0.7357,
"step": 78900
},
{
"epoch": 2.848135471086291,
"grad_norm": 0.5288776159286499,
"learning_rate": 1.5571497389408218e-06,
"loss": 0.7377,
"step": 79050
},
{
"epoch": 2.8535399027202306,
"grad_norm": 0.5661271810531616,
"learning_rate": 1.4485562279458742e-06,
"loss": 0.7335,
"step": 79200
},
{
"epoch": 2.8589443343541703,
"grad_norm": 0.46028730273246765,
"learning_rate": 1.3438597996963675e-06,
"loss": 0.7306,
"step": 79350
},
{
"epoch": 2.8643487659881104,
"grad_norm": 0.5887011289596558,
"learning_rate": 1.243064594270127e-06,
"loss": 0.7348,
"step": 79500
},
{
"epoch": 2.86975319762205,
"grad_norm": 0.5686684846878052,
"learning_rate": 1.1461745974763682e-06,
"loss": 0.7305,
"step": 79650
},
{
"epoch": 2.87515762925599,
"grad_norm": 0.5735449194908142,
"learning_rate": 1.0531936406982247e-06,
"loss": 0.726,
"step": 79800
},
{
"epoch": 2.88056206088993,
"grad_norm": 0.6428796648979187,
"learning_rate": 9.64125400741056e-07,
"loss": 0.7288,
"step": 79950
},
{
"epoch": 2.8859664925238695,
"grad_norm": 0.6176515817642212,
"learning_rate": 8.789733996872551e-07,
"loss": 0.7345,
"step": 80100
},
{
"epoch": 2.891370924157809,
"grad_norm": 0.5095422267913818,
"learning_rate": 7.977410047568246e-07,
"loss": 0.7419,
"step": 80250
},
{
"epoch": 2.8967753557917493,
"grad_norm": 0.5800315141677856,
"learning_rate": 7.204314281742952e-07,
"loss": 0.7375,
"step": 80400
},
{
"epoch": 2.902179787425689,
"grad_norm": 0.5727178454399109,
"learning_rate": 6.470477270416719e-07,
"loss": 0.7356,
"step": 80550
},
{
"epoch": 2.907584219059629,
"grad_norm": 0.5594687461853027,
"learning_rate": 5.775928032175637e-07,
"loss": 0.7363,
"step": 80700
},
{
"epoch": 2.9129886506935687,
"grad_norm": 0.6071078777313232,
"learning_rate": 5.120694032024309e-07,
"loss": 0.7491,
"step": 80850
},
{
"epoch": 2.9183930823275084,
"grad_norm": 0.6253530383110046,
"learning_rate": 4.5048011802997226e-07,
"loss": 0.7495,
"step": 81000
},
{
"epoch": 2.9237975139614485,
"grad_norm": 0.7043154835700989,
"learning_rate": 3.928273831646512e-07,
"loss": 0.7349,
"step": 81150
},
{
"epoch": 2.929201945595388,
"grad_norm": 0.5901583433151245,
"learning_rate": 3.391134784054284e-07,
"loss": 0.7388,
"step": 81300
},
{
"epoch": 2.9346063772293283,
"grad_norm": 0.5171722173690796,
"learning_rate": 2.8934052779558965e-07,
"loss": 0.7357,
"step": 81450
},
{
"epoch": 2.940010808863268,
"grad_norm": 0.5885277986526489,
"learning_rate": 2.4351049953872386e-07,
"loss": 0.7294,
"step": 81600
},
{
"epoch": 2.9454152404972076,
"grad_norm": 0.5369580388069153,
"learning_rate": 2.0162520592095225e-07,
"loss": 0.724,
"step": 81750
},
{
"epoch": 2.9508196721311473,
"grad_norm": 0.505922794342041,
"learning_rate": 1.6368630323920776e-07,
"loss": 0.7376,
"step": 81900
},
{
"epoch": 2.9562241037650874,
"grad_norm": 0.5709424018859863,
"learning_rate": 1.2969529173577633e-07,
"loss": 0.7273,
"step": 82050
},
{
"epoch": 2.961628535399027,
"grad_norm": 0.5696266293525696,
"learning_rate": 9.965351553895552e-08,
"loss": 0.7358,
"step": 82200
},
{
"epoch": 2.967032967032967,
"grad_norm": 0.6568360924720764,
"learning_rate": 7.356216260990811e-08,
"loss": 0.7337,
"step": 82350
},
{
"epoch": 2.972437398666907,
"grad_norm": 0.6210362911224365,
"learning_rate": 5.142226469568856e-08,
"loss": 0.7301,
"step": 82500
},
{
"epoch": 2.9778418303008465,
"grad_norm": 0.5563607811927795,
"learning_rate": 3.32346972884312e-08,
"loss": 0.7311,
"step": 82650
},
{
"epoch": 2.9832462619347866,
"grad_norm": 0.6156190633773804,
"learning_rate": 1.9000179590733525e-08,
"loss": 0.7248,
"step": 82800
},
{
"epoch": 2.9886506935687263,
"grad_norm": 0.6303669810295105,
"learning_rate": 8.719274487245522e-09,
"loss": 0.7412,
"step": 82950
},
{
"epoch": 2.9940551252026664,
"grad_norm": 0.4844772517681122,
"learning_rate": 2.392388522343136e-09,
"loss": 0.7329,
"step": 83100
},
{
"epoch": 2.999459556836606,
"grad_norm": 0.5367130041122437,
"learning_rate": 1.977188415214215e-11,
"loss": 0.7302,
"step": 83250
}
],
"logging_steps": 150,
"max_steps": 83265,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.661509740266363e+20,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}