|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 83265, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005404431633939831, |
|
"grad_norm": 0.9054504632949829, |
|
"learning_rate": 3.602738080941516e-06, |
|
"loss": 1.2514, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.010808863267879661, |
|
"grad_norm": 0.6858287453651428, |
|
"learning_rate": 7.205476161883032e-06, |
|
"loss": 1.1276, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.016213294901819494, |
|
"grad_norm": 0.6737526655197144, |
|
"learning_rate": 1.0808214242824548e-05, |
|
"loss": 1.0866, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.021617726535759323, |
|
"grad_norm": 0.6372509002685547, |
|
"learning_rate": 1.4410952323766064e-05, |
|
"loss": 1.0705, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.027022158169699155, |
|
"grad_norm": 0.5196628570556641, |
|
"learning_rate": 1.8013690404707578e-05, |
|
"loss": 1.0525, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.03242658980363899, |
|
"grad_norm": 0.4922332167625427, |
|
"learning_rate": 2.1616428485649097e-05, |
|
"loss": 1.0418, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.03783102143757881, |
|
"grad_norm": 0.5018568634986877, |
|
"learning_rate": 2.521916656659061e-05, |
|
"loss": 1.0359, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.043235453071518645, |
|
"grad_norm": 0.4413062334060669, |
|
"learning_rate": 2.8821904647532128e-05, |
|
"loss": 1.037, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.04863988470545848, |
|
"grad_norm": 0.3888317048549652, |
|
"learning_rate": 3.242464272847364e-05, |
|
"loss": 1.0232, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.05404431633939831, |
|
"grad_norm": 0.43577057123184204, |
|
"learning_rate": 3.6027380809415156e-05, |
|
"loss": 1.0124, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.059448747973338135, |
|
"grad_norm": 0.41379234194755554, |
|
"learning_rate": 3.963011889035667e-05, |
|
"loss": 1.0225, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.06485317960727797, |
|
"grad_norm": 0.48700177669525146, |
|
"learning_rate": 4.3232856971298193e-05, |
|
"loss": 1.0138, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.0702576112412178, |
|
"grad_norm": 0.40877047181129456, |
|
"learning_rate": 4.683559505223971e-05, |
|
"loss": 1.0068, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.07566204287515763, |
|
"grad_norm": 0.37194114923477173, |
|
"learning_rate": 5.043833313318122e-05, |
|
"loss": 1.0007, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.08106647450909746, |
|
"grad_norm": 0.49839073419570923, |
|
"learning_rate": 5.404107121412274e-05, |
|
"loss": 1.0038, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.08647090614303729, |
|
"grad_norm": 0.3880678117275238, |
|
"learning_rate": 5.7643809295064256e-05, |
|
"loss": 0.9996, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.09187533777697712, |
|
"grad_norm": 0.4280707538127899, |
|
"learning_rate": 6.124654737600577e-05, |
|
"loss": 1.0049, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.09727976941091696, |
|
"grad_norm": 0.4451320469379425, |
|
"learning_rate": 6.484928545694728e-05, |
|
"loss": 1.0057, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.10268420104485679, |
|
"grad_norm": 0.38181596994400024, |
|
"learning_rate": 6.84520235378888e-05, |
|
"loss": 1.0019, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.10808863267879662, |
|
"grad_norm": 0.38614770770072937, |
|
"learning_rate": 7.205476161883031e-05, |
|
"loss": 1.0045, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.11349306431273644, |
|
"grad_norm": 0.3148934543132782, |
|
"learning_rate": 7.565749969977183e-05, |
|
"loss": 1.0041, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.11889749594667627, |
|
"grad_norm": 0.41060400009155273, |
|
"learning_rate": 7.926023778071334e-05, |
|
"loss": 1.0001, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.1243019275806161, |
|
"grad_norm": 0.40537866950035095, |
|
"learning_rate": 8.286297586165485e-05, |
|
"loss": 1.0014, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.12970635921455595, |
|
"grad_norm": 0.3297308683395386, |
|
"learning_rate": 8.646571394259639e-05, |
|
"loss": 1.0055, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.13511079084849575, |
|
"grad_norm": 0.39976179599761963, |
|
"learning_rate": 9.00684520235379e-05, |
|
"loss": 0.9993, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.1405152224824356, |
|
"grad_norm": 0.39322683215141296, |
|
"learning_rate": 9.367119010447942e-05, |
|
"loss": 0.9965, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.14591965411637542, |
|
"grad_norm": 0.45231467485427856, |
|
"learning_rate": 9.727392818542093e-05, |
|
"loss": 0.9978, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.15132408575031525, |
|
"grad_norm": 0.41241922974586487, |
|
"learning_rate": 0.00010087666626636244, |
|
"loss": 1.0051, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.15672851738425508, |
|
"grad_norm": 0.5085678100585938, |
|
"learning_rate": 0.00010447940434730397, |
|
"loss": 0.9971, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.16213294901819492, |
|
"grad_norm": 0.4659586548805237, |
|
"learning_rate": 0.00010808214242824548, |
|
"loss": 1.0083, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.16753738065213475, |
|
"grad_norm": 0.330456018447876, |
|
"learning_rate": 0.00011168488050918699, |
|
"loss": 1.0013, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.17294181228607458, |
|
"grad_norm": 0.4083492159843445, |
|
"learning_rate": 0.00011528761859012851, |
|
"loss": 1.0107, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.1783462439200144, |
|
"grad_norm": 0.5598177909851074, |
|
"learning_rate": 0.00011889035667107002, |
|
"loss": 0.9992, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.18375067555395425, |
|
"grad_norm": 0.4554787576198578, |
|
"learning_rate": 0.00012249309475201154, |
|
"loss": 0.9972, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.18915510718789408, |
|
"grad_norm": 0.5599480271339417, |
|
"learning_rate": 0.00012609583283295305, |
|
"loss": 1.0017, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.1945595388218339, |
|
"grad_norm": 0.4103052318096161, |
|
"learning_rate": 0.00012969857091389456, |
|
"loss": 1.0075, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.19996397045577374, |
|
"grad_norm": 0.5033989548683167, |
|
"learning_rate": 0.0001333013089948361, |
|
"loss": 0.9998, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.20536840208971358, |
|
"grad_norm": 0.41184836626052856, |
|
"learning_rate": 0.0001369040470757776, |
|
"loss": 1.0116, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.2107728337236534, |
|
"grad_norm": 0.4604012370109558, |
|
"learning_rate": 0.0001405067851567191, |
|
"loss": 1.0144, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.21617726535759324, |
|
"grad_norm": 0.5769256949424744, |
|
"learning_rate": 0.00014410952323766062, |
|
"loss": 1.0142, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.22158169699153304, |
|
"grad_norm": 0.49323058128356934, |
|
"learning_rate": 0.00014771226131860213, |
|
"loss": 1.0224, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.22698612862547288, |
|
"grad_norm": 0.4065729081630707, |
|
"learning_rate": 0.00015131499939954367, |
|
"loss": 1.011, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.2323905602594127, |
|
"grad_norm": 0.4484567642211914, |
|
"learning_rate": 0.00015491773748048518, |
|
"loss": 1.0135, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.23779499189335254, |
|
"grad_norm": 0.5265558958053589, |
|
"learning_rate": 0.00015852047556142668, |
|
"loss": 1.0266, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.24319942352729237, |
|
"grad_norm": 0.43009766936302185, |
|
"learning_rate": 0.0001621232136423682, |
|
"loss": 1.025, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.2486038551612322, |
|
"grad_norm": 0.45328229665756226, |
|
"learning_rate": 0.0001657259517233097, |
|
"loss": 1.0256, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.25400828679517207, |
|
"grad_norm": 0.4880930781364441, |
|
"learning_rate": 0.00016932868980425124, |
|
"loss": 1.0268, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.2594127184291119, |
|
"grad_norm": 0.4783656597137451, |
|
"learning_rate": 0.00017293142788519277, |
|
"loss": 1.0281, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.2648171500630517, |
|
"grad_norm": 0.40857091546058655, |
|
"learning_rate": 0.00017653416596613428, |
|
"loss": 1.0436, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.2702215816969915, |
|
"grad_norm": 0.5468364953994751, |
|
"learning_rate": 0.0001801369040470758, |
|
"loss": 1.0431, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.27562601333093134, |
|
"grad_norm": 0.4680778384208679, |
|
"learning_rate": 0.0001837396421280173, |
|
"loss": 1.0449, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.2810304449648712, |
|
"grad_norm": 0.5532673001289368, |
|
"learning_rate": 0.00018734238020895884, |
|
"loss": 1.0453, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.286434876598811, |
|
"grad_norm": 0.5404918789863586, |
|
"learning_rate": 0.00019094511828990034, |
|
"loss": 1.0592, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.29183930823275084, |
|
"grad_norm": 0.5416702628135681, |
|
"learning_rate": 0.00019454785637084185, |
|
"loss": 1.0541, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.29724373986669067, |
|
"grad_norm": 0.5036255121231079, |
|
"learning_rate": 0.00019815059445178336, |
|
"loss": 1.0544, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.3026481715006305, |
|
"grad_norm": 0.564854621887207, |
|
"learning_rate": 0.00019999953171425823, |
|
"loss": 1.0528, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.30805260313457034, |
|
"grad_norm": 0.5236982107162476, |
|
"learning_rate": 0.00019999563009378472, |
|
"loss": 1.0595, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.31345703476851017, |
|
"grad_norm": 0.5642319917678833, |
|
"learning_rate": 0.00019998777428218277, |
|
"loss": 1.0733, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.31886146640245, |
|
"grad_norm": 0.5522397756576538, |
|
"learning_rate": 0.00019997596459009974, |
|
"loss": 1.0685, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.32426589803638983, |
|
"grad_norm": 0.5239744782447815, |
|
"learning_rate": 0.00019996020148453384, |
|
"loss": 1.068, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.32967032967032966, |
|
"grad_norm": 0.5960803627967834, |
|
"learning_rate": 0.00019994048558881562, |
|
"loss": 1.0681, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.3350747613042695, |
|
"grad_norm": 0.5771428942680359, |
|
"learning_rate": 0.00019991681768258336, |
|
"loss": 1.0649, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.34047919293820933, |
|
"grad_norm": 0.5502661466598511, |
|
"learning_rate": 0.00019988919870175223, |
|
"loss": 1.0632, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.34588362457214916, |
|
"grad_norm": 0.5481303930282593, |
|
"learning_rate": 0.0001998576297384772, |
|
"loss": 1.0604, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.351288056206089, |
|
"grad_norm": 0.520757257938385, |
|
"learning_rate": 0.00019982211204111, |
|
"loss": 1.0703, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.3566924878400288, |
|
"grad_norm": 0.5234895348548889, |
|
"learning_rate": 0.00019978264701414963, |
|
"loss": 1.0693, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.36209691947396866, |
|
"grad_norm": 0.669703483581543, |
|
"learning_rate": 0.0001997392362181869, |
|
"loss": 1.0706, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.3675013511079085, |
|
"grad_norm": 0.5472550392150879, |
|
"learning_rate": 0.00019969188136984267, |
|
"loss": 1.0743, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.3729057827418483, |
|
"grad_norm": 0.5862524509429932, |
|
"learning_rate": 0.00019964058434169995, |
|
"loss": 1.069, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.37831021437578816, |
|
"grad_norm": 0.5793502330780029, |
|
"learning_rate": 0.0001995853471622299, |
|
"loss": 1.0686, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.383714646009728, |
|
"grad_norm": 0.670881986618042, |
|
"learning_rate": 0.0001995261720157117, |
|
"loss": 1.0749, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.3891190776436678, |
|
"grad_norm": 0.698593258857727, |
|
"learning_rate": 0.00019946306124214594, |
|
"loss": 1.0678, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.39452350927760765, |
|
"grad_norm": 0.5866215229034424, |
|
"learning_rate": 0.00019939601733716232, |
|
"loss": 1.0605, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.3999279409115475, |
|
"grad_norm": 0.5571088790893555, |
|
"learning_rate": 0.0001993250429519208, |
|
"loss": 1.0732, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.4053323725454873, |
|
"grad_norm": 0.6108280420303345, |
|
"learning_rate": 0.0001992501408930069, |
|
"loss": 1.0717, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.41073680417942715, |
|
"grad_norm": 0.5834035873413086, |
|
"learning_rate": 0.00019917131412232057, |
|
"loss": 1.0767, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.416141235813367, |
|
"grad_norm": 0.6449561715126038, |
|
"learning_rate": 0.00019908856575695925, |
|
"loss": 1.0679, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.4215456674473068, |
|
"grad_norm": 0.6005063652992249, |
|
"learning_rate": 0.00019900189906909446, |
|
"loss": 1.0697, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.42695009908124665, |
|
"grad_norm": 0.48533475399017334, |
|
"learning_rate": 0.0001989113174858424, |
|
"loss": 1.0759, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.4323545307151865, |
|
"grad_norm": 0.6543179154396057, |
|
"learning_rate": 0.00019881682458912855, |
|
"loss": 1.068, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.43775896234912626, |
|
"grad_norm": 0.6233469843864441, |
|
"learning_rate": 0.00019871842411554598, |
|
"loss": 1.0665, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.4431633939830661, |
|
"grad_norm": 0.5530846118927002, |
|
"learning_rate": 0.0001986161199562074, |
|
"loss": 1.0759, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.4485678256170059, |
|
"grad_norm": 0.6484875679016113, |
|
"learning_rate": 0.00019850991615659173, |
|
"loss": 1.0799, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.45397225725094575, |
|
"grad_norm": 0.5916330814361572, |
|
"learning_rate": 0.00019839981691638364, |
|
"loss": 1.0732, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.4593766888848856, |
|
"grad_norm": 0.6168014407157898, |
|
"learning_rate": 0.00019828582658930777, |
|
"loss": 1.063, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.4647811205188254, |
|
"grad_norm": 0.7302340269088745, |
|
"learning_rate": 0.00019816794968295648, |
|
"loss": 1.0694, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.47018555215276525, |
|
"grad_norm": 0.7804449200630188, |
|
"learning_rate": 0.00019804619085861172, |
|
"loss": 1.0681, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.4755899837867051, |
|
"grad_norm": 0.690500020980835, |
|
"learning_rate": 0.00019792055493106042, |
|
"loss": 1.0662, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.4809944154206449, |
|
"grad_norm": 0.6514592170715332, |
|
"learning_rate": 0.00019779104686840445, |
|
"loss": 1.0682, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.48639884705458475, |
|
"grad_norm": 0.7182182669639587, |
|
"learning_rate": 0.00019765767179186393, |
|
"loss": 1.0761, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.4918032786885246, |
|
"grad_norm": 0.6194586157798767, |
|
"learning_rate": 0.00019752043497557473, |
|
"loss": 1.0637, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.4972077103224644, |
|
"grad_norm": 0.5965324640274048, |
|
"learning_rate": 0.00019737934184638006, |
|
"loss": 1.0658, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.5026121419564042, |
|
"grad_norm": 0.6684099435806274, |
|
"learning_rate": 0.0001972343979836157, |
|
"loss": 1.0788, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.5080165735903441, |
|
"grad_norm": 0.6042500734329224, |
|
"learning_rate": 0.00019708560911888947, |
|
"loss": 1.0748, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.5134210052242839, |
|
"grad_norm": 0.6769179701805115, |
|
"learning_rate": 0.0001969329811358546, |
|
"loss": 1.08, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 0.5188254368582238, |
|
"grad_norm": 0.6137043237686157, |
|
"learning_rate": 0.000196776520069977, |
|
"loss": 1.0752, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.5242298684921636, |
|
"grad_norm": 0.5905526280403137, |
|
"learning_rate": 0.00019661623210829657, |
|
"loss": 1.0711, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 0.5296343001261034, |
|
"grad_norm": 0.5724222660064697, |
|
"learning_rate": 0.00019645212358918273, |
|
"loss": 1.0665, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.5350387317600432, |
|
"grad_norm": 0.6485213041305542, |
|
"learning_rate": 0.00019628420100208354, |
|
"loss": 1.075, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 0.540443163393983, |
|
"grad_norm": 0.6828542351722717, |
|
"learning_rate": 0.00019611247098726917, |
|
"loss": 1.0742, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5458475950279229, |
|
"grad_norm": 0.7089459300041199, |
|
"learning_rate": 0.00019593694033556944, |
|
"loss": 1.0717, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 0.5512520266618627, |
|
"grad_norm": 0.6180184483528137, |
|
"learning_rate": 0.00019575761598810508, |
|
"loss": 1.0701, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.5566564582958026, |
|
"grad_norm": 0.6298936605453491, |
|
"learning_rate": 0.00019557450503601345, |
|
"loss": 1.0693, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 0.5620608899297423, |
|
"grad_norm": 0.7352581024169922, |
|
"learning_rate": 0.00019538761472016796, |
|
"loss": 1.0773, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.5674653215636822, |
|
"grad_norm": 0.5634006857872009, |
|
"learning_rate": 0.00019519695243089188, |
|
"loss": 1.0747, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 0.572869753197622, |
|
"grad_norm": 0.6061451435089111, |
|
"learning_rate": 0.00019500252570766599, |
|
"loss": 1.0659, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.5782741848315619, |
|
"grad_norm": 0.7047978043556213, |
|
"learning_rate": 0.00019480434223883046, |
|
"loss": 1.0695, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 0.5836786164655017, |
|
"grad_norm": 0.7310365438461304, |
|
"learning_rate": 0.00019460240986128095, |
|
"loss": 1.074, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.5890830480994416, |
|
"grad_norm": 0.7517262697219849, |
|
"learning_rate": 0.00019439673656015857, |
|
"loss": 1.0675, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 0.5944874797333813, |
|
"grad_norm": 0.6441323757171631, |
|
"learning_rate": 0.00019418733046853412, |
|
"loss": 1.0832, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.5998919113673212, |
|
"grad_norm": 0.7108227014541626, |
|
"learning_rate": 0.00019397419986708658, |
|
"loss": 1.0702, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 0.605296343001261, |
|
"grad_norm": 0.7227650284767151, |
|
"learning_rate": 0.00019375735318377557, |
|
"loss": 1.0676, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.6107007746352009, |
|
"grad_norm": 0.7566308975219727, |
|
"learning_rate": 0.00019353679899350814, |
|
"loss": 1.076, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 0.6161052062691407, |
|
"grad_norm": 0.5554959177970886, |
|
"learning_rate": 0.00019331254601779959, |
|
"loss": 1.0758, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.6215096379030806, |
|
"grad_norm": 0.6587594747543335, |
|
"learning_rate": 0.0001930846031244287, |
|
"loss": 1.0671, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 0.6269140695370203, |
|
"grad_norm": 0.7100338339805603, |
|
"learning_rate": 0.0001928529793270871, |
|
"loss": 1.067, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.6323185011709602, |
|
"grad_norm": 0.6286484003067017, |
|
"learning_rate": 0.00019261768378502262, |
|
"loss": 1.0668, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 0.6377229328049, |
|
"grad_norm": 0.7707709670066833, |
|
"learning_rate": 0.00019237872580267734, |
|
"loss": 1.0672, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.6431273644388399, |
|
"grad_norm": 0.7858836054801941, |
|
"learning_rate": 0.00019213611482931953, |
|
"loss": 1.0736, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 0.6485317960727797, |
|
"grad_norm": 0.6796938180923462, |
|
"learning_rate": 0.00019188986045866997, |
|
"loss": 1.0759, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.6539362277067196, |
|
"grad_norm": 0.6615278124809265, |
|
"learning_rate": 0.0001916399724285227, |
|
"loss": 1.0713, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 0.6353105306625366, |
|
"learning_rate": 0.00019138646062035982, |
|
"loss": 1.0769, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.6647450909745992, |
|
"grad_norm": 0.6170017123222351, |
|
"learning_rate": 0.0001911293350589609, |
|
"loss": 1.07, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 0.670149522608539, |
|
"grad_norm": 0.6368488073348999, |
|
"learning_rate": 0.00019086860591200632, |
|
"loss": 1.0774, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.6755539542424789, |
|
"grad_norm": 0.5853469371795654, |
|
"learning_rate": 0.00019060428348967548, |
|
"loss": 1.0732, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 0.6809583858764187, |
|
"grad_norm": 0.7817432880401611, |
|
"learning_rate": 0.00019033637824423884, |
|
"loss": 1.0732, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.6863628175103585, |
|
"grad_norm": 0.6566998362541199, |
|
"learning_rate": 0.00019006490076964487, |
|
"loss": 1.0671, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 0.6917672491442983, |
|
"grad_norm": 0.5824844837188721, |
|
"learning_rate": 0.00018978986180110088, |
|
"loss": 1.0656, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.6971716807782381, |
|
"grad_norm": 0.5842050909996033, |
|
"learning_rate": 0.0001895112722146486, |
|
"loss": 1.0646, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 0.702576112412178, |
|
"grad_norm": 0.6520604491233826, |
|
"learning_rate": 0.00018922914302673421, |
|
"loss": 1.0745, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.7079805440461178, |
|
"grad_norm": 0.648113489151001, |
|
"learning_rate": 0.0001889434853937725, |
|
"loss": 1.0711, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 0.7133849756800577, |
|
"grad_norm": 1.0153329372406006, |
|
"learning_rate": 0.00018865431061170588, |
|
"loss": 1.0643, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.7187894073139974, |
|
"grad_norm": 0.6522130370140076, |
|
"learning_rate": 0.00018836163011555764, |
|
"loss": 1.0629, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 0.7241938389479373, |
|
"grad_norm": 0.6235710978507996, |
|
"learning_rate": 0.0001880654554789798, |
|
"loss": 1.0637, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.7295982705818771, |
|
"grad_norm": 0.6486189365386963, |
|
"learning_rate": 0.00018776579841379528, |
|
"loss": 1.0679, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 0.735002702215817, |
|
"grad_norm": 0.7326012849807739, |
|
"learning_rate": 0.00018746267076953505, |
|
"loss": 1.0624, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.7404071338497568, |
|
"grad_norm": 0.7451658248901367, |
|
"learning_rate": 0.00018715608453296926, |
|
"loss": 1.0799, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 0.7458115654836966, |
|
"grad_norm": 0.5677480101585388, |
|
"learning_rate": 0.00018684605182763355, |
|
"loss": 1.0665, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.7512159971176364, |
|
"grad_norm": 0.6265568137168884, |
|
"learning_rate": 0.00018653258491334933, |
|
"loss": 1.0562, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 0.7566204287515763, |
|
"grad_norm": 0.5560349225997925, |
|
"learning_rate": 0.0001862156961857392, |
|
"loss": 1.0696, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.7620248603855161, |
|
"grad_norm": 0.7811048626899719, |
|
"learning_rate": 0.0001858953981757367, |
|
"loss": 1.0713, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 0.767429292019456, |
|
"grad_norm": 0.8111995458602905, |
|
"learning_rate": 0.00018557170354909088, |
|
"loss": 1.0641, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.7728337236533958, |
|
"grad_norm": 0.6084979176521301, |
|
"learning_rate": 0.0001852446251058652, |
|
"loss": 1.0609, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 0.7782381552873356, |
|
"grad_norm": 0.6472198963165283, |
|
"learning_rate": 0.0001849141757799317, |
|
"loss": 1.0659, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.7836425869212754, |
|
"grad_norm": 0.6767707467079163, |
|
"learning_rate": 0.00018458036863845933, |
|
"loss": 1.0687, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 0.7890470185552153, |
|
"grad_norm": 0.6994395852088928, |
|
"learning_rate": 0.00018424321688139729, |
|
"loss": 1.0634, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.7944514501891551, |
|
"grad_norm": 0.6968779563903809, |
|
"learning_rate": 0.000183902733840953, |
|
"loss": 1.0552, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 0.799855881823095, |
|
"grad_norm": 0.6974983215332031, |
|
"learning_rate": 0.0001835589329810651, |
|
"loss": 1.0722, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.8052603134570347, |
|
"grad_norm": 0.6921077966690063, |
|
"learning_rate": 0.00018321182789687068, |
|
"loss": 1.0557, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 0.8106647450909746, |
|
"grad_norm": 0.6887233257293701, |
|
"learning_rate": 0.00018286143231416806, |
|
"loss": 1.0633, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.8160691767249144, |
|
"grad_norm": 0.6151506900787354, |
|
"learning_rate": 0.00018250776008887375, |
|
"loss": 1.0694, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 0.8214736083588543, |
|
"grad_norm": 0.682551383972168, |
|
"learning_rate": 0.00018215082520647467, |
|
"loss": 1.0677, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.8268780399927941, |
|
"grad_norm": 0.6813539862632751, |
|
"learning_rate": 0.00018179064178147506, |
|
"loss": 1.0628, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 0.832282471626734, |
|
"grad_norm": 0.583910346031189, |
|
"learning_rate": 0.00018142722405683839, |
|
"loss": 1.0605, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.8376869032606737, |
|
"grad_norm": 0.6265426278114319, |
|
"learning_rate": 0.000181060586403424, |
|
"loss": 1.0709, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 0.8430913348946136, |
|
"grad_norm": 0.5985749959945679, |
|
"learning_rate": 0.0001806907433194191, |
|
"loss": 1.0521, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.8484957665285534, |
|
"grad_norm": 0.6286662220954895, |
|
"learning_rate": 0.00018031770942976514, |
|
"loss": 1.0648, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 0.8539001981624933, |
|
"grad_norm": 0.6208794713020325, |
|
"learning_rate": 0.00017994149948557975, |
|
"loss": 1.0565, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.8593046297964331, |
|
"grad_norm": 0.7522740960121155, |
|
"learning_rate": 0.00017956212836357324, |
|
"loss": 1.0583, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 0.864709061430373, |
|
"grad_norm": 0.791959285736084, |
|
"learning_rate": 0.0001791796110654604, |
|
"loss": 1.0663, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.8701134930643127, |
|
"grad_norm": 0.5950735211372375, |
|
"learning_rate": 0.0001787939627173673, |
|
"loss": 1.0652, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 0.8755179246982525, |
|
"grad_norm": 0.6595513820648193, |
|
"learning_rate": 0.0001784051985692332, |
|
"loss": 1.051, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.8809223563321924, |
|
"grad_norm": 0.6468363404273987, |
|
"learning_rate": 0.00017801333399420724, |
|
"loss": 1.0465, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 0.8863267879661322, |
|
"grad_norm": 3.451094150543213, |
|
"learning_rate": 0.0001776183844880409, |
|
"loss": 1.0534, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.8917312196000721, |
|
"grad_norm": 0.6846780180931091, |
|
"learning_rate": 0.00017722036566847495, |
|
"loss": 1.0554, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 0.8971356512340118, |
|
"grad_norm": 0.7100343704223633, |
|
"learning_rate": 0.00017681929327462205, |
|
"loss": 1.0524, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.9025400828679517, |
|
"grad_norm": 0.5465316772460938, |
|
"learning_rate": 0.00017641518316634426, |
|
"loss": 1.046, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 0.9079445145018915, |
|
"grad_norm": 0.7278814911842346, |
|
"learning_rate": 0.000176008051323626, |
|
"loss": 1.0543, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.9133489461358314, |
|
"grad_norm": 0.6412672996520996, |
|
"learning_rate": 0.00017559791384594192, |
|
"loss": 1.0477, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 0.9187533777697712, |
|
"grad_norm": 0.6557443141937256, |
|
"learning_rate": 0.00017518478695162056, |
|
"loss": 1.0638, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.9241578094037111, |
|
"grad_norm": 0.7106101512908936, |
|
"learning_rate": 0.00017476868697720278, |
|
"loss": 1.0588, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 0.9295622410376508, |
|
"grad_norm": 0.6246557235717773, |
|
"learning_rate": 0.00017434963037679592, |
|
"loss": 1.054, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.9349666726715907, |
|
"grad_norm": 0.6114718914031982, |
|
"learning_rate": 0.000173927633721423, |
|
"loss": 1.0504, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 0.9403711043055305, |
|
"grad_norm": 0.7704567909240723, |
|
"learning_rate": 0.0001735027136983676, |
|
"loss": 1.0537, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.9457755359394704, |
|
"grad_norm": 0.6341020464897156, |
|
"learning_rate": 0.0001730748871105138, |
|
"loss": 1.0493, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 0.9511799675734102, |
|
"grad_norm": 0.5861644148826599, |
|
"learning_rate": 0.00017264417087568189, |
|
"loss": 1.052, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.9565843992073501, |
|
"grad_norm": 0.5983610153198242, |
|
"learning_rate": 0.00017221058202595928, |
|
"loss": 1.052, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 0.9619888308412898, |
|
"grad_norm": 0.6839273571968079, |
|
"learning_rate": 0.0001717741377070271, |
|
"loss": 1.0632, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.9673932624752297, |
|
"grad_norm": 0.7345322966575623, |
|
"learning_rate": 0.000171334855177482, |
|
"loss": 1.0416, |
|
"step": 26850 |
|
}, |
|
{ |
|
"epoch": 0.9727976941091695, |
|
"grad_norm": 0.6669878363609314, |
|
"learning_rate": 0.00017089275180815394, |
|
"loss": 1.0499, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.9782021257431094, |
|
"grad_norm": 0.5807615518569946, |
|
"learning_rate": 0.0001704478450814191, |
|
"loss": 1.0469, |
|
"step": 27150 |
|
}, |
|
{ |
|
"epoch": 0.9836065573770492, |
|
"grad_norm": 0.6089076399803162, |
|
"learning_rate": 0.00017000015259050855, |
|
"loss": 1.0403, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.989010989010989, |
|
"grad_norm": 0.6615424156188965, |
|
"learning_rate": 0.00016954969203881272, |
|
"loss": 1.0492, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 0.9944154206449288, |
|
"grad_norm": 0.660163164138794, |
|
"learning_rate": 0.00016909648123918116, |
|
"loss": 1.0543, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.9998198522788687, |
|
"grad_norm": 0.631686806678772, |
|
"learning_rate": 0.0001686405381132183, |
|
"loss": 1.0474, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 1.0052242839128085, |
|
"grad_norm": 0.7013711333274841, |
|
"learning_rate": 0.00016818188069057458, |
|
"loss": 0.9965, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 1.0106287155467484, |
|
"grad_norm": 0.76506507396698, |
|
"learning_rate": 0.00016772052710823374, |
|
"loss": 0.9981, |
|
"step": 28050 |
|
}, |
|
{ |
|
"epoch": 1.0160331471806883, |
|
"grad_norm": 0.8097601532936096, |
|
"learning_rate": 0.00016725649560979546, |
|
"loss": 0.9995, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 1.021437578814628, |
|
"grad_norm": 0.795626163482666, |
|
"learning_rate": 0.00016678980454475385, |
|
"loss": 0.9983, |
|
"step": 28350 |
|
}, |
|
{ |
|
"epoch": 1.0268420104485678, |
|
"grad_norm": 0.6494497060775757, |
|
"learning_rate": 0.00016632047236777214, |
|
"loss": 1.0075, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.0322464420825077, |
|
"grad_norm": 0.7171606421470642, |
|
"learning_rate": 0.00016584851763795262, |
|
"loss": 0.9972, |
|
"step": 28650 |
|
}, |
|
{ |
|
"epoch": 1.0376508737164474, |
|
"grad_norm": 0.604192316532135, |
|
"learning_rate": 0.00016537395901810288, |
|
"loss": 0.9943, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 1.0430553053503873, |
|
"grad_norm": 0.6858931183815002, |
|
"learning_rate": 0.0001648968152739978, |
|
"loss": 1.0092, |
|
"step": 28950 |
|
}, |
|
{ |
|
"epoch": 1.0484597369843272, |
|
"grad_norm": 0.685265839099884, |
|
"learning_rate": 0.00016441710527363753, |
|
"loss": 0.9936, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 1.053864168618267, |
|
"grad_norm": 0.6720730066299438, |
|
"learning_rate": 0.00016393484798650132, |
|
"loss": 0.993, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 1.0592686002522067, |
|
"grad_norm": 0.7085748314857483, |
|
"learning_rate": 0.0001634500624827973, |
|
"loss": 1.0083, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 1.0646730318861466, |
|
"grad_norm": 0.6460698843002319, |
|
"learning_rate": 0.00016296276793270864, |
|
"loss": 0.9952, |
|
"step": 29550 |
|
}, |
|
{ |
|
"epoch": 1.0700774635200865, |
|
"grad_norm": 0.6689881086349487, |
|
"learning_rate": 0.0001624729836056352, |
|
"loss": 0.9958, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 1.0754818951540264, |
|
"grad_norm": 0.7271780967712402, |
|
"learning_rate": 0.00016198072886943181, |
|
"loss": 0.9954, |
|
"step": 29850 |
|
}, |
|
{ |
|
"epoch": 1.080886326787966, |
|
"grad_norm": 0.5559628009796143, |
|
"learning_rate": 0.0001614860231896422, |
|
"loss": 0.9984, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.086290758421906, |
|
"grad_norm": 0.6752548813819885, |
|
"learning_rate": 0.0001609888861287293, |
|
"loss": 1.0019, |
|
"step": 30150 |
|
}, |
|
{ |
|
"epoch": 1.0916951900558458, |
|
"grad_norm": 0.7046670913696289, |
|
"learning_rate": 0.0001604893373453017, |
|
"loss": 0.9936, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 1.0970996216897857, |
|
"grad_norm": 0.6102576851844788, |
|
"learning_rate": 0.00015998739659333638, |
|
"loss": 1.0061, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 1.1025040533237254, |
|
"grad_norm": 0.7669439911842346, |
|
"learning_rate": 0.00015948308372139739, |
|
"loss": 1.0017, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 1.1079084849576653, |
|
"grad_norm": 0.7437514662742615, |
|
"learning_rate": 0.00015897641867185092, |
|
"loss": 0.9947, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 1.1133129165916051, |
|
"grad_norm": 0.7851073741912842, |
|
"learning_rate": 0.0001584674214800771, |
|
"loss": 1.0026, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 1.118717348225545, |
|
"grad_norm": 0.7046276926994324, |
|
"learning_rate": 0.0001579561122736772, |
|
"loss": 0.9893, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 1.1241217798594847, |
|
"grad_norm": 0.8143602013587952, |
|
"learning_rate": 0.000157442511271678, |
|
"loss": 1.0013, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 1.1295262114934246, |
|
"grad_norm": 1.2338451147079468, |
|
"learning_rate": 0.0001569266387837324, |
|
"loss": 1.002, |
|
"step": 31350 |
|
}, |
|
{ |
|
"epoch": 1.1349306431273645, |
|
"grad_norm": 0.7588093876838684, |
|
"learning_rate": 0.00015640851520931588, |
|
"loss": 1.0064, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.1403350747613044, |
|
"grad_norm": 0.7656028270721436, |
|
"learning_rate": 0.00015588816103692023, |
|
"loss": 0.9963, |
|
"step": 31650 |
|
}, |
|
{ |
|
"epoch": 1.145739506395244, |
|
"grad_norm": 0.82599937915802, |
|
"learning_rate": 0.00015536559684324315, |
|
"loss": 0.9961, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 1.151143938029184, |
|
"grad_norm": 0.6491279006004333, |
|
"learning_rate": 0.0001548408432923746, |
|
"loss": 0.9946, |
|
"step": 31950 |
|
}, |
|
{ |
|
"epoch": 1.1565483696631238, |
|
"grad_norm": 0.49154847860336304, |
|
"learning_rate": 0.00015431392113497979, |
|
"loss": 1.0035, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 1.1619528012970637, |
|
"grad_norm": 0.5830157399177551, |
|
"learning_rate": 0.00015378485120747835, |
|
"loss": 0.9978, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 1.1673572329310034, |
|
"grad_norm": 0.6672685146331787, |
|
"learning_rate": 0.00015325365443122078, |
|
"loss": 1.0079, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 1.1727616645649432, |
|
"grad_norm": 0.7243463397026062, |
|
"learning_rate": 0.00015272035181166066, |
|
"loss": 1.0023, |
|
"step": 32550 |
|
}, |
|
{ |
|
"epoch": 1.1781660961988831, |
|
"grad_norm": 0.6492652893066406, |
|
"learning_rate": 0.00015218496443752456, |
|
"loss": 0.9972, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 1.1835705278328228, |
|
"grad_norm": 0.6047407388687134, |
|
"learning_rate": 0.00015164751347997762, |
|
"loss": 0.9864, |
|
"step": 32850 |
|
}, |
|
{ |
|
"epoch": 1.1889749594667627, |
|
"grad_norm": 0.6448661088943481, |
|
"learning_rate": 0.00015110802019178661, |
|
"loss": 1.0046, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.1943793911007026, |
|
"grad_norm": 0.7006458044052124, |
|
"learning_rate": 0.0001505665059064796, |
|
"loss": 1.0018, |
|
"step": 33150 |
|
}, |
|
{ |
|
"epoch": 1.1997838227346425, |
|
"grad_norm": 0.6918825507164001, |
|
"learning_rate": 0.00015002299203750212, |
|
"loss": 0.991, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 1.2051882543685823, |
|
"grad_norm": 0.6090679168701172, |
|
"learning_rate": 0.00014947750007737062, |
|
"loss": 0.9939, |
|
"step": 33450 |
|
}, |
|
{ |
|
"epoch": 1.210592686002522, |
|
"grad_norm": 0.718387246131897, |
|
"learning_rate": 0.00014893005159682233, |
|
"loss": 0.9873, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 1.215997117636462, |
|
"grad_norm": 0.6664546132087708, |
|
"learning_rate": 0.00014838066824396256, |
|
"loss": 0.9926, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 1.2214015492704018, |
|
"grad_norm": 0.6758761405944824, |
|
"learning_rate": 0.00014782937174340845, |
|
"loss": 0.9924, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 1.2268059809043415, |
|
"grad_norm": 0.5241803526878357, |
|
"learning_rate": 0.00014727618389542995, |
|
"loss": 0.9935, |
|
"step": 34050 |
|
}, |
|
{ |
|
"epoch": 1.2322104125382813, |
|
"grad_norm": 0.6897122859954834, |
|
"learning_rate": 0.00014672112657508778, |
|
"loss": 0.9859, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 1.2376148441722212, |
|
"grad_norm": 0.6511486172676086, |
|
"learning_rate": 0.00014616422173136846, |
|
"loss": 0.9905, |
|
"step": 34350 |
|
}, |
|
{ |
|
"epoch": 1.2430192758061611, |
|
"grad_norm": 0.8631020784378052, |
|
"learning_rate": 0.00014560549138631617, |
|
"loss": 0.9996, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.248423707440101, |
|
"grad_norm": 0.5925600528717041, |
|
"learning_rate": 0.00014504495763416225, |
|
"loss": 0.9961, |
|
"step": 34650 |
|
}, |
|
{ |
|
"epoch": 1.2538281390740407, |
|
"grad_norm": 0.6121050715446472, |
|
"learning_rate": 0.00014448264264045114, |
|
"loss": 1.0039, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 1.2592325707079806, |
|
"grad_norm": 0.628056526184082, |
|
"learning_rate": 0.00014391856864116414, |
|
"loss": 1.0004, |
|
"step": 34950 |
|
}, |
|
{ |
|
"epoch": 1.2646370023419204, |
|
"grad_norm": 0.6576303243637085, |
|
"learning_rate": 0.00014335275794184003, |
|
"loss": 0.9978, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 1.2700414339758601, |
|
"grad_norm": 0.5684065222740173, |
|
"learning_rate": 0.00014278523291669302, |
|
"loss": 0.9874, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 1.2754458656098, |
|
"grad_norm": 0.8131369352340698, |
|
"learning_rate": 0.000142216016007728, |
|
"loss": 1.0006, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 1.2808502972437399, |
|
"grad_norm": 0.6513379216194153, |
|
"learning_rate": 0.00014164512972385306, |
|
"loss": 0.9817, |
|
"step": 35550 |
|
}, |
|
{ |
|
"epoch": 1.2862547288776798, |
|
"grad_norm": 0.6244243383407593, |
|
"learning_rate": 0.0001410725966399896, |
|
"loss": 0.9805, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 1.2916591605116197, |
|
"grad_norm": 0.760666012763977, |
|
"learning_rate": 0.00014049843939617924, |
|
"loss": 0.9889, |
|
"step": 35850 |
|
}, |
|
{ |
|
"epoch": 1.2970635921455593, |
|
"grad_norm": 0.7188459634780884, |
|
"learning_rate": 0.00013992268069668904, |
|
"loss": 0.9895, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.3024680237794992, |
|
"grad_norm": 0.6034685969352722, |
|
"learning_rate": 0.0001393453433091133, |
|
"loss": 0.9882, |
|
"step": 36150 |
|
}, |
|
{ |
|
"epoch": 1.307872455413439, |
|
"grad_norm": 0.6076464653015137, |
|
"learning_rate": 0.0001387664500634734, |
|
"loss": 0.9823, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 1.3132768870473788, |
|
"grad_norm": 0.6652275323867798, |
|
"learning_rate": 0.00013818602385131512, |
|
"loss": 0.9784, |
|
"step": 36450 |
|
}, |
|
{ |
|
"epoch": 1.3186813186813187, |
|
"grad_norm": 0.6014280319213867, |
|
"learning_rate": 0.00013760408762480316, |
|
"loss": 0.9812, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 1.3240857503152585, |
|
"grad_norm": 0.6998510360717773, |
|
"learning_rate": 0.00013702066439581382, |
|
"loss": 0.9886, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 1.3294901819491982, |
|
"grad_norm": 0.5891895294189453, |
|
"learning_rate": 0.00013643577723502476, |
|
"loss": 0.9873, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 1.334894613583138, |
|
"grad_norm": 0.7246126532554626, |
|
"learning_rate": 0.00013584944927100298, |
|
"loss": 0.9859, |
|
"step": 37050 |
|
}, |
|
{ |
|
"epoch": 1.340299045217078, |
|
"grad_norm": 0.664380669593811, |
|
"learning_rate": 0.00013526170368928993, |
|
"loss": 0.9793, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 1.3457034768510179, |
|
"grad_norm": 0.6437602639198303, |
|
"learning_rate": 0.00013467256373148496, |
|
"loss": 0.9853, |
|
"step": 37350 |
|
}, |
|
{ |
|
"epoch": 1.3511079084849578, |
|
"grad_norm": 0.6728150844573975, |
|
"learning_rate": 0.000134082052694326, |
|
"loss": 0.9792, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.3565123401188974, |
|
"grad_norm": 0.8101018071174622, |
|
"learning_rate": 0.00013349019392876858, |
|
"loss": 0.9791, |
|
"step": 37650 |
|
}, |
|
{ |
|
"epoch": 1.3619167717528373, |
|
"grad_norm": 0.6081525683403015, |
|
"learning_rate": 0.00013289701083906214, |
|
"loss": 0.9825, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 1.3673212033867772, |
|
"grad_norm": 0.6776862740516663, |
|
"learning_rate": 0.00013230252688182497, |
|
"loss": 0.9693, |
|
"step": 37950 |
|
}, |
|
{ |
|
"epoch": 1.3727256350207169, |
|
"grad_norm": 0.6200093030929565, |
|
"learning_rate": 0.0001317067655651161, |
|
"loss": 0.9677, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 1.3781300666546568, |
|
"grad_norm": 0.7349710464477539, |
|
"learning_rate": 0.00013110975044750621, |
|
"loss": 0.9714, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 1.3835344982885966, |
|
"grad_norm": 0.5907526612281799, |
|
"learning_rate": 0.0001305115051371458, |
|
"loss": 0.9779, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 1.3889389299225365, |
|
"grad_norm": 0.6219062805175781, |
|
"learning_rate": 0.0001299120532908316, |
|
"loss": 0.9647, |
|
"step": 38550 |
|
}, |
|
{ |
|
"epoch": 1.3943433615564764, |
|
"grad_norm": 0.777947723865509, |
|
"learning_rate": 0.0001293114186130712, |
|
"loss": 0.97, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 1.399747793190416, |
|
"grad_norm": 0.686892569065094, |
|
"learning_rate": 0.00012870962485514567, |
|
"loss": 0.9683, |
|
"step": 38850 |
|
}, |
|
{ |
|
"epoch": 1.405152224824356, |
|
"grad_norm": 0.6655575633049011, |
|
"learning_rate": 0.00012810669581417032, |
|
"loss": 0.9674, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.4105566564582959, |
|
"grad_norm": 0.679595947265625, |
|
"learning_rate": 0.0001275026553321536, |
|
"loss": 0.9725, |
|
"step": 39150 |
|
}, |
|
{ |
|
"epoch": 1.4159610880922355, |
|
"grad_norm": 0.6671122312545776, |
|
"learning_rate": 0.00012689752729505457, |
|
"loss": 0.9677, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 1.4213655197261754, |
|
"grad_norm": 0.6357312202453613, |
|
"learning_rate": 0.00012629133563183797, |
|
"loss": 0.9651, |
|
"step": 39450 |
|
}, |
|
{ |
|
"epoch": 1.4267699513601153, |
|
"grad_norm": 0.7441504001617432, |
|
"learning_rate": 0.0001256841043135283, |
|
"loss": 0.9704, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 1.4321743829940552, |
|
"grad_norm": 0.5487176179885864, |
|
"learning_rate": 0.00012507585735226185, |
|
"loss": 0.9714, |
|
"step": 39750 |
|
}, |
|
{ |
|
"epoch": 1.437578814627995, |
|
"grad_norm": 0.6709308624267578, |
|
"learning_rate": 0.00012446661880033698, |
|
"loss": 0.9587, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 1.4429832462619347, |
|
"grad_norm": 0.638081431388855, |
|
"learning_rate": 0.00012385641274926328, |
|
"loss": 0.9631, |
|
"step": 40050 |
|
}, |
|
{ |
|
"epoch": 1.4483876778958746, |
|
"grad_norm": 0.6448566913604736, |
|
"learning_rate": 0.00012324526332880867, |
|
"loss": 0.9634, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 1.4537921095298145, |
|
"grad_norm": 0.7188845872879028, |
|
"learning_rate": 0.0001226331947060455, |
|
"loss": 0.9669, |
|
"step": 40350 |
|
}, |
|
{ |
|
"epoch": 1.4591965411637542, |
|
"grad_norm": 0.5700541138648987, |
|
"learning_rate": 0.00012202023108439455, |
|
"loss": 0.9598, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.464600972797694, |
|
"grad_norm": 0.6200810670852661, |
|
"learning_rate": 0.0001214063967026682, |
|
"loss": 0.9651, |
|
"step": 40650 |
|
}, |
|
{ |
|
"epoch": 1.470005404431634, |
|
"grad_norm": 0.6882332563400269, |
|
"learning_rate": 0.00012079171583411184, |
|
"loss": 0.9649, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 1.4754098360655736, |
|
"grad_norm": 0.6133975982666016, |
|
"learning_rate": 0.00012017621278544402, |
|
"loss": 0.9495, |
|
"step": 40950 |
|
}, |
|
{ |
|
"epoch": 1.4808142676995135, |
|
"grad_norm": 0.8365902304649353, |
|
"learning_rate": 0.00011955991189589526, |
|
"loss": 0.95, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 1.4862186993334534, |
|
"grad_norm": 0.5351865887641907, |
|
"learning_rate": 0.0001189428375362457, |
|
"loss": 0.9579, |
|
"step": 41250 |
|
}, |
|
{ |
|
"epoch": 1.4916231309673933, |
|
"grad_norm": 0.6488143801689148, |
|
"learning_rate": 0.00011832501410786116, |
|
"loss": 0.9513, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 1.4970275626013332, |
|
"grad_norm": 0.6101202964782715, |
|
"learning_rate": 0.0001177064660417285, |
|
"loss": 0.9573, |
|
"step": 41550 |
|
}, |
|
{ |
|
"epoch": 1.5024319942352728, |
|
"grad_norm": 0.7013749480247498, |
|
"learning_rate": 0.00011708721779748933, |
|
"loss": 0.9508, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 1.5078364258692127, |
|
"grad_norm": 0.5707131028175354, |
|
"learning_rate": 0.00011646729386247286, |
|
"loss": 0.9486, |
|
"step": 41850 |
|
}, |
|
{ |
|
"epoch": 1.5132408575031526, |
|
"grad_norm": 0.6973045468330383, |
|
"learning_rate": 0.00011584671875072757, |
|
"loss": 0.962, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.5186452891370923, |
|
"grad_norm": 0.6686086654663086, |
|
"learning_rate": 0.00011522551700205184, |
|
"loss": 0.9606, |
|
"step": 42150 |
|
}, |
|
{ |
|
"epoch": 1.5240497207710324, |
|
"grad_norm": 0.5340304970741272, |
|
"learning_rate": 0.00011460371318102358, |
|
"loss": 0.9584, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 1.529454152404972, |
|
"grad_norm": 0.6170547008514404, |
|
"learning_rate": 0.00011398133187602873, |
|
"loss": 0.947, |
|
"step": 42450 |
|
}, |
|
{ |
|
"epoch": 1.534858584038912, |
|
"grad_norm": 0.5485740900039673, |
|
"learning_rate": 0.00011335839769828924, |
|
"loss": 0.961, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 1.5402630156728518, |
|
"grad_norm": 0.6151200532913208, |
|
"learning_rate": 0.00011273493528088945, |
|
"loss": 0.9531, |
|
"step": 42750 |
|
}, |
|
{ |
|
"epoch": 1.5456674473067915, |
|
"grad_norm": 0.6902984976768494, |
|
"learning_rate": 0.00011211096927780236, |
|
"loss": 0.9418, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 1.5510718789407314, |
|
"grad_norm": 0.7150260806083679, |
|
"learning_rate": 0.00011148652436291451, |
|
"loss": 0.948, |
|
"step": 43050 |
|
}, |
|
{ |
|
"epoch": 1.5564763105746713, |
|
"grad_norm": 0.6931044459342957, |
|
"learning_rate": 0.0001108616252290504, |
|
"loss": 0.9571, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 1.561880742208611, |
|
"grad_norm": 0.641190230846405, |
|
"learning_rate": 0.00011023629658699596, |
|
"loss": 0.9412, |
|
"step": 43350 |
|
}, |
|
{ |
|
"epoch": 1.5672851738425508, |
|
"grad_norm": 0.6901960968971252, |
|
"learning_rate": 0.00010961056316452145, |
|
"loss": 0.954, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.5726896054764907, |
|
"grad_norm": 0.6115658283233643, |
|
"learning_rate": 0.00010898444970540372, |
|
"loss": 0.952, |
|
"step": 43650 |
|
}, |
|
{ |
|
"epoch": 1.5780940371104304, |
|
"grad_norm": 0.7072962522506714, |
|
"learning_rate": 0.00010835798096844743, |
|
"loss": 0.9484, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 1.5834984687443705, |
|
"grad_norm": 0.5898342728614807, |
|
"learning_rate": 0.00010773118172650643, |
|
"loss": 0.9421, |
|
"step": 43950 |
|
}, |
|
{ |
|
"epoch": 1.5889029003783102, |
|
"grad_norm": 0.503633439540863, |
|
"learning_rate": 0.00010710407676550382, |
|
"loss": 0.935, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 1.59430733201225, |
|
"grad_norm": 0.5756278038024902, |
|
"learning_rate": 0.00010647669088345204, |
|
"loss": 0.9514, |
|
"step": 44250 |
|
}, |
|
{ |
|
"epoch": 1.59971176364619, |
|
"grad_norm": 0.6327024102210999, |
|
"learning_rate": 0.00010584904888947204, |
|
"loss": 0.9398, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 1.6051161952801296, |
|
"grad_norm": 0.6922555565834045, |
|
"learning_rate": 0.00010522117560281251, |
|
"loss": 0.9411, |
|
"step": 44550 |
|
}, |
|
{ |
|
"epoch": 1.6105206269140695, |
|
"grad_norm": 0.7153000235557556, |
|
"learning_rate": 0.00010459309585186818, |
|
"loss": 0.9437, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 1.6159250585480094, |
|
"grad_norm": 0.7171802520751953, |
|
"learning_rate": 0.0001039648344731982, |
|
"loss": 0.9305, |
|
"step": 44850 |
|
}, |
|
{ |
|
"epoch": 1.621329490181949, |
|
"grad_norm": 0.5943671464920044, |
|
"learning_rate": 0.00010333641631054391, |
|
"loss": 0.938, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.6267339218158892, |
|
"grad_norm": 0.7467085123062134, |
|
"learning_rate": 0.00010270786621384645, |
|
"loss": 0.9416, |
|
"step": 45150 |
|
}, |
|
{ |
|
"epoch": 1.6321383534498288, |
|
"grad_norm": 0.6827779412269592, |
|
"learning_rate": 0.00010207920903826415, |
|
"loss": 0.9381, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 1.6375427850837687, |
|
"grad_norm": 0.6708967089653015, |
|
"learning_rate": 0.00010145046964318963, |
|
"loss": 0.9495, |
|
"step": 45450 |
|
}, |
|
{ |
|
"epoch": 1.6429472167177086, |
|
"grad_norm": 0.6415010094642639, |
|
"learning_rate": 0.00010082167289126672, |
|
"loss": 0.9312, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 1.6483516483516483, |
|
"grad_norm": 0.695865273475647, |
|
"learning_rate": 0.00010019284364740731, |
|
"loss": 0.9309, |
|
"step": 45750 |
|
}, |
|
{ |
|
"epoch": 1.6537560799855882, |
|
"grad_norm": 0.6317395567893982, |
|
"learning_rate": 9.956400677780833e-05, |
|
"loss": 0.941, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 1.659160511619528, |
|
"grad_norm": 0.6181449294090271, |
|
"learning_rate": 9.893518714896805e-05, |
|
"loss": 0.9295, |
|
"step": 46050 |
|
}, |
|
{ |
|
"epoch": 1.6645649432534677, |
|
"grad_norm": 0.5777118802070618, |
|
"learning_rate": 9.830640962670306e-05, |
|
"loss": 0.9264, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 1.6699693748874078, |
|
"grad_norm": 0.6352208852767944, |
|
"learning_rate": 9.767769907516495e-05, |
|
"loss": 0.9311, |
|
"step": 46350 |
|
}, |
|
{ |
|
"epoch": 1.6753738065213475, |
|
"grad_norm": 0.6197606325149536, |
|
"learning_rate": 9.704908035585692e-05, |
|
"loss": 0.9302, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.6807782381552874, |
|
"grad_norm": 0.6172420382499695, |
|
"learning_rate": 9.642057832665095e-05, |
|
"loss": 0.9253, |
|
"step": 46650 |
|
}, |
|
{ |
|
"epoch": 1.6861826697892273, |
|
"grad_norm": 0.6538959741592407, |
|
"learning_rate": 9.579221784080455e-05, |
|
"loss": 0.9376, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 1.691587101423167, |
|
"grad_norm": 0.6067585945129395, |
|
"learning_rate": 9.516402374597812e-05, |
|
"loss": 0.927, |
|
"step": 46950 |
|
}, |
|
{ |
|
"epoch": 1.6969915330571068, |
|
"grad_norm": 0.5777443647384644, |
|
"learning_rate": 9.453602088325234e-05, |
|
"loss": 0.9289, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 1.7023959646910467, |
|
"grad_norm": 0.5103596448898315, |
|
"learning_rate": 9.390823408614598e-05, |
|
"loss": 0.9137, |
|
"step": 47250 |
|
}, |
|
{ |
|
"epoch": 1.7078003963249864, |
|
"grad_norm": 0.624183714389801, |
|
"learning_rate": 9.328068817963359e-05, |
|
"loss": 0.9236, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 1.7132048279589265, |
|
"grad_norm": 0.5513512492179871, |
|
"learning_rate": 9.265340797916421e-05, |
|
"loss": 0.918, |
|
"step": 47550 |
|
}, |
|
{ |
|
"epoch": 1.7186092595928661, |
|
"grad_norm": 0.7002034187316895, |
|
"learning_rate": 9.202641828967985e-05, |
|
"loss": 0.9149, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 1.724013691226806, |
|
"grad_norm": 0.5479480028152466, |
|
"learning_rate": 9.139974390463459e-05, |
|
"loss": 0.9265, |
|
"step": 47850 |
|
}, |
|
{ |
|
"epoch": 1.729418122860746, |
|
"grad_norm": 0.570182204246521, |
|
"learning_rate": 9.077340960501425e-05, |
|
"loss": 0.9079, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.7348225544946856, |
|
"grad_norm": 0.6392347812652588, |
|
"learning_rate": 9.014744015835656e-05, |
|
"loss": 0.911, |
|
"step": 48150 |
|
}, |
|
{ |
|
"epoch": 1.7402269861286255, |
|
"grad_norm": 0.6063001751899719, |
|
"learning_rate": 8.952186031777144e-05, |
|
"loss": 0.9113, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 1.7456314177625654, |
|
"grad_norm": 0.6585242748260498, |
|
"learning_rate": 8.88966948209625e-05, |
|
"loss": 0.9137, |
|
"step": 48450 |
|
}, |
|
{ |
|
"epoch": 1.751035849396505, |
|
"grad_norm": 0.5171977281570435, |
|
"learning_rate": 8.827196838924867e-05, |
|
"loss": 0.9211, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 1.756440281030445, |
|
"grad_norm": 0.6493880152702332, |
|
"learning_rate": 8.764770572658655e-05, |
|
"loss": 0.9056, |
|
"step": 48750 |
|
}, |
|
{ |
|
"epoch": 1.7618447126643848, |
|
"grad_norm": 0.8104442954063416, |
|
"learning_rate": 8.70239315185938e-05, |
|
"loss": 0.9045, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 1.7672491442983245, |
|
"grad_norm": 0.5967045426368713, |
|
"learning_rate": 8.64006704315727e-05, |
|
"loss": 0.9164, |
|
"step": 49050 |
|
}, |
|
{ |
|
"epoch": 1.7726535759322646, |
|
"grad_norm": 0.6888705492019653, |
|
"learning_rate": 8.577794711153479e-05, |
|
"loss": 0.9111, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 1.7780580075662042, |
|
"grad_norm": 0.5948097705841064, |
|
"learning_rate": 8.515578618322648e-05, |
|
"loss": 0.9095, |
|
"step": 49350 |
|
}, |
|
{ |
|
"epoch": 1.7834624392001441, |
|
"grad_norm": 0.6458430886268616, |
|
"learning_rate": 8.453421224915511e-05, |
|
"loss": 0.9029, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.788866870834084, |
|
"grad_norm": 0.8202154040336609, |
|
"learning_rate": 8.391324988861611e-05, |
|
"loss": 0.9168, |
|
"step": 49650 |
|
}, |
|
{ |
|
"epoch": 1.7942713024680237, |
|
"grad_norm": 0.5799959897994995, |
|
"learning_rate": 8.32929236567211e-05, |
|
"loss": 0.9005, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 1.7996757341019636, |
|
"grad_norm": 0.7229143381118774, |
|
"learning_rate": 8.267325808342685e-05, |
|
"loss": 0.897, |
|
"step": 49950 |
|
}, |
|
{ |
|
"epoch": 1.8050801657359035, |
|
"grad_norm": 0.5912762880325317, |
|
"learning_rate": 8.205427767256524e-05, |
|
"loss": 0.9015, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 1.8104845973698431, |
|
"grad_norm": 0.6438339352607727, |
|
"learning_rate": 8.143600690087443e-05, |
|
"loss": 0.9137, |
|
"step": 50250 |
|
}, |
|
{ |
|
"epoch": 1.8158890290037832, |
|
"grad_norm": 0.5374941229820251, |
|
"learning_rate": 8.08184702170308e-05, |
|
"loss": 0.9008, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 1.821293460637723, |
|
"grad_norm": 0.5253046751022339, |
|
"learning_rate": 8.020169204068219e-05, |
|
"loss": 0.9015, |
|
"step": 50550 |
|
}, |
|
{ |
|
"epoch": 1.8266978922716628, |
|
"grad_norm": 0.6589975357055664, |
|
"learning_rate": 7.958569676148234e-05, |
|
"loss": 0.9117, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 1.8321023239056027, |
|
"grad_norm": 0.5939854979515076, |
|
"learning_rate": 7.897050873812647e-05, |
|
"loss": 0.9024, |
|
"step": 50850 |
|
}, |
|
{ |
|
"epoch": 1.8375067555395423, |
|
"grad_norm": 0.6179183721542358, |
|
"learning_rate": 7.835615229738775e-05, |
|
"loss": 0.9111, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.8429111871734822, |
|
"grad_norm": 0.6526548266410828, |
|
"learning_rate": 7.774265173315581e-05, |
|
"loss": 0.9002, |
|
"step": 51150 |
|
}, |
|
{ |
|
"epoch": 1.8483156188074221, |
|
"grad_norm": 0.5846490263938904, |
|
"learning_rate": 7.713003130547556e-05, |
|
"loss": 0.8889, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 1.8537200504413618, |
|
"grad_norm": 0.5639694333076477, |
|
"learning_rate": 7.651831523958827e-05, |
|
"loss": 0.896, |
|
"step": 51450 |
|
}, |
|
{ |
|
"epoch": 1.859124482075302, |
|
"grad_norm": 0.5969030857086182, |
|
"learning_rate": 7.590752772497345e-05, |
|
"loss": 0.8899, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 1.8645289137092416, |
|
"grad_norm": 0.57610023021698, |
|
"learning_rate": 7.529769291439216e-05, |
|
"loss": 0.8908, |
|
"step": 51750 |
|
}, |
|
{ |
|
"epoch": 1.8699333453431815, |
|
"grad_norm": 0.7263045907020569, |
|
"learning_rate": 7.468883492293228e-05, |
|
"loss": 0.8956, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 1.8753377769771213, |
|
"grad_norm": 0.5964723825454712, |
|
"learning_rate": 7.40809778270546e-05, |
|
"loss": 0.8944, |
|
"step": 52050 |
|
}, |
|
{ |
|
"epoch": 1.880742208611061, |
|
"grad_norm": 0.6026207804679871, |
|
"learning_rate": 7.347414566364085e-05, |
|
"loss": 0.8892, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 1.886146640245001, |
|
"grad_norm": 0.6354103684425354, |
|
"learning_rate": 7.28683624290432e-05, |
|
"loss": 0.8972, |
|
"step": 52350 |
|
}, |
|
{ |
|
"epoch": 1.8915510718789408, |
|
"grad_norm": 0.6123978495597839, |
|
"learning_rate": 7.226365207813542e-05, |
|
"loss": 0.8951, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.8969555035128804, |
|
"grad_norm": 0.7344669699668884, |
|
"learning_rate": 7.166003852336548e-05, |
|
"loss": 0.8825, |
|
"step": 52650 |
|
}, |
|
{ |
|
"epoch": 1.9023599351468206, |
|
"grad_norm": 0.5727975368499756, |
|
"learning_rate": 7.105754563381006e-05, |
|
"loss": 0.8815, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 1.9077643667807602, |
|
"grad_norm": 0.5696874856948853, |
|
"learning_rate": 7.045619723423072e-05, |
|
"loss": 0.8868, |
|
"step": 52950 |
|
}, |
|
{ |
|
"epoch": 1.9131687984147, |
|
"grad_norm": 0.6967275142669678, |
|
"learning_rate": 6.985601710413158e-05, |
|
"loss": 0.8845, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 1.91857323004864, |
|
"grad_norm": 0.64991295337677, |
|
"learning_rate": 6.92570289768193e-05, |
|
"loss": 0.8824, |
|
"step": 53250 |
|
}, |
|
{ |
|
"epoch": 1.9239776616825797, |
|
"grad_norm": 0.6261005997657776, |
|
"learning_rate": 6.865925653846432e-05, |
|
"loss": 0.881, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 1.9293820933165196, |
|
"grad_norm": 0.6127173900604248, |
|
"learning_rate": 6.806272342716431e-05, |
|
"loss": 0.8878, |
|
"step": 53550 |
|
}, |
|
{ |
|
"epoch": 1.9347865249504594, |
|
"grad_norm": 0.552493691444397, |
|
"learning_rate": 6.746745323200943e-05, |
|
"loss": 0.888, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 1.940190956584399, |
|
"grad_norm": 0.641351580619812, |
|
"learning_rate": 6.687346949214966e-05, |
|
"loss": 0.8834, |
|
"step": 53850 |
|
}, |
|
{ |
|
"epoch": 1.945595388218339, |
|
"grad_norm": 0.5708601474761963, |
|
"learning_rate": 6.628079569586365e-05, |
|
"loss": 0.8901, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.9509998198522789, |
|
"grad_norm": 0.5919014811515808, |
|
"learning_rate": 6.56894552796303e-05, |
|
"loss": 0.8833, |
|
"step": 54150 |
|
}, |
|
{ |
|
"epoch": 1.9564042514862185, |
|
"grad_norm": 0.5352922677993774, |
|
"learning_rate": 6.509947162720172e-05, |
|
"loss": 0.8762, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 1.9618086831201587, |
|
"grad_norm": 0.5126431584358215, |
|
"learning_rate": 6.451086806867864e-05, |
|
"loss": 0.8719, |
|
"step": 54450 |
|
}, |
|
{ |
|
"epoch": 1.9672131147540983, |
|
"grad_norm": 0.6120204329490662, |
|
"learning_rate": 6.392366787958786e-05, |
|
"loss": 0.882, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 1.9726175463880382, |
|
"grad_norm": 0.641154408454895, |
|
"learning_rate": 6.333789427996191e-05, |
|
"loss": 0.8743, |
|
"step": 54750 |
|
}, |
|
{ |
|
"epoch": 1.978021978021978, |
|
"grad_norm": 0.648558497428894, |
|
"learning_rate": 6.275357043342069e-05, |
|
"loss": 0.8645, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 1.9834264096559178, |
|
"grad_norm": 0.6066434979438782, |
|
"learning_rate": 6.217071944625562e-05, |
|
"loss": 0.8622, |
|
"step": 55050 |
|
}, |
|
{ |
|
"epoch": 1.9888308412898577, |
|
"grad_norm": 0.5739848613739014, |
|
"learning_rate": 6.158936436651593e-05, |
|
"loss": 0.8718, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 1.9942352729237975, |
|
"grad_norm": 0.5929279923439026, |
|
"learning_rate": 6.100952818309715e-05, |
|
"loss": 0.8686, |
|
"step": 55350 |
|
}, |
|
{ |
|
"epoch": 1.9996397045577372, |
|
"grad_norm": 0.5922086238861084, |
|
"learning_rate": 6.043123382483224e-05, |
|
"loss": 0.8753, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.0050441361916773, |
|
"grad_norm": 0.6458303332328796, |
|
"learning_rate": 5.98545041595847e-05, |
|
"loss": 0.791, |
|
"step": 55650 |
|
}, |
|
{ |
|
"epoch": 2.010448567825617, |
|
"grad_norm": 0.5965596437454224, |
|
"learning_rate": 5.927936199334435e-05, |
|
"loss": 0.7904, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 2.0158529994595566, |
|
"grad_norm": 0.523539125919342, |
|
"learning_rate": 5.8705830069325566e-05, |
|
"loss": 0.7859, |
|
"step": 55950 |
|
}, |
|
{ |
|
"epoch": 2.0212574310934968, |
|
"grad_norm": 0.5941675305366516, |
|
"learning_rate": 5.813393106706795e-05, |
|
"loss": 0.7907, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 2.0266618627274364, |
|
"grad_norm": 0.5710470080375671, |
|
"learning_rate": 5.7563687601539276e-05, |
|
"loss": 0.787, |
|
"step": 56250 |
|
}, |
|
{ |
|
"epoch": 2.0320662943613765, |
|
"grad_norm": 0.7543295621871948, |
|
"learning_rate": 5.699512222224148e-05, |
|
"loss": 0.7925, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 2.037470725995316, |
|
"grad_norm": 0.7011525630950928, |
|
"learning_rate": 5.642825741231889e-05, |
|
"loss": 0.7863, |
|
"step": 56550 |
|
}, |
|
{ |
|
"epoch": 2.042875157629256, |
|
"grad_norm": 0.7366952300071716, |
|
"learning_rate": 5.586311558766908e-05, |
|
"loss": 0.7845, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 2.048279589263196, |
|
"grad_norm": 0.5936063528060913, |
|
"learning_rate": 5.5299719096056444e-05, |
|
"loss": 0.7878, |
|
"step": 56850 |
|
}, |
|
{ |
|
"epoch": 2.0536840208971356, |
|
"grad_norm": 0.6049606800079346, |
|
"learning_rate": 5.4738090216228724e-05, |
|
"loss": 0.7856, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.0590884525310753, |
|
"grad_norm": 0.6939170360565186, |
|
"learning_rate": 5.4178251157035675e-05, |
|
"loss": 0.7886, |
|
"step": 57150 |
|
}, |
|
{ |
|
"epoch": 2.0644928841650154, |
|
"grad_norm": 0.5444577932357788, |
|
"learning_rate": 5.3620224056551224e-05, |
|
"loss": 0.7806, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 2.069897315798955, |
|
"grad_norm": 0.6011742949485779, |
|
"learning_rate": 5.30640309811977e-05, |
|
"loss": 0.7852, |
|
"step": 57450 |
|
}, |
|
{ |
|
"epoch": 2.0753017474328948, |
|
"grad_norm": 0.6152522563934326, |
|
"learning_rate": 5.250969392487343e-05, |
|
"loss": 0.7777, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 2.080706179066835, |
|
"grad_norm": 0.4750346839427948, |
|
"learning_rate": 5.195723480808309e-05, |
|
"loss": 0.7735, |
|
"step": 57750 |
|
}, |
|
{ |
|
"epoch": 2.0861106107007745, |
|
"grad_norm": 0.5713702440261841, |
|
"learning_rate": 5.140667547707064e-05, |
|
"loss": 0.7874, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 2.0915150423347146, |
|
"grad_norm": 0.5541932582855225, |
|
"learning_rate": 5.085803770295579e-05, |
|
"loss": 0.789, |
|
"step": 58050 |
|
}, |
|
{ |
|
"epoch": 2.0969194739686543, |
|
"grad_norm": 0.571283221244812, |
|
"learning_rate": 5.03113431808727e-05, |
|
"loss": 0.789, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 2.102323905602594, |
|
"grad_norm": 0.6038793325424194, |
|
"learning_rate": 4.976661352911237e-05, |
|
"loss": 0.7887, |
|
"step": 58350 |
|
}, |
|
{ |
|
"epoch": 2.107728337236534, |
|
"grad_norm": 0.6276759505271912, |
|
"learning_rate": 4.922387028826768e-05, |
|
"loss": 0.7858, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.1131327688704737, |
|
"grad_norm": 0.6171843409538269, |
|
"learning_rate": 4.8683134920381665e-05, |
|
"loss": 0.7813, |
|
"step": 58650 |
|
}, |
|
{ |
|
"epoch": 2.1185372005044134, |
|
"grad_norm": 0.6076928973197937, |
|
"learning_rate": 4.814442880809853e-05, |
|
"loss": 0.7871, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 2.1239416321383535, |
|
"grad_norm": 0.6066181063652039, |
|
"learning_rate": 4.760777325381852e-05, |
|
"loss": 0.7793, |
|
"step": 58950 |
|
}, |
|
{ |
|
"epoch": 2.129346063772293, |
|
"grad_norm": 0.6619130373001099, |
|
"learning_rate": 4.707318947885537e-05, |
|
"loss": 0.7842, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 2.1347504954062333, |
|
"grad_norm": 0.6103502511978149, |
|
"learning_rate": 4.6540698622597e-05, |
|
"loss": 0.7858, |
|
"step": 59250 |
|
}, |
|
{ |
|
"epoch": 2.140154927040173, |
|
"grad_norm": 0.6459470391273499, |
|
"learning_rate": 4.6010321741669726e-05, |
|
"loss": 0.7817, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 2.1455593586741126, |
|
"grad_norm": 0.643363356590271, |
|
"learning_rate": 4.5482079809105704e-05, |
|
"loss": 0.7743, |
|
"step": 59550 |
|
}, |
|
{ |
|
"epoch": 2.1509637903080527, |
|
"grad_norm": 0.518678605556488, |
|
"learning_rate": 4.495599371351331e-05, |
|
"loss": 0.7826, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 2.1563682219419924, |
|
"grad_norm": 0.5462015867233276, |
|
"learning_rate": 4.4432084258251415e-05, |
|
"loss": 0.7729, |
|
"step": 59850 |
|
}, |
|
{ |
|
"epoch": 2.161772653575932, |
|
"grad_norm": 0.5519649982452393, |
|
"learning_rate": 4.39103721606065e-05, |
|
"loss": 0.7765, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.167177085209872, |
|
"grad_norm": 0.672087550163269, |
|
"learning_rate": 4.3390878050973573e-05, |
|
"loss": 0.7808, |
|
"step": 60150 |
|
}, |
|
{ |
|
"epoch": 2.172581516843812, |
|
"grad_norm": 0.5825379490852356, |
|
"learning_rate": 4.287362247204033e-05, |
|
"loss": 0.7711, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 2.177985948477752, |
|
"grad_norm": 0.6448932886123657, |
|
"learning_rate": 4.2358625877974864e-05, |
|
"loss": 0.7767, |
|
"step": 60450 |
|
}, |
|
{ |
|
"epoch": 2.1833903801116916, |
|
"grad_norm": 0.60658860206604, |
|
"learning_rate": 4.1845908633616695e-05, |
|
"loss": 0.772, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 2.1887948117456313, |
|
"grad_norm": 0.6476044058799744, |
|
"learning_rate": 4.1335491013671565e-05, |
|
"loss": 0.7784, |
|
"step": 60750 |
|
}, |
|
{ |
|
"epoch": 2.1941992433795714, |
|
"grad_norm": 0.7101139426231384, |
|
"learning_rate": 4.0827393201909794e-05, |
|
"loss": 0.7727, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 2.199603675013511, |
|
"grad_norm": 0.7003293633460999, |
|
"learning_rate": 4.032163529036792e-05, |
|
"loss": 0.7806, |
|
"step": 61050 |
|
}, |
|
{ |
|
"epoch": 2.2050081066474507, |
|
"grad_norm": 0.5855246782302856, |
|
"learning_rate": 3.981823727855444e-05, |
|
"loss": 0.7814, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 2.210412538281391, |
|
"grad_norm": 0.5075130462646484, |
|
"learning_rate": 3.9317219072658726e-05, |
|
"loss": 0.7689, |
|
"step": 61350 |
|
}, |
|
{ |
|
"epoch": 2.2158169699153305, |
|
"grad_norm": 0.5855611562728882, |
|
"learning_rate": 3.881860048476396e-05, |
|
"loss": 0.7777, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.22122140154927, |
|
"grad_norm": 0.5581937432289124, |
|
"learning_rate": 3.8322401232063765e-05, |
|
"loss": 0.7845, |
|
"step": 61650 |
|
}, |
|
{ |
|
"epoch": 2.2266258331832103, |
|
"grad_norm": 0.5910426378250122, |
|
"learning_rate": 3.782864093608245e-05, |
|
"loss": 0.7792, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 2.23203026481715, |
|
"grad_norm": 0.5566779971122742, |
|
"learning_rate": 3.733733912189903e-05, |
|
"loss": 0.7711, |
|
"step": 61950 |
|
}, |
|
{ |
|
"epoch": 2.23743469645109, |
|
"grad_norm": 0.5984916090965271, |
|
"learning_rate": 3.68485152173752e-05, |
|
"loss": 0.7675, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 2.2428391280850297, |
|
"grad_norm": 0.5687974095344543, |
|
"learning_rate": 3.6362188552387186e-05, |
|
"loss": 0.7752, |
|
"step": 62250 |
|
}, |
|
{ |
|
"epoch": 2.2482435597189694, |
|
"grad_norm": 0.5997481942176819, |
|
"learning_rate": 3.587837835806116e-05, |
|
"loss": 0.7762, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 2.2536479913529095, |
|
"grad_norm": 0.6333452463150024, |
|
"learning_rate": 3.539710376601299e-05, |
|
"loss": 0.776, |
|
"step": 62550 |
|
}, |
|
{ |
|
"epoch": 2.259052422986849, |
|
"grad_norm": 0.49814724922180176, |
|
"learning_rate": 3.4918383807591516e-05, |
|
"loss": 0.7704, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 2.2644568546207893, |
|
"grad_norm": 0.6359221935272217, |
|
"learning_rate": 3.444223741312608e-05, |
|
"loss": 0.7749, |
|
"step": 62850 |
|
}, |
|
{ |
|
"epoch": 2.269861286254729, |
|
"grad_norm": 0.5802394151687622, |
|
"learning_rate": 3.396868341117798e-05, |
|
"loss": 0.7755, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.2752657178886686, |
|
"grad_norm": 0.6383761763572693, |
|
"learning_rate": 3.3497740527795905e-05, |
|
"loss": 0.775, |
|
"step": 63150 |
|
}, |
|
{ |
|
"epoch": 2.2806701495226087, |
|
"grad_norm": 0.5394207835197449, |
|
"learning_rate": 3.3029427385775335e-05, |
|
"loss": 0.7755, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 2.2860745811565484, |
|
"grad_norm": 0.5275822877883911, |
|
"learning_rate": 3.25637625039222e-05, |
|
"loss": 0.7728, |
|
"step": 63450 |
|
}, |
|
{ |
|
"epoch": 2.291479012790488, |
|
"grad_norm": 0.5123447775840759, |
|
"learning_rate": 3.21007642963207e-05, |
|
"loss": 0.7721, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 2.296883444424428, |
|
"grad_norm": 0.586459755897522, |
|
"learning_rate": 3.164045107160487e-05, |
|
"loss": 0.7708, |
|
"step": 63750 |
|
}, |
|
{ |
|
"epoch": 2.302287876058368, |
|
"grad_norm": 0.6412725448608398, |
|
"learning_rate": 3.1182841032234924e-05, |
|
"loss": 0.7695, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 2.3076923076923075, |
|
"grad_norm": 0.5762320160865784, |
|
"learning_rate": 3.072795227377716e-05, |
|
"loss": 0.7602, |
|
"step": 64050 |
|
}, |
|
{ |
|
"epoch": 2.3130967393262476, |
|
"grad_norm": 0.5541566014289856, |
|
"learning_rate": 3.027580278418852e-05, |
|
"loss": 0.7649, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 2.3185011709601873, |
|
"grad_norm": 0.5710071921348572, |
|
"learning_rate": 2.9826410443105422e-05, |
|
"loss": 0.7643, |
|
"step": 64350 |
|
}, |
|
{ |
|
"epoch": 2.3239056025941274, |
|
"grad_norm": 0.6665874719619751, |
|
"learning_rate": 2.9379793021136427e-05, |
|
"loss": 0.7619, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.329310034228067, |
|
"grad_norm": 0.5459585189819336, |
|
"learning_rate": 2.8935968179159843e-05, |
|
"loss": 0.7503, |
|
"step": 64650 |
|
}, |
|
{ |
|
"epoch": 2.3347144658620067, |
|
"grad_norm": 0.6013796925544739, |
|
"learning_rate": 2.8494953467625107e-05, |
|
"loss": 0.7616, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 2.340118897495947, |
|
"grad_norm": 0.6519309282302856, |
|
"learning_rate": 2.8056766325858863e-05, |
|
"loss": 0.7582, |
|
"step": 64950 |
|
}, |
|
{ |
|
"epoch": 2.3455233291298865, |
|
"grad_norm": 0.6198135614395142, |
|
"learning_rate": 2.7621424081375423e-05, |
|
"loss": 0.7538, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 2.350927760763826, |
|
"grad_norm": 0.580227792263031, |
|
"learning_rate": 2.718894394919155e-05, |
|
"loss": 0.7604, |
|
"step": 65250 |
|
}, |
|
{ |
|
"epoch": 2.3563321923977663, |
|
"grad_norm": 0.5496440529823303, |
|
"learning_rate": 2.6759343031145467e-05, |
|
"loss": 0.7629, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 2.361736624031706, |
|
"grad_norm": 0.6118148565292358, |
|
"learning_rate": 2.633263831522098e-05, |
|
"loss": 0.7543, |
|
"step": 65550 |
|
}, |
|
{ |
|
"epoch": 2.3671410556656456, |
|
"grad_norm": 0.5903668403625488, |
|
"learning_rate": 2.5908846674875497e-05, |
|
"loss": 0.7626, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 2.3725454872995857, |
|
"grad_norm": 0.5964175462722778, |
|
"learning_rate": 2.548798486837276e-05, |
|
"loss": 0.7584, |
|
"step": 65850 |
|
}, |
|
{ |
|
"epoch": 2.3779499189335254, |
|
"grad_norm": 0.6447151899337769, |
|
"learning_rate": 2.5070069538120212e-05, |
|
"loss": 0.7659, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.3833543505674655, |
|
"grad_norm": 0.5526403188705444, |
|
"learning_rate": 2.465511721001098e-05, |
|
"loss": 0.7528, |
|
"step": 66150 |
|
}, |
|
{ |
|
"epoch": 2.388758782201405, |
|
"grad_norm": 0.6118183732032776, |
|
"learning_rate": 2.4243144292770215e-05, |
|
"loss": 0.7447, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 2.394163213835345, |
|
"grad_norm": 0.5308869481086731, |
|
"learning_rate": 2.383416707730637e-05, |
|
"loss": 0.7593, |
|
"step": 66450 |
|
}, |
|
{ |
|
"epoch": 2.399567645469285, |
|
"grad_norm": 0.6109766364097595, |
|
"learning_rate": 2.3428201736067003e-05, |
|
"loss": 0.761, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 2.4049720771032246, |
|
"grad_norm": 0.6102012991905212, |
|
"learning_rate": 2.302526432239902e-05, |
|
"loss": 0.7533, |
|
"step": 66750 |
|
}, |
|
{ |
|
"epoch": 2.4103765087371647, |
|
"grad_norm": 0.5869913697242737, |
|
"learning_rate": 2.2625370769914233e-05, |
|
"loss": 0.7514, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 2.4157809403711044, |
|
"grad_norm": 0.5591433644294739, |
|
"learning_rate": 2.2228536891859063e-05, |
|
"loss": 0.7608, |
|
"step": 67050 |
|
}, |
|
{ |
|
"epoch": 2.421185372005044, |
|
"grad_norm": 0.48755505681037903, |
|
"learning_rate": 2.183477838048923e-05, |
|
"loss": 0.7581, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 2.426589803638984, |
|
"grad_norm": 0.5120564103126526, |
|
"learning_rate": 2.144411080644925e-05, |
|
"loss": 0.7609, |
|
"step": 67350 |
|
}, |
|
{ |
|
"epoch": 2.431994235272924, |
|
"grad_norm": 0.5482677221298218, |
|
"learning_rate": 2.1056549618156796e-05, |
|
"loss": 0.7618, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.4373986669068635, |
|
"grad_norm": 0.6918262243270874, |
|
"learning_rate": 2.067211014119168e-05, |
|
"loss": 0.757, |
|
"step": 67650 |
|
}, |
|
{ |
|
"epoch": 2.4428030985408036, |
|
"grad_norm": 0.455586701631546, |
|
"learning_rate": 2.029080757768994e-05, |
|
"loss": 0.7446, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 2.4482075301747432, |
|
"grad_norm": 0.5845438838005066, |
|
"learning_rate": 1.9912657005742608e-05, |
|
"loss": 0.7558, |
|
"step": 67950 |
|
}, |
|
{ |
|
"epoch": 2.453611961808683, |
|
"grad_norm": 0.6255479454994202, |
|
"learning_rate": 1.953767337879947e-05, |
|
"loss": 0.7426, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 2.459016393442623, |
|
"grad_norm": 0.5470909476280212, |
|
"learning_rate": 1.9165871525077828e-05, |
|
"loss": 0.7597, |
|
"step": 68250 |
|
}, |
|
{ |
|
"epoch": 2.4644208250765627, |
|
"grad_norm": 0.5875541567802429, |
|
"learning_rate": 1.879726614697612e-05, |
|
"loss": 0.7491, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 2.469825256710503, |
|
"grad_norm": 0.6186181306838989, |
|
"learning_rate": 1.843187182049244e-05, |
|
"loss": 0.7556, |
|
"step": 68550 |
|
}, |
|
{ |
|
"epoch": 2.4752296883444425, |
|
"grad_norm": 0.6414260268211365, |
|
"learning_rate": 1.8069702994648208e-05, |
|
"loss": 0.7534, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 2.480634119978382, |
|
"grad_norm": 0.5647196173667908, |
|
"learning_rate": 1.7710773990916885e-05, |
|
"loss": 0.7467, |
|
"step": 68850 |
|
}, |
|
{ |
|
"epoch": 2.4860385516123222, |
|
"grad_norm": 0.5534460544586182, |
|
"learning_rate": 1.7355099002657495e-05, |
|
"loss": 0.7591, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.491442983246262, |
|
"grad_norm": 0.5535364151000977, |
|
"learning_rate": 1.7002692094553506e-05, |
|
"loss": 0.7497, |
|
"step": 69150 |
|
}, |
|
{ |
|
"epoch": 2.496847414880202, |
|
"grad_norm": 0.5928584337234497, |
|
"learning_rate": 1.6653567202056585e-05, |
|
"loss": 0.7496, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 2.5022518465141417, |
|
"grad_norm": 0.5369604825973511, |
|
"learning_rate": 1.6307738130835515e-05, |
|
"loss": 0.761, |
|
"step": 69450 |
|
}, |
|
{ |
|
"epoch": 2.5076562781480813, |
|
"grad_norm": 0.6959002614021301, |
|
"learning_rate": 1.5965218556230375e-05, |
|
"loss": 0.7461, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 2.513060709782021, |
|
"grad_norm": 0.6277987360954285, |
|
"learning_rate": 1.5626022022711694e-05, |
|
"loss": 0.7467, |
|
"step": 69750 |
|
}, |
|
{ |
|
"epoch": 2.518465141415961, |
|
"grad_norm": 0.6087015867233276, |
|
"learning_rate": 1.529016194334484e-05, |
|
"loss": 0.7556, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 2.523869573049901, |
|
"grad_norm": 0.5043054819107056, |
|
"learning_rate": 1.4957651599259615e-05, |
|
"loss": 0.7397, |
|
"step": 70050 |
|
}, |
|
{ |
|
"epoch": 2.529274004683841, |
|
"grad_norm": 0.6836428642272949, |
|
"learning_rate": 1.4628504139125177e-05, |
|
"loss": 0.741, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 2.5346784363177806, |
|
"grad_norm": 0.5704199075698853, |
|
"learning_rate": 1.4302732578629918e-05, |
|
"loss": 0.7513, |
|
"step": 70350 |
|
}, |
|
{ |
|
"epoch": 2.5400828679517202, |
|
"grad_norm": 0.5928525328636169, |
|
"learning_rate": 1.3980349799966985e-05, |
|
"loss": 0.7485, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.5454872995856603, |
|
"grad_norm": 0.6592413783073425, |
|
"learning_rate": 1.3661368551324648e-05, |
|
"loss": 0.7452, |
|
"step": 70650 |
|
}, |
|
{ |
|
"epoch": 2.5508917312196, |
|
"grad_norm": 0.5700178146362305, |
|
"learning_rate": 1.3345801446382344e-05, |
|
"loss": 0.7496, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 2.55629616285354, |
|
"grad_norm": 0.5675559043884277, |
|
"learning_rate": 1.3033660963811878e-05, |
|
"loss": 0.7488, |
|
"step": 70950 |
|
}, |
|
{ |
|
"epoch": 2.5617005944874798, |
|
"grad_norm": 0.5796085596084595, |
|
"learning_rate": 1.2724959446783868e-05, |
|
"loss": 0.7454, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 2.5671050261214194, |
|
"grad_norm": 0.6384360194206238, |
|
"learning_rate": 1.2419709102479804e-05, |
|
"loss": 0.7387, |
|
"step": 71250 |
|
}, |
|
{ |
|
"epoch": 2.5725094577553596, |
|
"grad_norm": 0.5239229798316956, |
|
"learning_rate": 1.2117922001609173e-05, |
|
"loss": 0.7371, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 2.577913889389299, |
|
"grad_norm": 0.5770368576049805, |
|
"learning_rate": 1.181961007793222e-05, |
|
"loss": 0.7451, |
|
"step": 71550 |
|
}, |
|
{ |
|
"epoch": 2.5833183210232393, |
|
"grad_norm": 0.5493025779724121, |
|
"learning_rate": 1.1524785127788074e-05, |
|
"loss": 0.7396, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 2.588722752657179, |
|
"grad_norm": 0.5658043622970581, |
|
"learning_rate": 1.123345880962826e-05, |
|
"loss": 0.7448, |
|
"step": 71850 |
|
}, |
|
{ |
|
"epoch": 2.5941271842911187, |
|
"grad_norm": 0.5434427857398987, |
|
"learning_rate": 1.0945642643555542e-05, |
|
"loss": 0.7471, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.5995316159250583, |
|
"grad_norm": 0.5109556913375854, |
|
"learning_rate": 1.066134801086862e-05, |
|
"loss": 0.7434, |
|
"step": 72150 |
|
}, |
|
{ |
|
"epoch": 2.6049360475589984, |
|
"grad_norm": 0.5859112739562988, |
|
"learning_rate": 1.0380586153611926e-05, |
|
"loss": 0.7391, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 2.610340479192938, |
|
"grad_norm": 0.5381293296813965, |
|
"learning_rate": 1.0103368174131044e-05, |
|
"loss": 0.7402, |
|
"step": 72450 |
|
}, |
|
{ |
|
"epoch": 2.615744910826878, |
|
"grad_norm": 0.5799181461334229, |
|
"learning_rate": 9.829705034633763e-06, |
|
"loss": 0.746, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 2.621149342460818, |
|
"grad_norm": 0.5245427489280701, |
|
"learning_rate": 9.559607556756589e-06, |
|
"loss": 0.7374, |
|
"step": 72750 |
|
}, |
|
{ |
|
"epoch": 2.6265537740947575, |
|
"grad_norm": 0.5755253434181213, |
|
"learning_rate": 9.29308642113672e-06, |
|
"loss": 0.7335, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 2.6319582057286977, |
|
"grad_norm": 0.5702092051506042, |
|
"learning_rate": 9.030152166989848e-06, |
|
"loss": 0.7441, |
|
"step": 73050 |
|
}, |
|
{ |
|
"epoch": 2.6373626373626373, |
|
"grad_norm": 0.5722294449806213, |
|
"learning_rate": 8.770815191693294e-06, |
|
"loss": 0.745, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 2.6427670689965774, |
|
"grad_norm": 0.5095585584640503, |
|
"learning_rate": 8.515085750374819e-06, |
|
"loss": 0.7399, |
|
"step": 73350 |
|
}, |
|
{ |
|
"epoch": 2.648171500630517, |
|
"grad_norm": 0.7061243057250977, |
|
"learning_rate": 8.262973955507213e-06, |
|
"loss": 0.7317, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.6535759322644568, |
|
"grad_norm": 0.6071792244911194, |
|
"learning_rate": 8.014489776508406e-06, |
|
"loss": 0.7457, |
|
"step": 73650 |
|
}, |
|
{ |
|
"epoch": 2.6589803638983964, |
|
"grad_norm": 0.6209822297096252, |
|
"learning_rate": 7.769643039347118e-06, |
|
"loss": 0.7304, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 2.6643847955323365, |
|
"grad_norm": 0.5465585589408875, |
|
"learning_rate": 7.528443426154386e-06, |
|
"loss": 0.7348, |
|
"step": 73950 |
|
}, |
|
{ |
|
"epoch": 2.669789227166276, |
|
"grad_norm": 0.5735740661621094, |
|
"learning_rate": 7.290900474840745e-06, |
|
"loss": 0.7509, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 2.6751936588002163, |
|
"grad_norm": 0.5864896178245544, |
|
"learning_rate": 7.0570235787189575e-06, |
|
"loss": 0.7422, |
|
"step": 74250 |
|
}, |
|
{ |
|
"epoch": 2.680598090434156, |
|
"grad_norm": 0.5019831657409668, |
|
"learning_rate": 6.82682198613267e-06, |
|
"loss": 0.74, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 2.6860025220680956, |
|
"grad_norm": 0.4947664141654968, |
|
"learning_rate": 6.600304800090629e-06, |
|
"loss": 0.7424, |
|
"step": 74550 |
|
}, |
|
{ |
|
"epoch": 2.6914069537020358, |
|
"grad_norm": 0.5284778475761414, |
|
"learning_rate": 6.3774809779066914e-06, |
|
"loss": 0.741, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 2.6968113853359754, |
|
"grad_norm": 0.5382539629936218, |
|
"learning_rate": 6.158359330845742e-06, |
|
"loss": 0.7384, |
|
"step": 74850 |
|
}, |
|
{ |
|
"epoch": 2.7022158169699155, |
|
"grad_norm": 0.6098785996437073, |
|
"learning_rate": 5.942948523775172e-06, |
|
"loss": 0.732, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.707620248603855, |
|
"grad_norm": 0.5111733675003052, |
|
"learning_rate": 5.731257074822227e-06, |
|
"loss": 0.7401, |
|
"step": 75150 |
|
}, |
|
{ |
|
"epoch": 2.713024680237795, |
|
"grad_norm": 0.563735842704773, |
|
"learning_rate": 5.523293355037174e-06, |
|
"loss": 0.7373, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 2.718429111871735, |
|
"grad_norm": 0.48581522703170776, |
|
"learning_rate": 5.319065588062389e-06, |
|
"loss": 0.7355, |
|
"step": 75450 |
|
}, |
|
{ |
|
"epoch": 2.7238335435056746, |
|
"grad_norm": 0.6022956371307373, |
|
"learning_rate": 5.118581849806991e-06, |
|
"loss": 0.752, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 2.7292379751396147, |
|
"grad_norm": 0.5350160002708435, |
|
"learning_rate": 4.92185006812762e-06, |
|
"loss": 0.7302, |
|
"step": 75750 |
|
}, |
|
{ |
|
"epoch": 2.7346424067735544, |
|
"grad_norm": 0.5559709668159485, |
|
"learning_rate": 4.728878022514904e-06, |
|
"loss": 0.7258, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 2.740046838407494, |
|
"grad_norm": 0.5401473045349121, |
|
"learning_rate": 4.5396733437857885e-06, |
|
"loss": 0.7485, |
|
"step": 76050 |
|
}, |
|
{ |
|
"epoch": 2.7454512700414337, |
|
"grad_norm": 0.5016641020774841, |
|
"learning_rate": 4.354243513781841e-06, |
|
"loss": 0.7257, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 2.750855701675374, |
|
"grad_norm": 0.5274752974510193, |
|
"learning_rate": 4.172595865073414e-06, |
|
"loss": 0.7307, |
|
"step": 76350 |
|
}, |
|
{ |
|
"epoch": 2.7562601333093135, |
|
"grad_norm": 0.5795451402664185, |
|
"learning_rate": 3.994737580669572e-06, |
|
"loss": 0.7431, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.7616645649432536, |
|
"grad_norm": 0.584701418876648, |
|
"learning_rate": 3.820675693734166e-06, |
|
"loss": 0.7333, |
|
"step": 76650 |
|
}, |
|
{ |
|
"epoch": 2.7670689965771933, |
|
"grad_norm": 0.5679466724395752, |
|
"learning_rate": 3.6504170873076894e-06, |
|
"loss": 0.7457, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 2.772473428211133, |
|
"grad_norm": 0.5592213869094849, |
|
"learning_rate": 3.483968494035039e-06, |
|
"loss": 0.7438, |
|
"step": 76950 |
|
}, |
|
{ |
|
"epoch": 2.777877859845073, |
|
"grad_norm": 0.6507932543754578, |
|
"learning_rate": 3.3213364958993633e-06, |
|
"loss": 0.7332, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 2.7832822914790127, |
|
"grad_norm": 0.5836296081542969, |
|
"learning_rate": 3.1625275239617447e-06, |
|
"loss": 0.7341, |
|
"step": 77250 |
|
}, |
|
{ |
|
"epoch": 2.788686723112953, |
|
"grad_norm": 0.6291818618774414, |
|
"learning_rate": 3.0075478581068517e-06, |
|
"loss": 0.7391, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 2.7940911547468925, |
|
"grad_norm": 0.59623783826828, |
|
"learning_rate": 2.8564036267947347e-06, |
|
"loss": 0.7281, |
|
"step": 77550 |
|
}, |
|
{ |
|
"epoch": 2.799495586380832, |
|
"grad_norm": 0.5835798978805542, |
|
"learning_rate": 2.7091008068183323e-06, |
|
"loss": 0.7385, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 2.804900018014772, |
|
"grad_norm": 0.5502892732620239, |
|
"learning_rate": 2.565645223067237e-06, |
|
"loss": 0.7441, |
|
"step": 77850 |
|
}, |
|
{ |
|
"epoch": 2.810304449648712, |
|
"grad_norm": 0.5453166365623474, |
|
"learning_rate": 2.4260425482973025e-06, |
|
"loss": 0.7338, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.8157088812826516, |
|
"grad_norm": 0.5541927814483643, |
|
"learning_rate": 2.2902983029063463e-06, |
|
"loss": 0.7325, |
|
"step": 78150 |
|
}, |
|
{ |
|
"epoch": 2.8211133129165917, |
|
"grad_norm": 0.5624451041221619, |
|
"learning_rate": 2.158417854715844e-06, |
|
"loss": 0.7311, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 2.8265177445505314, |
|
"grad_norm": 0.6407118439674377, |
|
"learning_rate": 2.0304064187587012e-06, |
|
"loss": 0.7343, |
|
"step": 78450 |
|
}, |
|
{ |
|
"epoch": 2.831922176184471, |
|
"grad_norm": 0.6349582076072693, |
|
"learning_rate": 1.906269057072918e-06, |
|
"loss": 0.7289, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 2.837326607818411, |
|
"grad_norm": 0.511360764503479, |
|
"learning_rate": 1.7860106785015707e-06, |
|
"loss": 0.7362, |
|
"step": 78750 |
|
}, |
|
{ |
|
"epoch": 2.842731039452351, |
|
"grad_norm": 0.6116952300071716, |
|
"learning_rate": 1.669636038498612e-06, |
|
"loss": 0.7357, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 2.848135471086291, |
|
"grad_norm": 0.5288776159286499, |
|
"learning_rate": 1.5571497389408218e-06, |
|
"loss": 0.7377, |
|
"step": 79050 |
|
}, |
|
{ |
|
"epoch": 2.8535399027202306, |
|
"grad_norm": 0.5661271810531616, |
|
"learning_rate": 1.4485562279458742e-06, |
|
"loss": 0.7335, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 2.8589443343541703, |
|
"grad_norm": 0.46028730273246765, |
|
"learning_rate": 1.3438597996963675e-06, |
|
"loss": 0.7306, |
|
"step": 79350 |
|
}, |
|
{ |
|
"epoch": 2.8643487659881104, |
|
"grad_norm": 0.5887011289596558, |
|
"learning_rate": 1.243064594270127e-06, |
|
"loss": 0.7348, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 2.86975319762205, |
|
"grad_norm": 0.5686684846878052, |
|
"learning_rate": 1.1461745974763682e-06, |
|
"loss": 0.7305, |
|
"step": 79650 |
|
}, |
|
{ |
|
"epoch": 2.87515762925599, |
|
"grad_norm": 0.5735449194908142, |
|
"learning_rate": 1.0531936406982247e-06, |
|
"loss": 0.726, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 2.88056206088993, |
|
"grad_norm": 0.6428796648979187, |
|
"learning_rate": 9.64125400741056e-07, |
|
"loss": 0.7288, |
|
"step": 79950 |
|
}, |
|
{ |
|
"epoch": 2.8859664925238695, |
|
"grad_norm": 0.6176515817642212, |
|
"learning_rate": 8.789733996872551e-07, |
|
"loss": 0.7345, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 2.891370924157809, |
|
"grad_norm": 0.5095422267913818, |
|
"learning_rate": 7.977410047568246e-07, |
|
"loss": 0.7419, |
|
"step": 80250 |
|
}, |
|
{ |
|
"epoch": 2.8967753557917493, |
|
"grad_norm": 0.5800315141677856, |
|
"learning_rate": 7.204314281742952e-07, |
|
"loss": 0.7375, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 2.902179787425689, |
|
"grad_norm": 0.5727178454399109, |
|
"learning_rate": 6.470477270416719e-07, |
|
"loss": 0.7356, |
|
"step": 80550 |
|
}, |
|
{ |
|
"epoch": 2.907584219059629, |
|
"grad_norm": 0.5594687461853027, |
|
"learning_rate": 5.775928032175637e-07, |
|
"loss": 0.7363, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 2.9129886506935687, |
|
"grad_norm": 0.6071078777313232, |
|
"learning_rate": 5.120694032024309e-07, |
|
"loss": 0.7491, |
|
"step": 80850 |
|
}, |
|
{ |
|
"epoch": 2.9183930823275084, |
|
"grad_norm": 0.6253530383110046, |
|
"learning_rate": 4.5048011802997226e-07, |
|
"loss": 0.7495, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.9237975139614485, |
|
"grad_norm": 0.7043154835700989, |
|
"learning_rate": 3.928273831646512e-07, |
|
"loss": 0.7349, |
|
"step": 81150 |
|
}, |
|
{ |
|
"epoch": 2.929201945595388, |
|
"grad_norm": 0.5901583433151245, |
|
"learning_rate": 3.391134784054284e-07, |
|
"loss": 0.7388, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 2.9346063772293283, |
|
"grad_norm": 0.5171722173690796, |
|
"learning_rate": 2.8934052779558965e-07, |
|
"loss": 0.7357, |
|
"step": 81450 |
|
}, |
|
{ |
|
"epoch": 2.940010808863268, |
|
"grad_norm": 0.5885277986526489, |
|
"learning_rate": 2.4351049953872386e-07, |
|
"loss": 0.7294, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 2.9454152404972076, |
|
"grad_norm": 0.5369580388069153, |
|
"learning_rate": 2.0162520592095225e-07, |
|
"loss": 0.724, |
|
"step": 81750 |
|
}, |
|
{ |
|
"epoch": 2.9508196721311473, |
|
"grad_norm": 0.505922794342041, |
|
"learning_rate": 1.6368630323920776e-07, |
|
"loss": 0.7376, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 2.9562241037650874, |
|
"grad_norm": 0.5709424018859863, |
|
"learning_rate": 1.2969529173577633e-07, |
|
"loss": 0.7273, |
|
"step": 82050 |
|
}, |
|
{ |
|
"epoch": 2.961628535399027, |
|
"grad_norm": 0.5696266293525696, |
|
"learning_rate": 9.965351553895552e-08, |
|
"loss": 0.7358, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 2.967032967032967, |
|
"grad_norm": 0.6568360924720764, |
|
"learning_rate": 7.356216260990811e-08, |
|
"loss": 0.7337, |
|
"step": 82350 |
|
}, |
|
{ |
|
"epoch": 2.972437398666907, |
|
"grad_norm": 0.6210362911224365, |
|
"learning_rate": 5.142226469568856e-08, |
|
"loss": 0.7301, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 2.9778418303008465, |
|
"grad_norm": 0.5563607811927795, |
|
"learning_rate": 3.32346972884312e-08, |
|
"loss": 0.7311, |
|
"step": 82650 |
|
}, |
|
{ |
|
"epoch": 2.9832462619347866, |
|
"grad_norm": 0.6156190633773804, |
|
"learning_rate": 1.9000179590733525e-08, |
|
"loss": 0.7248, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 2.9886506935687263, |
|
"grad_norm": 0.6303669810295105, |
|
"learning_rate": 8.719274487245522e-09, |
|
"loss": 0.7412, |
|
"step": 82950 |
|
}, |
|
{ |
|
"epoch": 2.9940551252026664, |
|
"grad_norm": 0.4844772517681122, |
|
"learning_rate": 2.392388522343136e-09, |
|
"loss": 0.7329, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 2.999459556836606, |
|
"grad_norm": 0.5367130041122437, |
|
"learning_rate": 1.977188415214215e-11, |
|
"loss": 0.7302, |
|
"step": 83250 |
|
} |
|
], |
|
"logging_steps": 150, |
|
"max_steps": 83265, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.661509740266363e+20, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|