|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.4109148371804279, |
|
"eval_steps": 500, |
|
"global_step": 50000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0028218296743608554, |
|
"grad_norm": 175.4037628173828, |
|
"learning_rate": 1.3261851015801355e-07, |
|
"loss": 3.1599, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.005643659348721711, |
|
"grad_norm": 24.378341674804688, |
|
"learning_rate": 2.737020316027088e-07, |
|
"loss": 1.848, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.008465489023082567, |
|
"grad_norm": 27.120737075805664, |
|
"learning_rate": 4.147855530474041e-07, |
|
"loss": 1.2576, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.011287318697443422, |
|
"grad_norm": 17.9366397857666, |
|
"learning_rate": 5.558690744920993e-07, |
|
"loss": 1.0372, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.014109148371804279, |
|
"grad_norm": 24.153575897216797, |
|
"learning_rate": 6.969525959367947e-07, |
|
"loss": 0.9444, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.016930978046165134, |
|
"grad_norm": 15.082656860351562, |
|
"learning_rate": 8.3803611738149e-07, |
|
"loss": 0.8119, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.01975280772052599, |
|
"grad_norm": 23.82472801208496, |
|
"learning_rate": 9.79119638826185e-07, |
|
"loss": 0.7448, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.022574637394886844, |
|
"grad_norm": 46.331058502197266, |
|
"learning_rate": 1.1187923250564336e-06, |
|
"loss": 0.7746, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0253964670692477, |
|
"grad_norm": 29.84396743774414, |
|
"learning_rate": 1.2598758465011287e-06, |
|
"loss": 0.6949, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.028218296743608557, |
|
"grad_norm": 34.73414993286133, |
|
"learning_rate": 1.4009593679458242e-06, |
|
"loss": 0.6737, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03104012641796941, |
|
"grad_norm": 33.15461349487305, |
|
"learning_rate": 1.5420428893905193e-06, |
|
"loss": 0.6346, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.03386195609233027, |
|
"grad_norm": 61.0333251953125, |
|
"learning_rate": 1.6831264108352146e-06, |
|
"loss": 0.6682, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.036683785766691124, |
|
"grad_norm": 28.32975196838379, |
|
"learning_rate": 1.8242099322799097e-06, |
|
"loss": 0.6747, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.03950561544105198, |
|
"grad_norm": 26.81439781188965, |
|
"learning_rate": 1.9652934537246053e-06, |
|
"loss": 0.668, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.04232744511541283, |
|
"grad_norm": 12.049301147460938, |
|
"learning_rate": 2.1063769751693e-06, |
|
"loss": 0.624, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04514927478977369, |
|
"grad_norm": 24.25949478149414, |
|
"learning_rate": 2.2474604966139955e-06, |
|
"loss": 0.5744, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.047971104464134544, |
|
"grad_norm": 28.51569938659668, |
|
"learning_rate": 2.3885440180586912e-06, |
|
"loss": 0.6342, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.0507929341384954, |
|
"grad_norm": 29.432788848876953, |
|
"learning_rate": 2.5296275395033865e-06, |
|
"loss": 0.6092, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.05361476381285626, |
|
"grad_norm": 38.4299430847168, |
|
"learning_rate": 2.6707110609480814e-06, |
|
"loss": 0.5947, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.056436593487217114, |
|
"grad_norm": 76.33415222167969, |
|
"learning_rate": 2.8117945823927768e-06, |
|
"loss": 0.6018, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.059258423161577964, |
|
"grad_norm": 26.28867530822754, |
|
"learning_rate": 2.952878103837472e-06, |
|
"loss": 0.5609, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.06208025283593882, |
|
"grad_norm": 27.49398422241211, |
|
"learning_rate": 3.0939616252821674e-06, |
|
"loss": 0.5916, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.06490208251029968, |
|
"grad_norm": 27.57271385192871, |
|
"learning_rate": 3.2350451467268623e-06, |
|
"loss": 0.5775, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.06772391218466053, |
|
"grad_norm": 40.42005157470703, |
|
"learning_rate": 3.3761286681715576e-06, |
|
"loss": 0.5847, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.07054574185902139, |
|
"grad_norm": 23.6585693359375, |
|
"learning_rate": 3.517212189616253e-06, |
|
"loss": 0.5808, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.07336757153338225, |
|
"grad_norm": 21.844501495361328, |
|
"learning_rate": 3.6582957110609487e-06, |
|
"loss": 0.562, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.0761894012077431, |
|
"grad_norm": 26.929052352905273, |
|
"learning_rate": 3.799379232505644e-06, |
|
"loss": 0.569, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.07901123088210396, |
|
"grad_norm": 44.13679504394531, |
|
"learning_rate": 3.9404627539503385e-06, |
|
"loss": 0.6041, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.08183306055646482, |
|
"grad_norm": 26.586490631103516, |
|
"learning_rate": 4.081546275395034e-06, |
|
"loss": 0.5796, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.08465489023082566, |
|
"grad_norm": 25.3786563873291, |
|
"learning_rate": 4.222629796839729e-06, |
|
"loss": 0.5912, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.08747671990518652, |
|
"grad_norm": 31.699079513549805, |
|
"learning_rate": 4.363713318284425e-06, |
|
"loss": 0.5506, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.09029854957954737, |
|
"grad_norm": 38.314876556396484, |
|
"learning_rate": 4.50479683972912e-06, |
|
"loss": 0.5687, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.09312037925390823, |
|
"grad_norm": 31.68910789489746, |
|
"learning_rate": 4.6458803611738155e-06, |
|
"loss": 0.5463, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.09594220892826909, |
|
"grad_norm": 34.52235412597656, |
|
"learning_rate": 4.78696388261851e-06, |
|
"loss": 0.5681, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.09876403860262994, |
|
"grad_norm": 52.666526794433594, |
|
"learning_rate": 4.928047404063206e-06, |
|
"loss": 0.5819, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.1015858682769908, |
|
"grad_norm": 55.63333511352539, |
|
"learning_rate": 5.069130925507901e-06, |
|
"loss": 0.5496, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.10440769795135166, |
|
"grad_norm": 30.57868003845215, |
|
"learning_rate": 5.210214446952596e-06, |
|
"loss": 0.548, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.10722952762571251, |
|
"grad_norm": 27.27458953857422, |
|
"learning_rate": 5.3512979683972925e-06, |
|
"loss": 0.5183, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.11005135730007337, |
|
"grad_norm": 48.254302978515625, |
|
"learning_rate": 5.4923814898419865e-06, |
|
"loss": 0.5295, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.11287318697443423, |
|
"grad_norm": 66.4515151977539, |
|
"learning_rate": 5.6334650112866814e-06, |
|
"loss": 0.5385, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.11569501664879508, |
|
"grad_norm": 15.82089900970459, |
|
"learning_rate": 5.774548532731378e-06, |
|
"loss": 0.4862, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.11851684632315593, |
|
"grad_norm": 14.725001335144043, |
|
"learning_rate": 5.915632054176073e-06, |
|
"loss": 0.5143, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.12133867599751678, |
|
"grad_norm": 11.792068481445312, |
|
"learning_rate": 6.056715575620769e-06, |
|
"loss": 0.5314, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.12416050567187764, |
|
"grad_norm": 29.84812355041504, |
|
"learning_rate": 6.1977990970654636e-06, |
|
"loss": 0.5633, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.1269823353462385, |
|
"grad_norm": 27.085613250732422, |
|
"learning_rate": 6.3388826185101585e-06, |
|
"loss": 0.5071, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.12980416502059935, |
|
"grad_norm": 31.483890533447266, |
|
"learning_rate": 6.479966139954854e-06, |
|
"loss": 0.5577, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.13262599469496023, |
|
"grad_norm": 17.639286041259766, |
|
"learning_rate": 6.621049661399549e-06, |
|
"loss": 0.5347, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.13544782436932107, |
|
"grad_norm": 27.730653762817383, |
|
"learning_rate": 6.762133182844244e-06, |
|
"loss": 0.516, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.1382696540436819, |
|
"grad_norm": 36.5928840637207, |
|
"learning_rate": 6.90321670428894e-06, |
|
"loss": 0.4875, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.14109148371804278, |
|
"grad_norm": 16.40862274169922, |
|
"learning_rate": 7.044300225733635e-06, |
|
"loss": 0.4683, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.14391331339240362, |
|
"grad_norm": 26.178585052490234, |
|
"learning_rate": 7.18538374717833e-06, |
|
"loss": 0.5131, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.1467351430667645, |
|
"grad_norm": 31.325714111328125, |
|
"learning_rate": 7.326467268623025e-06, |
|
"loss": 0.4667, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.14955697274112534, |
|
"grad_norm": 32.41719055175781, |
|
"learning_rate": 7.46755079006772e-06, |
|
"loss": 0.5131, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.1523788024154862, |
|
"grad_norm": 19.27738380432129, |
|
"learning_rate": 7.608634311512416e-06, |
|
"loss": 0.4934, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.15520063208984705, |
|
"grad_norm": 18.57884407043457, |
|
"learning_rate": 7.74971783295711e-06, |
|
"loss": 0.5104, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.15802246176420792, |
|
"grad_norm": 23.134544372558594, |
|
"learning_rate": 7.890801354401807e-06, |
|
"loss": 0.5355, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.16084429143856876, |
|
"grad_norm": 31.898052215576172, |
|
"learning_rate": 8.031884875846502e-06, |
|
"loss": 0.4796, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.16366612111292964, |
|
"grad_norm": 25.020967483520508, |
|
"learning_rate": 8.172968397291197e-06, |
|
"loss": 0.5195, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.16648795078729048, |
|
"grad_norm": 33.08086395263672, |
|
"learning_rate": 8.314051918735892e-06, |
|
"loss": 0.5263, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.16930978046165132, |
|
"grad_norm": 30.09421730041504, |
|
"learning_rate": 8.455135440180587e-06, |
|
"loss": 0.4806, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.1721316101360122, |
|
"grad_norm": 27.095197677612305, |
|
"learning_rate": 8.596218961625284e-06, |
|
"loss": 0.5154, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.17495343981037303, |
|
"grad_norm": 30.298784255981445, |
|
"learning_rate": 8.737302483069978e-06, |
|
"loss": 0.5072, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.1777752694847339, |
|
"grad_norm": 21.500614166259766, |
|
"learning_rate": 8.878386004514673e-06, |
|
"loss": 0.4962, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.18059709915909475, |
|
"grad_norm": 19.31947898864746, |
|
"learning_rate": 9.019469525959368e-06, |
|
"loss": 0.5303, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.18341892883345562, |
|
"grad_norm": 43.50458526611328, |
|
"learning_rate": 9.160553047404063e-06, |
|
"loss": 0.4585, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.18624075850781646, |
|
"grad_norm": 45.34175491333008, |
|
"learning_rate": 9.30163656884876e-06, |
|
"loss": 0.5084, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.18906258818217733, |
|
"grad_norm": 45.26927185058594, |
|
"learning_rate": 9.442720090293455e-06, |
|
"loss": 0.4849, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.19188441785653818, |
|
"grad_norm": 42.85562515258789, |
|
"learning_rate": 9.58380361173815e-06, |
|
"loss": 0.5519, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.19470624753089905, |
|
"grad_norm": 21.38568687438965, |
|
"learning_rate": 9.724887133182846e-06, |
|
"loss": 0.5143, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.1975280772052599, |
|
"grad_norm": 40.83327865600586, |
|
"learning_rate": 9.865970654627541e-06, |
|
"loss": 0.5234, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.20034990687962076, |
|
"grad_norm": 18.91502571105957, |
|
"learning_rate": 9.999216153508497e-06, |
|
"loss": 0.5068, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.2031717365539816, |
|
"grad_norm": 21.48363494873047, |
|
"learning_rate": 9.983539223678435e-06, |
|
"loss": 0.5049, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.20599356622834245, |
|
"grad_norm": 36.2054443359375, |
|
"learning_rate": 9.967862293848373e-06, |
|
"loss": 0.5111, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.20881539590270332, |
|
"grad_norm": 33.61429214477539, |
|
"learning_rate": 9.952185364018311e-06, |
|
"loss": 0.4963, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.21163722557706416, |
|
"grad_norm": 26.054624557495117, |
|
"learning_rate": 9.93650843418825e-06, |
|
"loss": 0.4979, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.21445905525142503, |
|
"grad_norm": 22.114206314086914, |
|
"learning_rate": 9.920831504358187e-06, |
|
"loss": 0.4938, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.21728088492578587, |
|
"grad_norm": 25.162818908691406, |
|
"learning_rate": 9.905154574528125e-06, |
|
"loss": 0.5034, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.22010271460014674, |
|
"grad_norm": 32.36638259887695, |
|
"learning_rate": 9.889477644698063e-06, |
|
"loss": 0.4968, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.22292454427450759, |
|
"grad_norm": 12.753120422363281, |
|
"learning_rate": 9.873800714868001e-06, |
|
"loss": 0.5068, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.22574637394886846, |
|
"grad_norm": 24.235403060913086, |
|
"learning_rate": 9.85812378503794e-06, |
|
"loss": 0.5157, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.2285682036232293, |
|
"grad_norm": 29.90271759033203, |
|
"learning_rate": 9.842446855207877e-06, |
|
"loss": 0.5183, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.23139003329759017, |
|
"grad_norm": 29.402860641479492, |
|
"learning_rate": 9.826769925377815e-06, |
|
"loss": 0.4724, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.234211862971951, |
|
"grad_norm": 25.902576446533203, |
|
"learning_rate": 9.811092995547753e-06, |
|
"loss": 0.5222, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.23703369264631186, |
|
"grad_norm": 22.64885711669922, |
|
"learning_rate": 9.795416065717691e-06, |
|
"loss": 0.5079, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.23985552232067273, |
|
"grad_norm": 29.80979347229004, |
|
"learning_rate": 9.779739135887628e-06, |
|
"loss": 0.4755, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.24267735199503357, |
|
"grad_norm": 31.037330627441406, |
|
"learning_rate": 9.764062206057566e-06, |
|
"loss": 0.4869, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.24549918166939444, |
|
"grad_norm": 24.076200485229492, |
|
"learning_rate": 9.748385276227504e-06, |
|
"loss": 0.4911, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.24832101134375528, |
|
"grad_norm": 26.947887420654297, |
|
"learning_rate": 9.732708346397442e-06, |
|
"loss": 0.4852, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.2511428410181161, |
|
"grad_norm": 18.701513290405273, |
|
"learning_rate": 9.71703141656738e-06, |
|
"loss": 0.5125, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.253964670692477, |
|
"grad_norm": 20.67565155029297, |
|
"learning_rate": 9.701354486737318e-06, |
|
"loss": 0.5037, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.25678650036683787, |
|
"grad_norm": 17.804229736328125, |
|
"learning_rate": 9.685677556907256e-06, |
|
"loss": 0.5024, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.2596083300411987, |
|
"grad_norm": 25.94413185119629, |
|
"learning_rate": 9.670000627077194e-06, |
|
"loss": 0.5133, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.26243015971555955, |
|
"grad_norm": 20.22077178955078, |
|
"learning_rate": 9.654323697247132e-06, |
|
"loss": 0.4844, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.26525198938992045, |
|
"grad_norm": 32.44327926635742, |
|
"learning_rate": 9.63864676741707e-06, |
|
"loss": 0.4573, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.2680738190642813, |
|
"grad_norm": 10.91318130493164, |
|
"learning_rate": 9.622969837587008e-06, |
|
"loss": 0.4434, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.27089564873864214, |
|
"grad_norm": 13.019550323486328, |
|
"learning_rate": 9.607292907756946e-06, |
|
"loss": 0.4943, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.273717478413003, |
|
"grad_norm": 15.419180870056152, |
|
"learning_rate": 9.591615977926884e-06, |
|
"loss": 0.492, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.2765393080873638, |
|
"grad_norm": 24.357208251953125, |
|
"learning_rate": 9.575939048096822e-06, |
|
"loss": 0.4474, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.2793611377617247, |
|
"grad_norm": 33.39434051513672, |
|
"learning_rate": 9.56026211826676e-06, |
|
"loss": 0.4623, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.28218296743608556, |
|
"grad_norm": 32.260337829589844, |
|
"learning_rate": 9.544585188436696e-06, |
|
"loss": 0.501, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.2850047971104464, |
|
"grad_norm": 29.994497299194336, |
|
"learning_rate": 9.528908258606634e-06, |
|
"loss": 0.4795, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.28782662678480725, |
|
"grad_norm": 51.09225845336914, |
|
"learning_rate": 9.513388098074874e-06, |
|
"loss": 0.4829, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.29064845645916815, |
|
"grad_norm": 17.05582618713379, |
|
"learning_rate": 9.497711168244812e-06, |
|
"loss": 0.5194, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.293470286133529, |
|
"grad_norm": 47.8975830078125, |
|
"learning_rate": 9.48203423841475e-06, |
|
"loss": 0.4935, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.29629211580788983, |
|
"grad_norm": 20.064878463745117, |
|
"learning_rate": 9.466514077882987e-06, |
|
"loss": 0.4558, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.2991139454822507, |
|
"grad_norm": 30.60138511657715, |
|
"learning_rate": 9.450837148052927e-06, |
|
"loss": 0.4908, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.3019357751566116, |
|
"grad_norm": 12.470020294189453, |
|
"learning_rate": 9.435160218222865e-06, |
|
"loss": 0.4521, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.3047576048309724, |
|
"grad_norm": 26.887653350830078, |
|
"learning_rate": 9.419483288392803e-06, |
|
"loss": 0.3894, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.30757943450533326, |
|
"grad_norm": 26.63880157470703, |
|
"learning_rate": 9.40380635856274e-06, |
|
"loss": 0.4649, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.3104012641796941, |
|
"grad_norm": 29.928979873657227, |
|
"learning_rate": 9.388129428732677e-06, |
|
"loss": 0.435, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.31322309385405495, |
|
"grad_norm": 30.5889949798584, |
|
"learning_rate": 9.372452498902615e-06, |
|
"loss": 0.445, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.31604492352841584, |
|
"grad_norm": 31.825000762939453, |
|
"learning_rate": 9.356775569072553e-06, |
|
"loss": 0.4913, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.3188667532027767, |
|
"grad_norm": 40.66577911376953, |
|
"learning_rate": 9.341098639242491e-06, |
|
"loss": 0.4763, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.32168858287713753, |
|
"grad_norm": 17.6318359375, |
|
"learning_rate": 9.32542170941243e-06, |
|
"loss": 0.4989, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.3245104125514984, |
|
"grad_norm": 26.51487922668457, |
|
"learning_rate": 9.309744779582367e-06, |
|
"loss": 0.4486, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.32733224222585927, |
|
"grad_norm": 3.3657779693603516, |
|
"learning_rate": 9.294067849752305e-06, |
|
"loss": 0.4233, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.3301540719002201, |
|
"grad_norm": 34.170921325683594, |
|
"learning_rate": 9.278390919922243e-06, |
|
"loss": 0.4867, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.33297590157458096, |
|
"grad_norm": 23.013031005859375, |
|
"learning_rate": 9.262713990092181e-06, |
|
"loss": 0.468, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.3357977312489418, |
|
"grad_norm": 16.811792373657227, |
|
"learning_rate": 9.24703706026212e-06, |
|
"loss": 0.4735, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.33861956092330264, |
|
"grad_norm": 29.813079833984375, |
|
"learning_rate": 9.231360130432057e-06, |
|
"loss": 0.4386, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.34144139059766354, |
|
"grad_norm": 29.00061798095703, |
|
"learning_rate": 9.215683200601995e-06, |
|
"loss": 0.4741, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.3442632202720244, |
|
"grad_norm": 17.047637939453125, |
|
"learning_rate": 9.200006270771933e-06, |
|
"loss": 0.4525, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.3470850499463852, |
|
"grad_norm": 16.266225814819336, |
|
"learning_rate": 9.184329340941871e-06, |
|
"loss": 0.4348, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.34990687962074607, |
|
"grad_norm": 31.315366744995117, |
|
"learning_rate": 9.16865241111181e-06, |
|
"loss": 0.4497, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.35272870929510697, |
|
"grad_norm": 14.484158515930176, |
|
"learning_rate": 9.152975481281746e-06, |
|
"loss": 0.4492, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.3555505389694678, |
|
"grad_norm": 14.117356300354004, |
|
"learning_rate": 9.137298551451684e-06, |
|
"loss": 0.4731, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.35837236864382865, |
|
"grad_norm": 18.32455825805664, |
|
"learning_rate": 9.121621621621622e-06, |
|
"loss": 0.4952, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.3611941983181895, |
|
"grad_norm": 22.009477615356445, |
|
"learning_rate": 9.10594469179156e-06, |
|
"loss": 0.4374, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.3640160279925504, |
|
"grad_norm": 44.56202697753906, |
|
"learning_rate": 9.0902677619615e-06, |
|
"loss": 0.4576, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.36683785766691124, |
|
"grad_norm": 32.11207962036133, |
|
"learning_rate": 9.074590832131436e-06, |
|
"loss": 0.4477, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.3696596873412721, |
|
"grad_norm": 18.823020935058594, |
|
"learning_rate": 9.058913902301374e-06, |
|
"loss": 0.4628, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.3724815170156329, |
|
"grad_norm": 6.9682793617248535, |
|
"learning_rate": 9.043236972471312e-06, |
|
"loss": 0.4405, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.37530334668999377, |
|
"grad_norm": 56.02937698364258, |
|
"learning_rate": 9.027873581237852e-06, |
|
"loss": 0.4235, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.37812517636435466, |
|
"grad_norm": 17.02280044555664, |
|
"learning_rate": 9.012196651407788e-06, |
|
"loss": 0.4659, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.3809470060387155, |
|
"grad_norm": 127.55671691894531, |
|
"learning_rate": 8.996519721577726e-06, |
|
"loss": 0.4209, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.38376883571307635, |
|
"grad_norm": 23.909799575805664, |
|
"learning_rate": 8.980842791747665e-06, |
|
"loss": 0.4412, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.3865906653874372, |
|
"grad_norm": 42.155277252197266, |
|
"learning_rate": 8.965165861917603e-06, |
|
"loss": 0.4995, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.3894124950617981, |
|
"grad_norm": 21.608457565307617, |
|
"learning_rate": 8.94948893208754e-06, |
|
"loss": 0.4857, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.39223432473615893, |
|
"grad_norm": 25.70061492919922, |
|
"learning_rate": 8.933812002257479e-06, |
|
"loss": 0.4607, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.3950561544105198, |
|
"grad_norm": 19.82216453552246, |
|
"learning_rate": 8.918135072427417e-06, |
|
"loss": 0.4585, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.3978779840848806, |
|
"grad_norm": 17.299297332763672, |
|
"learning_rate": 8.902458142597355e-06, |
|
"loss": 0.4895, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.4006998137592415, |
|
"grad_norm": 20.835285186767578, |
|
"learning_rate": 8.886781212767293e-06, |
|
"loss": 0.4418, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.40352164343360236, |
|
"grad_norm": 14.876681327819824, |
|
"learning_rate": 8.87110428293723e-06, |
|
"loss": 0.4235, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.4063434731079632, |
|
"grad_norm": 44.82238006591797, |
|
"learning_rate": 8.855427353107167e-06, |
|
"loss": 0.4812, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.40916530278232405, |
|
"grad_norm": 30.035110473632812, |
|
"learning_rate": 8.839750423277105e-06, |
|
"loss": 0.4643, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.4119871324566849, |
|
"grad_norm": 24.600914001464844, |
|
"learning_rate": 8.824073493447045e-06, |
|
"loss": 0.4672, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.4148089621310458, |
|
"grad_norm": 7.020761966705322, |
|
"learning_rate": 8.808396563616983e-06, |
|
"loss": 0.4084, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.41763079180540663, |
|
"grad_norm": 31.321611404418945, |
|
"learning_rate": 8.79271963378692e-06, |
|
"loss": 0.4673, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.4204526214797675, |
|
"grad_norm": 26.225324630737305, |
|
"learning_rate": 8.777042703956857e-06, |
|
"loss": 0.4656, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.4232744511541283, |
|
"grad_norm": 21.322677612304688, |
|
"learning_rate": 8.761365774126795e-06, |
|
"loss": 0.4201, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.4260962808284892, |
|
"grad_norm": 9.067322731018066, |
|
"learning_rate": 8.745688844296733e-06, |
|
"loss": 0.4521, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.42891811050285006, |
|
"grad_norm": 29.99122428894043, |
|
"learning_rate": 8.730011914466671e-06, |
|
"loss": 0.4767, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.4317399401772109, |
|
"grad_norm": 19.088232040405273, |
|
"learning_rate": 8.71433498463661e-06, |
|
"loss": 0.4604, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.43456176985157174, |
|
"grad_norm": 43.28144836425781, |
|
"learning_rate": 8.698658054806547e-06, |
|
"loss": 0.4886, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.4373835995259326, |
|
"grad_norm": 23.3623046875, |
|
"learning_rate": 8.682981124976485e-06, |
|
"loss": 0.461, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.4402054292002935, |
|
"grad_norm": 10.750836372375488, |
|
"learning_rate": 8.667304195146423e-06, |
|
"loss": 0.4882, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.44302725887465433, |
|
"grad_norm": 20.971227645874023, |
|
"learning_rate": 8.651627265316361e-06, |
|
"loss": 0.4247, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.44584908854901517, |
|
"grad_norm": 91.97208404541016, |
|
"learning_rate": 8.6359503354863e-06, |
|
"loss": 0.4089, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.448670918223376, |
|
"grad_norm": 32.81388854980469, |
|
"learning_rate": 8.620273405656236e-06, |
|
"loss": 0.4393, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.4514927478977369, |
|
"grad_norm": 47.73301315307617, |
|
"learning_rate": 8.604596475826175e-06, |
|
"loss": 0.4213, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.45431457757209776, |
|
"grad_norm": 30.3223934173584, |
|
"learning_rate": 8.588919545996113e-06, |
|
"loss": 0.4443, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.4571364072464586, |
|
"grad_norm": 31.105688095092773, |
|
"learning_rate": 8.573242616166051e-06, |
|
"loss": 0.456, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.45995823692081944, |
|
"grad_norm": 31.305255889892578, |
|
"learning_rate": 8.55756568633599e-06, |
|
"loss": 0.4203, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.46278006659518034, |
|
"grad_norm": 16.12314224243164, |
|
"learning_rate": 8.541888756505926e-06, |
|
"loss": 0.467, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.4656018962695412, |
|
"grad_norm": 12.041780471801758, |
|
"learning_rate": 8.526211826675864e-06, |
|
"loss": 0.4549, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.468423725943902, |
|
"grad_norm": 70.41606903076172, |
|
"learning_rate": 8.510534896845802e-06, |
|
"loss": 0.453, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.47124555561826287, |
|
"grad_norm": 25.043682098388672, |
|
"learning_rate": 8.49485796701574e-06, |
|
"loss": 0.4863, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.4740673852926237, |
|
"grad_norm": 35.47560501098633, |
|
"learning_rate": 8.479181037185678e-06, |
|
"loss": 0.3768, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.4768892149669846, |
|
"grad_norm": 95.46676635742188, |
|
"learning_rate": 8.463504107355616e-06, |
|
"loss": 0.4667, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.47971104464134545, |
|
"grad_norm": 11.174333572387695, |
|
"learning_rate": 8.447827177525554e-06, |
|
"loss": 0.4867, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.4825328743157063, |
|
"grad_norm": 26.256561279296875, |
|
"learning_rate": 8.432150247695492e-06, |
|
"loss": 0.4473, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.48535470399006714, |
|
"grad_norm": 37.75515365600586, |
|
"learning_rate": 8.41647331786543e-06, |
|
"loss": 0.4615, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.48817653366442804, |
|
"grad_norm": 11.617927551269531, |
|
"learning_rate": 8.400796388035368e-06, |
|
"loss": 0.434, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.4909983633387889, |
|
"grad_norm": 36.8308219909668, |
|
"learning_rate": 8.385119458205306e-06, |
|
"loss": 0.4153, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.4938201930131497, |
|
"grad_norm": 21.96497344970703, |
|
"learning_rate": 8.369442528375244e-06, |
|
"loss": 0.4356, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.49664202268751056, |
|
"grad_norm": 24.90114974975586, |
|
"learning_rate": 8.353765598545182e-06, |
|
"loss": 0.4432, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.49946385236187146, |
|
"grad_norm": 35.501869201660156, |
|
"learning_rate": 8.33808866871512e-06, |
|
"loss": 0.4359, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.5022856820362323, |
|
"grad_norm": 19.375484466552734, |
|
"learning_rate": 8.322411738885058e-06, |
|
"loss": 0.4736, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.5051075117105931, |
|
"grad_norm": 33.64216995239258, |
|
"learning_rate": 8.306734809054994e-06, |
|
"loss": 0.4209, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.507929341384954, |
|
"grad_norm": 26.897354125976562, |
|
"learning_rate": 8.291057879224932e-06, |
|
"loss": 0.4558, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.5107511710593149, |
|
"grad_norm": 28.531604766845703, |
|
"learning_rate": 8.27538094939487e-06, |
|
"loss": 0.4091, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.5135730007336757, |
|
"grad_norm": 17.724021911621094, |
|
"learning_rate": 8.259704019564809e-06, |
|
"loss": 0.4936, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.5163948304080366, |
|
"grad_norm": 30.871612548828125, |
|
"learning_rate": 8.244027089734748e-06, |
|
"loss": 0.4301, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.5192166600823974, |
|
"grad_norm": 10.603446006774902, |
|
"learning_rate": 8.228350159904685e-06, |
|
"loss": 0.4271, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.5220384897567583, |
|
"grad_norm": 21.891576766967773, |
|
"learning_rate": 8.212673230074623e-06, |
|
"loss": 0.4397, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.5248603194311191, |
|
"grad_norm": 18.257530212402344, |
|
"learning_rate": 8.19699630024456e-06, |
|
"loss": 0.4438, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.52768214910548, |
|
"grad_norm": 38.37961959838867, |
|
"learning_rate": 8.181319370414499e-06, |
|
"loss": 0.457, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.5305039787798409, |
|
"grad_norm": 43.02951431274414, |
|
"learning_rate": 8.165642440584437e-06, |
|
"loss": 0.4775, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.5333258084542017, |
|
"grad_norm": 19.02762794494629, |
|
"learning_rate": 8.149965510754375e-06, |
|
"loss": 0.4016, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.5361476381285626, |
|
"grad_norm": 15.892502784729004, |
|
"learning_rate": 8.134288580924313e-06, |
|
"loss": 0.4091, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.5389694678029234, |
|
"grad_norm": 23.187881469726562, |
|
"learning_rate": 8.11861165109425e-06, |
|
"loss": 0.4637, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.5417912974772843, |
|
"grad_norm": 22.69843101501465, |
|
"learning_rate": 8.102934721264189e-06, |
|
"loss": 0.4567, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.5446131271516451, |
|
"grad_norm": 25.485212326049805, |
|
"learning_rate": 8.087257791434127e-06, |
|
"loss": 0.4329, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.547434956826006, |
|
"grad_norm": 13.632718086242676, |
|
"learning_rate": 8.071580861604063e-06, |
|
"loss": 0.3943, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.5502567865003668, |
|
"grad_norm": 28.55874252319336, |
|
"learning_rate": 8.055903931774001e-06, |
|
"loss": 0.4587, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.5530786161747276, |
|
"grad_norm": 14.307991027832031, |
|
"learning_rate": 8.040227001943939e-06, |
|
"loss": 0.4297, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.5559004458490886, |
|
"grad_norm": 29.6854305267334, |
|
"learning_rate": 8.024550072113879e-06, |
|
"loss": 0.4665, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.5587222755234494, |
|
"grad_norm": 12.39411735534668, |
|
"learning_rate": 8.008873142283817e-06, |
|
"loss": 0.44, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.5615441051978103, |
|
"grad_norm": 19.937421798706055, |
|
"learning_rate": 7.993352981752054e-06, |
|
"loss": 0.4209, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.5643659348721711, |
|
"grad_norm": 42.9919548034668, |
|
"learning_rate": 7.977676051921992e-06, |
|
"loss": 0.4165, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.567187764546532, |
|
"grad_norm": 36.624691009521484, |
|
"learning_rate": 7.96199912209193e-06, |
|
"loss": 0.4602, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.5700095942208928, |
|
"grad_norm": 96.30526733398438, |
|
"learning_rate": 7.946322192261868e-06, |
|
"loss": 0.4202, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.5728314238952537, |
|
"grad_norm": 23.55826187133789, |
|
"learning_rate": 7.930802031730107e-06, |
|
"loss": 0.4349, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.5756532535696145, |
|
"grad_norm": 17.5443172454834, |
|
"learning_rate": 7.915125101900044e-06, |
|
"loss": 0.4351, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.5784750832439753, |
|
"grad_norm": 33.78348922729492, |
|
"learning_rate": 7.899448172069982e-06, |
|
"loss": 0.4825, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.5812969129183363, |
|
"grad_norm": 72.64608001708984, |
|
"learning_rate": 7.88377124223992e-06, |
|
"loss": 0.4596, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.5841187425926971, |
|
"grad_norm": 29.05013656616211, |
|
"learning_rate": 7.868094312409858e-06, |
|
"loss": 0.436, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.586940572267058, |
|
"grad_norm": 11.080700874328613, |
|
"learning_rate": 7.852417382579796e-06, |
|
"loss": 0.4302, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.5897624019414188, |
|
"grad_norm": 12.57494068145752, |
|
"learning_rate": 7.836740452749734e-06, |
|
"loss": 0.4222, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.5925842316157797, |
|
"grad_norm": 18.02518081665039, |
|
"learning_rate": 7.821063522919672e-06, |
|
"loss": 0.3936, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.5954060612901405, |
|
"grad_norm": 23.98390007019043, |
|
"learning_rate": 7.80538659308961e-06, |
|
"loss": 0.429, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.5982278909645014, |
|
"grad_norm": 30.125253677368164, |
|
"learning_rate": 7.789709663259548e-06, |
|
"loss": 0.4255, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.6010497206388622, |
|
"grad_norm": 47.189022064208984, |
|
"learning_rate": 7.774032733429486e-06, |
|
"loss": 0.4276, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.6038715503132231, |
|
"grad_norm": 23.556894302368164, |
|
"learning_rate": 7.758355803599424e-06, |
|
"loss": 0.4735, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.606693379987584, |
|
"grad_norm": 18.217546463012695, |
|
"learning_rate": 7.742678873769362e-06, |
|
"loss": 0.4202, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.6095152096619448, |
|
"grad_norm": 15.294795036315918, |
|
"learning_rate": 7.7270019439393e-06, |
|
"loss": 0.3781, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.6123370393363057, |
|
"grad_norm": 27.4034366607666, |
|
"learning_rate": 7.711325014109238e-06, |
|
"loss": 0.4353, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.6151588690106665, |
|
"grad_norm": 6.8590240478515625, |
|
"learning_rate": 7.695648084279176e-06, |
|
"loss": 0.4145, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.6179806986850274, |
|
"grad_norm": 17.38852310180664, |
|
"learning_rate": 7.679971154449112e-06, |
|
"loss": 0.4146, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.6208025283593882, |
|
"grad_norm": 25.68893051147461, |
|
"learning_rate": 7.66429422461905e-06, |
|
"loss": 0.4162, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.623624358033749, |
|
"grad_norm": 40.78746795654297, |
|
"learning_rate": 7.648617294788989e-06, |
|
"loss": 0.4339, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.6264461877081099, |
|
"grad_norm": 37.15106201171875, |
|
"learning_rate": 7.632940364958927e-06, |
|
"loss": 0.4507, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.6292680173824708, |
|
"grad_norm": 16.895755767822266, |
|
"learning_rate": 7.617263435128865e-06, |
|
"loss": 0.4273, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.6320898470568317, |
|
"grad_norm": 20.340709686279297, |
|
"learning_rate": 7.6015865052988026e-06, |
|
"loss": 0.4298, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.6349116767311925, |
|
"grad_norm": 10.428862571716309, |
|
"learning_rate": 7.585909575468741e-06, |
|
"loss": 0.4425, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.6377335064055534, |
|
"grad_norm": 18.21014404296875, |
|
"learning_rate": 7.570232645638679e-06, |
|
"loss": 0.4118, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.6405553360799142, |
|
"grad_norm": 64.62041473388672, |
|
"learning_rate": 7.554555715808617e-06, |
|
"loss": 0.4506, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.6433771657542751, |
|
"grad_norm": 18.57569694519043, |
|
"learning_rate": 7.538878785978555e-06, |
|
"loss": 0.4119, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.6461989954286359, |
|
"grad_norm": 36.90799331665039, |
|
"learning_rate": 7.523201856148492e-06, |
|
"loss": 0.4595, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.6490208251029967, |
|
"grad_norm": 18.904600143432617, |
|
"learning_rate": 7.50752492631843e-06, |
|
"loss": 0.4712, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.6518426547773576, |
|
"grad_norm": 29.996702194213867, |
|
"learning_rate": 7.491847996488369e-06, |
|
"loss": 0.4554, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.6546644844517185, |
|
"grad_norm": 32.28736877441406, |
|
"learning_rate": 7.476171066658307e-06, |
|
"loss": 0.4066, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.6574863141260794, |
|
"grad_norm": 21.389598846435547, |
|
"learning_rate": 7.460494136828245e-06, |
|
"loss": 0.4264, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.6603081438004402, |
|
"grad_norm": 20.693862915039062, |
|
"learning_rate": 7.444817206998182e-06, |
|
"loss": 0.4345, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.6631299734748011, |
|
"grad_norm": 25.96352195739746, |
|
"learning_rate": 7.42914027716812e-06, |
|
"loss": 0.4207, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.6659518031491619, |
|
"grad_norm": 7.279160022735596, |
|
"learning_rate": 7.413463347338058e-06, |
|
"loss": 0.4606, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.6687736328235228, |
|
"grad_norm": 21.541379928588867, |
|
"learning_rate": 7.397786417507996e-06, |
|
"loss": 0.4424, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.6715954624978836, |
|
"grad_norm": 20.808313369750977, |
|
"learning_rate": 7.382109487677934e-06, |
|
"loss": 0.4207, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.6744172921722444, |
|
"grad_norm": 33.443336486816406, |
|
"learning_rate": 7.366432557847871e-06, |
|
"loss": 0.4192, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.6772391218466053, |
|
"grad_norm": 4.05402946472168, |
|
"learning_rate": 7.350755628017809e-06, |
|
"loss": 0.3986, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.6800609515209662, |
|
"grad_norm": 15.301130294799805, |
|
"learning_rate": 7.335078698187747e-06, |
|
"loss": 0.3992, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.6828827811953271, |
|
"grad_norm": 17.400495529174805, |
|
"learning_rate": 7.319401768357685e-06, |
|
"loss": 0.3992, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.6857046108696879, |
|
"grad_norm": 20.655588150024414, |
|
"learning_rate": 7.303724838527624e-06, |
|
"loss": 0.417, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.6885264405440488, |
|
"grad_norm": 17.406002044677734, |
|
"learning_rate": 7.2880479086975605e-06, |
|
"loss": 0.4224, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.6913482702184096, |
|
"grad_norm": 16.471834182739258, |
|
"learning_rate": 7.272370978867499e-06, |
|
"loss": 0.4193, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.6941700998927705, |
|
"grad_norm": 30.130985260009766, |
|
"learning_rate": 7.256694049037437e-06, |
|
"loss": 0.3966, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.6969919295671313, |
|
"grad_norm": 25.141477584838867, |
|
"learning_rate": 7.2410171192073754e-06, |
|
"loss": 0.4037, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.6998137592414921, |
|
"grad_norm": 13.053258895874023, |
|
"learning_rate": 7.2253401893773134e-06, |
|
"loss": 0.4396, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.7026355889158531, |
|
"grad_norm": 15.657031059265137, |
|
"learning_rate": 7.209663259547251e-06, |
|
"loss": 0.3962, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.7054574185902139, |
|
"grad_norm": 28.782482147216797, |
|
"learning_rate": 7.194143099015489e-06, |
|
"loss": 0.4074, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.7082792482645748, |
|
"grad_norm": 28.72026252746582, |
|
"learning_rate": 7.178466169185428e-06, |
|
"loss": 0.4422, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.7111010779389356, |
|
"grad_norm": 14.960714340209961, |
|
"learning_rate": 7.162789239355366e-06, |
|
"loss": 0.4374, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.7139229076132965, |
|
"grad_norm": 26.17475700378418, |
|
"learning_rate": 7.147112309525304e-06, |
|
"loss": 0.4295, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.7167447372876573, |
|
"grad_norm": 17.277433395385742, |
|
"learning_rate": 7.131435379695241e-06, |
|
"loss": 0.4229, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.7195665669620182, |
|
"grad_norm": 36.890193939208984, |
|
"learning_rate": 7.115758449865179e-06, |
|
"loss": 0.4005, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.722388396636379, |
|
"grad_norm": 18.203617095947266, |
|
"learning_rate": 7.100081520035117e-06, |
|
"loss": 0.4241, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.7252102263107398, |
|
"grad_norm": 26.79283905029297, |
|
"learning_rate": 7.084404590205055e-06, |
|
"loss": 0.447, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.7280320559851008, |
|
"grad_norm": 24.888479232788086, |
|
"learning_rate": 7.068727660374993e-06, |
|
"loss": 0.4332, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.7308538856594616, |
|
"grad_norm": 20.294689178466797, |
|
"learning_rate": 7.05305073054493e-06, |
|
"loss": 0.3704, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.7336757153338225, |
|
"grad_norm": 20.747148513793945, |
|
"learning_rate": 7.037373800714868e-06, |
|
"loss": 0.4064, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.7364975450081833, |
|
"grad_norm": 23.540077209472656, |
|
"learning_rate": 7.021696870884806e-06, |
|
"loss": 0.4507, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.7393193746825442, |
|
"grad_norm": 19.756235122680664, |
|
"learning_rate": 7.006019941054744e-06, |
|
"loss": 0.4319, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.742141204356905, |
|
"grad_norm": 23.604589462280273, |
|
"learning_rate": 6.990343011224683e-06, |
|
"loss": 0.4004, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.7449630340312658, |
|
"grad_norm": 16.374685287475586, |
|
"learning_rate": 6.9746660813946195e-06, |
|
"loss": 0.3955, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.7477848637056267, |
|
"grad_norm": 14.722450256347656, |
|
"learning_rate": 6.958989151564558e-06, |
|
"loss": 0.3847, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.7506066933799875, |
|
"grad_norm": 25.750301361083984, |
|
"learning_rate": 6.943312221734496e-06, |
|
"loss": 0.3973, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.7534285230543485, |
|
"grad_norm": 29.76510238647461, |
|
"learning_rate": 6.9276352919044344e-06, |
|
"loss": 0.4074, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.7562503527287093, |
|
"grad_norm": 15.254782676696777, |
|
"learning_rate": 6.9119583620743725e-06, |
|
"loss": 0.4551, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.7590721824030702, |
|
"grad_norm": 58.55656433105469, |
|
"learning_rate": 6.89628143224431e-06, |
|
"loss": 0.4214, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.761894012077431, |
|
"grad_norm": 17.498140335083008, |
|
"learning_rate": 6.880604502414248e-06, |
|
"loss": 0.4295, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.7647158417517919, |
|
"grad_norm": 17.728435516357422, |
|
"learning_rate": 6.865084341882487e-06, |
|
"loss": 0.4005, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.7675376714261527, |
|
"grad_norm": 2.0892069339752197, |
|
"learning_rate": 6.849407412052425e-06, |
|
"loss": 0.3864, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.7703595011005135, |
|
"grad_norm": 23.627687454223633, |
|
"learning_rate": 6.833730482222363e-06, |
|
"loss": 0.4168, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.7731813307748744, |
|
"grad_norm": 3.664445161819458, |
|
"learning_rate": 6.8180535523923e-06, |
|
"loss": 0.4031, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.7760031604492352, |
|
"grad_norm": 21.93037986755371, |
|
"learning_rate": 6.802376622562238e-06, |
|
"loss": 0.4043, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.7788249901235962, |
|
"grad_norm": 22.63160514831543, |
|
"learning_rate": 6.786699692732176e-06, |
|
"loss": 0.3895, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.781646819797957, |
|
"grad_norm": 27.74697494506836, |
|
"learning_rate": 6.771022762902114e-06, |
|
"loss": 0.444, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.7844686494723179, |
|
"grad_norm": 60.84165573120117, |
|
"learning_rate": 6.755345833072052e-06, |
|
"loss": 0.4018, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.7872904791466787, |
|
"grad_norm": 41.33998107910156, |
|
"learning_rate": 6.739668903241989e-06, |
|
"loss": 0.3751, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.7901123088210396, |
|
"grad_norm": 7.033244609832764, |
|
"learning_rate": 6.723991973411927e-06, |
|
"loss": 0.4115, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.7929341384954004, |
|
"grad_norm": 14.345335960388184, |
|
"learning_rate": 6.708315043581865e-06, |
|
"loss": 0.3875, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.7957559681697612, |
|
"grad_norm": 18.797395706176758, |
|
"learning_rate": 6.692638113751803e-06, |
|
"loss": 0.385, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.7985777978441221, |
|
"grad_norm": 16.46762466430664, |
|
"learning_rate": 6.676961183921742e-06, |
|
"loss": 0.4209, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.801399627518483, |
|
"grad_norm": 42.29709243774414, |
|
"learning_rate": 6.6612842540916785e-06, |
|
"loss": 0.3697, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.8042214571928439, |
|
"grad_norm": 9.446586608886719, |
|
"learning_rate": 6.645607324261617e-06, |
|
"loss": 0.3943, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.8070432868672047, |
|
"grad_norm": 9.191097259521484, |
|
"learning_rate": 6.629930394431555e-06, |
|
"loss": 0.4095, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.8098651165415656, |
|
"grad_norm": 27.068052291870117, |
|
"learning_rate": 6.6142534646014934e-06, |
|
"loss": 0.4112, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.8126869462159264, |
|
"grad_norm": 7.080472946166992, |
|
"learning_rate": 6.5985765347714315e-06, |
|
"loss": 0.4225, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.8155087758902873, |
|
"grad_norm": 14.27031421661377, |
|
"learning_rate": 6.582899604941369e-06, |
|
"loss": 0.3919, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.8183306055646481, |
|
"grad_norm": 18.635713577270508, |
|
"learning_rate": 6.567222675111307e-06, |
|
"loss": 0.4208, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.8211524352390089, |
|
"grad_norm": 15.811898231506348, |
|
"learning_rate": 6.551545745281245e-06, |
|
"loss": 0.3505, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.8239742649133698, |
|
"grad_norm": 9.68923282623291, |
|
"learning_rate": 6.535868815451183e-06, |
|
"loss": 0.3879, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.8267960945877307, |
|
"grad_norm": 20.192699432373047, |
|
"learning_rate": 6.520191885621121e-06, |
|
"loss": 0.3847, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.8296179242620916, |
|
"grad_norm": 12.542318344116211, |
|
"learning_rate": 6.504514955791058e-06, |
|
"loss": 0.3754, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.8324397539364524, |
|
"grad_norm": 38.85354232788086, |
|
"learning_rate": 6.488838025960996e-06, |
|
"loss": 0.4194, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.8352615836108133, |
|
"grad_norm": 15.035304069519043, |
|
"learning_rate": 6.473161096130934e-06, |
|
"loss": 0.4417, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.8380834132851741, |
|
"grad_norm": 32.99053955078125, |
|
"learning_rate": 6.457484166300873e-06, |
|
"loss": 0.404, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.840905242959535, |
|
"grad_norm": 14.194585800170898, |
|
"learning_rate": 6.441807236470811e-06, |
|
"loss": 0.3853, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.8437270726338958, |
|
"grad_norm": 12.659259796142578, |
|
"learning_rate": 6.426130306640748e-06, |
|
"loss": 0.4223, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.8465489023082566, |
|
"grad_norm": 20.427833557128906, |
|
"learning_rate": 6.410453376810686e-06, |
|
"loss": 0.4356, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.8493707319826175, |
|
"grad_norm": 16.472497940063477, |
|
"learning_rate": 6.394776446980624e-06, |
|
"loss": 0.3848, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.8521925616569784, |
|
"grad_norm": 35.50379180908203, |
|
"learning_rate": 6.379099517150562e-06, |
|
"loss": 0.4256, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.8550143913313393, |
|
"grad_norm": 21.51117706298828, |
|
"learning_rate": 6.3634225873205e-06, |
|
"loss": 0.3987, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.8578362210057001, |
|
"grad_norm": 22.2384033203125, |
|
"learning_rate": 6.347745657490437e-06, |
|
"loss": 0.4016, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.860658050680061, |
|
"grad_norm": 39.41447448730469, |
|
"learning_rate": 6.332068727660375e-06, |
|
"loss": 0.3865, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.8634798803544218, |
|
"grad_norm": 7.72763729095459, |
|
"learning_rate": 6.316391797830313e-06, |
|
"loss": 0.4245, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.8663017100287826, |
|
"grad_norm": 22.295452117919922, |
|
"learning_rate": 6.3008716372985525e-06, |
|
"loss": 0.4315, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.8691235397031435, |
|
"grad_norm": 11.646142959594727, |
|
"learning_rate": 6.28519470746849e-06, |
|
"loss": 0.4399, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.8719453693775043, |
|
"grad_norm": 26.82891082763672, |
|
"learning_rate": 6.269517777638428e-06, |
|
"loss": 0.3699, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.8747671990518652, |
|
"grad_norm": 20.642230987548828, |
|
"learning_rate": 6.253840847808366e-06, |
|
"loss": 0.4167, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.8775890287262261, |
|
"grad_norm": 14.905511856079102, |
|
"learning_rate": 6.238163917978304e-06, |
|
"loss": 0.4052, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.880410858400587, |
|
"grad_norm": 69.13500213623047, |
|
"learning_rate": 6.222486988148242e-06, |
|
"loss": 0.3816, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.8832326880749478, |
|
"grad_norm": 20.331384658813477, |
|
"learning_rate": 6.206810058318179e-06, |
|
"loss": 0.3954, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.8860545177493087, |
|
"grad_norm": 18.5074405670166, |
|
"learning_rate": 6.191133128488117e-06, |
|
"loss": 0.4321, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.8888763474236695, |
|
"grad_norm": 34.6412467956543, |
|
"learning_rate": 6.175456198658055e-06, |
|
"loss": 0.3988, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.8916981770980303, |
|
"grad_norm": 15.479742050170898, |
|
"learning_rate": 6.159779268827993e-06, |
|
"loss": 0.4052, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.8945200067723912, |
|
"grad_norm": 31.75602149963379, |
|
"learning_rate": 6.144102338997932e-06, |
|
"loss": 0.3739, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.897341836446752, |
|
"grad_norm": 12.538125038146973, |
|
"learning_rate": 6.128425409167868e-06, |
|
"loss": 0.381, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.900163666121113, |
|
"grad_norm": 18.574064254760742, |
|
"learning_rate": 6.112748479337807e-06, |
|
"loss": 0.4223, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.9029854957954738, |
|
"grad_norm": 20.351797103881836, |
|
"learning_rate": 6.097071549507745e-06, |
|
"loss": 0.4108, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.9058073254698347, |
|
"grad_norm": 16.650991439819336, |
|
"learning_rate": 6.081394619677683e-06, |
|
"loss": 0.3868, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.9086291551441955, |
|
"grad_norm": 24.825759887695312, |
|
"learning_rate": 6.065717689847621e-06, |
|
"loss": 0.3987, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.9114509848185564, |
|
"grad_norm": 7.137796401977539, |
|
"learning_rate": 6.050040760017558e-06, |
|
"loss": 0.3624, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.9142728144929172, |
|
"grad_norm": 28.790571212768555, |
|
"learning_rate": 6.034363830187496e-06, |
|
"loss": 0.408, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.917094644167278, |
|
"grad_norm": 24.61075782775879, |
|
"learning_rate": 6.018686900357434e-06, |
|
"loss": 0.4137, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.9199164738416389, |
|
"grad_norm": 26.280406951904297, |
|
"learning_rate": 6.003009970527372e-06, |
|
"loss": 0.3809, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.9227383035159997, |
|
"grad_norm": 13.658437728881836, |
|
"learning_rate": 5.98733304069731e-06, |
|
"loss": 0.3811, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.9255601331903607, |
|
"grad_norm": 5.478005886077881, |
|
"learning_rate": 5.9716561108672476e-06, |
|
"loss": 0.3579, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.9283819628647215, |
|
"grad_norm": 17.17485237121582, |
|
"learning_rate": 5.955979181037186e-06, |
|
"loss": 0.392, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.9312037925390824, |
|
"grad_norm": 19.072818756103516, |
|
"learning_rate": 5.940302251207124e-06, |
|
"loss": 0.381, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.9340256222134432, |
|
"grad_norm": 5.044217586517334, |
|
"learning_rate": 5.9246253213770625e-06, |
|
"loss": 0.4043, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.936847451887804, |
|
"grad_norm": 20.710311889648438, |
|
"learning_rate": 5.9089483915470005e-06, |
|
"loss": 0.4074, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.9396692815621649, |
|
"grad_norm": 16.337045669555664, |
|
"learning_rate": 5.893271461716938e-06, |
|
"loss": 0.3658, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.9424911112365257, |
|
"grad_norm": 25.688541412353516, |
|
"learning_rate": 5.877594531886876e-06, |
|
"loss": 0.377, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.9453129409108866, |
|
"grad_norm": 15.326305389404297, |
|
"learning_rate": 5.862074371355114e-06, |
|
"loss": 0.4225, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.9481347705852474, |
|
"grad_norm": 23.59290313720703, |
|
"learning_rate": 5.846397441525052e-06, |
|
"loss": 0.387, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.9509566002596084, |
|
"grad_norm": 50.01143264770508, |
|
"learning_rate": 5.830720511694991e-06, |
|
"loss": 0.3959, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.9537784299339692, |
|
"grad_norm": 21.439271926879883, |
|
"learning_rate": 5.815043581864927e-06, |
|
"loss": 0.3403, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.9566002596083301, |
|
"grad_norm": 26.8652286529541, |
|
"learning_rate": 5.799366652034866e-06, |
|
"loss": 0.3849, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.9594220892826909, |
|
"grad_norm": 19.363805770874023, |
|
"learning_rate": 5.783689722204804e-06, |
|
"loss": 0.3969, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.9622439189570517, |
|
"grad_norm": 11.499284744262695, |
|
"learning_rate": 5.768012792374742e-06, |
|
"loss": 0.4231, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.9650657486314126, |
|
"grad_norm": 34.81698989868164, |
|
"learning_rate": 5.75233586254468e-06, |
|
"loss": 0.346, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.9678875783057734, |
|
"grad_norm": 31.242355346679688, |
|
"learning_rate": 5.736658932714617e-06, |
|
"loss": 0.3799, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.9707094079801343, |
|
"grad_norm": 27.321941375732422, |
|
"learning_rate": 5.720982002884555e-06, |
|
"loss": 0.3418, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.9735312376544952, |
|
"grad_norm": 22.507356643676758, |
|
"learning_rate": 5.705305073054493e-06, |
|
"loss": 0.3828, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.9763530673288561, |
|
"grad_norm": 19.005266189575195, |
|
"learning_rate": 5.689628143224431e-06, |
|
"loss": 0.4241, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.9791748970032169, |
|
"grad_norm": 17.969890594482422, |
|
"learning_rate": 5.673951213394369e-06, |
|
"loss": 0.4084, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.9819967266775778, |
|
"grad_norm": 16.145771026611328, |
|
"learning_rate": 5.6582742835643066e-06, |
|
"loss": 0.3818, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.9848185563519386, |
|
"grad_norm": 27.250308990478516, |
|
"learning_rate": 5.642597353734245e-06, |
|
"loss": 0.3544, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.9876403860262994, |
|
"grad_norm": 57.412166595458984, |
|
"learning_rate": 5.626920423904183e-06, |
|
"loss": 0.3829, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.9904622157006603, |
|
"grad_norm": 33.557403564453125, |
|
"learning_rate": 5.6112434940741215e-06, |
|
"loss": 0.4252, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.9932840453750211, |
|
"grad_norm": 21.193218231201172, |
|
"learning_rate": 5.5955665642440595e-06, |
|
"loss": 0.4006, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.996105875049382, |
|
"grad_norm": 26.300689697265625, |
|
"learning_rate": 5.579889634413997e-06, |
|
"loss": 0.3857, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.9989277047237429, |
|
"grad_norm": 13.547060012817383, |
|
"learning_rate": 5.564212704583935e-06, |
|
"loss": 0.3635, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 1.0017495343981038, |
|
"grad_norm": 19.988895416259766, |
|
"learning_rate": 5.548535774753873e-06, |
|
"loss": 0.3543, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.0045713640724645, |
|
"grad_norm": 12.582673072814941, |
|
"learning_rate": 5.532858844923811e-06, |
|
"loss": 0.3324, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 1.0073931937468255, |
|
"grad_norm": 17.54091453552246, |
|
"learning_rate": 5.517181915093749e-06, |
|
"loss": 0.3733, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 1.0102150234211864, |
|
"grad_norm": 12.848298072814941, |
|
"learning_rate": 5.501661754561986e-06, |
|
"loss": 0.3594, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 1.0130368530955471, |
|
"grad_norm": 19.090791702270508, |
|
"learning_rate": 5.485984824731925e-06, |
|
"loss": 0.3468, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 1.015858682769908, |
|
"grad_norm": 11.574841499328613, |
|
"learning_rate": 5.470307894901863e-06, |
|
"loss": 0.3442, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.0186805124442688, |
|
"grad_norm": 27.498323440551758, |
|
"learning_rate": 5.454630965071801e-06, |
|
"loss": 0.3296, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 1.0215023421186298, |
|
"grad_norm": 18.88401222229004, |
|
"learning_rate": 5.438954035241739e-06, |
|
"loss": 0.3814, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 1.0243241717929905, |
|
"grad_norm": 32.382423400878906, |
|
"learning_rate": 5.423277105411676e-06, |
|
"loss": 0.3652, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 1.0271460014673515, |
|
"grad_norm": 23.787736892700195, |
|
"learning_rate": 5.407600175581614e-06, |
|
"loss": 0.3308, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 1.0299678311417122, |
|
"grad_norm": 16.6649112701416, |
|
"learning_rate": 5.391923245751552e-06, |
|
"loss": 0.3386, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.0327896608160732, |
|
"grad_norm": 24.679080963134766, |
|
"learning_rate": 5.37624631592149e-06, |
|
"loss": 0.3248, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 1.0356114904904339, |
|
"grad_norm": 30.37528419494629, |
|
"learning_rate": 5.360569386091428e-06, |
|
"loss": 0.3572, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 1.0384333201647948, |
|
"grad_norm": 17.88707733154297, |
|
"learning_rate": 5.344892456261366e-06, |
|
"loss": 0.3473, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 1.0412551498391558, |
|
"grad_norm": 46.716697692871094, |
|
"learning_rate": 5.329215526431304e-06, |
|
"loss": 0.3372, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 1.0440769795135165, |
|
"grad_norm": 40.04937744140625, |
|
"learning_rate": 5.313538596601242e-06, |
|
"loss": 0.3848, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.0468988091878775, |
|
"grad_norm": 24.893983840942383, |
|
"learning_rate": 5.298018436069481e-06, |
|
"loss": 0.3518, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 1.0497206388622382, |
|
"grad_norm": 8.319790840148926, |
|
"learning_rate": 5.282341506239419e-06, |
|
"loss": 0.3164, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 1.0525424685365992, |
|
"grad_norm": 26.47063636779785, |
|
"learning_rate": 5.266664576409356e-06, |
|
"loss": 0.3034, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 1.05536429821096, |
|
"grad_norm": 41.414642333984375, |
|
"learning_rate": 5.250987646579294e-06, |
|
"loss": 0.3404, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 1.0581861278853208, |
|
"grad_norm": 29.034229278564453, |
|
"learning_rate": 5.235310716749232e-06, |
|
"loss": 0.3271, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.0610079575596818, |
|
"grad_norm": 12.065890312194824, |
|
"learning_rate": 5.219633786919171e-06, |
|
"loss": 0.3462, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 1.0638297872340425, |
|
"grad_norm": 21.52030372619629, |
|
"learning_rate": 5.203956857089109e-06, |
|
"loss": 0.3499, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 1.0666516169084035, |
|
"grad_norm": 14.449933052062988, |
|
"learning_rate": 5.188279927259045e-06, |
|
"loss": 0.2902, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 1.0694734465827642, |
|
"grad_norm": 26.385765075683594, |
|
"learning_rate": 5.172602997428984e-06, |
|
"loss": 0.3312, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 1.0722952762571252, |
|
"grad_norm": 30.85702896118164, |
|
"learning_rate": 5.156926067598922e-06, |
|
"loss": 0.3487, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.075117105931486, |
|
"grad_norm": 23.276355743408203, |
|
"learning_rate": 5.14124913776886e-06, |
|
"loss": 0.311, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 1.0779389356058469, |
|
"grad_norm": 15.212447166442871, |
|
"learning_rate": 5.125572207938798e-06, |
|
"loss": 0.3562, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 1.0807607652802076, |
|
"grad_norm": 11.746698379516602, |
|
"learning_rate": 5.1100520474070356e-06, |
|
"loss": 0.3741, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 1.0835825949545685, |
|
"grad_norm": 27.38930892944336, |
|
"learning_rate": 5.094375117576974e-06, |
|
"loss": 0.3321, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 1.0864044246289295, |
|
"grad_norm": 8.432238578796387, |
|
"learning_rate": 5.0786981877469124e-06, |
|
"loss": 0.3253, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.0892262543032902, |
|
"grad_norm": 20.7791690826416, |
|
"learning_rate": 5.0630212579168505e-06, |
|
"loss": 0.328, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 1.0920480839776512, |
|
"grad_norm": 25.740007400512695, |
|
"learning_rate": 5.0473443280867885e-06, |
|
"loss": 0.3106, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 1.094869913652012, |
|
"grad_norm": 16.10772132873535, |
|
"learning_rate": 5.031667398256726e-06, |
|
"loss": 0.3118, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 1.0976917433263729, |
|
"grad_norm": 0.25727030634880066, |
|
"learning_rate": 5.015990468426664e-06, |
|
"loss": 0.3339, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 1.1005135730007336, |
|
"grad_norm": 39.43502426147461, |
|
"learning_rate": 5.000313538596602e-06, |
|
"loss": 0.3497, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.1033354026750946, |
|
"grad_norm": 14.343636512756348, |
|
"learning_rate": 4.98463660876654e-06, |
|
"loss": 0.3212, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 1.1061572323494553, |
|
"grad_norm": 30.527856826782227, |
|
"learning_rate": 4.968959678936478e-06, |
|
"loss": 0.3732, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 1.1089790620238162, |
|
"grad_norm": 30.169086456298828, |
|
"learning_rate": 4.953282749106416e-06, |
|
"loss": 0.3391, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 1.1118008916981772, |
|
"grad_norm": 18.833539962768555, |
|
"learning_rate": 4.937605819276353e-06, |
|
"loss": 0.3477, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 1.114622721372538, |
|
"grad_norm": 17.68643569946289, |
|
"learning_rate": 4.921928889446291e-06, |
|
"loss": 0.3545, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.1174445510468989, |
|
"grad_norm": 24.589397430419922, |
|
"learning_rate": 4.906251959616229e-06, |
|
"loss": 0.3153, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 1.1202663807212596, |
|
"grad_norm": 9.25218677520752, |
|
"learning_rate": 4.890575029786167e-06, |
|
"loss": 0.3535, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 1.1230882103956206, |
|
"grad_norm": 21.546316146850586, |
|
"learning_rate": 4.874898099956105e-06, |
|
"loss": 0.3531, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 1.1259100400699813, |
|
"grad_norm": 20.65337562561035, |
|
"learning_rate": 4.859221170126043e-06, |
|
"loss": 0.3675, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 1.1287318697443423, |
|
"grad_norm": 11.405496597290039, |
|
"learning_rate": 4.843544240295981e-06, |
|
"loss": 0.3298, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.1315536994187032, |
|
"grad_norm": 9.069683074951172, |
|
"learning_rate": 4.827867310465918e-06, |
|
"loss": 0.3224, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 1.134375529093064, |
|
"grad_norm": 26.082910537719727, |
|
"learning_rate": 4.812190380635856e-06, |
|
"loss": 0.3633, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 1.137197358767425, |
|
"grad_norm": 27.91574478149414, |
|
"learning_rate": 4.796513450805795e-06, |
|
"loss": 0.3403, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 1.1400191884417856, |
|
"grad_norm": 14.8870267868042, |
|
"learning_rate": 4.780836520975732e-06, |
|
"loss": 0.3535, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 1.1428410181161466, |
|
"grad_norm": 49.3880729675293, |
|
"learning_rate": 4.76515959114567e-06, |
|
"loss": 0.3833, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.1456628477905073, |
|
"grad_norm": 26.83273696899414, |
|
"learning_rate": 4.749482661315608e-06, |
|
"loss": 0.3367, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 1.1484846774648683, |
|
"grad_norm": 15.598971366882324, |
|
"learning_rate": 4.7338057314855464e-06, |
|
"loss": 0.308, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 1.151306507139229, |
|
"grad_norm": 16.14190673828125, |
|
"learning_rate": 4.7181288016554844e-06, |
|
"loss": 0.3277, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 1.15412833681359, |
|
"grad_norm": 23.744840621948242, |
|
"learning_rate": 4.702451871825422e-06, |
|
"loss": 0.3407, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 1.1569501664879507, |
|
"grad_norm": 10.290372848510742, |
|
"learning_rate": 4.6867749419953605e-06, |
|
"loss": 0.3537, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.1597719961623116, |
|
"grad_norm": 19.450990676879883, |
|
"learning_rate": 4.671098012165298e-06, |
|
"loss": 0.3517, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 1.1625938258366726, |
|
"grad_norm": 7.8305463790893555, |
|
"learning_rate": 4.655421082335236e-06, |
|
"loss": 0.3366, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 1.1654156555110333, |
|
"grad_norm": 21.166301727294922, |
|
"learning_rate": 4.639744152505174e-06, |
|
"loss": 0.329, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 1.1682374851853943, |
|
"grad_norm": 33.572425842285156, |
|
"learning_rate": 4.624067222675112e-06, |
|
"loss": 0.3557, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 1.171059314859755, |
|
"grad_norm": 30.565309524536133, |
|
"learning_rate": 4.60839029284505e-06, |
|
"loss": 0.3466, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.173881144534116, |
|
"grad_norm": 32.635765075683594, |
|
"learning_rate": 4.592713363014987e-06, |
|
"loss": 0.3132, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 1.1767029742084767, |
|
"grad_norm": 2.45202898979187, |
|
"learning_rate": 4.577036433184926e-06, |
|
"loss": 0.3707, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 1.1795248038828376, |
|
"grad_norm": 4.587299823760986, |
|
"learning_rate": 4.561359503354864e-06, |
|
"loss": 0.3301, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 1.1823466335571986, |
|
"grad_norm": 25.243562698364258, |
|
"learning_rate": 4.545682573524801e-06, |
|
"loss": 0.3245, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 1.1851684632315593, |
|
"grad_norm": 22.650365829467773, |
|
"learning_rate": 4.530005643694739e-06, |
|
"loss": 0.3518, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.1879902929059203, |
|
"grad_norm": 18.754680633544922, |
|
"learning_rate": 4.514328713864677e-06, |
|
"loss": 0.3297, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 1.190812122580281, |
|
"grad_norm": 19.45864486694336, |
|
"learning_rate": 4.498651784034615e-06, |
|
"loss": 0.3446, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 1.193633952254642, |
|
"grad_norm": 23.143203735351562, |
|
"learning_rate": 4.482974854204553e-06, |
|
"loss": 0.3494, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 1.1964557819290027, |
|
"grad_norm": 33.07261276245117, |
|
"learning_rate": 4.467297924374491e-06, |
|
"loss": 0.3619, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 1.1992776116033637, |
|
"grad_norm": 8.783498764038086, |
|
"learning_rate": 4.451620994544429e-06, |
|
"loss": 0.3596, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.2020994412777244, |
|
"grad_norm": 19.049306869506836, |
|
"learning_rate": 4.435944064714366e-06, |
|
"loss": 0.3379, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 1.2049212709520853, |
|
"grad_norm": 22.958511352539062, |
|
"learning_rate": 4.420267134884304e-06, |
|
"loss": 0.3661, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 1.207743100626446, |
|
"grad_norm": 26.9248104095459, |
|
"learning_rate": 4.404590205054242e-06, |
|
"loss": 0.3483, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 1.210564930300807, |
|
"grad_norm": 39.222469329833984, |
|
"learning_rate": 4.389070044522481e-06, |
|
"loss": 0.3221, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 1.213386759975168, |
|
"grad_norm": 15.85262393951416, |
|
"learning_rate": 4.3733931146924195e-06, |
|
"loss": 0.3067, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.2162085896495287, |
|
"grad_norm": 11.266934394836426, |
|
"learning_rate": 4.357716184862357e-06, |
|
"loss": 0.3341, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 1.2190304193238897, |
|
"grad_norm": 30.2665958404541, |
|
"learning_rate": 4.342039255032295e-06, |
|
"loss": 0.3616, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 1.2218522489982504, |
|
"grad_norm": 20.351150512695312, |
|
"learning_rate": 4.326362325202233e-06, |
|
"loss": 0.3349, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 1.2246740786726114, |
|
"grad_norm": 14.63962173461914, |
|
"learning_rate": 4.310685395372171e-06, |
|
"loss": 0.3217, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 1.227495908346972, |
|
"grad_norm": 7.7118048667907715, |
|
"learning_rate": 4.295008465542109e-06, |
|
"loss": 0.3708, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.230317738021333, |
|
"grad_norm": 24.568256378173828, |
|
"learning_rate": 4.279331535712046e-06, |
|
"loss": 0.3349, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 1.233139567695694, |
|
"grad_norm": 64.85833740234375, |
|
"learning_rate": 4.263654605881985e-06, |
|
"loss": 0.3571, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 1.2359613973700547, |
|
"grad_norm": 6.7669596672058105, |
|
"learning_rate": 4.247977676051923e-06, |
|
"loss": 0.3257, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 1.2387832270444157, |
|
"grad_norm": 23.950790405273438, |
|
"learning_rate": 4.23230074622186e-06, |
|
"loss": 0.3309, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 1.2416050567187764, |
|
"grad_norm": 11.690123558044434, |
|
"learning_rate": 4.216623816391798e-06, |
|
"loss": 0.3109, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.2444268863931374, |
|
"grad_norm": 22.820682525634766, |
|
"learning_rate": 4.200946886561736e-06, |
|
"loss": 0.3684, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 1.247248716067498, |
|
"grad_norm": 7.326537609100342, |
|
"learning_rate": 4.185269956731674e-06, |
|
"loss": 0.3165, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 1.250070545741859, |
|
"grad_norm": 28.421340942382812, |
|
"learning_rate": 4.169593026901612e-06, |
|
"loss": 0.3667, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 1.25289237541622, |
|
"grad_norm": 16.26470947265625, |
|
"learning_rate": 4.15391609707155e-06, |
|
"loss": 0.3026, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 1.2557142050905807, |
|
"grad_norm": 22.38811683654785, |
|
"learning_rate": 4.138239167241488e-06, |
|
"loss": 0.3474, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.2585360347649415, |
|
"grad_norm": 10.734014511108398, |
|
"learning_rate": 4.122562237411425e-06, |
|
"loss": 0.3543, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 1.2613578644393024, |
|
"grad_norm": 18.710065841674805, |
|
"learning_rate": 4.106885307581363e-06, |
|
"loss": 0.3496, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 1.2641796941136634, |
|
"grad_norm": 2.5336763858795166, |
|
"learning_rate": 4.091208377751301e-06, |
|
"loss": 0.364, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 1.267001523788024, |
|
"grad_norm": 34.18901062011719, |
|
"learning_rate": 4.075531447921239e-06, |
|
"loss": 0.3058, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 1.269823353462385, |
|
"grad_norm": 22.022573471069336, |
|
"learning_rate": 4.0598545180911774e-06, |
|
"loss": 0.3144, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.2726451831367458, |
|
"grad_norm": 8.996906280517578, |
|
"learning_rate": 4.0441775882611155e-06, |
|
"loss": 0.3609, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 1.2754670128111067, |
|
"grad_norm": 25.605409622192383, |
|
"learning_rate": 4.0285006584310535e-06, |
|
"loss": 0.3217, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 1.2782888424854675, |
|
"grad_norm": 22.709383010864258, |
|
"learning_rate": 4.0128237286009915e-06, |
|
"loss": 0.3398, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 1.2811106721598284, |
|
"grad_norm": 17.49720001220703, |
|
"learning_rate": 3.997146798770929e-06, |
|
"loss": 0.3178, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 1.2839325018341894, |
|
"grad_norm": 19.673856735229492, |
|
"learning_rate": 3.981469868940867e-06, |
|
"loss": 0.3138, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.2867543315085501, |
|
"grad_norm": 21.33672332763672, |
|
"learning_rate": 3.965792939110805e-06, |
|
"loss": 0.343, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 1.289576161182911, |
|
"grad_norm": 33.10939025878906, |
|
"learning_rate": 3.950116009280743e-06, |
|
"loss": 0.3301, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 1.2923979908572718, |
|
"grad_norm": 26.29018211364746, |
|
"learning_rate": 3.934439079450681e-06, |
|
"loss": 0.3163, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 1.2952198205316328, |
|
"grad_norm": 10.499427795410156, |
|
"learning_rate": 3.918762149620619e-06, |
|
"loss": 0.3164, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 1.2980416502059935, |
|
"grad_norm": 35.09203338623047, |
|
"learning_rate": 3.903085219790557e-06, |
|
"loss": 0.3155, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.3008634798803544, |
|
"grad_norm": 18.63652992248535, |
|
"learning_rate": 3.887408289960494e-06, |
|
"loss": 0.3354, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 1.3036853095547154, |
|
"grad_norm": 17.310117721557617, |
|
"learning_rate": 3.871731360130432e-06, |
|
"loss": 0.3096, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 1.3065071392290761, |
|
"grad_norm": 30.03758430480957, |
|
"learning_rate": 3.856054430300371e-06, |
|
"loss": 0.3484, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 1.3093289689034369, |
|
"grad_norm": 15.059159278869629, |
|
"learning_rate": 3.840377500470308e-06, |
|
"loss": 0.3436, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 1.3121507985777978, |
|
"grad_norm": 19.42188262939453, |
|
"learning_rate": 3.824700570640246e-06, |
|
"loss": 0.3254, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.3149726282521588, |
|
"grad_norm": 21.08489990234375, |
|
"learning_rate": 3.8090236408101837e-06, |
|
"loss": 0.3292, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 1.3177944579265195, |
|
"grad_norm": 11.324195861816406, |
|
"learning_rate": 3.793346710980122e-06, |
|
"loss": 0.3301, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 1.3206162876008805, |
|
"grad_norm": 9.959671020507812, |
|
"learning_rate": 3.77766978115006e-06, |
|
"loss": 0.3029, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 1.3234381172752412, |
|
"grad_norm": 22.626794815063477, |
|
"learning_rate": 3.7619928513199978e-06, |
|
"loss": 0.3139, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 1.3262599469496021, |
|
"grad_norm": 21.049654006958008, |
|
"learning_rate": 3.746315921489936e-06, |
|
"loss": 0.3101, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.3290817766239629, |
|
"grad_norm": 8.8051118850708, |
|
"learning_rate": 3.730795760958174e-06, |
|
"loss": 0.3302, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 1.3319036062983238, |
|
"grad_norm": 20.859481811523438, |
|
"learning_rate": 3.715118831128112e-06, |
|
"loss": 0.3125, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 1.3347254359726848, |
|
"grad_norm": 6.377940654754639, |
|
"learning_rate": 3.6994419012980505e-06, |
|
"loss": 0.324, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 1.3375472656470455, |
|
"grad_norm": 20.301328659057617, |
|
"learning_rate": 3.683764971467988e-06, |
|
"loss": 0.3548, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 1.3403690953214065, |
|
"grad_norm": 9.764044761657715, |
|
"learning_rate": 3.668088041637926e-06, |
|
"loss": 0.354, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.3431909249957672, |
|
"grad_norm": 19.413543701171875, |
|
"learning_rate": 3.6524111118078637e-06, |
|
"loss": 0.3266, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 1.3460127546701282, |
|
"grad_norm": 16.790817260742188, |
|
"learning_rate": 3.6367341819778018e-06, |
|
"loss": 0.3377, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 1.3488345843444889, |
|
"grad_norm": 20.554304122924805, |
|
"learning_rate": 3.62105725214774e-06, |
|
"loss": 0.3209, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 1.3516564140188498, |
|
"grad_norm": 18.711938858032227, |
|
"learning_rate": 3.6053803223176774e-06, |
|
"loss": 0.3324, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 1.3544782436932108, |
|
"grad_norm": 23.41309356689453, |
|
"learning_rate": 3.589703392487616e-06, |
|
"loss": 0.3169, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.3573000733675715, |
|
"grad_norm": 42.12315368652344, |
|
"learning_rate": 3.5740264626575534e-06, |
|
"loss": 0.3001, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 1.3601219030419323, |
|
"grad_norm": 22.255311965942383, |
|
"learning_rate": 3.5583495328274915e-06, |
|
"loss": 0.3635, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 1.3629437327162932, |
|
"grad_norm": 18.722684860229492, |
|
"learning_rate": 3.5426726029974295e-06, |
|
"loss": 0.3579, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 1.3657655623906542, |
|
"grad_norm": 108.33175659179688, |
|
"learning_rate": 3.526995673167367e-06, |
|
"loss": 0.3021, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 1.368587392065015, |
|
"grad_norm": 17.446874618530273, |
|
"learning_rate": 3.511318743337305e-06, |
|
"loss": 0.3227, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.3714092217393758, |
|
"grad_norm": 15.62735366821289, |
|
"learning_rate": 3.4956418135072427e-06, |
|
"loss": 0.3042, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 1.3742310514137368, |
|
"grad_norm": 35.5767936706543, |
|
"learning_rate": 3.479964883677181e-06, |
|
"loss": 0.3575, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 1.3770528810880975, |
|
"grad_norm": 30.960519790649414, |
|
"learning_rate": 3.464287953847119e-06, |
|
"loss": 0.3681, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 1.3798747107624583, |
|
"grad_norm": 21.970129013061523, |
|
"learning_rate": 3.4486110240170568e-06, |
|
"loss": 0.3517, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 1.3826965404368192, |
|
"grad_norm": 10.104969024658203, |
|
"learning_rate": 3.432934094186995e-06, |
|
"loss": 0.3385, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.3855183701111802, |
|
"grad_norm": 10.913897514343262, |
|
"learning_rate": 3.4172571643569324e-06, |
|
"loss": 0.3126, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 1.388340199785541, |
|
"grad_norm": 12.78243350982666, |
|
"learning_rate": 3.4015802345268704e-06, |
|
"loss": 0.3202, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 1.3911620294599019, |
|
"grad_norm": 47.281341552734375, |
|
"learning_rate": 3.385903304696809e-06, |
|
"loss": 0.3453, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 1.3939838591342626, |
|
"grad_norm": 25.738739013671875, |
|
"learning_rate": 3.3702263748667465e-06, |
|
"loss": 0.3163, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 1.3968056888086235, |
|
"grad_norm": 27.61959457397461, |
|
"learning_rate": 3.354706214334985e-06, |
|
"loss": 0.3169, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.3996275184829843, |
|
"grad_norm": 21.792509078979492, |
|
"learning_rate": 3.3390292845049228e-06, |
|
"loss": 0.3353, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 1.4024493481573452, |
|
"grad_norm": 27.541452407836914, |
|
"learning_rate": 3.3233523546748608e-06, |
|
"loss": 0.3456, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 1.4052711778317062, |
|
"grad_norm": 15.29205322265625, |
|
"learning_rate": 3.3076754248447984e-06, |
|
"loss": 0.3542, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 1.408093007506067, |
|
"grad_norm": 19.681842803955078, |
|
"learning_rate": 3.2919984950147364e-06, |
|
"loss": 0.3325, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 1.4109148371804279, |
|
"grad_norm": 27.43589210510254, |
|
"learning_rate": 3.276321565184675e-06, |
|
"loss": 0.3252, |
|
"step": 50000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 70876, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|