{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.851523834331857,
"eval_steps": 500,
"global_step": 74500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03256056264652253,
"grad_norm": 5.030904293060303,
"learning_rate": 1.986975774941391e-05,
"loss": 5.9746,
"step": 500
},
{
"epoch": 0.06512112529304506,
"grad_norm": 3.0790352821350098,
"learning_rate": 1.973951549882782e-05,
"loss": 4.2176,
"step": 1000
},
{
"epoch": 0.0976816879395676,
"grad_norm": 2.3053739070892334,
"learning_rate": 1.9609273248241733e-05,
"loss": 3.3847,
"step": 1500
},
{
"epoch": 0.13024225058609012,
"grad_norm": 2.5033621788024902,
"learning_rate": 1.9479030997655642e-05,
"loss": 2.9223,
"step": 2000
},
{
"epoch": 0.16280281323261267,
"grad_norm": 2.464855909347534,
"learning_rate": 1.934878874706955e-05,
"loss": 2.582,
"step": 2500
},
{
"epoch": 0.1953633758791352,
"grad_norm": 2.3733980655670166,
"learning_rate": 1.921854649648346e-05,
"loss": 2.381,
"step": 3000
},
{
"epoch": 0.22792393852565773,
"grad_norm": 2.560279130935669,
"learning_rate": 1.908830424589737e-05,
"loss": 2.2095,
"step": 3500
},
{
"epoch": 0.26048450117218025,
"grad_norm": 2.146317958831787,
"learning_rate": 1.895806199531128e-05,
"loss": 2.0995,
"step": 4000
},
{
"epoch": 0.29304506381870277,
"grad_norm": 2.359065294265747,
"learning_rate": 1.8827819744725192e-05,
"loss": 1.9948,
"step": 4500
},
{
"epoch": 0.32560562646522534,
"grad_norm": 2.245957851409912,
"learning_rate": 1.86975774941391e-05,
"loss": 1.9036,
"step": 5000
},
{
"epoch": 0.35816618911174786,
"grad_norm": 2.824934482574463,
"learning_rate": 1.856733524355301e-05,
"loss": 1.8212,
"step": 5500
},
{
"epoch": 0.3907267517582704,
"grad_norm": 2.4427430629730225,
"learning_rate": 1.843709299296692e-05,
"loss": 1.7307,
"step": 6000
},
{
"epoch": 0.4232873144047929,
"grad_norm": 2.3356220722198486,
"learning_rate": 1.830685074238083e-05,
"loss": 1.658,
"step": 6500
},
{
"epoch": 0.45584787705131546,
"grad_norm": 2.7466249465942383,
"learning_rate": 1.817660849179474e-05,
"loss": 1.5993,
"step": 7000
},
{
"epoch": 0.488408439697838,
"grad_norm": 2.31550669670105,
"learning_rate": 1.8046366241208652e-05,
"loss": 1.5493,
"step": 7500
},
{
"epoch": 0.5209690023443605,
"grad_norm": 2.412864923477173,
"learning_rate": 1.791612399062256e-05,
"loss": 1.4979,
"step": 8000
},
{
"epoch": 0.553529564990883,
"grad_norm": 2.5272300243377686,
"learning_rate": 1.778588174003647e-05,
"loss": 1.4487,
"step": 8500
},
{
"epoch": 0.5860901276374055,
"grad_norm": 2.343013286590576,
"learning_rate": 1.765563948945038e-05,
"loss": 1.4119,
"step": 9000
},
{
"epoch": 0.618650690283928,
"grad_norm": 2.6124706268310547,
"learning_rate": 1.752539723886429e-05,
"loss": 1.3896,
"step": 9500
},
{
"epoch": 0.6512112529304507,
"grad_norm": 2.8961498737335205,
"learning_rate": 1.73951549882782e-05,
"loss": 1.3333,
"step": 10000
},
{
"epoch": 0.6837718155769732,
"grad_norm": 2.8462820053100586,
"learning_rate": 1.7264912737692108e-05,
"loss": 1.3036,
"step": 10500
},
{
"epoch": 0.7163323782234957,
"grad_norm": 2.2509639263153076,
"learning_rate": 1.7134670487106017e-05,
"loss": 1.2872,
"step": 11000
},
{
"epoch": 0.7488929408700182,
"grad_norm": 2.3151662349700928,
"learning_rate": 1.7004428236519926e-05,
"loss": 1.2498,
"step": 11500
},
{
"epoch": 0.7814535035165407,
"grad_norm": 2.587400197982788,
"learning_rate": 1.687418598593384e-05,
"loss": 1.2433,
"step": 12000
},
{
"epoch": 0.8140140661630633,
"grad_norm": 2.7084901332855225,
"learning_rate": 1.674394373534775e-05,
"loss": 1.2189,
"step": 12500
},
{
"epoch": 0.8465746288095858,
"grad_norm": 2.3007726669311523,
"learning_rate": 1.6613701484761658e-05,
"loss": 1.1927,
"step": 13000
},
{
"epoch": 0.8791351914561084,
"grad_norm": 2.200362205505371,
"learning_rate": 1.6483459234175567e-05,
"loss": 1.1849,
"step": 13500
},
{
"epoch": 0.9116957541026309,
"grad_norm": 2.2914557456970215,
"learning_rate": 1.6353216983589476e-05,
"loss": 1.1706,
"step": 14000
},
{
"epoch": 0.9442563167491534,
"grad_norm": 2.357699155807495,
"learning_rate": 1.6222974733003386e-05,
"loss": 1.161,
"step": 14500
},
{
"epoch": 0.976816879395676,
"grad_norm": 2.5686471462249756,
"learning_rate": 1.60927324824173e-05,
"loss": 1.1459,
"step": 15000
},
{
"epoch": 1.0093774420421986,
"grad_norm": 2.511021375656128,
"learning_rate": 1.5962490231831208e-05,
"loss": 1.114,
"step": 15500
},
{
"epoch": 1.041938004688721,
"grad_norm": 2.976020097732544,
"learning_rate": 1.5832247981245117e-05,
"loss": 1.0509,
"step": 16000
},
{
"epoch": 1.0744985673352436,
"grad_norm": 2.2788777351379395,
"learning_rate": 1.5702005730659026e-05,
"loss": 1.0342,
"step": 16500
},
{
"epoch": 1.107059129981766,
"grad_norm": 2.359161853790283,
"learning_rate": 1.5571763480072936e-05,
"loss": 1.0347,
"step": 17000
},
{
"epoch": 1.1396196926282887,
"grad_norm": 2.8540244102478027,
"learning_rate": 1.5441521229486845e-05,
"loss": 1.0288,
"step": 17500
},
{
"epoch": 1.172180255274811,
"grad_norm": 2.635509729385376,
"learning_rate": 1.5311278978900758e-05,
"loss": 1.0166,
"step": 18000
},
{
"epoch": 1.2047408179213337,
"grad_norm": 2.5582518577575684,
"learning_rate": 1.5181036728314667e-05,
"loss": 1.0124,
"step": 18500
},
{
"epoch": 1.2373013805678563,
"grad_norm": 2.1439788341522217,
"learning_rate": 1.5050794477728576e-05,
"loss": 1.0141,
"step": 19000
},
{
"epoch": 1.2698619432143787,
"grad_norm": 2.3901960849761963,
"learning_rate": 1.4920552227142486e-05,
"loss": 1.0014,
"step": 19500
},
{
"epoch": 1.3024225058609014,
"grad_norm": 2.6219823360443115,
"learning_rate": 1.4790309976556397e-05,
"loss": 1.0073,
"step": 20000
},
{
"epoch": 1.3349830685074238,
"grad_norm": 2.7062482833862305,
"learning_rate": 1.4660067725970306e-05,
"loss": 0.9964,
"step": 20500
},
{
"epoch": 1.3675436311539464,
"grad_norm": 2.4956464767456055,
"learning_rate": 1.4529825475384215e-05,
"loss": 0.9936,
"step": 21000
},
{
"epoch": 1.4001041938004688,
"grad_norm": 2.357893228530884,
"learning_rate": 1.4399583224798126e-05,
"loss": 0.9904,
"step": 21500
},
{
"epoch": 1.4326647564469914,
"grad_norm": 2.3728160858154297,
"learning_rate": 1.4269340974212036e-05,
"loss": 0.9798,
"step": 22000
},
{
"epoch": 1.465225319093514,
"grad_norm": 2.1804134845733643,
"learning_rate": 1.4139098723625945e-05,
"loss": 0.9786,
"step": 22500
},
{
"epoch": 1.4977858817400365,
"grad_norm": 2.3426220417022705,
"learning_rate": 1.4008856473039856e-05,
"loss": 0.9717,
"step": 23000
},
{
"epoch": 1.5303464443865589,
"grad_norm": 2.6158998012542725,
"learning_rate": 1.3878614222453765e-05,
"loss": 0.969,
"step": 23500
},
{
"epoch": 1.5629070070330815,
"grad_norm": 2.3006558418273926,
"learning_rate": 1.3748371971867675e-05,
"loss": 0.9655,
"step": 24000
},
{
"epoch": 1.5954675696796041,
"grad_norm": 2.3054986000061035,
"learning_rate": 1.3618129721281586e-05,
"loss": 0.9576,
"step": 24500
},
{
"epoch": 1.6280281323261265,
"grad_norm": 2.3399717807769775,
"learning_rate": 1.3487887470695495e-05,
"loss": 0.9522,
"step": 25000
},
{
"epoch": 1.6605886949726492,
"grad_norm": 2.381333589553833,
"learning_rate": 1.3357645220109406e-05,
"loss": 0.963,
"step": 25500
},
{
"epoch": 1.6931492576191718,
"grad_norm": 2.5838122367858887,
"learning_rate": 1.3227402969523315e-05,
"loss": 0.952,
"step": 26000
},
{
"epoch": 1.7257098202656942,
"grad_norm": 2.398665428161621,
"learning_rate": 1.3097160718937225e-05,
"loss": 0.9482,
"step": 26500
},
{
"epoch": 1.7582703829122166,
"grad_norm": 2.4087893962860107,
"learning_rate": 1.2966918468351136e-05,
"loss": 0.9436,
"step": 27000
},
{
"epoch": 1.7908309455587392,
"grad_norm": 2.380199432373047,
"learning_rate": 1.2836676217765045e-05,
"loss": 0.9491,
"step": 27500
},
{
"epoch": 1.8233915082052619,
"grad_norm": 2.5550014972686768,
"learning_rate": 1.2706433967178954e-05,
"loss": 0.9365,
"step": 28000
},
{
"epoch": 1.8559520708517843,
"grad_norm": 2.352365493774414,
"learning_rate": 1.2576191716592865e-05,
"loss": 0.9314,
"step": 28500
},
{
"epoch": 1.888512633498307,
"grad_norm": 2.1357262134552,
"learning_rate": 1.2445949466006773e-05,
"loss": 0.9287,
"step": 29000
},
{
"epoch": 1.9210731961448295,
"grad_norm": 2.809288501739502,
"learning_rate": 1.2315707215420682e-05,
"loss": 0.9231,
"step": 29500
},
{
"epoch": 1.953633758791352,
"grad_norm": 2.195413589477539,
"learning_rate": 1.2185464964834592e-05,
"loss": 0.9165,
"step": 30000
},
{
"epoch": 1.9861943214378743,
"grad_norm": 2.4369585514068604,
"learning_rate": 1.2055222714248503e-05,
"loss": 0.9261,
"step": 30500
},
{
"epoch": 2.018754884084397,
"grad_norm": 2.0791983604431152,
"learning_rate": 1.1924980463662412e-05,
"loss": 0.8401,
"step": 31000
},
{
"epoch": 2.0513154467309196,
"grad_norm": 2.3653042316436768,
"learning_rate": 1.1794738213076321e-05,
"loss": 0.7797,
"step": 31500
},
{
"epoch": 2.083876009377442,
"grad_norm": 2.7878382205963135,
"learning_rate": 1.1664495962490232e-05,
"loss": 0.7782,
"step": 32000
},
{
"epoch": 2.1164365720239644,
"grad_norm": 2.4624345302581787,
"learning_rate": 1.1534253711904142e-05,
"loss": 0.7783,
"step": 32500
},
{
"epoch": 2.1489971346704873,
"grad_norm": 2.4672300815582275,
"learning_rate": 1.1404011461318051e-05,
"loss": 0.7778,
"step": 33000
},
{
"epoch": 2.1815576973170097,
"grad_norm": 2.6120986938476562,
"learning_rate": 1.1273769210731962e-05,
"loss": 0.7774,
"step": 33500
},
{
"epoch": 2.214118259963532,
"grad_norm": 2.7739064693450928,
"learning_rate": 1.1143526960145871e-05,
"loss": 0.7817,
"step": 34000
},
{
"epoch": 2.246678822610055,
"grad_norm": 2.5610642433166504,
"learning_rate": 1.1013284709559782e-05,
"loss": 0.7733,
"step": 34500
},
{
"epoch": 2.2792393852565773,
"grad_norm": 2.655161142349243,
"learning_rate": 1.0883042458973692e-05,
"loss": 0.78,
"step": 35000
},
{
"epoch": 2.3117999479030997,
"grad_norm": 2.468252182006836,
"learning_rate": 1.0752800208387601e-05,
"loss": 0.7799,
"step": 35500
},
{
"epoch": 2.344360510549622,
"grad_norm": 2.766505718231201,
"learning_rate": 1.0622557957801512e-05,
"loss": 0.7743,
"step": 36000
},
{
"epoch": 2.376921073196145,
"grad_norm": 3.1091792583465576,
"learning_rate": 1.0492315707215421e-05,
"loss": 0.7831,
"step": 36500
},
{
"epoch": 2.4094816358426674,
"grad_norm": 2.9491870403289795,
"learning_rate": 1.036207345662933e-05,
"loss": 0.7766,
"step": 37000
},
{
"epoch": 2.44204219848919,
"grad_norm": 2.8023264408111572,
"learning_rate": 1.0231831206043242e-05,
"loss": 0.7759,
"step": 37500
},
{
"epoch": 2.4746027611357126,
"grad_norm": 2.604647636413574,
"learning_rate": 1.0101588955457151e-05,
"loss": 0.7778,
"step": 38000
},
{
"epoch": 2.507163323782235,
"grad_norm": 2.879962205886841,
"learning_rate": 9.97134670487106e-06,
"loss": 0.7685,
"step": 38500
},
{
"epoch": 2.5397238864287575,
"grad_norm": 3.1485841274261475,
"learning_rate": 9.841104454284971e-06,
"loss": 0.7758,
"step": 39000
},
{
"epoch": 2.57228444907528,
"grad_norm": 2.426480293273926,
"learning_rate": 9.71086220369888e-06,
"loss": 0.7696,
"step": 39500
},
{
"epoch": 2.6048450117218027,
"grad_norm": 2.696232318878174,
"learning_rate": 9.58061995311279e-06,
"loss": 0.7738,
"step": 40000
},
{
"epoch": 2.637405574368325,
"grad_norm": 3.0641300678253174,
"learning_rate": 9.450377702526701e-06,
"loss": 0.7718,
"step": 40500
},
{
"epoch": 2.6699661370148475,
"grad_norm": 2.822618246078491,
"learning_rate": 9.32013545194061e-06,
"loss": 0.7657,
"step": 41000
},
{
"epoch": 2.7025266996613704,
"grad_norm": 3.1593356132507324,
"learning_rate": 9.18989320135452e-06,
"loss": 0.7718,
"step": 41500
},
{
"epoch": 2.735087262307893,
"grad_norm": 2.6383330821990967,
"learning_rate": 9.05965095076843e-06,
"loss": 0.7693,
"step": 42000
},
{
"epoch": 2.767647824954415,
"grad_norm": 2.7163684368133545,
"learning_rate": 8.92940870018234e-06,
"loss": 0.7648,
"step": 42500
},
{
"epoch": 2.8002083876009376,
"grad_norm": 3.0254065990448,
"learning_rate": 8.79916644959625e-06,
"loss": 0.7609,
"step": 43000
},
{
"epoch": 2.83276895024746,
"grad_norm": 3.440492630004883,
"learning_rate": 8.668924199010159e-06,
"loss": 0.7641,
"step": 43500
},
{
"epoch": 2.865329512893983,
"grad_norm": 2.6121511459350586,
"learning_rate": 8.53868194842407e-06,
"loss": 0.7645,
"step": 44000
},
{
"epoch": 2.8978900755405053,
"grad_norm": 2.865845203399658,
"learning_rate": 8.40843969783798e-06,
"loss": 0.7652,
"step": 44500
},
{
"epoch": 2.930450638187028,
"grad_norm": 2.8584651947021484,
"learning_rate": 8.278197447251888e-06,
"loss": 0.7603,
"step": 45000
},
{
"epoch": 2.9630112008335505,
"grad_norm": 2.286515235900879,
"learning_rate": 8.1479551966658e-06,
"loss": 0.7655,
"step": 45500
},
{
"epoch": 2.995571763480073,
"grad_norm": 3.0863349437713623,
"learning_rate": 8.017712946079709e-06,
"loss": 0.7598,
"step": 46000
},
{
"epoch": 3.0281323261265953,
"grad_norm": 2.7062647342681885,
"learning_rate": 7.887470695493618e-06,
"loss": 0.6164,
"step": 46500
},
{
"epoch": 3.060692888773118,
"grad_norm": 3.3541259765625,
"learning_rate": 7.75722844490753e-06,
"loss": 0.5882,
"step": 47000
},
{
"epoch": 3.0932534514196406,
"grad_norm": 3.511744260787964,
"learning_rate": 7.6269861943214385e-06,
"loss": 0.5884,
"step": 47500
},
{
"epoch": 3.125814014066163,
"grad_norm": 3.1489553451538086,
"learning_rate": 7.496743943735349e-06,
"loss": 0.5837,
"step": 48000
},
{
"epoch": 3.1583745767126854,
"grad_norm": 3.2325332164764404,
"learning_rate": 7.366501693149258e-06,
"loss": 0.5841,
"step": 48500
},
{
"epoch": 3.1909351393592083,
"grad_norm": 3.4985926151275635,
"learning_rate": 7.236259442563168e-06,
"loss": 0.5847,
"step": 49000
},
{
"epoch": 3.2234957020057307,
"grad_norm": 3.218742609024048,
"learning_rate": 7.106017191977078e-06,
"loss": 0.5868,
"step": 49500
},
{
"epoch": 3.256056264652253,
"grad_norm": 3.2203478813171387,
"learning_rate": 6.975774941390988e-06,
"loss": 0.5883,
"step": 50000
},
{
"epoch": 3.288616827298776,
"grad_norm": 3.2793335914611816,
"learning_rate": 6.845532690804898e-06,
"loss": 0.5876,
"step": 50500
},
{
"epoch": 3.3211773899452983,
"grad_norm": 3.3763086795806885,
"learning_rate": 6.715290440218808e-06,
"loss": 0.5843,
"step": 51000
},
{
"epoch": 3.3537379525918207,
"grad_norm": 3.314659833908081,
"learning_rate": 6.585048189632718e-06,
"loss": 0.5834,
"step": 51500
},
{
"epoch": 3.386298515238343,
"grad_norm": 4.0635457038879395,
"learning_rate": 6.4548059390466275e-06,
"loss": 0.5839,
"step": 52000
},
{
"epoch": 3.418859077884866,
"grad_norm": 3.561662197113037,
"learning_rate": 6.324563688460537e-06,
"loss": 0.586,
"step": 52500
},
{
"epoch": 3.4514196405313884,
"grad_norm": 3.3345561027526855,
"learning_rate": 6.194321437874446e-06,
"loss": 0.5819,
"step": 53000
},
{
"epoch": 3.483980203177911,
"grad_norm": 3.2945241928100586,
"learning_rate": 6.064079187288356e-06,
"loss": 0.5846,
"step": 53500
},
{
"epoch": 3.516540765824433,
"grad_norm": 3.8004238605499268,
"learning_rate": 5.9338369367022665e-06,
"loss": 0.5847,
"step": 54000
},
{
"epoch": 3.549101328470956,
"grad_norm": 3.7713723182678223,
"learning_rate": 5.803594686116176e-06,
"loss": 0.5846,
"step": 54500
},
{
"epoch": 3.5816618911174785,
"grad_norm": 3.562333822250366,
"learning_rate": 5.673352435530086e-06,
"loss": 0.5849,
"step": 55000
},
{
"epoch": 3.6142224537640013,
"grad_norm": 4.006633758544922,
"learning_rate": 5.543110184943996e-06,
"loss": 0.5847,
"step": 55500
},
{
"epoch": 3.6467830164105237,
"grad_norm": 3.453509569168091,
"learning_rate": 5.412867934357906e-06,
"loss": 0.5825,
"step": 56000
},
{
"epoch": 3.679343579057046,
"grad_norm": 3.36258864402771,
"learning_rate": 5.282625683771816e-06,
"loss": 0.5819,
"step": 56500
},
{
"epoch": 3.7119041417035685,
"grad_norm": 3.6564488410949707,
"learning_rate": 5.152383433185726e-06,
"loss": 0.5809,
"step": 57000
},
{
"epoch": 3.744464704350091,
"grad_norm": 3.977710485458374,
"learning_rate": 5.022141182599636e-06,
"loss": 0.5803,
"step": 57500
},
{
"epoch": 3.777025266996614,
"grad_norm": 3.4889750480651855,
"learning_rate": 4.891898932013545e-06,
"loss": 0.5808,
"step": 58000
},
{
"epoch": 3.809585829643136,
"grad_norm": 3.451753616333008,
"learning_rate": 4.7616566814274556e-06,
"loss": 0.5783,
"step": 58500
},
{
"epoch": 3.842146392289659,
"grad_norm": 3.9667842388153076,
"learning_rate": 4.631414430841366e-06,
"loss": 0.578,
"step": 59000
},
{
"epoch": 3.8747069549361814,
"grad_norm": 3.6356189250946045,
"learning_rate": 4.501172180255275e-06,
"loss": 0.5776,
"step": 59500
},
{
"epoch": 3.907267517582704,
"grad_norm": 4.25313663482666,
"learning_rate": 4.370929929669185e-06,
"loss": 0.5775,
"step": 60000
},
{
"epoch": 3.9398280802292263,
"grad_norm": 3.822178602218628,
"learning_rate": 4.2406876790830946e-06,
"loss": 0.5774,
"step": 60500
},
{
"epoch": 3.9723886428757487,
"grad_norm": 3.882927179336548,
"learning_rate": 4.110445428497005e-06,
"loss": 0.5733,
"step": 61000
},
{
"epoch": 4.004949205522271,
"grad_norm": 2.9648609161376953,
"learning_rate": 3.980203177910915e-06,
"loss": 0.553,
"step": 61500
},
{
"epoch": 4.037509768168794,
"grad_norm": 3.1388580799102783,
"learning_rate": 3.849960927324824e-06,
"loss": 0.4113,
"step": 62000
},
{
"epoch": 4.070070330815317,
"grad_norm": 3.7440290451049805,
"learning_rate": 3.7197186767387344e-06,
"loss": 0.4071,
"step": 62500
},
{
"epoch": 4.102630893461839,
"grad_norm": 3.4993302822113037,
"learning_rate": 3.589476426152644e-06,
"loss": 0.4078,
"step": 63000
},
{
"epoch": 4.135191456108362,
"grad_norm": 3.8999550342559814,
"learning_rate": 3.4592341755665543e-06,
"loss": 0.404,
"step": 63500
},
{
"epoch": 4.167752018754884,
"grad_norm": 3.9213688373565674,
"learning_rate": 3.328991924980464e-06,
"loss": 0.4057,
"step": 64000
},
{
"epoch": 4.200312581401406,
"grad_norm": 4.091826438903809,
"learning_rate": 3.1987496743943734e-06,
"loss": 0.4037,
"step": 64500
},
{
"epoch": 4.232873144047929,
"grad_norm": 3.9140231609344482,
"learning_rate": 3.0685074238082836e-06,
"loss": 0.4053,
"step": 65000
},
{
"epoch": 4.265433706694452,
"grad_norm": 4.0627760887146,
"learning_rate": 2.9382651732221933e-06,
"loss": 0.4029,
"step": 65500
},
{
"epoch": 4.2979942693409745,
"grad_norm": 3.8601019382476807,
"learning_rate": 2.8080229226361035e-06,
"loss": 0.4005,
"step": 66000
},
{
"epoch": 4.330554831987497,
"grad_norm": 3.769637107849121,
"learning_rate": 2.6777806720500133e-06,
"loss": 0.4001,
"step": 66500
},
{
"epoch": 4.363115394634019,
"grad_norm": 4.234343528747559,
"learning_rate": 2.547538421463923e-06,
"loss": 0.4002,
"step": 67000
},
{
"epoch": 4.395675957280542,
"grad_norm": 3.9124088287353516,
"learning_rate": 2.417296170877833e-06,
"loss": 0.4005,
"step": 67500
},
{
"epoch": 4.428236519927064,
"grad_norm": 3.8314108848571777,
"learning_rate": 2.2870539202917425e-06,
"loss": 0.3993,
"step": 68000
},
{
"epoch": 4.4607970825735865,
"grad_norm": 4.098474979400635,
"learning_rate": 2.1568116697056527e-06,
"loss": 0.3988,
"step": 68500
},
{
"epoch": 4.49335764522011,
"grad_norm": 3.8353285789489746,
"learning_rate": 2.0265694191195624e-06,
"loss": 0.3987,
"step": 69000
},
{
"epoch": 4.525918207866632,
"grad_norm": 3.7794976234436035,
"learning_rate": 1.8963271685334724e-06,
"loss": 0.3972,
"step": 69500
},
{
"epoch": 4.558478770513155,
"grad_norm": 4.056552410125732,
"learning_rate": 1.7660849179473824e-06,
"loss": 0.3958,
"step": 70000
},
{
"epoch": 4.591039333159677,
"grad_norm": 3.7579519748687744,
"learning_rate": 1.6358426673612921e-06,
"loss": 0.3955,
"step": 70500
},
{
"epoch": 4.6235998958061995,
"grad_norm": 4.280270576477051,
"learning_rate": 1.5056004167752019e-06,
"loss": 0.3951,
"step": 71000
},
{
"epoch": 4.656160458452722,
"grad_norm": 4.043455123901367,
"learning_rate": 1.3753581661891118e-06,
"loss": 0.3944,
"step": 71500
},
{
"epoch": 4.688721021099244,
"grad_norm": 3.790985584259033,
"learning_rate": 1.2451159156030216e-06,
"loss": 0.395,
"step": 72000
},
{
"epoch": 4.721281583745768,
"grad_norm": 3.877270460128784,
"learning_rate": 1.1148736650169315e-06,
"loss": 0.3916,
"step": 72500
},
{
"epoch": 4.75384214639229,
"grad_norm": 4.055418491363525,
"learning_rate": 9.846314144308415e-07,
"loss": 0.3928,
"step": 73000
},
{
"epoch": 4.786402709038812,
"grad_norm": 4.357405662536621,
"learning_rate": 8.543891638447512e-07,
"loss": 0.3911,
"step": 73500
},
{
"epoch": 4.818963271685335,
"grad_norm": 3.596019983291626,
"learning_rate": 7.241469132586612e-07,
"loss": 0.3897,
"step": 74000
},
{
"epoch": 4.851523834331857,
"grad_norm": 4.408013820648193,
"learning_rate": 5.939046626725711e-07,
"loss": 0.3887,
"step": 74500
}
],
"logging_steps": 500,
"max_steps": 76780,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.199090369536721e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}