Qwen2.5-7B-Open-R1-Step1-SFT / trainer_state.json
yjyjyj98's picture
Model save
b5ff6c5 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9995732696082615,
"eval_steps": 500,
"global_step": 1464,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003413843133907997,
"grad_norm": 4.705833074023066,
"learning_rate": 2.702702702702703e-06,
"loss": 0.8155,
"num_tokens": 1939277.0,
"step": 5
},
{
"epoch": 0.006827686267815994,
"grad_norm": 4.012926640069732,
"learning_rate": 6.081081081081082e-06,
"loss": 0.71,
"num_tokens": 3766520.0,
"step": 10
},
{
"epoch": 0.01024152940172399,
"grad_norm": 2.280161297786124,
"learning_rate": 9.45945945945946e-06,
"loss": 0.6608,
"num_tokens": 5654095.0,
"step": 15
},
{
"epoch": 0.013655372535631987,
"grad_norm": 1.143701205704835,
"learning_rate": 1.2837837837837838e-05,
"loss": 0.5888,
"num_tokens": 7684648.0,
"step": 20
},
{
"epoch": 0.017069215669539985,
"grad_norm": 0.9015302385209565,
"learning_rate": 1.6216216216216218e-05,
"loss": 0.5422,
"num_tokens": 9531309.0,
"step": 25
},
{
"epoch": 0.02048305880344798,
"grad_norm": 0.8842795138478181,
"learning_rate": 1.9594594594594595e-05,
"loss": 0.5045,
"num_tokens": 11396996.0,
"step": 30
},
{
"epoch": 0.02389690193735598,
"grad_norm": 0.8574020610099237,
"learning_rate": 2.2972972972972976e-05,
"loss": 0.5068,
"num_tokens": 13252705.0,
"step": 35
},
{
"epoch": 0.027310745071263975,
"grad_norm": 0.8271667971649893,
"learning_rate": 2.635135135135135e-05,
"loss": 0.4881,
"num_tokens": 15158631.0,
"step": 40
},
{
"epoch": 0.030724588205171974,
"grad_norm": 0.6442503629860594,
"learning_rate": 2.9729729729729733e-05,
"loss": 0.4917,
"num_tokens": 17089810.0,
"step": 45
},
{
"epoch": 0.03413843133907997,
"grad_norm": 1.0354213580125469,
"learning_rate": 3.310810810810811e-05,
"loss": 0.4841,
"num_tokens": 18959052.0,
"step": 50
},
{
"epoch": 0.037552274472987965,
"grad_norm": 0.9482071468568299,
"learning_rate": 3.648648648648649e-05,
"loss": 0.4743,
"num_tokens": 20852973.0,
"step": 55
},
{
"epoch": 0.04096611760689596,
"grad_norm": 1.0587081263012454,
"learning_rate": 3.986486486486487e-05,
"loss": 0.4808,
"num_tokens": 22704402.0,
"step": 60
},
{
"epoch": 0.04437996074080396,
"grad_norm": 0.8101784899967004,
"learning_rate": 4.324324324324325e-05,
"loss": 0.4859,
"num_tokens": 24673530.0,
"step": 65
},
{
"epoch": 0.04779380387471196,
"grad_norm": 1.634279655733224,
"learning_rate": 4.662162162162162e-05,
"loss": 0.4832,
"num_tokens": 26712591.0,
"step": 70
},
{
"epoch": 0.051207647008619954,
"grad_norm": 1.38458922388954,
"learning_rate": 5e-05,
"loss": 0.4584,
"num_tokens": 28520859.0,
"step": 75
},
{
"epoch": 0.05462149014252795,
"grad_norm": 1.01136396529527,
"learning_rate": 4.9998563326589096e-05,
"loss": 0.4788,
"num_tokens": 30504282.0,
"step": 80
},
{
"epoch": 0.058035333276435945,
"grad_norm": 1.0066323926409173,
"learning_rate": 4.9994253489825765e-05,
"loss": 0.4719,
"num_tokens": 32443081.0,
"step": 85
},
{
"epoch": 0.06144917641034395,
"grad_norm": 0.9287342359690847,
"learning_rate": 4.998707104009471e-05,
"loss": 0.4745,
"num_tokens": 34326339.0,
"step": 90
},
{
"epoch": 0.06486301954425194,
"grad_norm": 0.9879821790567098,
"learning_rate": 4.997701689462566e-05,
"loss": 0.4735,
"num_tokens": 36170185.0,
"step": 95
},
{
"epoch": 0.06827686267815994,
"grad_norm": 0.8069102268140448,
"learning_rate": 4.996409233737627e-05,
"loss": 0.4823,
"num_tokens": 38065120.0,
"step": 100
},
{
"epoch": 0.07169070581206793,
"grad_norm": 1.032813457346951,
"learning_rate": 4.99482990188681e-05,
"loss": 0.4807,
"num_tokens": 39954377.0,
"step": 105
},
{
"epoch": 0.07510454894597593,
"grad_norm": 1.0693609940034285,
"learning_rate": 4.992963895597589e-05,
"loss": 0.4791,
"num_tokens": 41945405.0,
"step": 110
},
{
"epoch": 0.07851839207988393,
"grad_norm": 0.966510416634568,
"learning_rate": 4.990811453166999e-05,
"loss": 0.476,
"num_tokens": 43794631.0,
"step": 115
},
{
"epoch": 0.08193223521379192,
"grad_norm": 1.0423100991803456,
"learning_rate": 4.9883728494711986e-05,
"loss": 0.4688,
"num_tokens": 45665940.0,
"step": 120
},
{
"epoch": 0.08534607834769992,
"grad_norm": 0.8310450354278927,
"learning_rate": 4.985648395930374e-05,
"loss": 0.475,
"num_tokens": 47594004.0,
"step": 125
},
{
"epoch": 0.08875992148160793,
"grad_norm": 0.7185061502223402,
"learning_rate": 4.9826384404689666e-05,
"loss": 0.4725,
"num_tokens": 49500082.0,
"step": 130
},
{
"epoch": 0.09217376461551592,
"grad_norm": 0.9610194838528363,
"learning_rate": 4.9793433674712395e-05,
"loss": 0.4679,
"num_tokens": 51326229.0,
"step": 135
},
{
"epoch": 0.09558760774942392,
"grad_norm": 0.7899645948112779,
"learning_rate": 4.9757635977321965e-05,
"loss": 0.4724,
"num_tokens": 53151114.0,
"step": 140
},
{
"epoch": 0.09900145088333191,
"grad_norm": 0.9979391758397096,
"learning_rate": 4.971899588403836e-05,
"loss": 0.4645,
"num_tokens": 55066586.0,
"step": 145
},
{
"epoch": 0.10241529401723991,
"grad_norm": 0.727201189085688,
"learning_rate": 4.9677518329367775e-05,
"loss": 0.4776,
"num_tokens": 57062268.0,
"step": 150
},
{
"epoch": 0.1058291371511479,
"grad_norm": 0.957820374972616,
"learning_rate": 4.963320861017242e-05,
"loss": 0.4527,
"num_tokens": 58912697.0,
"step": 155
},
{
"epoch": 0.1092429802850559,
"grad_norm": 0.6922629053527111,
"learning_rate": 4.9586072384994126e-05,
"loss": 0.4609,
"num_tokens": 60809724.0,
"step": 160
},
{
"epoch": 0.1126568234189639,
"grad_norm": 0.8360362963465879,
"learning_rate": 4.953611567333166e-05,
"loss": 0.4462,
"num_tokens": 62827054.0,
"step": 165
},
{
"epoch": 0.11607066655287189,
"grad_norm": 0.7194003955094253,
"learning_rate": 4.9483344854872096e-05,
"loss": 0.4607,
"num_tokens": 64735170.0,
"step": 170
},
{
"epoch": 0.11948450968677989,
"grad_norm": 0.612939116097357,
"learning_rate": 4.942776666867602e-05,
"loss": 0.4616,
"num_tokens": 66662588.0,
"step": 175
},
{
"epoch": 0.1228983528206879,
"grad_norm": 0.7916296824208874,
"learning_rate": 4.936938821231698e-05,
"loss": 0.4717,
"num_tokens": 68607450.0,
"step": 180
},
{
"epoch": 0.1263121959545959,
"grad_norm": 1.0690262233247425,
"learning_rate": 4.9308216940975075e-05,
"loss": 0.4651,
"num_tokens": 70515883.0,
"step": 185
},
{
"epoch": 0.12972603908850389,
"grad_norm": 0.6683242093385041,
"learning_rate": 4.924426066648486e-05,
"loss": 0.4645,
"num_tokens": 72437556.0,
"step": 190
},
{
"epoch": 0.13313988222241188,
"grad_norm": 0.9539510507374334,
"learning_rate": 4.9177527556337835e-05,
"loss": 0.4444,
"num_tokens": 74410081.0,
"step": 195
},
{
"epoch": 0.13655372535631988,
"grad_norm": 0.9252957776174132,
"learning_rate": 4.910802613263931e-05,
"loss": 0.4607,
"num_tokens": 76352374.0,
"step": 200
},
{
"epoch": 0.13996756849022787,
"grad_norm": 0.6332331264138565,
"learning_rate": 4.903576527102018e-05,
"loss": 0.4536,
"num_tokens": 2029573.0,
"step": 205
},
{
"epoch": 0.14338141162413587,
"grad_norm": 0.7431194368438976,
"learning_rate": 4.896075419950342e-05,
"loss": 0.4615,
"num_tokens": 4026360.0,
"step": 210
},
{
"epoch": 0.14679525475804386,
"grad_norm": 0.9372438278269656,
"learning_rate": 4.888300249732565e-05,
"loss": 0.4536,
"num_tokens": 5855288.0,
"step": 215
},
{
"epoch": 0.15020909789195186,
"grad_norm": 0.6587314361302145,
"learning_rate": 4.880252009371382e-05,
"loss": 0.4607,
"num_tokens": 7786439.0,
"step": 220
},
{
"epoch": 0.15362294102585985,
"grad_norm": 0.7310394284137065,
"learning_rate": 4.8719317266617206e-05,
"loss": 0.4403,
"num_tokens": 9571254.0,
"step": 225
},
{
"epoch": 0.15703678415976785,
"grad_norm": 0.8609393951227605,
"learning_rate": 4.863340464139486e-05,
"loss": 0.4769,
"num_tokens": 11588067.0,
"step": 230
},
{
"epoch": 0.16045062729367585,
"grad_norm": 0.7326705125046628,
"learning_rate": 4.854479318945873e-05,
"loss": 0.4503,
"num_tokens": 13478146.0,
"step": 235
},
{
"epoch": 0.16386447042758384,
"grad_norm": 0.7462315558110848,
"learning_rate": 4.8453494226872526e-05,
"loss": 0.4467,
"num_tokens": 15442504.0,
"step": 240
},
{
"epoch": 0.16727831356149184,
"grad_norm": 0.9061873366679609,
"learning_rate": 4.8359519412906656e-05,
"loss": 0.4493,
"num_tokens": 17409465.0,
"step": 245
},
{
"epoch": 0.17069215669539983,
"grad_norm": 0.7321388464328794,
"learning_rate": 4.826288074854926e-05,
"loss": 0.4672,
"num_tokens": 19363531.0,
"step": 250
},
{
"epoch": 0.17410599982930786,
"grad_norm": 0.7665868699326416,
"learning_rate": 4.816359057497363e-05,
"loss": 0.4564,
"num_tokens": 21233246.0,
"step": 255
},
{
"epoch": 0.17751984296321585,
"grad_norm": 0.812562841775717,
"learning_rate": 4.806166157196218e-05,
"loss": 0.449,
"num_tokens": 23327219.0,
"step": 260
},
{
"epoch": 0.18093368609712385,
"grad_norm": 0.8688256441368103,
"learning_rate": 4.795710675628724e-05,
"loss": 0.4567,
"num_tokens": 25306743.0,
"step": 265
},
{
"epoch": 0.18434752923103184,
"grad_norm": 0.9435753690607764,
"learning_rate": 4.784993948004867e-05,
"loss": 0.4517,
"num_tokens": 27174263.0,
"step": 270
},
{
"epoch": 0.18776137236493984,
"grad_norm": 1.4362472881603188,
"learning_rate": 4.774017342896881e-05,
"loss": 0.4604,
"num_tokens": 28998685.0,
"step": 275
},
{
"epoch": 0.19117521549884783,
"grad_norm": 3.7837849040471623,
"learning_rate": 4.7627822620644735e-05,
"loss": 0.5089,
"num_tokens": 30861476.0,
"step": 280
},
{
"epoch": 0.19458905863275583,
"grad_norm": 1.6810142201752163,
"learning_rate": 4.7512901402758135e-05,
"loss": 0.4516,
"num_tokens": 32762640.0,
"step": 285
},
{
"epoch": 0.19800290176666382,
"grad_norm": 1.2181895510266618,
"learning_rate": 4.7395424451243056e-05,
"loss": 0.4753,
"num_tokens": 34810642.0,
"step": 290
},
{
"epoch": 0.20141674490057182,
"grad_norm": 0.8193936087136641,
"learning_rate": 4.7275406768411736e-05,
"loss": 0.4652,
"num_tokens": 36683859.0,
"step": 295
},
{
"epoch": 0.20483058803447982,
"grad_norm": 1.0109058198551923,
"learning_rate": 4.715286368103873e-05,
"loss": 0.4537,
"num_tokens": 38493680.0,
"step": 300
},
{
"epoch": 0.2082444311683878,
"grad_norm": 0.7390004642372939,
"learning_rate": 4.702781083840362e-05,
"loss": 0.44,
"num_tokens": 40420032.0,
"step": 305
},
{
"epoch": 0.2116582743022958,
"grad_norm": 0.9033348017381692,
"learning_rate": 4.690026421029254e-05,
"loss": 0.4371,
"num_tokens": 42428881.0,
"step": 310
},
{
"epoch": 0.2150721174362038,
"grad_norm": 0.6122028277057487,
"learning_rate": 4.677024008495876e-05,
"loss": 0.4463,
"num_tokens": 44293027.0,
"step": 315
},
{
"epoch": 0.2184859605701118,
"grad_norm": 0.5056169247540012,
"learning_rate": 4.66377550670426e-05,
"loss": 0.4469,
"num_tokens": 46312487.0,
"step": 320
},
{
"epoch": 0.2218998037040198,
"grad_norm": 0.6403101500013877,
"learning_rate": 4.650282607545096e-05,
"loss": 0.451,
"num_tokens": 48245722.0,
"step": 325
},
{
"epoch": 0.2253136468379278,
"grad_norm": 0.652067318740719,
"learning_rate": 4.636547034119668e-05,
"loss": 0.4458,
"num_tokens": 50103512.0,
"step": 330
},
{
"epoch": 0.22872748997183578,
"grad_norm": 0.6724023128855483,
"learning_rate": 4.622570540519811e-05,
"loss": 0.4349,
"num_tokens": 52090247.0,
"step": 335
},
{
"epoch": 0.23214133310574378,
"grad_norm": 0.5515342597831395,
"learning_rate": 4.6083549116039e-05,
"loss": 0.4548,
"num_tokens": 53993921.0,
"step": 340
},
{
"epoch": 0.23555517623965178,
"grad_norm": 0.7100102164490761,
"learning_rate": 4.59390196276892e-05,
"loss": 0.4525,
"num_tokens": 55882666.0,
"step": 345
},
{
"epoch": 0.23896901937355977,
"grad_norm": 0.8679862020361908,
"learning_rate": 4.579213539718632e-05,
"loss": 0.4607,
"num_tokens": 57807639.0,
"step": 350
},
{
"epoch": 0.2423828625074678,
"grad_norm": 0.6200007378003499,
"learning_rate": 4.564291518227866e-05,
"loss": 0.4351,
"num_tokens": 59586573.0,
"step": 355
},
{
"epoch": 0.2457967056413758,
"grad_norm": 0.6889715253876092,
"learning_rate": 4.549137803902978e-05,
"loss": 0.4422,
"num_tokens": 61574526.0,
"step": 360
},
{
"epoch": 0.24921054877528379,
"grad_norm": 0.7149528120875684,
"learning_rate": 4.533754331938498e-05,
"loss": 0.4333,
"num_tokens": 63467351.0,
"step": 365
},
{
"epoch": 0.2526243919091918,
"grad_norm": 0.7467833331657708,
"learning_rate": 4.5181430668699934e-05,
"loss": 0.4391,
"num_tokens": 65251754.0,
"step": 370
},
{
"epoch": 0.25603823504309975,
"grad_norm": 0.6233104186087223,
"learning_rate": 4.5023060023231915e-05,
"loss": 0.4542,
"num_tokens": 67341024.0,
"step": 375
},
{
"epoch": 0.25945207817700777,
"grad_norm": 0.7036989801599275,
"learning_rate": 4.486245160759385e-05,
"loss": 0.4357,
"num_tokens": 69203543.0,
"step": 380
},
{
"epoch": 0.26286592131091574,
"grad_norm": 0.6213336099848067,
"learning_rate": 4.469962593217154e-05,
"loss": 0.4312,
"num_tokens": 71079356.0,
"step": 385
},
{
"epoch": 0.26627976444482376,
"grad_norm": 0.7761503823566354,
"learning_rate": 4.453460379050441e-05,
"loss": 0.4402,
"num_tokens": 73014977.0,
"step": 390
},
{
"epoch": 0.26969360757873173,
"grad_norm": 0.7024337605986739,
"learning_rate": 4.436740625663008e-05,
"loss": 0.4492,
"num_tokens": 74994274.0,
"step": 395
},
{
"epoch": 0.27310745071263975,
"grad_norm": 0.6710110629057495,
"learning_rate": 4.41980546823931e-05,
"loss": 0.4363,
"num_tokens": 76893195.0,
"step": 400
},
{
"epoch": 0.2765212938465478,
"grad_norm": 0.5751338135821263,
"learning_rate": 4.4026570694718243e-05,
"loss": 0.4229,
"num_tokens": 78803696.0,
"step": 405
},
{
"epoch": 0.27993513698045575,
"grad_norm": 0.6675876320789876,
"learning_rate": 4.385297619284868e-05,
"loss": 0.4485,
"num_tokens": 80775669.0,
"step": 410
},
{
"epoch": 0.28334898011436377,
"grad_norm": 0.5465180089446272,
"learning_rate": 4.367729334554932e-05,
"loss": 0.4285,
"num_tokens": 82613548.0,
"step": 415
},
{
"epoch": 0.28676282324827174,
"grad_norm": 0.5408169168484701,
"learning_rate": 4.3499544588275725e-05,
"loss": 0.4476,
"num_tokens": 84534417.0,
"step": 420
},
{
"epoch": 0.29017666638217976,
"grad_norm": 0.622446223545655,
"learning_rate": 4.331975262030911e-05,
"loss": 0.4334,
"num_tokens": 86437083.0,
"step": 425
},
{
"epoch": 0.2935905095160877,
"grad_norm": 0.6346095791048567,
"learning_rate": 4.3137940401857464e-05,
"loss": 0.4445,
"num_tokens": 88482847.0,
"step": 430
},
{
"epoch": 0.29700435264999575,
"grad_norm": 0.5790350174610308,
"learning_rate": 4.295413115112345e-05,
"loss": 0.4185,
"num_tokens": 90375995.0,
"step": 435
},
{
"epoch": 0.3004181957839037,
"grad_norm": 0.5571697392013049,
"learning_rate": 4.2768348341339356e-05,
"loss": 0.4268,
"num_tokens": 92352827.0,
"step": 440
},
{
"epoch": 0.30383203891781174,
"grad_norm": 0.5962036778698983,
"learning_rate": 4.258061569776944e-05,
"loss": 0.4443,
"num_tokens": 94322928.0,
"step": 445
},
{
"epoch": 0.3072458820517197,
"grad_norm": 0.6651032961579146,
"learning_rate": 4.239095719468015e-05,
"loss": 0.4265,
"num_tokens": 96272955.0,
"step": 450
},
{
"epoch": 0.31065972518562773,
"grad_norm": 0.6679632928061942,
"learning_rate": 4.2199397052278467e-05,
"loss": 0.4352,
"num_tokens": 98232888.0,
"step": 455
},
{
"epoch": 0.3140735683195357,
"grad_norm": 0.5411345637950933,
"learning_rate": 4.200595973361888e-05,
"loss": 0.4262,
"num_tokens": 100242672.0,
"step": 460
},
{
"epoch": 0.3174874114534437,
"grad_norm": 0.5125155486830999,
"learning_rate": 4.1810669941479396e-05,
"loss": 0.413,
"num_tokens": 102186114.0,
"step": 465
},
{
"epoch": 0.3209012545873517,
"grad_norm": 0.6282116038580893,
"learning_rate": 4.161355261520683e-05,
"loss": 0.4328,
"num_tokens": 104084923.0,
"step": 470
},
{
"epoch": 0.3243150977212597,
"grad_norm": 0.6010959699090055,
"learning_rate": 4.141463292753199e-05,
"loss": 0.439,
"num_tokens": 106031358.0,
"step": 475
},
{
"epoch": 0.3277289408551677,
"grad_norm": 0.8772786749582367,
"learning_rate": 4.121393628135498e-05,
"loss": 0.431,
"num_tokens": 107924869.0,
"step": 480
},
{
"epoch": 0.3311427839890757,
"grad_norm": 1.0201596112760478,
"learning_rate": 4.101148830650114e-05,
"loss": 0.4439,
"num_tokens": 109870864.0,
"step": 485
},
{
"epoch": 0.3345566271229837,
"grad_norm": 0.559327637241473,
"learning_rate": 4.080731485644804e-05,
"loss": 0.435,
"num_tokens": 111884766.0,
"step": 490
},
{
"epoch": 0.3379704702568917,
"grad_norm": 0.6096167733572574,
"learning_rate": 4.0601442005023856e-05,
"loss": 0.4242,
"num_tokens": 113772365.0,
"step": 495
},
{
"epoch": 0.34138431339079967,
"grad_norm": 0.9108459316259976,
"learning_rate": 4.039389604307762e-05,
"loss": 0.4261,
"num_tokens": 115712538.0,
"step": 500
},
{
"epoch": 0.3447981565247077,
"grad_norm": 0.6525655395918979,
"learning_rate": 4.018470347512177e-05,
"loss": 0.4247,
"num_tokens": 117621912.0,
"step": 505
},
{
"epoch": 0.3482119996586157,
"grad_norm": 0.5854044655049723,
"learning_rate": 3.9973891015947444e-05,
"loss": 0.4303,
"num_tokens": 119575860.0,
"step": 510
},
{
"epoch": 0.3516258427925237,
"grad_norm": 0.6006841042494367,
"learning_rate": 3.976148558721285e-05,
"loss": 0.4121,
"num_tokens": 121468214.0,
"step": 515
},
{
"epoch": 0.3550396859264317,
"grad_norm": 0.54247201922023,
"learning_rate": 3.954751431400524e-05,
"loss": 0.4195,
"num_tokens": 123433327.0,
"step": 520
},
{
"epoch": 0.35845352906033967,
"grad_norm": 0.7322510070215393,
"learning_rate": 3.933200452137698e-05,
"loss": 0.4432,
"num_tokens": 125322825.0,
"step": 525
},
{
"epoch": 0.3618673721942477,
"grad_norm": 0.5768815019253595,
"learning_rate": 3.911498373085596e-05,
"loss": 0.4258,
"num_tokens": 127130221.0,
"step": 530
},
{
"epoch": 0.36528121532815566,
"grad_norm": 0.591651997040178,
"learning_rate": 3.889647965693101e-05,
"loss": 0.4244,
"num_tokens": 128997742.0,
"step": 535
},
{
"epoch": 0.3686950584620637,
"grad_norm": 0.5344728835769015,
"learning_rate": 3.867652020351264e-05,
"loss": 0.4241,
"num_tokens": 130995215.0,
"step": 540
},
{
"epoch": 0.37210890159597165,
"grad_norm": 0.5325744114113397,
"learning_rate": 3.845513346036958e-05,
"loss": 0.4228,
"num_tokens": 133018839.0,
"step": 545
},
{
"epoch": 0.3755227447298797,
"grad_norm": 0.6631108435559407,
"learning_rate": 3.823234769954158e-05,
"loss": 0.421,
"num_tokens": 134832930.0,
"step": 550
},
{
"epoch": 0.37893658786378764,
"grad_norm": 0.6928079184781692,
"learning_rate": 3.8008191371729017e-05,
"loss": 0.4281,
"num_tokens": 136698114.0,
"step": 555
},
{
"epoch": 0.38235043099769567,
"grad_norm": 0.5955134031861724,
"learning_rate": 3.778269310265952e-05,
"loss": 0.4242,
"num_tokens": 138517838.0,
"step": 560
},
{
"epoch": 0.38576427413160363,
"grad_norm": 0.5869430753929097,
"learning_rate": 3.7555881689432424e-05,
"loss": 0.4348,
"num_tokens": 140490351.0,
"step": 565
},
{
"epoch": 0.38917811726551166,
"grad_norm": 0.8509776377325078,
"learning_rate": 3.73277860968412e-05,
"loss": 0.4263,
"num_tokens": 142410473.0,
"step": 570
},
{
"epoch": 0.3925919603994196,
"grad_norm": 0.5861313037171644,
"learning_rate": 3.709843545367456e-05,
"loss": 0.4243,
"num_tokens": 144380368.0,
"step": 575
},
{
"epoch": 0.39600580353332765,
"grad_norm": 0.4286584654592609,
"learning_rate": 3.6867859048996595e-05,
"loss": 0.426,
"num_tokens": 146330816.0,
"step": 580
},
{
"epoch": 0.3994196466672356,
"grad_norm": 0.7753370232383693,
"learning_rate": 3.663608632840638e-05,
"loss": 0.4153,
"num_tokens": 148170239.0,
"step": 585
},
{
"epoch": 0.40283348980114364,
"grad_norm": 0.7689293256752312,
"learning_rate": 3.640314689027768e-05,
"loss": 0.4241,
"num_tokens": 150112731.0,
"step": 590
},
{
"epoch": 0.4062473329350516,
"grad_norm": 0.6665265300249069,
"learning_rate": 3.616907048197917e-05,
"loss": 0.4189,
"num_tokens": 152035344.0,
"step": 595
},
{
"epoch": 0.40966117606895963,
"grad_norm": 0.5723446787640218,
"learning_rate": 3.5933886996075435e-05,
"loss": 0.4167,
"num_tokens": 154002716.0,
"step": 600
},
{
"epoch": 0.41307501920286765,
"grad_norm": 0.6694354277387731,
"learning_rate": 3.5697626466509663e-05,
"loss": 0.4224,
"num_tokens": 155973989.0,
"step": 605
},
{
"epoch": 0.4164888623367756,
"grad_norm": 0.5019251612975679,
"learning_rate": 3.546031906476818e-05,
"loss": 0.4367,
"num_tokens": 157917749.0,
"step": 610
},
{
"epoch": 0.41990270547068365,
"grad_norm": 0.5366028805927413,
"learning_rate": 3.5221995096027335e-05,
"loss": 0.4223,
"num_tokens": 159769673.0,
"step": 615
},
{
"epoch": 0.4233165486045916,
"grad_norm": 0.6307389093268616,
"learning_rate": 3.498268499528351e-05,
"loss": 0.4296,
"num_tokens": 161702363.0,
"step": 620
},
{
"epoch": 0.42673039173849964,
"grad_norm": 0.5185000780295823,
"learning_rate": 3.474241932346637e-05,
"loss": 0.4229,
"num_tokens": 163583753.0,
"step": 625
},
{
"epoch": 0.4301442348724076,
"grad_norm": 0.6074107938423772,
"learning_rate": 3.450122876353609e-05,
"loss": 0.423,
"num_tokens": 165432734.0,
"step": 630
},
{
"epoch": 0.43355807800631563,
"grad_norm": 0.533346072388862,
"learning_rate": 3.42591441165651e-05,
"loss": 0.4341,
"num_tokens": 167313920.0,
"step": 635
},
{
"epoch": 0.4369719211402236,
"grad_norm": 0.5550506682873664,
"learning_rate": 3.4016196297804516e-05,
"loss": 0.414,
"num_tokens": 169298285.0,
"step": 640
},
{
"epoch": 0.4403857642741316,
"grad_norm": 0.5664369905865961,
"learning_rate": 3.3772416332736266e-05,
"loss": 0.4415,
"num_tokens": 171315518.0,
"step": 645
},
{
"epoch": 0.4437996074080396,
"grad_norm": 0.5095443091018487,
"learning_rate": 3.352783535311093e-05,
"loss": 0.4144,
"num_tokens": 173090546.0,
"step": 650
},
{
"epoch": 0.4472134505419476,
"grad_norm": 0.702407462685812,
"learning_rate": 3.3282484592972064e-05,
"loss": 0.4229,
"num_tokens": 175013822.0,
"step": 655
},
{
"epoch": 0.4506272936758556,
"grad_norm": 0.4562607019189676,
"learning_rate": 3.3036395384667545e-05,
"loss": 0.4117,
"num_tokens": 176955808.0,
"step": 660
},
{
"epoch": 0.4540411368097636,
"grad_norm": 0.5524213808614263,
"learning_rate": 3.278959915484822e-05,
"loss": 0.4228,
"num_tokens": 178848221.0,
"step": 665
},
{
"epoch": 0.45745497994367157,
"grad_norm": 0.7175125912985042,
"learning_rate": 3.2542127420454675e-05,
"loss": 0.4205,
"num_tokens": 180778091.0,
"step": 670
},
{
"epoch": 0.4608688230775796,
"grad_norm": 0.5768863903803467,
"learning_rate": 3.229401178469231e-05,
"loss": 0.4087,
"num_tokens": 182763012.0,
"step": 675
},
{
"epoch": 0.46428266621148756,
"grad_norm": 0.5647802515717784,
"learning_rate": 3.2045283932995465e-05,
"loss": 0.4245,
"num_tokens": 184824341.0,
"step": 680
},
{
"epoch": 0.4676965093453956,
"grad_norm": 0.4620392448650056,
"learning_rate": 3.1795975628981164e-05,
"loss": 0.4093,
"num_tokens": 186756487.0,
"step": 685
},
{
"epoch": 0.47111035247930355,
"grad_norm": 0.5118022386772281,
"learning_rate": 3.154611871039264e-05,
"loss": 0.4115,
"num_tokens": 188672319.0,
"step": 690
},
{
"epoch": 0.4745241956132116,
"grad_norm": 0.5561299324608565,
"learning_rate": 3.1295745085033565e-05,
"loss": 0.4215,
"num_tokens": 190589583.0,
"step": 695
},
{
"epoch": 0.47793803874711954,
"grad_norm": 0.4691354993514919,
"learning_rate": 3.104488672669332e-05,
"loss": 0.418,
"num_tokens": 192492174.0,
"step": 700
},
{
"epoch": 0.48135188188102757,
"grad_norm": 0.5755352685196231,
"learning_rate": 3.079357567106375e-05,
"loss": 0.4265,
"num_tokens": 194432665.0,
"step": 705
},
{
"epoch": 0.4847657250149356,
"grad_norm": 0.4711522992107647,
"learning_rate": 3.05418440116481e-05,
"loss": 0.4245,
"num_tokens": 196379920.0,
"step": 710
},
{
"epoch": 0.48817956814884356,
"grad_norm": 0.5520972585859143,
"learning_rate": 3.0289723895662524e-05,
"loss": 0.4285,
"num_tokens": 198398055.0,
"step": 715
},
{
"epoch": 0.4915934112827516,
"grad_norm": 0.6345781690749602,
"learning_rate": 3.0037247519930757e-05,
"loss": 0.4242,
"num_tokens": 200270832.0,
"step": 720
},
{
"epoch": 0.49500725441665955,
"grad_norm": 0.5825312523831861,
"learning_rate": 2.9784447126772437e-05,
"loss": 0.4003,
"num_tokens": 202109491.0,
"step": 725
},
{
"epoch": 0.49842109755056757,
"grad_norm": 0.47408045365479284,
"learning_rate": 2.9531354999885607e-05,
"loss": 0.4178,
"num_tokens": 203930534.0,
"step": 730
},
{
"epoch": 0.5018349406844755,
"grad_norm": 0.5325996103792026,
"learning_rate": 2.9278003460223986e-05,
"loss": 0.4077,
"num_tokens": 205892837.0,
"step": 735
},
{
"epoch": 0.5052487838183836,
"grad_norm": 0.5298839794980272,
"learning_rate": 2.902442486186941e-05,
"loss": 0.4176,
"num_tokens": 207899891.0,
"step": 740
},
{
"epoch": 0.5086626269522916,
"grad_norm": 0.4998105699659344,
"learning_rate": 2.8770651587900078e-05,
"loss": 0.4133,
"num_tokens": 209877182.0,
"step": 745
},
{
"epoch": 0.5120764700861995,
"grad_norm": 0.5133764198248255,
"learning_rate": 2.8516716046255115e-05,
"loss": 0.423,
"num_tokens": 211819088.0,
"step": 750
},
{
"epoch": 0.5154903132201075,
"grad_norm": 0.5310538979020579,
"learning_rate": 2.8262650665595914e-05,
"loss": 0.4019,
"num_tokens": 213708565.0,
"step": 755
},
{
"epoch": 0.5189041563540155,
"grad_norm": 0.5445101655689776,
"learning_rate": 2.800848789116489e-05,
"loss": 0.4149,
"num_tokens": 215508938.0,
"step": 760
},
{
"epoch": 0.5223179994879236,
"grad_norm": 0.6540766089094884,
"learning_rate": 2.775426018064205e-05,
"loss": 0.4203,
"num_tokens": 217491244.0,
"step": 765
},
{
"epoch": 0.5257318426218315,
"grad_norm": 0.5395770305665898,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.408,
"num_tokens": 219339581.0,
"step": 770
},
{
"epoch": 0.5291456857557395,
"grad_norm": 0.48365089981209847,
"learning_rate": 2.7245739819357964e-05,
"loss": 0.4214,
"num_tokens": 221254680.0,
"step": 775
},
{
"epoch": 0.5325595288896475,
"grad_norm": 0.460635944445485,
"learning_rate": 2.699151210883512e-05,
"loss": 0.4141,
"num_tokens": 223144608.0,
"step": 780
},
{
"epoch": 0.5359733720235555,
"grad_norm": 0.4304625584138572,
"learning_rate": 2.6737349334404087e-05,
"loss": 0.4067,
"num_tokens": 225087725.0,
"step": 785
},
{
"epoch": 0.5393872151574635,
"grad_norm": 0.44701240718703694,
"learning_rate": 2.6483283953744897e-05,
"loss": 0.4011,
"num_tokens": 226868036.0,
"step": 790
},
{
"epoch": 0.5428010582913715,
"grad_norm": 0.5014347366716482,
"learning_rate": 2.622934841209993e-05,
"loss": 0.4171,
"num_tokens": 228775587.0,
"step": 795
},
{
"epoch": 0.5462149014252795,
"grad_norm": 0.4975075619084584,
"learning_rate": 2.5975575138130597e-05,
"loss": 0.4181,
"num_tokens": 230614734.0,
"step": 800
},
{
"epoch": 0.5496287445591875,
"grad_norm": 0.5147269313286894,
"learning_rate": 2.5721996539776023e-05,
"loss": 0.4217,
"num_tokens": 232514049.0,
"step": 805
},
{
"epoch": 0.5530425876930956,
"grad_norm": 0.4747545337319586,
"learning_rate": 2.5468645000114395e-05,
"loss": 0.4127,
"num_tokens": 234444977.0,
"step": 810
},
{
"epoch": 0.5564564308270035,
"grad_norm": 0.4903248400626682,
"learning_rate": 2.521555287322757e-05,
"loss": 0.4043,
"num_tokens": 236352441.0,
"step": 815
},
{
"epoch": 0.5598702739609115,
"grad_norm": 0.5597063633430578,
"learning_rate": 2.496275248006925e-05,
"loss": 0.4108,
"num_tokens": 238222102.0,
"step": 820
},
{
"epoch": 0.5632841170948195,
"grad_norm": 0.5100590720976775,
"learning_rate": 2.4710276104337482e-05,
"loss": 0.4031,
"num_tokens": 240193053.0,
"step": 825
},
{
"epoch": 0.5666979602287275,
"grad_norm": 0.5618375369863093,
"learning_rate": 2.4458155988351907e-05,
"loss": 0.4155,
"num_tokens": 242035771.0,
"step": 830
},
{
"epoch": 0.5701118033626354,
"grad_norm": 0.517325444445414,
"learning_rate": 2.420642432893625e-05,
"loss": 0.4081,
"num_tokens": 244022427.0,
"step": 835
},
{
"epoch": 0.5735256464965435,
"grad_norm": 0.42604811387441327,
"learning_rate": 2.395511327330668e-05,
"loss": 0.4099,
"num_tokens": 245932222.0,
"step": 840
},
{
"epoch": 0.5769394896304515,
"grad_norm": 0.49013520367676244,
"learning_rate": 2.370425491496644e-05,
"loss": 0.4029,
"num_tokens": 247827549.0,
"step": 845
},
{
"epoch": 0.5803533327643595,
"grad_norm": 0.4851808594715651,
"learning_rate": 2.3453881289607372e-05,
"loss": 0.4055,
"num_tokens": 249632270.0,
"step": 850
},
{
"epoch": 0.5837671758982674,
"grad_norm": 0.5855416869416588,
"learning_rate": 2.3204024371018844e-05,
"loss": 0.4082,
"num_tokens": 251437745.0,
"step": 855
},
{
"epoch": 0.5871810190321755,
"grad_norm": 0.6044854619444842,
"learning_rate": 2.2954716067004534e-05,
"loss": 0.4095,
"num_tokens": 253372171.0,
"step": 860
},
{
"epoch": 0.5905948621660835,
"grad_norm": 0.5934417052542611,
"learning_rate": 2.2705988215307704e-05,
"loss": 0.4075,
"num_tokens": 255368626.0,
"step": 865
},
{
"epoch": 0.5940087052999915,
"grad_norm": 0.5359964239833354,
"learning_rate": 2.245787257954533e-05,
"loss": 0.3938,
"num_tokens": 257318060.0,
"step": 870
},
{
"epoch": 0.5974225484338994,
"grad_norm": 0.563887986329405,
"learning_rate": 2.221040084515178e-05,
"loss": 0.4065,
"num_tokens": 259300427.0,
"step": 875
},
{
"epoch": 0.6008363915678074,
"grad_norm": 0.4443245904144499,
"learning_rate": 2.1963604615332467e-05,
"loss": 0.3872,
"num_tokens": 261155573.0,
"step": 880
},
{
"epoch": 0.6042502347017155,
"grad_norm": 0.39956918103747635,
"learning_rate": 2.1717515407027938e-05,
"loss": 0.4014,
"num_tokens": 262948392.0,
"step": 885
},
{
"epoch": 0.6076640778356235,
"grad_norm": 0.4591793649544128,
"learning_rate": 2.147216464688907e-05,
"loss": 0.4134,
"num_tokens": 264928246.0,
"step": 890
},
{
"epoch": 0.6110779209695314,
"grad_norm": 0.5128192459967758,
"learning_rate": 2.1227583667263733e-05,
"loss": 0.4125,
"num_tokens": 266763637.0,
"step": 895
},
{
"epoch": 0.6144917641034394,
"grad_norm": 0.5521676175422746,
"learning_rate": 2.0983803702195486e-05,
"loss": 0.4011,
"num_tokens": 268823568.0,
"step": 900
},
{
"epoch": 0.6179056072373474,
"grad_norm": 0.49824193142191775,
"learning_rate": 2.0740855883434913e-05,
"loss": 0.4119,
"num_tokens": 270796270.0,
"step": 905
},
{
"epoch": 0.6213194503712555,
"grad_norm": 0.5108779953936516,
"learning_rate": 2.049877123646391e-05,
"loss": 0.4062,
"num_tokens": 272675016.0,
"step": 910
},
{
"epoch": 0.6247332935051635,
"grad_norm": 0.5974402147887337,
"learning_rate": 2.0257580676533637e-05,
"loss": 0.4126,
"num_tokens": 274528787.0,
"step": 915
},
{
"epoch": 0.6281471366390714,
"grad_norm": 0.49396513244608686,
"learning_rate": 2.0017315004716493e-05,
"loss": 0.3936,
"num_tokens": 276418125.0,
"step": 920
},
{
"epoch": 0.6315609797729794,
"grad_norm": 0.487366764504166,
"learning_rate": 1.9778004903972667e-05,
"loss": 0.4004,
"num_tokens": 278341722.0,
"step": 925
},
{
"epoch": 0.6349748229068874,
"grad_norm": 0.4711689645645803,
"learning_rate": 1.953968093523183e-05,
"loss": 0.397,
"num_tokens": 280248165.0,
"step": 930
},
{
"epoch": 0.6383886660407955,
"grad_norm": 0.4537279287511967,
"learning_rate": 1.9302373533490335e-05,
"loss": 0.4129,
"num_tokens": 282166215.0,
"step": 935
},
{
"epoch": 0.6418025091747034,
"grad_norm": 0.586245477819875,
"learning_rate": 1.9066113003924574e-05,
"loss": 0.411,
"num_tokens": 284060529.0,
"step": 940
},
{
"epoch": 0.6452163523086114,
"grad_norm": 0.5613407219001563,
"learning_rate": 1.8830929518020833e-05,
"loss": 0.3931,
"num_tokens": 285954023.0,
"step": 945
},
{
"epoch": 0.6486301954425194,
"grad_norm": 0.5399254636998788,
"learning_rate": 1.8596853109722323e-05,
"loss": 0.399,
"num_tokens": 287819237.0,
"step": 950
},
{
"epoch": 0.6520440385764275,
"grad_norm": 0.4520412022868816,
"learning_rate": 1.836391367159364e-05,
"loss": 0.3962,
"num_tokens": 289657591.0,
"step": 955
},
{
"epoch": 0.6554578817103354,
"grad_norm": 0.5520302927407486,
"learning_rate": 1.8132140951003414e-05,
"loss": 0.3897,
"num_tokens": 291512633.0,
"step": 960
},
{
"epoch": 0.6588717248442434,
"grad_norm": 0.4729609184343458,
"learning_rate": 1.790156454632544e-05,
"loss": 0.4123,
"num_tokens": 293487255.0,
"step": 965
},
{
"epoch": 0.6622855679781514,
"grad_norm": 0.4638796086042126,
"learning_rate": 1.7672213903158813e-05,
"loss": 0.4007,
"num_tokens": 295378178.0,
"step": 970
},
{
"epoch": 0.6656994111120594,
"grad_norm": 0.4004041498754247,
"learning_rate": 1.744411831056758e-05,
"loss": 0.3868,
"num_tokens": 297228147.0,
"step": 975
},
{
"epoch": 0.6691132542459673,
"grad_norm": 0.4693011092079871,
"learning_rate": 1.721730689734049e-05,
"loss": 0.4024,
"num_tokens": 299233885.0,
"step": 980
},
{
"epoch": 0.6725270973798754,
"grad_norm": 0.5111518020676975,
"learning_rate": 1.699180862827099e-05,
"loss": 0.3877,
"num_tokens": 301106661.0,
"step": 985
},
{
"epoch": 0.6759409405137834,
"grad_norm": 0.4806204434046691,
"learning_rate": 1.6767652300458417e-05,
"loss": 0.3903,
"num_tokens": 303007424.0,
"step": 990
},
{
"epoch": 0.6793547836476914,
"grad_norm": 0.4566261498657525,
"learning_rate": 1.654486653963043e-05,
"loss": 0.384,
"num_tokens": 304932074.0,
"step": 995
},
{
"epoch": 0.6827686267815993,
"grad_norm": 0.42269960844619753,
"learning_rate": 1.632347979648737e-05,
"loss": 0.4007,
"num_tokens": 306794512.0,
"step": 1000
},
{
"epoch": 0.6861824699155074,
"grad_norm": 0.6082446933491564,
"learning_rate": 1.6103520343068995e-05,
"loss": 0.4114,
"num_tokens": 308710513.0,
"step": 1005
},
{
"epoch": 0.6895963130494154,
"grad_norm": 0.5745984360238439,
"learning_rate": 1.588501626914404e-05,
"loss": 0.3886,
"num_tokens": 310538563.0,
"step": 1010
},
{
"epoch": 0.6930101561833234,
"grad_norm": 0.688939868912826,
"learning_rate": 1.5667995478623027e-05,
"loss": 0.4006,
"num_tokens": 312422847.0,
"step": 1015
},
{
"epoch": 0.6964239993172314,
"grad_norm": 0.5455006930867037,
"learning_rate": 1.5452485685994766e-05,
"loss": 0.392,
"num_tokens": 314304209.0,
"step": 1020
},
{
"epoch": 0.6998378424511393,
"grad_norm": 0.49975087118690814,
"learning_rate": 1.5238514412787158e-05,
"loss": 0.4034,
"num_tokens": 316294604.0,
"step": 1025
},
{
"epoch": 0.7032516855850474,
"grad_norm": 0.39649261290017973,
"learning_rate": 1.5026108984052565e-05,
"loss": 0.3958,
"num_tokens": 318158111.0,
"step": 1030
},
{
"epoch": 0.7066655287189554,
"grad_norm": 0.5102610944466994,
"learning_rate": 1.4815296524878236e-05,
"loss": 0.4049,
"num_tokens": 319996964.0,
"step": 1035
},
{
"epoch": 0.7100793718528634,
"grad_norm": 0.49044740180990587,
"learning_rate": 1.4606103956922388e-05,
"loss": 0.4113,
"num_tokens": 321897848.0,
"step": 1040
},
{
"epoch": 0.7134932149867713,
"grad_norm": 0.5610768682384292,
"learning_rate": 1.4398557994976153e-05,
"loss": 0.3901,
"num_tokens": 323836050.0,
"step": 1045
},
{
"epoch": 0.7169070581206793,
"grad_norm": 0.45115509497097084,
"learning_rate": 1.419268514355197e-05,
"loss": 0.4047,
"num_tokens": 325673763.0,
"step": 1050
},
{
"epoch": 0.7203209012545874,
"grad_norm": 0.4284920867269521,
"learning_rate": 1.3988511693498868e-05,
"loss": 0.3869,
"num_tokens": 327548131.0,
"step": 1055
},
{
"epoch": 0.7237347443884954,
"grad_norm": 0.49464944285568574,
"learning_rate": 1.3786063718645027e-05,
"loss": 0.3977,
"num_tokens": 329443563.0,
"step": 1060
},
{
"epoch": 0.7271485875224033,
"grad_norm": 0.5710280894381398,
"learning_rate": 1.3585367072468014e-05,
"loss": 0.3973,
"num_tokens": 331374265.0,
"step": 1065
},
{
"epoch": 0.7305624306563113,
"grad_norm": 0.424309277242454,
"learning_rate": 1.3386447384793166e-05,
"loss": 0.3972,
"num_tokens": 333352485.0,
"step": 1070
},
{
"epoch": 0.7339762737902193,
"grad_norm": 0.4788996741106936,
"learning_rate": 1.3189330058520605e-05,
"loss": 0.4041,
"num_tokens": 335215438.0,
"step": 1075
},
{
"epoch": 0.7373901169241274,
"grad_norm": 0.4342833991078278,
"learning_rate": 1.2994040266381124e-05,
"loss": 0.4003,
"num_tokens": 337161669.0,
"step": 1080
},
{
"epoch": 0.7408039600580353,
"grad_norm": 0.43204091990934734,
"learning_rate": 1.280060294772154e-05,
"loss": 0.4081,
"num_tokens": 339139569.0,
"step": 1085
},
{
"epoch": 0.7442178031919433,
"grad_norm": 0.36722220839525554,
"learning_rate": 1.2609042805319848e-05,
"loss": 0.3982,
"num_tokens": 341159421.0,
"step": 1090
},
{
"epoch": 0.7476316463258513,
"grad_norm": 0.45921341384234504,
"learning_rate": 1.2419384302230562e-05,
"loss": 0.3941,
"num_tokens": 343058673.0,
"step": 1095
},
{
"epoch": 0.7510454894597594,
"grad_norm": 0.4734058543110404,
"learning_rate": 1.2231651658660653e-05,
"loss": 0.3853,
"num_tokens": 344849449.0,
"step": 1100
},
{
"epoch": 0.7544593325936674,
"grad_norm": 0.4426908856722868,
"learning_rate": 1.2045868848876554e-05,
"loss": 0.3867,
"num_tokens": 346706798.0,
"step": 1105
},
{
"epoch": 0.7578731757275753,
"grad_norm": 0.5059105436600692,
"learning_rate": 1.1862059598142537e-05,
"loss": 0.3928,
"num_tokens": 348493564.0,
"step": 1110
},
{
"epoch": 0.7612870188614833,
"grad_norm": 0.4830390714316328,
"learning_rate": 1.1680247379690893e-05,
"loss": 0.3871,
"num_tokens": 350327439.0,
"step": 1115
},
{
"epoch": 0.7647008619953913,
"grad_norm": 0.383477127300196,
"learning_rate": 1.1500455411724277e-05,
"loss": 0.3839,
"num_tokens": 352222780.0,
"step": 1120
},
{
"epoch": 0.7681147051292994,
"grad_norm": 0.4192236038559293,
"learning_rate": 1.1322706654450693e-05,
"loss": 0.387,
"num_tokens": 354158243.0,
"step": 1125
},
{
"epoch": 0.7715285482632073,
"grad_norm": 0.4225409862113959,
"learning_rate": 1.1147023807151319e-05,
"loss": 0.3882,
"num_tokens": 356121045.0,
"step": 1130
},
{
"epoch": 0.7749423913971153,
"grad_norm": 0.4618680342672497,
"learning_rate": 1.0973429305281755e-05,
"loss": 0.392,
"num_tokens": 358052013.0,
"step": 1135
},
{
"epoch": 0.7783562345310233,
"grad_norm": 0.4145071434270498,
"learning_rate": 1.080194531760691e-05,
"loss": 0.3845,
"num_tokens": 359922588.0,
"step": 1140
},
{
"epoch": 0.7817700776649313,
"grad_norm": 0.4648820991648914,
"learning_rate": 1.063259374336993e-05,
"loss": 0.3907,
"num_tokens": 361916248.0,
"step": 1145
},
{
"epoch": 0.7851839207988393,
"grad_norm": 0.42609495826749355,
"learning_rate": 1.0465396209495592e-05,
"loss": 0.3858,
"num_tokens": 363863339.0,
"step": 1150
},
{
"epoch": 0.7885977639327473,
"grad_norm": 0.39598818557393706,
"learning_rate": 1.0300374067828463e-05,
"loss": 0.3971,
"num_tokens": 365987286.0,
"step": 1155
},
{
"epoch": 0.7920116070666553,
"grad_norm": 0.5378540325321663,
"learning_rate": 1.0137548392406157e-05,
"loss": 0.4006,
"num_tokens": 367915269.0,
"step": 1160
},
{
"epoch": 0.7954254502005633,
"grad_norm": 0.5261694039722628,
"learning_rate": 9.976939976768094e-06,
"loss": 0.391,
"num_tokens": 369748560.0,
"step": 1165
},
{
"epoch": 0.7988392933344712,
"grad_norm": 0.45871697845064263,
"learning_rate": 9.81856933130007e-06,
"loss": 0.3884,
"num_tokens": 371728452.0,
"step": 1170
},
{
"epoch": 0.8022531364683793,
"grad_norm": 0.3849106883948326,
"learning_rate": 9.662456680615026e-06,
"loss": 0.3919,
"num_tokens": 373546125.0,
"step": 1175
},
{
"epoch": 0.8056669796022873,
"grad_norm": 0.4069296296834249,
"learning_rate": 9.50862196097022e-06,
"loss": 0.411,
"num_tokens": 375581054.0,
"step": 1180
},
{
"epoch": 0.8090808227361953,
"grad_norm": 0.3774090848188763,
"learning_rate": 9.357084817721343e-06,
"loss": 0.3908,
"num_tokens": 377458399.0,
"step": 1185
},
{
"epoch": 0.8124946658701032,
"grad_norm": 0.46564290335514785,
"learning_rate": 9.207864602813684e-06,
"loss": 0.388,
"num_tokens": 379400003.0,
"step": 1190
},
{
"epoch": 0.8159085090040112,
"grad_norm": 0.38385099187145405,
"learning_rate": 9.060980372310805e-06,
"loss": 0.3774,
"num_tokens": 381337873.0,
"step": 1195
},
{
"epoch": 0.8193223521379193,
"grad_norm": 0.5024542029662248,
"learning_rate": 8.916450883961005e-06,
"loss": 0.3955,
"num_tokens": 383250150.0,
"step": 1200
},
{
"epoch": 0.8227361952718273,
"grad_norm": 0.40892869928158526,
"learning_rate": 8.77429459480189e-06,
"loss": 0.4013,
"num_tokens": 385155779.0,
"step": 1205
},
{
"epoch": 0.8261500384057353,
"grad_norm": 0.3945484962557817,
"learning_rate": 8.634529658803322e-06,
"loss": 0.3802,
"num_tokens": 386988785.0,
"step": 1210
},
{
"epoch": 0.8295638815396432,
"grad_norm": 0.385950673206825,
"learning_rate": 8.497173924549042e-06,
"loss": 0.3825,
"num_tokens": 388925343.0,
"step": 1215
},
{
"epoch": 0.8329777246735512,
"grad_norm": 0.38739675307188914,
"learning_rate": 8.362244932957402e-06,
"loss": 0.3937,
"num_tokens": 390919568.0,
"step": 1220
},
{
"epoch": 0.8363915678074593,
"grad_norm": 0.4115485385386184,
"learning_rate": 8.229759915041243e-06,
"loss": 0.3883,
"num_tokens": 392910905.0,
"step": 1225
},
{
"epoch": 0.8398054109413673,
"grad_norm": 0.4475212067282627,
"learning_rate": 8.099735789707462e-06,
"loss": 0.3808,
"num_tokens": 394743801.0,
"step": 1230
},
{
"epoch": 0.8432192540752752,
"grad_norm": 0.4081804601752203,
"learning_rate": 7.97218916159638e-06,
"loss": 0.3852,
"num_tokens": 396575417.0,
"step": 1235
},
{
"epoch": 0.8466330972091832,
"grad_norm": 0.4183809315204868,
"learning_rate": 7.847136318961276e-06,
"loss": 0.3812,
"num_tokens": 398541087.0,
"step": 1240
},
{
"epoch": 0.8500469403430913,
"grad_norm": 0.40295579325617725,
"learning_rate": 7.724593231588272e-06,
"loss": 0.3934,
"num_tokens": 400489921.0,
"step": 1245
},
{
"epoch": 0.8534607834769993,
"grad_norm": 0.45804469948779925,
"learning_rate": 7.604575548756949e-06,
"loss": 0.3994,
"num_tokens": 402379009.0,
"step": 1250
},
{
"epoch": 0.8568746266109072,
"grad_norm": 0.38882818799193014,
"learning_rate": 7.487098597241871e-06,
"loss": 0.3908,
"num_tokens": 404352878.0,
"step": 1255
},
{
"epoch": 0.8602884697448152,
"grad_norm": 0.36846595203062904,
"learning_rate": 7.372177379355269e-06,
"loss": 0.3947,
"num_tokens": 406330141.0,
"step": 1260
},
{
"epoch": 0.8637023128787232,
"grad_norm": 0.42947478533888517,
"learning_rate": 7.259826571031192e-06,
"loss": 0.3862,
"num_tokens": 408204838.0,
"step": 1265
},
{
"epoch": 0.8671161560126313,
"grad_norm": 0.3878495259462103,
"learning_rate": 7.150060519951341e-06,
"loss": 0.4017,
"num_tokens": 410160328.0,
"step": 1270
},
{
"epoch": 0.8705299991465392,
"grad_norm": 0.37081146658298825,
"learning_rate": 7.042893243712772e-06,
"loss": 0.3815,
"num_tokens": 412112871.0,
"step": 1275
},
{
"epoch": 0.8739438422804472,
"grad_norm": 0.37469108073137014,
"learning_rate": 6.938338428037822e-06,
"loss": 0.3801,
"num_tokens": 414056837.0,
"step": 1280
},
{
"epoch": 0.8773576854143552,
"grad_norm": 0.4484507788307278,
"learning_rate": 6.836409425026376e-06,
"loss": 0.3815,
"num_tokens": 415976407.0,
"step": 1285
},
{
"epoch": 0.8807715285482632,
"grad_norm": 0.42468934286329335,
"learning_rate": 6.737119251450741e-06,
"loss": 0.3836,
"num_tokens": 417897727.0,
"step": 1290
},
{
"epoch": 0.8841853716821712,
"grad_norm": 0.3620037642681858,
"learning_rate": 6.640480587093342e-06,
"loss": 0.4026,
"num_tokens": 419994110.0,
"step": 1295
},
{
"epoch": 0.8875992148160792,
"grad_norm": 0.38344852481107256,
"learning_rate": 6.546505773127476e-06,
"loss": 0.3797,
"num_tokens": 421924847.0,
"step": 1300
},
{
"epoch": 0.8910130579499872,
"grad_norm": 0.3584332646483648,
"learning_rate": 6.455206810541276e-06,
"loss": 0.387,
"num_tokens": 423915098.0,
"step": 1305
},
{
"epoch": 0.8944269010838952,
"grad_norm": 0.42116701870642653,
"learning_rate": 6.366595358605142e-06,
"loss": 0.3832,
"num_tokens": 425893344.0,
"step": 1310
},
{
"epoch": 0.8978407442178032,
"grad_norm": 0.3521494410121148,
"learning_rate": 6.280682733382796e-06,
"loss": 0.394,
"num_tokens": 427909649.0,
"step": 1315
},
{
"epoch": 0.9012545873517112,
"grad_norm": 0.3983072854935981,
"learning_rate": 6.197479906286184e-06,
"loss": 0.3819,
"num_tokens": 429848299.0,
"step": 1320
},
{
"epoch": 0.9046684304856192,
"grad_norm": 0.3595529616776975,
"learning_rate": 6.116997502674356e-06,
"loss": 0.3836,
"num_tokens": 431823107.0,
"step": 1325
},
{
"epoch": 0.9080822736195272,
"grad_norm": 0.42462919737239924,
"learning_rate": 6.039245800496585e-06,
"loss": 0.3842,
"num_tokens": 433654195.0,
"step": 1330
},
{
"epoch": 0.9114961167534352,
"grad_norm": 0.5008600053663176,
"learning_rate": 5.964234728979824e-06,
"loss": 0.3855,
"num_tokens": 435602596.0,
"step": 1335
},
{
"epoch": 0.9149099598873431,
"grad_norm": 0.4939861659729308,
"learning_rate": 5.8919738673606936e-06,
"loss": 0.3968,
"num_tokens": 437539406.0,
"step": 1340
},
{
"epoch": 0.9183238030212512,
"grad_norm": 0.418502345365578,
"learning_rate": 5.8224724436621695e-06,
"loss": 0.377,
"num_tokens": 439392309.0,
"step": 1345
},
{
"epoch": 0.9217376461551592,
"grad_norm": 0.3865370260878832,
"learning_rate": 5.75573933351514e-06,
"loss": 0.3907,
"num_tokens": 441289639.0,
"step": 1350
},
{
"epoch": 0.9251514892890672,
"grad_norm": 0.36155969808913657,
"learning_rate": 5.6917830590249315e-06,
"loss": 0.3939,
"num_tokens": 443217058.0,
"step": 1355
},
{
"epoch": 0.9285653324229751,
"grad_norm": 0.4469202947870825,
"learning_rate": 5.63061178768302e-06,
"loss": 0.3908,
"num_tokens": 445046539.0,
"step": 1360
},
{
"epoch": 0.9319791755568831,
"grad_norm": 0.36026663716494844,
"learning_rate": 5.5722333313239804e-06,
"loss": 0.3765,
"num_tokens": 446908896.0,
"step": 1365
},
{
"epoch": 0.9353930186907912,
"grad_norm": 0.4386391881600453,
"learning_rate": 5.5166551451279065e-06,
"loss": 0.3846,
"num_tokens": 448884969.0,
"step": 1370
},
{
"epoch": 0.9388068618246992,
"grad_norm": 0.3656587817230275,
"learning_rate": 5.463884326668339e-06,
"loss": 0.3884,
"num_tokens": 450849803.0,
"step": 1375
},
{
"epoch": 0.9422207049586071,
"grad_norm": 0.473339948542882,
"learning_rate": 5.413927615005879e-06,
"loss": 0.3918,
"num_tokens": 452778895.0,
"step": 1380
},
{
"epoch": 0.9456345480925151,
"grad_norm": 0.4327834800966883,
"learning_rate": 5.366791389827578e-06,
"loss": 0.3901,
"num_tokens": 454738409.0,
"step": 1385
},
{
"epoch": 0.9490483912264231,
"grad_norm": 0.38831303378458926,
"learning_rate": 5.322481670632229e-06,
"loss": 0.3868,
"num_tokens": 456587467.0,
"step": 1390
},
{
"epoch": 0.9524622343603312,
"grad_norm": 0.3682504260150772,
"learning_rate": 5.281004115961642e-06,
"loss": 0.397,
"num_tokens": 458487641.0,
"step": 1395
},
{
"epoch": 0.9558760774942391,
"grad_norm": 0.45399594742338995,
"learning_rate": 5.242364022678038e-06,
"loss": 0.3822,
"num_tokens": 460386310.0,
"step": 1400
},
{
"epoch": 0.9592899206281471,
"grad_norm": 0.36213676752021623,
"learning_rate": 5.206566325287607e-06,
"loss": 0.375,
"num_tokens": 462390119.0,
"step": 1405
},
{
"epoch": 0.9627037637620551,
"grad_norm": 0.3623211794036725,
"learning_rate": 5.173615595310344e-06,
"loss": 0.3797,
"num_tokens": 464283440.0,
"step": 1410
},
{
"epoch": 0.9661176068959632,
"grad_norm": 0.40375519775096763,
"learning_rate": 5.143516040696265e-06,
"loss": 0.3849,
"num_tokens": 466103896.0,
"step": 1415
},
{
"epoch": 0.9695314500298712,
"grad_norm": 0.3806537371766384,
"learning_rate": 5.116271505288018e-06,
"loss": 0.3876,
"num_tokens": 467974167.0,
"step": 1420
},
{
"epoch": 0.9729452931637791,
"grad_norm": 0.36856238291556104,
"learning_rate": 5.0918854683300105e-06,
"loss": 0.3965,
"num_tokens": 469915807.0,
"step": 1425
},
{
"epoch": 0.9763591362976871,
"grad_norm": 0.4291715872204905,
"learning_rate": 5.070361044024103e-06,
"loss": 0.3891,
"num_tokens": 471734184.0,
"step": 1430
},
{
"epoch": 0.9797729794315951,
"grad_norm": 0.3580402114466082,
"learning_rate": 5.051700981131903e-06,
"loss": 0.3829,
"num_tokens": 473649866.0,
"step": 1435
},
{
"epoch": 0.9831868225655032,
"grad_norm": 0.3475223629471517,
"learning_rate": 5.035907662623737e-06,
"loss": 0.3853,
"num_tokens": 475543359.0,
"step": 1440
},
{
"epoch": 0.9866006656994111,
"grad_norm": 0.39539394938410216,
"learning_rate": 5.02298310537434e-06,
"loss": 0.3862,
"num_tokens": 477474338.0,
"step": 1445
},
{
"epoch": 0.9900145088333191,
"grad_norm": 0.3692684131085188,
"learning_rate": 5.0129289599052915e-06,
"loss": 0.3717,
"num_tokens": 479401233.0,
"step": 1450
},
{
"epoch": 0.9934283519672271,
"grad_norm": 0.4177627041474193,
"learning_rate": 5.005746510174235e-06,
"loss": 0.3924,
"num_tokens": 481235568.0,
"step": 1455
},
{
"epoch": 0.9968421951011351,
"grad_norm": 0.3671477184652084,
"learning_rate": 5.001436673410903e-06,
"loss": 0.3852,
"num_tokens": 483184723.0,
"step": 1460
},
{
"epoch": 0.9995732696082615,
"step": 1464,
"total_flos": 2.8225135896499847e+19,
"train_loss": 0.0,
"train_runtime": 1.6907,
"train_samples_per_second": 55441.67,
"train_steps_per_second": 865.934
}
],
"logging_steps": 5,
"max_steps": 1464,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.8225135896499847e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}