{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9999854016729683, "eval_steps": 500, "global_step": 102750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014598327031722165, "grad_norm": 17.248261527435126, "learning_rate": 2.4330900243309006e-08, "loss": 1.979, "step": 5 }, { "epoch": 0.0002919665406344433, "grad_norm": 19.627295806790222, "learning_rate": 4.866180048661801e-08, "loss": 2.1697, "step": 10 }, { "epoch": 0.00043794981095166496, "grad_norm": 19.027712030076067, "learning_rate": 7.299270072992701e-08, "loss": 2.0359, "step": 15 }, { "epoch": 0.0005839330812688866, "grad_norm": 19.13509051453691, "learning_rate": 9.732360097323602e-08, "loss": 2.0142, "step": 20 }, { "epoch": 0.0007299163515861083, "grad_norm": 14.98706847595891, "learning_rate": 1.21654501216545e-07, "loss": 1.9433, "step": 25 }, { "epoch": 0.0008758996219033299, "grad_norm": 17.50374739052231, "learning_rate": 1.4598540145985402e-07, "loss": 2.0821, "step": 30 }, { "epoch": 0.0010218828922205516, "grad_norm": 23.180932329465502, "learning_rate": 1.7031630170316303e-07, "loss": 2.1079, "step": 35 }, { "epoch": 0.0011678661625377732, "grad_norm": 12.684886944635224, "learning_rate": 1.9464720194647204e-07, "loss": 2.0209, "step": 40 }, { "epoch": 0.0013138494328549949, "grad_norm": 18.360116042756843, "learning_rate": 2.1897810218978106e-07, "loss": 2.006, "step": 45 }, { "epoch": 0.0014598327031722165, "grad_norm": 15.538193220468331, "learning_rate": 2.4330900243309e-07, "loss": 1.8976, "step": 50 }, { "epoch": 0.0016058159734894382, "grad_norm": 16.741550852491084, "learning_rate": 2.6763990267639905e-07, "loss": 2.0428, "step": 55 }, { "epoch": 0.0017517992438066598, "grad_norm": 14.070233960696449, "learning_rate": 2.9197080291970804e-07, "loss": 1.8271, "step": 60 }, { "epoch": 0.0018977825141238815, "grad_norm": 14.209161598610043, "learning_rate": 3.163017031630171e-07, "loss": 1.9059, "step": 65 }, { "epoch": 0.002043765784441103, "grad_norm": 14.964224055815833, "learning_rate": 3.4063260340632607e-07, "loss": 1.704, "step": 70 }, { "epoch": 0.0021897490547583246, "grad_norm": 10.656628420539201, "learning_rate": 3.6496350364963505e-07, "loss": 1.6516, "step": 75 }, { "epoch": 0.0023357323250755464, "grad_norm": 9.850317896728008, "learning_rate": 3.892944038929441e-07, "loss": 1.6478, "step": 80 }, { "epoch": 0.002481715595392768, "grad_norm": 6.439691166082175, "learning_rate": 4.13625304136253e-07, "loss": 1.4833, "step": 85 }, { "epoch": 0.0026276988657099897, "grad_norm": 4.899448105568026, "learning_rate": 4.379562043795621e-07, "loss": 1.3485, "step": 90 }, { "epoch": 0.002773682136027211, "grad_norm": 4.3858539236842375, "learning_rate": 4.622871046228711e-07, "loss": 1.3588, "step": 95 }, { "epoch": 0.002919665406344433, "grad_norm": 3.188002518648283, "learning_rate": 4.8661800486618e-07, "loss": 1.2154, "step": 100 }, { "epoch": 0.0030656486766616545, "grad_norm": 3.567712703134465, "learning_rate": 5.109489051094891e-07, "loss": 1.3257, "step": 105 }, { "epoch": 0.0032116319469788763, "grad_norm": 2.1985356713541946, "learning_rate": 5.352798053527981e-07, "loss": 1.2593, "step": 110 }, { "epoch": 0.0033576152172960978, "grad_norm": 1.839367519805932, "learning_rate": 5.59610705596107e-07, "loss": 1.2081, "step": 115 }, { "epoch": 0.0035035984876133196, "grad_norm": 1.819930677952983, "learning_rate": 5.839416058394161e-07, "loss": 1.1885, "step": 120 }, { "epoch": 0.003649581757930541, "grad_norm": 1.848506042018173, "learning_rate": 6.082725060827251e-07, "loss": 1.1986, "step": 125 }, { "epoch": 0.003795565028247763, "grad_norm": 1.5262048159798136, "learning_rate": 6.326034063260342e-07, "loss": 1.156, "step": 130 }, { "epoch": 0.003941548298564985, "grad_norm": 1.565504250388118, "learning_rate": 6.569343065693431e-07, "loss": 1.0814, "step": 135 }, { "epoch": 0.004087531568882206, "grad_norm": 1.5251390374805636, "learning_rate": 6.812652068126521e-07, "loss": 1.1161, "step": 140 }, { "epoch": 0.004233514839199428, "grad_norm": 1.4845848494981926, "learning_rate": 7.055961070559611e-07, "loss": 1.1046, "step": 145 }, { "epoch": 0.004379498109516649, "grad_norm": 1.430471633533247, "learning_rate": 7.299270072992701e-07, "loss": 1.0299, "step": 150 }, { "epoch": 0.004525481379833871, "grad_norm": 1.2669949211900218, "learning_rate": 7.54257907542579e-07, "loss": 1.0546, "step": 155 }, { "epoch": 0.004671464650151093, "grad_norm": 1.1896786441745557, "learning_rate": 7.785888077858882e-07, "loss": 0.9991, "step": 160 }, { "epoch": 0.004817447920468314, "grad_norm": 1.1972329816689211, "learning_rate": 8.029197080291971e-07, "loss": 1.0447, "step": 165 }, { "epoch": 0.004963431190785536, "grad_norm": 1.1716525361354055, "learning_rate": 8.27250608272506e-07, "loss": 0.9815, "step": 170 }, { "epoch": 0.005109414461102758, "grad_norm": 1.2015124939615707, "learning_rate": 8.515815085158151e-07, "loss": 1.0033, "step": 175 }, { "epoch": 0.0052553977314199795, "grad_norm": 1.069873860952257, "learning_rate": 8.759124087591242e-07, "loss": 0.9258, "step": 180 }, { "epoch": 0.005401381001737201, "grad_norm": 1.1748585863836318, "learning_rate": 9.002433090024332e-07, "loss": 0.937, "step": 185 }, { "epoch": 0.005547364272054422, "grad_norm": 1.2293604496752308, "learning_rate": 9.245742092457422e-07, "loss": 0.956, "step": 190 }, { "epoch": 0.005693347542371645, "grad_norm": 1.2532152211900447, "learning_rate": 9.489051094890511e-07, "loss": 0.9413, "step": 195 }, { "epoch": 0.005839330812688866, "grad_norm": 1.1938540755550202, "learning_rate": 9.7323600973236e-07, "loss": 0.8888, "step": 200 }, { "epoch": 0.0059853140830060875, "grad_norm": 1.206782313025968, "learning_rate": 9.97566909975669e-07, "loss": 1.0088, "step": 205 }, { "epoch": 0.006131297353323309, "grad_norm": 1.0688286098760433, "learning_rate": 1.0218978102189781e-06, "loss": 0.9468, "step": 210 }, { "epoch": 0.00627728062364053, "grad_norm": 1.1157350567374333, "learning_rate": 1.0462287104622873e-06, "loss": 0.9127, "step": 215 }, { "epoch": 0.006423263893957753, "grad_norm": 1.1637070826547746, "learning_rate": 1.0705596107055962e-06, "loss": 0.8953, "step": 220 }, { "epoch": 0.006569247164274974, "grad_norm": 1.0728031590164238, "learning_rate": 1.0948905109489052e-06, "loss": 0.8908, "step": 225 }, { "epoch": 0.0067152304345921955, "grad_norm": 1.1063978917729096, "learning_rate": 1.119221411192214e-06, "loss": 0.9136, "step": 230 }, { "epoch": 0.006861213704909417, "grad_norm": 1.1004940068983993, "learning_rate": 1.1435523114355232e-06, "loss": 0.8573, "step": 235 }, { "epoch": 0.007007196975226639, "grad_norm": 1.152946422281967, "learning_rate": 1.1678832116788322e-06, "loss": 0.8886, "step": 240 }, { "epoch": 0.007153180245543861, "grad_norm": 1.2669785316375102, "learning_rate": 1.192214111922141e-06, "loss": 0.9035, "step": 245 }, { "epoch": 0.007299163515861082, "grad_norm": 1.0673108133978506, "learning_rate": 1.2165450121654502e-06, "loss": 0.8707, "step": 250 }, { "epoch": 0.007445146786178304, "grad_norm": 0.9603149216210788, "learning_rate": 1.2408759124087592e-06, "loss": 0.9042, "step": 255 }, { "epoch": 0.007591130056495526, "grad_norm": 1.1191932529381867, "learning_rate": 1.2652068126520683e-06, "loss": 0.8352, "step": 260 }, { "epoch": 0.007737113326812747, "grad_norm": 1.0520678748394148, "learning_rate": 1.289537712895377e-06, "loss": 0.8662, "step": 265 }, { "epoch": 0.00788309659712997, "grad_norm": 1.1239141153067842, "learning_rate": 1.3138686131386862e-06, "loss": 0.8559, "step": 270 }, { "epoch": 0.00802907986744719, "grad_norm": 1.1502318234173345, "learning_rate": 1.3381995133819951e-06, "loss": 0.8131, "step": 275 }, { "epoch": 0.008175063137764412, "grad_norm": 1.1218149785688973, "learning_rate": 1.3625304136253043e-06, "loss": 0.8727, "step": 280 }, { "epoch": 0.008321046408081633, "grad_norm": 1.1206068842484291, "learning_rate": 1.3868613138686132e-06, "loss": 0.9182, "step": 285 }, { "epoch": 0.008467029678398855, "grad_norm": 1.0725810633194763, "learning_rate": 1.4111922141119221e-06, "loss": 0.8251, "step": 290 }, { "epoch": 0.008613012948716078, "grad_norm": 1.0990732314075116, "learning_rate": 1.4355231143552313e-06, "loss": 0.7909, "step": 295 }, { "epoch": 0.008758996219033298, "grad_norm": 1.152447214566088, "learning_rate": 1.4598540145985402e-06, "loss": 0.8336, "step": 300 }, { "epoch": 0.00890497948935052, "grad_norm": 1.1364129687488391, "learning_rate": 1.4841849148418493e-06, "loss": 0.897, "step": 305 }, { "epoch": 0.009050962759667743, "grad_norm": 1.1417722799073817, "learning_rate": 1.508515815085158e-06, "loss": 0.8461, "step": 310 }, { "epoch": 0.009196946029984963, "grad_norm": 1.139564970272825, "learning_rate": 1.5328467153284672e-06, "loss": 0.8404, "step": 315 }, { "epoch": 0.009342929300302186, "grad_norm": 1.1545940184252468, "learning_rate": 1.5571776155717764e-06, "loss": 0.8268, "step": 320 }, { "epoch": 0.009488912570619406, "grad_norm": 1.0989089810954717, "learning_rate": 1.5815085158150853e-06, "loss": 0.7862, "step": 325 }, { "epoch": 0.009634895840936629, "grad_norm": 1.2482562770418066, "learning_rate": 1.6058394160583942e-06, "loss": 0.8079, "step": 330 }, { "epoch": 0.009780879111253851, "grad_norm": 1.1068800442081592, "learning_rate": 1.6301703163017032e-06, "loss": 0.8127, "step": 335 }, { "epoch": 0.009926862381571071, "grad_norm": 1.1398724892070935, "learning_rate": 1.654501216545012e-06, "loss": 0.8121, "step": 340 }, { "epoch": 0.010072845651888294, "grad_norm": 1.1343721207793218, "learning_rate": 1.6788321167883214e-06, "loss": 0.7859, "step": 345 }, { "epoch": 0.010218828922205516, "grad_norm": 1.0942866896462946, "learning_rate": 1.7031630170316302e-06, "loss": 0.7969, "step": 350 }, { "epoch": 0.010364812192522737, "grad_norm": 1.1227761315028448, "learning_rate": 1.727493917274939e-06, "loss": 0.8676, "step": 355 }, { "epoch": 0.010510795462839959, "grad_norm": 1.0357896534933713, "learning_rate": 1.7518248175182485e-06, "loss": 0.7866, "step": 360 }, { "epoch": 0.01065677873315718, "grad_norm": 1.0048090781457149, "learning_rate": 1.7761557177615574e-06, "loss": 0.8284, "step": 365 }, { "epoch": 0.010802762003474402, "grad_norm": 1.0483833581022282, "learning_rate": 1.8004866180048663e-06, "loss": 0.7482, "step": 370 }, { "epoch": 0.010948745273791624, "grad_norm": 1.1209249697463157, "learning_rate": 1.824817518248175e-06, "loss": 0.7905, "step": 375 }, { "epoch": 0.011094728544108845, "grad_norm": 1.041838481064581, "learning_rate": 1.8491484184914844e-06, "loss": 0.8192, "step": 380 }, { "epoch": 0.011240711814426067, "grad_norm": 1.1217268228963113, "learning_rate": 1.8734793187347933e-06, "loss": 0.8098, "step": 385 }, { "epoch": 0.01138669508474329, "grad_norm": 1.0983204133226774, "learning_rate": 1.8978102189781023e-06, "loss": 0.7636, "step": 390 }, { "epoch": 0.01153267835506051, "grad_norm": 1.0868703237701054, "learning_rate": 1.9221411192214114e-06, "loss": 0.8084, "step": 395 }, { "epoch": 0.011678661625377732, "grad_norm": 1.1117253614392386, "learning_rate": 1.94647201946472e-06, "loss": 0.7896, "step": 400 }, { "epoch": 0.011824644895694953, "grad_norm": 1.0629872646914156, "learning_rate": 1.9708029197080293e-06, "loss": 0.7505, "step": 405 }, { "epoch": 0.011970628166012175, "grad_norm": 1.2396395668286473, "learning_rate": 1.995133819951338e-06, "loss": 0.8103, "step": 410 }, { "epoch": 0.012116611436329397, "grad_norm": 1.1428059715271053, "learning_rate": 2.0194647201946476e-06, "loss": 0.7967, "step": 415 }, { "epoch": 0.012262594706646618, "grad_norm": 1.1726900953491965, "learning_rate": 2.0437956204379563e-06, "loss": 0.8046, "step": 420 }, { "epoch": 0.01240857797696384, "grad_norm": 1.1009427767066897, "learning_rate": 2.068126520681265e-06, "loss": 0.7488, "step": 425 }, { "epoch": 0.01255456124728106, "grad_norm": 1.0418600046987547, "learning_rate": 2.0924574209245746e-06, "loss": 0.7388, "step": 430 }, { "epoch": 0.012700544517598283, "grad_norm": 1.0588869294264958, "learning_rate": 2.1167883211678833e-06, "loss": 0.7714, "step": 435 }, { "epoch": 0.012846527787915505, "grad_norm": 1.0880134783335718, "learning_rate": 2.1411192214111924e-06, "loss": 0.712, "step": 440 }, { "epoch": 0.012992511058232726, "grad_norm": 1.2413799314665148, "learning_rate": 2.165450121654501e-06, "loss": 0.7866, "step": 445 }, { "epoch": 0.013138494328549948, "grad_norm": 1.1335363639303422, "learning_rate": 2.1897810218978103e-06, "loss": 0.7793, "step": 450 }, { "epoch": 0.01328447759886717, "grad_norm": 1.0617263455756567, "learning_rate": 2.2141119221411194e-06, "loss": 0.768, "step": 455 }, { "epoch": 0.013430460869184391, "grad_norm": 1.2130908606629078, "learning_rate": 2.238442822384428e-06, "loss": 0.8063, "step": 460 }, { "epoch": 0.013576444139501613, "grad_norm": 1.0619936648242452, "learning_rate": 2.2627737226277373e-06, "loss": 0.7615, "step": 465 }, { "epoch": 0.013722427409818834, "grad_norm": 1.162260826366757, "learning_rate": 2.2871046228710465e-06, "loss": 0.8044, "step": 470 }, { "epoch": 0.013868410680136056, "grad_norm": 1.0928515319863426, "learning_rate": 2.311435523114355e-06, "loss": 0.7611, "step": 475 }, { "epoch": 0.014014393950453279, "grad_norm": 1.0513182472518805, "learning_rate": 2.3357664233576643e-06, "loss": 0.7777, "step": 480 }, { "epoch": 0.0141603772207705, "grad_norm": 1.07516867496532, "learning_rate": 2.3600973236009735e-06, "loss": 0.7578, "step": 485 }, { "epoch": 0.014306360491087721, "grad_norm": 1.0122564286198743, "learning_rate": 2.384428223844282e-06, "loss": 0.7396, "step": 490 }, { "epoch": 0.014452343761404944, "grad_norm": 1.1169949668693298, "learning_rate": 2.4087591240875913e-06, "loss": 0.8103, "step": 495 }, { "epoch": 0.014598327031722164, "grad_norm": 1.0334749430838255, "learning_rate": 2.4330900243309005e-06, "loss": 0.746, "step": 500 }, { "epoch": 0.014744310302039387, "grad_norm": 1.182560959749987, "learning_rate": 2.4574209245742096e-06, "loss": 0.8286, "step": 505 }, { "epoch": 0.014890293572356607, "grad_norm": 1.0933440111660697, "learning_rate": 2.4817518248175183e-06, "loss": 0.7701, "step": 510 }, { "epoch": 0.01503627684267383, "grad_norm": 1.194510205492082, "learning_rate": 2.506082725060827e-06, "loss": 0.7649, "step": 515 }, { "epoch": 0.015182260112991052, "grad_norm": 1.1090128868491524, "learning_rate": 2.5304136253041366e-06, "loss": 0.7653, "step": 520 }, { "epoch": 0.015328243383308272, "grad_norm": 1.0989869369726193, "learning_rate": 2.5547445255474454e-06, "loss": 0.7191, "step": 525 }, { "epoch": 0.015474226653625495, "grad_norm": 1.1418151944449637, "learning_rate": 2.579075425790754e-06, "loss": 0.7591, "step": 530 }, { "epoch": 0.015620209923942717, "grad_norm": 1.1771852395179339, "learning_rate": 2.6034063260340636e-06, "loss": 0.789, "step": 535 }, { "epoch": 0.01576619319425994, "grad_norm": 1.2072738568756187, "learning_rate": 2.6277372262773724e-06, "loss": 0.7571, "step": 540 }, { "epoch": 0.015912176464577158, "grad_norm": 1.1040625198876601, "learning_rate": 2.6520681265206815e-06, "loss": 0.7093, "step": 545 }, { "epoch": 0.01605815973489438, "grad_norm": 1.08908194045111, "learning_rate": 2.6763990267639902e-06, "loss": 0.7621, "step": 550 }, { "epoch": 0.016204143005211603, "grad_norm": 1.0960040065089307, "learning_rate": 2.7007299270072994e-06, "loss": 0.7536, "step": 555 }, { "epoch": 0.016350126275528825, "grad_norm": 1.1546375280816017, "learning_rate": 2.7250608272506085e-06, "loss": 0.7984, "step": 560 }, { "epoch": 0.016496109545846047, "grad_norm": 1.0433023290962715, "learning_rate": 2.7493917274939172e-06, "loss": 0.7731, "step": 565 }, { "epoch": 0.016642092816163266, "grad_norm": 1.1184723775498289, "learning_rate": 2.7737226277372264e-06, "loss": 0.7438, "step": 570 }, { "epoch": 0.01678807608648049, "grad_norm": 1.2110390658361487, "learning_rate": 2.7980535279805355e-06, "loss": 0.7821, "step": 575 }, { "epoch": 0.01693405935679771, "grad_norm": 1.162343741728263, "learning_rate": 2.8223844282238443e-06, "loss": 0.7306, "step": 580 }, { "epoch": 0.017080042627114933, "grad_norm": 1.09294982754361, "learning_rate": 2.8467153284671534e-06, "loss": 0.7747, "step": 585 }, { "epoch": 0.017226025897432155, "grad_norm": 1.172526415680499, "learning_rate": 2.8710462287104625e-06, "loss": 0.7501, "step": 590 }, { "epoch": 0.017372009167749378, "grad_norm": 1.0589551937167652, "learning_rate": 2.8953771289537713e-06, "loss": 0.7484, "step": 595 }, { "epoch": 0.017517992438066596, "grad_norm": 1.040459951685634, "learning_rate": 2.9197080291970804e-06, "loss": 0.6952, "step": 600 }, { "epoch": 0.01766397570838382, "grad_norm": 1.1993857073006937, "learning_rate": 2.9440389294403896e-06, "loss": 0.7498, "step": 605 }, { "epoch": 0.01780995897870104, "grad_norm": 1.1031131244658219, "learning_rate": 2.9683698296836987e-06, "loss": 0.6983, "step": 610 }, { "epoch": 0.017955942249018263, "grad_norm": 1.0409611020351133, "learning_rate": 2.9927007299270074e-06, "loss": 0.7438, "step": 615 }, { "epoch": 0.018101925519335486, "grad_norm": 1.1221997884160466, "learning_rate": 3.017031630170316e-06, "loss": 0.7407, "step": 620 }, { "epoch": 0.018247908789652705, "grad_norm": 1.1324515477564967, "learning_rate": 3.0413625304136257e-06, "loss": 0.7536, "step": 625 }, { "epoch": 0.018393892059969927, "grad_norm": 1.071146037513235, "learning_rate": 3.0656934306569344e-06, "loss": 0.7216, "step": 630 }, { "epoch": 0.01853987533028715, "grad_norm": 1.1606603729275768, "learning_rate": 3.0900243309002436e-06, "loss": 0.739, "step": 635 }, { "epoch": 0.01868585860060437, "grad_norm": 1.164604867460925, "learning_rate": 3.1143552311435527e-06, "loss": 0.7329, "step": 640 }, { "epoch": 0.018831841870921594, "grad_norm": 1.0357228581990328, "learning_rate": 3.1386861313868614e-06, "loss": 0.6852, "step": 645 }, { "epoch": 0.018977825141238813, "grad_norm": 1.2011838290001802, "learning_rate": 3.1630170316301706e-06, "loss": 0.781, "step": 650 }, { "epoch": 0.019123808411556035, "grad_norm": 1.1156914673401361, "learning_rate": 3.1873479318734793e-06, "loss": 0.7207, "step": 655 }, { "epoch": 0.019269791681873257, "grad_norm": 1.1385356975332563, "learning_rate": 3.2116788321167884e-06, "loss": 0.754, "step": 660 }, { "epoch": 0.01941577495219048, "grad_norm": 1.079629079655071, "learning_rate": 3.236009732360097e-06, "loss": 0.7256, "step": 665 }, { "epoch": 0.019561758222507702, "grad_norm": 1.1348749145050068, "learning_rate": 3.2603406326034063e-06, "loss": 0.7034, "step": 670 }, { "epoch": 0.01970774149282492, "grad_norm": 1.0637601461450767, "learning_rate": 3.2846715328467155e-06, "loss": 0.7365, "step": 675 }, { "epoch": 0.019853724763142143, "grad_norm": 1.052968638890429, "learning_rate": 3.309002433090024e-06, "loss": 0.6775, "step": 680 }, { "epoch": 0.019999708033459365, "grad_norm": 1.0663183808007253, "learning_rate": 3.3333333333333333e-06, "loss": 0.7137, "step": 685 }, { "epoch": 0.020145691303776588, "grad_norm": 1.0113734234780898, "learning_rate": 3.357664233576643e-06, "loss": 0.7468, "step": 690 }, { "epoch": 0.02029167457409381, "grad_norm": 1.085289999606216, "learning_rate": 3.381995133819951e-06, "loss": 0.7394, "step": 695 }, { "epoch": 0.020437657844411032, "grad_norm": 1.0751010242822217, "learning_rate": 3.4063260340632603e-06, "loss": 0.7219, "step": 700 }, { "epoch": 0.02058364111472825, "grad_norm": 1.100132571010049, "learning_rate": 3.43065693430657e-06, "loss": 0.722, "step": 705 }, { "epoch": 0.020729624385045473, "grad_norm": 1.1410992573917647, "learning_rate": 3.454987834549878e-06, "loss": 0.7474, "step": 710 }, { "epoch": 0.020875607655362696, "grad_norm": 0.9981912666533425, "learning_rate": 3.4793187347931878e-06, "loss": 0.6938, "step": 715 }, { "epoch": 0.021021590925679918, "grad_norm": 1.0208984860933283, "learning_rate": 3.503649635036497e-06, "loss": 0.6914, "step": 720 }, { "epoch": 0.02116757419599714, "grad_norm": 1.0821084847336737, "learning_rate": 3.527980535279805e-06, "loss": 0.7219, "step": 725 }, { "epoch": 0.02131355746631436, "grad_norm": 1.1229023278234784, "learning_rate": 3.5523114355231148e-06, "loss": 0.7249, "step": 730 }, { "epoch": 0.02145954073663158, "grad_norm": 1.131019512109436, "learning_rate": 3.576642335766423e-06, "loss": 0.7178, "step": 735 }, { "epoch": 0.021605524006948804, "grad_norm": 1.045358490910148, "learning_rate": 3.6009732360097326e-06, "loss": 0.7024, "step": 740 }, { "epoch": 0.021751507277266026, "grad_norm": 1.0569215931078801, "learning_rate": 3.6253041362530418e-06, "loss": 0.7074, "step": 745 }, { "epoch": 0.021897490547583248, "grad_norm": 1.102193836185754, "learning_rate": 3.64963503649635e-06, "loss": 0.7127, "step": 750 }, { "epoch": 0.022043473817900467, "grad_norm": 1.1341053477950076, "learning_rate": 3.6739659367396597e-06, "loss": 0.7536, "step": 755 }, { "epoch": 0.02218945708821769, "grad_norm": 1.0670675773181917, "learning_rate": 3.698296836982969e-06, "loss": 0.6704, "step": 760 }, { "epoch": 0.02233544035853491, "grad_norm": 1.0570907372600484, "learning_rate": 3.722627737226277e-06, "loss": 0.7222, "step": 765 }, { "epoch": 0.022481423628852134, "grad_norm": 1.182673323999646, "learning_rate": 3.7469586374695867e-06, "loss": 0.7384, "step": 770 }, { "epoch": 0.022627406899169356, "grad_norm": 1.1014337562617371, "learning_rate": 3.771289537712896e-06, "loss": 0.7135, "step": 775 }, { "epoch": 0.02277339016948658, "grad_norm": 1.0826206425727083, "learning_rate": 3.7956204379562045e-06, "loss": 0.7029, "step": 780 }, { "epoch": 0.022919373439803797, "grad_norm": 1.1275416857364615, "learning_rate": 3.819951338199514e-06, "loss": 0.7421, "step": 785 }, { "epoch": 0.02306535671012102, "grad_norm": 1.1396898964983322, "learning_rate": 3.844282238442823e-06, "loss": 0.7624, "step": 790 }, { "epoch": 0.023211339980438242, "grad_norm": 1.1095930070455153, "learning_rate": 3.868613138686131e-06, "loss": 0.7176, "step": 795 }, { "epoch": 0.023357323250755464, "grad_norm": 1.1522491611640513, "learning_rate": 3.89294403892944e-06, "loss": 0.7321, "step": 800 }, { "epoch": 0.023503306521072687, "grad_norm": 1.0468333603055306, "learning_rate": 3.917274939172749e-06, "loss": 0.7006, "step": 805 }, { "epoch": 0.023649289791389905, "grad_norm": 1.1144173018429924, "learning_rate": 3.9416058394160585e-06, "loss": 0.7087, "step": 810 }, { "epoch": 0.023795273061707128, "grad_norm": 1.077825794092833, "learning_rate": 3.965936739659368e-06, "loss": 0.7173, "step": 815 }, { "epoch": 0.02394125633202435, "grad_norm": 0.9875362065120836, "learning_rate": 3.990267639902676e-06, "loss": 0.6884, "step": 820 }, { "epoch": 0.024087239602341572, "grad_norm": 1.065925617161558, "learning_rate": 4.014598540145985e-06, "loss": 0.665, "step": 825 }, { "epoch": 0.024233222872658795, "grad_norm": 1.0828831375687444, "learning_rate": 4.038929440389295e-06, "loss": 0.7187, "step": 830 }, { "epoch": 0.024379206142976013, "grad_norm": 1.094866910561538, "learning_rate": 4.0632603406326034e-06, "loss": 0.6882, "step": 835 }, { "epoch": 0.024525189413293236, "grad_norm": 1.0779495754837793, "learning_rate": 4.0875912408759126e-06, "loss": 0.7395, "step": 840 }, { "epoch": 0.024671172683610458, "grad_norm": 1.1228024918254016, "learning_rate": 4.111922141119222e-06, "loss": 0.7611, "step": 845 }, { "epoch": 0.02481715595392768, "grad_norm": 1.0633482339843714, "learning_rate": 4.13625304136253e-06, "loss": 0.7277, "step": 850 }, { "epoch": 0.024963139224244903, "grad_norm": 1.0313340399559963, "learning_rate": 4.16058394160584e-06, "loss": 0.7324, "step": 855 }, { "epoch": 0.02510912249456212, "grad_norm": 1.079192427906707, "learning_rate": 4.184914841849149e-06, "loss": 0.7239, "step": 860 }, { "epoch": 0.025255105764879344, "grad_norm": 1.0502040378515483, "learning_rate": 4.2092457420924574e-06, "loss": 0.6896, "step": 865 }, { "epoch": 0.025401089035196566, "grad_norm": 1.1319555905561047, "learning_rate": 4.233576642335767e-06, "loss": 0.7513, "step": 870 }, { "epoch": 0.02554707230551379, "grad_norm": 1.0986896144013205, "learning_rate": 4.257907542579075e-06, "loss": 0.6886, "step": 875 }, { "epoch": 0.02569305557583101, "grad_norm": 1.0579072239627985, "learning_rate": 4.282238442822385e-06, "loss": 0.688, "step": 880 }, { "epoch": 0.025839038846148233, "grad_norm": 1.0178382990648671, "learning_rate": 4.306569343065694e-06, "loss": 0.6541, "step": 885 }, { "epoch": 0.025985022116465452, "grad_norm": 1.0438958849306772, "learning_rate": 4.330900243309002e-06, "loss": 0.6503, "step": 890 }, { "epoch": 0.026131005386782674, "grad_norm": 1.2299561661297567, "learning_rate": 4.3552311435523115e-06, "loss": 0.7083, "step": 895 }, { "epoch": 0.026276988657099896, "grad_norm": 1.1068961204156378, "learning_rate": 4.379562043795621e-06, "loss": 0.6754, "step": 900 }, { "epoch": 0.02642297192741712, "grad_norm": 1.0961947653947883, "learning_rate": 4.40389294403893e-06, "loss": 0.7151, "step": 905 }, { "epoch": 0.02656895519773434, "grad_norm": 0.973430009977252, "learning_rate": 4.428223844282239e-06, "loss": 0.7051, "step": 910 }, { "epoch": 0.02671493846805156, "grad_norm": 1.0765941815522466, "learning_rate": 4.452554744525548e-06, "loss": 0.7062, "step": 915 }, { "epoch": 0.026860921738368782, "grad_norm": 1.02256647041762, "learning_rate": 4.476885644768856e-06, "loss": 0.7003, "step": 920 }, { "epoch": 0.027006905008686004, "grad_norm": 1.0946621578863134, "learning_rate": 4.5012165450121655e-06, "loss": 0.6899, "step": 925 }, { "epoch": 0.027152888279003227, "grad_norm": 1.1776938852243213, "learning_rate": 4.525547445255475e-06, "loss": 0.7156, "step": 930 }, { "epoch": 0.02729887154932045, "grad_norm": 1.0487774985486529, "learning_rate": 4.549878345498784e-06, "loss": 0.71, "step": 935 }, { "epoch": 0.027444854819637668, "grad_norm": 1.0745792018067446, "learning_rate": 4.574209245742093e-06, "loss": 0.6845, "step": 940 }, { "epoch": 0.02759083808995489, "grad_norm": 1.295459961096797, "learning_rate": 4.598540145985401e-06, "loss": 0.7239, "step": 945 }, { "epoch": 0.027736821360272113, "grad_norm": 1.2052063570676725, "learning_rate": 4.62287104622871e-06, "loss": 0.7039, "step": 950 }, { "epoch": 0.027882804630589335, "grad_norm": 1.1027379775252584, "learning_rate": 4.6472019464720195e-06, "loss": 0.75, "step": 955 }, { "epoch": 0.028028787900906557, "grad_norm": 1.1182622545777567, "learning_rate": 4.671532846715329e-06, "loss": 0.6813, "step": 960 }, { "epoch": 0.02817477117122378, "grad_norm": 1.1290029824316878, "learning_rate": 4.695863746958638e-06, "loss": 0.6923, "step": 965 }, { "epoch": 0.028320754441541, "grad_norm": 1.1435044543460782, "learning_rate": 4.720194647201947e-06, "loss": 0.7333, "step": 970 }, { "epoch": 0.02846673771185822, "grad_norm": 1.0996758289887403, "learning_rate": 4.744525547445255e-06, "loss": 0.7347, "step": 975 }, { "epoch": 0.028612720982175443, "grad_norm": 0.9835261215618665, "learning_rate": 4.768856447688564e-06, "loss": 0.7055, "step": 980 }, { "epoch": 0.028758704252492665, "grad_norm": 1.1844018935581475, "learning_rate": 4.793187347931874e-06, "loss": 0.709, "step": 985 }, { "epoch": 0.028904687522809887, "grad_norm": 1.1057628240182857, "learning_rate": 4.817518248175183e-06, "loss": 0.6833, "step": 990 }, { "epoch": 0.029050670793127106, "grad_norm": 1.179456016248955, "learning_rate": 4.841849148418492e-06, "loss": 0.6859, "step": 995 }, { "epoch": 0.02919665406344433, "grad_norm": 0.9904673674210718, "learning_rate": 4.866180048661801e-06, "loss": 0.7068, "step": 1000 }, { "epoch": 0.02934263733376155, "grad_norm": 1.0359892929904204, "learning_rate": 4.890510948905109e-06, "loss": 0.6501, "step": 1005 }, { "epoch": 0.029488620604078773, "grad_norm": 1.1448787365915682, "learning_rate": 4.914841849148419e-06, "loss": 0.6772, "step": 1010 }, { "epoch": 0.029634603874395996, "grad_norm": 1.0827418287166644, "learning_rate": 4.9391727493917275e-06, "loss": 0.7463, "step": 1015 }, { "epoch": 0.029780587144713214, "grad_norm": 1.012520904078615, "learning_rate": 4.963503649635037e-06, "loss": 0.6749, "step": 1020 }, { "epoch": 0.029926570415030437, "grad_norm": 1.0131409283080033, "learning_rate": 4.987834549878346e-06, "loss": 0.6838, "step": 1025 }, { "epoch": 0.03007255368534766, "grad_norm": 1.154179720087236, "learning_rate": 5.012165450121654e-06, "loss": 0.6973, "step": 1030 }, { "epoch": 0.03021853695566488, "grad_norm": 1.1457063671421355, "learning_rate": 5.036496350364963e-06, "loss": 0.696, "step": 1035 }, { "epoch": 0.030364520225982104, "grad_norm": 1.0980733918133105, "learning_rate": 5.060827250608273e-06, "loss": 0.6839, "step": 1040 }, { "epoch": 0.030510503496299322, "grad_norm": 1.2047563770621, "learning_rate": 5.0851581508515816e-06, "loss": 0.7356, "step": 1045 }, { "epoch": 0.030656486766616545, "grad_norm": 1.0388102148229954, "learning_rate": 5.109489051094891e-06, "loss": 0.6777, "step": 1050 }, { "epoch": 0.030802470036933767, "grad_norm": 1.2731045259532254, "learning_rate": 5.1338199513382e-06, "loss": 0.7216, "step": 1055 }, { "epoch": 0.03094845330725099, "grad_norm": 1.140850131116536, "learning_rate": 5.158150851581508e-06, "loss": 0.6622, "step": 1060 }, { "epoch": 0.03109443657756821, "grad_norm": 1.1062961390177195, "learning_rate": 5.182481751824818e-06, "loss": 0.6626, "step": 1065 }, { "epoch": 0.031240419847885434, "grad_norm": 1.1173164437097731, "learning_rate": 5.206812652068127e-06, "loss": 0.7138, "step": 1070 }, { "epoch": 0.031386403118202656, "grad_norm": 1.0440534665448769, "learning_rate": 5.231143552311436e-06, "loss": 0.6828, "step": 1075 }, { "epoch": 0.03153238638851988, "grad_norm": 1.042755969521519, "learning_rate": 5.255474452554745e-06, "loss": 0.6861, "step": 1080 }, { "epoch": 0.031678369658837094, "grad_norm": 1.0832107028576843, "learning_rate": 5.279805352798053e-06, "loss": 0.6965, "step": 1085 }, { "epoch": 0.031824352929154316, "grad_norm": 1.2471339467849314, "learning_rate": 5.304136253041363e-06, "loss": 0.7006, "step": 1090 }, { "epoch": 0.03197033619947154, "grad_norm": 1.0812438373967403, "learning_rate": 5.328467153284672e-06, "loss": 0.6889, "step": 1095 }, { "epoch": 0.03211631946978876, "grad_norm": 1.1537261400127952, "learning_rate": 5.3527980535279805e-06, "loss": 0.6613, "step": 1100 }, { "epoch": 0.03226230274010598, "grad_norm": 1.0823492350636892, "learning_rate": 5.37712895377129e-06, "loss": 0.6789, "step": 1105 }, { "epoch": 0.032408286010423205, "grad_norm": 1.1136189214177372, "learning_rate": 5.401459854014599e-06, "loss": 0.6863, "step": 1110 }, { "epoch": 0.03255426928074043, "grad_norm": 1.0648182497478877, "learning_rate": 5.425790754257908e-06, "loss": 0.653, "step": 1115 }, { "epoch": 0.03270025255105765, "grad_norm": 1.0801822699800558, "learning_rate": 5.450121654501217e-06, "loss": 0.6723, "step": 1120 }, { "epoch": 0.03284623582137487, "grad_norm": 1.0544590423726392, "learning_rate": 5.474452554744526e-06, "loss": 0.6898, "step": 1125 }, { "epoch": 0.032992219091692095, "grad_norm": 0.9889912689940816, "learning_rate": 5.4987834549878345e-06, "loss": 0.6681, "step": 1130 }, { "epoch": 0.03313820236200932, "grad_norm": 1.0273989572615296, "learning_rate": 5.523114355231144e-06, "loss": 0.6638, "step": 1135 }, { "epoch": 0.03328418563232653, "grad_norm": 1.0640448701127798, "learning_rate": 5.547445255474453e-06, "loss": 0.6992, "step": 1140 }, { "epoch": 0.033430168902643755, "grad_norm": 1.0338762905349677, "learning_rate": 5.571776155717762e-06, "loss": 0.6828, "step": 1145 }, { "epoch": 0.03357615217296098, "grad_norm": 1.1060745318855087, "learning_rate": 5.596107055961071e-06, "loss": 0.7001, "step": 1150 }, { "epoch": 0.0337221354432782, "grad_norm": 1.014292725669262, "learning_rate": 5.620437956204379e-06, "loss": 0.6641, "step": 1155 }, { "epoch": 0.03386811871359542, "grad_norm": 1.1677871974554235, "learning_rate": 5.6447688564476885e-06, "loss": 0.7107, "step": 1160 }, { "epoch": 0.034014101983912644, "grad_norm": 1.2129002960075048, "learning_rate": 5.669099756690998e-06, "loss": 0.7379, "step": 1165 }, { "epoch": 0.034160085254229866, "grad_norm": 1.076840421009346, "learning_rate": 5.693430656934307e-06, "loss": 0.6635, "step": 1170 }, { "epoch": 0.03430606852454709, "grad_norm": 1.0762286090973714, "learning_rate": 5.717761557177616e-06, "loss": 0.7025, "step": 1175 }, { "epoch": 0.03445205179486431, "grad_norm": 1.0534821615455856, "learning_rate": 5.742092457420925e-06, "loss": 0.681, "step": 1180 }, { "epoch": 0.03459803506518153, "grad_norm": 1.0545335507959306, "learning_rate": 5.766423357664233e-06, "loss": 0.7132, "step": 1185 }, { "epoch": 0.034744018335498755, "grad_norm": 1.1968228840222463, "learning_rate": 5.7907542579075425e-06, "loss": 0.712, "step": 1190 }, { "epoch": 0.03489000160581597, "grad_norm": 1.0760186759024661, "learning_rate": 5.8150851581508525e-06, "loss": 0.6751, "step": 1195 }, { "epoch": 0.03503598487613319, "grad_norm": 1.0326484460178955, "learning_rate": 5.839416058394161e-06, "loss": 0.7015, "step": 1200 }, { "epoch": 0.035181968146450415, "grad_norm": 1.0768181592496169, "learning_rate": 5.86374695863747e-06, "loss": 0.6856, "step": 1205 }, { "epoch": 0.03532795141676764, "grad_norm": 0.9799469892733584, "learning_rate": 5.888077858880779e-06, "loss": 0.7245, "step": 1210 }, { "epoch": 0.03547393468708486, "grad_norm": 1.0914390441561796, "learning_rate": 5.912408759124087e-06, "loss": 0.7022, "step": 1215 }, { "epoch": 0.03561991795740208, "grad_norm": 1.254089235536257, "learning_rate": 5.936739659367397e-06, "loss": 0.6884, "step": 1220 }, { "epoch": 0.035765901227719304, "grad_norm": 1.0376527949351193, "learning_rate": 5.961070559610706e-06, "loss": 0.6422, "step": 1225 }, { "epoch": 0.03591188449803653, "grad_norm": 1.0923042755992038, "learning_rate": 5.985401459854015e-06, "loss": 0.701, "step": 1230 }, { "epoch": 0.03605786776835375, "grad_norm": 1.209624964345522, "learning_rate": 6.009732360097324e-06, "loss": 0.7159, "step": 1235 }, { "epoch": 0.03620385103867097, "grad_norm": 1.086145545325886, "learning_rate": 6.034063260340632e-06, "loss": 0.6766, "step": 1240 }, { "epoch": 0.03634983430898819, "grad_norm": 1.05998691356967, "learning_rate": 6.058394160583942e-06, "loss": 0.6437, "step": 1245 }, { "epoch": 0.03649581757930541, "grad_norm": 0.9867783345368116, "learning_rate": 6.082725060827251e-06, "loss": 0.664, "step": 1250 }, { "epoch": 0.03664180084962263, "grad_norm": 1.121201113771594, "learning_rate": 6.10705596107056e-06, "loss": 0.7008, "step": 1255 }, { "epoch": 0.036787784119939854, "grad_norm": 1.0897497303303705, "learning_rate": 6.131386861313869e-06, "loss": 0.687, "step": 1260 }, { "epoch": 0.036933767390257076, "grad_norm": 1.1771547803273603, "learning_rate": 6.155717761557178e-06, "loss": 0.6603, "step": 1265 }, { "epoch": 0.0370797506605743, "grad_norm": 1.0977984160038317, "learning_rate": 6.180048661800487e-06, "loss": 0.6807, "step": 1270 }, { "epoch": 0.03722573393089152, "grad_norm": 0.9655261754587873, "learning_rate": 6.204379562043796e-06, "loss": 0.6565, "step": 1275 }, { "epoch": 0.03737171720120874, "grad_norm": 1.0225701814464048, "learning_rate": 6.2287104622871054e-06, "loss": 0.6861, "step": 1280 }, { "epoch": 0.037517700471525965, "grad_norm": 1.0261512112414461, "learning_rate": 6.253041362530414e-06, "loss": 0.6626, "step": 1285 }, { "epoch": 0.03766368374184319, "grad_norm": 1.0428715270148843, "learning_rate": 6.277372262773723e-06, "loss": 0.6659, "step": 1290 }, { "epoch": 0.03780966701216041, "grad_norm": 1.1269860165914014, "learning_rate": 6.301703163017032e-06, "loss": 0.6653, "step": 1295 }, { "epoch": 0.037955650282477625, "grad_norm": 1.0254578452814378, "learning_rate": 6.326034063260341e-06, "loss": 0.6342, "step": 1300 }, { "epoch": 0.03810163355279485, "grad_norm": 1.1543947840874005, "learning_rate": 6.3503649635036495e-06, "loss": 0.6904, "step": 1305 }, { "epoch": 0.03824761682311207, "grad_norm": 1.1967513989477108, "learning_rate": 6.374695863746959e-06, "loss": 0.6716, "step": 1310 }, { "epoch": 0.03839360009342929, "grad_norm": 1.1184621133890142, "learning_rate": 6.399026763990268e-06, "loss": 0.667, "step": 1315 }, { "epoch": 0.038539583363746514, "grad_norm": 1.1410719648638121, "learning_rate": 6.423357664233577e-06, "loss": 0.6762, "step": 1320 }, { "epoch": 0.03868556663406374, "grad_norm": 1.0401946087380416, "learning_rate": 6.447688564476886e-06, "loss": 0.6843, "step": 1325 }, { "epoch": 0.03883154990438096, "grad_norm": 1.0077071938646953, "learning_rate": 6.472019464720194e-06, "loss": 0.6918, "step": 1330 }, { "epoch": 0.03897753317469818, "grad_norm": 1.0553205708560847, "learning_rate": 6.4963503649635035e-06, "loss": 0.6919, "step": 1335 }, { "epoch": 0.039123516445015404, "grad_norm": 1.101131460336862, "learning_rate": 6.520681265206813e-06, "loss": 0.6979, "step": 1340 }, { "epoch": 0.039269499715332626, "grad_norm": 1.1764324646654138, "learning_rate": 6.545012165450122e-06, "loss": 0.6554, "step": 1345 }, { "epoch": 0.03941548298564984, "grad_norm": 1.128358820013896, "learning_rate": 6.569343065693431e-06, "loss": 0.6988, "step": 1350 }, { "epoch": 0.039561466255967064, "grad_norm": 1.0766682557603027, "learning_rate": 6.593673965936741e-06, "loss": 0.7134, "step": 1355 }, { "epoch": 0.039707449526284286, "grad_norm": 1.0375799962406123, "learning_rate": 6.618004866180048e-06, "loss": 0.6767, "step": 1360 }, { "epoch": 0.03985343279660151, "grad_norm": 1.097049513484227, "learning_rate": 6.6423357664233575e-06, "loss": 0.6996, "step": 1365 }, { "epoch": 0.03999941606691873, "grad_norm": 1.1767931081669207, "learning_rate": 6.666666666666667e-06, "loss": 0.6821, "step": 1370 }, { "epoch": 0.04014539933723595, "grad_norm": 1.1373563330901157, "learning_rate": 6.690997566909976e-06, "loss": 0.6978, "step": 1375 }, { "epoch": 0.040291382607553175, "grad_norm": 1.1096492505192663, "learning_rate": 6.715328467153286e-06, "loss": 0.691, "step": 1380 }, { "epoch": 0.0404373658778704, "grad_norm": 1.0437273909382008, "learning_rate": 6.739659367396595e-06, "loss": 0.6444, "step": 1385 }, { "epoch": 0.04058334914818762, "grad_norm": 1.124368553459045, "learning_rate": 6.763990267639902e-06, "loss": 0.7056, "step": 1390 }, { "epoch": 0.04072933241850484, "grad_norm": 1.1810689344196872, "learning_rate": 6.7883211678832115e-06, "loss": 0.7154, "step": 1395 }, { "epoch": 0.040875315688822064, "grad_norm": 1.1137859001203947, "learning_rate": 6.812652068126521e-06, "loss": 0.6762, "step": 1400 }, { "epoch": 0.04102129895913928, "grad_norm": 1.0805089185470662, "learning_rate": 6.836982968369831e-06, "loss": 0.6684, "step": 1405 }, { "epoch": 0.0411672822294565, "grad_norm": 1.1964122219799527, "learning_rate": 6.86131386861314e-06, "loss": 0.6638, "step": 1410 }, { "epoch": 0.041313265499773724, "grad_norm": 1.280295607513549, "learning_rate": 6.885644768856447e-06, "loss": 0.6535, "step": 1415 }, { "epoch": 0.041459248770090947, "grad_norm": 1.0498466493279215, "learning_rate": 6.909975669099756e-06, "loss": 0.6775, "step": 1420 }, { "epoch": 0.04160523204040817, "grad_norm": 1.1214086240400303, "learning_rate": 6.9343065693430655e-06, "loss": 0.6707, "step": 1425 }, { "epoch": 0.04175121531072539, "grad_norm": 1.0305782277100757, "learning_rate": 6.9586374695863755e-06, "loss": 0.6822, "step": 1430 }, { "epoch": 0.04189719858104261, "grad_norm": 0.9995518862972913, "learning_rate": 6.982968369829685e-06, "loss": 0.6818, "step": 1435 }, { "epoch": 0.042043181851359836, "grad_norm": 1.0398243412666694, "learning_rate": 7.007299270072994e-06, "loss": 0.7088, "step": 1440 }, { "epoch": 0.04218916512167706, "grad_norm": 1.0365506177413617, "learning_rate": 7.031630170316301e-06, "loss": 0.6809, "step": 1445 }, { "epoch": 0.04233514839199428, "grad_norm": 1.0610685052119897, "learning_rate": 7.05596107055961e-06, "loss": 0.6813, "step": 1450 }, { "epoch": 0.042481131662311496, "grad_norm": 1.0722215287941126, "learning_rate": 7.08029197080292e-06, "loss": 0.6799, "step": 1455 }, { "epoch": 0.04262711493262872, "grad_norm": 1.0766758734520605, "learning_rate": 7.1046228710462296e-06, "loss": 0.6706, "step": 1460 }, { "epoch": 0.04277309820294594, "grad_norm": 1.0445744265191608, "learning_rate": 7.128953771289539e-06, "loss": 0.644, "step": 1465 }, { "epoch": 0.04291908147326316, "grad_norm": 1.2472056788792396, "learning_rate": 7.153284671532846e-06, "loss": 0.667, "step": 1470 }, { "epoch": 0.043065064743580385, "grad_norm": 1.0827361456522622, "learning_rate": 7.177615571776155e-06, "loss": 0.6308, "step": 1475 }, { "epoch": 0.04321104801389761, "grad_norm": 1.0610485762487334, "learning_rate": 7.201946472019465e-06, "loss": 0.624, "step": 1480 }, { "epoch": 0.04335703128421483, "grad_norm": 1.1043670779365973, "learning_rate": 7.2262773722627744e-06, "loss": 0.6961, "step": 1485 }, { "epoch": 0.04350301455453205, "grad_norm": 1.01029464512237, "learning_rate": 7.2506082725060836e-06, "loss": 0.6743, "step": 1490 }, { "epoch": 0.043648997824849274, "grad_norm": 1.0168659972618277, "learning_rate": 7.274939172749393e-06, "loss": 0.6629, "step": 1495 }, { "epoch": 0.043794981095166496, "grad_norm": 1.1263986336283744, "learning_rate": 7.2992700729927e-06, "loss": 0.6843, "step": 1500 }, { "epoch": 0.04394096436548372, "grad_norm": 1.114557608043331, "learning_rate": 7.32360097323601e-06, "loss": 0.6825, "step": 1505 }, { "epoch": 0.044086947635800934, "grad_norm": 1.057991408152746, "learning_rate": 7.347931873479319e-06, "loss": 0.6316, "step": 1510 }, { "epoch": 0.044232930906118156, "grad_norm": 1.0195199202473486, "learning_rate": 7.3722627737226285e-06, "loss": 0.6784, "step": 1515 }, { "epoch": 0.04437891417643538, "grad_norm": 1.075673386068337, "learning_rate": 7.396593673965938e-06, "loss": 0.6619, "step": 1520 }, { "epoch": 0.0445248974467526, "grad_norm": 1.148665009960324, "learning_rate": 7.420924574209247e-06, "loss": 0.6477, "step": 1525 }, { "epoch": 0.04467088071706982, "grad_norm": 1.114356466447888, "learning_rate": 7.445255474452554e-06, "loss": 0.6833, "step": 1530 }, { "epoch": 0.044816863987387046, "grad_norm": 1.0797037057867025, "learning_rate": 7.469586374695864e-06, "loss": 0.6856, "step": 1535 }, { "epoch": 0.04496284725770427, "grad_norm": 1.0527684124101035, "learning_rate": 7.493917274939173e-06, "loss": 0.6277, "step": 1540 }, { "epoch": 0.04510883052802149, "grad_norm": 1.061461617422836, "learning_rate": 7.5182481751824825e-06, "loss": 0.6775, "step": 1545 }, { "epoch": 0.04525481379833871, "grad_norm": 1.0101434068605746, "learning_rate": 7.542579075425792e-06, "loss": 0.6502, "step": 1550 }, { "epoch": 0.045400797068655935, "grad_norm": 1.038358145990159, "learning_rate": 7.566909975669099e-06, "loss": 0.6747, "step": 1555 }, { "epoch": 0.04554678033897316, "grad_norm": 1.257887565677066, "learning_rate": 7.591240875912409e-06, "loss": 0.6795, "step": 1560 }, { "epoch": 0.04569276360929037, "grad_norm": 1.01589863022899, "learning_rate": 7.615571776155718e-06, "loss": 0.6653, "step": 1565 }, { "epoch": 0.045838746879607595, "grad_norm": 1.1228790322722928, "learning_rate": 7.639902676399027e-06, "loss": 0.6821, "step": 1570 }, { "epoch": 0.04598473014992482, "grad_norm": 1.0325187244482779, "learning_rate": 7.664233576642336e-06, "loss": 0.6862, "step": 1575 }, { "epoch": 0.04613071342024204, "grad_norm": 1.1844397298306408, "learning_rate": 7.688564476885646e-06, "loss": 0.6413, "step": 1580 }, { "epoch": 0.04627669669055926, "grad_norm": 1.0584517543452543, "learning_rate": 7.712895377128953e-06, "loss": 0.6493, "step": 1585 }, { "epoch": 0.046422679960876484, "grad_norm": 1.11680171108719, "learning_rate": 7.737226277372262e-06, "loss": 0.6687, "step": 1590 }, { "epoch": 0.046568663231193706, "grad_norm": 1.0317146666639343, "learning_rate": 7.761557177615571e-06, "loss": 0.6775, "step": 1595 }, { "epoch": 0.04671464650151093, "grad_norm": 1.1154001182436173, "learning_rate": 7.78588807785888e-06, "loss": 0.6398, "step": 1600 }, { "epoch": 0.04686062977182815, "grad_norm": 1.0873527897730348, "learning_rate": 7.810218978102191e-06, "loss": 0.7449, "step": 1605 }, { "epoch": 0.04700661304214537, "grad_norm": 1.093658118749798, "learning_rate": 7.834549878345499e-06, "loss": 0.6781, "step": 1610 }, { "epoch": 0.04715259631246259, "grad_norm": 1.002523745566156, "learning_rate": 7.858880778588808e-06, "loss": 0.657, "step": 1615 }, { "epoch": 0.04729857958277981, "grad_norm": 1.0796492812730232, "learning_rate": 7.883211678832117e-06, "loss": 0.6592, "step": 1620 }, { "epoch": 0.04744456285309703, "grad_norm": 1.0899168842086289, "learning_rate": 7.907542579075426e-06, "loss": 0.6595, "step": 1625 }, { "epoch": 0.047590546123414255, "grad_norm": 1.0431623637830085, "learning_rate": 7.931873479318735e-06, "loss": 0.7196, "step": 1630 }, { "epoch": 0.04773652939373148, "grad_norm": 1.1066269495488787, "learning_rate": 7.956204379562045e-06, "loss": 0.6598, "step": 1635 }, { "epoch": 0.0478825126640487, "grad_norm": 1.0586681528186794, "learning_rate": 7.980535279805352e-06, "loss": 0.6543, "step": 1640 }, { "epoch": 0.04802849593436592, "grad_norm": 1.0411062117360788, "learning_rate": 8.004866180048661e-06, "loss": 0.6454, "step": 1645 }, { "epoch": 0.048174479204683145, "grad_norm": 1.0932656270376693, "learning_rate": 8.02919708029197e-06, "loss": 0.6882, "step": 1650 }, { "epoch": 0.04832046247500037, "grad_norm": 1.1269820970402116, "learning_rate": 8.053527980535281e-06, "loss": 0.6322, "step": 1655 }, { "epoch": 0.04846644574531759, "grad_norm": 1.127758705660014, "learning_rate": 8.07785888077859e-06, "loss": 0.6916, "step": 1660 }, { "epoch": 0.04861242901563481, "grad_norm": 1.1632740663514483, "learning_rate": 8.1021897810219e-06, "loss": 0.6126, "step": 1665 }, { "epoch": 0.04875841228595203, "grad_norm": 1.145389992942058, "learning_rate": 8.126520681265207e-06, "loss": 0.7038, "step": 1670 }, { "epoch": 0.04890439555626925, "grad_norm": 0.9451603434670909, "learning_rate": 8.150851581508516e-06, "loss": 0.5986, "step": 1675 }, { "epoch": 0.04905037882658647, "grad_norm": 1.1012433984998233, "learning_rate": 8.175182481751825e-06, "loss": 0.6977, "step": 1680 }, { "epoch": 0.049196362096903694, "grad_norm": 1.0263298714272502, "learning_rate": 8.199513381995134e-06, "loss": 0.6454, "step": 1685 }, { "epoch": 0.049342345367220916, "grad_norm": 0.9782661843595571, "learning_rate": 8.223844282238443e-06, "loss": 0.619, "step": 1690 }, { "epoch": 0.04948832863753814, "grad_norm": 1.025087541180172, "learning_rate": 8.248175182481751e-06, "loss": 0.6638, "step": 1695 }, { "epoch": 0.04963431190785536, "grad_norm": 1.0210315896658668, "learning_rate": 8.27250608272506e-06, "loss": 0.6544, "step": 1700 }, { "epoch": 0.04978029517817258, "grad_norm": 1.0819816993288818, "learning_rate": 8.296836982968371e-06, "loss": 0.6367, "step": 1705 }, { "epoch": 0.049926278448489805, "grad_norm": 1.0603311906464405, "learning_rate": 8.32116788321168e-06, "loss": 0.6492, "step": 1710 }, { "epoch": 0.05007226171880703, "grad_norm": 0.9960174267609848, "learning_rate": 8.345498783454989e-06, "loss": 0.6905, "step": 1715 }, { "epoch": 0.05021824498912424, "grad_norm": 1.1891899351386306, "learning_rate": 8.369829683698298e-06, "loss": 0.6701, "step": 1720 }, { "epoch": 0.050364228259441465, "grad_norm": 1.133462417418228, "learning_rate": 8.394160583941606e-06, "loss": 0.6182, "step": 1725 }, { "epoch": 0.05051021152975869, "grad_norm": 1.044181725992598, "learning_rate": 8.418491484184915e-06, "loss": 0.6519, "step": 1730 }, { "epoch": 0.05065619480007591, "grad_norm": 1.001622003656427, "learning_rate": 8.442822384428224e-06, "loss": 0.6375, "step": 1735 }, { "epoch": 0.05080217807039313, "grad_norm": 1.0558341412606116, "learning_rate": 8.467153284671533e-06, "loss": 0.683, "step": 1740 }, { "epoch": 0.050948161340710355, "grad_norm": 1.1578624644279656, "learning_rate": 8.491484184914842e-06, "loss": 0.657, "step": 1745 }, { "epoch": 0.05109414461102758, "grad_norm": 0.9974840194604616, "learning_rate": 8.51581508515815e-06, "loss": 0.6433, "step": 1750 }, { "epoch": 0.0512401278813448, "grad_norm": 1.016583768357513, "learning_rate": 8.54014598540146e-06, "loss": 0.6601, "step": 1755 }, { "epoch": 0.05138611115166202, "grad_norm": 1.0852918523822312, "learning_rate": 8.56447688564477e-06, "loss": 0.6706, "step": 1760 }, { "epoch": 0.051532094421979244, "grad_norm": 1.0739115498736695, "learning_rate": 8.588807785888079e-06, "loss": 0.6735, "step": 1765 }, { "epoch": 0.051678077692296466, "grad_norm": 1.0424125082278208, "learning_rate": 8.613138686131388e-06, "loss": 0.6825, "step": 1770 }, { "epoch": 0.05182406096261368, "grad_norm": 0.9930350895385334, "learning_rate": 8.637469586374697e-06, "loss": 0.6586, "step": 1775 }, { "epoch": 0.051970044232930904, "grad_norm": 1.1165893314497837, "learning_rate": 8.661800486618005e-06, "loss": 0.6496, "step": 1780 }, { "epoch": 0.052116027503248126, "grad_norm": 1.0758174369054392, "learning_rate": 8.686131386861314e-06, "loss": 0.6888, "step": 1785 }, { "epoch": 0.05226201077356535, "grad_norm": 1.1247718659007386, "learning_rate": 8.710462287104623e-06, "loss": 0.6847, "step": 1790 }, { "epoch": 0.05240799404388257, "grad_norm": 1.0217333227450298, "learning_rate": 8.734793187347932e-06, "loss": 0.6569, "step": 1795 }, { "epoch": 0.05255397731419979, "grad_norm": 1.000776696120032, "learning_rate": 8.759124087591241e-06, "loss": 0.6501, "step": 1800 }, { "epoch": 0.052699960584517015, "grad_norm": 0.9906635584842388, "learning_rate": 8.78345498783455e-06, "loss": 0.6546, "step": 1805 }, { "epoch": 0.05284594385483424, "grad_norm": 0.9819764057190565, "learning_rate": 8.80778588807786e-06, "loss": 0.6402, "step": 1810 }, { "epoch": 0.05299192712515146, "grad_norm": 1.093118351977509, "learning_rate": 8.832116788321169e-06, "loss": 0.6895, "step": 1815 }, { "epoch": 0.05313791039546868, "grad_norm": 1.1057359920705614, "learning_rate": 8.856447688564478e-06, "loss": 0.6654, "step": 1820 }, { "epoch": 0.0532838936657859, "grad_norm": 1.0300388400229283, "learning_rate": 8.880778588807787e-06, "loss": 0.6498, "step": 1825 }, { "epoch": 0.05342987693610312, "grad_norm": 1.02689088912957, "learning_rate": 8.905109489051096e-06, "loss": 0.6594, "step": 1830 }, { "epoch": 0.05357586020642034, "grad_norm": 1.0487253340606695, "learning_rate": 8.929440389294404e-06, "loss": 0.6196, "step": 1835 }, { "epoch": 0.053721843476737564, "grad_norm": 1.0752987933017581, "learning_rate": 8.953771289537713e-06, "loss": 0.661, "step": 1840 }, { "epoch": 0.05386782674705479, "grad_norm": 1.154843315463436, "learning_rate": 8.978102189781022e-06, "loss": 0.6546, "step": 1845 }, { "epoch": 0.05401381001737201, "grad_norm": 1.0282514736298605, "learning_rate": 9.002433090024331e-06, "loss": 0.6699, "step": 1850 }, { "epoch": 0.05415979328768923, "grad_norm": 0.9687712383710668, "learning_rate": 9.02676399026764e-06, "loss": 0.6492, "step": 1855 }, { "epoch": 0.054305776558006454, "grad_norm": 1.0886570291049076, "learning_rate": 9.05109489051095e-06, "loss": 0.673, "step": 1860 }, { "epoch": 0.054451759828323676, "grad_norm": 1.0982489543255047, "learning_rate": 9.075425790754258e-06, "loss": 0.6434, "step": 1865 }, { "epoch": 0.0545977430986409, "grad_norm": 1.014181347417034, "learning_rate": 9.099756690997568e-06, "loss": 0.6807, "step": 1870 }, { "epoch": 0.05474372636895812, "grad_norm": 0.9804873511272889, "learning_rate": 9.124087591240877e-06, "loss": 0.66, "step": 1875 }, { "epoch": 0.054889709639275336, "grad_norm": 1.1069632288422862, "learning_rate": 9.148418491484186e-06, "loss": 0.6793, "step": 1880 }, { "epoch": 0.05503569290959256, "grad_norm": 0.991451633515147, "learning_rate": 9.172749391727495e-06, "loss": 0.6651, "step": 1885 }, { "epoch": 0.05518167617990978, "grad_norm": 1.0165073847704933, "learning_rate": 9.197080291970802e-06, "loss": 0.667, "step": 1890 }, { "epoch": 0.055327659450227, "grad_norm": 0.9563621410026635, "learning_rate": 9.221411192214112e-06, "loss": 0.6151, "step": 1895 }, { "epoch": 0.055473642720544225, "grad_norm": 1.0317194783634613, "learning_rate": 9.24574209245742e-06, "loss": 0.6189, "step": 1900 }, { "epoch": 0.05561962599086145, "grad_norm": 1.0463393964705463, "learning_rate": 9.27007299270073e-06, "loss": 0.6642, "step": 1905 }, { "epoch": 0.05576560926117867, "grad_norm": 1.0886067037058664, "learning_rate": 9.294403892944039e-06, "loss": 0.6602, "step": 1910 }, { "epoch": 0.05591159253149589, "grad_norm": 1.1017217744384855, "learning_rate": 9.318734793187348e-06, "loss": 0.6437, "step": 1915 }, { "epoch": 0.056057575801813114, "grad_norm": 1.0016790259241664, "learning_rate": 9.343065693430657e-06, "loss": 0.6532, "step": 1920 }, { "epoch": 0.05620355907213034, "grad_norm": 1.1011692741718946, "learning_rate": 9.367396593673966e-06, "loss": 0.6543, "step": 1925 }, { "epoch": 0.05634954234244756, "grad_norm": 1.115101723747087, "learning_rate": 9.391727493917276e-06, "loss": 0.6596, "step": 1930 }, { "epoch": 0.056495525612764774, "grad_norm": 1.0637709348726914, "learning_rate": 9.416058394160585e-06, "loss": 0.6466, "step": 1935 }, { "epoch": 0.056641508883082, "grad_norm": 1.0231108267585052, "learning_rate": 9.440389294403894e-06, "loss": 0.6678, "step": 1940 }, { "epoch": 0.05678749215339922, "grad_norm": 1.011104580422753, "learning_rate": 9.464720194647203e-06, "loss": 0.6458, "step": 1945 }, { "epoch": 0.05693347542371644, "grad_norm": 0.9324478897746696, "learning_rate": 9.48905109489051e-06, "loss": 0.5796, "step": 1950 }, { "epoch": 0.05707945869403366, "grad_norm": 1.063717901386418, "learning_rate": 9.51338199513382e-06, "loss": 0.662, "step": 1955 }, { "epoch": 0.057225441964350886, "grad_norm": 0.9789221193345159, "learning_rate": 9.537712895377129e-06, "loss": 0.6379, "step": 1960 }, { "epoch": 0.05737142523466811, "grad_norm": 1.0532286439232958, "learning_rate": 9.562043795620438e-06, "loss": 0.6448, "step": 1965 }, { "epoch": 0.05751740850498533, "grad_norm": 1.0282755395544276, "learning_rate": 9.586374695863749e-06, "loss": 0.6398, "step": 1970 }, { "epoch": 0.05766339177530255, "grad_norm": 1.180391935630018, "learning_rate": 9.610705596107056e-06, "loss": 0.659, "step": 1975 }, { "epoch": 0.057809375045619775, "grad_norm": 1.1257510142749163, "learning_rate": 9.635036496350365e-06, "loss": 0.6646, "step": 1980 }, { "epoch": 0.05795535831593699, "grad_norm": 1.0644880282874571, "learning_rate": 9.659367396593674e-06, "loss": 0.6463, "step": 1985 }, { "epoch": 0.05810134158625421, "grad_norm": 1.0598691367378976, "learning_rate": 9.683698296836984e-06, "loss": 0.6282, "step": 1990 }, { "epoch": 0.058247324856571435, "grad_norm": 0.9780678356278717, "learning_rate": 9.708029197080293e-06, "loss": 0.594, "step": 1995 }, { "epoch": 0.05839330812688866, "grad_norm": 1.1514801482372872, "learning_rate": 9.732360097323602e-06, "loss": 0.6635, "step": 2000 }, { "epoch": 0.05853929139720588, "grad_norm": 1.1120875424645227, "learning_rate": 9.75669099756691e-06, "loss": 0.6595, "step": 2005 }, { "epoch": 0.0586852746675231, "grad_norm": 1.0326419353800216, "learning_rate": 9.781021897810219e-06, "loss": 0.6444, "step": 2010 }, { "epoch": 0.058831257937840324, "grad_norm": 1.141436081331945, "learning_rate": 9.805352798053528e-06, "loss": 0.671, "step": 2015 }, { "epoch": 0.058977241208157546, "grad_norm": 1.0413150691289215, "learning_rate": 9.829683698296839e-06, "loss": 0.6794, "step": 2020 }, { "epoch": 0.05912322447847477, "grad_norm": 1.000245410938656, "learning_rate": 9.854014598540148e-06, "loss": 0.6444, "step": 2025 }, { "epoch": 0.05926920774879199, "grad_norm": 1.0445104931431857, "learning_rate": 9.878345498783455e-06, "loss": 0.6487, "step": 2030 }, { "epoch": 0.05941519101910921, "grad_norm": 1.094683719378844, "learning_rate": 9.902676399026764e-06, "loss": 0.6606, "step": 2035 }, { "epoch": 0.05956117428942643, "grad_norm": 1.124470835876263, "learning_rate": 9.927007299270073e-06, "loss": 0.6635, "step": 2040 }, { "epoch": 0.05970715755974365, "grad_norm": 1.137566581718097, "learning_rate": 9.951338199513383e-06, "loss": 0.6443, "step": 2045 }, { "epoch": 0.05985314083006087, "grad_norm": 1.0872723224538003, "learning_rate": 9.975669099756692e-06, "loss": 0.6466, "step": 2050 }, { "epoch": 0.059999124100378096, "grad_norm": 1.1594160234482798, "learning_rate": 1e-05, "loss": 0.6514, "step": 2055 }, { "epoch": 0.06014510737069532, "grad_norm": 1.0102912080732591, "learning_rate": 1.0024330900243308e-05, "loss": 0.6414, "step": 2060 }, { "epoch": 0.06029109064101254, "grad_norm": 1.0984929969292085, "learning_rate": 1.0048661800486617e-05, "loss": 0.6676, "step": 2065 }, { "epoch": 0.06043707391132976, "grad_norm": 1.098688195739189, "learning_rate": 1.0072992700729927e-05, "loss": 0.6208, "step": 2070 }, { "epoch": 0.060583057181646985, "grad_norm": 0.9967878613347284, "learning_rate": 1.0097323600973237e-05, "loss": 0.6638, "step": 2075 }, { "epoch": 0.06072904045196421, "grad_norm": 1.0090826759435656, "learning_rate": 1.0121654501216547e-05, "loss": 0.6358, "step": 2080 }, { "epoch": 0.06087502372228143, "grad_norm": 1.0071201653031916, "learning_rate": 1.0145985401459856e-05, "loss": 0.6322, "step": 2085 }, { "epoch": 0.061021006992598645, "grad_norm": 0.982564945642022, "learning_rate": 1.0170316301703163e-05, "loss": 0.6552, "step": 2090 }, { "epoch": 0.06116699026291587, "grad_norm": 1.1045533055674064, "learning_rate": 1.0194647201946472e-05, "loss": 0.6413, "step": 2095 }, { "epoch": 0.06131297353323309, "grad_norm": 1.0163271065144983, "learning_rate": 1.0218978102189781e-05, "loss": 0.6723, "step": 2100 }, { "epoch": 0.06145895680355031, "grad_norm": 1.1475659791139992, "learning_rate": 1.024330900243309e-05, "loss": 0.6695, "step": 2105 }, { "epoch": 0.061604940073867534, "grad_norm": 1.1045266861083372, "learning_rate": 1.02676399026764e-05, "loss": 0.6561, "step": 2110 }, { "epoch": 0.061750923344184756, "grad_norm": 0.9936661618147944, "learning_rate": 1.0291970802919707e-05, "loss": 0.6951, "step": 2115 }, { "epoch": 0.06189690661450198, "grad_norm": 1.0649374147685278, "learning_rate": 1.0316301703163016e-05, "loss": 0.6432, "step": 2120 }, { "epoch": 0.0620428898848192, "grad_norm": 1.0496361382316572, "learning_rate": 1.0340632603406327e-05, "loss": 0.6581, "step": 2125 }, { "epoch": 0.06218887315513642, "grad_norm": 1.0523483055710823, "learning_rate": 1.0364963503649636e-05, "loss": 0.6676, "step": 2130 }, { "epoch": 0.062334856425453646, "grad_norm": 1.04062459948657, "learning_rate": 1.0389294403892945e-05, "loss": 0.6101, "step": 2135 }, { "epoch": 0.06248083969577087, "grad_norm": 1.026249687207735, "learning_rate": 1.0413625304136255e-05, "loss": 0.6621, "step": 2140 }, { "epoch": 0.06262682296608808, "grad_norm": 1.1527505360847816, "learning_rate": 1.0437956204379562e-05, "loss": 0.7267, "step": 2145 }, { "epoch": 0.06277280623640531, "grad_norm": 1.1264121335113781, "learning_rate": 1.0462287104622871e-05, "loss": 0.6726, "step": 2150 }, { "epoch": 0.06291878950672253, "grad_norm": 0.9967284587329811, "learning_rate": 1.048661800486618e-05, "loss": 0.6528, "step": 2155 }, { "epoch": 0.06306477277703976, "grad_norm": 1.0288724832614848, "learning_rate": 1.051094890510949e-05, "loss": 0.658, "step": 2160 }, { "epoch": 0.06321075604735697, "grad_norm": 1.0087673530310133, "learning_rate": 1.0535279805352799e-05, "loss": 0.6334, "step": 2165 }, { "epoch": 0.06335673931767419, "grad_norm": 0.9737537675020499, "learning_rate": 1.0559610705596106e-05, "loss": 0.6812, "step": 2170 }, { "epoch": 0.06350272258799142, "grad_norm": 1.1855614379201536, "learning_rate": 1.0583941605839417e-05, "loss": 0.6761, "step": 2175 }, { "epoch": 0.06364870585830863, "grad_norm": 1.1220384937142855, "learning_rate": 1.0608272506082726e-05, "loss": 0.6793, "step": 2180 }, { "epoch": 0.06379468912862586, "grad_norm": 1.0578065733251834, "learning_rate": 1.0632603406326035e-05, "loss": 0.652, "step": 2185 }, { "epoch": 0.06394067239894308, "grad_norm": 0.9973680573321747, "learning_rate": 1.0656934306569344e-05, "loss": 0.709, "step": 2190 }, { "epoch": 0.0640866556692603, "grad_norm": 1.0647065542939091, "learning_rate": 1.0681265206812653e-05, "loss": 0.6894, "step": 2195 }, { "epoch": 0.06423263893957752, "grad_norm": 1.0052961787304342, "learning_rate": 1.0705596107055961e-05, "loss": 0.6499, "step": 2200 }, { "epoch": 0.06437862220989475, "grad_norm": 0.9773203237983517, "learning_rate": 1.072992700729927e-05, "loss": 0.6527, "step": 2205 }, { "epoch": 0.06452460548021197, "grad_norm": 1.026527031281549, "learning_rate": 1.075425790754258e-05, "loss": 0.6415, "step": 2210 }, { "epoch": 0.0646705887505292, "grad_norm": 1.049599463624097, "learning_rate": 1.0778588807785888e-05, "loss": 0.6644, "step": 2215 }, { "epoch": 0.06481657202084641, "grad_norm": 1.0581570602818287, "learning_rate": 1.0802919708029198e-05, "loss": 0.6022, "step": 2220 }, { "epoch": 0.06496255529116363, "grad_norm": 1.0521306735134344, "learning_rate": 1.0827250608272507e-05, "loss": 0.651, "step": 2225 }, { "epoch": 0.06510853856148086, "grad_norm": 1.032616511876304, "learning_rate": 1.0851581508515816e-05, "loss": 0.6569, "step": 2230 }, { "epoch": 0.06525452183179807, "grad_norm": 1.0720570655316675, "learning_rate": 1.0875912408759125e-05, "loss": 0.6173, "step": 2235 }, { "epoch": 0.0654005051021153, "grad_norm": 1.0250364658910363, "learning_rate": 1.0900243309002434e-05, "loss": 0.6675, "step": 2240 }, { "epoch": 0.06554648837243252, "grad_norm": 0.9361404219761684, "learning_rate": 1.0924574209245743e-05, "loss": 0.6286, "step": 2245 }, { "epoch": 0.06569247164274974, "grad_norm": 1.0103927579521073, "learning_rate": 1.0948905109489052e-05, "loss": 0.639, "step": 2250 }, { "epoch": 0.06583845491306696, "grad_norm": 0.9365991149957937, "learning_rate": 1.097323600973236e-05, "loss": 0.6352, "step": 2255 }, { "epoch": 0.06598443818338419, "grad_norm": 1.073607429797902, "learning_rate": 1.0997566909975669e-05, "loss": 0.6446, "step": 2260 }, { "epoch": 0.0661304214537014, "grad_norm": 1.009205686384502, "learning_rate": 1.1021897810218978e-05, "loss": 0.6162, "step": 2265 }, { "epoch": 0.06627640472401863, "grad_norm": 1.0142795061671825, "learning_rate": 1.1046228710462287e-05, "loss": 0.6148, "step": 2270 }, { "epoch": 0.06642238799433585, "grad_norm": 1.0312557914157374, "learning_rate": 1.1070559610705596e-05, "loss": 0.6795, "step": 2275 }, { "epoch": 0.06656837126465306, "grad_norm": 1.0321376114000371, "learning_rate": 1.1094890510948906e-05, "loss": 0.6855, "step": 2280 }, { "epoch": 0.0667143545349703, "grad_norm": 1.00712995040054, "learning_rate": 1.1119221411192215e-05, "loss": 0.6459, "step": 2285 }, { "epoch": 0.06686033780528751, "grad_norm": 0.9815780613208677, "learning_rate": 1.1143552311435524e-05, "loss": 0.6099, "step": 2290 }, { "epoch": 0.06700632107560474, "grad_norm": 1.0984421720007602, "learning_rate": 1.1167883211678833e-05, "loss": 0.6441, "step": 2295 }, { "epoch": 0.06715230434592195, "grad_norm": 1.0863579088248585, "learning_rate": 1.1192214111922142e-05, "loss": 0.6419, "step": 2300 }, { "epoch": 0.06729828761623918, "grad_norm": 0.9725508855998113, "learning_rate": 1.1216545012165451e-05, "loss": 0.6515, "step": 2305 }, { "epoch": 0.0674442708865564, "grad_norm": 1.049760682235673, "learning_rate": 1.1240875912408759e-05, "loss": 0.6665, "step": 2310 }, { "epoch": 0.06759025415687363, "grad_norm": 0.9569664385006853, "learning_rate": 1.1265206812652068e-05, "loss": 0.6596, "step": 2315 }, { "epoch": 0.06773623742719084, "grad_norm": 1.106639192207738, "learning_rate": 1.1289537712895377e-05, "loss": 0.6923, "step": 2320 }, { "epoch": 0.06788222069750807, "grad_norm": 1.100976497624694, "learning_rate": 1.1313868613138686e-05, "loss": 0.6623, "step": 2325 }, { "epoch": 0.06802820396782529, "grad_norm": 0.9844415307513532, "learning_rate": 1.1338199513381995e-05, "loss": 0.6128, "step": 2330 }, { "epoch": 0.0681741872381425, "grad_norm": 0.96913826066382, "learning_rate": 1.1362530413625304e-05, "loss": 0.6028, "step": 2335 }, { "epoch": 0.06832017050845973, "grad_norm": 1.113166692963723, "learning_rate": 1.1386861313868614e-05, "loss": 0.6418, "step": 2340 }, { "epoch": 0.06846615377877695, "grad_norm": 1.1296715310465835, "learning_rate": 1.1411192214111923e-05, "loss": 0.6507, "step": 2345 }, { "epoch": 0.06861213704909418, "grad_norm": 1.0100492593051285, "learning_rate": 1.1435523114355232e-05, "loss": 0.6213, "step": 2350 }, { "epoch": 0.06875812031941139, "grad_norm": 0.9775621756757757, "learning_rate": 1.1459854014598541e-05, "loss": 0.6119, "step": 2355 }, { "epoch": 0.06890410358972862, "grad_norm": 1.0496866598572168, "learning_rate": 1.148418491484185e-05, "loss": 0.6846, "step": 2360 }, { "epoch": 0.06905008686004584, "grad_norm": 1.0773000511044672, "learning_rate": 1.150851581508516e-05, "loss": 0.6342, "step": 2365 }, { "epoch": 0.06919607013036307, "grad_norm": 1.1063868586848227, "learning_rate": 1.1532846715328467e-05, "loss": 0.6339, "step": 2370 }, { "epoch": 0.06934205340068028, "grad_norm": 1.065981063334108, "learning_rate": 1.1557177615571776e-05, "loss": 0.649, "step": 2375 }, { "epoch": 0.06948803667099751, "grad_norm": 1.1448708763001623, "learning_rate": 1.1581508515815085e-05, "loss": 0.5995, "step": 2380 }, { "epoch": 0.06963401994131473, "grad_norm": 1.094554659763449, "learning_rate": 1.1605839416058394e-05, "loss": 0.6747, "step": 2385 }, { "epoch": 0.06978000321163194, "grad_norm": 1.0113789967074263, "learning_rate": 1.1630170316301705e-05, "loss": 0.6435, "step": 2390 }, { "epoch": 0.06992598648194917, "grad_norm": 0.9921204442253752, "learning_rate": 1.1654501216545012e-05, "loss": 0.6255, "step": 2395 }, { "epoch": 0.07007196975226639, "grad_norm": 1.0649137891677893, "learning_rate": 1.1678832116788322e-05, "loss": 0.6266, "step": 2400 }, { "epoch": 0.07021795302258362, "grad_norm": 1.0095921667847978, "learning_rate": 1.170316301703163e-05, "loss": 0.6346, "step": 2405 }, { "epoch": 0.07036393629290083, "grad_norm": 1.1614312009956222, "learning_rate": 1.172749391727494e-05, "loss": 0.6549, "step": 2410 }, { "epoch": 0.07050991956321806, "grad_norm": 1.058951888086372, "learning_rate": 1.1751824817518249e-05, "loss": 0.6601, "step": 2415 }, { "epoch": 0.07065590283353528, "grad_norm": 0.9473968650916121, "learning_rate": 1.1776155717761558e-05, "loss": 0.6237, "step": 2420 }, { "epoch": 0.0708018861038525, "grad_norm": 1.0714725814128068, "learning_rate": 1.1800486618004866e-05, "loss": 0.6846, "step": 2425 }, { "epoch": 0.07094786937416972, "grad_norm": 1.0125351160861302, "learning_rate": 1.1824817518248175e-05, "loss": 0.6101, "step": 2430 }, { "epoch": 0.07109385264448694, "grad_norm": 0.948296852418875, "learning_rate": 1.1849148418491484e-05, "loss": 0.6308, "step": 2435 }, { "epoch": 0.07123983591480416, "grad_norm": 0.9757300849667117, "learning_rate": 1.1873479318734795e-05, "loss": 0.6364, "step": 2440 }, { "epoch": 0.07138581918512138, "grad_norm": 1.0686958156750839, "learning_rate": 1.1897810218978104e-05, "loss": 0.6757, "step": 2445 }, { "epoch": 0.07153180245543861, "grad_norm": 0.9688559804942202, "learning_rate": 1.1922141119221411e-05, "loss": 0.6176, "step": 2450 }, { "epoch": 0.07167778572575582, "grad_norm": 1.0434464054863388, "learning_rate": 1.194647201946472e-05, "loss": 0.6285, "step": 2455 }, { "epoch": 0.07182376899607305, "grad_norm": 1.0374785635198298, "learning_rate": 1.197080291970803e-05, "loss": 0.6595, "step": 2460 }, { "epoch": 0.07196975226639027, "grad_norm": 1.009149847846611, "learning_rate": 1.1995133819951339e-05, "loss": 0.6151, "step": 2465 }, { "epoch": 0.0721157355367075, "grad_norm": 0.9769851547373942, "learning_rate": 1.2019464720194648e-05, "loss": 0.6641, "step": 2470 }, { "epoch": 0.07226171880702471, "grad_norm": 1.0180141854735585, "learning_rate": 1.2043795620437957e-05, "loss": 0.6428, "step": 2475 }, { "epoch": 0.07240770207734194, "grad_norm": 1.0011013128055215, "learning_rate": 1.2068126520681265e-05, "loss": 0.627, "step": 2480 }, { "epoch": 0.07255368534765916, "grad_norm": 1.147888911343086, "learning_rate": 1.2092457420924574e-05, "loss": 0.6851, "step": 2485 }, { "epoch": 0.07269966861797637, "grad_norm": 0.980053770836525, "learning_rate": 1.2116788321167885e-05, "loss": 0.6515, "step": 2490 }, { "epoch": 0.0728456518882936, "grad_norm": 0.9601942545463479, "learning_rate": 1.2141119221411194e-05, "loss": 0.6154, "step": 2495 }, { "epoch": 0.07299163515861082, "grad_norm": 1.0446090549891138, "learning_rate": 1.2165450121654503e-05, "loss": 0.5857, "step": 2500 }, { "epoch": 0.07313761842892805, "grad_norm": 1.1492545173964284, "learning_rate": 1.2189781021897812e-05, "loss": 0.6621, "step": 2505 }, { "epoch": 0.07328360169924526, "grad_norm": 1.0539042149910622, "learning_rate": 1.221411192214112e-05, "loss": 0.6707, "step": 2510 }, { "epoch": 0.07342958496956249, "grad_norm": 1.5737681580578402, "learning_rate": 1.2238442822384429e-05, "loss": 0.6279, "step": 2515 }, { "epoch": 0.07357556823987971, "grad_norm": 1.025474761778126, "learning_rate": 1.2262773722627738e-05, "loss": 0.6483, "step": 2520 }, { "epoch": 0.07372155151019694, "grad_norm": 1.0894014720627059, "learning_rate": 1.2287104622871047e-05, "loss": 0.6743, "step": 2525 }, { "epoch": 0.07386753478051415, "grad_norm": 0.9650766806866045, "learning_rate": 1.2311435523114356e-05, "loss": 0.6706, "step": 2530 }, { "epoch": 0.07401351805083138, "grad_norm": 0.9835147989124174, "learning_rate": 1.2335766423357663e-05, "loss": 0.6058, "step": 2535 }, { "epoch": 0.0741595013211486, "grad_norm": 1.0161872355835484, "learning_rate": 1.2360097323600974e-05, "loss": 0.6585, "step": 2540 }, { "epoch": 0.07430548459146581, "grad_norm": 1.0865445111359393, "learning_rate": 1.2384428223844283e-05, "loss": 0.6604, "step": 2545 }, { "epoch": 0.07445146786178304, "grad_norm": 0.9492130243987648, "learning_rate": 1.2408759124087593e-05, "loss": 0.6703, "step": 2550 }, { "epoch": 0.07459745113210026, "grad_norm": 0.8331116684817887, "learning_rate": 1.2433090024330902e-05, "loss": 0.5891, "step": 2555 }, { "epoch": 0.07474343440241749, "grad_norm": 1.0465806243703897, "learning_rate": 1.2457420924574211e-05, "loss": 0.6546, "step": 2560 }, { "epoch": 0.0748894176727347, "grad_norm": 1.1344641524983463, "learning_rate": 1.2481751824817518e-05, "loss": 0.6334, "step": 2565 }, { "epoch": 0.07503540094305193, "grad_norm": 0.9342524968333438, "learning_rate": 1.2506082725060827e-05, "loss": 0.6212, "step": 2570 }, { "epoch": 0.07518138421336915, "grad_norm": 1.0006326744730427, "learning_rate": 1.2530413625304138e-05, "loss": 0.6268, "step": 2575 }, { "epoch": 0.07532736748368637, "grad_norm": 1.096727822781896, "learning_rate": 1.2554744525547446e-05, "loss": 0.6343, "step": 2580 }, { "epoch": 0.07547335075400359, "grad_norm": 0.9172756497953684, "learning_rate": 1.2579075425790753e-05, "loss": 0.6553, "step": 2585 }, { "epoch": 0.07561933402432082, "grad_norm": 0.9779080818698995, "learning_rate": 1.2603406326034064e-05, "loss": 0.646, "step": 2590 }, { "epoch": 0.07576531729463803, "grad_norm": 0.9311096332656557, "learning_rate": 1.2627737226277373e-05, "loss": 0.6381, "step": 2595 }, { "epoch": 0.07591130056495525, "grad_norm": 0.9924795209995113, "learning_rate": 1.2652068126520682e-05, "loss": 0.6264, "step": 2600 }, { "epoch": 0.07605728383527248, "grad_norm": 1.0724450898989515, "learning_rate": 1.2676399026763991e-05, "loss": 0.6836, "step": 2605 }, { "epoch": 0.0762032671055897, "grad_norm": 0.9786942780560326, "learning_rate": 1.2700729927007299e-05, "loss": 0.6307, "step": 2610 }, { "epoch": 0.07634925037590692, "grad_norm": 1.0876266094214353, "learning_rate": 1.272506082725061e-05, "loss": 0.6412, "step": 2615 }, { "epoch": 0.07649523364622414, "grad_norm": 1.0807098067832979, "learning_rate": 1.2749391727493917e-05, "loss": 0.679, "step": 2620 }, { "epoch": 0.07664121691654137, "grad_norm": 0.960305725055445, "learning_rate": 1.2773722627737228e-05, "loss": 0.6614, "step": 2625 }, { "epoch": 0.07678720018685858, "grad_norm": 1.0665230723147958, "learning_rate": 1.2798053527980536e-05, "loss": 0.6753, "step": 2630 }, { "epoch": 0.07693318345717581, "grad_norm": 0.9667120170953246, "learning_rate": 1.2822384428223843e-05, "loss": 0.6231, "step": 2635 }, { "epoch": 0.07707916672749303, "grad_norm": 1.1840566086155655, "learning_rate": 1.2846715328467154e-05, "loss": 0.6955, "step": 2640 }, { "epoch": 0.07722514999781024, "grad_norm": 1.0824773982776075, "learning_rate": 1.2871046228710463e-05, "loss": 0.6499, "step": 2645 }, { "epoch": 0.07737113326812747, "grad_norm": 1.0030453236913028, "learning_rate": 1.2895377128953772e-05, "loss": 0.6719, "step": 2650 }, { "epoch": 0.07751711653844469, "grad_norm": 0.9361689217692117, "learning_rate": 1.2919708029197081e-05, "loss": 0.6451, "step": 2655 }, { "epoch": 0.07766309980876192, "grad_norm": 0.953948359631142, "learning_rate": 1.2944038929440389e-05, "loss": 0.6345, "step": 2660 }, { "epoch": 0.07780908307907913, "grad_norm": 1.0219660110361792, "learning_rate": 1.29683698296837e-05, "loss": 0.6568, "step": 2665 }, { "epoch": 0.07795506634939636, "grad_norm": 1.0550975124426707, "learning_rate": 1.2992700729927007e-05, "loss": 0.6429, "step": 2670 }, { "epoch": 0.07810104961971358, "grad_norm": 1.087507091131986, "learning_rate": 1.3017031630170318e-05, "loss": 0.6346, "step": 2675 }, { "epoch": 0.07824703289003081, "grad_norm": 1.0636631631531903, "learning_rate": 1.3041362530413625e-05, "loss": 0.6295, "step": 2680 }, { "epoch": 0.07839301616034802, "grad_norm": 1.000849664082566, "learning_rate": 1.3065693430656936e-05, "loss": 0.657, "step": 2685 }, { "epoch": 0.07853899943066525, "grad_norm": 1.0723331920375239, "learning_rate": 1.3090024330900244e-05, "loss": 0.6936, "step": 2690 }, { "epoch": 0.07868498270098247, "grad_norm": 0.988186458899287, "learning_rate": 1.3114355231143553e-05, "loss": 0.6513, "step": 2695 }, { "epoch": 0.07883096597129968, "grad_norm": 0.9394467345425972, "learning_rate": 1.3138686131386862e-05, "loss": 0.6266, "step": 2700 }, { "epoch": 0.07897694924161691, "grad_norm": 0.9585337843922896, "learning_rate": 1.3163017031630171e-05, "loss": 0.6347, "step": 2705 }, { "epoch": 0.07912293251193413, "grad_norm": 0.8815530405708681, "learning_rate": 1.3187347931873482e-05, "loss": 0.5831, "step": 2710 }, { "epoch": 0.07926891578225136, "grad_norm": 0.9876267475886173, "learning_rate": 1.321167883211679e-05, "loss": 0.6476, "step": 2715 }, { "epoch": 0.07941489905256857, "grad_norm": 1.1079718622543004, "learning_rate": 1.3236009732360097e-05, "loss": 0.6703, "step": 2720 }, { "epoch": 0.0795608823228858, "grad_norm": 1.0928141982699704, "learning_rate": 1.3260340632603408e-05, "loss": 0.6687, "step": 2725 }, { "epoch": 0.07970686559320302, "grad_norm": 1.1723950338215956, "learning_rate": 1.3284671532846715e-05, "loss": 0.6989, "step": 2730 }, { "epoch": 0.07985284886352025, "grad_norm": 0.985446527077055, "learning_rate": 1.3309002433090026e-05, "loss": 0.586, "step": 2735 }, { "epoch": 0.07999883213383746, "grad_norm": 1.0828495932979694, "learning_rate": 1.3333333333333333e-05, "loss": 0.6699, "step": 2740 }, { "epoch": 0.08014481540415469, "grad_norm": 1.0599079298419112, "learning_rate": 1.3357664233576642e-05, "loss": 0.6712, "step": 2745 }, { "epoch": 0.0802907986744719, "grad_norm": 1.0610560895792012, "learning_rate": 1.3381995133819952e-05, "loss": 0.6504, "step": 2750 }, { "epoch": 0.08043678194478912, "grad_norm": 0.985442989053913, "learning_rate": 1.340632603406326e-05, "loss": 0.6312, "step": 2755 }, { "epoch": 0.08058276521510635, "grad_norm": 1.0268877813914308, "learning_rate": 1.3430656934306572e-05, "loss": 0.6443, "step": 2760 }, { "epoch": 0.08072874848542357, "grad_norm": 0.9784423997421926, "learning_rate": 1.3454987834549879e-05, "loss": 0.6264, "step": 2765 }, { "epoch": 0.0808747317557408, "grad_norm": 1.0039916201722225, "learning_rate": 1.347931873479319e-05, "loss": 0.628, "step": 2770 }, { "epoch": 0.08102071502605801, "grad_norm": 1.1613122335895238, "learning_rate": 1.3503649635036497e-05, "loss": 0.6641, "step": 2775 }, { "epoch": 0.08116669829637524, "grad_norm": 0.9884859799322899, "learning_rate": 1.3527980535279805e-05, "loss": 0.6751, "step": 2780 }, { "epoch": 0.08131268156669245, "grad_norm": 1.0095293334763118, "learning_rate": 1.3552311435523116e-05, "loss": 0.643, "step": 2785 }, { "epoch": 0.08145866483700968, "grad_norm": 1.0376578212374137, "learning_rate": 1.3576642335766423e-05, "loss": 0.6568, "step": 2790 }, { "epoch": 0.0816046481073269, "grad_norm": 0.9476717540520615, "learning_rate": 1.3600973236009734e-05, "loss": 0.6288, "step": 2795 }, { "epoch": 0.08175063137764413, "grad_norm": 0.9665136080695061, "learning_rate": 1.3625304136253041e-05, "loss": 0.6442, "step": 2800 }, { "epoch": 0.08189661464796134, "grad_norm": 0.9846513844517989, "learning_rate": 1.364963503649635e-05, "loss": 0.6215, "step": 2805 }, { "epoch": 0.08204259791827856, "grad_norm": 1.025707878069238, "learning_rate": 1.3673965936739661e-05, "loss": 0.6412, "step": 2810 }, { "epoch": 0.08218858118859579, "grad_norm": 2.2088324817870104, "learning_rate": 1.3698296836982969e-05, "loss": 0.6425, "step": 2815 }, { "epoch": 0.082334564458913, "grad_norm": 1.01415012856855, "learning_rate": 1.372262773722628e-05, "loss": 0.6286, "step": 2820 }, { "epoch": 0.08248054772923023, "grad_norm": 0.9209091146417786, "learning_rate": 1.3746958637469587e-05, "loss": 0.6037, "step": 2825 }, { "epoch": 0.08262653099954745, "grad_norm": 0.9260736614050992, "learning_rate": 1.3771289537712895e-05, "loss": 0.648, "step": 2830 }, { "epoch": 0.08277251426986468, "grad_norm": 1.087398722975517, "learning_rate": 1.3795620437956205e-05, "loss": 0.6419, "step": 2835 }, { "epoch": 0.08291849754018189, "grad_norm": 0.8914116136685821, "learning_rate": 1.3819951338199513e-05, "loss": 0.6071, "step": 2840 }, { "epoch": 0.08306448081049912, "grad_norm": 0.9981890388648262, "learning_rate": 1.3844282238442824e-05, "loss": 0.6229, "step": 2845 }, { "epoch": 0.08321046408081634, "grad_norm": 1.039770857012158, "learning_rate": 1.3868613138686131e-05, "loss": 0.6567, "step": 2850 }, { "epoch": 0.08335644735113357, "grad_norm": 0.9367360214553977, "learning_rate": 1.3892944038929442e-05, "loss": 0.6408, "step": 2855 }, { "epoch": 0.08350243062145078, "grad_norm": 1.0024059354184711, "learning_rate": 1.3917274939172751e-05, "loss": 0.6542, "step": 2860 }, { "epoch": 0.083648413891768, "grad_norm": 0.9858654126217539, "learning_rate": 1.3941605839416059e-05, "loss": 0.5922, "step": 2865 }, { "epoch": 0.08379439716208523, "grad_norm": 0.9789878788250154, "learning_rate": 1.396593673965937e-05, "loss": 0.6312, "step": 2870 }, { "epoch": 0.08394038043240244, "grad_norm": 0.9700436969527052, "learning_rate": 1.3990267639902677e-05, "loss": 0.6223, "step": 2875 }, { "epoch": 0.08408636370271967, "grad_norm": 0.9256984676966532, "learning_rate": 1.4014598540145988e-05, "loss": 0.6097, "step": 2880 }, { "epoch": 0.08423234697303689, "grad_norm": 1.0379551818208894, "learning_rate": 1.4038929440389295e-05, "loss": 0.6307, "step": 2885 }, { "epoch": 0.08437833024335412, "grad_norm": 1.024407455181849, "learning_rate": 1.4063260340632603e-05, "loss": 0.6689, "step": 2890 }, { "epoch": 0.08452431351367133, "grad_norm": 0.8757291156782419, "learning_rate": 1.4087591240875913e-05, "loss": 0.6091, "step": 2895 }, { "epoch": 0.08467029678398856, "grad_norm": 1.0482709565992343, "learning_rate": 1.411192214111922e-05, "loss": 0.6499, "step": 2900 }, { "epoch": 0.08481628005430578, "grad_norm": 0.9854878257233862, "learning_rate": 1.4136253041362532e-05, "loss": 0.6486, "step": 2905 }, { "epoch": 0.08496226332462299, "grad_norm": 0.9884206353572355, "learning_rate": 1.416058394160584e-05, "loss": 0.6733, "step": 2910 }, { "epoch": 0.08510824659494022, "grad_norm": 0.9027173962721812, "learning_rate": 1.4184914841849148e-05, "loss": 0.6261, "step": 2915 }, { "epoch": 0.08525422986525744, "grad_norm": 0.9491834892996827, "learning_rate": 1.4209245742092459e-05, "loss": 0.5718, "step": 2920 }, { "epoch": 0.08540021313557467, "grad_norm": 0.9837474121263516, "learning_rate": 1.4233576642335767e-05, "loss": 0.6026, "step": 2925 }, { "epoch": 0.08554619640589188, "grad_norm": 0.9524699541520707, "learning_rate": 1.4257907542579077e-05, "loss": 0.6468, "step": 2930 }, { "epoch": 0.08569217967620911, "grad_norm": 0.9479654222056534, "learning_rate": 1.4282238442822385e-05, "loss": 0.6406, "step": 2935 }, { "epoch": 0.08583816294652633, "grad_norm": 0.8744162996993567, "learning_rate": 1.4306569343065692e-05, "loss": 0.5935, "step": 2940 }, { "epoch": 0.08598414621684355, "grad_norm": 0.8982789337931302, "learning_rate": 1.4330900243309003e-05, "loss": 0.6105, "step": 2945 }, { "epoch": 0.08613012948716077, "grad_norm": 0.980447871766528, "learning_rate": 1.435523114355231e-05, "loss": 0.6619, "step": 2950 }, { "epoch": 0.086276112757478, "grad_norm": 0.9706881751556573, "learning_rate": 1.4379562043795621e-05, "loss": 0.6035, "step": 2955 }, { "epoch": 0.08642209602779521, "grad_norm": 1.047034413533566, "learning_rate": 1.440389294403893e-05, "loss": 0.6851, "step": 2960 }, { "epoch": 0.08656807929811243, "grad_norm": 0.9199947145874801, "learning_rate": 1.442822384428224e-05, "loss": 0.6359, "step": 2965 }, { "epoch": 0.08671406256842966, "grad_norm": 1.026145958975653, "learning_rate": 1.4452554744525549e-05, "loss": 0.6054, "step": 2970 }, { "epoch": 0.08686004583874687, "grad_norm": 0.9860851871636626, "learning_rate": 1.4476885644768856e-05, "loss": 0.6322, "step": 2975 }, { "epoch": 0.0870060291090641, "grad_norm": 0.9504350140885961, "learning_rate": 1.4501216545012167e-05, "loss": 0.6456, "step": 2980 }, { "epoch": 0.08715201237938132, "grad_norm": 0.940392980978771, "learning_rate": 1.4525547445255475e-05, "loss": 0.6304, "step": 2985 }, { "epoch": 0.08729799564969855, "grad_norm": 0.930702318433142, "learning_rate": 1.4549878345498785e-05, "loss": 0.5983, "step": 2990 }, { "epoch": 0.08744397892001576, "grad_norm": 1.070486744916805, "learning_rate": 1.4574209245742093e-05, "loss": 0.6415, "step": 2995 }, { "epoch": 0.08758996219033299, "grad_norm": 0.9443359191021974, "learning_rate": 1.45985401459854e-05, "loss": 0.6685, "step": 3000 }, { "epoch": 0.08773594546065021, "grad_norm": 1.0212944689362435, "learning_rate": 1.4622871046228711e-05, "loss": 0.6714, "step": 3005 }, { "epoch": 0.08788192873096744, "grad_norm": 0.9196317908466266, "learning_rate": 1.464720194647202e-05, "loss": 0.6347, "step": 3010 }, { "epoch": 0.08802791200128465, "grad_norm": 1.0413729955455537, "learning_rate": 1.467153284671533e-05, "loss": 0.6719, "step": 3015 }, { "epoch": 0.08817389527160187, "grad_norm": 0.9837308522540283, "learning_rate": 1.4695863746958639e-05, "loss": 0.6368, "step": 3020 }, { "epoch": 0.0883198785419191, "grad_norm": 0.9394346021006221, "learning_rate": 1.4720194647201946e-05, "loss": 0.6294, "step": 3025 }, { "epoch": 0.08846586181223631, "grad_norm": 1.0896163046799323, "learning_rate": 1.4744525547445257e-05, "loss": 0.6607, "step": 3030 }, { "epoch": 0.08861184508255354, "grad_norm": 1.0115667056859434, "learning_rate": 1.4768856447688564e-05, "loss": 0.6341, "step": 3035 }, { "epoch": 0.08875782835287076, "grad_norm": 1.0438201454062972, "learning_rate": 1.4793187347931875e-05, "loss": 0.6378, "step": 3040 }, { "epoch": 0.08890381162318799, "grad_norm": 0.9609754765776692, "learning_rate": 1.4817518248175183e-05, "loss": 0.676, "step": 3045 }, { "epoch": 0.0890497948935052, "grad_norm": 1.1175939396873704, "learning_rate": 1.4841849148418493e-05, "loss": 0.6495, "step": 3050 }, { "epoch": 0.08919577816382243, "grad_norm": 1.0632624876269263, "learning_rate": 1.4866180048661801e-05, "loss": 0.6391, "step": 3055 }, { "epoch": 0.08934176143413965, "grad_norm": 1.018063982276338, "learning_rate": 1.4890510948905108e-05, "loss": 0.6268, "step": 3060 }, { "epoch": 0.08948774470445688, "grad_norm": 1.041956020451929, "learning_rate": 1.491484184914842e-05, "loss": 0.6346, "step": 3065 }, { "epoch": 0.08963372797477409, "grad_norm": 1.0340124727671234, "learning_rate": 1.4939172749391728e-05, "loss": 0.6107, "step": 3070 }, { "epoch": 0.0897797112450913, "grad_norm": 0.9570602923798521, "learning_rate": 1.496350364963504e-05, "loss": 0.6265, "step": 3075 }, { "epoch": 0.08992569451540854, "grad_norm": 0.9926528945293053, "learning_rate": 1.4987834549878347e-05, "loss": 0.6722, "step": 3080 }, { "epoch": 0.09007167778572575, "grad_norm": 1.241104730146903, "learning_rate": 1.5012165450121654e-05, "loss": 0.685, "step": 3085 }, { "epoch": 0.09021766105604298, "grad_norm": 0.982217213012648, "learning_rate": 1.5036496350364965e-05, "loss": 0.6124, "step": 3090 }, { "epoch": 0.0903636443263602, "grad_norm": 1.0070577181993419, "learning_rate": 1.5060827250608272e-05, "loss": 0.6296, "step": 3095 }, { "epoch": 0.09050962759667742, "grad_norm": 1.0565244878186, "learning_rate": 1.5085158150851583e-05, "loss": 0.6517, "step": 3100 }, { "epoch": 0.09065561086699464, "grad_norm": 1.0002705101163332, "learning_rate": 1.510948905109489e-05, "loss": 0.6269, "step": 3105 }, { "epoch": 0.09080159413731187, "grad_norm": 0.9213940378260086, "learning_rate": 1.5133819951338198e-05, "loss": 0.603, "step": 3110 }, { "epoch": 0.09094757740762908, "grad_norm": 0.9961378144227675, "learning_rate": 1.5158150851581509e-05, "loss": 0.6445, "step": 3115 }, { "epoch": 0.09109356067794631, "grad_norm": 0.9332368350747055, "learning_rate": 1.5182481751824818e-05, "loss": 0.6679, "step": 3120 }, { "epoch": 0.09123954394826353, "grad_norm": 0.9604637530313338, "learning_rate": 1.5206812652068127e-05, "loss": 0.6685, "step": 3125 }, { "epoch": 0.09138552721858074, "grad_norm": 1.0297713246599174, "learning_rate": 1.5231143552311436e-05, "loss": 0.6162, "step": 3130 }, { "epoch": 0.09153151048889797, "grad_norm": 0.948570189495746, "learning_rate": 1.5255474452554747e-05, "loss": 0.6515, "step": 3135 }, { "epoch": 0.09167749375921519, "grad_norm": 0.9725596836996608, "learning_rate": 1.5279805352798055e-05, "loss": 0.6495, "step": 3140 }, { "epoch": 0.09182347702953242, "grad_norm": 1.0839034378126473, "learning_rate": 1.5304136253041362e-05, "loss": 0.6429, "step": 3145 }, { "epoch": 0.09196946029984963, "grad_norm": 0.947793583053614, "learning_rate": 1.5328467153284673e-05, "loss": 0.6668, "step": 3150 }, { "epoch": 0.09211544357016686, "grad_norm": 0.9741482064128704, "learning_rate": 1.535279805352798e-05, "loss": 0.6263, "step": 3155 }, { "epoch": 0.09226142684048408, "grad_norm": 0.9370764952240859, "learning_rate": 1.537712895377129e-05, "loss": 0.6621, "step": 3160 }, { "epoch": 0.09240741011080131, "grad_norm": 0.9833696935086282, "learning_rate": 1.54014598540146e-05, "loss": 0.6294, "step": 3165 }, { "epoch": 0.09255339338111852, "grad_norm": 0.9352224982826948, "learning_rate": 1.5425790754257906e-05, "loss": 0.6453, "step": 3170 }, { "epoch": 0.09269937665143574, "grad_norm": 1.017603573251686, "learning_rate": 1.5450121654501217e-05, "loss": 0.6345, "step": 3175 }, { "epoch": 0.09284535992175297, "grad_norm": 0.9382854967016984, "learning_rate": 1.5474452554744524e-05, "loss": 0.6306, "step": 3180 }, { "epoch": 0.09299134319207018, "grad_norm": 0.8682588850728733, "learning_rate": 1.5498783454987835e-05, "loss": 0.6241, "step": 3185 }, { "epoch": 0.09313732646238741, "grad_norm": 0.9800390697114262, "learning_rate": 1.5523114355231143e-05, "loss": 0.6757, "step": 3190 }, { "epoch": 0.09328330973270463, "grad_norm": 1.1258703214568762, "learning_rate": 1.5547445255474454e-05, "loss": 0.6742, "step": 3195 }, { "epoch": 0.09342929300302186, "grad_norm": 0.9510914649899436, "learning_rate": 1.557177615571776e-05, "loss": 0.6187, "step": 3200 }, { "epoch": 0.09357527627333907, "grad_norm": 0.9717572995785265, "learning_rate": 1.5596107055961072e-05, "loss": 0.6303, "step": 3205 }, { "epoch": 0.0937212595436563, "grad_norm": 1.0691119058055012, "learning_rate": 1.5620437956204383e-05, "loss": 0.632, "step": 3210 }, { "epoch": 0.09386724281397352, "grad_norm": 0.9815291925381027, "learning_rate": 1.564476885644769e-05, "loss": 0.6091, "step": 3215 }, { "epoch": 0.09401322608429075, "grad_norm": 0.9667007364841954, "learning_rate": 1.5669099756690998e-05, "loss": 0.6581, "step": 3220 }, { "epoch": 0.09415920935460796, "grad_norm": 0.9416355473029303, "learning_rate": 1.569343065693431e-05, "loss": 0.6247, "step": 3225 }, { "epoch": 0.09430519262492518, "grad_norm": 1.0524891717449327, "learning_rate": 1.5717761557177616e-05, "loss": 0.6435, "step": 3230 }, { "epoch": 0.0944511758952424, "grad_norm": 0.944755703348167, "learning_rate": 1.5742092457420927e-05, "loss": 0.6649, "step": 3235 }, { "epoch": 0.09459715916555962, "grad_norm": 1.0571290915668252, "learning_rate": 1.5766423357664234e-05, "loss": 0.6401, "step": 3240 }, { "epoch": 0.09474314243587685, "grad_norm": 0.9460159097913406, "learning_rate": 1.5790754257907545e-05, "loss": 0.6207, "step": 3245 }, { "epoch": 0.09488912570619407, "grad_norm": 0.8930330115913967, "learning_rate": 1.5815085158150852e-05, "loss": 0.6032, "step": 3250 }, { "epoch": 0.0950351089765113, "grad_norm": 1.0263109081034396, "learning_rate": 1.583941605839416e-05, "loss": 0.6411, "step": 3255 }, { "epoch": 0.09518109224682851, "grad_norm": 0.9000359631286017, "learning_rate": 1.586374695863747e-05, "loss": 0.6626, "step": 3260 }, { "epoch": 0.09532707551714574, "grad_norm": 1.10180487572192, "learning_rate": 1.5888077858880778e-05, "loss": 0.6367, "step": 3265 }, { "epoch": 0.09547305878746296, "grad_norm": 0.9011509825336991, "learning_rate": 1.591240875912409e-05, "loss": 0.6121, "step": 3270 }, { "epoch": 0.09561904205778018, "grad_norm": 0.9369233304597034, "learning_rate": 1.5936739659367397e-05, "loss": 0.6314, "step": 3275 }, { "epoch": 0.0957650253280974, "grad_norm": 0.8664182188333204, "learning_rate": 1.5961070559610704e-05, "loss": 0.6224, "step": 3280 }, { "epoch": 0.09591100859841462, "grad_norm": 0.9243260421315137, "learning_rate": 1.5985401459854015e-05, "loss": 0.6134, "step": 3285 }, { "epoch": 0.09605699186873184, "grad_norm": 1.0278573888495475, "learning_rate": 1.6009732360097322e-05, "loss": 0.6391, "step": 3290 }, { "epoch": 0.09620297513904906, "grad_norm": 0.8407091057042889, "learning_rate": 1.6034063260340633e-05, "loss": 0.5752, "step": 3295 }, { "epoch": 0.09634895840936629, "grad_norm": 0.8828484152837366, "learning_rate": 1.605839416058394e-05, "loss": 0.6141, "step": 3300 }, { "epoch": 0.0964949416796835, "grad_norm": 0.9603339501169939, "learning_rate": 1.608272506082725e-05, "loss": 0.6591, "step": 3305 }, { "epoch": 0.09664092495000073, "grad_norm": 0.9022669858518785, "learning_rate": 1.6107055961070562e-05, "loss": 0.6414, "step": 3310 }, { "epoch": 0.09678690822031795, "grad_norm": 0.971266396462605, "learning_rate": 1.613138686131387e-05, "loss": 0.6396, "step": 3315 }, { "epoch": 0.09693289149063518, "grad_norm": 0.9817380474037248, "learning_rate": 1.615571776155718e-05, "loss": 0.6604, "step": 3320 }, { "epoch": 0.0970788747609524, "grad_norm": 0.9648243470289849, "learning_rate": 1.6180048661800488e-05, "loss": 0.6524, "step": 3325 }, { "epoch": 0.09722485803126962, "grad_norm": 0.9364248198826892, "learning_rate": 1.62043795620438e-05, "loss": 0.6517, "step": 3330 }, { "epoch": 0.09737084130158684, "grad_norm": 1.0309054692910615, "learning_rate": 1.6228710462287106e-05, "loss": 0.6055, "step": 3335 }, { "epoch": 0.09751682457190405, "grad_norm": 0.9220816835217326, "learning_rate": 1.6253041362530414e-05, "loss": 0.5725, "step": 3340 }, { "epoch": 0.09766280784222128, "grad_norm": 1.050475039412863, "learning_rate": 1.6277372262773725e-05, "loss": 0.6543, "step": 3345 }, { "epoch": 0.0978087911125385, "grad_norm": 0.9202004160881526, "learning_rate": 1.6301703163017032e-05, "loss": 0.6192, "step": 3350 }, { "epoch": 0.09795477438285573, "grad_norm": 0.9943592137058166, "learning_rate": 1.6326034063260343e-05, "loss": 0.6589, "step": 3355 }, { "epoch": 0.09810075765317294, "grad_norm": 0.9191257196253058, "learning_rate": 1.635036496350365e-05, "loss": 0.6523, "step": 3360 }, { "epoch": 0.09824674092349017, "grad_norm": 0.9882517623326437, "learning_rate": 1.6374695863746958e-05, "loss": 0.6436, "step": 3365 }, { "epoch": 0.09839272419380739, "grad_norm": 0.9586144695829365, "learning_rate": 1.639902676399027e-05, "loss": 0.6557, "step": 3370 }, { "epoch": 0.09853870746412462, "grad_norm": 0.8833072772448087, "learning_rate": 1.6423357664233576e-05, "loss": 0.6428, "step": 3375 }, { "epoch": 0.09868469073444183, "grad_norm": 0.8887942644027853, "learning_rate": 1.6447688564476887e-05, "loss": 0.6268, "step": 3380 }, { "epoch": 0.09883067400475906, "grad_norm": 0.9889170782743705, "learning_rate": 1.6472019464720194e-05, "loss": 0.64, "step": 3385 }, { "epoch": 0.09897665727507628, "grad_norm": 0.9853321740540486, "learning_rate": 1.6496350364963502e-05, "loss": 0.6234, "step": 3390 }, { "epoch": 0.09912264054539349, "grad_norm": 0.9279495594019942, "learning_rate": 1.6520681265206813e-05, "loss": 0.6569, "step": 3395 }, { "epoch": 0.09926862381571072, "grad_norm": 0.9646402738216642, "learning_rate": 1.654501216545012e-05, "loss": 0.6294, "step": 3400 }, { "epoch": 0.09941460708602794, "grad_norm": 0.8850316167296319, "learning_rate": 1.656934306569343e-05, "loss": 0.6269, "step": 3405 }, { "epoch": 0.09956059035634517, "grad_norm": 1.0057490158587108, "learning_rate": 1.6593673965936742e-05, "loss": 0.6402, "step": 3410 }, { "epoch": 0.09970657362666238, "grad_norm": 0.8855518318061717, "learning_rate": 1.661800486618005e-05, "loss": 0.6195, "step": 3415 }, { "epoch": 0.09985255689697961, "grad_norm": 0.9734139907762563, "learning_rate": 1.664233576642336e-05, "loss": 0.6449, "step": 3420 }, { "epoch": 0.09999854016729683, "grad_norm": 0.8843086772095267, "learning_rate": 1.6666666666666667e-05, "loss": 0.6294, "step": 3425 }, { "epoch": 0.10014452343761406, "grad_norm": 1.0220153918493968, "learning_rate": 1.6690997566909978e-05, "loss": 0.6181, "step": 3430 }, { "epoch": 0.10029050670793127, "grad_norm": 0.9129937250373339, "learning_rate": 1.6715328467153286e-05, "loss": 0.6221, "step": 3435 }, { "epoch": 0.10043648997824849, "grad_norm": 0.9567072192082745, "learning_rate": 1.6739659367396597e-05, "loss": 0.6388, "step": 3440 }, { "epoch": 0.10058247324856572, "grad_norm": 0.9104302378016137, "learning_rate": 1.6763990267639904e-05, "loss": 0.636, "step": 3445 }, { "epoch": 0.10072845651888293, "grad_norm": 0.8916015284161499, "learning_rate": 1.678832116788321e-05, "loss": 0.6326, "step": 3450 }, { "epoch": 0.10087443978920016, "grad_norm": 0.9468506892921814, "learning_rate": 1.6812652068126522e-05, "loss": 0.6259, "step": 3455 }, { "epoch": 0.10102042305951738, "grad_norm": 0.9218643891308228, "learning_rate": 1.683698296836983e-05, "loss": 0.6384, "step": 3460 }, { "epoch": 0.1011664063298346, "grad_norm": 1.0045824662425968, "learning_rate": 1.686131386861314e-05, "loss": 0.6557, "step": 3465 }, { "epoch": 0.10131238960015182, "grad_norm": 0.9765372894989729, "learning_rate": 1.6885644768856448e-05, "loss": 0.6445, "step": 3470 }, { "epoch": 0.10145837287046905, "grad_norm": 0.9502730406904643, "learning_rate": 1.6909975669099756e-05, "loss": 0.6305, "step": 3475 }, { "epoch": 0.10160435614078626, "grad_norm": 0.9273201713510885, "learning_rate": 1.6934306569343066e-05, "loss": 0.6544, "step": 3480 }, { "epoch": 0.1017503394111035, "grad_norm": 0.9369347644365921, "learning_rate": 1.6958637469586374e-05, "loss": 0.6331, "step": 3485 }, { "epoch": 0.10189632268142071, "grad_norm": 0.8921693044349193, "learning_rate": 1.6982968369829685e-05, "loss": 0.6381, "step": 3490 }, { "epoch": 0.10204230595173792, "grad_norm": 0.9528599404019067, "learning_rate": 1.7007299270072992e-05, "loss": 0.6152, "step": 3495 }, { "epoch": 0.10218828922205515, "grad_norm": 0.9744804347500702, "learning_rate": 1.70316301703163e-05, "loss": 0.618, "step": 3500 }, { "epoch": 0.10233427249237237, "grad_norm": 0.9104778570837566, "learning_rate": 1.705596107055961e-05, "loss": 0.613, "step": 3505 }, { "epoch": 0.1024802557626896, "grad_norm": 0.8779669824565652, "learning_rate": 1.708029197080292e-05, "loss": 0.6127, "step": 3510 }, { "epoch": 0.10262623903300681, "grad_norm": 0.9143659343268017, "learning_rate": 1.710462287104623e-05, "loss": 0.6167, "step": 3515 }, { "epoch": 0.10277222230332404, "grad_norm": 0.9139915970698947, "learning_rate": 1.712895377128954e-05, "loss": 0.6139, "step": 3520 }, { "epoch": 0.10291820557364126, "grad_norm": 0.9262525733115563, "learning_rate": 1.715328467153285e-05, "loss": 0.5955, "step": 3525 }, { "epoch": 0.10306418884395849, "grad_norm": 0.9422864182398167, "learning_rate": 1.7177615571776158e-05, "loss": 0.6398, "step": 3530 }, { "epoch": 0.1032101721142757, "grad_norm": 0.9265297307543063, "learning_rate": 1.7201946472019465e-05, "loss": 0.6351, "step": 3535 }, { "epoch": 0.10335615538459293, "grad_norm": 0.9569512499581062, "learning_rate": 1.7226277372262776e-05, "loss": 0.5994, "step": 3540 }, { "epoch": 0.10350213865491015, "grad_norm": 0.9342072551495006, "learning_rate": 1.7250608272506084e-05, "loss": 0.6422, "step": 3545 }, { "epoch": 0.10364812192522736, "grad_norm": 0.80250479631373, "learning_rate": 1.7274939172749394e-05, "loss": 0.6346, "step": 3550 }, { "epoch": 0.10379410519554459, "grad_norm": 0.9862862221418844, "learning_rate": 1.7299270072992702e-05, "loss": 0.6574, "step": 3555 }, { "epoch": 0.10394008846586181, "grad_norm": 0.931406585361183, "learning_rate": 1.732360097323601e-05, "loss": 0.6295, "step": 3560 }, { "epoch": 0.10408607173617904, "grad_norm": 0.9962099269227461, "learning_rate": 1.734793187347932e-05, "loss": 0.6396, "step": 3565 }, { "epoch": 0.10423205500649625, "grad_norm": 0.9046598720470926, "learning_rate": 1.7372262773722628e-05, "loss": 0.5921, "step": 3570 }, { "epoch": 0.10437803827681348, "grad_norm": 0.8813107795168904, "learning_rate": 1.739659367396594e-05, "loss": 0.5823, "step": 3575 }, { "epoch": 0.1045240215471307, "grad_norm": 0.9064525948216533, "learning_rate": 1.7420924574209246e-05, "loss": 0.5951, "step": 3580 }, { "epoch": 0.10467000481744793, "grad_norm": 0.8565505549327814, "learning_rate": 1.7445255474452553e-05, "loss": 0.6194, "step": 3585 }, { "epoch": 0.10481598808776514, "grad_norm": 0.9329702686324102, "learning_rate": 1.7469586374695864e-05, "loss": 0.6348, "step": 3590 }, { "epoch": 0.10496197135808237, "grad_norm": 0.9875807857182143, "learning_rate": 1.749391727493917e-05, "loss": 0.6177, "step": 3595 }, { "epoch": 0.10510795462839959, "grad_norm": 0.8917537699537514, "learning_rate": 1.7518248175182482e-05, "loss": 0.6428, "step": 3600 }, { "epoch": 0.1052539378987168, "grad_norm": 0.9070114032734711, "learning_rate": 1.754257907542579e-05, "loss": 0.5865, "step": 3605 }, { "epoch": 0.10539992116903403, "grad_norm": 0.9816271027612188, "learning_rate": 1.75669099756691e-05, "loss": 0.6638, "step": 3610 }, { "epoch": 0.10554590443935125, "grad_norm": 1.0195513930297653, "learning_rate": 1.7591240875912408e-05, "loss": 0.6342, "step": 3615 }, { "epoch": 0.10569188770966848, "grad_norm": 0.951290687849642, "learning_rate": 1.761557177615572e-05, "loss": 0.5961, "step": 3620 }, { "epoch": 0.10583787097998569, "grad_norm": 0.9361762074943982, "learning_rate": 1.763990267639903e-05, "loss": 0.6442, "step": 3625 }, { "epoch": 0.10598385425030292, "grad_norm": 0.8629039116588092, "learning_rate": 1.7664233576642337e-05, "loss": 0.6163, "step": 3630 }, { "epoch": 0.10612983752062013, "grad_norm": 0.9451848305439768, "learning_rate": 1.7688564476885648e-05, "loss": 0.6293, "step": 3635 }, { "epoch": 0.10627582079093736, "grad_norm": 0.8857666812150803, "learning_rate": 1.7712895377128956e-05, "loss": 0.6163, "step": 3640 }, { "epoch": 0.10642180406125458, "grad_norm": 1.0014538467357708, "learning_rate": 1.7737226277372263e-05, "loss": 0.6276, "step": 3645 }, { "epoch": 0.1065677873315718, "grad_norm": 0.9881210908644728, "learning_rate": 1.7761557177615574e-05, "loss": 0.6325, "step": 3650 }, { "epoch": 0.10671377060188902, "grad_norm": 1.028520707354522, "learning_rate": 1.778588807785888e-05, "loss": 0.6587, "step": 3655 }, { "epoch": 0.10685975387220624, "grad_norm": 0.8730254478555157, "learning_rate": 1.7810218978102192e-05, "loss": 0.6141, "step": 3660 }, { "epoch": 0.10700573714252347, "grad_norm": 0.9393291726209878, "learning_rate": 1.78345498783455e-05, "loss": 0.6172, "step": 3665 }, { "epoch": 0.10715172041284068, "grad_norm": 0.9226973356782863, "learning_rate": 1.7858880778588807e-05, "loss": 0.6258, "step": 3670 }, { "epoch": 0.10729770368315791, "grad_norm": 0.8867446305776202, "learning_rate": 1.7883211678832118e-05, "loss": 0.5881, "step": 3675 }, { "epoch": 0.10744368695347513, "grad_norm": 0.9405940983007027, "learning_rate": 1.7907542579075425e-05, "loss": 0.6338, "step": 3680 }, { "epoch": 0.10758967022379236, "grad_norm": 0.8014525410512059, "learning_rate": 1.7931873479318736e-05, "loss": 0.6017, "step": 3685 }, { "epoch": 0.10773565349410957, "grad_norm": 0.9964153850804147, "learning_rate": 1.7956204379562044e-05, "loss": 0.6118, "step": 3690 }, { "epoch": 0.1078816367644268, "grad_norm": 0.8406568438239255, "learning_rate": 1.7980535279805354e-05, "loss": 0.6025, "step": 3695 }, { "epoch": 0.10802762003474402, "grad_norm": 0.8227591058805861, "learning_rate": 1.8004866180048662e-05, "loss": 0.5993, "step": 3700 }, { "epoch": 0.10817360330506123, "grad_norm": 0.8678635975633703, "learning_rate": 1.802919708029197e-05, "loss": 0.6181, "step": 3705 }, { "epoch": 0.10831958657537846, "grad_norm": 0.9008870399494682, "learning_rate": 1.805352798053528e-05, "loss": 0.5917, "step": 3710 }, { "epoch": 0.10846556984569568, "grad_norm": 0.9502365451811936, "learning_rate": 1.8077858880778588e-05, "loss": 0.6372, "step": 3715 }, { "epoch": 0.10861155311601291, "grad_norm": 0.9656343933757582, "learning_rate": 1.81021897810219e-05, "loss": 0.6154, "step": 3720 }, { "epoch": 0.10875753638633012, "grad_norm": 1.0514435624406095, "learning_rate": 1.812652068126521e-05, "loss": 0.6264, "step": 3725 }, { "epoch": 0.10890351965664735, "grad_norm": 0.9455957418837644, "learning_rate": 1.8150851581508517e-05, "loss": 0.6462, "step": 3730 }, { "epoch": 0.10904950292696457, "grad_norm": 0.9517490351860379, "learning_rate": 1.8175182481751828e-05, "loss": 0.648, "step": 3735 }, { "epoch": 0.1091954861972818, "grad_norm": 0.8444590213718816, "learning_rate": 1.8199513381995135e-05, "loss": 0.6256, "step": 3740 }, { "epoch": 0.10934146946759901, "grad_norm": 1.0226921033154235, "learning_rate": 1.8223844282238446e-05, "loss": 0.6352, "step": 3745 }, { "epoch": 0.10948745273791624, "grad_norm": 0.9356996160556783, "learning_rate": 1.8248175182481753e-05, "loss": 0.682, "step": 3750 }, { "epoch": 0.10963343600823346, "grad_norm": 0.8996757189420435, "learning_rate": 1.827250608272506e-05, "loss": 0.6549, "step": 3755 }, { "epoch": 0.10977941927855067, "grad_norm": 0.8841444923458596, "learning_rate": 1.829683698296837e-05, "loss": 0.6053, "step": 3760 }, { "epoch": 0.1099254025488679, "grad_norm": 0.9197886004663965, "learning_rate": 1.832116788321168e-05, "loss": 0.6496, "step": 3765 }, { "epoch": 0.11007138581918512, "grad_norm": 0.9548166455055739, "learning_rate": 1.834549878345499e-05, "loss": 0.6163, "step": 3770 }, { "epoch": 0.11021736908950235, "grad_norm": 0.8584744999797594, "learning_rate": 1.8369829683698297e-05, "loss": 0.6034, "step": 3775 }, { "epoch": 0.11036335235981956, "grad_norm": 0.8627701727553239, "learning_rate": 1.8394160583941605e-05, "loss": 0.6185, "step": 3780 }, { "epoch": 0.11050933563013679, "grad_norm": 1.0977858194842025, "learning_rate": 1.8418491484184916e-05, "loss": 0.664, "step": 3785 }, { "epoch": 0.110655318900454, "grad_norm": 0.9931453349691322, "learning_rate": 1.8442822384428223e-05, "loss": 0.6221, "step": 3790 }, { "epoch": 0.11080130217077123, "grad_norm": 1.0394486011471797, "learning_rate": 1.8467153284671534e-05, "loss": 0.6547, "step": 3795 }, { "epoch": 0.11094728544108845, "grad_norm": 0.8837675475708588, "learning_rate": 1.849148418491484e-05, "loss": 0.6325, "step": 3800 }, { "epoch": 0.11109326871140568, "grad_norm": 0.9720639799918811, "learning_rate": 1.8515815085158152e-05, "loss": 0.6028, "step": 3805 }, { "epoch": 0.1112392519817229, "grad_norm": 0.9568582299260352, "learning_rate": 1.854014598540146e-05, "loss": 0.6452, "step": 3810 }, { "epoch": 0.11138523525204011, "grad_norm": 0.983009819127101, "learning_rate": 1.8564476885644767e-05, "loss": 0.6159, "step": 3815 }, { "epoch": 0.11153121852235734, "grad_norm": 0.9835895522552901, "learning_rate": 1.8588807785888078e-05, "loss": 0.6255, "step": 3820 }, { "epoch": 0.11167720179267455, "grad_norm": 0.9526136447260727, "learning_rate": 1.861313868613139e-05, "loss": 0.6093, "step": 3825 }, { "epoch": 0.11182318506299178, "grad_norm": 0.9635270962314467, "learning_rate": 1.8637469586374696e-05, "loss": 0.5982, "step": 3830 }, { "epoch": 0.111969168333309, "grad_norm": 0.8621362217531646, "learning_rate": 1.8661800486618007e-05, "loss": 0.6017, "step": 3835 }, { "epoch": 0.11211515160362623, "grad_norm": 0.8624411951781509, "learning_rate": 1.8686131386861315e-05, "loss": 0.6011, "step": 3840 }, { "epoch": 0.11226113487394344, "grad_norm": 0.9348691734103473, "learning_rate": 1.8710462287104625e-05, "loss": 0.6299, "step": 3845 }, { "epoch": 0.11240711814426067, "grad_norm": 0.9643866162462174, "learning_rate": 1.8734793187347933e-05, "loss": 0.5997, "step": 3850 }, { "epoch": 0.11255310141457789, "grad_norm": 0.8984799443064555, "learning_rate": 1.8759124087591244e-05, "loss": 0.6232, "step": 3855 }, { "epoch": 0.11269908468489512, "grad_norm": 1.1805390604141521, "learning_rate": 1.878345498783455e-05, "loss": 0.6447, "step": 3860 }, { "epoch": 0.11284506795521233, "grad_norm": 0.8834184827257258, "learning_rate": 1.880778588807786e-05, "loss": 0.5999, "step": 3865 }, { "epoch": 0.11299105122552955, "grad_norm": 0.9392666077582535, "learning_rate": 1.883211678832117e-05, "loss": 0.6674, "step": 3870 }, { "epoch": 0.11313703449584678, "grad_norm": 0.9159700650864391, "learning_rate": 1.8856447688564477e-05, "loss": 0.6032, "step": 3875 }, { "epoch": 0.113283017766164, "grad_norm": 0.9014178658643752, "learning_rate": 1.8880778588807788e-05, "loss": 0.6138, "step": 3880 }, { "epoch": 0.11342900103648122, "grad_norm": 0.8452782990251396, "learning_rate": 1.8905109489051095e-05, "loss": 0.5959, "step": 3885 }, { "epoch": 0.11357498430679844, "grad_norm": 0.9483235874870263, "learning_rate": 1.8929440389294406e-05, "loss": 0.6542, "step": 3890 }, { "epoch": 0.11372096757711567, "grad_norm": 0.9338023777538912, "learning_rate": 1.8953771289537714e-05, "loss": 0.6205, "step": 3895 }, { "epoch": 0.11386695084743288, "grad_norm": 0.9162245898954774, "learning_rate": 1.897810218978102e-05, "loss": 0.6338, "step": 3900 }, { "epoch": 0.11401293411775011, "grad_norm": 0.8992607920742498, "learning_rate": 1.9002433090024332e-05, "loss": 0.6295, "step": 3905 }, { "epoch": 0.11415891738806733, "grad_norm": 0.9580967477759709, "learning_rate": 1.902676399026764e-05, "loss": 0.6553, "step": 3910 }, { "epoch": 0.11430490065838454, "grad_norm": 0.887953417765955, "learning_rate": 1.905109489051095e-05, "loss": 0.6359, "step": 3915 }, { "epoch": 0.11445088392870177, "grad_norm": 1.1592535957055239, "learning_rate": 1.9075425790754258e-05, "loss": 0.6668, "step": 3920 }, { "epoch": 0.11459686719901899, "grad_norm": 0.7627042068325025, "learning_rate": 1.909975669099757e-05, "loss": 0.643, "step": 3925 }, { "epoch": 0.11474285046933622, "grad_norm": 0.9299925376648479, "learning_rate": 1.9124087591240876e-05, "loss": 0.6582, "step": 3930 }, { "epoch": 0.11488883373965343, "grad_norm": 0.9947786298953688, "learning_rate": 1.9148418491484187e-05, "loss": 0.6572, "step": 3935 }, { "epoch": 0.11503481700997066, "grad_norm": 0.9448786975348246, "learning_rate": 1.9172749391727497e-05, "loss": 0.6205, "step": 3940 }, { "epoch": 0.11518080028028788, "grad_norm": 0.9239139055618858, "learning_rate": 1.9197080291970805e-05, "loss": 0.6187, "step": 3945 }, { "epoch": 0.1153267835506051, "grad_norm": 1.0937756990655694, "learning_rate": 1.9221411192214112e-05, "loss": 0.6313, "step": 3950 }, { "epoch": 0.11547276682092232, "grad_norm": 1.0269255792336949, "learning_rate": 1.9245742092457423e-05, "loss": 0.6324, "step": 3955 }, { "epoch": 0.11561875009123955, "grad_norm": 0.9864854050397118, "learning_rate": 1.927007299270073e-05, "loss": 0.5783, "step": 3960 }, { "epoch": 0.11576473336155677, "grad_norm": 0.8661156565743579, "learning_rate": 1.929440389294404e-05, "loss": 0.6821, "step": 3965 }, { "epoch": 0.11591071663187398, "grad_norm": 0.9424808774163539, "learning_rate": 1.931873479318735e-05, "loss": 0.6754, "step": 3970 }, { "epoch": 0.11605669990219121, "grad_norm": 0.8858437362815744, "learning_rate": 1.934306569343066e-05, "loss": 0.6398, "step": 3975 }, { "epoch": 0.11620268317250843, "grad_norm": 0.9649105029738014, "learning_rate": 1.9367396593673967e-05, "loss": 0.6754, "step": 3980 }, { "epoch": 0.11634866644282565, "grad_norm": 0.9181588701445184, "learning_rate": 1.9391727493917275e-05, "loss": 0.639, "step": 3985 }, { "epoch": 0.11649464971314287, "grad_norm": 0.8975818928666023, "learning_rate": 1.9416058394160586e-05, "loss": 0.6014, "step": 3990 }, { "epoch": 0.1166406329834601, "grad_norm": 0.8939631537968934, "learning_rate": 1.9440389294403893e-05, "loss": 0.589, "step": 3995 }, { "epoch": 0.11678661625377731, "grad_norm": 0.915816479337968, "learning_rate": 1.9464720194647204e-05, "loss": 0.6131, "step": 4000 }, { "epoch": 0.11693259952409454, "grad_norm": 0.9249634653028403, "learning_rate": 1.948905109489051e-05, "loss": 0.6009, "step": 4005 }, { "epoch": 0.11707858279441176, "grad_norm": 0.8691333860356314, "learning_rate": 1.951338199513382e-05, "loss": 0.641, "step": 4010 }, { "epoch": 0.11722456606472899, "grad_norm": 0.874251449088223, "learning_rate": 1.953771289537713e-05, "loss": 0.6244, "step": 4015 }, { "epoch": 0.1173705493350462, "grad_norm": 0.8810532710129906, "learning_rate": 1.9562043795620437e-05, "loss": 0.6217, "step": 4020 }, { "epoch": 0.11751653260536342, "grad_norm": 0.9072008853635054, "learning_rate": 1.9586374695863748e-05, "loss": 0.65, "step": 4025 }, { "epoch": 0.11766251587568065, "grad_norm": 0.9026820794637814, "learning_rate": 1.9610705596107055e-05, "loss": 0.621, "step": 4030 }, { "epoch": 0.11780849914599786, "grad_norm": 0.8995607524478506, "learning_rate": 1.9635036496350366e-05, "loss": 0.6365, "step": 4035 }, { "epoch": 0.11795448241631509, "grad_norm": 0.9203685681464477, "learning_rate": 1.9659367396593677e-05, "loss": 0.666, "step": 4040 }, { "epoch": 0.11810046568663231, "grad_norm": 0.9360779652128011, "learning_rate": 1.9683698296836984e-05, "loss": 0.6245, "step": 4045 }, { "epoch": 0.11824644895694954, "grad_norm": 0.9070939764476013, "learning_rate": 1.9708029197080295e-05, "loss": 0.6284, "step": 4050 }, { "epoch": 0.11839243222726675, "grad_norm": 0.9036992514305449, "learning_rate": 1.9732360097323603e-05, "loss": 0.6182, "step": 4055 }, { "epoch": 0.11853841549758398, "grad_norm": 0.879450743448652, "learning_rate": 1.975669099756691e-05, "loss": 0.6565, "step": 4060 }, { "epoch": 0.1186843987679012, "grad_norm": 0.9563706637329302, "learning_rate": 1.978102189781022e-05, "loss": 0.6302, "step": 4065 }, { "epoch": 0.11883038203821843, "grad_norm": 0.8734868308320373, "learning_rate": 1.980535279805353e-05, "loss": 0.6266, "step": 4070 }, { "epoch": 0.11897636530853564, "grad_norm": 0.9387656782204379, "learning_rate": 1.982968369829684e-05, "loss": 0.6437, "step": 4075 }, { "epoch": 0.11912234857885286, "grad_norm": 0.9525727483226683, "learning_rate": 1.9854014598540147e-05, "loss": 0.6288, "step": 4080 }, { "epoch": 0.11926833184917009, "grad_norm": 0.9294479846408924, "learning_rate": 1.9878345498783458e-05, "loss": 0.6534, "step": 4085 }, { "epoch": 0.1194143151194873, "grad_norm": 0.9458948885144624, "learning_rate": 1.9902676399026765e-05, "loss": 0.6063, "step": 4090 }, { "epoch": 0.11956029838980453, "grad_norm": 0.8692860723177874, "learning_rate": 1.9927007299270073e-05, "loss": 0.6305, "step": 4095 }, { "epoch": 0.11970628166012175, "grad_norm": 0.9037505764927531, "learning_rate": 1.9951338199513383e-05, "loss": 0.6183, "step": 4100 }, { "epoch": 0.11985226493043898, "grad_norm": 0.9273134626167606, "learning_rate": 1.997566909975669e-05, "loss": 0.6719, "step": 4105 }, { "epoch": 0.11999824820075619, "grad_norm": 0.9709717197880285, "learning_rate": 2e-05, "loss": 0.6341, "step": 4110 }, { "epoch": 0.12014423147107342, "grad_norm": 0.9235831128535517, "learning_rate": 2.002433090024331e-05, "loss": 0.6488, "step": 4115 }, { "epoch": 0.12029021474139064, "grad_norm": 0.8716151749132673, "learning_rate": 2.0048661800486617e-05, "loss": 0.6356, "step": 4120 }, { "epoch": 0.12043619801170787, "grad_norm": 0.9528081381882848, "learning_rate": 2.0072992700729927e-05, "loss": 0.6306, "step": 4125 }, { "epoch": 0.12058218128202508, "grad_norm": 0.8539663178921629, "learning_rate": 2.0097323600973235e-05, "loss": 0.6149, "step": 4130 }, { "epoch": 0.1207281645523423, "grad_norm": 0.8788288683599234, "learning_rate": 2.0121654501216546e-05, "loss": 0.6285, "step": 4135 }, { "epoch": 0.12087414782265953, "grad_norm": 0.9299415055798135, "learning_rate": 2.0145985401459853e-05, "loss": 0.6322, "step": 4140 }, { "epoch": 0.12102013109297674, "grad_norm": 0.9269568153661615, "learning_rate": 2.0170316301703164e-05, "loss": 0.6117, "step": 4145 }, { "epoch": 0.12116611436329397, "grad_norm": 0.913762746233763, "learning_rate": 2.0194647201946475e-05, "loss": 0.6287, "step": 4150 }, { "epoch": 0.12131209763361119, "grad_norm": 0.9386912043150886, "learning_rate": 2.0218978102189782e-05, "loss": 0.595, "step": 4155 }, { "epoch": 0.12145808090392841, "grad_norm": 0.9177260585075151, "learning_rate": 2.0243309002433093e-05, "loss": 0.7076, "step": 4160 }, { "epoch": 0.12160406417424563, "grad_norm": 0.8761768277536077, "learning_rate": 2.02676399026764e-05, "loss": 0.6553, "step": 4165 }, { "epoch": 0.12175004744456286, "grad_norm": 0.9724688067643973, "learning_rate": 2.029197080291971e-05, "loss": 0.6423, "step": 4170 }, { "epoch": 0.12189603071488007, "grad_norm": 0.9155067980317908, "learning_rate": 2.031630170316302e-05, "loss": 0.6436, "step": 4175 }, { "epoch": 0.12204201398519729, "grad_norm": 0.8738842286412986, "learning_rate": 2.0340632603406326e-05, "loss": 0.6211, "step": 4180 }, { "epoch": 0.12218799725551452, "grad_norm": 0.8445563869246018, "learning_rate": 2.0364963503649637e-05, "loss": 0.6085, "step": 4185 }, { "epoch": 0.12233398052583173, "grad_norm": 0.8933525060415234, "learning_rate": 2.0389294403892945e-05, "loss": 0.6233, "step": 4190 }, { "epoch": 0.12247996379614896, "grad_norm": 0.9798091248247099, "learning_rate": 2.0413625304136255e-05, "loss": 0.6622, "step": 4195 }, { "epoch": 0.12262594706646618, "grad_norm": 0.783649090845468, "learning_rate": 2.0437956204379563e-05, "loss": 0.6198, "step": 4200 }, { "epoch": 0.12277193033678341, "grad_norm": 0.9448996327352105, "learning_rate": 2.046228710462287e-05, "loss": 0.6148, "step": 4205 }, { "epoch": 0.12291791360710062, "grad_norm": 0.8805850770622847, "learning_rate": 2.048661800486618e-05, "loss": 0.651, "step": 4210 }, { "epoch": 0.12306389687741785, "grad_norm": 0.9575041815941484, "learning_rate": 2.051094890510949e-05, "loss": 0.6275, "step": 4215 }, { "epoch": 0.12320988014773507, "grad_norm": 0.8973479935331582, "learning_rate": 2.05352798053528e-05, "loss": 0.6493, "step": 4220 }, { "epoch": 0.1233558634180523, "grad_norm": 0.9331161156412054, "learning_rate": 2.0559610705596107e-05, "loss": 0.6129, "step": 4225 }, { "epoch": 0.12350184668836951, "grad_norm": 0.9290701647847177, "learning_rate": 2.0583941605839414e-05, "loss": 0.6114, "step": 4230 }, { "epoch": 0.12364782995868673, "grad_norm": 0.9016197928887314, "learning_rate": 2.0608272506082725e-05, "loss": 0.6116, "step": 4235 }, { "epoch": 0.12379381322900396, "grad_norm": 0.8964194588377595, "learning_rate": 2.0632603406326033e-05, "loss": 0.6323, "step": 4240 }, { "epoch": 0.12393979649932117, "grad_norm": 0.8687121203143947, "learning_rate": 2.0656934306569343e-05, "loss": 0.6408, "step": 4245 }, { "epoch": 0.1240857797696384, "grad_norm": 0.8253910812585677, "learning_rate": 2.0681265206812654e-05, "loss": 0.5924, "step": 4250 }, { "epoch": 0.12423176303995562, "grad_norm": 0.9089625839088685, "learning_rate": 2.0705596107055962e-05, "loss": 0.6382, "step": 4255 }, { "epoch": 0.12437774631027285, "grad_norm": 0.9802531025149145, "learning_rate": 2.0729927007299273e-05, "loss": 0.6555, "step": 4260 }, { "epoch": 0.12452372958059006, "grad_norm": 0.893571366479731, "learning_rate": 2.075425790754258e-05, "loss": 0.6286, "step": 4265 }, { "epoch": 0.12466971285090729, "grad_norm": 0.862806065063863, "learning_rate": 2.077858880778589e-05, "loss": 0.5578, "step": 4270 }, { "epoch": 0.1248156961212245, "grad_norm": 0.9236326692548457, "learning_rate": 2.08029197080292e-05, "loss": 0.6168, "step": 4275 }, { "epoch": 0.12496167939154174, "grad_norm": 0.9631635068447993, "learning_rate": 2.082725060827251e-05, "loss": 0.6705, "step": 4280 }, { "epoch": 0.12510766266185894, "grad_norm": 0.8728155516045637, "learning_rate": 2.0851581508515817e-05, "loss": 0.6383, "step": 4285 }, { "epoch": 0.12525364593217617, "grad_norm": 0.9382268133240195, "learning_rate": 2.0875912408759124e-05, "loss": 0.6225, "step": 4290 }, { "epoch": 0.1253996292024934, "grad_norm": 0.8197654013939785, "learning_rate": 2.0900243309002435e-05, "loss": 0.6218, "step": 4295 }, { "epoch": 0.12554561247281062, "grad_norm": 0.9739106550582881, "learning_rate": 2.0924574209245742e-05, "loss": 0.6456, "step": 4300 }, { "epoch": 0.12569159574312783, "grad_norm": 0.9294005824636061, "learning_rate": 2.0948905109489053e-05, "loss": 0.6228, "step": 4305 }, { "epoch": 0.12583757901344506, "grad_norm": 0.8805194421934548, "learning_rate": 2.097323600973236e-05, "loss": 0.6595, "step": 4310 }, { "epoch": 0.12598356228376228, "grad_norm": 0.9198509461014133, "learning_rate": 2.0997566909975668e-05, "loss": 0.6094, "step": 4315 }, { "epoch": 0.12612954555407951, "grad_norm": 1.0052208096403141, "learning_rate": 2.102189781021898e-05, "loss": 0.6272, "step": 4320 }, { "epoch": 0.12627552882439672, "grad_norm": 0.8402726733422226, "learning_rate": 2.1046228710462286e-05, "loss": 0.5823, "step": 4325 }, { "epoch": 0.12642151209471394, "grad_norm": 0.8863584096795871, "learning_rate": 2.1070559610705597e-05, "loss": 0.6439, "step": 4330 }, { "epoch": 0.12656749536503117, "grad_norm": 0.8693288483166202, "learning_rate": 2.1094890510948905e-05, "loss": 0.6039, "step": 4335 }, { "epoch": 0.12671347863534838, "grad_norm": 0.8422954765466536, "learning_rate": 2.1119221411192212e-05, "loss": 0.5989, "step": 4340 }, { "epoch": 0.1268594619056656, "grad_norm": 0.8315273834350954, "learning_rate": 2.1143552311435523e-05, "loss": 0.6238, "step": 4345 }, { "epoch": 0.12700544517598283, "grad_norm": 0.8497998011129457, "learning_rate": 2.1167883211678834e-05, "loss": 0.6208, "step": 4350 }, { "epoch": 0.12715142844630006, "grad_norm": 1.0029667813821472, "learning_rate": 2.119221411192214e-05, "loss": 0.6531, "step": 4355 }, { "epoch": 0.12729741171661726, "grad_norm": 1.0602628304069992, "learning_rate": 2.1216545012165452e-05, "loss": 0.6369, "step": 4360 }, { "epoch": 0.1274433949869345, "grad_norm": 0.7912541444833613, "learning_rate": 2.1240875912408763e-05, "loss": 0.5943, "step": 4365 }, { "epoch": 0.12758937825725172, "grad_norm": 0.8232496054367843, "learning_rate": 2.126520681265207e-05, "loss": 0.6522, "step": 4370 }, { "epoch": 0.12773536152756895, "grad_norm": 0.9066537440977953, "learning_rate": 2.1289537712895378e-05, "loss": 0.6479, "step": 4375 }, { "epoch": 0.12788134479788615, "grad_norm": 0.8443908311529335, "learning_rate": 2.131386861313869e-05, "loss": 0.5739, "step": 4380 }, { "epoch": 0.12802732806820338, "grad_norm": 0.9243240971970227, "learning_rate": 2.1338199513381996e-05, "loss": 0.5904, "step": 4385 }, { "epoch": 0.1281733113385206, "grad_norm": 0.8555020264254551, "learning_rate": 2.1362530413625307e-05, "loss": 0.597, "step": 4390 }, { "epoch": 0.12831929460883781, "grad_norm": 0.8414411022434235, "learning_rate": 2.1386861313868614e-05, "loss": 0.6132, "step": 4395 }, { "epoch": 0.12846527787915504, "grad_norm": 0.8604281469924865, "learning_rate": 2.1411192214111922e-05, "loss": 0.6267, "step": 4400 }, { "epoch": 0.12861126114947227, "grad_norm": 0.9229619177963749, "learning_rate": 2.1435523114355233e-05, "loss": 0.645, "step": 4405 }, { "epoch": 0.1287572444197895, "grad_norm": 0.8143794315528158, "learning_rate": 2.145985401459854e-05, "loss": 0.6453, "step": 4410 }, { "epoch": 0.1289032276901067, "grad_norm": 0.7831496960142181, "learning_rate": 2.148418491484185e-05, "loss": 0.5897, "step": 4415 }, { "epoch": 0.12904921096042393, "grad_norm": 0.8287829358394562, "learning_rate": 2.150851581508516e-05, "loss": 0.6251, "step": 4420 }, { "epoch": 0.12919519423074116, "grad_norm": 0.9297350861781918, "learning_rate": 2.1532846715328466e-05, "loss": 0.6248, "step": 4425 }, { "epoch": 0.1293411775010584, "grad_norm": 0.9448353434114958, "learning_rate": 2.1557177615571777e-05, "loss": 0.6275, "step": 4430 }, { "epoch": 0.1294871607713756, "grad_norm": 0.8630845638119087, "learning_rate": 2.1581508515815084e-05, "loss": 0.6285, "step": 4435 }, { "epoch": 0.12963314404169282, "grad_norm": 0.8242496496624677, "learning_rate": 2.1605839416058395e-05, "loss": 0.6126, "step": 4440 }, { "epoch": 0.12977912731201005, "grad_norm": 0.8677067380636048, "learning_rate": 2.1630170316301702e-05, "loss": 0.6418, "step": 4445 }, { "epoch": 0.12992511058232725, "grad_norm": 0.7781752615605212, "learning_rate": 2.1654501216545013e-05, "loss": 0.6023, "step": 4450 }, { "epoch": 0.13007109385264448, "grad_norm": 0.859934074032087, "learning_rate": 2.167883211678832e-05, "loss": 0.6102, "step": 4455 }, { "epoch": 0.1302170771229617, "grad_norm": 0.8554247790129033, "learning_rate": 2.170316301703163e-05, "loss": 0.6093, "step": 4460 }, { "epoch": 0.13036306039327894, "grad_norm": 0.8681526167427474, "learning_rate": 2.1727493917274942e-05, "loss": 0.6184, "step": 4465 }, { "epoch": 0.13050904366359614, "grad_norm": 0.8301735133045118, "learning_rate": 2.175182481751825e-05, "loss": 0.6488, "step": 4470 }, { "epoch": 0.13065502693391337, "grad_norm": 0.8016416450859741, "learning_rate": 2.177615571776156e-05, "loss": 0.6084, "step": 4475 }, { "epoch": 0.1308010102042306, "grad_norm": 0.9550164294303798, "learning_rate": 2.1800486618004868e-05, "loss": 0.6251, "step": 4480 }, { "epoch": 0.13094699347454783, "grad_norm": 0.9509368946744294, "learning_rate": 2.1824817518248176e-05, "loss": 0.6508, "step": 4485 }, { "epoch": 0.13109297674486503, "grad_norm": 0.9008829560343478, "learning_rate": 2.1849148418491486e-05, "loss": 0.6361, "step": 4490 }, { "epoch": 0.13123896001518226, "grad_norm": 0.8135810608492989, "learning_rate": 2.1873479318734794e-05, "loss": 0.6181, "step": 4495 }, { "epoch": 0.1313849432854995, "grad_norm": 0.877920679498534, "learning_rate": 2.1897810218978105e-05, "loss": 0.6574, "step": 4500 }, { "epoch": 0.1315309265558167, "grad_norm": 0.8745205379339864, "learning_rate": 2.1922141119221412e-05, "loss": 0.6122, "step": 4505 }, { "epoch": 0.13167690982613392, "grad_norm": 0.8450837202056459, "learning_rate": 2.194647201946472e-05, "loss": 0.5944, "step": 4510 }, { "epoch": 0.13182289309645115, "grad_norm": 0.8921021078377239, "learning_rate": 2.197080291970803e-05, "loss": 0.6476, "step": 4515 }, { "epoch": 0.13196887636676838, "grad_norm": 0.8221698202168477, "learning_rate": 2.1995133819951338e-05, "loss": 0.5948, "step": 4520 }, { "epoch": 0.13211485963708558, "grad_norm": 0.8792840365568917, "learning_rate": 2.201946472019465e-05, "loss": 0.6267, "step": 4525 }, { "epoch": 0.1322608429074028, "grad_norm": 1.0165088106788145, "learning_rate": 2.2043795620437956e-05, "loss": 0.6509, "step": 4530 }, { "epoch": 0.13240682617772004, "grad_norm": 0.9765222969791317, "learning_rate": 2.2068126520681267e-05, "loss": 0.6451, "step": 4535 }, { "epoch": 0.13255280944803727, "grad_norm": 0.9074444183268844, "learning_rate": 2.2092457420924575e-05, "loss": 0.6376, "step": 4540 }, { "epoch": 0.13269879271835447, "grad_norm": 0.8170251260041087, "learning_rate": 2.2116788321167882e-05, "loss": 0.6147, "step": 4545 }, { "epoch": 0.1328447759886717, "grad_norm": 0.9017517566727044, "learning_rate": 2.2141119221411193e-05, "loss": 0.6228, "step": 4550 }, { "epoch": 0.13299075925898893, "grad_norm": 0.8639679046518136, "learning_rate": 2.21654501216545e-05, "loss": 0.5954, "step": 4555 }, { "epoch": 0.13313674252930613, "grad_norm": 0.8367332977820845, "learning_rate": 2.218978102189781e-05, "loss": 0.5821, "step": 4560 }, { "epoch": 0.13328272579962336, "grad_norm": 0.9310811563025889, "learning_rate": 2.2214111922141122e-05, "loss": 0.599, "step": 4565 }, { "epoch": 0.1334287090699406, "grad_norm": 0.9311856814326028, "learning_rate": 2.223844282238443e-05, "loss": 0.6668, "step": 4570 }, { "epoch": 0.13357469234025782, "grad_norm": 0.7998774101274249, "learning_rate": 2.226277372262774e-05, "loss": 0.6261, "step": 4575 }, { "epoch": 0.13372067561057502, "grad_norm": 0.8957356715874791, "learning_rate": 2.2287104622871048e-05, "loss": 0.6219, "step": 4580 }, { "epoch": 0.13386665888089225, "grad_norm": 0.8991615524588343, "learning_rate": 2.231143552311436e-05, "loss": 0.6462, "step": 4585 }, { "epoch": 0.13401264215120948, "grad_norm": 0.9071111668354367, "learning_rate": 2.2335766423357666e-05, "loss": 0.5985, "step": 4590 }, { "epoch": 0.1341586254215267, "grad_norm": 0.8964596875167224, "learning_rate": 2.2360097323600973e-05, "loss": 0.6292, "step": 4595 }, { "epoch": 0.1343046086918439, "grad_norm": 0.8297438812745427, "learning_rate": 2.2384428223844284e-05, "loss": 0.6226, "step": 4600 }, { "epoch": 0.13445059196216114, "grad_norm": 0.8989659662079835, "learning_rate": 2.2408759124087592e-05, "loss": 0.6377, "step": 4605 }, { "epoch": 0.13459657523247837, "grad_norm": 0.8106955880989682, "learning_rate": 2.2433090024330903e-05, "loss": 0.5749, "step": 4610 }, { "epoch": 0.13474255850279557, "grad_norm": 0.911708163573274, "learning_rate": 2.245742092457421e-05, "loss": 0.6193, "step": 4615 }, { "epoch": 0.1348885417731128, "grad_norm": 1.04716258164848, "learning_rate": 2.2481751824817517e-05, "loss": 0.6323, "step": 4620 }, { "epoch": 0.13503452504343003, "grad_norm": 0.8215241809627195, "learning_rate": 2.2506082725060828e-05, "loss": 0.6269, "step": 4625 }, { "epoch": 0.13518050831374726, "grad_norm": 0.8894532033721778, "learning_rate": 2.2530413625304136e-05, "loss": 0.5869, "step": 4630 }, { "epoch": 0.13532649158406446, "grad_norm": 0.8528680997650449, "learning_rate": 2.2554744525547447e-05, "loss": 0.6217, "step": 4635 }, { "epoch": 0.13547247485438169, "grad_norm": 0.8823579479981204, "learning_rate": 2.2579075425790754e-05, "loss": 0.6442, "step": 4640 }, { "epoch": 0.13561845812469892, "grad_norm": 0.8749508393390975, "learning_rate": 2.2603406326034065e-05, "loss": 0.6637, "step": 4645 }, { "epoch": 0.13576444139501614, "grad_norm": 0.9491816072663314, "learning_rate": 2.2627737226277372e-05, "loss": 0.673, "step": 4650 }, { "epoch": 0.13591042466533335, "grad_norm": 0.9180684701547891, "learning_rate": 2.265206812652068e-05, "loss": 0.6268, "step": 4655 }, { "epoch": 0.13605640793565058, "grad_norm": 0.9363667042178946, "learning_rate": 2.267639902676399e-05, "loss": 0.6368, "step": 4660 }, { "epoch": 0.1362023912059678, "grad_norm": 0.9793365343885624, "learning_rate": 2.27007299270073e-05, "loss": 0.6604, "step": 4665 }, { "epoch": 0.136348374476285, "grad_norm": 0.9520205929149286, "learning_rate": 2.272506082725061e-05, "loss": 0.6037, "step": 4670 }, { "epoch": 0.13649435774660224, "grad_norm": 0.9941069458631222, "learning_rate": 2.274939172749392e-05, "loss": 0.6365, "step": 4675 }, { "epoch": 0.13664034101691946, "grad_norm": 0.818056762056786, "learning_rate": 2.2773722627737227e-05, "loss": 0.5924, "step": 4680 }, { "epoch": 0.1367863242872367, "grad_norm": 0.8330045193614967, "learning_rate": 2.2798053527980538e-05, "loss": 0.6298, "step": 4685 }, { "epoch": 0.1369323075575539, "grad_norm": 0.9202496432625897, "learning_rate": 2.2822384428223845e-05, "loss": 0.6216, "step": 4690 }, { "epoch": 0.13707829082787112, "grad_norm": 0.9650315828826932, "learning_rate": 2.2846715328467156e-05, "loss": 0.6266, "step": 4695 }, { "epoch": 0.13722427409818835, "grad_norm": 0.9799213807021516, "learning_rate": 2.2871046228710464e-05, "loss": 0.6637, "step": 4700 }, { "epoch": 0.13737025736850558, "grad_norm": 0.8741427596089288, "learning_rate": 2.289537712895377e-05, "loss": 0.6316, "step": 4705 }, { "epoch": 0.13751624063882278, "grad_norm": 0.9230562000241292, "learning_rate": 2.2919708029197082e-05, "loss": 0.6562, "step": 4710 }, { "epoch": 0.13766222390914, "grad_norm": 0.8621261578424665, "learning_rate": 2.294403892944039e-05, "loss": 0.6425, "step": 4715 }, { "epoch": 0.13780820717945724, "grad_norm": 0.9055111213294714, "learning_rate": 2.29683698296837e-05, "loss": 0.6496, "step": 4720 }, { "epoch": 0.13795419044977444, "grad_norm": 0.9214521265136072, "learning_rate": 2.2992700729927008e-05, "loss": 0.6468, "step": 4725 }, { "epoch": 0.13810017372009167, "grad_norm": 0.8026035372941225, "learning_rate": 2.301703163017032e-05, "loss": 0.58, "step": 4730 }, { "epoch": 0.1382461569904089, "grad_norm": 0.8729553474040727, "learning_rate": 2.3041362530413626e-05, "loss": 0.5973, "step": 4735 }, { "epoch": 0.13839214026072613, "grad_norm": 0.8962705397843973, "learning_rate": 2.3065693430656934e-05, "loss": 0.6252, "step": 4740 }, { "epoch": 0.13853812353104333, "grad_norm": 0.8430470110215986, "learning_rate": 2.3090024330900244e-05, "loss": 0.5978, "step": 4745 }, { "epoch": 0.13868410680136056, "grad_norm": 0.9257907138424152, "learning_rate": 2.3114355231143552e-05, "loss": 0.6467, "step": 4750 }, { "epoch": 0.1388300900716778, "grad_norm": 0.8413496691159379, "learning_rate": 2.3138686131386863e-05, "loss": 0.6084, "step": 4755 }, { "epoch": 0.13897607334199502, "grad_norm": 0.8119558795124817, "learning_rate": 2.316301703163017e-05, "loss": 0.6274, "step": 4760 }, { "epoch": 0.13912205661231222, "grad_norm": 0.8481610902020462, "learning_rate": 2.318734793187348e-05, "loss": 0.5807, "step": 4765 }, { "epoch": 0.13926803988262945, "grad_norm": 0.8545356369205491, "learning_rate": 2.321167883211679e-05, "loss": 0.6323, "step": 4770 }, { "epoch": 0.13941402315294668, "grad_norm": 0.8848324167077204, "learning_rate": 2.32360097323601e-05, "loss": 0.6366, "step": 4775 }, { "epoch": 0.13956000642326388, "grad_norm": 0.8564722700822047, "learning_rate": 2.326034063260341e-05, "loss": 0.629, "step": 4780 }, { "epoch": 0.1397059896935811, "grad_norm": 0.8553953290005251, "learning_rate": 2.3284671532846718e-05, "loss": 0.6275, "step": 4785 }, { "epoch": 0.13985197296389834, "grad_norm": 0.8316950178986381, "learning_rate": 2.3309002433090025e-05, "loss": 0.6108, "step": 4790 }, { "epoch": 0.13999795623421557, "grad_norm": 0.9001831455877115, "learning_rate": 2.3333333333333336e-05, "loss": 0.6379, "step": 4795 }, { "epoch": 0.14014393950453277, "grad_norm": 0.8290061508248043, "learning_rate": 2.3357664233576643e-05, "loss": 0.6373, "step": 4800 }, { "epoch": 0.14028992277485, "grad_norm": 0.7958414165247939, "learning_rate": 2.3381995133819954e-05, "loss": 0.6084, "step": 4805 }, { "epoch": 0.14043590604516723, "grad_norm": 0.9340933022410733, "learning_rate": 2.340632603406326e-05, "loss": 0.6367, "step": 4810 }, { "epoch": 0.14058188931548443, "grad_norm": 0.9042176344234701, "learning_rate": 2.3430656934306572e-05, "loss": 0.6144, "step": 4815 }, { "epoch": 0.14072787258580166, "grad_norm": 1.0108727308214873, "learning_rate": 2.345498783454988e-05, "loss": 0.6643, "step": 4820 }, { "epoch": 0.1408738558561189, "grad_norm": 0.8748545879839138, "learning_rate": 2.3479318734793187e-05, "loss": 0.6156, "step": 4825 }, { "epoch": 0.14101983912643612, "grad_norm": 0.818935878167211, "learning_rate": 2.3503649635036498e-05, "loss": 0.5962, "step": 4830 }, { "epoch": 0.14116582239675332, "grad_norm": 0.8871102713953423, "learning_rate": 2.3527980535279806e-05, "loss": 0.6161, "step": 4835 }, { "epoch": 0.14131180566707055, "grad_norm": 0.9319524396234155, "learning_rate": 2.3552311435523116e-05, "loss": 0.6367, "step": 4840 }, { "epoch": 0.14145778893738778, "grad_norm": 0.9561050201497617, "learning_rate": 2.3576642335766424e-05, "loss": 0.6245, "step": 4845 }, { "epoch": 0.141603772207705, "grad_norm": 0.922545873133313, "learning_rate": 2.360097323600973e-05, "loss": 0.6824, "step": 4850 }, { "epoch": 0.1417497554780222, "grad_norm": 0.8417742885218893, "learning_rate": 2.3625304136253042e-05, "loss": 0.6325, "step": 4855 }, { "epoch": 0.14189573874833944, "grad_norm": 0.8993483030533479, "learning_rate": 2.364963503649635e-05, "loss": 0.6226, "step": 4860 }, { "epoch": 0.14204172201865667, "grad_norm": 0.8437741261536611, "learning_rate": 2.367396593673966e-05, "loss": 0.6271, "step": 4865 }, { "epoch": 0.14218770528897387, "grad_norm": 0.9387797077454345, "learning_rate": 2.3698296836982968e-05, "loss": 0.6493, "step": 4870 }, { "epoch": 0.1423336885592911, "grad_norm": 0.7953732867251657, "learning_rate": 2.372262773722628e-05, "loss": 0.5924, "step": 4875 }, { "epoch": 0.14247967182960833, "grad_norm": 0.8771590183839169, "learning_rate": 2.374695863746959e-05, "loss": 0.6499, "step": 4880 }, { "epoch": 0.14262565509992556, "grad_norm": 0.875519525007587, "learning_rate": 2.3771289537712897e-05, "loss": 0.6392, "step": 4885 }, { "epoch": 0.14277163837024276, "grad_norm": 0.8244505768370063, "learning_rate": 2.3795620437956208e-05, "loss": 0.634, "step": 4890 }, { "epoch": 0.14291762164056, "grad_norm": 0.8154073749470514, "learning_rate": 2.3819951338199515e-05, "loss": 0.6472, "step": 4895 }, { "epoch": 0.14306360491087722, "grad_norm": 0.9159400639258487, "learning_rate": 2.3844282238442823e-05, "loss": 0.6068, "step": 4900 }, { "epoch": 0.14320958818119445, "grad_norm": 0.8140148597326545, "learning_rate": 2.3868613138686134e-05, "loss": 0.6139, "step": 4905 }, { "epoch": 0.14335557145151165, "grad_norm": 0.9196652488266713, "learning_rate": 2.389294403892944e-05, "loss": 0.5786, "step": 4910 }, { "epoch": 0.14350155472182888, "grad_norm": 0.8521079033924304, "learning_rate": 2.3917274939172752e-05, "loss": 0.628, "step": 4915 }, { "epoch": 0.1436475379921461, "grad_norm": 0.7767589678303731, "learning_rate": 2.394160583941606e-05, "loss": 0.6368, "step": 4920 }, { "epoch": 0.1437935212624633, "grad_norm": 0.8349378500724967, "learning_rate": 2.396593673965937e-05, "loss": 0.6303, "step": 4925 }, { "epoch": 0.14393950453278054, "grad_norm": 0.8623370086708687, "learning_rate": 2.3990267639902678e-05, "loss": 0.613, "step": 4930 }, { "epoch": 0.14408548780309777, "grad_norm": 0.8299492079039261, "learning_rate": 2.4014598540145985e-05, "loss": 0.5958, "step": 4935 }, { "epoch": 0.144231471073415, "grad_norm": 0.7943014947944986, "learning_rate": 2.4038929440389296e-05, "loss": 0.6577, "step": 4940 }, { "epoch": 0.1443774543437322, "grad_norm": 0.8230175574766205, "learning_rate": 2.4063260340632603e-05, "loss": 0.6179, "step": 4945 }, { "epoch": 0.14452343761404943, "grad_norm": 0.8821238756251095, "learning_rate": 2.4087591240875914e-05, "loss": 0.6723, "step": 4950 }, { "epoch": 0.14466942088436666, "grad_norm": 0.840813002184079, "learning_rate": 2.411192214111922e-05, "loss": 0.6341, "step": 4955 }, { "epoch": 0.14481540415468389, "grad_norm": 0.9550068306562246, "learning_rate": 2.413625304136253e-05, "loss": 0.6225, "step": 4960 }, { "epoch": 0.1449613874250011, "grad_norm": 0.8040917755267593, "learning_rate": 2.416058394160584e-05, "loss": 0.6265, "step": 4965 }, { "epoch": 0.14510737069531832, "grad_norm": 0.8497586020077165, "learning_rate": 2.4184914841849147e-05, "loss": 0.6574, "step": 4970 }, { "epoch": 0.14525335396563555, "grad_norm": 0.7661290795815082, "learning_rate": 2.4209245742092458e-05, "loss": 0.5719, "step": 4975 }, { "epoch": 0.14539933723595275, "grad_norm": 0.7704484381478995, "learning_rate": 2.423357664233577e-05, "loss": 0.6555, "step": 4980 }, { "epoch": 0.14554532050626998, "grad_norm": 0.9290540493224796, "learning_rate": 2.4257907542579077e-05, "loss": 0.6395, "step": 4985 }, { "epoch": 0.1456913037765872, "grad_norm": 0.8654902393967402, "learning_rate": 2.4282238442822387e-05, "loss": 0.6567, "step": 4990 }, { "epoch": 0.14583728704690443, "grad_norm": 0.9105266184274299, "learning_rate": 2.4306569343065695e-05, "loss": 0.6702, "step": 4995 }, { "epoch": 0.14598327031722164, "grad_norm": 0.8254571066831617, "learning_rate": 2.4330900243309006e-05, "loss": 0.6353, "step": 5000 }, { "epoch": 0.14612925358753887, "grad_norm": 0.8760081429495625, "learning_rate": 2.4355231143552313e-05, "loss": 0.6106, "step": 5005 }, { "epoch": 0.1462752368578561, "grad_norm": 0.8718115043727328, "learning_rate": 2.4379562043795624e-05, "loss": 0.633, "step": 5010 }, { "epoch": 0.14642122012817332, "grad_norm": 0.9242401085418694, "learning_rate": 2.440389294403893e-05, "loss": 0.6664, "step": 5015 }, { "epoch": 0.14656720339849053, "grad_norm": 0.9149183697228536, "learning_rate": 2.442822384428224e-05, "loss": 0.6684, "step": 5020 }, { "epoch": 0.14671318666880775, "grad_norm": 0.832454945482591, "learning_rate": 2.445255474452555e-05, "loss": 0.6202, "step": 5025 }, { "epoch": 0.14685916993912498, "grad_norm": 0.815972870567183, "learning_rate": 2.4476885644768857e-05, "loss": 0.6593, "step": 5030 }, { "epoch": 0.14700515320944219, "grad_norm": 0.8738726437545629, "learning_rate": 2.4501216545012168e-05, "loss": 0.6348, "step": 5035 }, { "epoch": 0.14715113647975941, "grad_norm": 0.8105289817156797, "learning_rate": 2.4525547445255475e-05, "loss": 0.6055, "step": 5040 }, { "epoch": 0.14729711975007664, "grad_norm": 0.9607358191905301, "learning_rate": 2.4549878345498783e-05, "loss": 0.649, "step": 5045 }, { "epoch": 0.14744310302039387, "grad_norm": 0.8254526083905375, "learning_rate": 2.4574209245742094e-05, "loss": 0.619, "step": 5050 }, { "epoch": 0.14758908629071107, "grad_norm": 0.9687896043562412, "learning_rate": 2.45985401459854e-05, "loss": 0.6418, "step": 5055 }, { "epoch": 0.1477350695610283, "grad_norm": 0.8591114679367666, "learning_rate": 2.4622871046228712e-05, "loss": 0.592, "step": 5060 }, { "epoch": 0.14788105283134553, "grad_norm": 0.8580981997230523, "learning_rate": 2.464720194647202e-05, "loss": 0.6224, "step": 5065 }, { "epoch": 0.14802703610166276, "grad_norm": 0.9028784098921173, "learning_rate": 2.4671532846715327e-05, "loss": 0.6313, "step": 5070 }, { "epoch": 0.14817301937197996, "grad_norm": 0.7908612395374891, "learning_rate": 2.4695863746958638e-05, "loss": 0.6071, "step": 5075 }, { "epoch": 0.1483190026422972, "grad_norm": 0.770010586307044, "learning_rate": 2.472019464720195e-05, "loss": 0.5954, "step": 5080 }, { "epoch": 0.14846498591261442, "grad_norm": 0.9345839016165857, "learning_rate": 2.4744525547445256e-05, "loss": 0.6347, "step": 5085 }, { "epoch": 0.14861096918293162, "grad_norm": 0.8921831330271106, "learning_rate": 2.4768856447688567e-05, "loss": 0.6382, "step": 5090 }, { "epoch": 0.14875695245324885, "grad_norm": 0.8433098376002723, "learning_rate": 2.4793187347931878e-05, "loss": 0.5833, "step": 5095 }, { "epoch": 0.14890293572356608, "grad_norm": 0.8438885938138798, "learning_rate": 2.4817518248175185e-05, "loss": 0.6212, "step": 5100 }, { "epoch": 0.1490489189938833, "grad_norm": 0.7868124521127404, "learning_rate": 2.4841849148418493e-05, "loss": 0.6092, "step": 5105 }, { "epoch": 0.1491949022642005, "grad_norm": 0.9362418079257293, "learning_rate": 2.4866180048661803e-05, "loss": 0.6363, "step": 5110 }, { "epoch": 0.14934088553451774, "grad_norm": 0.7708486713280572, "learning_rate": 2.489051094890511e-05, "loss": 0.6225, "step": 5115 }, { "epoch": 0.14948686880483497, "grad_norm": 0.8086012713660988, "learning_rate": 2.4914841849148422e-05, "loss": 0.6044, "step": 5120 }, { "epoch": 0.1496328520751522, "grad_norm": 0.7832708117827137, "learning_rate": 2.493917274939173e-05, "loss": 0.608, "step": 5125 }, { "epoch": 0.1497788353454694, "grad_norm": 0.8234017410466474, "learning_rate": 2.4963503649635037e-05, "loss": 0.6212, "step": 5130 }, { "epoch": 0.14992481861578663, "grad_norm": 0.8268614053650797, "learning_rate": 2.4987834549878347e-05, "loss": 0.6386, "step": 5135 }, { "epoch": 0.15007080188610386, "grad_norm": 0.8031126514487132, "learning_rate": 2.5012165450121655e-05, "loss": 0.5968, "step": 5140 }, { "epoch": 0.15021678515642106, "grad_norm": 0.9628516450959159, "learning_rate": 2.5036496350364962e-05, "loss": 0.5916, "step": 5145 }, { "epoch": 0.1503627684267383, "grad_norm": 0.8466706819130789, "learning_rate": 2.5060827250608277e-05, "loss": 0.621, "step": 5150 }, { "epoch": 0.15050875169705552, "grad_norm": 0.9132277554113488, "learning_rate": 2.5085158150851584e-05, "loss": 0.6505, "step": 5155 }, { "epoch": 0.15065473496737275, "grad_norm": 0.8255443401257966, "learning_rate": 2.510948905109489e-05, "loss": 0.6145, "step": 5160 }, { "epoch": 0.15080071823768995, "grad_norm": 0.859720769709107, "learning_rate": 2.51338199513382e-05, "loss": 0.6625, "step": 5165 }, { "epoch": 0.15094670150800718, "grad_norm": 0.926175160436885, "learning_rate": 2.5158150851581506e-05, "loss": 0.5935, "step": 5170 }, { "epoch": 0.1510926847783244, "grad_norm": 0.8799201064873352, "learning_rate": 2.518248175182482e-05, "loss": 0.6294, "step": 5175 }, { "epoch": 0.15123866804864164, "grad_norm": 0.8381599420644903, "learning_rate": 2.5206812652068128e-05, "loss": 0.6253, "step": 5180 }, { "epoch": 0.15138465131895884, "grad_norm": 0.7627108276764573, "learning_rate": 2.5231143552311436e-05, "loss": 0.6133, "step": 5185 }, { "epoch": 0.15153063458927607, "grad_norm": 0.873899623840022, "learning_rate": 2.5255474452554746e-05, "loss": 0.668, "step": 5190 }, { "epoch": 0.1516766178595933, "grad_norm": 0.8529505582634179, "learning_rate": 2.5279805352798054e-05, "loss": 0.6211, "step": 5195 }, { "epoch": 0.1518226011299105, "grad_norm": 0.8801820003280447, "learning_rate": 2.5304136253041365e-05, "loss": 0.6384, "step": 5200 }, { "epoch": 0.15196858440022773, "grad_norm": 0.8860545862718293, "learning_rate": 2.5328467153284675e-05, "loss": 0.6194, "step": 5205 }, { "epoch": 0.15211456767054496, "grad_norm": 0.8385126175299933, "learning_rate": 2.5352798053527983e-05, "loss": 0.6284, "step": 5210 }, { "epoch": 0.1522605509408622, "grad_norm": 0.7615019757621445, "learning_rate": 2.537712895377129e-05, "loss": 0.6248, "step": 5215 }, { "epoch": 0.1524065342111794, "grad_norm": 0.8655239378621916, "learning_rate": 2.5401459854014598e-05, "loss": 0.6142, "step": 5220 }, { "epoch": 0.15255251748149662, "grad_norm": 0.7618209732798576, "learning_rate": 2.5425790754257912e-05, "loss": 0.6214, "step": 5225 }, { "epoch": 0.15269850075181385, "grad_norm": 0.7888789344002302, "learning_rate": 2.545012165450122e-05, "loss": 0.6124, "step": 5230 }, { "epoch": 0.15284448402213108, "grad_norm": 0.7936494890285448, "learning_rate": 2.5474452554744527e-05, "loss": 0.5855, "step": 5235 }, { "epoch": 0.15299046729244828, "grad_norm": 0.7970929097668037, "learning_rate": 2.5498783454987834e-05, "loss": 0.632, "step": 5240 }, { "epoch": 0.1531364505627655, "grad_norm": 0.9374499425871229, "learning_rate": 2.5523114355231142e-05, "loss": 0.6391, "step": 5245 }, { "epoch": 0.15328243383308274, "grad_norm": 0.7953861210700666, "learning_rate": 2.5547445255474456e-05, "loss": 0.6418, "step": 5250 }, { "epoch": 0.15342841710339994, "grad_norm": 0.9115095554305476, "learning_rate": 2.5571776155717764e-05, "loss": 0.6637, "step": 5255 }, { "epoch": 0.15357440037371717, "grad_norm": 0.9523654447562945, "learning_rate": 2.559610705596107e-05, "loss": 0.5881, "step": 5260 }, { "epoch": 0.1537203836440344, "grad_norm": 0.9268059734369446, "learning_rate": 2.562043795620438e-05, "loss": 0.7114, "step": 5265 }, { "epoch": 0.15386636691435163, "grad_norm": 0.7319925507401965, "learning_rate": 2.5644768856447686e-05, "loss": 0.6116, "step": 5270 }, { "epoch": 0.15401235018466883, "grad_norm": 0.7700959891167185, "learning_rate": 2.5669099756691e-05, "loss": 0.5879, "step": 5275 }, { "epoch": 0.15415833345498606, "grad_norm": 0.8662401800504871, "learning_rate": 2.5693430656934308e-05, "loss": 0.6207, "step": 5280 }, { "epoch": 0.1543043167253033, "grad_norm": 0.8652552374110656, "learning_rate": 2.5717761557177615e-05, "loss": 0.6479, "step": 5285 }, { "epoch": 0.1544502999956205, "grad_norm": 0.8011727368194005, "learning_rate": 2.5742092457420926e-05, "loss": 0.6242, "step": 5290 }, { "epoch": 0.15459628326593772, "grad_norm": 0.800238762710085, "learning_rate": 2.5766423357664233e-05, "loss": 0.6211, "step": 5295 }, { "epoch": 0.15474226653625495, "grad_norm": 0.8668750779061192, "learning_rate": 2.5790754257907544e-05, "loss": 0.6026, "step": 5300 }, { "epoch": 0.15488824980657218, "grad_norm": 0.8086344310145382, "learning_rate": 2.5815085158150855e-05, "loss": 0.6315, "step": 5305 }, { "epoch": 0.15503423307688938, "grad_norm": 0.824453854884454, "learning_rate": 2.5839416058394162e-05, "loss": 0.6174, "step": 5310 }, { "epoch": 0.1551802163472066, "grad_norm": 0.8147993275331734, "learning_rate": 2.586374695863747e-05, "loss": 0.6152, "step": 5315 }, { "epoch": 0.15532619961752384, "grad_norm": 0.7309043058697221, "learning_rate": 2.5888077858880777e-05, "loss": 0.6175, "step": 5320 }, { "epoch": 0.15547218288784106, "grad_norm": 0.7717210733786775, "learning_rate": 2.591240875912409e-05, "loss": 0.6004, "step": 5325 }, { "epoch": 0.15561816615815827, "grad_norm": 0.8107011291703888, "learning_rate": 2.59367396593674e-05, "loss": 0.6832, "step": 5330 }, { "epoch": 0.1557641494284755, "grad_norm": 0.8508904313376053, "learning_rate": 2.5961070559610706e-05, "loss": 0.6707, "step": 5335 }, { "epoch": 0.15591013269879272, "grad_norm": 0.7615007324171521, "learning_rate": 2.5985401459854014e-05, "loss": 0.5978, "step": 5340 }, { "epoch": 0.15605611596910993, "grad_norm": 0.8170617757191142, "learning_rate": 2.6009732360097328e-05, "loss": 0.645, "step": 5345 }, { "epoch": 0.15620209923942716, "grad_norm": 0.791940099484617, "learning_rate": 2.6034063260340636e-05, "loss": 0.616, "step": 5350 }, { "epoch": 0.15634808250974438, "grad_norm": 0.8025847459416066, "learning_rate": 2.6058394160583943e-05, "loss": 0.6054, "step": 5355 }, { "epoch": 0.15649406578006161, "grad_norm": 0.8657275970942363, "learning_rate": 2.608272506082725e-05, "loss": 0.6279, "step": 5360 }, { "epoch": 0.15664004905037882, "grad_norm": 0.7791384471021843, "learning_rate": 2.6107055961070558e-05, "loss": 0.6069, "step": 5365 }, { "epoch": 0.15678603232069604, "grad_norm": 0.9200732939920427, "learning_rate": 2.6131386861313872e-05, "loss": 0.6258, "step": 5370 }, { "epoch": 0.15693201559101327, "grad_norm": 0.8487035517908222, "learning_rate": 2.615571776155718e-05, "loss": 0.6042, "step": 5375 }, { "epoch": 0.1570779988613305, "grad_norm": 0.8458318338473945, "learning_rate": 2.6180048661800487e-05, "loss": 0.6051, "step": 5380 }, { "epoch": 0.1572239821316477, "grad_norm": 0.7694197768827594, "learning_rate": 2.6204379562043795e-05, "loss": 0.6044, "step": 5385 }, { "epoch": 0.15736996540196493, "grad_norm": 0.8372302807034281, "learning_rate": 2.6228710462287105e-05, "loss": 0.6274, "step": 5390 }, { "epoch": 0.15751594867228216, "grad_norm": 0.6954295984637767, "learning_rate": 2.6253041362530416e-05, "loss": 0.5971, "step": 5395 }, { "epoch": 0.15766193194259936, "grad_norm": 0.8222967423960912, "learning_rate": 2.6277372262773724e-05, "loss": 0.6409, "step": 5400 }, { "epoch": 0.1578079152129166, "grad_norm": 0.7750148648438596, "learning_rate": 2.6301703163017035e-05, "loss": 0.6044, "step": 5405 }, { "epoch": 0.15795389848323382, "grad_norm": 0.7738751135368035, "learning_rate": 2.6326034063260342e-05, "loss": 0.6489, "step": 5410 }, { "epoch": 0.15809988175355105, "grad_norm": 0.8610020176979226, "learning_rate": 2.635036496350365e-05, "loss": 0.6138, "step": 5415 }, { "epoch": 0.15824586502386825, "grad_norm": 0.7818628215977476, "learning_rate": 2.6374695863746964e-05, "loss": 0.621, "step": 5420 }, { "epoch": 0.15839184829418548, "grad_norm": 0.8284383802159098, "learning_rate": 2.639902676399027e-05, "loss": 0.6588, "step": 5425 }, { "epoch": 0.1585378315645027, "grad_norm": 0.7848166237512199, "learning_rate": 2.642335766423358e-05, "loss": 0.5946, "step": 5430 }, { "epoch": 0.15868381483481994, "grad_norm": 0.8099087563436927, "learning_rate": 2.6447688564476886e-05, "loss": 0.6254, "step": 5435 }, { "epoch": 0.15882979810513714, "grad_norm": 0.7946686369332037, "learning_rate": 2.6472019464720193e-05, "loss": 0.6319, "step": 5440 }, { "epoch": 0.15897578137545437, "grad_norm": 0.8378098815739509, "learning_rate": 2.6496350364963508e-05, "loss": 0.6072, "step": 5445 }, { "epoch": 0.1591217646457716, "grad_norm": 0.8388112576715467, "learning_rate": 2.6520681265206815e-05, "loss": 0.6425, "step": 5450 }, { "epoch": 0.1592677479160888, "grad_norm": 0.8866098120902335, "learning_rate": 2.6545012165450123e-05, "loss": 0.6074, "step": 5455 }, { "epoch": 0.15941373118640603, "grad_norm": 0.8836448655327187, "learning_rate": 2.656934306569343e-05, "loss": 0.6483, "step": 5460 }, { "epoch": 0.15955971445672326, "grad_norm": 0.8639147634509221, "learning_rate": 2.6593673965936737e-05, "loss": 0.6485, "step": 5465 }, { "epoch": 0.1597056977270405, "grad_norm": 0.777138983210983, "learning_rate": 2.661800486618005e-05, "loss": 0.643, "step": 5470 }, { "epoch": 0.1598516809973577, "grad_norm": 0.8780958306273938, "learning_rate": 2.664233576642336e-05, "loss": 0.6469, "step": 5475 }, { "epoch": 0.15999766426767492, "grad_norm": 0.8103035287965834, "learning_rate": 2.6666666666666667e-05, "loss": 0.6373, "step": 5480 }, { "epoch": 0.16014364753799215, "grad_norm": 0.7866790545454218, "learning_rate": 2.6690997566909974e-05, "loss": 0.6416, "step": 5485 }, { "epoch": 0.16028963080830938, "grad_norm": 0.8525204701697023, "learning_rate": 2.6715328467153285e-05, "loss": 0.612, "step": 5490 }, { "epoch": 0.16043561407862658, "grad_norm": 2.327025188938901, "learning_rate": 2.6739659367396596e-05, "loss": 0.6899, "step": 5495 }, { "epoch": 0.1605815973489438, "grad_norm": 0.855198057457757, "learning_rate": 2.6763990267639903e-05, "loss": 0.6268, "step": 5500 }, { "epoch": 0.16072758061926104, "grad_norm": 0.7640199245056426, "learning_rate": 2.6788321167883214e-05, "loss": 0.614, "step": 5505 }, { "epoch": 0.16087356388957824, "grad_norm": 0.833269421410475, "learning_rate": 2.681265206812652e-05, "loss": 0.6183, "step": 5510 }, { "epoch": 0.16101954715989547, "grad_norm": 0.7714532947710426, "learning_rate": 2.6836982968369832e-05, "loss": 0.6331, "step": 5515 }, { "epoch": 0.1611655304302127, "grad_norm": 0.8252406327960303, "learning_rate": 2.6861313868613143e-05, "loss": 0.6323, "step": 5520 }, { "epoch": 0.16131151370052993, "grad_norm": 0.8734621619364913, "learning_rate": 2.688564476885645e-05, "loss": 0.6853, "step": 5525 }, { "epoch": 0.16145749697084713, "grad_norm": 0.8253838224680681, "learning_rate": 2.6909975669099758e-05, "loss": 0.6212, "step": 5530 }, { "epoch": 0.16160348024116436, "grad_norm": 0.7710803555469515, "learning_rate": 2.6934306569343065e-05, "loss": 0.6173, "step": 5535 }, { "epoch": 0.1617494635114816, "grad_norm": 0.847649175434807, "learning_rate": 2.695863746958638e-05, "loss": 0.6197, "step": 5540 }, { "epoch": 0.16189544678179882, "grad_norm": 0.771165873651422, "learning_rate": 2.6982968369829687e-05, "loss": 0.6624, "step": 5545 }, { "epoch": 0.16204143005211602, "grad_norm": 0.7883748505365049, "learning_rate": 2.7007299270072995e-05, "loss": 0.5964, "step": 5550 }, { "epoch": 0.16218741332243325, "grad_norm": 0.7169524762573917, "learning_rate": 2.7031630170316302e-05, "loss": 0.5915, "step": 5555 }, { "epoch": 0.16233339659275048, "grad_norm": 0.8355843987051327, "learning_rate": 2.705596107055961e-05, "loss": 0.6044, "step": 5560 }, { "epoch": 0.16247937986306768, "grad_norm": 0.8078367852736061, "learning_rate": 2.7080291970802924e-05, "loss": 0.6147, "step": 5565 }, { "epoch": 0.1626253631333849, "grad_norm": 0.826966253778798, "learning_rate": 2.710462287104623e-05, "loss": 0.6429, "step": 5570 }, { "epoch": 0.16277134640370214, "grad_norm": 0.8312136741582802, "learning_rate": 2.712895377128954e-05, "loss": 0.6305, "step": 5575 }, { "epoch": 0.16291732967401937, "grad_norm": 0.7627141616165518, "learning_rate": 2.7153284671532846e-05, "loss": 0.6371, "step": 5580 }, { "epoch": 0.16306331294433657, "grad_norm": 0.8084064761187151, "learning_rate": 2.7177615571776154e-05, "loss": 0.6449, "step": 5585 }, { "epoch": 0.1632092962146538, "grad_norm": 0.8629649707141198, "learning_rate": 2.7201946472019468e-05, "loss": 0.6179, "step": 5590 }, { "epoch": 0.16335527948497103, "grad_norm": 0.844810173242678, "learning_rate": 2.7226277372262775e-05, "loss": 0.5968, "step": 5595 }, { "epoch": 0.16350126275528826, "grad_norm": 0.8143484210064909, "learning_rate": 2.7250608272506083e-05, "loss": 0.5881, "step": 5600 }, { "epoch": 0.16364724602560546, "grad_norm": 0.7716422356532674, "learning_rate": 2.7274939172749394e-05, "loss": 0.6183, "step": 5605 }, { "epoch": 0.1637932292959227, "grad_norm": 0.759694273292439, "learning_rate": 2.72992700729927e-05, "loss": 0.6194, "step": 5610 }, { "epoch": 0.16393921256623992, "grad_norm": 0.8219586430762094, "learning_rate": 2.7323600973236012e-05, "loss": 0.6423, "step": 5615 }, { "epoch": 0.16408519583655712, "grad_norm": 0.8527260869859682, "learning_rate": 2.7347931873479323e-05, "loss": 0.637, "step": 5620 }, { "epoch": 0.16423117910687435, "grad_norm": 0.8148018290886838, "learning_rate": 2.737226277372263e-05, "loss": 0.6244, "step": 5625 }, { "epoch": 0.16437716237719158, "grad_norm": 0.8607337574412628, "learning_rate": 2.7396593673965938e-05, "loss": 0.6459, "step": 5630 }, { "epoch": 0.1645231456475088, "grad_norm": 0.8196675588831168, "learning_rate": 2.7420924574209245e-05, "loss": 0.5896, "step": 5635 }, { "epoch": 0.164669128917826, "grad_norm": 0.805828570052097, "learning_rate": 2.744525547445256e-05, "loss": 0.6441, "step": 5640 }, { "epoch": 0.16481511218814324, "grad_norm": 0.7820351037332344, "learning_rate": 2.7469586374695867e-05, "loss": 0.6229, "step": 5645 }, { "epoch": 0.16496109545846047, "grad_norm": 0.806134971562293, "learning_rate": 2.7493917274939174e-05, "loss": 0.5655, "step": 5650 }, { "epoch": 0.1651070787287777, "grad_norm": 0.7550527812705056, "learning_rate": 2.751824817518248e-05, "loss": 0.597, "step": 5655 }, { "epoch": 0.1652530619990949, "grad_norm": 0.7496851679051605, "learning_rate": 2.754257907542579e-05, "loss": 0.6282, "step": 5660 }, { "epoch": 0.16539904526941213, "grad_norm": 0.8697973669802647, "learning_rate": 2.7566909975669103e-05, "loss": 0.6541, "step": 5665 }, { "epoch": 0.16554502853972936, "grad_norm": 0.8336066319811535, "learning_rate": 2.759124087591241e-05, "loss": 0.6265, "step": 5670 }, { "epoch": 0.16569101181004656, "grad_norm": 0.8085562072301987, "learning_rate": 2.7615571776155718e-05, "loss": 0.6282, "step": 5675 }, { "epoch": 0.16583699508036379, "grad_norm": 0.824482137898562, "learning_rate": 2.7639902676399026e-05, "loss": 0.6095, "step": 5680 }, { "epoch": 0.16598297835068102, "grad_norm": 0.8219268769200928, "learning_rate": 2.7664233576642333e-05, "loss": 0.63, "step": 5685 }, { "epoch": 0.16612896162099824, "grad_norm": 0.8123948447672751, "learning_rate": 2.7688564476885647e-05, "loss": 0.6291, "step": 5690 }, { "epoch": 0.16627494489131545, "grad_norm": 0.7808592644328205, "learning_rate": 2.7712895377128955e-05, "loss": 0.5946, "step": 5695 }, { "epoch": 0.16642092816163268, "grad_norm": 0.7745647277556607, "learning_rate": 2.7737226277372262e-05, "loss": 0.615, "step": 5700 }, { "epoch": 0.1665669114319499, "grad_norm": 1.211241341382824, "learning_rate": 2.7761557177615573e-05, "loss": 0.6318, "step": 5705 }, { "epoch": 0.16671289470226713, "grad_norm": 0.7614012577638861, "learning_rate": 2.7785888077858884e-05, "loss": 0.6182, "step": 5710 }, { "epoch": 0.16685887797258434, "grad_norm": 0.7550347921290246, "learning_rate": 2.781021897810219e-05, "loss": 0.6, "step": 5715 }, { "epoch": 0.16700486124290156, "grad_norm": 0.8735659042350397, "learning_rate": 2.7834549878345502e-05, "loss": 0.6012, "step": 5720 }, { "epoch": 0.1671508445132188, "grad_norm": 0.7477716873137125, "learning_rate": 2.785888077858881e-05, "loss": 0.5874, "step": 5725 }, { "epoch": 0.167296827783536, "grad_norm": 0.7971192758169687, "learning_rate": 2.7883211678832117e-05, "loss": 0.6392, "step": 5730 }, { "epoch": 0.16744281105385322, "grad_norm": 0.7585036891881396, "learning_rate": 2.790754257907543e-05, "loss": 0.6402, "step": 5735 }, { "epoch": 0.16758879432417045, "grad_norm": 0.8161865200797365, "learning_rate": 2.793187347931874e-05, "loss": 0.6558, "step": 5740 }, { "epoch": 0.16773477759448768, "grad_norm": 0.8210919116036522, "learning_rate": 2.7956204379562046e-05, "loss": 0.6032, "step": 5745 }, { "epoch": 0.16788076086480488, "grad_norm": 0.8075743230386488, "learning_rate": 2.7980535279805354e-05, "loss": 0.6133, "step": 5750 }, { "epoch": 0.1680267441351221, "grad_norm": 0.8677255898104017, "learning_rate": 2.800486618004866e-05, "loss": 0.5793, "step": 5755 }, { "epoch": 0.16817272740543934, "grad_norm": 0.7696010504987801, "learning_rate": 2.8029197080291975e-05, "loss": 0.6105, "step": 5760 }, { "epoch": 0.16831871067575657, "grad_norm": 0.7610441354946961, "learning_rate": 2.8053527980535283e-05, "loss": 0.6599, "step": 5765 }, { "epoch": 0.16846469394607377, "grad_norm": 0.8087701503839941, "learning_rate": 2.807785888077859e-05, "loss": 0.63, "step": 5770 }, { "epoch": 0.168610677216391, "grad_norm": 0.7785921754400141, "learning_rate": 2.8102189781021898e-05, "loss": 0.5697, "step": 5775 }, { "epoch": 0.16875666048670823, "grad_norm": 0.7605581481468644, "learning_rate": 2.8126520681265205e-05, "loss": 0.6049, "step": 5780 }, { "epoch": 0.16890264375702543, "grad_norm": 0.7566085275057329, "learning_rate": 2.815085158150852e-05, "loss": 0.6036, "step": 5785 }, { "epoch": 0.16904862702734266, "grad_norm": 0.8032970221577824, "learning_rate": 2.8175182481751827e-05, "loss": 0.6143, "step": 5790 }, { "epoch": 0.1691946102976599, "grad_norm": 0.800820393764147, "learning_rate": 2.8199513381995134e-05, "loss": 0.6039, "step": 5795 }, { "epoch": 0.16934059356797712, "grad_norm": 0.8544931212362319, "learning_rate": 2.822384428223844e-05, "loss": 0.6658, "step": 5800 }, { "epoch": 0.16948657683829432, "grad_norm": 0.7683179783760316, "learning_rate": 2.8248175182481753e-05, "loss": 0.6646, "step": 5805 }, { "epoch": 0.16963256010861155, "grad_norm": 0.802339481077416, "learning_rate": 2.8272506082725063e-05, "loss": 0.6563, "step": 5810 }, { "epoch": 0.16977854337892878, "grad_norm": 0.7825482088044208, "learning_rate": 2.829683698296837e-05, "loss": 0.6203, "step": 5815 }, { "epoch": 0.16992452664924598, "grad_norm": 0.8373645019979749, "learning_rate": 2.832116788321168e-05, "loss": 0.6211, "step": 5820 }, { "epoch": 0.1700705099195632, "grad_norm": 0.8053887383716081, "learning_rate": 2.834549878345499e-05, "loss": 0.6602, "step": 5825 }, { "epoch": 0.17021649318988044, "grad_norm": 0.9279138416348278, "learning_rate": 2.8369829683698297e-05, "loss": 0.6257, "step": 5830 }, { "epoch": 0.17036247646019767, "grad_norm": 0.8156854795614265, "learning_rate": 2.839416058394161e-05, "loss": 0.6005, "step": 5835 }, { "epoch": 0.17050845973051487, "grad_norm": 0.7868725062829328, "learning_rate": 2.8418491484184918e-05, "loss": 0.5902, "step": 5840 }, { "epoch": 0.1706544430008321, "grad_norm": 0.8814952721673379, "learning_rate": 2.8442822384428226e-05, "loss": 0.6446, "step": 5845 }, { "epoch": 0.17080042627114933, "grad_norm": 0.8702076747446111, "learning_rate": 2.8467153284671533e-05, "loss": 0.6491, "step": 5850 }, { "epoch": 0.17094640954146656, "grad_norm": 0.7602624078269723, "learning_rate": 2.849148418491484e-05, "loss": 0.5972, "step": 5855 }, { "epoch": 0.17109239281178376, "grad_norm": 0.7617357763090764, "learning_rate": 2.8515815085158155e-05, "loss": 0.6062, "step": 5860 }, { "epoch": 0.171238376082101, "grad_norm": 0.7857005631860701, "learning_rate": 2.8540145985401462e-05, "loss": 0.6451, "step": 5865 }, { "epoch": 0.17138435935241822, "grad_norm": 0.7706959634500377, "learning_rate": 2.856447688564477e-05, "loss": 0.6011, "step": 5870 }, { "epoch": 0.17153034262273542, "grad_norm": 0.8188773889812999, "learning_rate": 2.8588807785888077e-05, "loss": 0.6375, "step": 5875 }, { "epoch": 0.17167632589305265, "grad_norm": 0.8602978774348494, "learning_rate": 2.8613138686131385e-05, "loss": 0.6807, "step": 5880 }, { "epoch": 0.17182230916336988, "grad_norm": 0.7554177419238912, "learning_rate": 2.86374695863747e-05, "loss": 0.6153, "step": 5885 }, { "epoch": 0.1719682924336871, "grad_norm": 0.7658179125842774, "learning_rate": 2.8661800486618006e-05, "loss": 0.6024, "step": 5890 }, { "epoch": 0.1721142757040043, "grad_norm": 0.8244847110606007, "learning_rate": 2.8686131386861314e-05, "loss": 0.6138, "step": 5895 }, { "epoch": 0.17226025897432154, "grad_norm": 0.8656799279511889, "learning_rate": 2.871046228710462e-05, "loss": 0.6302, "step": 5900 }, { "epoch": 0.17240624224463877, "grad_norm": 0.8361687439844107, "learning_rate": 2.8734793187347935e-05, "loss": 0.5896, "step": 5905 }, { "epoch": 0.172552225514956, "grad_norm": 0.7211520438040004, "learning_rate": 2.8759124087591243e-05, "loss": 0.6015, "step": 5910 }, { "epoch": 0.1726982087852732, "grad_norm": 0.7728004264720227, "learning_rate": 2.878345498783455e-05, "loss": 0.583, "step": 5915 }, { "epoch": 0.17284419205559043, "grad_norm": 0.7737612827984123, "learning_rate": 2.880778588807786e-05, "loss": 0.6457, "step": 5920 }, { "epoch": 0.17299017532590766, "grad_norm": 0.8043692498037314, "learning_rate": 2.883211678832117e-05, "loss": 0.6216, "step": 5925 }, { "epoch": 0.17313615859622486, "grad_norm": 0.8003279351717913, "learning_rate": 2.885644768856448e-05, "loss": 0.6605, "step": 5930 }, { "epoch": 0.1732821418665421, "grad_norm": 0.7802082142650687, "learning_rate": 2.888077858880779e-05, "loss": 0.6344, "step": 5935 }, { "epoch": 0.17342812513685932, "grad_norm": 0.8713454500791625, "learning_rate": 2.8905109489051098e-05, "loss": 0.6168, "step": 5940 }, { "epoch": 0.17357410840717655, "grad_norm": 0.7879889902780091, "learning_rate": 2.8929440389294405e-05, "loss": 0.6329, "step": 5945 }, { "epoch": 0.17372009167749375, "grad_norm": 0.8205414004475794, "learning_rate": 2.8953771289537713e-05, "loss": 0.6423, "step": 5950 }, { "epoch": 0.17386607494781098, "grad_norm": 0.774218462235127, "learning_rate": 2.8978102189781027e-05, "loss": 0.6291, "step": 5955 }, { "epoch": 0.1740120582181282, "grad_norm": 0.8919460228886098, "learning_rate": 2.9002433090024334e-05, "loss": 0.6483, "step": 5960 }, { "epoch": 0.17415804148844544, "grad_norm": 0.7502087644229644, "learning_rate": 2.9026763990267642e-05, "loss": 0.6079, "step": 5965 }, { "epoch": 0.17430402475876264, "grad_norm": 0.7531392624004443, "learning_rate": 2.905109489051095e-05, "loss": 0.6078, "step": 5970 }, { "epoch": 0.17445000802907987, "grad_norm": 0.750630482551639, "learning_rate": 2.9075425790754257e-05, "loss": 0.6218, "step": 5975 }, { "epoch": 0.1745959912993971, "grad_norm": 0.7530641334942451, "learning_rate": 2.909975669099757e-05, "loss": 0.5864, "step": 5980 }, { "epoch": 0.1747419745697143, "grad_norm": 0.8010290827269229, "learning_rate": 2.912408759124088e-05, "loss": 0.5904, "step": 5985 }, { "epoch": 0.17488795784003153, "grad_norm": 0.8792604490072283, "learning_rate": 2.9148418491484186e-05, "loss": 0.6425, "step": 5990 }, { "epoch": 0.17503394111034876, "grad_norm": 0.7530220860480629, "learning_rate": 2.9172749391727493e-05, "loss": 0.6185, "step": 5995 }, { "epoch": 0.17517992438066599, "grad_norm": 0.839309498577383, "learning_rate": 2.91970802919708e-05, "loss": 0.6403, "step": 6000 }, { "epoch": 0.1753259076509832, "grad_norm": 0.8088029319486585, "learning_rate": 2.9221411192214115e-05, "loss": 0.5893, "step": 6005 }, { "epoch": 0.17547189092130042, "grad_norm": 0.7952015047625696, "learning_rate": 2.9245742092457422e-05, "loss": 0.5938, "step": 6010 }, { "epoch": 0.17561787419161765, "grad_norm": 0.7689109844848915, "learning_rate": 2.927007299270073e-05, "loss": 0.6374, "step": 6015 }, { "epoch": 0.17576385746193487, "grad_norm": 0.6761522147992304, "learning_rate": 2.929440389294404e-05, "loss": 0.6109, "step": 6020 }, { "epoch": 0.17590984073225208, "grad_norm": 0.9111305971770707, "learning_rate": 2.9318734793187348e-05, "loss": 0.6094, "step": 6025 }, { "epoch": 0.1760558240025693, "grad_norm": 0.7178047685169449, "learning_rate": 2.934306569343066e-05, "loss": 0.634, "step": 6030 }, { "epoch": 0.17620180727288653, "grad_norm": 0.8703534271586797, "learning_rate": 2.936739659367397e-05, "loss": 0.639, "step": 6035 }, { "epoch": 0.17634779054320374, "grad_norm": 0.7870328709045311, "learning_rate": 2.9391727493917277e-05, "loss": 0.6448, "step": 6040 }, { "epoch": 0.17649377381352097, "grad_norm": 0.8640256698435386, "learning_rate": 2.9416058394160585e-05, "loss": 0.6892, "step": 6045 }, { "epoch": 0.1766397570838382, "grad_norm": 0.7813890975160601, "learning_rate": 2.9440389294403892e-05, "loss": 0.6617, "step": 6050 }, { "epoch": 0.17678574035415542, "grad_norm": 0.749552069098781, "learning_rate": 2.9464720194647206e-05, "loss": 0.6111, "step": 6055 }, { "epoch": 0.17693172362447263, "grad_norm": 0.8240072384035373, "learning_rate": 2.9489051094890514e-05, "loss": 0.6376, "step": 6060 }, { "epoch": 0.17707770689478985, "grad_norm": 0.7886055941899679, "learning_rate": 2.951338199513382e-05, "loss": 0.636, "step": 6065 }, { "epoch": 0.17722369016510708, "grad_norm": 0.732614943847749, "learning_rate": 2.953771289537713e-05, "loss": 0.6035, "step": 6070 }, { "epoch": 0.1773696734354243, "grad_norm": 0.7134703058573358, "learning_rate": 2.9562043795620443e-05, "loss": 0.6308, "step": 6075 }, { "epoch": 0.17751565670574151, "grad_norm": 0.754584153338418, "learning_rate": 2.958637469586375e-05, "loss": 0.6121, "step": 6080 }, { "epoch": 0.17766163997605874, "grad_norm": 0.7761141405632233, "learning_rate": 2.9610705596107058e-05, "loss": 0.6286, "step": 6085 }, { "epoch": 0.17780762324637597, "grad_norm": 0.8063987550538987, "learning_rate": 2.9635036496350365e-05, "loss": 0.6076, "step": 6090 }, { "epoch": 0.17795360651669317, "grad_norm": 0.8605187112713648, "learning_rate": 2.9659367396593673e-05, "loss": 0.6269, "step": 6095 }, { "epoch": 0.1780995897870104, "grad_norm": 0.7507102249642311, "learning_rate": 2.9683698296836987e-05, "loss": 0.6049, "step": 6100 }, { "epoch": 0.17824557305732763, "grad_norm": 1.0901021455101685, "learning_rate": 2.9708029197080294e-05, "loss": 0.6545, "step": 6105 }, { "epoch": 0.17839155632764486, "grad_norm": 0.8380643764109628, "learning_rate": 2.9732360097323602e-05, "loss": 0.6369, "step": 6110 }, { "epoch": 0.17853753959796206, "grad_norm": 0.8299953083358621, "learning_rate": 2.975669099756691e-05, "loss": 0.6097, "step": 6115 }, { "epoch": 0.1786835228682793, "grad_norm": 0.7472959206166612, "learning_rate": 2.9781021897810217e-05, "loss": 0.618, "step": 6120 }, { "epoch": 0.17882950613859652, "grad_norm": 0.8518253597593448, "learning_rate": 2.980535279805353e-05, "loss": 0.6382, "step": 6125 }, { "epoch": 0.17897548940891375, "grad_norm": 0.6272786649271319, "learning_rate": 2.982968369829684e-05, "loss": 0.627, "step": 6130 }, { "epoch": 0.17912147267923095, "grad_norm": 0.8202451575748464, "learning_rate": 2.985401459854015e-05, "loss": 0.6337, "step": 6135 }, { "epoch": 0.17926745594954818, "grad_norm": 0.8078847048901726, "learning_rate": 2.9878345498783457e-05, "loss": 0.6232, "step": 6140 }, { "epoch": 0.1794134392198654, "grad_norm": 0.7483397320275881, "learning_rate": 2.9902676399026764e-05, "loss": 0.6419, "step": 6145 }, { "epoch": 0.1795594224901826, "grad_norm": 0.8510235478422691, "learning_rate": 2.992700729927008e-05, "loss": 0.5925, "step": 6150 }, { "epoch": 0.17970540576049984, "grad_norm": 0.8492097034497252, "learning_rate": 2.9951338199513386e-05, "loss": 0.6497, "step": 6155 }, { "epoch": 0.17985138903081707, "grad_norm": 0.8444589926832368, "learning_rate": 2.9975669099756693e-05, "loss": 0.6323, "step": 6160 }, { "epoch": 0.1799973723011343, "grad_norm": 0.916632266638607, "learning_rate": 3e-05, "loss": 0.6946, "step": 6165 }, { "epoch": 0.1801433555714515, "grad_norm": 0.8242160867629514, "learning_rate": 3.0024330900243308e-05, "loss": 0.6593, "step": 6170 }, { "epoch": 0.18028933884176873, "grad_norm": 0.8296924767358203, "learning_rate": 3.0048661800486622e-05, "loss": 0.6151, "step": 6175 }, { "epoch": 0.18043532211208596, "grad_norm": 0.7629456150681924, "learning_rate": 3.007299270072993e-05, "loss": 0.5707, "step": 6180 }, { "epoch": 0.1805813053824032, "grad_norm": 0.7883968003058237, "learning_rate": 3.0097323600973237e-05, "loss": 0.5691, "step": 6185 }, { "epoch": 0.1807272886527204, "grad_norm": 0.9358866397427482, "learning_rate": 3.0121654501216545e-05, "loss": 0.6807, "step": 6190 }, { "epoch": 0.18087327192303762, "grad_norm": 0.8480507253264363, "learning_rate": 3.0145985401459852e-05, "loss": 0.6344, "step": 6195 }, { "epoch": 0.18101925519335485, "grad_norm": 0.8801194702904062, "learning_rate": 3.0170316301703166e-05, "loss": 0.6759, "step": 6200 }, { "epoch": 0.18116523846367205, "grad_norm": 0.7621661446239105, "learning_rate": 3.0194647201946474e-05, "loss": 0.617, "step": 6205 }, { "epoch": 0.18131122173398928, "grad_norm": 0.819798767936743, "learning_rate": 3.021897810218978e-05, "loss": 0.6181, "step": 6210 }, { "epoch": 0.1814572050043065, "grad_norm": 0.8514227945589902, "learning_rate": 3.024330900243309e-05, "loss": 0.6256, "step": 6215 }, { "epoch": 0.18160318827462374, "grad_norm": 0.7923803416812871, "learning_rate": 3.0267639902676396e-05, "loss": 0.6366, "step": 6220 }, { "epoch": 0.18174917154494094, "grad_norm": 0.8510731624394369, "learning_rate": 3.029197080291971e-05, "loss": 0.6277, "step": 6225 }, { "epoch": 0.18189515481525817, "grad_norm": 1.02533764979724, "learning_rate": 3.0316301703163018e-05, "loss": 0.6339, "step": 6230 }, { "epoch": 0.1820411380855754, "grad_norm": 0.73146959824759, "learning_rate": 3.0340632603406325e-05, "loss": 0.603, "step": 6235 }, { "epoch": 0.18218712135589263, "grad_norm": 0.7309870327473336, "learning_rate": 3.0364963503649636e-05, "loss": 0.6235, "step": 6240 }, { "epoch": 0.18233310462620983, "grad_norm": 0.7324073686227454, "learning_rate": 3.0389294403892944e-05, "loss": 0.6061, "step": 6245 }, { "epoch": 0.18247908789652706, "grad_norm": 0.7977531052095748, "learning_rate": 3.0413625304136255e-05, "loss": 0.6008, "step": 6250 }, { "epoch": 0.1826250711668443, "grad_norm": 0.8173058824259528, "learning_rate": 3.0437956204379565e-05, "loss": 0.622, "step": 6255 }, { "epoch": 0.1827710544371615, "grad_norm": 0.7733297116333411, "learning_rate": 3.0462287104622873e-05, "loss": 0.6043, "step": 6260 }, { "epoch": 0.18291703770747872, "grad_norm": 0.7688597709282735, "learning_rate": 3.048661800486618e-05, "loss": 0.6375, "step": 6265 }, { "epoch": 0.18306302097779595, "grad_norm": 0.8259196692866437, "learning_rate": 3.0510948905109494e-05, "loss": 0.6655, "step": 6270 }, { "epoch": 0.18320900424811318, "grad_norm": 0.8159201220635213, "learning_rate": 3.05352798053528e-05, "loss": 0.6603, "step": 6275 }, { "epoch": 0.18335498751843038, "grad_norm": 0.8325234480271753, "learning_rate": 3.055961070559611e-05, "loss": 0.6397, "step": 6280 }, { "epoch": 0.1835009707887476, "grad_norm": 0.7314511172802979, "learning_rate": 3.058394160583942e-05, "loss": 0.7366, "step": 6285 }, { "epoch": 0.18364695405906484, "grad_norm": 0.7207972785107233, "learning_rate": 3.0608272506082724e-05, "loss": 0.6299, "step": 6290 }, { "epoch": 0.18379293732938204, "grad_norm": 0.7486254539580961, "learning_rate": 3.063260340632604e-05, "loss": 0.6408, "step": 6295 }, { "epoch": 0.18393892059969927, "grad_norm": 0.8150229618859418, "learning_rate": 3.0656934306569346e-05, "loss": 0.6461, "step": 6300 }, { "epoch": 0.1840849038700165, "grad_norm": 0.654468752683776, "learning_rate": 3.0681265206812653e-05, "loss": 0.5989, "step": 6305 }, { "epoch": 0.18423088714033373, "grad_norm": 0.7939460212232718, "learning_rate": 3.070559610705596e-05, "loss": 0.6477, "step": 6310 }, { "epoch": 0.18437687041065093, "grad_norm": 0.7205381496445183, "learning_rate": 3.072992700729927e-05, "loss": 0.6242, "step": 6315 }, { "epoch": 0.18452285368096816, "grad_norm": 0.8145941752535684, "learning_rate": 3.075425790754258e-05, "loss": 0.6463, "step": 6320 }, { "epoch": 0.1846688369512854, "grad_norm": 0.7924863581563002, "learning_rate": 3.077858880778589e-05, "loss": 0.6158, "step": 6325 }, { "epoch": 0.18481482022160262, "grad_norm": 0.7961072337952539, "learning_rate": 3.08029197080292e-05, "loss": 0.6656, "step": 6330 }, { "epoch": 0.18496080349191982, "grad_norm": 0.8126067635800343, "learning_rate": 3.0827250608272505e-05, "loss": 0.6255, "step": 6335 }, { "epoch": 0.18510678676223705, "grad_norm": 0.7278911671696665, "learning_rate": 3.085158150851581e-05, "loss": 0.6012, "step": 6340 }, { "epoch": 0.18525277003255428, "grad_norm": 0.7629121791114035, "learning_rate": 3.0875912408759127e-05, "loss": 0.645, "step": 6345 }, { "epoch": 0.18539875330287148, "grad_norm": 0.7616268375819831, "learning_rate": 3.0900243309002434e-05, "loss": 0.6157, "step": 6350 }, { "epoch": 0.1855447365731887, "grad_norm": 0.7433827097553016, "learning_rate": 3.092457420924574e-05, "loss": 0.6237, "step": 6355 }, { "epoch": 0.18569071984350594, "grad_norm": 0.7508939533969061, "learning_rate": 3.094890510948905e-05, "loss": 0.6157, "step": 6360 }, { "epoch": 0.18583670311382317, "grad_norm": 0.9062514252186021, "learning_rate": 3.0973236009732356e-05, "loss": 0.6404, "step": 6365 }, { "epoch": 0.18598268638414037, "grad_norm": 0.735156686347465, "learning_rate": 3.099756690997567e-05, "loss": 0.6362, "step": 6370 }, { "epoch": 0.1861286696544576, "grad_norm": 0.8015278221097115, "learning_rate": 3.102189781021898e-05, "loss": 0.6261, "step": 6375 }, { "epoch": 0.18627465292477483, "grad_norm": 0.8514301478167846, "learning_rate": 3.1046228710462286e-05, "loss": 0.6071, "step": 6380 }, { "epoch": 0.18642063619509205, "grad_norm": 0.7638709205327211, "learning_rate": 3.107055961070559e-05, "loss": 0.6618, "step": 6385 }, { "epoch": 0.18656661946540926, "grad_norm": 0.7464522059831632, "learning_rate": 3.109489051094891e-05, "loss": 0.6382, "step": 6390 }, { "epoch": 0.18671260273572649, "grad_norm": 0.7682935752536874, "learning_rate": 3.1119221411192215e-05, "loss": 0.5894, "step": 6395 }, { "epoch": 0.18685858600604371, "grad_norm": 0.7732232887403432, "learning_rate": 3.114355231143552e-05, "loss": 0.6277, "step": 6400 }, { "epoch": 0.18700456927636092, "grad_norm": 0.752397456044161, "learning_rate": 3.1167883211678836e-05, "loss": 0.603, "step": 6405 }, { "epoch": 0.18715055254667814, "grad_norm": 0.8402586996656388, "learning_rate": 3.1192214111922144e-05, "loss": 0.6346, "step": 6410 }, { "epoch": 0.18729653581699537, "grad_norm": 0.9351545488904954, "learning_rate": 3.121654501216545e-05, "loss": 0.6625, "step": 6415 }, { "epoch": 0.1874425190873126, "grad_norm": 0.8333272522654135, "learning_rate": 3.1240875912408765e-05, "loss": 0.6677, "step": 6420 }, { "epoch": 0.1875885023576298, "grad_norm": 0.8392711604611844, "learning_rate": 3.126520681265207e-05, "loss": 0.6577, "step": 6425 }, { "epoch": 0.18773448562794703, "grad_norm": 0.7938972237974632, "learning_rate": 3.128953771289538e-05, "loss": 0.6694, "step": 6430 }, { "epoch": 0.18788046889826426, "grad_norm": 0.9130463302080325, "learning_rate": 3.131386861313869e-05, "loss": 0.5956, "step": 6435 }, { "epoch": 0.1880264521685815, "grad_norm": 0.7069805215734916, "learning_rate": 3.1338199513381995e-05, "loss": 0.6228, "step": 6440 }, { "epoch": 0.1881724354388987, "grad_norm": 0.7187045715018774, "learning_rate": 3.136253041362531e-05, "loss": 0.6463, "step": 6445 }, { "epoch": 0.18831841870921592, "grad_norm": 0.8002525693852416, "learning_rate": 3.138686131386862e-05, "loss": 0.6182, "step": 6450 }, { "epoch": 0.18846440197953315, "grad_norm": 0.8173601332799795, "learning_rate": 3.1411192214111924e-05, "loss": 0.6356, "step": 6455 }, { "epoch": 0.18861038524985035, "grad_norm": 0.8160027417313548, "learning_rate": 3.143552311435523e-05, "loss": 0.6359, "step": 6460 }, { "epoch": 0.18875636852016758, "grad_norm": 0.7406738949084944, "learning_rate": 3.1459854014598546e-05, "loss": 0.6445, "step": 6465 }, { "epoch": 0.1889023517904848, "grad_norm": 0.7678977464660778, "learning_rate": 3.1484184914841853e-05, "loss": 0.5998, "step": 6470 }, { "epoch": 0.18904833506080204, "grad_norm": 0.8121919148625077, "learning_rate": 3.150851581508516e-05, "loss": 0.6132, "step": 6475 }, { "epoch": 0.18919431833111924, "grad_norm": 0.7992536306110918, "learning_rate": 3.153284671532847e-05, "loss": 0.6306, "step": 6480 }, { "epoch": 0.18934030160143647, "grad_norm": 0.7798270722222513, "learning_rate": 3.1557177615571776e-05, "loss": 0.6423, "step": 6485 }, { "epoch": 0.1894862848717537, "grad_norm": 0.8320843213004776, "learning_rate": 3.158150851581509e-05, "loss": 0.6396, "step": 6490 }, { "epoch": 0.18963226814207093, "grad_norm": 0.7397060184830424, "learning_rate": 3.16058394160584e-05, "loss": 0.6503, "step": 6495 }, { "epoch": 0.18977825141238813, "grad_norm": 0.7342683388789367, "learning_rate": 3.1630170316301705e-05, "loss": 0.5917, "step": 6500 }, { "epoch": 0.18992423468270536, "grad_norm": 0.8084280288391961, "learning_rate": 3.165450121654501e-05, "loss": 0.6409, "step": 6505 }, { "epoch": 0.1900702179530226, "grad_norm": 0.7291038253761555, "learning_rate": 3.167883211678832e-05, "loss": 0.611, "step": 6510 }, { "epoch": 0.1902162012233398, "grad_norm": 0.7804894722484865, "learning_rate": 3.1703163017031634e-05, "loss": 0.6488, "step": 6515 }, { "epoch": 0.19036218449365702, "grad_norm": 0.7725896966045029, "learning_rate": 3.172749391727494e-05, "loss": 0.6591, "step": 6520 }, { "epoch": 0.19050816776397425, "grad_norm": 0.7322673012203511, "learning_rate": 3.175182481751825e-05, "loss": 0.6032, "step": 6525 }, { "epoch": 0.19065415103429148, "grad_norm": 0.8152983830772197, "learning_rate": 3.1776155717761556e-05, "loss": 0.6141, "step": 6530 }, { "epoch": 0.19080013430460868, "grad_norm": 0.8069040354467321, "learning_rate": 3.1800486618004864e-05, "loss": 0.629, "step": 6535 }, { "epoch": 0.1909461175749259, "grad_norm": 0.7615214658909035, "learning_rate": 3.182481751824818e-05, "loss": 0.622, "step": 6540 }, { "epoch": 0.19109210084524314, "grad_norm": 0.7435183716144067, "learning_rate": 3.1849148418491486e-05, "loss": 0.6046, "step": 6545 }, { "epoch": 0.19123808411556037, "grad_norm": 0.8455728468520806, "learning_rate": 3.187347931873479e-05, "loss": 0.6558, "step": 6550 }, { "epoch": 0.19138406738587757, "grad_norm": 0.7326381893210617, "learning_rate": 3.18978102189781e-05, "loss": 0.6215, "step": 6555 }, { "epoch": 0.1915300506561948, "grad_norm": 0.7189207335992801, "learning_rate": 3.192214111922141e-05, "loss": 0.6213, "step": 6560 }, { "epoch": 0.19167603392651203, "grad_norm": 0.7806767779714885, "learning_rate": 3.194647201946472e-05, "loss": 0.6106, "step": 6565 }, { "epoch": 0.19182201719682923, "grad_norm": 0.7918141940270615, "learning_rate": 3.197080291970803e-05, "loss": 0.6514, "step": 6570 }, { "epoch": 0.19196800046714646, "grad_norm": 0.7741375326178336, "learning_rate": 3.199513381995134e-05, "loss": 0.613, "step": 6575 }, { "epoch": 0.1921139837374637, "grad_norm": 0.8332698131490197, "learning_rate": 3.2019464720194645e-05, "loss": 0.6546, "step": 6580 }, { "epoch": 0.19225996700778092, "grad_norm": 0.7573420239298628, "learning_rate": 3.204379562043795e-05, "loss": 0.6343, "step": 6585 }, { "epoch": 0.19240595027809812, "grad_norm": 0.7278057556430957, "learning_rate": 3.2068126520681266e-05, "loss": 0.5968, "step": 6590 }, { "epoch": 0.19255193354841535, "grad_norm": 0.7665510560330022, "learning_rate": 3.2092457420924574e-05, "loss": 0.5839, "step": 6595 }, { "epoch": 0.19269791681873258, "grad_norm": 0.7915392005419868, "learning_rate": 3.211678832116788e-05, "loss": 0.6525, "step": 6600 }, { "epoch": 0.1928439000890498, "grad_norm": 0.7264360466294593, "learning_rate": 3.2141119221411195e-05, "loss": 0.6205, "step": 6605 }, { "epoch": 0.192989883359367, "grad_norm": 0.7731865341655382, "learning_rate": 3.21654501216545e-05, "loss": 0.6577, "step": 6610 }, { "epoch": 0.19313586662968424, "grad_norm": 0.7926110989325365, "learning_rate": 3.218978102189781e-05, "loss": 0.6158, "step": 6615 }, { "epoch": 0.19328184990000147, "grad_norm": 0.7284394519023343, "learning_rate": 3.2214111922141124e-05, "loss": 0.6045, "step": 6620 }, { "epoch": 0.19342783317031867, "grad_norm": 0.7862914597843379, "learning_rate": 3.223844282238443e-05, "loss": 0.6268, "step": 6625 }, { "epoch": 0.1935738164406359, "grad_norm": 0.7937026758379515, "learning_rate": 3.226277372262774e-05, "loss": 0.6181, "step": 6630 }, { "epoch": 0.19371979971095313, "grad_norm": 0.7588830581563417, "learning_rate": 3.2287104622871054e-05, "loss": 0.6428, "step": 6635 }, { "epoch": 0.19386578298127036, "grad_norm": 0.742552015215193, "learning_rate": 3.231143552311436e-05, "loss": 0.6105, "step": 6640 }, { "epoch": 0.19401176625158756, "grad_norm": 0.783281553182545, "learning_rate": 3.233576642335767e-05, "loss": 0.613, "step": 6645 }, { "epoch": 0.1941577495219048, "grad_norm": 0.7828528159558973, "learning_rate": 3.2360097323600976e-05, "loss": 0.6131, "step": 6650 }, { "epoch": 0.19430373279222202, "grad_norm": 0.8071807372959976, "learning_rate": 3.238442822384428e-05, "loss": 0.6036, "step": 6655 }, { "epoch": 0.19444971606253925, "grad_norm": 0.8832401553684055, "learning_rate": 3.24087591240876e-05, "loss": 0.6213, "step": 6660 }, { "epoch": 0.19459569933285645, "grad_norm": 0.7522550485762255, "learning_rate": 3.2433090024330905e-05, "loss": 0.6258, "step": 6665 }, { "epoch": 0.19474168260317368, "grad_norm": 0.7460744771499422, "learning_rate": 3.245742092457421e-05, "loss": 0.6326, "step": 6670 }, { "epoch": 0.1948876658734909, "grad_norm": 0.7348355734900649, "learning_rate": 3.248175182481752e-05, "loss": 0.6401, "step": 6675 }, { "epoch": 0.1950336491438081, "grad_norm": 0.7909352263993328, "learning_rate": 3.250608272506083e-05, "loss": 0.6372, "step": 6680 }, { "epoch": 0.19517963241412534, "grad_norm": 0.7557738605175301, "learning_rate": 3.253041362530414e-05, "loss": 0.588, "step": 6685 }, { "epoch": 0.19532561568444257, "grad_norm": 0.7518791088625048, "learning_rate": 3.255474452554745e-05, "loss": 0.6304, "step": 6690 }, { "epoch": 0.1954715989547598, "grad_norm": 0.711380757328123, "learning_rate": 3.2579075425790757e-05, "loss": 0.6577, "step": 6695 }, { "epoch": 0.195617582225077, "grad_norm": 0.8000226082938228, "learning_rate": 3.2603406326034064e-05, "loss": 0.6024, "step": 6700 }, { "epoch": 0.19576356549539423, "grad_norm": 0.7848790383473221, "learning_rate": 3.262773722627737e-05, "loss": 0.6387, "step": 6705 }, { "epoch": 0.19590954876571146, "grad_norm": 0.7638395426135561, "learning_rate": 3.2652068126520686e-05, "loss": 0.6207, "step": 6710 }, { "epoch": 0.19605553203602868, "grad_norm": 0.7309330650610583, "learning_rate": 3.267639902676399e-05, "loss": 0.6089, "step": 6715 }, { "epoch": 0.19620151530634589, "grad_norm": 0.8306891717228526, "learning_rate": 3.27007299270073e-05, "loss": 0.6209, "step": 6720 }, { "epoch": 0.19634749857666312, "grad_norm": 0.8002865097890528, "learning_rate": 3.272506082725061e-05, "loss": 0.6448, "step": 6725 }, { "epoch": 0.19649348184698034, "grad_norm": 0.7902355814028907, "learning_rate": 3.2749391727493915e-05, "loss": 0.6255, "step": 6730 }, { "epoch": 0.19663946511729755, "grad_norm": 0.8219243564518186, "learning_rate": 3.277372262773723e-05, "loss": 0.617, "step": 6735 }, { "epoch": 0.19678544838761478, "grad_norm": 0.838444018610637, "learning_rate": 3.279805352798054e-05, "loss": 0.6569, "step": 6740 }, { "epoch": 0.196931431657932, "grad_norm": 0.729467460715718, "learning_rate": 3.2822384428223845e-05, "loss": 0.6377, "step": 6745 }, { "epoch": 0.19707741492824923, "grad_norm": 0.8326081428211443, "learning_rate": 3.284671532846715e-05, "loss": 0.6405, "step": 6750 }, { "epoch": 0.19722339819856644, "grad_norm": 0.8267811336365591, "learning_rate": 3.287104622871046e-05, "loss": 0.6819, "step": 6755 }, { "epoch": 0.19736938146888366, "grad_norm": 0.7694321265903733, "learning_rate": 3.2895377128953774e-05, "loss": 0.6312, "step": 6760 }, { "epoch": 0.1975153647392009, "grad_norm": 0.7747038959297358, "learning_rate": 3.291970802919708e-05, "loss": 0.624, "step": 6765 }, { "epoch": 0.19766134800951812, "grad_norm": 0.7165637315924329, "learning_rate": 3.294403892944039e-05, "loss": 0.6566, "step": 6770 }, { "epoch": 0.19780733127983532, "grad_norm": 0.6738869877069713, "learning_rate": 3.2968369829683696e-05, "loss": 0.6417, "step": 6775 }, { "epoch": 0.19795331455015255, "grad_norm": 0.692486864649118, "learning_rate": 3.2992700729927004e-05, "loss": 0.6375, "step": 6780 }, { "epoch": 0.19809929782046978, "grad_norm": 0.8598680241820967, "learning_rate": 3.301703163017032e-05, "loss": 0.6202, "step": 6785 }, { "epoch": 0.19824528109078698, "grad_norm": 0.7291821695349057, "learning_rate": 3.3041362530413625e-05, "loss": 0.6462, "step": 6790 }, { "epoch": 0.1983912643611042, "grad_norm": 0.7604975770994157, "learning_rate": 3.306569343065693e-05, "loss": 0.6222, "step": 6795 }, { "epoch": 0.19853724763142144, "grad_norm": 0.8180144102072738, "learning_rate": 3.309002433090024e-05, "loss": 0.6098, "step": 6800 }, { "epoch": 0.19868323090173867, "grad_norm": 0.797358770428745, "learning_rate": 3.3114355231143554e-05, "loss": 0.5969, "step": 6805 }, { "epoch": 0.19882921417205587, "grad_norm": 0.7750751358033705, "learning_rate": 3.313868613138686e-05, "loss": 0.6385, "step": 6810 }, { "epoch": 0.1989751974423731, "grad_norm": 0.7229525242039908, "learning_rate": 3.316301703163017e-05, "loss": 0.6252, "step": 6815 }, { "epoch": 0.19912118071269033, "grad_norm": 0.7751092710374655, "learning_rate": 3.3187347931873483e-05, "loss": 0.6623, "step": 6820 }, { "epoch": 0.19926716398300753, "grad_norm": 0.831814090948696, "learning_rate": 3.321167883211679e-05, "loss": 0.5981, "step": 6825 }, { "epoch": 0.19941314725332476, "grad_norm": 0.6934019213848833, "learning_rate": 3.32360097323601e-05, "loss": 0.6219, "step": 6830 }, { "epoch": 0.199559130523642, "grad_norm": 0.6825981385716419, "learning_rate": 3.326034063260341e-05, "loss": 0.6415, "step": 6835 }, { "epoch": 0.19970511379395922, "grad_norm": 0.77304423464539, "learning_rate": 3.328467153284672e-05, "loss": 0.6162, "step": 6840 }, { "epoch": 0.19985109706427642, "grad_norm": 0.7735447938722817, "learning_rate": 3.330900243309003e-05, "loss": 0.6065, "step": 6845 }, { "epoch": 0.19999708033459365, "grad_norm": 0.7719507563831133, "learning_rate": 3.3333333333333335e-05, "loss": 0.5884, "step": 6850 }, { "epoch": 0.20014306360491088, "grad_norm": 0.8034770898408372, "learning_rate": 3.335766423357665e-05, "loss": 0.6446, "step": 6855 }, { "epoch": 0.2002890468752281, "grad_norm": 0.7445769699724006, "learning_rate": 3.3381995133819957e-05, "loss": 0.6255, "step": 6860 }, { "epoch": 0.2004350301455453, "grad_norm": 0.7709124725374451, "learning_rate": 3.3406326034063264e-05, "loss": 0.6217, "step": 6865 }, { "epoch": 0.20058101341586254, "grad_norm": 0.7056645437551351, "learning_rate": 3.343065693430657e-05, "loss": 0.6287, "step": 6870 }, { "epoch": 0.20072699668617977, "grad_norm": 0.8299082628775778, "learning_rate": 3.345498783454988e-05, "loss": 0.602, "step": 6875 }, { "epoch": 0.20087297995649697, "grad_norm": 0.7343261578694973, "learning_rate": 3.347931873479319e-05, "loss": 0.6418, "step": 6880 }, { "epoch": 0.2010189632268142, "grad_norm": 0.832272873840541, "learning_rate": 3.35036496350365e-05, "loss": 0.6226, "step": 6885 }, { "epoch": 0.20116494649713143, "grad_norm": 0.7373254824736588, "learning_rate": 3.352798053527981e-05, "loss": 0.6124, "step": 6890 }, { "epoch": 0.20131092976744866, "grad_norm": 0.7878808651297754, "learning_rate": 3.3552311435523116e-05, "loss": 0.6265, "step": 6895 }, { "epoch": 0.20145691303776586, "grad_norm": 0.7524528673769058, "learning_rate": 3.357664233576642e-05, "loss": 0.5925, "step": 6900 }, { "epoch": 0.2016028963080831, "grad_norm": 0.7522617290433391, "learning_rate": 3.360097323600974e-05, "loss": 0.6251, "step": 6905 }, { "epoch": 0.20174887957840032, "grad_norm": 0.7697319402648676, "learning_rate": 3.3625304136253045e-05, "loss": 0.6363, "step": 6910 }, { "epoch": 0.20189486284871755, "grad_norm": 0.8059318768491467, "learning_rate": 3.364963503649635e-05, "loss": 0.6703, "step": 6915 }, { "epoch": 0.20204084611903475, "grad_norm": 0.7378128686129569, "learning_rate": 3.367396593673966e-05, "loss": 0.6307, "step": 6920 }, { "epoch": 0.20218682938935198, "grad_norm": 0.8168892821863061, "learning_rate": 3.369829683698297e-05, "loss": 0.5976, "step": 6925 }, { "epoch": 0.2023328126596692, "grad_norm": 0.794227436636757, "learning_rate": 3.372262773722628e-05, "loss": 0.6589, "step": 6930 }, { "epoch": 0.2024787959299864, "grad_norm": 0.9057623960698756, "learning_rate": 3.374695863746959e-05, "loss": 0.6377, "step": 6935 }, { "epoch": 0.20262477920030364, "grad_norm": 0.741454991266939, "learning_rate": 3.3771289537712896e-05, "loss": 0.5974, "step": 6940 }, { "epoch": 0.20277076247062087, "grad_norm": 0.7951416213710152, "learning_rate": 3.3795620437956204e-05, "loss": 0.6381, "step": 6945 }, { "epoch": 0.2029167457409381, "grad_norm": 0.7545767301931471, "learning_rate": 3.381995133819951e-05, "loss": 0.6119, "step": 6950 }, { "epoch": 0.2030627290112553, "grad_norm": 0.8419975075121428, "learning_rate": 3.3844282238442825e-05, "loss": 0.588, "step": 6955 }, { "epoch": 0.20320871228157253, "grad_norm": 0.8197457379798154, "learning_rate": 3.386861313868613e-05, "loss": 0.6194, "step": 6960 }, { "epoch": 0.20335469555188976, "grad_norm": 0.7664437957689811, "learning_rate": 3.389294403892944e-05, "loss": 0.6418, "step": 6965 }, { "epoch": 0.203500678822207, "grad_norm": 0.6648977748893229, "learning_rate": 3.391727493917275e-05, "loss": 0.6065, "step": 6970 }, { "epoch": 0.2036466620925242, "grad_norm": 0.7484392752283002, "learning_rate": 3.3941605839416055e-05, "loss": 0.6282, "step": 6975 }, { "epoch": 0.20379264536284142, "grad_norm": 0.6954912761934964, "learning_rate": 3.396593673965937e-05, "loss": 0.5939, "step": 6980 }, { "epoch": 0.20393862863315865, "grad_norm": 0.787425926843276, "learning_rate": 3.399026763990268e-05, "loss": 0.6188, "step": 6985 }, { "epoch": 0.20408461190347585, "grad_norm": 0.6995339843049856, "learning_rate": 3.4014598540145984e-05, "loss": 0.6289, "step": 6990 }, { "epoch": 0.20423059517379308, "grad_norm": 0.7351137644117084, "learning_rate": 3.403892944038929e-05, "loss": 0.6239, "step": 6995 }, { "epoch": 0.2043765784441103, "grad_norm": 0.7782908759948953, "learning_rate": 3.40632603406326e-05, "loss": 0.6405, "step": 7000 }, { "epoch": 0.20452256171442754, "grad_norm": 0.8024131226258834, "learning_rate": 3.408759124087591e-05, "loss": 0.6393, "step": 7005 }, { "epoch": 0.20466854498474474, "grad_norm": 0.6925634252972837, "learning_rate": 3.411192214111922e-05, "loss": 0.6211, "step": 7010 }, { "epoch": 0.20481452825506197, "grad_norm": 0.7509382101146292, "learning_rate": 3.413625304136253e-05, "loss": 0.6455, "step": 7015 }, { "epoch": 0.2049605115253792, "grad_norm": 0.8806428394142493, "learning_rate": 3.416058394160584e-05, "loss": 0.6941, "step": 7020 }, { "epoch": 0.20510649479569643, "grad_norm": 0.7798633019055106, "learning_rate": 3.418491484184915e-05, "loss": 0.6298, "step": 7025 }, { "epoch": 0.20525247806601363, "grad_norm": 0.7536732945528989, "learning_rate": 3.420924574209246e-05, "loss": 0.6115, "step": 7030 }, { "epoch": 0.20539846133633086, "grad_norm": 0.6486171526152725, "learning_rate": 3.423357664233577e-05, "loss": 0.5825, "step": 7035 }, { "epoch": 0.20554444460664809, "grad_norm": 0.7708301637108846, "learning_rate": 3.425790754257908e-05, "loss": 0.6304, "step": 7040 }, { "epoch": 0.2056904278769653, "grad_norm": 0.7439028596542171, "learning_rate": 3.4282238442822386e-05, "loss": 0.5947, "step": 7045 }, { "epoch": 0.20583641114728252, "grad_norm": 0.7172935775536068, "learning_rate": 3.43065693430657e-05, "loss": 0.6413, "step": 7050 }, { "epoch": 0.20598239441759975, "grad_norm": 0.7264285419148697, "learning_rate": 3.433090024330901e-05, "loss": 0.6259, "step": 7055 }, { "epoch": 0.20612837768791697, "grad_norm": 0.7160732612352947, "learning_rate": 3.4355231143552316e-05, "loss": 0.6601, "step": 7060 }, { "epoch": 0.20627436095823418, "grad_norm": 0.7957843028521852, "learning_rate": 3.437956204379562e-05, "loss": 0.6075, "step": 7065 }, { "epoch": 0.2064203442285514, "grad_norm": 0.7580658481489113, "learning_rate": 3.440389294403893e-05, "loss": 0.6214, "step": 7070 }, { "epoch": 0.20656632749886863, "grad_norm": 0.770594739955195, "learning_rate": 3.4428223844282245e-05, "loss": 0.6682, "step": 7075 }, { "epoch": 0.20671231076918586, "grad_norm": 0.7791013565191063, "learning_rate": 3.445255474452555e-05, "loss": 0.6149, "step": 7080 }, { "epoch": 0.20685829403950307, "grad_norm": 0.7593259996915833, "learning_rate": 3.447688564476886e-05, "loss": 0.6523, "step": 7085 }, { "epoch": 0.2070042773098203, "grad_norm": 0.7953519497731848, "learning_rate": 3.450121654501217e-05, "loss": 0.6196, "step": 7090 }, { "epoch": 0.20715026058013752, "grad_norm": 1.0355850186998308, "learning_rate": 3.4525547445255475e-05, "loss": 0.6486, "step": 7095 }, { "epoch": 0.20729624385045473, "grad_norm": 0.7850355579423837, "learning_rate": 3.454987834549879e-05, "loss": 0.6483, "step": 7100 }, { "epoch": 0.20744222712077195, "grad_norm": 0.7884334510563824, "learning_rate": 3.4574209245742096e-05, "loss": 0.6575, "step": 7105 }, { "epoch": 0.20758821039108918, "grad_norm": 0.7330793565506803, "learning_rate": 3.4598540145985404e-05, "loss": 0.6316, "step": 7110 }, { "epoch": 0.2077341936614064, "grad_norm": 0.7292180576898057, "learning_rate": 3.462287104622871e-05, "loss": 0.6165, "step": 7115 }, { "epoch": 0.20788017693172361, "grad_norm": 0.7733056631289503, "learning_rate": 3.464720194647202e-05, "loss": 0.6226, "step": 7120 }, { "epoch": 0.20802616020204084, "grad_norm": 0.7279115695130859, "learning_rate": 3.467153284671533e-05, "loss": 0.6367, "step": 7125 }, { "epoch": 0.20817214347235807, "grad_norm": 0.8140650670564465, "learning_rate": 3.469586374695864e-05, "loss": 0.6883, "step": 7130 }, { "epoch": 0.2083181267426753, "grad_norm": 0.7638175021838948, "learning_rate": 3.472019464720195e-05, "loss": 0.6229, "step": 7135 }, { "epoch": 0.2084641100129925, "grad_norm": 0.7314375006535435, "learning_rate": 3.4744525547445255e-05, "loss": 0.6102, "step": 7140 }, { "epoch": 0.20861009328330973, "grad_norm": 0.7953154161242119, "learning_rate": 3.476885644768856e-05, "loss": 0.6484, "step": 7145 }, { "epoch": 0.20875607655362696, "grad_norm": 0.7930320618941659, "learning_rate": 3.479318734793188e-05, "loss": 0.6088, "step": 7150 }, { "epoch": 0.20890205982394416, "grad_norm": 0.7658141371007064, "learning_rate": 3.4817518248175184e-05, "loss": 0.6389, "step": 7155 }, { "epoch": 0.2090480430942614, "grad_norm": 0.775794565515556, "learning_rate": 3.484184914841849e-05, "loss": 0.5984, "step": 7160 }, { "epoch": 0.20919402636457862, "grad_norm": 0.6976016507320919, "learning_rate": 3.48661800486618e-05, "loss": 0.6261, "step": 7165 }, { "epoch": 0.20934000963489585, "grad_norm": 0.7374945705094068, "learning_rate": 3.489051094890511e-05, "loss": 0.6106, "step": 7170 }, { "epoch": 0.20948599290521305, "grad_norm": 0.7430255883036492, "learning_rate": 3.491484184914842e-05, "loss": 0.68, "step": 7175 }, { "epoch": 0.20963197617553028, "grad_norm": 0.7161310996314846, "learning_rate": 3.493917274939173e-05, "loss": 0.5864, "step": 7180 }, { "epoch": 0.2097779594458475, "grad_norm": 0.7438463334733131, "learning_rate": 3.4963503649635036e-05, "loss": 0.6043, "step": 7185 }, { "epoch": 0.20992394271616474, "grad_norm": 0.7295113929152389, "learning_rate": 3.498783454987834e-05, "loss": 0.6264, "step": 7190 }, { "epoch": 0.21006992598648194, "grad_norm": 0.81676003397262, "learning_rate": 3.501216545012166e-05, "loss": 0.6417, "step": 7195 }, { "epoch": 0.21021590925679917, "grad_norm": 0.6941329762670455, "learning_rate": 3.5036496350364965e-05, "loss": 0.597, "step": 7200 }, { "epoch": 0.2103618925271164, "grad_norm": 0.764277718492314, "learning_rate": 3.506082725060827e-05, "loss": 0.6532, "step": 7205 }, { "epoch": 0.2105078757974336, "grad_norm": 0.7272445165757668, "learning_rate": 3.508515815085158e-05, "loss": 0.6402, "step": 7210 }, { "epoch": 0.21065385906775083, "grad_norm": 0.7747448136072628, "learning_rate": 3.510948905109489e-05, "loss": 0.6172, "step": 7215 }, { "epoch": 0.21079984233806806, "grad_norm": 0.7655286780593106, "learning_rate": 3.51338199513382e-05, "loss": 0.633, "step": 7220 }, { "epoch": 0.2109458256083853, "grad_norm": 0.735042687683594, "learning_rate": 3.515815085158151e-05, "loss": 0.5889, "step": 7225 }, { "epoch": 0.2110918088787025, "grad_norm": 0.7126646956458578, "learning_rate": 3.5182481751824816e-05, "loss": 0.5934, "step": 7230 }, { "epoch": 0.21123779214901972, "grad_norm": 0.678707772449002, "learning_rate": 3.520681265206813e-05, "loss": 0.5884, "step": 7235 }, { "epoch": 0.21138377541933695, "grad_norm": 0.8108306805450807, "learning_rate": 3.523114355231144e-05, "loss": 0.6319, "step": 7240 }, { "epoch": 0.21152975868965418, "grad_norm": 0.7638718491591281, "learning_rate": 3.5255474452554745e-05, "loss": 0.6162, "step": 7245 }, { "epoch": 0.21167574195997138, "grad_norm": 0.7339561750361248, "learning_rate": 3.527980535279806e-05, "loss": 0.6359, "step": 7250 }, { "epoch": 0.2118217252302886, "grad_norm": 0.8568316897973092, "learning_rate": 3.530413625304137e-05, "loss": 0.6531, "step": 7255 }, { "epoch": 0.21196770850060584, "grad_norm": 0.7024232376992082, "learning_rate": 3.5328467153284675e-05, "loss": 0.6383, "step": 7260 }, { "epoch": 0.21211369177092304, "grad_norm": 0.7188509644597083, "learning_rate": 3.535279805352798e-05, "loss": 0.6237, "step": 7265 }, { "epoch": 0.21225967504124027, "grad_norm": 0.6894740347996079, "learning_rate": 3.5377128953771296e-05, "loss": 0.6069, "step": 7270 }, { "epoch": 0.2124056583115575, "grad_norm": 0.7518279681656564, "learning_rate": 3.5401459854014604e-05, "loss": 0.612, "step": 7275 }, { "epoch": 0.21255164158187473, "grad_norm": 0.7715698548815871, "learning_rate": 3.542579075425791e-05, "loss": 0.6603, "step": 7280 }, { "epoch": 0.21269762485219193, "grad_norm": 0.8279484013380123, "learning_rate": 3.545012165450122e-05, "loss": 0.6557, "step": 7285 }, { "epoch": 0.21284360812250916, "grad_norm": 0.7003944813737859, "learning_rate": 3.5474452554744526e-05, "loss": 0.618, "step": 7290 }, { "epoch": 0.2129895913928264, "grad_norm": 0.7757520159255052, "learning_rate": 3.549878345498784e-05, "loss": 0.6642, "step": 7295 }, { "epoch": 0.2131355746631436, "grad_norm": 0.7890205415255652, "learning_rate": 3.552311435523115e-05, "loss": 0.6548, "step": 7300 }, { "epoch": 0.21328155793346082, "grad_norm": 0.7504834554131281, "learning_rate": 3.5547445255474455e-05, "loss": 0.6302, "step": 7305 }, { "epoch": 0.21342754120377805, "grad_norm": 0.7222718408793617, "learning_rate": 3.557177615571776e-05, "loss": 0.6454, "step": 7310 }, { "epoch": 0.21357352447409528, "grad_norm": 0.7381243709498527, "learning_rate": 3.559610705596107e-05, "loss": 0.6036, "step": 7315 }, { "epoch": 0.21371950774441248, "grad_norm": 0.6801315060566314, "learning_rate": 3.5620437956204384e-05, "loss": 0.6201, "step": 7320 }, { "epoch": 0.2138654910147297, "grad_norm": 0.6964277121140448, "learning_rate": 3.564476885644769e-05, "loss": 0.5916, "step": 7325 }, { "epoch": 0.21401147428504694, "grad_norm": 0.8584907124665627, "learning_rate": 3.5669099756691e-05, "loss": 0.6536, "step": 7330 }, { "epoch": 0.21415745755536417, "grad_norm": 0.7170603926365512, "learning_rate": 3.569343065693431e-05, "loss": 0.5968, "step": 7335 }, { "epoch": 0.21430344082568137, "grad_norm": 0.7180805203517295, "learning_rate": 3.5717761557177614e-05, "loss": 0.5879, "step": 7340 }, { "epoch": 0.2144494240959986, "grad_norm": 0.7075533374658468, "learning_rate": 3.574209245742093e-05, "loss": 0.615, "step": 7345 }, { "epoch": 0.21459540736631583, "grad_norm": 0.8049123492552398, "learning_rate": 3.5766423357664236e-05, "loss": 0.612, "step": 7350 }, { "epoch": 0.21474139063663303, "grad_norm": 0.7525058880431025, "learning_rate": 3.579075425790754e-05, "loss": 0.6272, "step": 7355 }, { "epoch": 0.21488737390695026, "grad_norm": 0.7519765876519449, "learning_rate": 3.581508515815085e-05, "loss": 0.6708, "step": 7360 }, { "epoch": 0.2150333571772675, "grad_norm": 0.715666705349937, "learning_rate": 3.583941605839416e-05, "loss": 0.6227, "step": 7365 }, { "epoch": 0.21517934044758472, "grad_norm": 0.7717061571462726, "learning_rate": 3.586374695863747e-05, "loss": 0.6332, "step": 7370 }, { "epoch": 0.21532532371790192, "grad_norm": 0.8225576589919823, "learning_rate": 3.588807785888078e-05, "loss": 0.6412, "step": 7375 }, { "epoch": 0.21547130698821915, "grad_norm": 0.7281721246045418, "learning_rate": 3.591240875912409e-05, "loss": 0.6167, "step": 7380 }, { "epoch": 0.21561729025853638, "grad_norm": 0.8008245380691672, "learning_rate": 3.5936739659367395e-05, "loss": 0.6354, "step": 7385 }, { "epoch": 0.2157632735288536, "grad_norm": 0.724253491991002, "learning_rate": 3.596107055961071e-05, "loss": 0.6279, "step": 7390 }, { "epoch": 0.2159092567991708, "grad_norm": 0.741573035817975, "learning_rate": 3.5985401459854016e-05, "loss": 0.6346, "step": 7395 }, { "epoch": 0.21605524006948804, "grad_norm": 0.7196006872496434, "learning_rate": 3.6009732360097324e-05, "loss": 0.6267, "step": 7400 }, { "epoch": 0.21620122333980527, "grad_norm": 0.691765615157003, "learning_rate": 3.603406326034063e-05, "loss": 0.5679, "step": 7405 }, { "epoch": 0.21634720661012247, "grad_norm": 0.7195033338610979, "learning_rate": 3.605839416058394e-05, "loss": 0.621, "step": 7410 }, { "epoch": 0.2164931898804397, "grad_norm": 0.7812599672796311, "learning_rate": 3.608272506082725e-05, "loss": 0.6422, "step": 7415 }, { "epoch": 0.21663917315075693, "grad_norm": 0.762032730204255, "learning_rate": 3.610705596107056e-05, "loss": 0.6671, "step": 7420 }, { "epoch": 0.21678515642107415, "grad_norm": 0.6896868634260253, "learning_rate": 3.613138686131387e-05, "loss": 0.6118, "step": 7425 }, { "epoch": 0.21693113969139136, "grad_norm": 0.756121684798624, "learning_rate": 3.6155717761557175e-05, "loss": 0.6167, "step": 7430 }, { "epoch": 0.21707712296170859, "grad_norm": 0.6672844727694871, "learning_rate": 3.618004866180049e-05, "loss": 0.6211, "step": 7435 }, { "epoch": 0.21722310623202581, "grad_norm": 0.6952907170830105, "learning_rate": 3.62043795620438e-05, "loss": 0.674, "step": 7440 }, { "epoch": 0.21736908950234304, "grad_norm": 0.73337533405837, "learning_rate": 3.6228710462287104e-05, "loss": 0.6071, "step": 7445 }, { "epoch": 0.21751507277266025, "grad_norm": 0.6621777339737791, "learning_rate": 3.625304136253042e-05, "loss": 0.5786, "step": 7450 }, { "epoch": 0.21766105604297747, "grad_norm": 0.7521825538091554, "learning_rate": 3.6277372262773726e-05, "loss": 0.6155, "step": 7455 }, { "epoch": 0.2178070393132947, "grad_norm": 0.7105614261343465, "learning_rate": 3.6301703163017034e-05, "loss": 0.616, "step": 7460 }, { "epoch": 0.2179530225836119, "grad_norm": 0.762553097832908, "learning_rate": 3.632603406326035e-05, "loss": 0.6426, "step": 7465 }, { "epoch": 0.21809900585392913, "grad_norm": 0.7258895820453283, "learning_rate": 3.6350364963503655e-05, "loss": 0.6136, "step": 7470 }, { "epoch": 0.21824498912424636, "grad_norm": 0.6938961592563734, "learning_rate": 3.637469586374696e-05, "loss": 0.6294, "step": 7475 }, { "epoch": 0.2183909723945636, "grad_norm": 0.7469593534153605, "learning_rate": 3.639902676399027e-05, "loss": 0.5861, "step": 7480 }, { "epoch": 0.2185369556648808, "grad_norm": 0.7511246967860922, "learning_rate": 3.642335766423358e-05, "loss": 0.576, "step": 7485 }, { "epoch": 0.21868293893519802, "grad_norm": 0.7080252481973186, "learning_rate": 3.644768856447689e-05, "loss": 0.6236, "step": 7490 }, { "epoch": 0.21882892220551525, "grad_norm": 0.716098487256955, "learning_rate": 3.64720194647202e-05, "loss": 0.6062, "step": 7495 }, { "epoch": 0.21897490547583248, "grad_norm": 0.72242144589018, "learning_rate": 3.649635036496351e-05, "loss": 0.6025, "step": 7500 }, { "epoch": 0.21912088874614968, "grad_norm": 0.6730040009284396, "learning_rate": 3.6520681265206814e-05, "loss": 0.605, "step": 7505 }, { "epoch": 0.2192668720164669, "grad_norm": 0.7299195845039229, "learning_rate": 3.654501216545012e-05, "loss": 0.6211, "step": 7510 }, { "epoch": 0.21941285528678414, "grad_norm": 0.7671977702428762, "learning_rate": 3.6569343065693436e-05, "loss": 0.6789, "step": 7515 }, { "epoch": 0.21955883855710134, "grad_norm": 0.8222753215106856, "learning_rate": 3.659367396593674e-05, "loss": 0.6239, "step": 7520 }, { "epoch": 0.21970482182741857, "grad_norm": 0.7898570602188189, "learning_rate": 3.661800486618005e-05, "loss": 0.6314, "step": 7525 }, { "epoch": 0.2198508050977358, "grad_norm": 0.6768046462112383, "learning_rate": 3.664233576642336e-05, "loss": 0.5938, "step": 7530 }, { "epoch": 0.21999678836805303, "grad_norm": 0.7405543078703651, "learning_rate": 3.6666666666666666e-05, "loss": 0.6351, "step": 7535 }, { "epoch": 0.22014277163837023, "grad_norm": 0.769477556233622, "learning_rate": 3.669099756690998e-05, "loss": 0.5666, "step": 7540 }, { "epoch": 0.22028875490868746, "grad_norm": 0.7398303255023368, "learning_rate": 3.671532846715329e-05, "loss": 0.6064, "step": 7545 }, { "epoch": 0.2204347381790047, "grad_norm": 0.7595432382934072, "learning_rate": 3.6739659367396595e-05, "loss": 0.6722, "step": 7550 }, { "epoch": 0.22058072144932192, "grad_norm": 0.7076700611936384, "learning_rate": 3.67639902676399e-05, "loss": 0.5859, "step": 7555 }, { "epoch": 0.22072670471963912, "grad_norm": 0.868810174890599, "learning_rate": 3.678832116788321e-05, "loss": 0.6464, "step": 7560 }, { "epoch": 0.22087268798995635, "grad_norm": 0.7172370212991215, "learning_rate": 3.6812652068126524e-05, "loss": 0.6496, "step": 7565 }, { "epoch": 0.22101867126027358, "grad_norm": 0.7273497393507823, "learning_rate": 3.683698296836983e-05, "loss": 0.6336, "step": 7570 }, { "epoch": 0.22116465453059078, "grad_norm": 0.7253970203872644, "learning_rate": 3.686131386861314e-05, "loss": 0.634, "step": 7575 }, { "epoch": 0.221310637800908, "grad_norm": 0.7366732441285796, "learning_rate": 3.6885644768856446e-05, "loss": 0.6347, "step": 7580 }, { "epoch": 0.22145662107122524, "grad_norm": 0.7538053585857436, "learning_rate": 3.690997566909976e-05, "loss": 0.6617, "step": 7585 }, { "epoch": 0.22160260434154247, "grad_norm": 0.6757596845077538, "learning_rate": 3.693430656934307e-05, "loss": 0.6462, "step": 7590 }, { "epoch": 0.22174858761185967, "grad_norm": 0.870077303187197, "learning_rate": 3.6958637469586375e-05, "loss": 0.6641, "step": 7595 }, { "epoch": 0.2218945708821769, "grad_norm": 0.7316437503125511, "learning_rate": 3.698296836982968e-05, "loss": 0.6274, "step": 7600 }, { "epoch": 0.22204055415249413, "grad_norm": 0.7313930487988697, "learning_rate": 3.700729927007299e-05, "loss": 0.5971, "step": 7605 }, { "epoch": 0.22218653742281136, "grad_norm": 0.7465068856237284, "learning_rate": 3.7031630170316305e-05, "loss": 0.6363, "step": 7610 }, { "epoch": 0.22233252069312856, "grad_norm": 0.7185467602735226, "learning_rate": 3.705596107055961e-05, "loss": 0.6392, "step": 7615 }, { "epoch": 0.2224785039634458, "grad_norm": 0.6707781249868507, "learning_rate": 3.708029197080292e-05, "loss": 0.5833, "step": 7620 }, { "epoch": 0.22262448723376302, "grad_norm": 0.8020038716790657, "learning_rate": 3.710462287104623e-05, "loss": 0.6465, "step": 7625 }, { "epoch": 0.22277047050408022, "grad_norm": 0.7362315626713599, "learning_rate": 3.7128953771289534e-05, "loss": 0.6269, "step": 7630 }, { "epoch": 0.22291645377439745, "grad_norm": 0.6867139002336934, "learning_rate": 3.715328467153285e-05, "loss": 0.6357, "step": 7635 }, { "epoch": 0.22306243704471468, "grad_norm": 0.7285834653336768, "learning_rate": 3.7177615571776156e-05, "loss": 0.7151, "step": 7640 }, { "epoch": 0.2232084203150319, "grad_norm": 0.9886077227807147, "learning_rate": 3.7201946472019464e-05, "loss": 0.6232, "step": 7645 }, { "epoch": 0.2233544035853491, "grad_norm": 0.6977932487944212, "learning_rate": 3.722627737226278e-05, "loss": 0.6059, "step": 7650 }, { "epoch": 0.22350038685566634, "grad_norm": 0.7504483656800675, "learning_rate": 3.7250608272506085e-05, "loss": 0.6256, "step": 7655 }, { "epoch": 0.22364637012598357, "grad_norm": 0.7867307675307759, "learning_rate": 3.727493917274939e-05, "loss": 0.6527, "step": 7660 }, { "epoch": 0.2237923533963008, "grad_norm": 0.6978354070446469, "learning_rate": 3.729927007299271e-05, "loss": 0.6517, "step": 7665 }, { "epoch": 0.223938336666618, "grad_norm": 0.7376374066398008, "learning_rate": 3.7323600973236014e-05, "loss": 0.6422, "step": 7670 }, { "epoch": 0.22408431993693523, "grad_norm": 0.7270297123183034, "learning_rate": 3.734793187347932e-05, "loss": 0.5986, "step": 7675 }, { "epoch": 0.22423030320725246, "grad_norm": 0.7343593117522559, "learning_rate": 3.737226277372263e-05, "loss": 0.6288, "step": 7680 }, { "epoch": 0.22437628647756966, "grad_norm": 0.7838746621510457, "learning_rate": 3.7396593673965943e-05, "loss": 0.6191, "step": 7685 }, { "epoch": 0.2245222697478869, "grad_norm": 0.8023495570024143, "learning_rate": 3.742092457420925e-05, "loss": 0.6664, "step": 7690 }, { "epoch": 0.22466825301820412, "grad_norm": 0.6518367741366444, "learning_rate": 3.744525547445256e-05, "loss": 0.5897, "step": 7695 }, { "epoch": 0.22481423628852135, "grad_norm": 0.7318833107319597, "learning_rate": 3.7469586374695866e-05, "loss": 0.6237, "step": 7700 }, { "epoch": 0.22496021955883855, "grad_norm": 0.7665332152303261, "learning_rate": 3.749391727493917e-05, "loss": 0.6522, "step": 7705 }, { "epoch": 0.22510620282915578, "grad_norm": 0.824574818786909, "learning_rate": 3.751824817518249e-05, "loss": 0.6494, "step": 7710 }, { "epoch": 0.225252186099473, "grad_norm": 0.8189772660957644, "learning_rate": 3.7542579075425795e-05, "loss": 0.6123, "step": 7715 }, { "epoch": 0.22539816936979024, "grad_norm": 0.7903067856152443, "learning_rate": 3.75669099756691e-05, "loss": 0.652, "step": 7720 }, { "epoch": 0.22554415264010744, "grad_norm": 0.7575224923707352, "learning_rate": 3.759124087591241e-05, "loss": 0.6429, "step": 7725 }, { "epoch": 0.22569013591042467, "grad_norm": 0.7324551658659787, "learning_rate": 3.761557177615572e-05, "loss": 0.5876, "step": 7730 }, { "epoch": 0.2258361191807419, "grad_norm": 0.7389773488073114, "learning_rate": 3.763990267639903e-05, "loss": 0.6102, "step": 7735 }, { "epoch": 0.2259821024510591, "grad_norm": 0.6893940641049677, "learning_rate": 3.766423357664234e-05, "loss": 0.6069, "step": 7740 }, { "epoch": 0.22612808572137633, "grad_norm": 0.8277784110288743, "learning_rate": 3.7688564476885646e-05, "loss": 0.6386, "step": 7745 }, { "epoch": 0.22627406899169356, "grad_norm": 0.8644183319348767, "learning_rate": 3.7712895377128954e-05, "loss": 0.6408, "step": 7750 }, { "epoch": 0.22642005226201078, "grad_norm": 0.6734576708704596, "learning_rate": 3.773722627737227e-05, "loss": 0.6437, "step": 7755 }, { "epoch": 0.226566035532328, "grad_norm": 0.748707981585647, "learning_rate": 3.7761557177615576e-05, "loss": 0.6247, "step": 7760 }, { "epoch": 0.22671201880264522, "grad_norm": 0.649019778857916, "learning_rate": 3.778588807785888e-05, "loss": 0.6086, "step": 7765 }, { "epoch": 0.22685800207296244, "grad_norm": 0.7350147107970709, "learning_rate": 3.781021897810219e-05, "loss": 0.5952, "step": 7770 }, { "epoch": 0.22700398534327967, "grad_norm": 0.7256679301458653, "learning_rate": 3.78345498783455e-05, "loss": 0.637, "step": 7775 }, { "epoch": 0.22714996861359688, "grad_norm": 0.8145484474694016, "learning_rate": 3.785888077858881e-05, "loss": 0.5939, "step": 7780 }, { "epoch": 0.2272959518839141, "grad_norm": 0.7443147314750598, "learning_rate": 3.788321167883212e-05, "loss": 0.6148, "step": 7785 }, { "epoch": 0.22744193515423133, "grad_norm": 0.737237133070499, "learning_rate": 3.790754257907543e-05, "loss": 0.6072, "step": 7790 }, { "epoch": 0.22758791842454854, "grad_norm": 0.7314291141273338, "learning_rate": 3.7931873479318734e-05, "loss": 0.6311, "step": 7795 }, { "epoch": 0.22773390169486576, "grad_norm": 0.7526682202346864, "learning_rate": 3.795620437956204e-05, "loss": 0.6253, "step": 7800 }, { "epoch": 0.227879884965183, "grad_norm": 0.6471800846484029, "learning_rate": 3.7980535279805356e-05, "loss": 0.5967, "step": 7805 }, { "epoch": 0.22802586823550022, "grad_norm": 0.7181027336107947, "learning_rate": 3.8004866180048664e-05, "loss": 0.6338, "step": 7810 }, { "epoch": 0.22817185150581742, "grad_norm": 0.7522155301665915, "learning_rate": 3.802919708029197e-05, "loss": 0.6152, "step": 7815 }, { "epoch": 0.22831783477613465, "grad_norm": 0.6830100269070738, "learning_rate": 3.805352798053528e-05, "loss": 0.6253, "step": 7820 }, { "epoch": 0.22846381804645188, "grad_norm": 0.7338424408579513, "learning_rate": 3.8077858880778586e-05, "loss": 0.606, "step": 7825 }, { "epoch": 0.22860980131676908, "grad_norm": 0.7257908978084738, "learning_rate": 3.81021897810219e-05, "loss": 0.5511, "step": 7830 }, { "epoch": 0.2287557845870863, "grad_norm": 0.8009799402638087, "learning_rate": 3.812652068126521e-05, "loss": 0.6566, "step": 7835 }, { "epoch": 0.22890176785740354, "grad_norm": 0.7135433230416794, "learning_rate": 3.8150851581508515e-05, "loss": 0.6405, "step": 7840 }, { "epoch": 0.22904775112772077, "grad_norm": 0.6430211130892507, "learning_rate": 3.817518248175182e-05, "loss": 0.5922, "step": 7845 }, { "epoch": 0.22919373439803797, "grad_norm": 1.0019622484706636, "learning_rate": 3.819951338199514e-05, "loss": 0.6688, "step": 7850 }, { "epoch": 0.2293397176683552, "grad_norm": 1.266262300748206, "learning_rate": 3.8223844282238444e-05, "loss": 0.6072, "step": 7855 }, { "epoch": 0.22948570093867243, "grad_norm": 0.7173390708064131, "learning_rate": 3.824817518248175e-05, "loss": 0.6304, "step": 7860 }, { "epoch": 0.22963168420898966, "grad_norm": 0.7093965901052158, "learning_rate": 3.8272506082725066e-05, "loss": 0.6507, "step": 7865 }, { "epoch": 0.22977766747930686, "grad_norm": 0.7194172426078854, "learning_rate": 3.829683698296837e-05, "loss": 0.627, "step": 7870 }, { "epoch": 0.2299236507496241, "grad_norm": 0.7295156631512394, "learning_rate": 3.832116788321168e-05, "loss": 0.6331, "step": 7875 }, { "epoch": 0.23006963401994132, "grad_norm": 0.9472431104147637, "learning_rate": 3.8345498783454995e-05, "loss": 0.636, "step": 7880 }, { "epoch": 0.23021561729025852, "grad_norm": 0.6485015506229117, "learning_rate": 3.83698296836983e-05, "loss": 0.6123, "step": 7885 }, { "epoch": 0.23036160056057575, "grad_norm": 0.7608642090911342, "learning_rate": 3.839416058394161e-05, "loss": 0.6343, "step": 7890 }, { "epoch": 0.23050758383089298, "grad_norm": 0.6608894203325456, "learning_rate": 3.841849148418492e-05, "loss": 0.6402, "step": 7895 }, { "epoch": 0.2306535671012102, "grad_norm": 0.7155097181313194, "learning_rate": 3.8442822384428225e-05, "loss": 0.6271, "step": 7900 }, { "epoch": 0.2307995503715274, "grad_norm": 0.7925972626723489, "learning_rate": 3.846715328467154e-05, "loss": 0.6628, "step": 7905 }, { "epoch": 0.23094553364184464, "grad_norm": 0.7575917071576599, "learning_rate": 3.8491484184914846e-05, "loss": 0.6197, "step": 7910 }, { "epoch": 0.23109151691216187, "grad_norm": 0.666660398613513, "learning_rate": 3.8515815085158154e-05, "loss": 0.5846, "step": 7915 }, { "epoch": 0.2312375001824791, "grad_norm": 0.6996181389355657, "learning_rate": 3.854014598540146e-05, "loss": 0.6378, "step": 7920 }, { "epoch": 0.2313834834527963, "grad_norm": 0.7957569063841213, "learning_rate": 3.856447688564477e-05, "loss": 0.6312, "step": 7925 }, { "epoch": 0.23152946672311353, "grad_norm": 0.7309480903034795, "learning_rate": 3.858880778588808e-05, "loss": 0.627, "step": 7930 }, { "epoch": 0.23167544999343076, "grad_norm": 0.733826366013101, "learning_rate": 3.861313868613139e-05, "loss": 0.629, "step": 7935 }, { "epoch": 0.23182143326374796, "grad_norm": 0.651831141692836, "learning_rate": 3.86374695863747e-05, "loss": 0.6185, "step": 7940 }, { "epoch": 0.2319674165340652, "grad_norm": 0.7824696330265986, "learning_rate": 3.8661800486618005e-05, "loss": 0.6256, "step": 7945 }, { "epoch": 0.23211339980438242, "grad_norm": 0.7163524694442868, "learning_rate": 3.868613138686132e-05, "loss": 0.5864, "step": 7950 }, { "epoch": 0.23225938307469965, "grad_norm": 0.7318648066646594, "learning_rate": 3.871046228710463e-05, "loss": 0.6136, "step": 7955 }, { "epoch": 0.23240536634501685, "grad_norm": 0.7692724726194416, "learning_rate": 3.8734793187347935e-05, "loss": 0.6681, "step": 7960 }, { "epoch": 0.23255134961533408, "grad_norm": 0.7447304749797866, "learning_rate": 3.875912408759124e-05, "loss": 0.6241, "step": 7965 }, { "epoch": 0.2326973328856513, "grad_norm": 0.7492958933668784, "learning_rate": 3.878345498783455e-05, "loss": 0.6325, "step": 7970 }, { "epoch": 0.23284331615596854, "grad_norm": 0.6795104313573395, "learning_rate": 3.8807785888077864e-05, "loss": 0.6195, "step": 7975 }, { "epoch": 0.23298929942628574, "grad_norm": 0.707829177522709, "learning_rate": 3.883211678832117e-05, "loss": 0.6274, "step": 7980 }, { "epoch": 0.23313528269660297, "grad_norm": 0.7675286648631781, "learning_rate": 3.885644768856448e-05, "loss": 0.6557, "step": 7985 }, { "epoch": 0.2332812659669202, "grad_norm": 0.7184257985255329, "learning_rate": 3.8880778588807786e-05, "loss": 0.6161, "step": 7990 }, { "epoch": 0.2334272492372374, "grad_norm": 0.7058157909171557, "learning_rate": 3.8905109489051093e-05, "loss": 0.6082, "step": 7995 }, { "epoch": 0.23357323250755463, "grad_norm": 0.7184638774184454, "learning_rate": 3.892944038929441e-05, "loss": 0.598, "step": 8000 }, { "epoch": 0.23371921577787186, "grad_norm": 0.7714630941325679, "learning_rate": 3.8953771289537715e-05, "loss": 0.6403, "step": 8005 }, { "epoch": 0.2338651990481891, "grad_norm": 0.6850295470652937, "learning_rate": 3.897810218978102e-05, "loss": 0.6186, "step": 8010 }, { "epoch": 0.2340111823185063, "grad_norm": 0.7429282354524196, "learning_rate": 3.900243309002433e-05, "loss": 0.6125, "step": 8015 }, { "epoch": 0.23415716558882352, "grad_norm": 0.7226738716978687, "learning_rate": 3.902676399026764e-05, "loss": 0.6187, "step": 8020 }, { "epoch": 0.23430314885914075, "grad_norm": 0.8182553571448405, "learning_rate": 3.905109489051095e-05, "loss": 0.6525, "step": 8025 }, { "epoch": 0.23444913212945798, "grad_norm": 0.7851985916465047, "learning_rate": 3.907542579075426e-05, "loss": 0.622, "step": 8030 }, { "epoch": 0.23459511539977518, "grad_norm": 0.7228669681001082, "learning_rate": 3.909975669099757e-05, "loss": 0.6029, "step": 8035 }, { "epoch": 0.2347410986700924, "grad_norm": 0.7044793835510117, "learning_rate": 3.9124087591240874e-05, "loss": 0.6164, "step": 8040 }, { "epoch": 0.23488708194040964, "grad_norm": 0.7505924107237081, "learning_rate": 3.914841849148418e-05, "loss": 0.6578, "step": 8045 }, { "epoch": 0.23503306521072684, "grad_norm": 0.8249963525392774, "learning_rate": 3.9172749391727496e-05, "loss": 0.6812, "step": 8050 }, { "epoch": 0.23517904848104407, "grad_norm": 0.7254927567875956, "learning_rate": 3.91970802919708e-05, "loss": 0.6432, "step": 8055 }, { "epoch": 0.2353250317513613, "grad_norm": 0.6961129338659763, "learning_rate": 3.922141119221411e-05, "loss": 0.6399, "step": 8060 }, { "epoch": 0.23547101502167853, "grad_norm": 0.693053805405167, "learning_rate": 3.9245742092457425e-05, "loss": 0.6364, "step": 8065 }, { "epoch": 0.23561699829199573, "grad_norm": 0.6640131042865576, "learning_rate": 3.927007299270073e-05, "loss": 0.6214, "step": 8070 }, { "epoch": 0.23576298156231296, "grad_norm": 0.7071125213643585, "learning_rate": 3.929440389294404e-05, "loss": 0.6326, "step": 8075 }, { "epoch": 0.23590896483263019, "grad_norm": 0.9136340114897117, "learning_rate": 3.9318734793187354e-05, "loss": 0.6405, "step": 8080 }, { "epoch": 0.23605494810294742, "grad_norm": 0.7689137096508465, "learning_rate": 3.934306569343066e-05, "loss": 0.6633, "step": 8085 }, { "epoch": 0.23620093137326462, "grad_norm": 0.8027177506155135, "learning_rate": 3.936739659367397e-05, "loss": 0.6891, "step": 8090 }, { "epoch": 0.23634691464358185, "grad_norm": 0.7030099489832032, "learning_rate": 3.9391727493917276e-05, "loss": 0.6307, "step": 8095 }, { "epoch": 0.23649289791389908, "grad_norm": 0.7248413638452603, "learning_rate": 3.941605839416059e-05, "loss": 0.6254, "step": 8100 }, { "epoch": 0.23663888118421628, "grad_norm": 0.684896987472752, "learning_rate": 3.94403892944039e-05, "loss": 0.6248, "step": 8105 }, { "epoch": 0.2367848644545335, "grad_norm": 0.6642631736382643, "learning_rate": 3.9464720194647205e-05, "loss": 0.6165, "step": 8110 }, { "epoch": 0.23693084772485073, "grad_norm": 0.7182191322448866, "learning_rate": 3.948905109489051e-05, "loss": 0.667, "step": 8115 }, { "epoch": 0.23707683099516796, "grad_norm": 0.6559691823473891, "learning_rate": 3.951338199513382e-05, "loss": 0.6004, "step": 8120 }, { "epoch": 0.23722281426548517, "grad_norm": 0.6602301648859444, "learning_rate": 3.9537712895377135e-05, "loss": 0.615, "step": 8125 }, { "epoch": 0.2373687975358024, "grad_norm": 0.6860494374027645, "learning_rate": 3.956204379562044e-05, "loss": 0.6236, "step": 8130 }, { "epoch": 0.23751478080611962, "grad_norm": 0.7181494212143815, "learning_rate": 3.958637469586375e-05, "loss": 0.6565, "step": 8135 }, { "epoch": 0.23766076407643685, "grad_norm": 0.7052607434195022, "learning_rate": 3.961070559610706e-05, "loss": 0.5998, "step": 8140 }, { "epoch": 0.23780674734675405, "grad_norm": 0.755313799848642, "learning_rate": 3.963503649635037e-05, "loss": 0.6064, "step": 8145 }, { "epoch": 0.23795273061707128, "grad_norm": 0.7021050857384293, "learning_rate": 3.965936739659368e-05, "loss": 0.6101, "step": 8150 }, { "epoch": 0.2380987138873885, "grad_norm": 0.7081184582330688, "learning_rate": 3.9683698296836986e-05, "loss": 0.6422, "step": 8155 }, { "epoch": 0.23824469715770571, "grad_norm": 0.8083863761767113, "learning_rate": 3.9708029197080294e-05, "loss": 0.6224, "step": 8160 }, { "epoch": 0.23839068042802294, "grad_norm": 0.628060443964092, "learning_rate": 3.97323600973236e-05, "loss": 0.5963, "step": 8165 }, { "epoch": 0.23853666369834017, "grad_norm": 0.7393040276406013, "learning_rate": 3.9756690997566915e-05, "loss": 0.6108, "step": 8170 }, { "epoch": 0.2386826469686574, "grad_norm": 0.7607503959515614, "learning_rate": 3.978102189781022e-05, "loss": 0.6262, "step": 8175 }, { "epoch": 0.2388286302389746, "grad_norm": 0.6520612134826729, "learning_rate": 3.980535279805353e-05, "loss": 0.6257, "step": 8180 }, { "epoch": 0.23897461350929183, "grad_norm": 0.7064902363570613, "learning_rate": 3.982968369829684e-05, "loss": 0.6475, "step": 8185 }, { "epoch": 0.23912059677960906, "grad_norm": 0.7138757912912429, "learning_rate": 3.9854014598540145e-05, "loss": 0.6532, "step": 8190 }, { "epoch": 0.2392665800499263, "grad_norm": 0.7306113736418214, "learning_rate": 3.987834549878346e-05, "loss": 0.622, "step": 8195 }, { "epoch": 0.2394125633202435, "grad_norm": 0.7068506212837556, "learning_rate": 3.990267639902677e-05, "loss": 0.6305, "step": 8200 }, { "epoch": 0.23955854659056072, "grad_norm": 0.7349557112444394, "learning_rate": 3.9927007299270074e-05, "loss": 0.6403, "step": 8205 }, { "epoch": 0.23970452986087795, "grad_norm": 0.7557245768870593, "learning_rate": 3.995133819951338e-05, "loss": 0.6113, "step": 8210 }, { "epoch": 0.23985051313119515, "grad_norm": 0.6694609757012322, "learning_rate": 3.997566909975669e-05, "loss": 0.6059, "step": 8215 }, { "epoch": 0.23999649640151238, "grad_norm": 0.7133685131357099, "learning_rate": 4e-05, "loss": 0.6338, "step": 8220 }, { "epoch": 0.2401424796718296, "grad_norm": 0.7737978866556039, "learning_rate": 4.002433090024331e-05, "loss": 0.6736, "step": 8225 }, { "epoch": 0.24028846294214684, "grad_norm": 0.6713728207753517, "learning_rate": 4.004866180048662e-05, "loss": 0.6266, "step": 8230 }, { "epoch": 0.24043444621246404, "grad_norm": 0.721355145299782, "learning_rate": 4.0072992700729926e-05, "loss": 0.6157, "step": 8235 }, { "epoch": 0.24058042948278127, "grad_norm": 0.6694392076914084, "learning_rate": 4.009732360097323e-05, "loss": 0.6615, "step": 8240 }, { "epoch": 0.2407264127530985, "grad_norm": 0.7190484941988063, "learning_rate": 4.012165450121655e-05, "loss": 0.6271, "step": 8245 }, { "epoch": 0.24087239602341573, "grad_norm": 0.6941059442235329, "learning_rate": 4.0145985401459855e-05, "loss": 0.6469, "step": 8250 }, { "epoch": 0.24101837929373293, "grad_norm": 0.7365285887689498, "learning_rate": 4.017031630170316e-05, "loss": 0.6207, "step": 8255 }, { "epoch": 0.24116436256405016, "grad_norm": 0.659950133047933, "learning_rate": 4.019464720194647e-05, "loss": 0.6168, "step": 8260 }, { "epoch": 0.2413103458343674, "grad_norm": 0.7381496678196539, "learning_rate": 4.021897810218978e-05, "loss": 0.6719, "step": 8265 }, { "epoch": 0.2414563291046846, "grad_norm": 0.7323597255606062, "learning_rate": 4.024330900243309e-05, "loss": 0.6552, "step": 8270 }, { "epoch": 0.24160231237500182, "grad_norm": 0.734307678936904, "learning_rate": 4.02676399026764e-05, "loss": 0.6388, "step": 8275 }, { "epoch": 0.24174829564531905, "grad_norm": 0.7888551055385278, "learning_rate": 4.0291970802919706e-05, "loss": 0.6548, "step": 8280 }, { "epoch": 0.24189427891563628, "grad_norm": 0.7209920067681566, "learning_rate": 4.031630170316302e-05, "loss": 0.6325, "step": 8285 }, { "epoch": 0.24204026218595348, "grad_norm": 0.7517466735922685, "learning_rate": 4.034063260340633e-05, "loss": 0.633, "step": 8290 }, { "epoch": 0.2421862454562707, "grad_norm": 0.6547027779674609, "learning_rate": 4.0364963503649635e-05, "loss": 0.6425, "step": 8295 }, { "epoch": 0.24233222872658794, "grad_norm": 0.6825485162288305, "learning_rate": 4.038929440389295e-05, "loss": 0.6149, "step": 8300 }, { "epoch": 0.24247821199690514, "grad_norm": 0.7168162188030609, "learning_rate": 4.041362530413626e-05, "loss": 0.6611, "step": 8305 }, { "epoch": 0.24262419526722237, "grad_norm": 0.8266989059375217, "learning_rate": 4.0437956204379564e-05, "loss": 0.6511, "step": 8310 }, { "epoch": 0.2427701785375396, "grad_norm": 0.7317994694505503, "learning_rate": 4.046228710462288e-05, "loss": 0.6671, "step": 8315 }, { "epoch": 0.24291616180785683, "grad_norm": 0.7408380535747058, "learning_rate": 4.0486618004866186e-05, "loss": 0.6354, "step": 8320 }, { "epoch": 0.24306214507817403, "grad_norm": 0.7037856765542251, "learning_rate": 4.0510948905109494e-05, "loss": 0.6131, "step": 8325 }, { "epoch": 0.24320812834849126, "grad_norm": 0.6632173984095833, "learning_rate": 4.05352798053528e-05, "loss": 0.6346, "step": 8330 }, { "epoch": 0.2433541116188085, "grad_norm": 0.7383716368558151, "learning_rate": 4.055961070559611e-05, "loss": 0.6384, "step": 8335 }, { "epoch": 0.24350009488912572, "grad_norm": 0.7590309752533279, "learning_rate": 4.058394160583942e-05, "loss": 0.6381, "step": 8340 }, { "epoch": 0.24364607815944292, "grad_norm": 0.7870132936023922, "learning_rate": 4.060827250608273e-05, "loss": 0.6704, "step": 8345 }, { "epoch": 0.24379206142976015, "grad_norm": 0.653814155760067, "learning_rate": 4.063260340632604e-05, "loss": 0.6189, "step": 8350 }, { "epoch": 0.24393804470007738, "grad_norm": 0.8712442198168862, "learning_rate": 4.0656934306569345e-05, "loss": 0.6134, "step": 8355 }, { "epoch": 0.24408402797039458, "grad_norm": 0.7287577392521262, "learning_rate": 4.068126520681265e-05, "loss": 0.6457, "step": 8360 }, { "epoch": 0.2442300112407118, "grad_norm": 0.7015582371121836, "learning_rate": 4.070559610705597e-05, "loss": 0.6011, "step": 8365 }, { "epoch": 0.24437599451102904, "grad_norm": 0.7681595691687733, "learning_rate": 4.0729927007299274e-05, "loss": 0.6308, "step": 8370 }, { "epoch": 0.24452197778134627, "grad_norm": 0.687694439444443, "learning_rate": 4.075425790754258e-05, "loss": 0.6212, "step": 8375 }, { "epoch": 0.24466796105166347, "grad_norm": 0.7456253633109385, "learning_rate": 4.077858880778589e-05, "loss": 0.6234, "step": 8380 }, { "epoch": 0.2448139443219807, "grad_norm": 0.6862899344019103, "learning_rate": 4.0802919708029197e-05, "loss": 0.6485, "step": 8385 }, { "epoch": 0.24495992759229793, "grad_norm": 0.7443676523693873, "learning_rate": 4.082725060827251e-05, "loss": 0.6207, "step": 8390 }, { "epoch": 0.24510591086261516, "grad_norm": 0.6807470784410189, "learning_rate": 4.085158150851582e-05, "loss": 0.6099, "step": 8395 }, { "epoch": 0.24525189413293236, "grad_norm": 0.6512306182633479, "learning_rate": 4.0875912408759126e-05, "loss": 0.5887, "step": 8400 }, { "epoch": 0.2453978774032496, "grad_norm": 0.675727873211609, "learning_rate": 4.090024330900243e-05, "loss": 0.6138, "step": 8405 }, { "epoch": 0.24554386067356682, "grad_norm": 0.6748252589755368, "learning_rate": 4.092457420924574e-05, "loss": 0.6139, "step": 8410 }, { "epoch": 0.24568984394388402, "grad_norm": 0.6139975005820061, "learning_rate": 4.0948905109489055e-05, "loss": 0.5947, "step": 8415 }, { "epoch": 0.24583582721420125, "grad_norm": 0.6838488599469548, "learning_rate": 4.097323600973236e-05, "loss": 0.6616, "step": 8420 }, { "epoch": 0.24598181048451848, "grad_norm": 0.7393428168439785, "learning_rate": 4.099756690997567e-05, "loss": 0.6508, "step": 8425 }, { "epoch": 0.2461277937548357, "grad_norm": 0.7690332974599591, "learning_rate": 4.102189781021898e-05, "loss": 0.6642, "step": 8430 }, { "epoch": 0.2462737770251529, "grad_norm": 0.7096531147286199, "learning_rate": 4.1046228710462285e-05, "loss": 0.6247, "step": 8435 }, { "epoch": 0.24641976029547014, "grad_norm": 0.7165412690360717, "learning_rate": 4.10705596107056e-05, "loss": 0.6769, "step": 8440 }, { "epoch": 0.24656574356578737, "grad_norm": 0.7639134518064602, "learning_rate": 4.1094890510948906e-05, "loss": 0.6, "step": 8445 }, { "epoch": 0.2467117268361046, "grad_norm": 0.6508914120539607, "learning_rate": 4.1119221411192214e-05, "loss": 0.6241, "step": 8450 }, { "epoch": 0.2468577101064218, "grad_norm": 0.6536498867224365, "learning_rate": 4.114355231143552e-05, "loss": 0.5944, "step": 8455 }, { "epoch": 0.24700369337673903, "grad_norm": 0.6890103874935606, "learning_rate": 4.116788321167883e-05, "loss": 0.5948, "step": 8460 }, { "epoch": 0.24714967664705625, "grad_norm": 0.7264352950975856, "learning_rate": 4.119221411192214e-05, "loss": 0.628, "step": 8465 }, { "epoch": 0.24729565991737346, "grad_norm": 0.7756410547711566, "learning_rate": 4.121654501216545e-05, "loss": 0.6606, "step": 8470 }, { "epoch": 0.24744164318769069, "grad_norm": 0.7746140719620775, "learning_rate": 4.124087591240876e-05, "loss": 0.6361, "step": 8475 }, { "epoch": 0.24758762645800791, "grad_norm": 0.7959451758279131, "learning_rate": 4.1265206812652065e-05, "loss": 0.6467, "step": 8480 }, { "epoch": 0.24773360972832514, "grad_norm": 0.7110298611370701, "learning_rate": 4.128953771289538e-05, "loss": 0.6259, "step": 8485 }, { "epoch": 0.24787959299864235, "grad_norm": 0.6780019588885866, "learning_rate": 4.131386861313869e-05, "loss": 0.6241, "step": 8490 }, { "epoch": 0.24802557626895957, "grad_norm": 0.6558995869994487, "learning_rate": 4.1338199513381994e-05, "loss": 0.6172, "step": 8495 }, { "epoch": 0.2481715595392768, "grad_norm": 0.7149915066745266, "learning_rate": 4.136253041362531e-05, "loss": 0.6519, "step": 8500 }, { "epoch": 0.24831754280959403, "grad_norm": 0.6694219731002278, "learning_rate": 4.1386861313868616e-05, "loss": 0.6226, "step": 8505 }, { "epoch": 0.24846352607991123, "grad_norm": 0.7099135161593804, "learning_rate": 4.1411192214111923e-05, "loss": 0.6144, "step": 8510 }, { "epoch": 0.24860950935022846, "grad_norm": 0.7208132517173812, "learning_rate": 4.143552311435524e-05, "loss": 0.6321, "step": 8515 }, { "epoch": 0.2487554926205457, "grad_norm": 0.7086282784860954, "learning_rate": 4.1459854014598545e-05, "loss": 0.6082, "step": 8520 }, { "epoch": 0.2489014758908629, "grad_norm": 0.62117812025317, "learning_rate": 4.148418491484185e-05, "loss": 0.5961, "step": 8525 }, { "epoch": 0.24904745916118012, "grad_norm": 0.6385663950471355, "learning_rate": 4.150851581508516e-05, "loss": 0.6485, "step": 8530 }, { "epoch": 0.24919344243149735, "grad_norm": 0.71136459237128, "learning_rate": 4.1532846715328474e-05, "loss": 0.6346, "step": 8535 }, { "epoch": 0.24933942570181458, "grad_norm": 0.6826865573540116, "learning_rate": 4.155717761557178e-05, "loss": 0.6335, "step": 8540 }, { "epoch": 0.24948540897213178, "grad_norm": 1.5949440372206956, "learning_rate": 4.158150851581509e-05, "loss": 0.6141, "step": 8545 }, { "epoch": 0.249631392242449, "grad_norm": 0.6764575622931731, "learning_rate": 4.16058394160584e-05, "loss": 0.6113, "step": 8550 }, { "epoch": 0.24977737551276624, "grad_norm": 0.7672699586627426, "learning_rate": 4.1630170316301704e-05, "loss": 0.6437, "step": 8555 }, { "epoch": 0.24992335878308347, "grad_norm": 0.7095610703260047, "learning_rate": 4.165450121654502e-05, "loss": 0.6701, "step": 8560 }, { "epoch": 0.2500693420534007, "grad_norm": 0.6991032959420912, "learning_rate": 4.1678832116788326e-05, "loss": 0.691, "step": 8565 }, { "epoch": 0.2502153253237179, "grad_norm": 0.6318553769015972, "learning_rate": 4.170316301703163e-05, "loss": 0.626, "step": 8570 }, { "epoch": 0.2503613085940351, "grad_norm": 0.6576202121433973, "learning_rate": 4.172749391727494e-05, "loss": 0.5915, "step": 8575 }, { "epoch": 0.25050729186435233, "grad_norm": 0.6365652580858931, "learning_rate": 4.175182481751825e-05, "loss": 0.5841, "step": 8580 }, { "epoch": 0.25065327513466956, "grad_norm": 0.6950868723990127, "learning_rate": 4.177615571776156e-05, "loss": 0.6232, "step": 8585 }, { "epoch": 0.2507992584049868, "grad_norm": 0.6705445072854658, "learning_rate": 4.180048661800487e-05, "loss": 0.6355, "step": 8590 }, { "epoch": 0.250945241675304, "grad_norm": 0.6215811294150474, "learning_rate": 4.182481751824818e-05, "loss": 0.656, "step": 8595 }, { "epoch": 0.25109122494562125, "grad_norm": 0.7404523127837509, "learning_rate": 4.1849148418491485e-05, "loss": 0.6282, "step": 8600 }, { "epoch": 0.2512372082159385, "grad_norm": 0.6816488734809539, "learning_rate": 4.187347931873479e-05, "loss": 0.6376, "step": 8605 }, { "epoch": 0.25138319148625565, "grad_norm": 0.7335339826952855, "learning_rate": 4.1897810218978106e-05, "loss": 0.6581, "step": 8610 }, { "epoch": 0.2515291747565729, "grad_norm": 0.7162126998934001, "learning_rate": 4.1922141119221414e-05, "loss": 0.6297, "step": 8615 }, { "epoch": 0.2516751580268901, "grad_norm": 0.6488137907183239, "learning_rate": 4.194647201946472e-05, "loss": 0.6194, "step": 8620 }, { "epoch": 0.25182114129720734, "grad_norm": 0.6642207882037423, "learning_rate": 4.197080291970803e-05, "loss": 0.6456, "step": 8625 }, { "epoch": 0.25196712456752457, "grad_norm": 0.6467685978884173, "learning_rate": 4.1995133819951336e-05, "loss": 0.6506, "step": 8630 }, { "epoch": 0.2521131078378418, "grad_norm": 0.6785863129609421, "learning_rate": 4.201946472019465e-05, "loss": 0.6418, "step": 8635 }, { "epoch": 0.25225909110815903, "grad_norm": 0.6724689573981676, "learning_rate": 4.204379562043796e-05, "loss": 0.6211, "step": 8640 }, { "epoch": 0.2524050743784762, "grad_norm": 0.7249035684984528, "learning_rate": 4.2068126520681265e-05, "loss": 0.6379, "step": 8645 }, { "epoch": 0.25255105764879343, "grad_norm": 0.7399832455423903, "learning_rate": 4.209245742092457e-05, "loss": 0.6704, "step": 8650 }, { "epoch": 0.25269704091911066, "grad_norm": 0.6679969635205074, "learning_rate": 4.211678832116788e-05, "loss": 0.6483, "step": 8655 }, { "epoch": 0.2528430241894279, "grad_norm": 0.7038923041560049, "learning_rate": 4.2141119221411194e-05, "loss": 0.6293, "step": 8660 }, { "epoch": 0.2529890074597451, "grad_norm": 0.7242507797325447, "learning_rate": 4.21654501216545e-05, "loss": 0.6759, "step": 8665 }, { "epoch": 0.25313499073006235, "grad_norm": 0.7100843814860884, "learning_rate": 4.218978102189781e-05, "loss": 0.6734, "step": 8670 }, { "epoch": 0.2532809740003796, "grad_norm": 0.6995927141932382, "learning_rate": 4.221411192214112e-05, "loss": 0.6497, "step": 8675 }, { "epoch": 0.25342695727069675, "grad_norm": 0.762813281263538, "learning_rate": 4.2238442822384424e-05, "loss": 0.6512, "step": 8680 }, { "epoch": 0.253572940541014, "grad_norm": 0.7242393156664918, "learning_rate": 4.226277372262774e-05, "loss": 0.6224, "step": 8685 }, { "epoch": 0.2537189238113312, "grad_norm": 0.751196607050425, "learning_rate": 4.2287104622871046e-05, "loss": 0.6297, "step": 8690 }, { "epoch": 0.25386490708164844, "grad_norm": 0.7755835397190157, "learning_rate": 4.231143552311435e-05, "loss": 0.6624, "step": 8695 }, { "epoch": 0.25401089035196567, "grad_norm": 0.72130470804512, "learning_rate": 4.233576642335767e-05, "loss": 0.647, "step": 8700 }, { "epoch": 0.2541568736222829, "grad_norm": 0.6900389796463375, "learning_rate": 4.2360097323600975e-05, "loss": 0.6161, "step": 8705 }, { "epoch": 0.2543028568926001, "grad_norm": 0.6956593039902138, "learning_rate": 4.238442822384428e-05, "loss": 0.6224, "step": 8710 }, { "epoch": 0.25444884016291736, "grad_norm": 0.6892241227694034, "learning_rate": 4.24087591240876e-05, "loss": 0.6606, "step": 8715 }, { "epoch": 0.25459482343323453, "grad_norm": 0.7380046027559907, "learning_rate": 4.2433090024330904e-05, "loss": 0.6539, "step": 8720 }, { "epoch": 0.25474080670355176, "grad_norm": 0.6903456130785492, "learning_rate": 4.245742092457421e-05, "loss": 0.6411, "step": 8725 }, { "epoch": 0.254886789973869, "grad_norm": 0.5957029227530986, "learning_rate": 4.2481751824817526e-05, "loss": 0.6102, "step": 8730 }, { "epoch": 0.2550327732441862, "grad_norm": 0.6754978046463795, "learning_rate": 4.250608272506083e-05, "loss": 0.6212, "step": 8735 }, { "epoch": 0.25517875651450345, "grad_norm": 0.7208294005011991, "learning_rate": 4.253041362530414e-05, "loss": 0.6466, "step": 8740 }, { "epoch": 0.2553247397848207, "grad_norm": 0.7008639190817127, "learning_rate": 4.255474452554745e-05, "loss": 0.6523, "step": 8745 }, { "epoch": 0.2554707230551379, "grad_norm": 0.6668768297933493, "learning_rate": 4.2579075425790756e-05, "loss": 0.6403, "step": 8750 }, { "epoch": 0.2556167063254551, "grad_norm": 0.7527570966844452, "learning_rate": 4.260340632603407e-05, "loss": 0.6464, "step": 8755 }, { "epoch": 0.2557626895957723, "grad_norm": 0.6638411085913487, "learning_rate": 4.262773722627738e-05, "loss": 0.5897, "step": 8760 }, { "epoch": 0.25590867286608954, "grad_norm": 0.6750592177623281, "learning_rate": 4.2652068126520685e-05, "loss": 0.6357, "step": 8765 }, { "epoch": 0.25605465613640677, "grad_norm": 0.7566274692337256, "learning_rate": 4.267639902676399e-05, "loss": 0.6355, "step": 8770 }, { "epoch": 0.256200639406724, "grad_norm": 0.7624135188554543, "learning_rate": 4.27007299270073e-05, "loss": 0.7031, "step": 8775 }, { "epoch": 0.2563466226770412, "grad_norm": 0.6354555748361398, "learning_rate": 4.2725060827250614e-05, "loss": 0.6278, "step": 8780 }, { "epoch": 0.25649260594735845, "grad_norm": 0.6249895669440003, "learning_rate": 4.274939172749392e-05, "loss": 0.5722, "step": 8785 }, { "epoch": 0.25663858921767563, "grad_norm": 0.6694865536227529, "learning_rate": 4.277372262773723e-05, "loss": 0.5911, "step": 8790 }, { "epoch": 0.25678457248799286, "grad_norm": 0.6650550837691017, "learning_rate": 4.2798053527980536e-05, "loss": 0.6082, "step": 8795 }, { "epoch": 0.2569305557583101, "grad_norm": 0.6793446132516151, "learning_rate": 4.2822384428223844e-05, "loss": 0.588, "step": 8800 }, { "epoch": 0.2570765390286273, "grad_norm": 0.6308369660918431, "learning_rate": 4.284671532846716e-05, "loss": 0.6335, "step": 8805 }, { "epoch": 0.25722252229894454, "grad_norm": 0.7691648723745644, "learning_rate": 4.2871046228710465e-05, "loss": 0.6721, "step": 8810 }, { "epoch": 0.2573685055692618, "grad_norm": 0.6453013690244378, "learning_rate": 4.289537712895377e-05, "loss": 0.6616, "step": 8815 }, { "epoch": 0.257514488839579, "grad_norm": 0.6762570216866051, "learning_rate": 4.291970802919708e-05, "loss": 0.6525, "step": 8820 }, { "epoch": 0.25766047210989623, "grad_norm": 0.7467899174750237, "learning_rate": 4.294403892944039e-05, "loss": 0.6577, "step": 8825 }, { "epoch": 0.2578064553802134, "grad_norm": 0.7083698000522516, "learning_rate": 4.29683698296837e-05, "loss": 0.64, "step": 8830 }, { "epoch": 0.25795243865053064, "grad_norm": 0.6522126807246118, "learning_rate": 4.299270072992701e-05, "loss": 0.6076, "step": 8835 }, { "epoch": 0.25809842192084786, "grad_norm": 0.755058708693052, "learning_rate": 4.301703163017032e-05, "loss": 0.6567, "step": 8840 }, { "epoch": 0.2582444051911651, "grad_norm": 0.7198484653733076, "learning_rate": 4.3041362530413624e-05, "loss": 0.5826, "step": 8845 }, { "epoch": 0.2583903884614823, "grad_norm": 0.7057399917069733, "learning_rate": 4.306569343065693e-05, "loss": 0.6503, "step": 8850 }, { "epoch": 0.25853637173179955, "grad_norm": 0.6988575199986146, "learning_rate": 4.3090024330900246e-05, "loss": 0.6041, "step": 8855 }, { "epoch": 0.2586823550021168, "grad_norm": 0.6799893371840533, "learning_rate": 4.3114355231143553e-05, "loss": 0.6195, "step": 8860 }, { "epoch": 0.25882833827243396, "grad_norm": 0.6506180727580385, "learning_rate": 4.313868613138686e-05, "loss": 0.641, "step": 8865 }, { "epoch": 0.2589743215427512, "grad_norm": 0.716832155513108, "learning_rate": 4.316301703163017e-05, "loss": 0.6174, "step": 8870 }, { "epoch": 0.2591203048130684, "grad_norm": 0.6813570796298795, "learning_rate": 4.318734793187348e-05, "loss": 0.6302, "step": 8875 }, { "epoch": 0.25926628808338564, "grad_norm": 0.7268629829938544, "learning_rate": 4.321167883211679e-05, "loss": 0.6362, "step": 8880 }, { "epoch": 0.2594122713537029, "grad_norm": 0.6200597914015429, "learning_rate": 4.32360097323601e-05, "loss": 0.5933, "step": 8885 }, { "epoch": 0.2595582546240201, "grad_norm": 0.733055181574479, "learning_rate": 4.3260340632603405e-05, "loss": 0.6344, "step": 8890 }, { "epoch": 0.25970423789433733, "grad_norm": 0.7166457064565452, "learning_rate": 4.328467153284671e-05, "loss": 0.6261, "step": 8895 }, { "epoch": 0.2598502211646545, "grad_norm": 0.7130699066078638, "learning_rate": 4.3309002433090027e-05, "loss": 0.6236, "step": 8900 }, { "epoch": 0.25999620443497173, "grad_norm": 0.5970166516482048, "learning_rate": 4.3333333333333334e-05, "loss": 0.5812, "step": 8905 }, { "epoch": 0.26014218770528896, "grad_norm": 0.6588001208983794, "learning_rate": 4.335766423357664e-05, "loss": 0.5982, "step": 8910 }, { "epoch": 0.2602881709756062, "grad_norm": 0.7338289724970768, "learning_rate": 4.3381995133819956e-05, "loss": 0.6635, "step": 8915 }, { "epoch": 0.2604341542459234, "grad_norm": 0.8285221881682702, "learning_rate": 4.340632603406326e-05, "loss": 0.6624, "step": 8920 }, { "epoch": 0.26058013751624065, "grad_norm": 0.7579197206273319, "learning_rate": 4.343065693430657e-05, "loss": 0.621, "step": 8925 }, { "epoch": 0.2607261207865579, "grad_norm": 0.7343256172181518, "learning_rate": 4.3454987834549885e-05, "loss": 0.6167, "step": 8930 }, { "epoch": 0.2608721040568751, "grad_norm": 0.6913936413168462, "learning_rate": 4.347931873479319e-05, "loss": 0.6012, "step": 8935 }, { "epoch": 0.2610180873271923, "grad_norm": 0.6386295012440955, "learning_rate": 4.35036496350365e-05, "loss": 0.6712, "step": 8940 }, { "epoch": 0.2611640705975095, "grad_norm": 0.714319932388476, "learning_rate": 4.352798053527981e-05, "loss": 0.6382, "step": 8945 }, { "epoch": 0.26131005386782674, "grad_norm": 0.705907374637893, "learning_rate": 4.355231143552312e-05, "loss": 0.6119, "step": 8950 }, { "epoch": 0.26145603713814397, "grad_norm": 0.6461564864114065, "learning_rate": 4.357664233576643e-05, "loss": 0.5942, "step": 8955 }, { "epoch": 0.2616020204084612, "grad_norm": 0.7075259999215104, "learning_rate": 4.3600973236009736e-05, "loss": 0.6397, "step": 8960 }, { "epoch": 0.26174800367877843, "grad_norm": 0.6794245785412478, "learning_rate": 4.3625304136253044e-05, "loss": 0.6318, "step": 8965 }, { "epoch": 0.26189398694909566, "grad_norm": 0.6987731433151896, "learning_rate": 4.364963503649635e-05, "loss": 0.6032, "step": 8970 }, { "epoch": 0.26203997021941283, "grad_norm": 0.7020489342527838, "learning_rate": 4.3673965936739665e-05, "loss": 0.6438, "step": 8975 }, { "epoch": 0.26218595348973006, "grad_norm": 0.6997762680868904, "learning_rate": 4.369829683698297e-05, "loss": 0.6328, "step": 8980 }, { "epoch": 0.2623319367600473, "grad_norm": 0.6150374374516636, "learning_rate": 4.372262773722628e-05, "loss": 0.5857, "step": 8985 }, { "epoch": 0.2624779200303645, "grad_norm": 0.5862493281101487, "learning_rate": 4.374695863746959e-05, "loss": 0.6024, "step": 8990 }, { "epoch": 0.26262390330068175, "grad_norm": 0.6593301285826855, "learning_rate": 4.3771289537712895e-05, "loss": 0.6365, "step": 8995 }, { "epoch": 0.262769886570999, "grad_norm": 0.6464393388004055, "learning_rate": 4.379562043795621e-05, "loss": 0.6455, "step": 9000 }, { "epoch": 0.2629158698413162, "grad_norm": 0.594075179832482, "learning_rate": 4.381995133819952e-05, "loss": 0.597, "step": 9005 }, { "epoch": 0.2630618531116334, "grad_norm": 0.610719005003346, "learning_rate": 4.3844282238442824e-05, "loss": 0.5838, "step": 9010 }, { "epoch": 0.2632078363819506, "grad_norm": 0.6470468667639276, "learning_rate": 4.386861313868613e-05, "loss": 0.5709, "step": 9015 }, { "epoch": 0.26335381965226784, "grad_norm": 0.7304259319774902, "learning_rate": 4.389294403892944e-05, "loss": 0.6918, "step": 9020 }, { "epoch": 0.26349980292258507, "grad_norm": 0.6724938918569654, "learning_rate": 4.3917274939172754e-05, "loss": 0.6311, "step": 9025 }, { "epoch": 0.2636457861929023, "grad_norm": 0.6374611065942237, "learning_rate": 4.394160583941606e-05, "loss": 0.5976, "step": 9030 }, { "epoch": 0.2637917694632195, "grad_norm": 0.6823505676784346, "learning_rate": 4.396593673965937e-05, "loss": 0.6262, "step": 9035 }, { "epoch": 0.26393775273353676, "grad_norm": 0.6741218782463025, "learning_rate": 4.3990267639902676e-05, "loss": 0.6128, "step": 9040 }, { "epoch": 0.264083736003854, "grad_norm": 0.6468850157203718, "learning_rate": 4.401459854014598e-05, "loss": 0.5822, "step": 9045 }, { "epoch": 0.26422971927417116, "grad_norm": 0.7077675839653137, "learning_rate": 4.40389294403893e-05, "loss": 0.5941, "step": 9050 }, { "epoch": 0.2643757025444884, "grad_norm": 0.6269597001721301, "learning_rate": 4.4063260340632605e-05, "loss": 0.6483, "step": 9055 }, { "epoch": 0.2645216858148056, "grad_norm": 0.7216153319356166, "learning_rate": 4.408759124087591e-05, "loss": 0.6025, "step": 9060 }, { "epoch": 0.26466766908512285, "grad_norm": 0.758534329865464, "learning_rate": 4.411192214111922e-05, "loss": 0.6685, "step": 9065 }, { "epoch": 0.2648136523554401, "grad_norm": 0.6662385563641573, "learning_rate": 4.4136253041362534e-05, "loss": 0.6394, "step": 9070 }, { "epoch": 0.2649596356257573, "grad_norm": 0.759435581981224, "learning_rate": 4.416058394160584e-05, "loss": 0.6261, "step": 9075 }, { "epoch": 0.26510561889607454, "grad_norm": 0.670528761271008, "learning_rate": 4.418491484184915e-05, "loss": 0.6841, "step": 9080 }, { "epoch": 0.2652516021663917, "grad_norm": 0.6566771138345927, "learning_rate": 4.4209245742092456e-05, "loss": 0.5998, "step": 9085 }, { "epoch": 0.26539758543670894, "grad_norm": 0.83730148028778, "learning_rate": 4.4233576642335764e-05, "loss": 0.6047, "step": 9090 }, { "epoch": 0.26554356870702617, "grad_norm": 0.6697748602995394, "learning_rate": 4.425790754257908e-05, "loss": 0.6362, "step": 9095 }, { "epoch": 0.2656895519773434, "grad_norm": 0.660428456063564, "learning_rate": 4.4282238442822386e-05, "loss": 0.6647, "step": 9100 }, { "epoch": 0.2658355352476606, "grad_norm": 0.7559590763966676, "learning_rate": 4.430656934306569e-05, "loss": 0.6334, "step": 9105 }, { "epoch": 0.26598151851797786, "grad_norm": 0.7187313729187563, "learning_rate": 4.4330900243309e-05, "loss": 0.6222, "step": 9110 }, { "epoch": 0.2661275017882951, "grad_norm": 0.7213953797852982, "learning_rate": 4.4355231143552315e-05, "loss": 0.6254, "step": 9115 }, { "epoch": 0.26627348505861226, "grad_norm": 0.7829504644213579, "learning_rate": 4.437956204379562e-05, "loss": 0.627, "step": 9120 }, { "epoch": 0.2664194683289295, "grad_norm": 0.7799787064330898, "learning_rate": 4.440389294403893e-05, "loss": 0.6513, "step": 9125 }, { "epoch": 0.2665654515992467, "grad_norm": 0.7439195726506364, "learning_rate": 4.4428223844282244e-05, "loss": 0.6009, "step": 9130 }, { "epoch": 0.26671143486956395, "grad_norm": 0.6692650516526419, "learning_rate": 4.445255474452555e-05, "loss": 0.6233, "step": 9135 }, { "epoch": 0.2668574181398812, "grad_norm": 0.6875141036169742, "learning_rate": 4.447688564476886e-05, "loss": 0.6303, "step": 9140 }, { "epoch": 0.2670034014101984, "grad_norm": 0.679391567918564, "learning_rate": 4.450121654501217e-05, "loss": 0.6341, "step": 9145 }, { "epoch": 0.26714938468051563, "grad_norm": 0.6664224061407676, "learning_rate": 4.452554744525548e-05, "loss": 0.6121, "step": 9150 }, { "epoch": 0.2672953679508328, "grad_norm": 0.6967831688110182, "learning_rate": 4.454987834549879e-05, "loss": 0.6703, "step": 9155 }, { "epoch": 0.26744135122115004, "grad_norm": 0.8142685309300464, "learning_rate": 4.4574209245742095e-05, "loss": 0.6528, "step": 9160 }, { "epoch": 0.26758733449146727, "grad_norm": 0.6991382775691959, "learning_rate": 4.45985401459854e-05, "loss": 0.6143, "step": 9165 }, { "epoch": 0.2677333177617845, "grad_norm": 0.6967921535292766, "learning_rate": 4.462287104622872e-05, "loss": 0.6332, "step": 9170 }, { "epoch": 0.2678793010321017, "grad_norm": 0.6723359316614823, "learning_rate": 4.4647201946472024e-05, "loss": 0.6554, "step": 9175 }, { "epoch": 0.26802528430241895, "grad_norm": 0.6628259373175462, "learning_rate": 4.467153284671533e-05, "loss": 0.6227, "step": 9180 }, { "epoch": 0.2681712675727362, "grad_norm": 0.6579854195218746, "learning_rate": 4.469586374695864e-05, "loss": 0.6105, "step": 9185 }, { "epoch": 0.2683172508430534, "grad_norm": 0.7767476111992493, "learning_rate": 4.472019464720195e-05, "loss": 0.6231, "step": 9190 }, { "epoch": 0.2684632341133706, "grad_norm": 0.6665264402490156, "learning_rate": 4.474452554744526e-05, "loss": 0.6253, "step": 9195 }, { "epoch": 0.2686092173836878, "grad_norm": 0.673493046695988, "learning_rate": 4.476885644768857e-05, "loss": 0.6258, "step": 9200 }, { "epoch": 0.26875520065400504, "grad_norm": 0.6439154840678812, "learning_rate": 4.4793187347931876e-05, "loss": 0.6232, "step": 9205 }, { "epoch": 0.2689011839243223, "grad_norm": 0.6850357435830109, "learning_rate": 4.4817518248175183e-05, "loss": 0.6034, "step": 9210 }, { "epoch": 0.2690471671946395, "grad_norm": 0.6526769961593598, "learning_rate": 4.484184914841849e-05, "loss": 0.6547, "step": 9215 }, { "epoch": 0.26919315046495673, "grad_norm": 0.6457975191443547, "learning_rate": 4.4866180048661805e-05, "loss": 0.6472, "step": 9220 }, { "epoch": 0.26933913373527396, "grad_norm": 0.7188223870380711, "learning_rate": 4.489051094890511e-05, "loss": 0.6213, "step": 9225 }, { "epoch": 0.26948511700559113, "grad_norm": 0.6946808447427443, "learning_rate": 4.491484184914842e-05, "loss": 0.6243, "step": 9230 }, { "epoch": 0.26963110027590836, "grad_norm": 0.7467957124618672, "learning_rate": 4.493917274939173e-05, "loss": 0.6388, "step": 9235 }, { "epoch": 0.2697770835462256, "grad_norm": 0.6838499992518882, "learning_rate": 4.4963503649635035e-05, "loss": 0.6523, "step": 9240 }, { "epoch": 0.2699230668165428, "grad_norm": 0.7008447681772246, "learning_rate": 4.498783454987835e-05, "loss": 0.645, "step": 9245 }, { "epoch": 0.27006905008686005, "grad_norm": 0.6660167455093713, "learning_rate": 4.5012165450121657e-05, "loss": 0.6357, "step": 9250 }, { "epoch": 0.2702150333571773, "grad_norm": 0.676240547605085, "learning_rate": 4.5036496350364964e-05, "loss": 0.6295, "step": 9255 }, { "epoch": 0.2703610166274945, "grad_norm": 0.7017116415906482, "learning_rate": 4.506082725060827e-05, "loss": 0.6256, "step": 9260 }, { "epoch": 0.2705069998978117, "grad_norm": 0.7003478766868524, "learning_rate": 4.5085158150851586e-05, "loss": 0.6483, "step": 9265 }, { "epoch": 0.2706529831681289, "grad_norm": 0.6195726084018173, "learning_rate": 4.510948905109489e-05, "loss": 0.6087, "step": 9270 }, { "epoch": 0.27079896643844614, "grad_norm": 0.6502685392914859, "learning_rate": 4.51338199513382e-05, "loss": 0.5884, "step": 9275 }, { "epoch": 0.27094494970876337, "grad_norm": 0.7240164891819995, "learning_rate": 4.515815085158151e-05, "loss": 0.6331, "step": 9280 }, { "epoch": 0.2710909329790806, "grad_norm": 0.6879819182006552, "learning_rate": 4.5182481751824815e-05, "loss": 0.6212, "step": 9285 }, { "epoch": 0.27123691624939783, "grad_norm": 0.6416605876885587, "learning_rate": 4.520681265206813e-05, "loss": 0.6261, "step": 9290 }, { "epoch": 0.27138289951971506, "grad_norm": 0.6589309404963843, "learning_rate": 4.523114355231144e-05, "loss": 0.6029, "step": 9295 }, { "epoch": 0.2715288827900323, "grad_norm": 0.7038849123530598, "learning_rate": 4.5255474452554745e-05, "loss": 0.6162, "step": 9300 }, { "epoch": 0.27167486606034946, "grad_norm": 0.6964127177681786, "learning_rate": 4.527980535279805e-05, "loss": 0.6309, "step": 9305 }, { "epoch": 0.2718208493306667, "grad_norm": 0.6677202605083021, "learning_rate": 4.530413625304136e-05, "loss": 0.592, "step": 9310 }, { "epoch": 0.2719668326009839, "grad_norm": 0.7183107340390894, "learning_rate": 4.5328467153284674e-05, "loss": 0.6621, "step": 9315 }, { "epoch": 0.27211281587130115, "grad_norm": 0.6145657758290859, "learning_rate": 4.535279805352798e-05, "loss": 0.6233, "step": 9320 }, { "epoch": 0.2722587991416184, "grad_norm": 0.6811532465160341, "learning_rate": 4.537712895377129e-05, "loss": 0.6299, "step": 9325 }, { "epoch": 0.2724047824119356, "grad_norm": 0.715968905538144, "learning_rate": 4.54014598540146e-05, "loss": 0.63, "step": 9330 }, { "epoch": 0.27255076568225284, "grad_norm": 0.6656934653965813, "learning_rate": 4.542579075425791e-05, "loss": 0.616, "step": 9335 }, { "epoch": 0.27269674895257, "grad_norm": 0.7137759171830748, "learning_rate": 4.545012165450122e-05, "loss": 0.6457, "step": 9340 }, { "epoch": 0.27284273222288724, "grad_norm": 0.6794306730021675, "learning_rate": 4.547445255474453e-05, "loss": 0.6532, "step": 9345 }, { "epoch": 0.27298871549320447, "grad_norm": 0.721100328399521, "learning_rate": 4.549878345498784e-05, "loss": 0.6587, "step": 9350 }, { "epoch": 0.2731346987635217, "grad_norm": 0.6655766132369294, "learning_rate": 4.552311435523115e-05, "loss": 0.6333, "step": 9355 }, { "epoch": 0.27328068203383893, "grad_norm": 0.6289398039863169, "learning_rate": 4.5547445255474454e-05, "loss": 0.6239, "step": 9360 }, { "epoch": 0.27342666530415616, "grad_norm": 0.6574417215176658, "learning_rate": 4.557177615571777e-05, "loss": 0.5951, "step": 9365 }, { "epoch": 0.2735726485744734, "grad_norm": 0.7224176761242918, "learning_rate": 4.5596107055961076e-05, "loss": 0.6554, "step": 9370 }, { "epoch": 0.27371863184479056, "grad_norm": 0.6529332087190106, "learning_rate": 4.5620437956204383e-05, "loss": 0.6255, "step": 9375 }, { "epoch": 0.2738646151151078, "grad_norm": 0.6490429694816967, "learning_rate": 4.564476885644769e-05, "loss": 0.6295, "step": 9380 }, { "epoch": 0.274010598385425, "grad_norm": 0.6660436094005319, "learning_rate": 4.5669099756691e-05, "loss": 0.6333, "step": 9385 }, { "epoch": 0.27415658165574225, "grad_norm": 0.6442733205897975, "learning_rate": 4.569343065693431e-05, "loss": 0.6301, "step": 9390 }, { "epoch": 0.2743025649260595, "grad_norm": 0.7037304481859453, "learning_rate": 4.571776155717762e-05, "loss": 0.6299, "step": 9395 }, { "epoch": 0.2744485481963767, "grad_norm": 0.6665714264283616, "learning_rate": 4.574209245742093e-05, "loss": 0.6498, "step": 9400 }, { "epoch": 0.27459453146669394, "grad_norm": 0.6128125719070341, "learning_rate": 4.5766423357664235e-05, "loss": 0.5884, "step": 9405 }, { "epoch": 0.27474051473701117, "grad_norm": 0.6889551222038695, "learning_rate": 4.579075425790754e-05, "loss": 0.6355, "step": 9410 }, { "epoch": 0.27488649800732834, "grad_norm": 0.6201198228762245, "learning_rate": 4.581508515815086e-05, "loss": 0.6075, "step": 9415 }, { "epoch": 0.27503248127764557, "grad_norm": 0.6904270083154068, "learning_rate": 4.5839416058394164e-05, "loss": 0.6651, "step": 9420 }, { "epoch": 0.2751784645479628, "grad_norm": 0.7018261677313254, "learning_rate": 4.586374695863747e-05, "loss": 0.6132, "step": 9425 }, { "epoch": 0.27532444781828, "grad_norm": 0.6380225280895624, "learning_rate": 4.588807785888078e-05, "loss": 0.6815, "step": 9430 }, { "epoch": 0.27547043108859726, "grad_norm": 0.6584907689456101, "learning_rate": 4.591240875912409e-05, "loss": 0.6288, "step": 9435 }, { "epoch": 0.2756164143589145, "grad_norm": 0.6758939181428206, "learning_rate": 4.59367396593674e-05, "loss": 0.6252, "step": 9440 }, { "epoch": 0.2757623976292317, "grad_norm": 0.7188108575574385, "learning_rate": 4.596107055961071e-05, "loss": 0.6372, "step": 9445 }, { "epoch": 0.2759083808995489, "grad_norm": 0.6556838712931435, "learning_rate": 4.5985401459854016e-05, "loss": 0.6742, "step": 9450 }, { "epoch": 0.2760543641698661, "grad_norm": 0.6689857143587921, "learning_rate": 4.600973236009732e-05, "loss": 0.6293, "step": 9455 }, { "epoch": 0.27620034744018335, "grad_norm": 0.6598559306202467, "learning_rate": 4.603406326034064e-05, "loss": 0.6495, "step": 9460 }, { "epoch": 0.2763463307105006, "grad_norm": 0.695354213368261, "learning_rate": 4.6058394160583945e-05, "loss": 0.6424, "step": 9465 }, { "epoch": 0.2764923139808178, "grad_norm": 0.7125367692347576, "learning_rate": 4.608272506082725e-05, "loss": 0.6171, "step": 9470 }, { "epoch": 0.27663829725113503, "grad_norm": 0.734146453767496, "learning_rate": 4.610705596107056e-05, "loss": 0.6672, "step": 9475 }, { "epoch": 0.27678428052145226, "grad_norm": 0.6354333432581142, "learning_rate": 4.613138686131387e-05, "loss": 0.629, "step": 9480 }, { "epoch": 0.27693026379176944, "grad_norm": 0.662013771800081, "learning_rate": 4.615571776155718e-05, "loss": 0.6241, "step": 9485 }, { "epoch": 0.27707624706208667, "grad_norm": 0.7156667253431096, "learning_rate": 4.618004866180049e-05, "loss": 0.6579, "step": 9490 }, { "epoch": 0.2772222303324039, "grad_norm": 0.6799106982231562, "learning_rate": 4.6204379562043796e-05, "loss": 0.6129, "step": 9495 }, { "epoch": 0.2773682136027211, "grad_norm": 0.6328797249780734, "learning_rate": 4.6228710462287104e-05, "loss": 0.6047, "step": 9500 }, { "epoch": 0.27751419687303835, "grad_norm": 0.7587217068173643, "learning_rate": 4.625304136253041e-05, "loss": 0.6839, "step": 9505 }, { "epoch": 0.2776601801433556, "grad_norm": 0.6368433506177269, "learning_rate": 4.6277372262773725e-05, "loss": 0.6241, "step": 9510 }, { "epoch": 0.2778061634136728, "grad_norm": 0.619000082232031, "learning_rate": 4.630170316301703e-05, "loss": 0.6437, "step": 9515 }, { "epoch": 0.27795214668399004, "grad_norm": 0.8040221499972643, "learning_rate": 4.632603406326034e-05, "loss": 0.6719, "step": 9520 }, { "epoch": 0.2780981299543072, "grad_norm": 0.6564954734135724, "learning_rate": 4.635036496350365e-05, "loss": 0.6385, "step": 9525 }, { "epoch": 0.27824411322462445, "grad_norm": 0.6388751216304634, "learning_rate": 4.637469586374696e-05, "loss": 0.6091, "step": 9530 }, { "epoch": 0.2783900964949417, "grad_norm": 0.6456240670826258, "learning_rate": 4.639902676399027e-05, "loss": 0.6313, "step": 9535 }, { "epoch": 0.2785360797652589, "grad_norm": 0.7354341207455812, "learning_rate": 4.642335766423358e-05, "loss": 0.6237, "step": 9540 }, { "epoch": 0.27868206303557613, "grad_norm": 0.6452304788038449, "learning_rate": 4.644768856447689e-05, "loss": 0.6235, "step": 9545 }, { "epoch": 0.27882804630589336, "grad_norm": 0.6158500427993681, "learning_rate": 4.64720194647202e-05, "loss": 0.6241, "step": 9550 }, { "epoch": 0.2789740295762106, "grad_norm": 0.6350994722901018, "learning_rate": 4.6496350364963506e-05, "loss": 0.6136, "step": 9555 }, { "epoch": 0.27912001284652777, "grad_norm": 0.5937548090046418, "learning_rate": 4.652068126520682e-05, "loss": 0.6244, "step": 9560 }, { "epoch": 0.279265996116845, "grad_norm": 0.6376313615984753, "learning_rate": 4.654501216545013e-05, "loss": 0.6314, "step": 9565 }, { "epoch": 0.2794119793871622, "grad_norm": 0.6716721336560023, "learning_rate": 4.6569343065693435e-05, "loss": 0.6627, "step": 9570 }, { "epoch": 0.27955796265747945, "grad_norm": 0.6660143151238226, "learning_rate": 4.659367396593674e-05, "loss": 0.6049, "step": 9575 }, { "epoch": 0.2797039459277967, "grad_norm": 0.7077239912890698, "learning_rate": 4.661800486618005e-05, "loss": 0.6521, "step": 9580 }, { "epoch": 0.2798499291981139, "grad_norm": 0.6288992784617716, "learning_rate": 4.6642335766423364e-05, "loss": 0.6373, "step": 9585 }, { "epoch": 0.27999591246843114, "grad_norm": 0.6496897541299238, "learning_rate": 4.666666666666667e-05, "loss": 0.6356, "step": 9590 }, { "epoch": 0.2801418957387483, "grad_norm": 0.6572682507574176, "learning_rate": 4.669099756690998e-05, "loss": 0.6143, "step": 9595 }, { "epoch": 0.28028787900906554, "grad_norm": 0.6561100100564381, "learning_rate": 4.6715328467153287e-05, "loss": 0.6403, "step": 9600 }, { "epoch": 0.2804338622793828, "grad_norm": 0.6751821697987044, "learning_rate": 4.6739659367396594e-05, "loss": 0.6289, "step": 9605 }, { "epoch": 0.2805798455497, "grad_norm": 0.6507178329600638, "learning_rate": 4.676399026763991e-05, "loss": 0.6276, "step": 9610 }, { "epoch": 0.28072582882001723, "grad_norm": 0.6910613866646835, "learning_rate": 4.6788321167883216e-05, "loss": 0.6581, "step": 9615 }, { "epoch": 0.28087181209033446, "grad_norm": 0.6209785262903242, "learning_rate": 4.681265206812652e-05, "loss": 0.6491, "step": 9620 }, { "epoch": 0.2810177953606517, "grad_norm": 0.6974795890642044, "learning_rate": 4.683698296836983e-05, "loss": 0.6539, "step": 9625 }, { "epoch": 0.28116377863096886, "grad_norm": 0.693951230024555, "learning_rate": 4.6861313868613145e-05, "loss": 0.6478, "step": 9630 }, { "epoch": 0.2813097619012861, "grad_norm": 0.6758179261530638, "learning_rate": 4.688564476885645e-05, "loss": 0.6372, "step": 9635 }, { "epoch": 0.2814557451716033, "grad_norm": 0.7069978718950024, "learning_rate": 4.690997566909976e-05, "loss": 0.6398, "step": 9640 }, { "epoch": 0.28160172844192055, "grad_norm": 0.6576222597058559, "learning_rate": 4.693430656934307e-05, "loss": 0.637, "step": 9645 }, { "epoch": 0.2817477117122378, "grad_norm": 0.6427516134989116, "learning_rate": 4.6958637469586375e-05, "loss": 0.5943, "step": 9650 }, { "epoch": 0.281893694982555, "grad_norm": 0.7350097862573977, "learning_rate": 4.698296836982969e-05, "loss": 0.636, "step": 9655 }, { "epoch": 0.28203967825287224, "grad_norm": 0.657783311192349, "learning_rate": 4.7007299270072996e-05, "loss": 0.633, "step": 9660 }, { "epoch": 0.28218566152318947, "grad_norm": 0.6508283612798078, "learning_rate": 4.7031630170316304e-05, "loss": 0.6162, "step": 9665 }, { "epoch": 0.28233164479350664, "grad_norm": 0.6970438874352676, "learning_rate": 4.705596107055961e-05, "loss": 0.6239, "step": 9670 }, { "epoch": 0.28247762806382387, "grad_norm": 0.6618253002138575, "learning_rate": 4.708029197080292e-05, "loss": 0.5822, "step": 9675 }, { "epoch": 0.2826236113341411, "grad_norm": 0.6659836823245017, "learning_rate": 4.710462287104623e-05, "loss": 0.6586, "step": 9680 }, { "epoch": 0.28276959460445833, "grad_norm": 0.6406881897617358, "learning_rate": 4.712895377128954e-05, "loss": 0.6283, "step": 9685 }, { "epoch": 0.28291557787477556, "grad_norm": 0.6526149196092059, "learning_rate": 4.715328467153285e-05, "loss": 0.6108, "step": 9690 }, { "epoch": 0.2830615611450928, "grad_norm": 0.7291245138226041, "learning_rate": 4.7177615571776155e-05, "loss": 0.6769, "step": 9695 }, { "epoch": 0.28320754441541, "grad_norm": 0.6737975075810889, "learning_rate": 4.720194647201946e-05, "loss": 0.663, "step": 9700 }, { "epoch": 0.2833535276857272, "grad_norm": 0.6990463824849273, "learning_rate": 4.722627737226278e-05, "loss": 0.635, "step": 9705 }, { "epoch": 0.2834995109560444, "grad_norm": 0.6218527921195376, "learning_rate": 4.7250608272506084e-05, "loss": 0.6153, "step": 9710 }, { "epoch": 0.28364549422636165, "grad_norm": 0.7625164749124026, "learning_rate": 4.727493917274939e-05, "loss": 0.6301, "step": 9715 }, { "epoch": 0.2837914774966789, "grad_norm": 0.6995981976953023, "learning_rate": 4.72992700729927e-05, "loss": 0.6587, "step": 9720 }, { "epoch": 0.2839374607669961, "grad_norm": 0.6268266875877597, "learning_rate": 4.732360097323601e-05, "loss": 0.6423, "step": 9725 }, { "epoch": 0.28408344403731334, "grad_norm": 0.65084832946764, "learning_rate": 4.734793187347932e-05, "loss": 0.6433, "step": 9730 }, { "epoch": 0.28422942730763057, "grad_norm": 0.6233374766423536, "learning_rate": 4.737226277372263e-05, "loss": 0.6543, "step": 9735 }, { "epoch": 0.28437541057794774, "grad_norm": 0.6649959517764154, "learning_rate": 4.7396593673965936e-05, "loss": 0.6357, "step": 9740 }, { "epoch": 0.28452139384826497, "grad_norm": 0.7102286257910707, "learning_rate": 4.742092457420925e-05, "loss": 0.6752, "step": 9745 }, { "epoch": 0.2846673771185822, "grad_norm": 0.6426433390959418, "learning_rate": 4.744525547445256e-05, "loss": 0.6392, "step": 9750 }, { "epoch": 0.28481336038889943, "grad_norm": 0.5979165916628721, "learning_rate": 4.7469586374695865e-05, "loss": 0.6271, "step": 9755 }, { "epoch": 0.28495934365921666, "grad_norm": 0.6477041192199187, "learning_rate": 4.749391727493918e-05, "loss": 0.6022, "step": 9760 }, { "epoch": 0.2851053269295339, "grad_norm": 0.6155773855854846, "learning_rate": 4.7518248175182487e-05, "loss": 0.596, "step": 9765 }, { "epoch": 0.2852513101998511, "grad_norm": 0.6967983741784204, "learning_rate": 4.7542579075425794e-05, "loss": 0.662, "step": 9770 }, { "epoch": 0.28539729347016835, "grad_norm": 0.6149010832502128, "learning_rate": 4.75669099756691e-05, "loss": 0.5931, "step": 9775 }, { "epoch": 0.2855432767404855, "grad_norm": 0.6812002349521803, "learning_rate": 4.7591240875912416e-05, "loss": 0.6673, "step": 9780 }, { "epoch": 0.28568926001080275, "grad_norm": 0.6725713976784659, "learning_rate": 4.761557177615572e-05, "loss": 0.605, "step": 9785 }, { "epoch": 0.28583524328112, "grad_norm": 0.6391927905258726, "learning_rate": 4.763990267639903e-05, "loss": 0.643, "step": 9790 }, { "epoch": 0.2859812265514372, "grad_norm": 0.6683825132081725, "learning_rate": 4.766423357664234e-05, "loss": 0.608, "step": 9795 }, { "epoch": 0.28612720982175444, "grad_norm": 0.6446310390701212, "learning_rate": 4.7688564476885646e-05, "loss": 0.6153, "step": 9800 }, { "epoch": 0.28627319309207166, "grad_norm": 0.6587917285125052, "learning_rate": 4.771289537712896e-05, "loss": 0.5986, "step": 9805 }, { "epoch": 0.2864191763623889, "grad_norm": 0.6624268630993473, "learning_rate": 4.773722627737227e-05, "loss": 0.6373, "step": 9810 }, { "epoch": 0.28656515963270607, "grad_norm": 0.6259394866228557, "learning_rate": 4.7761557177615575e-05, "loss": 0.6035, "step": 9815 }, { "epoch": 0.2867111429030233, "grad_norm": 0.7155951700036312, "learning_rate": 4.778588807785888e-05, "loss": 0.6611, "step": 9820 }, { "epoch": 0.2868571261733405, "grad_norm": 0.641733478132169, "learning_rate": 4.7810218978102196e-05, "loss": 0.6068, "step": 9825 }, { "epoch": 0.28700310944365776, "grad_norm": 0.6469801928219541, "learning_rate": 4.7834549878345504e-05, "loss": 0.6296, "step": 9830 }, { "epoch": 0.287149092713975, "grad_norm": 0.6754011899672127, "learning_rate": 4.785888077858881e-05, "loss": 0.6613, "step": 9835 }, { "epoch": 0.2872950759842922, "grad_norm": 0.6708210588646772, "learning_rate": 4.788321167883212e-05, "loss": 0.6415, "step": 9840 }, { "epoch": 0.28744105925460944, "grad_norm": 0.5872592489951445, "learning_rate": 4.7907542579075426e-05, "loss": 0.6401, "step": 9845 }, { "epoch": 0.2875870425249266, "grad_norm": 0.6377289788927055, "learning_rate": 4.793187347931874e-05, "loss": 0.6089, "step": 9850 }, { "epoch": 0.28773302579524385, "grad_norm": 0.681292925882556, "learning_rate": 4.795620437956205e-05, "loss": 0.6176, "step": 9855 }, { "epoch": 0.2878790090655611, "grad_norm": 0.6524528504984103, "learning_rate": 4.7980535279805355e-05, "loss": 0.6363, "step": 9860 }, { "epoch": 0.2880249923358783, "grad_norm": 0.6770739572356963, "learning_rate": 4.800486618004866e-05, "loss": 0.6247, "step": 9865 }, { "epoch": 0.28817097560619553, "grad_norm": 0.6323819894330578, "learning_rate": 4.802919708029197e-05, "loss": 0.6321, "step": 9870 }, { "epoch": 0.28831695887651276, "grad_norm": 0.6754206507002058, "learning_rate": 4.8053527980535284e-05, "loss": 0.693, "step": 9875 }, { "epoch": 0.28846294214683, "grad_norm": 0.7788847183290157, "learning_rate": 4.807785888077859e-05, "loss": 0.6649, "step": 9880 }, { "epoch": 0.2886089254171472, "grad_norm": 0.6907616911916702, "learning_rate": 4.81021897810219e-05, "loss": 0.6053, "step": 9885 }, { "epoch": 0.2887549086874644, "grad_norm": 0.6602245625524593, "learning_rate": 4.812652068126521e-05, "loss": 0.6382, "step": 9890 }, { "epoch": 0.2889008919577816, "grad_norm": 0.6735448658253135, "learning_rate": 4.8150851581508514e-05, "loss": 0.6234, "step": 9895 }, { "epoch": 0.28904687522809885, "grad_norm": 0.7088862898413375, "learning_rate": 4.817518248175183e-05, "loss": 0.6928, "step": 9900 }, { "epoch": 0.2891928584984161, "grad_norm": 0.775009626750579, "learning_rate": 4.8199513381995136e-05, "loss": 0.6239, "step": 9905 }, { "epoch": 0.2893388417687333, "grad_norm": 0.7163618167585597, "learning_rate": 4.822384428223844e-05, "loss": 0.6718, "step": 9910 }, { "epoch": 0.28948482503905054, "grad_norm": 0.6371956532330776, "learning_rate": 4.824817518248175e-05, "loss": 0.6358, "step": 9915 }, { "epoch": 0.28963080830936777, "grad_norm": 0.7707379891092, "learning_rate": 4.827250608272506e-05, "loss": 0.6398, "step": 9920 }, { "epoch": 0.28977679157968494, "grad_norm": 0.588400308813402, "learning_rate": 4.829683698296837e-05, "loss": 0.6057, "step": 9925 }, { "epoch": 0.2899227748500022, "grad_norm": 0.6575141626344223, "learning_rate": 4.832116788321168e-05, "loss": 0.6446, "step": 9930 }, { "epoch": 0.2900687581203194, "grad_norm": 0.6604417412129263, "learning_rate": 4.834549878345499e-05, "loss": 0.6375, "step": 9935 }, { "epoch": 0.29021474139063663, "grad_norm": 0.6729183257195491, "learning_rate": 4.8369829683698295e-05, "loss": 0.641, "step": 9940 }, { "epoch": 0.29036072466095386, "grad_norm": 0.728344040760963, "learning_rate": 4.839416058394161e-05, "loss": 0.6697, "step": 9945 }, { "epoch": 0.2905067079312711, "grad_norm": 0.6428741436356425, "learning_rate": 4.8418491484184916e-05, "loss": 0.6545, "step": 9950 }, { "epoch": 0.2906526912015883, "grad_norm": 0.6840653393348645, "learning_rate": 4.8442822384428224e-05, "loss": 0.6227, "step": 9955 }, { "epoch": 0.2907986744719055, "grad_norm": 0.7317053120452711, "learning_rate": 4.846715328467154e-05, "loss": 0.6243, "step": 9960 }, { "epoch": 0.2909446577422227, "grad_norm": 0.6385429397763802, "learning_rate": 4.8491484184914846e-05, "loss": 0.6351, "step": 9965 }, { "epoch": 0.29109064101253995, "grad_norm": 0.6201557468831398, "learning_rate": 4.851581508515815e-05, "loss": 0.6243, "step": 9970 }, { "epoch": 0.2912366242828572, "grad_norm": 0.6039663768747109, "learning_rate": 4.854014598540147e-05, "loss": 0.6686, "step": 9975 }, { "epoch": 0.2913826075531744, "grad_norm": 0.6174042732281686, "learning_rate": 4.8564476885644775e-05, "loss": 0.6399, "step": 9980 }, { "epoch": 0.29152859082349164, "grad_norm": 0.6329102831578355, "learning_rate": 4.858880778588808e-05, "loss": 0.6352, "step": 9985 }, { "epoch": 0.29167457409380887, "grad_norm": 0.667368612415335, "learning_rate": 4.861313868613139e-05, "loss": 0.6668, "step": 9990 }, { "epoch": 0.2918205573641261, "grad_norm": 0.6860769067594922, "learning_rate": 4.8637469586374704e-05, "loss": 0.6648, "step": 9995 }, { "epoch": 0.2919665406344433, "grad_norm": 0.7066576958582931, "learning_rate": 4.866180048661801e-05, "loss": 0.6617, "step": 10000 }, { "epoch": 0.2921125239047605, "grad_norm": 0.6918598949731807, "learning_rate": 4.868613138686132e-05, "loss": 0.6338, "step": 10005 }, { "epoch": 0.29225850717507773, "grad_norm": 0.6376200262670983, "learning_rate": 4.8710462287104626e-05, "loss": 0.6198, "step": 10010 }, { "epoch": 0.29240449044539496, "grad_norm": 0.6330162971461978, "learning_rate": 4.8734793187347934e-05, "loss": 0.6025, "step": 10015 }, { "epoch": 0.2925504737157122, "grad_norm": 0.6299110983528541, "learning_rate": 4.875912408759125e-05, "loss": 0.6419, "step": 10020 }, { "epoch": 0.2926964569860294, "grad_norm": 0.6520967190530587, "learning_rate": 4.8783454987834555e-05, "loss": 0.6479, "step": 10025 }, { "epoch": 0.29284244025634665, "grad_norm": 0.6238975728956335, "learning_rate": 4.880778588807786e-05, "loss": 0.6421, "step": 10030 }, { "epoch": 0.2929884235266638, "grad_norm": 0.6539171268420287, "learning_rate": 4.883211678832117e-05, "loss": 0.5687, "step": 10035 }, { "epoch": 0.29313440679698105, "grad_norm": 0.754605281677868, "learning_rate": 4.885644768856448e-05, "loss": 0.654, "step": 10040 }, { "epoch": 0.2932803900672983, "grad_norm": 0.6401391216895089, "learning_rate": 4.888077858880779e-05, "loss": 0.6794, "step": 10045 }, { "epoch": 0.2934263733376155, "grad_norm": 0.6362736413182376, "learning_rate": 4.89051094890511e-05, "loss": 0.661, "step": 10050 }, { "epoch": 0.29357235660793274, "grad_norm": 0.6494392178293806, "learning_rate": 4.892944038929441e-05, "loss": 0.6318, "step": 10055 }, { "epoch": 0.29371833987824997, "grad_norm": 0.7074990820829434, "learning_rate": 4.8953771289537714e-05, "loss": 0.6312, "step": 10060 }, { "epoch": 0.2938643231485672, "grad_norm": 0.6547670864928798, "learning_rate": 4.897810218978102e-05, "loss": 0.6203, "step": 10065 }, { "epoch": 0.29401030641888437, "grad_norm": 0.6481300996712338, "learning_rate": 4.9002433090024336e-05, "loss": 0.6489, "step": 10070 }, { "epoch": 0.2941562896892016, "grad_norm": 0.5954618850494404, "learning_rate": 4.9026763990267643e-05, "loss": 0.6701, "step": 10075 }, { "epoch": 0.29430227295951883, "grad_norm": 0.6528759969543271, "learning_rate": 4.905109489051095e-05, "loss": 0.623, "step": 10080 }, { "epoch": 0.29444825622983606, "grad_norm": 0.648354744991811, "learning_rate": 4.907542579075426e-05, "loss": 0.6562, "step": 10085 }, { "epoch": 0.2945942395001533, "grad_norm": 0.6532795043280899, "learning_rate": 4.9099756690997566e-05, "loss": 0.6431, "step": 10090 }, { "epoch": 0.2947402227704705, "grad_norm": 0.6678293074618489, "learning_rate": 4.912408759124088e-05, "loss": 0.6474, "step": 10095 }, { "epoch": 0.29488620604078775, "grad_norm": 0.6920131968237248, "learning_rate": 4.914841849148419e-05, "loss": 0.6172, "step": 10100 }, { "epoch": 0.2950321893111049, "grad_norm": 0.659329413367468, "learning_rate": 4.9172749391727495e-05, "loss": 0.6734, "step": 10105 }, { "epoch": 0.29517817258142215, "grad_norm": 0.7273849052257434, "learning_rate": 4.91970802919708e-05, "loss": 0.6561, "step": 10110 }, { "epoch": 0.2953241558517394, "grad_norm": 0.6191839976405816, "learning_rate": 4.922141119221411e-05, "loss": 0.6205, "step": 10115 }, { "epoch": 0.2954701391220566, "grad_norm": 0.6505456655351832, "learning_rate": 4.9245742092457424e-05, "loss": 0.6298, "step": 10120 }, { "epoch": 0.29561612239237384, "grad_norm": 0.6073542107098728, "learning_rate": 4.927007299270073e-05, "loss": 0.6427, "step": 10125 }, { "epoch": 0.29576210566269107, "grad_norm": 0.6324830351576718, "learning_rate": 4.929440389294404e-05, "loss": 0.6635, "step": 10130 }, { "epoch": 0.2959080889330083, "grad_norm": 0.6397484234264196, "learning_rate": 4.9318734793187346e-05, "loss": 0.6213, "step": 10135 }, { "epoch": 0.2960540722033255, "grad_norm": 0.6882154921390391, "learning_rate": 4.9343065693430654e-05, "loss": 0.6325, "step": 10140 }, { "epoch": 0.2962000554736427, "grad_norm": 0.6256797269838501, "learning_rate": 4.936739659367397e-05, "loss": 0.6436, "step": 10145 }, { "epoch": 0.2963460387439599, "grad_norm": 0.6877254127655398, "learning_rate": 4.9391727493917275e-05, "loss": 0.6789, "step": 10150 }, { "epoch": 0.29649202201427716, "grad_norm": 0.650478759992472, "learning_rate": 4.941605839416058e-05, "loss": 0.6008, "step": 10155 }, { "epoch": 0.2966380052845944, "grad_norm": 0.6733316312524373, "learning_rate": 4.94403892944039e-05, "loss": 0.653, "step": 10160 }, { "epoch": 0.2967839885549116, "grad_norm": 0.6252604503789468, "learning_rate": 4.9464720194647205e-05, "loss": 0.6314, "step": 10165 }, { "epoch": 0.29692997182522884, "grad_norm": 0.6370834251039601, "learning_rate": 4.948905109489051e-05, "loss": 0.6731, "step": 10170 }, { "epoch": 0.2970759550955461, "grad_norm": 0.6752170867398202, "learning_rate": 4.9513381995133826e-05, "loss": 0.6419, "step": 10175 }, { "epoch": 0.29722193836586325, "grad_norm": 0.6168085005228792, "learning_rate": 4.9537712895377134e-05, "loss": 0.6721, "step": 10180 }, { "epoch": 0.2973679216361805, "grad_norm": 0.6729972775952605, "learning_rate": 4.956204379562044e-05, "loss": 0.7, "step": 10185 }, { "epoch": 0.2975139049064977, "grad_norm": 0.7484891877337259, "learning_rate": 4.9586374695863755e-05, "loss": 0.6426, "step": 10190 }, { "epoch": 0.29765988817681494, "grad_norm": 0.7482210058700955, "learning_rate": 4.961070559610706e-05, "loss": 0.6706, "step": 10195 }, { "epoch": 0.29780587144713216, "grad_norm": 0.6445078142110824, "learning_rate": 4.963503649635037e-05, "loss": 0.6583, "step": 10200 }, { "epoch": 0.2979518547174494, "grad_norm": 0.6379147753511217, "learning_rate": 4.965936739659368e-05, "loss": 0.6204, "step": 10205 }, { "epoch": 0.2980978379877666, "grad_norm": 0.6836216663941271, "learning_rate": 4.9683698296836985e-05, "loss": 0.6319, "step": 10210 }, { "epoch": 0.2982438212580838, "grad_norm": 0.6162305465750284, "learning_rate": 4.97080291970803e-05, "loss": 0.586, "step": 10215 }, { "epoch": 0.298389804528401, "grad_norm": 0.636981641561733, "learning_rate": 4.973236009732361e-05, "loss": 0.6321, "step": 10220 }, { "epoch": 0.29853578779871826, "grad_norm": 0.728862339745821, "learning_rate": 4.9756690997566914e-05, "loss": 0.6043, "step": 10225 }, { "epoch": 0.2986817710690355, "grad_norm": 0.6253781312878992, "learning_rate": 4.978102189781022e-05, "loss": 0.6077, "step": 10230 }, { "epoch": 0.2988277543393527, "grad_norm": 0.5821381968405813, "learning_rate": 4.980535279805353e-05, "loss": 0.6733, "step": 10235 }, { "epoch": 0.29897373760966994, "grad_norm": 0.7052252523780422, "learning_rate": 4.9829683698296843e-05, "loss": 0.611, "step": 10240 }, { "epoch": 0.29911972087998717, "grad_norm": 0.6601928788610568, "learning_rate": 4.985401459854015e-05, "loss": 0.6433, "step": 10245 }, { "epoch": 0.2992657041503044, "grad_norm": 0.6789220392046563, "learning_rate": 4.987834549878346e-05, "loss": 0.6226, "step": 10250 }, { "epoch": 0.2994116874206216, "grad_norm": 0.6612986920811125, "learning_rate": 4.9902676399026766e-05, "loss": 0.6253, "step": 10255 }, { "epoch": 0.2995576706909388, "grad_norm": 0.6156330170001653, "learning_rate": 4.992700729927007e-05, "loss": 0.6296, "step": 10260 }, { "epoch": 0.29970365396125603, "grad_norm": 0.6894974502724918, "learning_rate": 4.995133819951339e-05, "loss": 0.6258, "step": 10265 }, { "epoch": 0.29984963723157326, "grad_norm": 0.640344955091745, "learning_rate": 4.9975669099756695e-05, "loss": 0.6688, "step": 10270 }, { "epoch": 0.2999956205018905, "grad_norm": 0.5619631407874249, "learning_rate": 5e-05, "loss": 0.6225, "step": 10275 }, { "epoch": 0.3001416037722077, "grad_norm": 0.6772321978317362, "learning_rate": 4.9997296566639636e-05, "loss": 0.6271, "step": 10280 }, { "epoch": 0.30028758704252495, "grad_norm": 0.6306809633089376, "learning_rate": 4.9994593133279264e-05, "loss": 0.6685, "step": 10285 }, { "epoch": 0.3004335703128421, "grad_norm": 0.5825630366587734, "learning_rate": 4.99918896999189e-05, "loss": 0.6358, "step": 10290 }, { "epoch": 0.30057955358315935, "grad_norm": 0.6570595804986152, "learning_rate": 4.998918626655853e-05, "loss": 0.6611, "step": 10295 }, { "epoch": 0.3007255368534766, "grad_norm": 0.6544194907407777, "learning_rate": 4.998648283319816e-05, "loss": 0.6504, "step": 10300 }, { "epoch": 0.3008715201237938, "grad_norm": 0.6979706886671487, "learning_rate": 4.99837793998378e-05, "loss": 0.6168, "step": 10305 }, { "epoch": 0.30101750339411104, "grad_norm": 0.7014858087889422, "learning_rate": 4.9981075966477434e-05, "loss": 0.6249, "step": 10310 }, { "epoch": 0.30116348666442827, "grad_norm": 0.6094256933501074, "learning_rate": 4.997837253311706e-05, "loss": 0.6329, "step": 10315 }, { "epoch": 0.3013094699347455, "grad_norm": 0.6175268561914081, "learning_rate": 4.9975669099756695e-05, "loss": 0.6464, "step": 10320 }, { "epoch": 0.3014554532050627, "grad_norm": 0.6109466952365935, "learning_rate": 4.997296566639633e-05, "loss": 0.653, "step": 10325 }, { "epoch": 0.3016014364753799, "grad_norm": 0.6777125804319233, "learning_rate": 4.9970262233035956e-05, "loss": 0.6577, "step": 10330 }, { "epoch": 0.30174741974569713, "grad_norm": 0.648399838336207, "learning_rate": 4.996755879967559e-05, "loss": 0.645, "step": 10335 }, { "epoch": 0.30189340301601436, "grad_norm": 0.6922332346373273, "learning_rate": 4.9964855366315224e-05, "loss": 0.6773, "step": 10340 }, { "epoch": 0.3020393862863316, "grad_norm": 0.7353444838053, "learning_rate": 4.996215193295485e-05, "loss": 0.6646, "step": 10345 }, { "epoch": 0.3021853695566488, "grad_norm": 0.6575722659247467, "learning_rate": 4.9959448499594485e-05, "loss": 0.6216, "step": 10350 }, { "epoch": 0.30233135282696605, "grad_norm": 0.6866462445352212, "learning_rate": 4.995674506623412e-05, "loss": 0.6134, "step": 10355 }, { "epoch": 0.3024773360972833, "grad_norm": 0.6491788960891651, "learning_rate": 4.9954041632873753e-05, "loss": 0.651, "step": 10360 }, { "epoch": 0.30262331936760045, "grad_norm": 0.623003962034899, "learning_rate": 4.995133819951339e-05, "loss": 0.6445, "step": 10365 }, { "epoch": 0.3027693026379177, "grad_norm": 0.5675153113775973, "learning_rate": 4.994863476615302e-05, "loss": 0.6142, "step": 10370 }, { "epoch": 0.3029152859082349, "grad_norm": 0.7110423218458973, "learning_rate": 4.994593133279265e-05, "loss": 0.6977, "step": 10375 }, { "epoch": 0.30306126917855214, "grad_norm": 0.6324025225688492, "learning_rate": 4.994322789943228e-05, "loss": 0.6386, "step": 10380 }, { "epoch": 0.30320725244886937, "grad_norm": 0.6570216554467696, "learning_rate": 4.994052446607192e-05, "loss": 0.6643, "step": 10385 }, { "epoch": 0.3033532357191866, "grad_norm": 0.721499488789921, "learning_rate": 4.9937821032711544e-05, "loss": 0.6247, "step": 10390 }, { "epoch": 0.3034992189895038, "grad_norm": 0.7073389314778381, "learning_rate": 4.993511759935118e-05, "loss": 0.6403, "step": 10395 }, { "epoch": 0.303645202259821, "grad_norm": 0.5958897191892116, "learning_rate": 4.993241416599081e-05, "loss": 0.6275, "step": 10400 }, { "epoch": 0.30379118553013823, "grad_norm": 0.696705833414288, "learning_rate": 4.992971073263044e-05, "loss": 0.6706, "step": 10405 }, { "epoch": 0.30393716880045546, "grad_norm": 0.6690169534983128, "learning_rate": 4.992700729927007e-05, "loss": 0.6369, "step": 10410 }, { "epoch": 0.3040831520707727, "grad_norm": 0.6145139124682788, "learning_rate": 4.992430386590971e-05, "loss": 0.6731, "step": 10415 }, { "epoch": 0.3042291353410899, "grad_norm": 0.6171601722574497, "learning_rate": 4.992160043254934e-05, "loss": 0.6122, "step": 10420 }, { "epoch": 0.30437511861140715, "grad_norm": 0.7071165405524246, "learning_rate": 4.9918896999188975e-05, "loss": 0.6341, "step": 10425 }, { "epoch": 0.3045211018817244, "grad_norm": 0.6455749413006118, "learning_rate": 4.991619356582861e-05, "loss": 0.6244, "step": 10430 }, { "epoch": 0.30466708515204155, "grad_norm": 0.685661466008605, "learning_rate": 4.9913490132468237e-05, "loss": 0.6597, "step": 10435 }, { "epoch": 0.3048130684223588, "grad_norm": 0.7028733554204152, "learning_rate": 4.991078669910787e-05, "loss": 0.6728, "step": 10440 }, { "epoch": 0.304959051692676, "grad_norm": 0.6593693515021752, "learning_rate": 4.9908083265747505e-05, "loss": 0.6314, "step": 10445 }, { "epoch": 0.30510503496299324, "grad_norm": 0.6021270717644783, "learning_rate": 4.990537983238713e-05, "loss": 0.6267, "step": 10450 }, { "epoch": 0.30525101823331047, "grad_norm": 0.6556564188268688, "learning_rate": 4.9902676399026766e-05, "loss": 0.6274, "step": 10455 }, { "epoch": 0.3053970015036277, "grad_norm": 0.6225784646089534, "learning_rate": 4.98999729656664e-05, "loss": 0.6215, "step": 10460 }, { "epoch": 0.3055429847739449, "grad_norm": 0.6205695589282378, "learning_rate": 4.989726953230603e-05, "loss": 0.6049, "step": 10465 }, { "epoch": 0.30568896804426215, "grad_norm": 0.6584585628717478, "learning_rate": 4.989456609894566e-05, "loss": 0.6686, "step": 10470 }, { "epoch": 0.30583495131457933, "grad_norm": 0.6150393366755, "learning_rate": 4.9891862665585295e-05, "loss": 0.6377, "step": 10475 }, { "epoch": 0.30598093458489656, "grad_norm": 0.5638398518538796, "learning_rate": 4.988915923222493e-05, "loss": 0.6314, "step": 10480 }, { "epoch": 0.3061269178552138, "grad_norm": 0.683787649619583, "learning_rate": 4.988645579886456e-05, "loss": 0.6331, "step": 10485 }, { "epoch": 0.306272901125531, "grad_norm": 0.616158820121649, "learning_rate": 4.98837523655042e-05, "loss": 0.5897, "step": 10490 }, { "epoch": 0.30641888439584825, "grad_norm": 0.6248830597633258, "learning_rate": 4.9881048932143824e-05, "loss": 0.6504, "step": 10495 }, { "epoch": 0.3065648676661655, "grad_norm": 0.6565880133824578, "learning_rate": 4.987834549878346e-05, "loss": 0.6587, "step": 10500 }, { "epoch": 0.3067108509364827, "grad_norm": 0.6513053622605238, "learning_rate": 4.987564206542309e-05, "loss": 0.5887, "step": 10505 }, { "epoch": 0.3068568342067999, "grad_norm": 0.706562876658164, "learning_rate": 4.987293863206272e-05, "loss": 0.6263, "step": 10510 }, { "epoch": 0.3070028174771171, "grad_norm": 0.6340741417663839, "learning_rate": 4.9870235198702354e-05, "loss": 0.6565, "step": 10515 }, { "epoch": 0.30714880074743434, "grad_norm": 0.60468738432602, "learning_rate": 4.986753176534199e-05, "loss": 0.6022, "step": 10520 }, { "epoch": 0.30729478401775157, "grad_norm": 0.656270348837813, "learning_rate": 4.9864828331981615e-05, "loss": 0.6709, "step": 10525 }, { "epoch": 0.3074407672880688, "grad_norm": 0.6602463302005718, "learning_rate": 4.9862124898621256e-05, "loss": 0.6567, "step": 10530 }, { "epoch": 0.307586750558386, "grad_norm": 0.7191793575883093, "learning_rate": 4.985942146526088e-05, "loss": 0.6486, "step": 10535 }, { "epoch": 0.30773273382870325, "grad_norm": 0.6504470971814451, "learning_rate": 4.985671803190052e-05, "loss": 0.6451, "step": 10540 }, { "epoch": 0.3078787170990204, "grad_norm": 0.6704590350748797, "learning_rate": 4.985401459854015e-05, "loss": 0.649, "step": 10545 }, { "epoch": 0.30802470036933766, "grad_norm": 0.6067269444770846, "learning_rate": 4.985131116517978e-05, "loss": 0.6202, "step": 10550 }, { "epoch": 0.3081706836396549, "grad_norm": 0.6220848501218725, "learning_rate": 4.984860773181941e-05, "loss": 0.6398, "step": 10555 }, { "epoch": 0.3083166669099721, "grad_norm": 0.6421795832555619, "learning_rate": 4.9845904298459046e-05, "loss": 0.6337, "step": 10560 }, { "epoch": 0.30846265018028934, "grad_norm": 0.6681191625693835, "learning_rate": 4.984320086509868e-05, "loss": 0.6842, "step": 10565 }, { "epoch": 0.3086086334506066, "grad_norm": 0.6340238834443856, "learning_rate": 4.984049743173831e-05, "loss": 0.6688, "step": 10570 }, { "epoch": 0.3087546167209238, "grad_norm": 0.6229852841735158, "learning_rate": 4.983779399837794e-05, "loss": 0.6343, "step": 10575 }, { "epoch": 0.308900599991241, "grad_norm": 0.5857414308776459, "learning_rate": 4.9835090565017575e-05, "loss": 0.5872, "step": 10580 }, { "epoch": 0.3090465832615582, "grad_norm": 0.6534930638965825, "learning_rate": 4.98323871316572e-05, "loss": 0.5882, "step": 10585 }, { "epoch": 0.30919256653187543, "grad_norm": 0.6346303643723475, "learning_rate": 4.9829683698296843e-05, "loss": 0.6401, "step": 10590 }, { "epoch": 0.30933854980219266, "grad_norm": 0.6624443457693053, "learning_rate": 4.982698026493647e-05, "loss": 0.6708, "step": 10595 }, { "epoch": 0.3094845330725099, "grad_norm": 0.6482252193097016, "learning_rate": 4.9824276831576105e-05, "loss": 0.6313, "step": 10600 }, { "epoch": 0.3096305163428271, "grad_norm": 0.6408110753953846, "learning_rate": 4.982157339821574e-05, "loss": 0.6586, "step": 10605 }, { "epoch": 0.30977649961314435, "grad_norm": 0.8673338109935187, "learning_rate": 4.9818869964855366e-05, "loss": 0.6456, "step": 10610 }, { "epoch": 0.3099224828834616, "grad_norm": 0.6484652320700717, "learning_rate": 4.9816166531495e-05, "loss": 0.6495, "step": 10615 }, { "epoch": 0.31006846615377875, "grad_norm": 0.6277914356992697, "learning_rate": 4.9813463098134634e-05, "loss": 0.6456, "step": 10620 }, { "epoch": 0.310214449424096, "grad_norm": 0.6580314667349915, "learning_rate": 4.981075966477427e-05, "loss": 0.6571, "step": 10625 }, { "epoch": 0.3103604326944132, "grad_norm": 0.6138338614837171, "learning_rate": 4.9808056231413895e-05, "loss": 0.63, "step": 10630 }, { "epoch": 0.31050641596473044, "grad_norm": 0.5966344592573029, "learning_rate": 4.980535279805353e-05, "loss": 0.6295, "step": 10635 }, { "epoch": 0.31065239923504767, "grad_norm": 0.6981665191724395, "learning_rate": 4.980264936469316e-05, "loss": 0.6304, "step": 10640 }, { "epoch": 0.3107983825053649, "grad_norm": 0.6577540741694572, "learning_rate": 4.97999459313328e-05, "loss": 0.6358, "step": 10645 }, { "epoch": 0.31094436577568213, "grad_norm": 0.6682568720059494, "learning_rate": 4.979724249797243e-05, "loss": 0.6199, "step": 10650 }, { "epoch": 0.3110903490459993, "grad_norm": 0.6606397940632466, "learning_rate": 4.979453906461206e-05, "loss": 0.661, "step": 10655 }, { "epoch": 0.31123633231631653, "grad_norm": 0.6633606998670643, "learning_rate": 4.979183563125169e-05, "loss": 0.6179, "step": 10660 }, { "epoch": 0.31138231558663376, "grad_norm": 0.6117453900537305, "learning_rate": 4.9789132197891327e-05, "loss": 0.6343, "step": 10665 }, { "epoch": 0.311528298856951, "grad_norm": 0.5848523749207567, "learning_rate": 4.9786428764530954e-05, "loss": 0.6137, "step": 10670 }, { "epoch": 0.3116742821272682, "grad_norm": 0.5966354180314578, "learning_rate": 4.978372533117059e-05, "loss": 0.6326, "step": 10675 }, { "epoch": 0.31182026539758545, "grad_norm": 0.6680772649119279, "learning_rate": 4.978102189781022e-05, "loss": 0.6148, "step": 10680 }, { "epoch": 0.3119662486679027, "grad_norm": 0.602557831310376, "learning_rate": 4.977831846444985e-05, "loss": 0.6423, "step": 10685 }, { "epoch": 0.31211223193821985, "grad_norm": 0.6105716295610832, "learning_rate": 4.977561503108948e-05, "loss": 0.6365, "step": 10690 }, { "epoch": 0.3122582152085371, "grad_norm": 0.616193587999814, "learning_rate": 4.977291159772912e-05, "loss": 0.5966, "step": 10695 }, { "epoch": 0.3124041984788543, "grad_norm": 0.6593240623650661, "learning_rate": 4.977020816436875e-05, "loss": 0.646, "step": 10700 }, { "epoch": 0.31255018174917154, "grad_norm": 0.7202870044426262, "learning_rate": 4.9767504731008385e-05, "loss": 0.6542, "step": 10705 }, { "epoch": 0.31269616501948877, "grad_norm": 0.6317024741420867, "learning_rate": 4.976480129764802e-05, "loss": 0.6291, "step": 10710 }, { "epoch": 0.312842148289806, "grad_norm": 0.6678521686253425, "learning_rate": 4.9762097864287646e-05, "loss": 0.6461, "step": 10715 }, { "epoch": 0.31298813156012323, "grad_norm": 0.6768412111038606, "learning_rate": 4.975939443092728e-05, "loss": 0.665, "step": 10720 }, { "epoch": 0.31313411483044046, "grad_norm": 0.6399410719875598, "learning_rate": 4.9756690997566914e-05, "loss": 0.6303, "step": 10725 }, { "epoch": 0.31328009810075763, "grad_norm": 0.5934530893509128, "learning_rate": 4.975398756420654e-05, "loss": 0.6076, "step": 10730 }, { "epoch": 0.31342608137107486, "grad_norm": 0.6372101510152468, "learning_rate": 4.9751284130846176e-05, "loss": 0.6577, "step": 10735 }, { "epoch": 0.3135720646413921, "grad_norm": 0.5797065116241704, "learning_rate": 4.974858069748581e-05, "loss": 0.6128, "step": 10740 }, { "epoch": 0.3137180479117093, "grad_norm": 0.6095534947015884, "learning_rate": 4.974587726412544e-05, "loss": 0.6439, "step": 10745 }, { "epoch": 0.31386403118202655, "grad_norm": 0.7098730603922241, "learning_rate": 4.974317383076507e-05, "loss": 0.6402, "step": 10750 }, { "epoch": 0.3140100144523438, "grad_norm": 0.6947122750304384, "learning_rate": 4.9740470397404705e-05, "loss": 0.6502, "step": 10755 }, { "epoch": 0.314155997722661, "grad_norm": 0.7057584763208314, "learning_rate": 4.973776696404434e-05, "loss": 0.5997, "step": 10760 }, { "epoch": 0.3143019809929782, "grad_norm": 0.6706882032731504, "learning_rate": 4.973506353068397e-05, "loss": 0.6197, "step": 10765 }, { "epoch": 0.3144479642632954, "grad_norm": 0.6283209354269884, "learning_rate": 4.973236009732361e-05, "loss": 0.6464, "step": 10770 }, { "epoch": 0.31459394753361264, "grad_norm": 0.6541997078676132, "learning_rate": 4.9729656663963234e-05, "loss": 0.5929, "step": 10775 }, { "epoch": 0.31473993080392987, "grad_norm": 0.605025122332917, "learning_rate": 4.972695323060287e-05, "loss": 0.6412, "step": 10780 }, { "epoch": 0.3148859140742471, "grad_norm": 0.7031534817903085, "learning_rate": 4.97242497972425e-05, "loss": 0.6906, "step": 10785 }, { "epoch": 0.3150318973445643, "grad_norm": 0.6213670028548677, "learning_rate": 4.972154636388213e-05, "loss": 0.6576, "step": 10790 }, { "epoch": 0.31517788061488156, "grad_norm": 0.6504896521994404, "learning_rate": 4.9718842930521763e-05, "loss": 0.6519, "step": 10795 }, { "epoch": 0.31532386388519873, "grad_norm": 0.6503840403464507, "learning_rate": 4.97161394971614e-05, "loss": 0.6393, "step": 10800 }, { "epoch": 0.31546984715551596, "grad_norm": 0.6644837476899623, "learning_rate": 4.9713436063801025e-05, "loss": 0.6572, "step": 10805 }, { "epoch": 0.3156158304258332, "grad_norm": 0.626648885013778, "learning_rate": 4.971073263044066e-05, "loss": 0.6083, "step": 10810 }, { "epoch": 0.3157618136961504, "grad_norm": 1.2060916314244272, "learning_rate": 4.97080291970803e-05, "loss": 0.6304, "step": 10815 }, { "epoch": 0.31590779696646765, "grad_norm": 0.6802961179184174, "learning_rate": 4.970532576371993e-05, "loss": 0.6494, "step": 10820 }, { "epoch": 0.3160537802367849, "grad_norm": 0.5801557890233381, "learning_rate": 4.970262233035956e-05, "loss": 0.6264, "step": 10825 }, { "epoch": 0.3161997635071021, "grad_norm": 0.7216014049428928, "learning_rate": 4.9699918896999195e-05, "loss": 0.6757, "step": 10830 }, { "epoch": 0.31634574677741933, "grad_norm": 0.6035876444659128, "learning_rate": 4.969721546363882e-05, "loss": 0.6231, "step": 10835 }, { "epoch": 0.3164917300477365, "grad_norm": 0.6024444583325081, "learning_rate": 4.9694512030278456e-05, "loss": 0.6134, "step": 10840 }, { "epoch": 0.31663771331805374, "grad_norm": 0.6470241211016415, "learning_rate": 4.969180859691809e-05, "loss": 0.6475, "step": 10845 }, { "epoch": 0.31678369658837097, "grad_norm": 0.6526068221944237, "learning_rate": 4.968910516355772e-05, "loss": 0.6363, "step": 10850 }, { "epoch": 0.3169296798586882, "grad_norm": 0.6381905499098897, "learning_rate": 4.968640173019735e-05, "loss": 0.6381, "step": 10855 }, { "epoch": 0.3170756631290054, "grad_norm": 0.6740740009208861, "learning_rate": 4.9683698296836985e-05, "loss": 0.6777, "step": 10860 }, { "epoch": 0.31722164639932265, "grad_norm": 0.6537115856212083, "learning_rate": 4.968099486347661e-05, "loss": 0.6291, "step": 10865 }, { "epoch": 0.3173676296696399, "grad_norm": 0.6676953259311622, "learning_rate": 4.967829143011625e-05, "loss": 0.6096, "step": 10870 }, { "epoch": 0.31751361293995706, "grad_norm": 0.6489278418220821, "learning_rate": 4.967558799675589e-05, "loss": 0.6671, "step": 10875 }, { "epoch": 0.3176595962102743, "grad_norm": 0.6510752006604603, "learning_rate": 4.9672884563395514e-05, "loss": 0.6147, "step": 10880 }, { "epoch": 0.3178055794805915, "grad_norm": 0.6374762181528539, "learning_rate": 4.967018113003515e-05, "loss": 0.6067, "step": 10885 }, { "epoch": 0.31795156275090874, "grad_norm": 0.603391311490913, "learning_rate": 4.966747769667478e-05, "loss": 0.6428, "step": 10890 }, { "epoch": 0.318097546021226, "grad_norm": 0.678649039548207, "learning_rate": 4.966477426331441e-05, "loss": 0.6442, "step": 10895 }, { "epoch": 0.3182435292915432, "grad_norm": 0.6640001370118623, "learning_rate": 4.9662070829954044e-05, "loss": 0.6352, "step": 10900 }, { "epoch": 0.31838951256186043, "grad_norm": 0.6428918919413413, "learning_rate": 4.965936739659368e-05, "loss": 0.6503, "step": 10905 }, { "epoch": 0.3185354958321776, "grad_norm": 0.7063585063026281, "learning_rate": 4.9656663963233305e-05, "loss": 0.6787, "step": 10910 }, { "epoch": 0.31868147910249484, "grad_norm": 0.5758341635148336, "learning_rate": 4.965396052987294e-05, "loss": 0.6102, "step": 10915 }, { "epoch": 0.31882746237281206, "grad_norm": 0.6409006508707353, "learning_rate": 4.965125709651257e-05, "loss": 0.6802, "step": 10920 }, { "epoch": 0.3189734456431293, "grad_norm": 0.58359271810739, "learning_rate": 4.96485536631522e-05, "loss": 0.6196, "step": 10925 }, { "epoch": 0.3191194289134465, "grad_norm": 0.5770083637464098, "learning_rate": 4.964585022979184e-05, "loss": 0.62, "step": 10930 }, { "epoch": 0.31926541218376375, "grad_norm": 0.6280038329947151, "learning_rate": 4.9643146796431475e-05, "loss": 0.6343, "step": 10935 }, { "epoch": 0.319411395454081, "grad_norm": 0.5579803441224014, "learning_rate": 4.96404433630711e-05, "loss": 0.6075, "step": 10940 }, { "epoch": 0.3195573787243982, "grad_norm": 0.6697491862640177, "learning_rate": 4.9637739929710736e-05, "loss": 0.6235, "step": 10945 }, { "epoch": 0.3197033619947154, "grad_norm": 0.6695359316614582, "learning_rate": 4.963503649635037e-05, "loss": 0.6638, "step": 10950 }, { "epoch": 0.3198493452650326, "grad_norm": 0.5539298829209712, "learning_rate": 4.963233306299e-05, "loss": 0.6052, "step": 10955 }, { "epoch": 0.31999532853534984, "grad_norm": 0.7178672571573815, "learning_rate": 4.962962962962963e-05, "loss": 0.6453, "step": 10960 }, { "epoch": 0.3201413118056671, "grad_norm": 0.648743218742246, "learning_rate": 4.9626926196269266e-05, "loss": 0.6875, "step": 10965 }, { "epoch": 0.3202872950759843, "grad_norm": 0.6027601874107119, "learning_rate": 4.962422276290889e-05, "loss": 0.6426, "step": 10970 }, { "epoch": 0.32043327834630153, "grad_norm": 0.6902679383772732, "learning_rate": 4.962151932954853e-05, "loss": 0.6067, "step": 10975 }, { "epoch": 0.32057926161661876, "grad_norm": 0.6493409401818929, "learning_rate": 4.961881589618816e-05, "loss": 0.6236, "step": 10980 }, { "epoch": 0.32072524488693593, "grad_norm": 0.6421593594092969, "learning_rate": 4.9616112462827795e-05, "loss": 0.6561, "step": 10985 }, { "epoch": 0.32087122815725316, "grad_norm": 0.6544724976436537, "learning_rate": 4.961340902946743e-05, "loss": 0.6379, "step": 10990 }, { "epoch": 0.3210172114275704, "grad_norm": 0.6033669100967988, "learning_rate": 4.961070559610706e-05, "loss": 0.602, "step": 10995 }, { "epoch": 0.3211631946978876, "grad_norm": 0.5894709434139221, "learning_rate": 4.960800216274669e-05, "loss": 0.6174, "step": 11000 }, { "epoch": 0.32130917796820485, "grad_norm": 0.6250798893232851, "learning_rate": 4.9605298729386324e-05, "loss": 0.6423, "step": 11005 }, { "epoch": 0.3214551612385221, "grad_norm": 0.5863343079836169, "learning_rate": 4.960259529602596e-05, "loss": 0.6152, "step": 11010 }, { "epoch": 0.3216011445088393, "grad_norm": 0.5785190610919887, "learning_rate": 4.9599891862665585e-05, "loss": 0.639, "step": 11015 }, { "epoch": 0.3217471277791565, "grad_norm": 0.6536134354357006, "learning_rate": 4.959718842930522e-05, "loss": 0.6808, "step": 11020 }, { "epoch": 0.3218931110494737, "grad_norm": 0.6121921979568106, "learning_rate": 4.959448499594485e-05, "loss": 0.6142, "step": 11025 }, { "epoch": 0.32203909431979094, "grad_norm": 0.6513361981187136, "learning_rate": 4.959178156258448e-05, "loss": 0.6557, "step": 11030 }, { "epoch": 0.32218507759010817, "grad_norm": 0.612933934529206, "learning_rate": 4.9589078129224115e-05, "loss": 0.627, "step": 11035 }, { "epoch": 0.3223310608604254, "grad_norm": 0.6870650104391748, "learning_rate": 4.9586374695863755e-05, "loss": 0.6162, "step": 11040 }, { "epoch": 0.32247704413074263, "grad_norm": 0.6412167265207089, "learning_rate": 4.958367126250338e-05, "loss": 0.6839, "step": 11045 }, { "epoch": 0.32262302740105986, "grad_norm": 0.6483890853670763, "learning_rate": 4.958096782914302e-05, "loss": 0.6431, "step": 11050 }, { "epoch": 0.3227690106713771, "grad_norm": 0.6404391545989658, "learning_rate": 4.957826439578265e-05, "loss": 0.6221, "step": 11055 }, { "epoch": 0.32291499394169426, "grad_norm": 0.5683395565033021, "learning_rate": 4.957556096242228e-05, "loss": 0.599, "step": 11060 }, { "epoch": 0.3230609772120115, "grad_norm": 0.6989618845447856, "learning_rate": 4.957285752906191e-05, "loss": 0.694, "step": 11065 }, { "epoch": 0.3232069604823287, "grad_norm": 0.6562795794799255, "learning_rate": 4.9570154095701546e-05, "loss": 0.6747, "step": 11070 }, { "epoch": 0.32335294375264595, "grad_norm": 0.6977195687573492, "learning_rate": 4.956745066234117e-05, "loss": 0.6569, "step": 11075 }, { "epoch": 0.3234989270229632, "grad_norm": 1.8132272366051825, "learning_rate": 4.956474722898081e-05, "loss": 0.6883, "step": 11080 }, { "epoch": 0.3236449102932804, "grad_norm": 108.50206825964867, "learning_rate": 4.956204379562044e-05, "loss": 1.1828, "step": 11085 }, { "epoch": 0.32379089356359764, "grad_norm": 16.19679720143412, "learning_rate": 4.955934036226007e-05, "loss": 0.9045, "step": 11090 }, { "epoch": 0.3239368768339148, "grad_norm": 31.510537275372307, "learning_rate": 4.95566369288997e-05, "loss": 2.1154, "step": 11095 }, { "epoch": 0.32408286010423204, "grad_norm": 1.2131654834392875, "learning_rate": 4.9553933495539336e-05, "loss": 0.8866, "step": 11100 }, { "epoch": 0.32422884337454927, "grad_norm": 0.5988443645508721, "learning_rate": 4.955123006217897e-05, "loss": 0.6653, "step": 11105 }, { "epoch": 0.3243748266448665, "grad_norm": 0.6231782416587154, "learning_rate": 4.9548526628818604e-05, "loss": 0.6169, "step": 11110 }, { "epoch": 0.32452080991518373, "grad_norm": 0.5929019144257673, "learning_rate": 4.954582319545824e-05, "loss": 0.6303, "step": 11115 }, { "epoch": 0.32466679318550096, "grad_norm": 0.6481186016473275, "learning_rate": 4.9543119762097866e-05, "loss": 0.6128, "step": 11120 }, { "epoch": 0.3248127764558182, "grad_norm": 0.6377729130223453, "learning_rate": 4.95404163287375e-05, "loss": 0.6644, "step": 11125 }, { "epoch": 0.32495875972613536, "grad_norm": 0.6141142751571823, "learning_rate": 4.9537712895377134e-05, "loss": 0.6285, "step": 11130 }, { "epoch": 0.3251047429964526, "grad_norm": 0.6700305212200927, "learning_rate": 4.953500946201676e-05, "loss": 0.6415, "step": 11135 }, { "epoch": 0.3252507262667698, "grad_norm": 0.5938598230011898, "learning_rate": 4.9532306028656395e-05, "loss": 0.607, "step": 11140 }, { "epoch": 0.32539670953708705, "grad_norm": 0.5874504297083294, "learning_rate": 4.952960259529603e-05, "loss": 0.598, "step": 11145 }, { "epoch": 0.3255426928074043, "grad_norm": 0.6690491621643465, "learning_rate": 4.9526899161935656e-05, "loss": 0.6543, "step": 11150 }, { "epoch": 0.3256886760777215, "grad_norm": 0.6346062565182183, "learning_rate": 4.95241957285753e-05, "loss": 0.6217, "step": 11155 }, { "epoch": 0.32583465934803874, "grad_norm": 0.6932463252880805, "learning_rate": 4.9521492295214924e-05, "loss": 0.6596, "step": 11160 }, { "epoch": 0.3259806426183559, "grad_norm": 0.6347462665138209, "learning_rate": 4.951878886185456e-05, "loss": 0.6115, "step": 11165 }, { "epoch": 0.32612662588867314, "grad_norm": 0.6299554708201864, "learning_rate": 4.951608542849419e-05, "loss": 0.6358, "step": 11170 }, { "epoch": 0.32627260915899037, "grad_norm": 0.6217765786450798, "learning_rate": 4.9513381995133826e-05, "loss": 0.6138, "step": 11175 }, { "epoch": 0.3264185924293076, "grad_norm": 1.0601950132270208, "learning_rate": 4.9510678561773454e-05, "loss": 0.6417, "step": 11180 }, { "epoch": 0.3265645756996248, "grad_norm": 0.6002380151820396, "learning_rate": 4.950797512841309e-05, "loss": 0.6524, "step": 11185 }, { "epoch": 0.32671055896994206, "grad_norm": 0.6185468889307453, "learning_rate": 4.950527169505272e-05, "loss": 0.6319, "step": 11190 }, { "epoch": 0.3268565422402593, "grad_norm": 0.629543930125518, "learning_rate": 4.950256826169235e-05, "loss": 0.6403, "step": 11195 }, { "epoch": 0.3270025255105765, "grad_norm": 0.6202040541568039, "learning_rate": 4.949986482833198e-05, "loss": 0.6006, "step": 11200 }, { "epoch": 0.3271485087808937, "grad_norm": 0.6473144341394054, "learning_rate": 4.949716139497162e-05, "loss": 0.6317, "step": 11205 }, { "epoch": 0.3272944920512109, "grad_norm": 0.598592124954114, "learning_rate": 4.949445796161125e-05, "loss": 0.6205, "step": 11210 }, { "epoch": 0.32744047532152815, "grad_norm": 0.5931528466014921, "learning_rate": 4.9491754528250885e-05, "loss": 0.6374, "step": 11215 }, { "epoch": 0.3275864585918454, "grad_norm": 0.6273218194586475, "learning_rate": 4.948905109489051e-05, "loss": 0.6467, "step": 11220 }, { "epoch": 0.3277324418621626, "grad_norm": 0.5794846560249575, "learning_rate": 4.9486347661530146e-05, "loss": 0.66, "step": 11225 }, { "epoch": 0.32787842513247983, "grad_norm": 0.5525474989658035, "learning_rate": 4.948364422816978e-05, "loss": 0.6348, "step": 11230 }, { "epoch": 0.32802440840279706, "grad_norm": 1.7896167139160781, "learning_rate": 4.948094079480941e-05, "loss": 0.6515, "step": 11235 }, { "epoch": 0.32817039167311424, "grad_norm": 0.7378377551034714, "learning_rate": 4.947823736144904e-05, "loss": 0.6607, "step": 11240 }, { "epoch": 0.32831637494343147, "grad_norm": 0.7051143329078303, "learning_rate": 4.9475533928088675e-05, "loss": 0.6507, "step": 11245 }, { "epoch": 0.3284623582137487, "grad_norm": 0.5708632434240704, "learning_rate": 4.947283049472831e-05, "loss": 0.5891, "step": 11250 }, { "epoch": 0.3286083414840659, "grad_norm": 0.6397500494150411, "learning_rate": 4.9470127061367937e-05, "loss": 0.6454, "step": 11255 }, { "epoch": 0.32875432475438315, "grad_norm": 0.5584539099209225, "learning_rate": 4.946742362800757e-05, "loss": 0.6209, "step": 11260 }, { "epoch": 0.3289003080247004, "grad_norm": 0.6399184961956007, "learning_rate": 4.9464720194647205e-05, "loss": 0.6324, "step": 11265 }, { "epoch": 0.3290462912950176, "grad_norm": 0.655267494883307, "learning_rate": 4.946201676128684e-05, "loss": 0.6424, "step": 11270 }, { "epoch": 0.3291922745653348, "grad_norm": 0.5821731387678135, "learning_rate": 4.945931332792647e-05, "loss": 0.6611, "step": 11275 }, { "epoch": 0.329338257835652, "grad_norm": 0.6387997804567725, "learning_rate": 4.94566098945661e-05, "loss": 0.6403, "step": 11280 }, { "epoch": 0.32948424110596924, "grad_norm": 0.609378447867628, "learning_rate": 4.9453906461205734e-05, "loss": 0.6558, "step": 11285 }, { "epoch": 0.3296302243762865, "grad_norm": 1.0040015326845089, "learning_rate": 4.945120302784537e-05, "loss": 0.6435, "step": 11290 }, { "epoch": 0.3297762076466037, "grad_norm": 0.6270164945773589, "learning_rate": 4.9448499594484995e-05, "loss": 0.6621, "step": 11295 }, { "epoch": 0.32992219091692093, "grad_norm": 0.6050901535360759, "learning_rate": 4.944579616112463e-05, "loss": 0.6525, "step": 11300 }, { "epoch": 0.33006817418723816, "grad_norm": 0.6213460611776451, "learning_rate": 4.944309272776426e-05, "loss": 0.6228, "step": 11305 }, { "epoch": 0.3302141574575554, "grad_norm": 0.6457813700022043, "learning_rate": 4.94403892944039e-05, "loss": 0.6627, "step": 11310 }, { "epoch": 0.33036014072787256, "grad_norm": 0.675279088525518, "learning_rate": 4.9437685861043524e-05, "loss": 0.6623, "step": 11315 }, { "epoch": 0.3305061239981898, "grad_norm": 0.6120861220305293, "learning_rate": 4.943498242768316e-05, "loss": 0.616, "step": 11320 }, { "epoch": 0.330652107268507, "grad_norm": 0.6019935643892279, "learning_rate": 4.943227899432279e-05, "loss": 0.6379, "step": 11325 }, { "epoch": 0.33079809053882425, "grad_norm": 0.6117678692821586, "learning_rate": 4.9429575560962426e-05, "loss": 0.6238, "step": 11330 }, { "epoch": 0.3309440738091415, "grad_norm": 0.5992985481021628, "learning_rate": 4.942687212760206e-05, "loss": 0.6421, "step": 11335 }, { "epoch": 0.3310900570794587, "grad_norm": 0.5890040969176622, "learning_rate": 4.942416869424169e-05, "loss": 0.6297, "step": 11340 }, { "epoch": 0.33123604034977594, "grad_norm": 0.6346039343962746, "learning_rate": 4.942146526088132e-05, "loss": 0.6366, "step": 11345 }, { "epoch": 0.3313820236200931, "grad_norm": 0.6125325693473717, "learning_rate": 4.9418761827520956e-05, "loss": 0.633, "step": 11350 }, { "epoch": 0.33152800689041034, "grad_norm": 0.5598902845949485, "learning_rate": 4.941605839416058e-05, "loss": 0.6259, "step": 11355 }, { "epoch": 0.33167399016072757, "grad_norm": 0.6477357619596794, "learning_rate": 4.941335496080022e-05, "loss": 0.6943, "step": 11360 }, { "epoch": 0.3318199734310448, "grad_norm": 0.6310658578411423, "learning_rate": 4.941065152743985e-05, "loss": 0.6343, "step": 11365 }, { "epoch": 0.33196595670136203, "grad_norm": 0.5767568566839941, "learning_rate": 4.940794809407948e-05, "loss": 0.6766, "step": 11370 }, { "epoch": 0.33211193997167926, "grad_norm": 0.657029297994364, "learning_rate": 4.940524466071911e-05, "loss": 0.6433, "step": 11375 }, { "epoch": 0.3322579232419965, "grad_norm": 0.6417967806957905, "learning_rate": 4.940254122735875e-05, "loss": 0.644, "step": 11380 }, { "epoch": 0.33240390651231366, "grad_norm": 0.5929919722217627, "learning_rate": 4.939983779399838e-05, "loss": 0.6577, "step": 11385 }, { "epoch": 0.3325498897826309, "grad_norm": 0.6175781356331255, "learning_rate": 4.9397134360638014e-05, "loss": 0.6312, "step": 11390 }, { "epoch": 0.3326958730529481, "grad_norm": 0.6691915142545075, "learning_rate": 4.939443092727765e-05, "loss": 0.5924, "step": 11395 }, { "epoch": 0.33284185632326535, "grad_norm": 0.5886304115068558, "learning_rate": 4.9391727493917275e-05, "loss": 0.6461, "step": 11400 }, { "epoch": 0.3329878395935826, "grad_norm": 0.6063307939818792, "learning_rate": 4.938902406055691e-05, "loss": 0.622, "step": 11405 }, { "epoch": 0.3331338228638998, "grad_norm": 0.6516585512557511, "learning_rate": 4.9386320627196543e-05, "loss": 0.668, "step": 11410 }, { "epoch": 0.33327980613421704, "grad_norm": 0.6797835577991816, "learning_rate": 4.938361719383617e-05, "loss": 0.6393, "step": 11415 }, { "epoch": 0.33342578940453427, "grad_norm": 0.6318458417588858, "learning_rate": 4.9380913760475805e-05, "loss": 0.6382, "step": 11420 }, { "epoch": 0.33357177267485144, "grad_norm": 0.6565489067861231, "learning_rate": 4.937821032711544e-05, "loss": 0.6165, "step": 11425 }, { "epoch": 0.33371775594516867, "grad_norm": 0.5951176121599381, "learning_rate": 4.9375506893755066e-05, "loss": 0.6604, "step": 11430 }, { "epoch": 0.3338637392154859, "grad_norm": 0.5872840074609822, "learning_rate": 4.93728034603947e-05, "loss": 0.6432, "step": 11435 }, { "epoch": 0.33400972248580313, "grad_norm": 0.6293285529535705, "learning_rate": 4.937010002703434e-05, "loss": 0.6567, "step": 11440 }, { "epoch": 0.33415570575612036, "grad_norm": 0.6616553725389163, "learning_rate": 4.936739659367397e-05, "loss": 0.6184, "step": 11445 }, { "epoch": 0.3343016890264376, "grad_norm": 0.6567679505323948, "learning_rate": 4.93646931603136e-05, "loss": 0.6346, "step": 11450 }, { "epoch": 0.3344476722967548, "grad_norm": 0.6487081057036923, "learning_rate": 4.9361989726953236e-05, "loss": 0.6478, "step": 11455 }, { "epoch": 0.334593655567072, "grad_norm": 0.5631161580699238, "learning_rate": 4.935928629359286e-05, "loss": 0.6015, "step": 11460 }, { "epoch": 0.3347396388373892, "grad_norm": 0.6576777892504974, "learning_rate": 4.93565828602325e-05, "loss": 0.6461, "step": 11465 }, { "epoch": 0.33488562210770645, "grad_norm": 0.5396543961581688, "learning_rate": 4.935387942687213e-05, "loss": 0.584, "step": 11470 }, { "epoch": 0.3350316053780237, "grad_norm": 0.7076128394713522, "learning_rate": 4.935117599351176e-05, "loss": 0.6517, "step": 11475 }, { "epoch": 0.3351775886483409, "grad_norm": 0.639844314372993, "learning_rate": 4.934847256015139e-05, "loss": 0.6496, "step": 11480 }, { "epoch": 0.33532357191865814, "grad_norm": 0.6498520310515272, "learning_rate": 4.9345769126791027e-05, "loss": 0.6354, "step": 11485 }, { "epoch": 0.33546955518897537, "grad_norm": 0.5693659483856012, "learning_rate": 4.9343065693430654e-05, "loss": 0.6165, "step": 11490 }, { "epoch": 0.33561553845929254, "grad_norm": 0.5669210511275126, "learning_rate": 4.9340362260070295e-05, "loss": 0.5976, "step": 11495 }, { "epoch": 0.33576152172960977, "grad_norm": 0.601550284521559, "learning_rate": 4.933765882670993e-05, "loss": 0.6754, "step": 11500 }, { "epoch": 0.335907504999927, "grad_norm": 0.6036553847225342, "learning_rate": 4.9334955393349556e-05, "loss": 0.6299, "step": 11505 }, { "epoch": 0.3360534882702442, "grad_norm": 0.6260571693708703, "learning_rate": 4.933225195998919e-05, "loss": 0.6148, "step": 11510 }, { "epoch": 0.33619947154056146, "grad_norm": 0.6098398990319047, "learning_rate": 4.9329548526628824e-05, "loss": 0.6194, "step": 11515 }, { "epoch": 0.3363454548108787, "grad_norm": 0.6030574228658917, "learning_rate": 4.932684509326845e-05, "loss": 0.6398, "step": 11520 }, { "epoch": 0.3364914380811959, "grad_norm": 0.6123974307746181, "learning_rate": 4.9324141659908085e-05, "loss": 0.6372, "step": 11525 }, { "epoch": 0.33663742135151314, "grad_norm": 0.5894405464106908, "learning_rate": 4.932143822654772e-05, "loss": 0.6252, "step": 11530 }, { "epoch": 0.3367834046218303, "grad_norm": 0.631858343970314, "learning_rate": 4.9318734793187346e-05, "loss": 0.6198, "step": 11535 }, { "epoch": 0.33692938789214755, "grad_norm": 0.6465531899945162, "learning_rate": 4.931603135982698e-05, "loss": 0.6504, "step": 11540 }, { "epoch": 0.3370753711624648, "grad_norm": 0.5818000363413108, "learning_rate": 4.9313327926466614e-05, "loss": 0.6075, "step": 11545 }, { "epoch": 0.337221354432782, "grad_norm": 0.5954057449790303, "learning_rate": 4.931062449310625e-05, "loss": 0.5945, "step": 11550 }, { "epoch": 0.33736733770309923, "grad_norm": 0.6118102920032826, "learning_rate": 4.930792105974588e-05, "loss": 0.6261, "step": 11555 }, { "epoch": 0.33751332097341646, "grad_norm": 0.6581980531372345, "learning_rate": 4.9305217626385516e-05, "loss": 0.6566, "step": 11560 }, { "epoch": 0.3376593042437337, "grad_norm": 0.7162156994900535, "learning_rate": 4.9302514193025144e-05, "loss": 0.6195, "step": 11565 }, { "epoch": 0.33780528751405087, "grad_norm": 0.5822692938278152, "learning_rate": 4.929981075966478e-05, "loss": 0.6174, "step": 11570 }, { "epoch": 0.3379512707843681, "grad_norm": 0.7177167600978944, "learning_rate": 4.929710732630441e-05, "loss": 0.6409, "step": 11575 }, { "epoch": 0.3380972540546853, "grad_norm": 0.5982742578844874, "learning_rate": 4.929440389294404e-05, "loss": 0.6163, "step": 11580 }, { "epoch": 0.33824323732500255, "grad_norm": 0.6153651278755156, "learning_rate": 4.929170045958367e-05, "loss": 0.6352, "step": 11585 }, { "epoch": 0.3383892205953198, "grad_norm": 0.5883452201914625, "learning_rate": 4.928899702622331e-05, "loss": 0.645, "step": 11590 }, { "epoch": 0.338535203865637, "grad_norm": 0.5704293515477616, "learning_rate": 4.9286293592862934e-05, "loss": 0.6328, "step": 11595 }, { "epoch": 0.33868118713595424, "grad_norm": 0.6005787329045771, "learning_rate": 4.928359015950257e-05, "loss": 0.6528, "step": 11600 }, { "epoch": 0.3388271704062714, "grad_norm": 0.6592289806642916, "learning_rate": 4.92808867261422e-05, "loss": 0.6742, "step": 11605 }, { "epoch": 0.33897315367658865, "grad_norm": 0.6592716303521634, "learning_rate": 4.9278183292781836e-05, "loss": 0.6443, "step": 11610 }, { "epoch": 0.3391191369469059, "grad_norm": 0.5509924615874413, "learning_rate": 4.927547985942147e-05, "loss": 0.6103, "step": 11615 }, { "epoch": 0.3392651202172231, "grad_norm": 0.6550496129906007, "learning_rate": 4.9272776426061104e-05, "loss": 0.6655, "step": 11620 }, { "epoch": 0.33941110348754033, "grad_norm": 0.6586642980533117, "learning_rate": 4.927007299270073e-05, "loss": 0.6224, "step": 11625 }, { "epoch": 0.33955708675785756, "grad_norm": 0.5566066825224627, "learning_rate": 4.9267369559340365e-05, "loss": 0.6098, "step": 11630 }, { "epoch": 0.3397030700281748, "grad_norm": 0.641459528904216, "learning_rate": 4.926466612598e-05, "loss": 0.6365, "step": 11635 }, { "epoch": 0.33984905329849197, "grad_norm": 0.6236233292066599, "learning_rate": 4.926196269261963e-05, "loss": 0.5952, "step": 11640 }, { "epoch": 0.3399950365688092, "grad_norm": 0.6342817773518803, "learning_rate": 4.925925925925926e-05, "loss": 0.6036, "step": 11645 }, { "epoch": 0.3401410198391264, "grad_norm": 0.5731261277696961, "learning_rate": 4.9256555825898895e-05, "loss": 0.6087, "step": 11650 }, { "epoch": 0.34028700310944365, "grad_norm": 0.6272239541664084, "learning_rate": 4.925385239253852e-05, "loss": 0.6349, "step": 11655 }, { "epoch": 0.3404329863797609, "grad_norm": 0.6070537760012055, "learning_rate": 4.9251148959178156e-05, "loss": 0.6134, "step": 11660 }, { "epoch": 0.3405789696500781, "grad_norm": 0.5937770160467011, "learning_rate": 4.92484455258178e-05, "loss": 0.6128, "step": 11665 }, { "epoch": 0.34072495292039534, "grad_norm": 0.6379433452054875, "learning_rate": 4.9245742092457424e-05, "loss": 0.6675, "step": 11670 }, { "epoch": 0.34087093619071257, "grad_norm": 0.6375303918231916, "learning_rate": 4.924303865909706e-05, "loss": 0.6248, "step": 11675 }, { "epoch": 0.34101691946102974, "grad_norm": 0.6128877546763478, "learning_rate": 4.924033522573669e-05, "loss": 0.6191, "step": 11680 }, { "epoch": 0.341162902731347, "grad_norm": 0.6221510910201042, "learning_rate": 4.923763179237632e-05, "loss": 0.6252, "step": 11685 }, { "epoch": 0.3413088860016642, "grad_norm": 0.6263561410445694, "learning_rate": 4.923492835901595e-05, "loss": 0.6549, "step": 11690 }, { "epoch": 0.34145486927198143, "grad_norm": 0.6927855621443829, "learning_rate": 4.923222492565559e-05, "loss": 0.6795, "step": 11695 }, { "epoch": 0.34160085254229866, "grad_norm": 0.5939517207688467, "learning_rate": 4.9229521492295215e-05, "loss": 0.6199, "step": 11700 }, { "epoch": 0.3417468358126159, "grad_norm": 0.5980138047958881, "learning_rate": 4.922681805893485e-05, "loss": 0.6785, "step": 11705 }, { "epoch": 0.3418928190829331, "grad_norm": 0.6172631000317944, "learning_rate": 4.922411462557448e-05, "loss": 0.661, "step": 11710 }, { "epoch": 0.3420388023532503, "grad_norm": 0.7914707275429881, "learning_rate": 4.922141119221411e-05, "loss": 0.6478, "step": 11715 }, { "epoch": 0.3421847856235675, "grad_norm": 0.6016107463108724, "learning_rate": 4.921870775885375e-05, "loss": 0.6919, "step": 11720 }, { "epoch": 0.34233076889388475, "grad_norm": 0.6201011497434932, "learning_rate": 4.921600432549338e-05, "loss": 0.652, "step": 11725 }, { "epoch": 0.342476752164202, "grad_norm": 0.6920191843332792, "learning_rate": 4.921330089213301e-05, "loss": 0.6478, "step": 11730 }, { "epoch": 0.3426227354345192, "grad_norm": 0.6991803527142565, "learning_rate": 4.9210597458772646e-05, "loss": 0.6029, "step": 11735 }, { "epoch": 0.34276871870483644, "grad_norm": 0.5852135042279726, "learning_rate": 4.920789402541228e-05, "loss": 0.6266, "step": 11740 }, { "epoch": 0.34291470197515367, "grad_norm": 1.1506750553132823, "learning_rate": 4.920519059205191e-05, "loss": 0.6518, "step": 11745 }, { "epoch": 0.34306068524547084, "grad_norm": 0.6470026464917955, "learning_rate": 4.920248715869154e-05, "loss": 0.6426, "step": 11750 }, { "epoch": 0.34320666851578807, "grad_norm": 0.638609412261499, "learning_rate": 4.9199783725331175e-05, "loss": 0.6573, "step": 11755 }, { "epoch": 0.3433526517861053, "grad_norm": 0.8230477575809569, "learning_rate": 4.91970802919708e-05, "loss": 0.6871, "step": 11760 }, { "epoch": 0.34349863505642253, "grad_norm": 0.691173551906303, "learning_rate": 4.9194376858610436e-05, "loss": 0.6472, "step": 11765 }, { "epoch": 0.34364461832673976, "grad_norm": 0.576050452472675, "learning_rate": 4.919167342525007e-05, "loss": 0.6386, "step": 11770 }, { "epoch": 0.343790601597057, "grad_norm": 0.6249230250215989, "learning_rate": 4.91889699918897e-05, "loss": 0.6415, "step": 11775 }, { "epoch": 0.3439365848673742, "grad_norm": 0.6231059558253434, "learning_rate": 4.918626655852934e-05, "loss": 0.6104, "step": 11780 }, { "epoch": 0.34408256813769145, "grad_norm": 0.6325956742677107, "learning_rate": 4.9183563125168966e-05, "loss": 0.6366, "step": 11785 }, { "epoch": 0.3442285514080086, "grad_norm": 0.6364180202425738, "learning_rate": 4.91808596918086e-05, "loss": 0.6359, "step": 11790 }, { "epoch": 0.34437453467832585, "grad_norm": 0.6163328180967562, "learning_rate": 4.9178156258448234e-05, "loss": 0.6154, "step": 11795 }, { "epoch": 0.3445205179486431, "grad_norm": 0.623667420468545, "learning_rate": 4.917545282508787e-05, "loss": 0.6271, "step": 11800 }, { "epoch": 0.3446665012189603, "grad_norm": 0.6586772992828728, "learning_rate": 4.9172749391727495e-05, "loss": 0.7119, "step": 11805 }, { "epoch": 0.34481248448927754, "grad_norm": 0.6968662510696583, "learning_rate": 4.917004595836713e-05, "loss": 0.6338, "step": 11810 }, { "epoch": 0.34495846775959477, "grad_norm": 0.654378885034374, "learning_rate": 4.916734252500676e-05, "loss": 0.6561, "step": 11815 }, { "epoch": 0.345104451029912, "grad_norm": 0.5557520017077167, "learning_rate": 4.916463909164639e-05, "loss": 0.6071, "step": 11820 }, { "epoch": 0.34525043430022917, "grad_norm": 0.6081369584963007, "learning_rate": 4.9161935658286024e-05, "loss": 0.6336, "step": 11825 }, { "epoch": 0.3453964175705464, "grad_norm": 0.6228433464290024, "learning_rate": 4.915923222492566e-05, "loss": 0.6767, "step": 11830 }, { "epoch": 0.34554240084086363, "grad_norm": 0.6003233661786018, "learning_rate": 4.915652879156529e-05, "loss": 0.6391, "step": 11835 }, { "epoch": 0.34568838411118086, "grad_norm": 0.7049947129238636, "learning_rate": 4.9153825358204926e-05, "loss": 0.6417, "step": 11840 }, { "epoch": 0.3458343673814981, "grad_norm": 0.6400111862744281, "learning_rate": 4.9151121924844553e-05, "loss": 0.6503, "step": 11845 }, { "epoch": 0.3459803506518153, "grad_norm": 0.6421985053141074, "learning_rate": 4.914841849148419e-05, "loss": 0.6686, "step": 11850 }, { "epoch": 0.34612633392213255, "grad_norm": 0.6301580075671566, "learning_rate": 4.914571505812382e-05, "loss": 0.6097, "step": 11855 }, { "epoch": 0.3462723171924497, "grad_norm": 0.6008424221722837, "learning_rate": 4.914301162476345e-05, "loss": 0.6093, "step": 11860 }, { "epoch": 0.34641830046276695, "grad_norm": 0.6040731603382133, "learning_rate": 4.914030819140308e-05, "loss": 0.6033, "step": 11865 }, { "epoch": 0.3465642837330842, "grad_norm": 0.6126563615804506, "learning_rate": 4.913760475804272e-05, "loss": 0.6287, "step": 11870 }, { "epoch": 0.3467102670034014, "grad_norm": 0.6620313520096649, "learning_rate": 4.913490132468235e-05, "loss": 0.5955, "step": 11875 }, { "epoch": 0.34685625027371864, "grad_norm": 0.640243182691163, "learning_rate": 4.913219789132198e-05, "loss": 0.6426, "step": 11880 }, { "epoch": 0.34700223354403587, "grad_norm": 0.5695302688325717, "learning_rate": 4.912949445796161e-05, "loss": 0.6545, "step": 11885 }, { "epoch": 0.3471482168143531, "grad_norm": 0.6234463323457335, "learning_rate": 4.9126791024601246e-05, "loss": 0.6142, "step": 11890 }, { "epoch": 0.3472942000846703, "grad_norm": 0.6365238980260993, "learning_rate": 4.912408759124088e-05, "loss": 0.6442, "step": 11895 }, { "epoch": 0.3474401833549875, "grad_norm": 0.6484429940963363, "learning_rate": 4.9121384157880514e-05, "loss": 0.6204, "step": 11900 }, { "epoch": 0.3475861666253047, "grad_norm": 0.5949446917740668, "learning_rate": 4.911868072452014e-05, "loss": 0.619, "step": 11905 }, { "epoch": 0.34773214989562196, "grad_norm": 0.7833542600408665, "learning_rate": 4.9115977291159775e-05, "loss": 0.6186, "step": 11910 }, { "epoch": 0.3478781331659392, "grad_norm": 0.6345304776681397, "learning_rate": 4.911327385779941e-05, "loss": 0.6254, "step": 11915 }, { "epoch": 0.3480241164362564, "grad_norm": 0.5551128514803334, "learning_rate": 4.9110570424439036e-05, "loss": 0.6023, "step": 11920 }, { "epoch": 0.34817009970657364, "grad_norm": 0.5884208091741251, "learning_rate": 4.910786699107867e-05, "loss": 0.6357, "step": 11925 }, { "epoch": 0.3483160829768909, "grad_norm": 0.6364307310588033, "learning_rate": 4.9105163557718304e-05, "loss": 0.6042, "step": 11930 }, { "epoch": 0.34846206624720805, "grad_norm": 0.6375974508119141, "learning_rate": 4.910246012435794e-05, "loss": 0.654, "step": 11935 }, { "epoch": 0.3486080495175253, "grad_norm": 0.5436916821576514, "learning_rate": 4.9099756690997566e-05, "loss": 0.588, "step": 11940 }, { "epoch": 0.3487540327878425, "grad_norm": 0.687728556938014, "learning_rate": 4.90970532576372e-05, "loss": 0.6472, "step": 11945 }, { "epoch": 0.34890001605815973, "grad_norm": 0.5570595516159386, "learning_rate": 4.9094349824276834e-05, "loss": 0.6063, "step": 11950 }, { "epoch": 0.34904599932847696, "grad_norm": 0.6456258063902083, "learning_rate": 4.909164639091647e-05, "loss": 0.6424, "step": 11955 }, { "epoch": 0.3491919825987942, "grad_norm": 0.6135423054022296, "learning_rate": 4.90889429575561e-05, "loss": 0.6463, "step": 11960 }, { "epoch": 0.3493379658691114, "grad_norm": 0.6315011622708381, "learning_rate": 4.908623952419573e-05, "loss": 0.5986, "step": 11965 }, { "epoch": 0.3494839491394286, "grad_norm": 0.6354394425145208, "learning_rate": 4.908353609083536e-05, "loss": 0.6207, "step": 11970 }, { "epoch": 0.3496299324097458, "grad_norm": 0.6345026582686328, "learning_rate": 4.9080832657475e-05, "loss": 0.6565, "step": 11975 }, { "epoch": 0.34977591568006305, "grad_norm": 0.6405324054402441, "learning_rate": 4.9078129224114624e-05, "loss": 0.6168, "step": 11980 }, { "epoch": 0.3499218989503803, "grad_norm": 0.6370972087836607, "learning_rate": 4.907542579075426e-05, "loss": 0.635, "step": 11985 }, { "epoch": 0.3500678822206975, "grad_norm": 0.5811555598655526, "learning_rate": 4.907272235739389e-05, "loss": 0.6093, "step": 11990 }, { "epoch": 0.35021386549101474, "grad_norm": 0.5931180385070356, "learning_rate": 4.907001892403352e-05, "loss": 0.6604, "step": 11995 }, { "epoch": 0.35035984876133197, "grad_norm": 0.5740800574990044, "learning_rate": 4.9067315490673154e-05, "loss": 0.6084, "step": 12000 }, { "epoch": 0.3505058320316492, "grad_norm": 0.6009715187265984, "learning_rate": 4.9064612057312794e-05, "loss": 0.6486, "step": 12005 }, { "epoch": 0.3506518153019664, "grad_norm": 0.6064917962541634, "learning_rate": 4.906190862395242e-05, "loss": 0.6169, "step": 12010 }, { "epoch": 0.3507977985722836, "grad_norm": 0.6326618156440175, "learning_rate": 4.9059205190592056e-05, "loss": 0.6373, "step": 12015 }, { "epoch": 0.35094378184260083, "grad_norm": 0.6020473446246061, "learning_rate": 4.905650175723169e-05, "loss": 0.6028, "step": 12020 }, { "epoch": 0.35108976511291806, "grad_norm": 0.6829224711055839, "learning_rate": 4.905379832387132e-05, "loss": 0.6601, "step": 12025 }, { "epoch": 0.3512357483832353, "grad_norm": 0.61454224299072, "learning_rate": 4.905109489051095e-05, "loss": 0.6524, "step": 12030 }, { "epoch": 0.3513817316535525, "grad_norm": 0.5592299487625007, "learning_rate": 4.9048391457150585e-05, "loss": 0.6524, "step": 12035 }, { "epoch": 0.35152771492386975, "grad_norm": 0.557806281538698, "learning_rate": 4.904568802379021e-05, "loss": 0.6233, "step": 12040 }, { "epoch": 0.3516736981941869, "grad_norm": 0.6237264335540915, "learning_rate": 4.9042984590429846e-05, "loss": 0.6634, "step": 12045 }, { "epoch": 0.35181968146450415, "grad_norm": 0.6222147612223752, "learning_rate": 4.904028115706948e-05, "loss": 0.644, "step": 12050 }, { "epoch": 0.3519656647348214, "grad_norm": 0.597500583734974, "learning_rate": 4.903757772370911e-05, "loss": 0.6108, "step": 12055 }, { "epoch": 0.3521116480051386, "grad_norm": 0.5394920327494341, "learning_rate": 4.903487429034875e-05, "loss": 0.5887, "step": 12060 }, { "epoch": 0.35225763127545584, "grad_norm": 0.5620142002020623, "learning_rate": 4.903217085698838e-05, "loss": 0.6122, "step": 12065 }, { "epoch": 0.35240361454577307, "grad_norm": 0.5603091649794568, "learning_rate": 4.902946742362801e-05, "loss": 0.6315, "step": 12070 }, { "epoch": 0.3525495978160903, "grad_norm": 0.5626618945264124, "learning_rate": 4.9026763990267643e-05, "loss": 0.5909, "step": 12075 }, { "epoch": 0.3526955810864075, "grad_norm": 0.6447137417332769, "learning_rate": 4.902406055690728e-05, "loss": 0.6359, "step": 12080 }, { "epoch": 0.3528415643567247, "grad_norm": 0.5802895791467356, "learning_rate": 4.9021357123546905e-05, "loss": 0.6224, "step": 12085 }, { "epoch": 0.35298754762704193, "grad_norm": 0.6931560658265117, "learning_rate": 4.901865369018654e-05, "loss": 0.6332, "step": 12090 }, { "epoch": 0.35313353089735916, "grad_norm": 0.6102345870652787, "learning_rate": 4.901595025682617e-05, "loss": 0.6467, "step": 12095 }, { "epoch": 0.3532795141676764, "grad_norm": 0.5972236367797237, "learning_rate": 4.90132468234658e-05, "loss": 0.6198, "step": 12100 }, { "epoch": 0.3534254974379936, "grad_norm": 0.6011810793345504, "learning_rate": 4.9010543390105434e-05, "loss": 0.6138, "step": 12105 }, { "epoch": 0.35357148070831085, "grad_norm": 0.6063848078421769, "learning_rate": 4.900783995674507e-05, "loss": 0.6601, "step": 12110 }, { "epoch": 0.353717463978628, "grad_norm": 0.6441514638953261, "learning_rate": 4.9005136523384695e-05, "loss": 0.648, "step": 12115 }, { "epoch": 0.35386344724894525, "grad_norm": 0.5475911297870001, "learning_rate": 4.9002433090024336e-05, "loss": 0.5992, "step": 12120 }, { "epoch": 0.3540094305192625, "grad_norm": 0.7499477753605966, "learning_rate": 4.899972965666397e-05, "loss": 0.6319, "step": 12125 }, { "epoch": 0.3541554137895797, "grad_norm": 0.5911317495694952, "learning_rate": 4.89970262233036e-05, "loss": 0.618, "step": 12130 }, { "epoch": 0.35430139705989694, "grad_norm": 0.6368272094853985, "learning_rate": 4.899432278994323e-05, "loss": 0.6305, "step": 12135 }, { "epoch": 0.35444738033021417, "grad_norm": 0.6688922793262446, "learning_rate": 4.8991619356582865e-05, "loss": 0.6478, "step": 12140 }, { "epoch": 0.3545933636005314, "grad_norm": 0.593399417048618, "learning_rate": 4.898891592322249e-05, "loss": 0.6296, "step": 12145 }, { "epoch": 0.3547393468708486, "grad_norm": 0.6594760278023793, "learning_rate": 4.8986212489862126e-05, "loss": 0.6274, "step": 12150 }, { "epoch": 0.3548853301411658, "grad_norm": 0.5704450477936943, "learning_rate": 4.898350905650176e-05, "loss": 0.6407, "step": 12155 }, { "epoch": 0.35503131341148303, "grad_norm": 0.6547588335784653, "learning_rate": 4.898080562314139e-05, "loss": 0.634, "step": 12160 }, { "epoch": 0.35517729668180026, "grad_norm": 0.586627282810945, "learning_rate": 4.897810218978102e-05, "loss": 0.6686, "step": 12165 }, { "epoch": 0.3553232799521175, "grad_norm": 0.6075227057382895, "learning_rate": 4.8975398756420656e-05, "loss": 0.6283, "step": 12170 }, { "epoch": 0.3554692632224347, "grad_norm": 0.6236802685079594, "learning_rate": 4.897269532306029e-05, "loss": 0.6458, "step": 12175 }, { "epoch": 0.35561524649275195, "grad_norm": 0.6533396571500417, "learning_rate": 4.8969991889699924e-05, "loss": 0.6722, "step": 12180 }, { "epoch": 0.3557612297630692, "grad_norm": 0.5747812311973532, "learning_rate": 4.896728845633956e-05, "loss": 0.6206, "step": 12185 }, { "epoch": 0.35590721303338635, "grad_norm": 0.6473090625138365, "learning_rate": 4.8964585022979185e-05, "loss": 0.6315, "step": 12190 }, { "epoch": 0.3560531963037036, "grad_norm": 0.6247251432298638, "learning_rate": 4.896188158961882e-05, "loss": 0.6032, "step": 12195 }, { "epoch": 0.3561991795740208, "grad_norm": 0.6689205218494121, "learning_rate": 4.895917815625845e-05, "loss": 0.6416, "step": 12200 }, { "epoch": 0.35634516284433804, "grad_norm": 0.6528231258383939, "learning_rate": 4.895647472289808e-05, "loss": 0.6934, "step": 12205 }, { "epoch": 0.35649114611465527, "grad_norm": 0.5645011035722701, "learning_rate": 4.8953771289537714e-05, "loss": 0.6351, "step": 12210 }, { "epoch": 0.3566371293849725, "grad_norm": 0.5894931084839063, "learning_rate": 4.895106785617735e-05, "loss": 0.6351, "step": 12215 }, { "epoch": 0.3567831126552897, "grad_norm": 0.6413856030452779, "learning_rate": 4.8948364422816976e-05, "loss": 0.6308, "step": 12220 }, { "epoch": 0.3569290959256069, "grad_norm": 0.5940973885009, "learning_rate": 4.894566098945661e-05, "loss": 0.6004, "step": 12225 }, { "epoch": 0.3570750791959241, "grad_norm": 0.5728492639592416, "learning_rate": 4.894295755609625e-05, "loss": 0.6064, "step": 12230 }, { "epoch": 0.35722106246624136, "grad_norm": 0.6865936273698121, "learning_rate": 4.894025412273588e-05, "loss": 0.6016, "step": 12235 }, { "epoch": 0.3573670457365586, "grad_norm": 0.6135935757253707, "learning_rate": 4.893755068937551e-05, "loss": 0.6667, "step": 12240 }, { "epoch": 0.3575130290068758, "grad_norm": 0.5939356303612304, "learning_rate": 4.8934847256015146e-05, "loss": 0.5929, "step": 12245 }, { "epoch": 0.35765901227719304, "grad_norm": 0.5651523797428528, "learning_rate": 4.893214382265477e-05, "loss": 0.6326, "step": 12250 }, { "epoch": 0.3578049955475103, "grad_norm": 0.5959178661337913, "learning_rate": 4.892944038929441e-05, "loss": 0.6535, "step": 12255 }, { "epoch": 0.3579509788178275, "grad_norm": 0.5433985052238499, "learning_rate": 4.892673695593404e-05, "loss": 0.6133, "step": 12260 }, { "epoch": 0.3580969620881447, "grad_norm": 0.5655724333612483, "learning_rate": 4.892403352257367e-05, "loss": 0.5899, "step": 12265 }, { "epoch": 0.3582429453584619, "grad_norm": 0.5802071746985764, "learning_rate": 4.89213300892133e-05, "loss": 0.6154, "step": 12270 }, { "epoch": 0.35838892862877914, "grad_norm": 0.7286057181486426, "learning_rate": 4.8918626655852936e-05, "loss": 0.6195, "step": 12275 }, { "epoch": 0.35853491189909636, "grad_norm": 0.628057828068437, "learning_rate": 4.891592322249256e-05, "loss": 0.6785, "step": 12280 }, { "epoch": 0.3586808951694136, "grad_norm": 0.6336867659018769, "learning_rate": 4.89132197891322e-05, "loss": 0.641, "step": 12285 }, { "epoch": 0.3588268784397308, "grad_norm": 0.6135020783247113, "learning_rate": 4.891051635577184e-05, "loss": 0.6365, "step": 12290 }, { "epoch": 0.35897286171004805, "grad_norm": 0.605352975741542, "learning_rate": 4.8907812922411465e-05, "loss": 0.6588, "step": 12295 }, { "epoch": 0.3591188449803652, "grad_norm": 0.6527141596358769, "learning_rate": 4.89051094890511e-05, "loss": 0.6502, "step": 12300 }, { "epoch": 0.35926482825068246, "grad_norm": 0.5698014551803031, "learning_rate": 4.890240605569073e-05, "loss": 0.6347, "step": 12305 }, { "epoch": 0.3594108115209997, "grad_norm": 0.5806572874653546, "learning_rate": 4.889970262233036e-05, "loss": 0.6131, "step": 12310 }, { "epoch": 0.3595567947913169, "grad_norm": 0.6176251241782283, "learning_rate": 4.8896999188969995e-05, "loss": 0.6414, "step": 12315 }, { "epoch": 0.35970277806163414, "grad_norm": 0.6718303984104831, "learning_rate": 4.889429575560963e-05, "loss": 0.642, "step": 12320 }, { "epoch": 0.3598487613319514, "grad_norm": 0.5820886100322817, "learning_rate": 4.8891592322249256e-05, "loss": 0.6771, "step": 12325 }, { "epoch": 0.3599947446022686, "grad_norm": 0.6094871408964635, "learning_rate": 4.888888888888889e-05, "loss": 0.6239, "step": 12330 }, { "epoch": 0.3601407278725858, "grad_norm": 0.5978096748179517, "learning_rate": 4.8886185455528524e-05, "loss": 0.624, "step": 12335 }, { "epoch": 0.360286711142903, "grad_norm": 0.6438314077160066, "learning_rate": 4.888348202216815e-05, "loss": 0.6567, "step": 12340 }, { "epoch": 0.36043269441322023, "grad_norm": 0.6012765016281569, "learning_rate": 4.888077858880779e-05, "loss": 0.6324, "step": 12345 }, { "epoch": 0.36057867768353746, "grad_norm": 0.5453174948117361, "learning_rate": 4.8878075155447426e-05, "loss": 0.606, "step": 12350 }, { "epoch": 0.3607246609538547, "grad_norm": 0.7231042727044589, "learning_rate": 4.887537172208705e-05, "loss": 0.6451, "step": 12355 }, { "epoch": 0.3608706442241719, "grad_norm": 0.5979183184238166, "learning_rate": 4.887266828872669e-05, "loss": 0.6163, "step": 12360 }, { "epoch": 0.36101662749448915, "grad_norm": 0.6072928268300761, "learning_rate": 4.886996485536632e-05, "loss": 0.6293, "step": 12365 }, { "epoch": 0.3611626107648064, "grad_norm": 0.640855658256175, "learning_rate": 4.886726142200595e-05, "loss": 0.6303, "step": 12370 }, { "epoch": 0.36130859403512355, "grad_norm": 0.5984751231165244, "learning_rate": 4.886455798864558e-05, "loss": 0.6535, "step": 12375 }, { "epoch": 0.3614545773054408, "grad_norm": 0.6052524166562585, "learning_rate": 4.8861854555285216e-05, "loss": 0.6413, "step": 12380 }, { "epoch": 0.361600560575758, "grad_norm": 0.6527532761273022, "learning_rate": 4.8859151121924844e-05, "loss": 0.6154, "step": 12385 }, { "epoch": 0.36174654384607524, "grad_norm": 0.6066542771511585, "learning_rate": 4.885644768856448e-05, "loss": 0.6202, "step": 12390 }, { "epoch": 0.36189252711639247, "grad_norm": 0.5957535402788712, "learning_rate": 4.885374425520411e-05, "loss": 0.6483, "step": 12395 }, { "epoch": 0.3620385103867097, "grad_norm": 0.6832610413315782, "learning_rate": 4.8851040821843746e-05, "loss": 0.6428, "step": 12400 }, { "epoch": 0.36218449365702693, "grad_norm": 0.6102538474463333, "learning_rate": 4.884833738848338e-05, "loss": 0.6264, "step": 12405 }, { "epoch": 0.3623304769273441, "grad_norm": 0.6266054487795327, "learning_rate": 4.884563395512301e-05, "loss": 0.6432, "step": 12410 }, { "epoch": 0.36247646019766133, "grad_norm": 0.6995008976607672, "learning_rate": 4.884293052176264e-05, "loss": 0.6599, "step": 12415 }, { "epoch": 0.36262244346797856, "grad_norm": 0.6020473174806048, "learning_rate": 4.8840227088402275e-05, "loss": 0.6134, "step": 12420 }, { "epoch": 0.3627684267382958, "grad_norm": 0.5549716687448318, "learning_rate": 4.883752365504191e-05, "loss": 0.5908, "step": 12425 }, { "epoch": 0.362914410008613, "grad_norm": 0.6206111905854252, "learning_rate": 4.8834820221681536e-05, "loss": 0.627, "step": 12430 }, { "epoch": 0.36306039327893025, "grad_norm": 0.5936050995360024, "learning_rate": 4.883211678832117e-05, "loss": 0.6042, "step": 12435 }, { "epoch": 0.3632063765492475, "grad_norm": 0.6222076481677011, "learning_rate": 4.8829413354960804e-05, "loss": 0.6645, "step": 12440 }, { "epoch": 0.36335235981956465, "grad_norm": 0.9247632388600519, "learning_rate": 4.882670992160043e-05, "loss": 0.6417, "step": 12445 }, { "epoch": 0.3634983430898819, "grad_norm": 0.6253249559967652, "learning_rate": 4.8824006488240065e-05, "loss": 0.6631, "step": 12450 }, { "epoch": 0.3636443263601991, "grad_norm": 0.5874124259706648, "learning_rate": 4.88213030548797e-05, "loss": 0.6047, "step": 12455 }, { "epoch": 0.36379030963051634, "grad_norm": 0.6200832408644056, "learning_rate": 4.8818599621519334e-05, "loss": 0.6146, "step": 12460 }, { "epoch": 0.36393629290083357, "grad_norm": 0.5862491861560749, "learning_rate": 4.881589618815897e-05, "loss": 0.6095, "step": 12465 }, { "epoch": 0.3640822761711508, "grad_norm": 0.5862510231184974, "learning_rate": 4.8813192754798595e-05, "loss": 0.6473, "step": 12470 }, { "epoch": 0.364228259441468, "grad_norm": 0.5838920264380311, "learning_rate": 4.881048932143823e-05, "loss": 0.6046, "step": 12475 }, { "epoch": 0.36437424271178526, "grad_norm": 0.6161019780631896, "learning_rate": 4.880778588807786e-05, "loss": 0.6326, "step": 12480 }, { "epoch": 0.36452022598210243, "grad_norm": 0.5667660611631842, "learning_rate": 4.88050824547175e-05, "loss": 0.619, "step": 12485 }, { "epoch": 0.36466620925241966, "grad_norm": 0.6620840184421316, "learning_rate": 4.8802379021357124e-05, "loss": 0.6331, "step": 12490 }, { "epoch": 0.3648121925227369, "grad_norm": 0.6326585105020984, "learning_rate": 4.879967558799676e-05, "loss": 0.6692, "step": 12495 }, { "epoch": 0.3649581757930541, "grad_norm": 0.5664551958847732, "learning_rate": 4.879697215463639e-05, "loss": 0.646, "step": 12500 }, { "epoch": 0.36510415906337135, "grad_norm": 0.7923627985428263, "learning_rate": 4.879426872127602e-05, "loss": 0.6339, "step": 12505 }, { "epoch": 0.3652501423336886, "grad_norm": 0.6131065749824284, "learning_rate": 4.879156528791565e-05, "loss": 0.6178, "step": 12510 }, { "epoch": 0.3653961256040058, "grad_norm": 0.6230250345066418, "learning_rate": 4.878886185455529e-05, "loss": 0.6349, "step": 12515 }, { "epoch": 0.365542108874323, "grad_norm": 0.5984624003689029, "learning_rate": 4.878615842119492e-05, "loss": 0.6321, "step": 12520 }, { "epoch": 0.3656880921446402, "grad_norm": 0.6001394648114572, "learning_rate": 4.8783454987834555e-05, "loss": 0.6108, "step": 12525 }, { "epoch": 0.36583407541495744, "grad_norm": 0.6592615034927443, "learning_rate": 4.878075155447418e-05, "loss": 0.6858, "step": 12530 }, { "epoch": 0.36598005868527467, "grad_norm": 0.5892312896320738, "learning_rate": 4.8778048121113817e-05, "loss": 0.6347, "step": 12535 }, { "epoch": 0.3661260419555919, "grad_norm": 0.6509998580258033, "learning_rate": 4.877534468775345e-05, "loss": 0.7067, "step": 12540 }, { "epoch": 0.3662720252259091, "grad_norm": 0.5880092173959289, "learning_rate": 4.877264125439308e-05, "loss": 0.606, "step": 12545 }, { "epoch": 0.36641800849622636, "grad_norm": 0.5891258249522935, "learning_rate": 4.876993782103271e-05, "loss": 0.611, "step": 12550 }, { "epoch": 0.36656399176654353, "grad_norm": 0.5998060064151732, "learning_rate": 4.8767234387672346e-05, "loss": 0.6093, "step": 12555 }, { "epoch": 0.36670997503686076, "grad_norm": 0.5993015396878628, "learning_rate": 4.876453095431198e-05, "loss": 0.6152, "step": 12560 }, { "epoch": 0.366855958307178, "grad_norm": 0.7516175121545295, "learning_rate": 4.876182752095161e-05, "loss": 0.6239, "step": 12565 }, { "epoch": 0.3670019415774952, "grad_norm": 0.5708026585255056, "learning_rate": 4.875912408759125e-05, "loss": 0.6365, "step": 12570 }, { "epoch": 0.36714792484781245, "grad_norm": 0.6060800424413864, "learning_rate": 4.8756420654230875e-05, "loss": 0.6203, "step": 12575 }, { "epoch": 0.3672939081181297, "grad_norm": 0.6499852825874252, "learning_rate": 4.875371722087051e-05, "loss": 0.645, "step": 12580 }, { "epoch": 0.3674398913884469, "grad_norm": 0.7083545485305899, "learning_rate": 4.875101378751014e-05, "loss": 0.6427, "step": 12585 }, { "epoch": 0.3675858746587641, "grad_norm": 0.7958203201366855, "learning_rate": 4.874831035414977e-05, "loss": 0.6376, "step": 12590 }, { "epoch": 0.3677318579290813, "grad_norm": 0.6181279941646265, "learning_rate": 4.8745606920789404e-05, "loss": 0.6574, "step": 12595 }, { "epoch": 0.36787784119939854, "grad_norm": 0.6433332905605389, "learning_rate": 4.874290348742904e-05, "loss": 0.6889, "step": 12600 }, { "epoch": 0.36802382446971577, "grad_norm": 0.6907224740539258, "learning_rate": 4.8740200054068666e-05, "loss": 0.6248, "step": 12605 }, { "epoch": 0.368169807740033, "grad_norm": 0.6703991103775566, "learning_rate": 4.87374966207083e-05, "loss": 0.6257, "step": 12610 }, { "epoch": 0.3683157910103502, "grad_norm": 0.6024771120531374, "learning_rate": 4.8734793187347934e-05, "loss": 0.633, "step": 12615 }, { "epoch": 0.36846177428066745, "grad_norm": 0.603548145119275, "learning_rate": 4.873208975398756e-05, "loss": 0.6213, "step": 12620 }, { "epoch": 0.3686077575509847, "grad_norm": 0.5882674341648864, "learning_rate": 4.8729386320627195e-05, "loss": 0.644, "step": 12625 }, { "epoch": 0.36875374082130186, "grad_norm": 0.9576273104753452, "learning_rate": 4.8726682887266836e-05, "loss": 0.6531, "step": 12630 }, { "epoch": 0.3688997240916191, "grad_norm": 0.6404241884584355, "learning_rate": 4.872397945390646e-05, "loss": 0.6358, "step": 12635 }, { "epoch": 0.3690457073619363, "grad_norm": 0.6908449958256744, "learning_rate": 4.87212760205461e-05, "loss": 0.6479, "step": 12640 }, { "epoch": 0.36919169063225354, "grad_norm": 0.8120415361430315, "learning_rate": 4.871857258718573e-05, "loss": 0.6994, "step": 12645 }, { "epoch": 0.3693376739025708, "grad_norm": 0.6016156616957473, "learning_rate": 4.871586915382536e-05, "loss": 0.622, "step": 12650 }, { "epoch": 0.369483657172888, "grad_norm": 0.6190972327123008, "learning_rate": 4.871316572046499e-05, "loss": 0.6061, "step": 12655 }, { "epoch": 0.36962964044320523, "grad_norm": 0.6744529885758876, "learning_rate": 4.8710462287104626e-05, "loss": 0.6981, "step": 12660 }, { "epoch": 0.3697756237135224, "grad_norm": 0.559604674855468, "learning_rate": 4.8707758853744253e-05, "loss": 0.594, "step": 12665 }, { "epoch": 0.36992160698383963, "grad_norm": 0.6267672220006224, "learning_rate": 4.870505542038389e-05, "loss": 0.6163, "step": 12670 }, { "epoch": 0.37006759025415686, "grad_norm": 0.6021522182241678, "learning_rate": 4.870235198702352e-05, "loss": 0.5975, "step": 12675 }, { "epoch": 0.3702135735244741, "grad_norm": 0.5771177808851099, "learning_rate": 4.869964855366315e-05, "loss": 0.6182, "step": 12680 }, { "epoch": 0.3703595567947913, "grad_norm": 0.578544192829128, "learning_rate": 4.869694512030279e-05, "loss": 0.6377, "step": 12685 }, { "epoch": 0.37050554006510855, "grad_norm": 0.6129255550368247, "learning_rate": 4.8694241686942423e-05, "loss": 0.6639, "step": 12690 }, { "epoch": 0.3706515233354258, "grad_norm": 0.5655112519200888, "learning_rate": 4.869153825358205e-05, "loss": 0.6387, "step": 12695 }, { "epoch": 0.37079750660574295, "grad_norm": 0.5877071154041199, "learning_rate": 4.8688834820221685e-05, "loss": 0.6405, "step": 12700 }, { "epoch": 0.3709434898760602, "grad_norm": 0.6112623678177275, "learning_rate": 4.868613138686132e-05, "loss": 0.6224, "step": 12705 }, { "epoch": 0.3710894731463774, "grad_norm": 0.6198160131428885, "learning_rate": 4.8683427953500946e-05, "loss": 0.6302, "step": 12710 }, { "epoch": 0.37123545641669464, "grad_norm": 0.5685348702085996, "learning_rate": 4.868072452014058e-05, "loss": 0.6677, "step": 12715 }, { "epoch": 0.37138143968701187, "grad_norm": 0.6573181805421536, "learning_rate": 4.8678021086780214e-05, "loss": 0.6523, "step": 12720 }, { "epoch": 0.3715274229573291, "grad_norm": 0.5829381586998916, "learning_rate": 4.867531765341984e-05, "loss": 0.6119, "step": 12725 }, { "epoch": 0.37167340622764633, "grad_norm": 0.5848057294728659, "learning_rate": 4.8672614220059475e-05, "loss": 0.5633, "step": 12730 }, { "epoch": 0.37181938949796356, "grad_norm": 0.5592092337960117, "learning_rate": 4.866991078669911e-05, "loss": 0.6678, "step": 12735 }, { "epoch": 0.37196537276828073, "grad_norm": 0.5949737170504324, "learning_rate": 4.866720735333874e-05, "loss": 0.6127, "step": 12740 }, { "epoch": 0.37211135603859796, "grad_norm": 0.6114328771590811, "learning_rate": 4.866450391997838e-05, "loss": 0.6543, "step": 12745 }, { "epoch": 0.3722573393089152, "grad_norm": 0.6368020533765617, "learning_rate": 4.866180048661801e-05, "loss": 0.6621, "step": 12750 }, { "epoch": 0.3724033225792324, "grad_norm": 0.5847753195251042, "learning_rate": 4.865909705325764e-05, "loss": 0.6322, "step": 12755 }, { "epoch": 0.37254930584954965, "grad_norm": 0.6444506887363148, "learning_rate": 4.865639361989727e-05, "loss": 0.5947, "step": 12760 }, { "epoch": 0.3726952891198669, "grad_norm": 0.623967871757896, "learning_rate": 4.8653690186536907e-05, "loss": 0.6298, "step": 12765 }, { "epoch": 0.3728412723901841, "grad_norm": 0.692248280306805, "learning_rate": 4.8650986753176534e-05, "loss": 0.6348, "step": 12770 }, { "epoch": 0.3729872556605013, "grad_norm": 0.5885573850808816, "learning_rate": 4.864828331981617e-05, "loss": 0.6519, "step": 12775 }, { "epoch": 0.3731332389308185, "grad_norm": 0.6024037607224555, "learning_rate": 4.86455798864558e-05, "loss": 0.6218, "step": 12780 }, { "epoch": 0.37327922220113574, "grad_norm": 0.59230673056019, "learning_rate": 4.864287645309543e-05, "loss": 0.6389, "step": 12785 }, { "epoch": 0.37342520547145297, "grad_norm": 0.6171339611615158, "learning_rate": 4.864017301973506e-05, "loss": 0.5867, "step": 12790 }, { "epoch": 0.3735711887417702, "grad_norm": 0.6556639814691108, "learning_rate": 4.8637469586374704e-05, "loss": 0.6177, "step": 12795 }, { "epoch": 0.37371717201208743, "grad_norm": 0.562935941365602, "learning_rate": 4.863476615301433e-05, "loss": 0.6587, "step": 12800 }, { "epoch": 0.37386315528240466, "grad_norm": 0.6359716834640944, "learning_rate": 4.8632062719653965e-05, "loss": 0.6171, "step": 12805 }, { "epoch": 0.37400913855272183, "grad_norm": 0.5698014659598661, "learning_rate": 4.86293592862936e-05, "loss": 0.6214, "step": 12810 }, { "epoch": 0.37415512182303906, "grad_norm": 0.5804517145564462, "learning_rate": 4.8626655852933226e-05, "loss": 0.6614, "step": 12815 }, { "epoch": 0.3743011050933563, "grad_norm": 0.5796107611628815, "learning_rate": 4.862395241957286e-05, "loss": 0.6352, "step": 12820 }, { "epoch": 0.3744470883636735, "grad_norm": 0.5580091476306775, "learning_rate": 4.8621248986212494e-05, "loss": 0.6087, "step": 12825 }, { "epoch": 0.37459307163399075, "grad_norm": 0.5859508937036844, "learning_rate": 4.861854555285212e-05, "loss": 0.6373, "step": 12830 }, { "epoch": 0.374739054904308, "grad_norm": 0.5745297170486265, "learning_rate": 4.8615842119491756e-05, "loss": 0.6038, "step": 12835 }, { "epoch": 0.3748850381746252, "grad_norm": 0.607862319063766, "learning_rate": 4.861313868613139e-05, "loss": 0.6491, "step": 12840 }, { "epoch": 0.37503102144494244, "grad_norm": 0.5634310056225832, "learning_rate": 4.861043525277102e-05, "loss": 0.6524, "step": 12845 }, { "epoch": 0.3751770047152596, "grad_norm": 0.6109212812494176, "learning_rate": 4.860773181941065e-05, "loss": 0.6187, "step": 12850 }, { "epoch": 0.37532298798557684, "grad_norm": 0.8959709005600293, "learning_rate": 4.860502838605029e-05, "loss": 0.6248, "step": 12855 }, { "epoch": 0.37546897125589407, "grad_norm": 0.6687927833165627, "learning_rate": 4.860232495268992e-05, "loss": 0.6751, "step": 12860 }, { "epoch": 0.3756149545262113, "grad_norm": 0.5636219540995314, "learning_rate": 4.859962151932955e-05, "loss": 0.607, "step": 12865 }, { "epoch": 0.3757609377965285, "grad_norm": 0.5922610629772492, "learning_rate": 4.859691808596919e-05, "loss": 0.6202, "step": 12870 }, { "epoch": 0.37590692106684576, "grad_norm": 0.5654641606204232, "learning_rate": 4.8594214652608814e-05, "loss": 0.6271, "step": 12875 }, { "epoch": 0.376052904337163, "grad_norm": 0.6279841865570892, "learning_rate": 4.859151121924845e-05, "loss": 0.6695, "step": 12880 }, { "epoch": 0.37619888760748016, "grad_norm": 0.5922855541797544, "learning_rate": 4.858880778588808e-05, "loss": 0.6179, "step": 12885 }, { "epoch": 0.3763448708777974, "grad_norm": 0.5386585259149005, "learning_rate": 4.858610435252771e-05, "loss": 0.6172, "step": 12890 }, { "epoch": 0.3764908541481146, "grad_norm": 0.7448765279816585, "learning_rate": 4.8583400919167343e-05, "loss": 0.6294, "step": 12895 }, { "epoch": 0.37663683741843185, "grad_norm": 0.5474344927068282, "learning_rate": 4.858069748580698e-05, "loss": 0.6343, "step": 12900 }, { "epoch": 0.3767828206887491, "grad_norm": 0.5841025489309908, "learning_rate": 4.8577994052446605e-05, "loss": 0.6096, "step": 12905 }, { "epoch": 0.3769288039590663, "grad_norm": 0.5523971470651894, "learning_rate": 4.8575290619086245e-05, "loss": 0.616, "step": 12910 }, { "epoch": 0.37707478722938353, "grad_norm": 0.7119947485176099, "learning_rate": 4.857258718572588e-05, "loss": 0.6766, "step": 12915 }, { "epoch": 0.3772207704997007, "grad_norm": 0.6837744114790388, "learning_rate": 4.856988375236551e-05, "loss": 0.633, "step": 12920 }, { "epoch": 0.37736675377001794, "grad_norm": 0.560238246594641, "learning_rate": 4.856718031900514e-05, "loss": 0.6167, "step": 12925 }, { "epoch": 0.37751273704033517, "grad_norm": 0.560577579786689, "learning_rate": 4.8564476885644775e-05, "loss": 0.6373, "step": 12930 }, { "epoch": 0.3776587203106524, "grad_norm": 0.5321234402399063, "learning_rate": 4.85617734522844e-05, "loss": 0.6111, "step": 12935 }, { "epoch": 0.3778047035809696, "grad_norm": 0.5850399807550558, "learning_rate": 4.8559070018924036e-05, "loss": 0.5996, "step": 12940 }, { "epoch": 0.37795068685128685, "grad_norm": 0.5713538953819866, "learning_rate": 4.855636658556367e-05, "loss": 0.6333, "step": 12945 }, { "epoch": 0.3780966701216041, "grad_norm": 0.6594214395313338, "learning_rate": 4.85536631522033e-05, "loss": 0.6577, "step": 12950 }, { "epoch": 0.3782426533919213, "grad_norm": 0.6498290080423903, "learning_rate": 4.855095971884293e-05, "loss": 0.6603, "step": 12955 }, { "epoch": 0.3783886366622385, "grad_norm": 0.6360594784577296, "learning_rate": 4.8548256285482565e-05, "loss": 0.6669, "step": 12960 }, { "epoch": 0.3785346199325557, "grad_norm": 0.5986508848063895, "learning_rate": 4.85455528521222e-05, "loss": 0.6397, "step": 12965 }, { "epoch": 0.37868060320287295, "grad_norm": 0.6106054502242548, "learning_rate": 4.854284941876183e-05, "loss": 0.6245, "step": 12970 }, { "epoch": 0.3788265864731902, "grad_norm": 0.5798069379805871, "learning_rate": 4.854014598540147e-05, "loss": 0.6385, "step": 12975 }, { "epoch": 0.3789725697435074, "grad_norm": 0.6114775000211796, "learning_rate": 4.8537442552041095e-05, "loss": 0.6295, "step": 12980 }, { "epoch": 0.37911855301382463, "grad_norm": 0.7830457498966018, "learning_rate": 4.853473911868073e-05, "loss": 0.6708, "step": 12985 }, { "epoch": 0.37926453628414186, "grad_norm": 0.5348395350586562, "learning_rate": 4.853203568532036e-05, "loss": 0.6489, "step": 12990 }, { "epoch": 0.37941051955445904, "grad_norm": 0.6054712108701257, "learning_rate": 4.852933225195999e-05, "loss": 0.634, "step": 12995 }, { "epoch": 0.37955650282477627, "grad_norm": 0.6185589449399796, "learning_rate": 4.8526628818599624e-05, "loss": 0.6459, "step": 13000 }, { "epoch": 0.3797024860950935, "grad_norm": 0.8094441219275489, "learning_rate": 4.852392538523926e-05, "loss": 0.6374, "step": 13005 }, { "epoch": 0.3798484693654107, "grad_norm": 0.5584121743204362, "learning_rate": 4.8521221951878885e-05, "loss": 0.6258, "step": 13010 }, { "epoch": 0.37999445263572795, "grad_norm": 0.5993423486048521, "learning_rate": 4.851851851851852e-05, "loss": 0.6393, "step": 13015 }, { "epoch": 0.3801404359060452, "grad_norm": 0.595791194444469, "learning_rate": 4.851581508515815e-05, "loss": 0.6342, "step": 13020 }, { "epoch": 0.3802864191763624, "grad_norm": 0.5781712339725433, "learning_rate": 4.851311165179779e-05, "loss": 0.6551, "step": 13025 }, { "epoch": 0.3804324024466796, "grad_norm": 0.5603819621569373, "learning_rate": 4.851040821843742e-05, "loss": 0.6303, "step": 13030 }, { "epoch": 0.3805783857169968, "grad_norm": 0.6383117908168893, "learning_rate": 4.850770478507705e-05, "loss": 0.6346, "step": 13035 }, { "epoch": 0.38072436898731404, "grad_norm": 0.5809791628853015, "learning_rate": 4.850500135171668e-05, "loss": 0.6129, "step": 13040 }, { "epoch": 0.3808703522576313, "grad_norm": 0.7748092147868735, "learning_rate": 4.8502297918356316e-05, "loss": 0.6788, "step": 13045 }, { "epoch": 0.3810163355279485, "grad_norm": 0.6029634496492015, "learning_rate": 4.849959448499595e-05, "loss": 0.6334, "step": 13050 }, { "epoch": 0.38116231879826573, "grad_norm": 0.5871445391087797, "learning_rate": 4.849689105163558e-05, "loss": 0.6256, "step": 13055 }, { "epoch": 0.38130830206858296, "grad_norm": 0.6271968071817166, "learning_rate": 4.849418761827521e-05, "loss": 0.6333, "step": 13060 }, { "epoch": 0.3814542853389002, "grad_norm": 0.6044808704024659, "learning_rate": 4.8491484184914846e-05, "loss": 0.633, "step": 13065 }, { "epoch": 0.38160026860921736, "grad_norm": 0.5842351822355413, "learning_rate": 4.848878075155447e-05, "loss": 0.6233, "step": 13070 }, { "epoch": 0.3817462518795346, "grad_norm": 0.5525687681863918, "learning_rate": 4.848607731819411e-05, "loss": 0.6063, "step": 13075 }, { "epoch": 0.3818922351498518, "grad_norm": 0.6402579751889071, "learning_rate": 4.848337388483374e-05, "loss": 0.6487, "step": 13080 }, { "epoch": 0.38203821842016905, "grad_norm": 0.5886285187122324, "learning_rate": 4.8480670451473375e-05, "loss": 0.5752, "step": 13085 }, { "epoch": 0.3821842016904863, "grad_norm": 0.5720173206625389, "learning_rate": 4.847796701811301e-05, "loss": 0.6416, "step": 13090 }, { "epoch": 0.3823301849608035, "grad_norm": 0.5869222765162784, "learning_rate": 4.8475263584752636e-05, "loss": 0.6661, "step": 13095 }, { "epoch": 0.38247616823112074, "grad_norm": 0.5627254788902222, "learning_rate": 4.847256015139227e-05, "loss": 0.5975, "step": 13100 }, { "epoch": 0.3826221515014379, "grad_norm": 0.5735546354930915, "learning_rate": 4.8469856718031904e-05, "loss": 0.6057, "step": 13105 }, { "epoch": 0.38276813477175514, "grad_norm": 0.5320633339879983, "learning_rate": 4.846715328467154e-05, "loss": 0.6124, "step": 13110 }, { "epoch": 0.38291411804207237, "grad_norm": 0.6698030500894326, "learning_rate": 4.8464449851311165e-05, "loss": 0.6245, "step": 13115 }, { "epoch": 0.3830601013123896, "grad_norm": 0.5804093925311304, "learning_rate": 4.84617464179508e-05, "loss": 0.5975, "step": 13120 }, { "epoch": 0.38320608458270683, "grad_norm": 0.6285127927032222, "learning_rate": 4.8459042984590433e-05, "loss": 0.6584, "step": 13125 }, { "epoch": 0.38335206785302406, "grad_norm": 0.609864810145721, "learning_rate": 4.845633955123006e-05, "loss": 0.6403, "step": 13130 }, { "epoch": 0.3834980511233413, "grad_norm": 0.629663136478776, "learning_rate": 4.84536361178697e-05, "loss": 0.606, "step": 13135 }, { "epoch": 0.38364403439365846, "grad_norm": 0.611520565063972, "learning_rate": 4.845093268450933e-05, "loss": 0.6164, "step": 13140 }, { "epoch": 0.3837900176639757, "grad_norm": 0.5995729577163473, "learning_rate": 4.844822925114896e-05, "loss": 0.6158, "step": 13145 }, { "epoch": 0.3839360009342929, "grad_norm": 0.5652798265963269, "learning_rate": 4.84455258177886e-05, "loss": 0.6039, "step": 13150 }, { "epoch": 0.38408198420461015, "grad_norm": 0.556100342395009, "learning_rate": 4.8442822384428224e-05, "loss": 0.6391, "step": 13155 }, { "epoch": 0.3842279674749274, "grad_norm": 0.5886913927367371, "learning_rate": 4.844011895106786e-05, "loss": 0.6128, "step": 13160 }, { "epoch": 0.3843739507452446, "grad_norm": 0.6051836725723101, "learning_rate": 4.843741551770749e-05, "loss": 0.6471, "step": 13165 }, { "epoch": 0.38451993401556184, "grad_norm": 0.6186620574533692, "learning_rate": 4.843471208434712e-05, "loss": 0.6523, "step": 13170 }, { "epoch": 0.384665917285879, "grad_norm": 0.5711871552841481, "learning_rate": 4.843200865098675e-05, "loss": 0.5995, "step": 13175 }, { "epoch": 0.38481190055619624, "grad_norm": 0.5809753586768955, "learning_rate": 4.842930521762639e-05, "loss": 0.5883, "step": 13180 }, { "epoch": 0.38495788382651347, "grad_norm": 0.5777550504603944, "learning_rate": 4.842660178426602e-05, "loss": 0.6065, "step": 13185 }, { "epoch": 0.3851038670968307, "grad_norm": 0.5463737662721782, "learning_rate": 4.842389835090565e-05, "loss": 0.606, "step": 13190 }, { "epoch": 0.38524985036714793, "grad_norm": 0.5451049600745076, "learning_rate": 4.842119491754529e-05, "loss": 0.6437, "step": 13195 }, { "epoch": 0.38539583363746516, "grad_norm": 0.5817144481035311, "learning_rate": 4.8418491484184916e-05, "loss": 0.6323, "step": 13200 }, { "epoch": 0.3855418169077824, "grad_norm": 0.5897451339234079, "learning_rate": 4.841578805082455e-05, "loss": 0.6276, "step": 13205 }, { "epoch": 0.3856878001780996, "grad_norm": 0.5874675747116752, "learning_rate": 4.8413084617464184e-05, "loss": 0.5817, "step": 13210 }, { "epoch": 0.3858337834484168, "grad_norm": 0.5701180327321237, "learning_rate": 4.841038118410381e-05, "loss": 0.6667, "step": 13215 }, { "epoch": 0.385979766718734, "grad_norm": 0.6016508163591237, "learning_rate": 4.8407677750743446e-05, "loss": 0.6228, "step": 13220 }, { "epoch": 0.38612574998905125, "grad_norm": 0.5546009859941242, "learning_rate": 4.840497431738308e-05, "loss": 0.6238, "step": 13225 }, { "epoch": 0.3862717332593685, "grad_norm": 0.5522028145964721, "learning_rate": 4.840227088402271e-05, "loss": 0.6182, "step": 13230 }, { "epoch": 0.3864177165296857, "grad_norm": 0.5787483746402474, "learning_rate": 4.839956745066234e-05, "loss": 0.6199, "step": 13235 }, { "epoch": 0.38656369980000294, "grad_norm": 0.6178077616857798, "learning_rate": 4.8396864017301975e-05, "loss": 0.5959, "step": 13240 }, { "epoch": 0.38670968307032016, "grad_norm": 0.5954626727396192, "learning_rate": 4.839416058394161e-05, "loss": 0.6409, "step": 13245 }, { "epoch": 0.38685566634063734, "grad_norm": 0.557884968123409, "learning_rate": 4.839145715058124e-05, "loss": 0.6351, "step": 13250 }, { "epoch": 0.38700164961095457, "grad_norm": 0.569327260677646, "learning_rate": 4.838875371722088e-05, "loss": 0.62, "step": 13255 }, { "epoch": 0.3871476328812718, "grad_norm": 0.6552305954590455, "learning_rate": 4.8386050283860504e-05, "loss": 0.6453, "step": 13260 }, { "epoch": 0.387293616151589, "grad_norm": 0.6036462200351763, "learning_rate": 4.838334685050014e-05, "loss": 0.6353, "step": 13265 }, { "epoch": 0.38743959942190626, "grad_norm": 0.6017992385145876, "learning_rate": 4.838064341713977e-05, "loss": 0.601, "step": 13270 }, { "epoch": 0.3875855826922235, "grad_norm": 0.6373035874013446, "learning_rate": 4.83779399837794e-05, "loss": 0.6311, "step": 13275 }, { "epoch": 0.3877315659625407, "grad_norm": 0.6839948530572061, "learning_rate": 4.8375236550419034e-05, "loss": 0.6416, "step": 13280 }, { "epoch": 0.3878775492328579, "grad_norm": 0.5770893803108389, "learning_rate": 4.837253311705867e-05, "loss": 0.645, "step": 13285 }, { "epoch": 0.3880235325031751, "grad_norm": 0.5137913745982523, "learning_rate": 4.8369829683698295e-05, "loss": 0.5851, "step": 13290 }, { "epoch": 0.38816951577349235, "grad_norm": 0.6342779150525764, "learning_rate": 4.836712625033793e-05, "loss": 0.6606, "step": 13295 }, { "epoch": 0.3883154990438096, "grad_norm": 0.6234830940456734, "learning_rate": 4.836442281697756e-05, "loss": 0.6087, "step": 13300 }, { "epoch": 0.3884614823141268, "grad_norm": 0.5413672674584493, "learning_rate": 4.83617193836172e-05, "loss": 0.5695, "step": 13305 }, { "epoch": 0.38860746558444403, "grad_norm": 0.5978935268553068, "learning_rate": 4.835901595025683e-05, "loss": 0.635, "step": 13310 }, { "epoch": 0.38875344885476126, "grad_norm": 0.5950437817157057, "learning_rate": 4.8356312516896465e-05, "loss": 0.6266, "step": 13315 }, { "epoch": 0.3888994321250785, "grad_norm": 0.5954374674043204, "learning_rate": 4.835360908353609e-05, "loss": 0.6448, "step": 13320 }, { "epoch": 0.38904541539539567, "grad_norm": 0.636349145329153, "learning_rate": 4.8350905650175726e-05, "loss": 0.6453, "step": 13325 }, { "epoch": 0.3891913986657129, "grad_norm": 0.6480030342207099, "learning_rate": 4.834820221681536e-05, "loss": 0.6211, "step": 13330 }, { "epoch": 0.3893373819360301, "grad_norm": 0.5562679580791031, "learning_rate": 4.834549878345499e-05, "loss": 0.6323, "step": 13335 }, { "epoch": 0.38948336520634735, "grad_norm": 0.617172843406433, "learning_rate": 4.834279535009462e-05, "loss": 0.6762, "step": 13340 }, { "epoch": 0.3896293484766646, "grad_norm": 0.561698773707272, "learning_rate": 4.8340091916734255e-05, "loss": 0.613, "step": 13345 }, { "epoch": 0.3897753317469818, "grad_norm": 0.5690611482119903, "learning_rate": 4.833738848337388e-05, "loss": 0.6166, "step": 13350 }, { "epoch": 0.38992131501729904, "grad_norm": 0.5876142803013586, "learning_rate": 4.8334685050013517e-05, "loss": 0.6032, "step": 13355 }, { "epoch": 0.3900672982876162, "grad_norm": 0.5864582491303391, "learning_rate": 4.833198161665315e-05, "loss": 0.6047, "step": 13360 }, { "epoch": 0.39021328155793344, "grad_norm": 0.6198214794785935, "learning_rate": 4.8329278183292785e-05, "loss": 0.6385, "step": 13365 }, { "epoch": 0.3903592648282507, "grad_norm": 0.6188568797791153, "learning_rate": 4.832657474993242e-05, "loss": 0.6184, "step": 13370 }, { "epoch": 0.3905052480985679, "grad_norm": 0.5856215429330996, "learning_rate": 4.832387131657205e-05, "loss": 0.6411, "step": 13375 }, { "epoch": 0.39065123136888513, "grad_norm": 0.6100169853569555, "learning_rate": 4.832116788321168e-05, "loss": 0.6178, "step": 13380 }, { "epoch": 0.39079721463920236, "grad_norm": 0.602018836614156, "learning_rate": 4.8318464449851314e-05, "loss": 0.6487, "step": 13385 }, { "epoch": 0.3909431979095196, "grad_norm": 0.5699469757619349, "learning_rate": 4.831576101649095e-05, "loss": 0.6622, "step": 13390 }, { "epoch": 0.39108918117983676, "grad_norm": 0.5985680559739165, "learning_rate": 4.8313057583130575e-05, "loss": 0.6224, "step": 13395 }, { "epoch": 0.391235164450154, "grad_norm": 0.5982365645848827, "learning_rate": 4.831035414977021e-05, "loss": 0.5849, "step": 13400 }, { "epoch": 0.3913811477204712, "grad_norm": 0.6204589683889659, "learning_rate": 4.830765071640984e-05, "loss": 0.6406, "step": 13405 }, { "epoch": 0.39152713099078845, "grad_norm": 0.6574772672711884, "learning_rate": 4.830494728304947e-05, "loss": 0.6129, "step": 13410 }, { "epoch": 0.3916731142611057, "grad_norm": 0.5035523854970092, "learning_rate": 4.8302243849689104e-05, "loss": 0.6239, "step": 13415 }, { "epoch": 0.3918190975314229, "grad_norm": 0.6896538785022752, "learning_rate": 4.8299540416328745e-05, "loss": 0.6514, "step": 13420 }, { "epoch": 0.39196508080174014, "grad_norm": 0.5888988629451054, "learning_rate": 4.829683698296837e-05, "loss": 0.6186, "step": 13425 }, { "epoch": 0.39211106407205737, "grad_norm": 0.5486854119021275, "learning_rate": 4.8294133549608006e-05, "loss": 0.5967, "step": 13430 }, { "epoch": 0.39225704734237454, "grad_norm": 0.5339958385661482, "learning_rate": 4.829143011624764e-05, "loss": 0.6325, "step": 13435 }, { "epoch": 0.39240303061269177, "grad_norm": 0.6357704204075441, "learning_rate": 4.828872668288727e-05, "loss": 0.6564, "step": 13440 }, { "epoch": 0.392549013883009, "grad_norm": 0.555658973598852, "learning_rate": 4.82860232495269e-05, "loss": 0.6119, "step": 13445 }, { "epoch": 0.39269499715332623, "grad_norm": 0.5052074776157773, "learning_rate": 4.8283319816166536e-05, "loss": 0.6153, "step": 13450 }, { "epoch": 0.39284098042364346, "grad_norm": 0.6347240046648459, "learning_rate": 4.828061638280616e-05, "loss": 0.6345, "step": 13455 }, { "epoch": 0.3929869636939607, "grad_norm": 0.608673162609016, "learning_rate": 4.82779129494458e-05, "loss": 0.5916, "step": 13460 }, { "epoch": 0.3931329469642779, "grad_norm": 0.5570460045071075, "learning_rate": 4.827520951608543e-05, "loss": 0.6231, "step": 13465 }, { "epoch": 0.3932789302345951, "grad_norm": 0.5411347914345332, "learning_rate": 4.827250608272506e-05, "loss": 0.6257, "step": 13470 }, { "epoch": 0.3934249135049123, "grad_norm": 0.6490964122417958, "learning_rate": 4.82698026493647e-05, "loss": 0.6453, "step": 13475 }, { "epoch": 0.39357089677522955, "grad_norm": 0.6171107479448474, "learning_rate": 4.826709921600433e-05, "loss": 0.6326, "step": 13480 }, { "epoch": 0.3937168800455468, "grad_norm": 0.549334057473608, "learning_rate": 4.826439578264396e-05, "loss": 0.6066, "step": 13485 }, { "epoch": 0.393862863315864, "grad_norm": 0.5804051825732701, "learning_rate": 4.8261692349283594e-05, "loss": 0.6435, "step": 13490 }, { "epoch": 0.39400884658618124, "grad_norm": 0.5503691137300669, "learning_rate": 4.825898891592323e-05, "loss": 0.6021, "step": 13495 }, { "epoch": 0.39415482985649847, "grad_norm": 0.6529272669794585, "learning_rate": 4.8256285482562856e-05, "loss": 0.6178, "step": 13500 }, { "epoch": 0.39430081312681564, "grad_norm": 0.6638808222908785, "learning_rate": 4.825358204920249e-05, "loss": 0.639, "step": 13505 }, { "epoch": 0.39444679639713287, "grad_norm": 0.6311458090296774, "learning_rate": 4.8250878615842124e-05, "loss": 0.5933, "step": 13510 }, { "epoch": 0.3945927796674501, "grad_norm": 0.5754368142588844, "learning_rate": 4.824817518248175e-05, "loss": 0.6175, "step": 13515 }, { "epoch": 0.39473876293776733, "grad_norm": 0.586380654848983, "learning_rate": 4.8245471749121385e-05, "loss": 0.6611, "step": 13520 }, { "epoch": 0.39488474620808456, "grad_norm": 0.5433305789867815, "learning_rate": 4.824276831576102e-05, "loss": 0.6089, "step": 13525 }, { "epoch": 0.3950307294784018, "grad_norm": 0.6038279002461314, "learning_rate": 4.8240064882400646e-05, "loss": 0.6124, "step": 13530 }, { "epoch": 0.395176712748719, "grad_norm": 0.5202522707921962, "learning_rate": 4.823736144904029e-05, "loss": 0.5995, "step": 13535 }, { "epoch": 0.39532269601903625, "grad_norm": 0.572624013512938, "learning_rate": 4.823465801567992e-05, "loss": 0.6306, "step": 13540 }, { "epoch": 0.3954686792893534, "grad_norm": 0.60999659333733, "learning_rate": 4.823195458231955e-05, "loss": 0.6284, "step": 13545 }, { "epoch": 0.39561466255967065, "grad_norm": 0.5267393186280748, "learning_rate": 4.822925114895918e-05, "loss": 0.6108, "step": 13550 }, { "epoch": 0.3957606458299879, "grad_norm": 0.592592354964922, "learning_rate": 4.8226547715598816e-05, "loss": 0.6105, "step": 13555 }, { "epoch": 0.3959066291003051, "grad_norm": 0.5588945216035991, "learning_rate": 4.822384428223844e-05, "loss": 0.5869, "step": 13560 }, { "epoch": 0.39605261237062234, "grad_norm": 0.5672988878658998, "learning_rate": 4.822114084887808e-05, "loss": 0.6106, "step": 13565 }, { "epoch": 0.39619859564093957, "grad_norm": 0.5850633985529624, "learning_rate": 4.821843741551771e-05, "loss": 0.6259, "step": 13570 }, { "epoch": 0.3963445789112568, "grad_norm": 0.5818849365486771, "learning_rate": 4.821573398215734e-05, "loss": 0.6472, "step": 13575 }, { "epoch": 0.39649056218157397, "grad_norm": 0.5895216383506031, "learning_rate": 4.821303054879697e-05, "loss": 0.6242, "step": 13580 }, { "epoch": 0.3966365454518912, "grad_norm": 0.6484447229400049, "learning_rate": 4.8210327115436607e-05, "loss": 0.6312, "step": 13585 }, { "epoch": 0.3967825287222084, "grad_norm": 0.5354978776402249, "learning_rate": 4.820762368207624e-05, "loss": 0.6171, "step": 13590 }, { "epoch": 0.39692851199252566, "grad_norm": 0.5918177804284143, "learning_rate": 4.8204920248715875e-05, "loss": 0.6445, "step": 13595 }, { "epoch": 0.3970744952628429, "grad_norm": 0.5911045369576485, "learning_rate": 4.820221681535551e-05, "loss": 0.6108, "step": 13600 }, { "epoch": 0.3972204785331601, "grad_norm": 0.5449920194400009, "learning_rate": 4.8199513381995136e-05, "loss": 0.6154, "step": 13605 }, { "epoch": 0.39736646180347734, "grad_norm": 0.5472241075485961, "learning_rate": 4.819680994863477e-05, "loss": 0.6452, "step": 13610 }, { "epoch": 0.3975124450737945, "grad_norm": 0.6161537919372189, "learning_rate": 4.8194106515274404e-05, "loss": 0.6322, "step": 13615 }, { "epoch": 0.39765842834411175, "grad_norm": 0.539313949490527, "learning_rate": 4.819140308191403e-05, "loss": 0.6143, "step": 13620 }, { "epoch": 0.397804411614429, "grad_norm": 0.5889903338233424, "learning_rate": 4.8188699648553665e-05, "loss": 0.5947, "step": 13625 }, { "epoch": 0.3979503948847462, "grad_norm": 0.6136076195009579, "learning_rate": 4.81859962151933e-05, "loss": 0.6204, "step": 13630 }, { "epoch": 0.39809637815506344, "grad_norm": 0.5688571856541051, "learning_rate": 4.8183292781832926e-05, "loss": 0.6224, "step": 13635 }, { "epoch": 0.39824236142538066, "grad_norm": 0.5897611008210192, "learning_rate": 4.818058934847256e-05, "loss": 0.6333, "step": 13640 }, { "epoch": 0.3983883446956979, "grad_norm": 0.5888150735460673, "learning_rate": 4.8177885915112194e-05, "loss": 0.6589, "step": 13645 }, { "epoch": 0.39853432796601507, "grad_norm": 0.5497111226061524, "learning_rate": 4.817518248175183e-05, "loss": 0.6405, "step": 13650 }, { "epoch": 0.3986803112363323, "grad_norm": 0.527908581373583, "learning_rate": 4.817247904839146e-05, "loss": 0.6206, "step": 13655 }, { "epoch": 0.3988262945066495, "grad_norm": 0.5399338999614443, "learning_rate": 4.816977561503109e-05, "loss": 0.619, "step": 13660 }, { "epoch": 0.39897227777696675, "grad_norm": 0.6048041646121848, "learning_rate": 4.8167072181670724e-05, "loss": 0.656, "step": 13665 }, { "epoch": 0.399118261047284, "grad_norm": 0.595250282642277, "learning_rate": 4.816436874831036e-05, "loss": 0.6096, "step": 13670 }, { "epoch": 0.3992642443176012, "grad_norm": 0.5492264302659231, "learning_rate": 4.816166531494999e-05, "loss": 0.6311, "step": 13675 }, { "epoch": 0.39941022758791844, "grad_norm": 0.6008894891821256, "learning_rate": 4.815896188158962e-05, "loss": 0.6334, "step": 13680 }, { "epoch": 0.39955621085823567, "grad_norm": 0.567778359904405, "learning_rate": 4.815625844822925e-05, "loss": 0.6601, "step": 13685 }, { "epoch": 0.39970219412855285, "grad_norm": 0.6188811137149708, "learning_rate": 4.815355501486889e-05, "loss": 0.6242, "step": 13690 }, { "epoch": 0.3998481773988701, "grad_norm": 0.6236403020452721, "learning_rate": 4.8150851581508514e-05, "loss": 0.6093, "step": 13695 }, { "epoch": 0.3999941606691873, "grad_norm": 0.6100704824184444, "learning_rate": 4.814814814814815e-05, "loss": 0.5976, "step": 13700 }, { "epoch": 0.40014014393950453, "grad_norm": 0.5363824425063557, "learning_rate": 4.814544471478778e-05, "loss": 0.607, "step": 13705 }, { "epoch": 0.40028612720982176, "grad_norm": 0.5903032359052065, "learning_rate": 4.8142741281427416e-05, "loss": 0.6569, "step": 13710 }, { "epoch": 0.400432110480139, "grad_norm": 0.5730287258614853, "learning_rate": 4.814003784806705e-05, "loss": 0.6027, "step": 13715 }, { "epoch": 0.4005780937504562, "grad_norm": 0.5241573139327931, "learning_rate": 4.813733441470668e-05, "loss": 0.5654, "step": 13720 }, { "epoch": 0.4007240770207734, "grad_norm": 0.6082443355220395, "learning_rate": 4.813463098134631e-05, "loss": 0.6532, "step": 13725 }, { "epoch": 0.4008700602910906, "grad_norm": 0.5625180267851674, "learning_rate": 4.8131927547985945e-05, "loss": 0.64, "step": 13730 }, { "epoch": 0.40101604356140785, "grad_norm": 0.5571897860122178, "learning_rate": 4.812922411462558e-05, "loss": 0.6017, "step": 13735 }, { "epoch": 0.4011620268317251, "grad_norm": 0.529356312489634, "learning_rate": 4.812652068126521e-05, "loss": 0.6037, "step": 13740 }, { "epoch": 0.4013080101020423, "grad_norm": 0.7005621331259769, "learning_rate": 4.812381724790484e-05, "loss": 0.5969, "step": 13745 }, { "epoch": 0.40145399337235954, "grad_norm": 0.6184041888079226, "learning_rate": 4.8121113814544475e-05, "loss": 0.6908, "step": 13750 }, { "epoch": 0.40159997664267677, "grad_norm": 0.6626803494163699, "learning_rate": 4.81184103811841e-05, "loss": 0.6293, "step": 13755 }, { "epoch": 0.40174595991299394, "grad_norm": 0.5529586818576537, "learning_rate": 4.811570694782374e-05, "loss": 0.5646, "step": 13760 }, { "epoch": 0.4018919431833112, "grad_norm": 0.5784466459438176, "learning_rate": 4.811300351446337e-05, "loss": 0.5993, "step": 13765 }, { "epoch": 0.4020379264536284, "grad_norm": 0.5520967971830402, "learning_rate": 4.8110300081103004e-05, "loss": 0.6117, "step": 13770 }, { "epoch": 0.40218390972394563, "grad_norm": 0.5993096248356738, "learning_rate": 4.810759664774264e-05, "loss": 0.6299, "step": 13775 }, { "epoch": 0.40232989299426286, "grad_norm": 0.5351475244526395, "learning_rate": 4.8104893214382265e-05, "loss": 0.6373, "step": 13780 }, { "epoch": 0.4024758762645801, "grad_norm": 0.5600182803856265, "learning_rate": 4.81021897810219e-05, "loss": 0.6246, "step": 13785 }, { "epoch": 0.4026218595348973, "grad_norm": 0.5952425297770852, "learning_rate": 4.809948634766153e-05, "loss": 0.6025, "step": 13790 }, { "epoch": 0.40276784280521455, "grad_norm": 0.6070290600177501, "learning_rate": 4.809678291430116e-05, "loss": 0.5742, "step": 13795 }, { "epoch": 0.4029138260755317, "grad_norm": 0.5580191217669657, "learning_rate": 4.8094079480940795e-05, "loss": 0.636, "step": 13800 }, { "epoch": 0.40305980934584895, "grad_norm": 0.6329915384049158, "learning_rate": 4.809137604758043e-05, "loss": 0.6294, "step": 13805 }, { "epoch": 0.4032057926161662, "grad_norm": 0.5876804747016203, "learning_rate": 4.808867261422006e-05, "loss": 0.6438, "step": 13810 }, { "epoch": 0.4033517758864834, "grad_norm": 0.5728198559415657, "learning_rate": 4.8085969180859697e-05, "loss": 0.6331, "step": 13815 }, { "epoch": 0.40349775915680064, "grad_norm": 0.5626684576414865, "learning_rate": 4.808326574749933e-05, "loss": 0.6286, "step": 13820 }, { "epoch": 0.40364374242711787, "grad_norm": 0.5977672159105003, "learning_rate": 4.808056231413896e-05, "loss": 0.6121, "step": 13825 }, { "epoch": 0.4037897256974351, "grad_norm": 0.5853592215432869, "learning_rate": 4.807785888077859e-05, "loss": 0.6407, "step": 13830 }, { "epoch": 0.40393570896775227, "grad_norm": 0.5828121243269805, "learning_rate": 4.8075155447418226e-05, "loss": 0.6145, "step": 13835 }, { "epoch": 0.4040816922380695, "grad_norm": 0.5894281705688602, "learning_rate": 4.807245201405785e-05, "loss": 0.6315, "step": 13840 }, { "epoch": 0.40422767550838673, "grad_norm": 0.5845636894524643, "learning_rate": 4.806974858069749e-05, "loss": 0.6125, "step": 13845 }, { "epoch": 0.40437365877870396, "grad_norm": 0.6256009842976737, "learning_rate": 4.806704514733712e-05, "loss": 0.6241, "step": 13850 }, { "epoch": 0.4045196420490212, "grad_norm": 0.5673015653630545, "learning_rate": 4.806434171397675e-05, "loss": 0.5918, "step": 13855 }, { "epoch": 0.4046656253193384, "grad_norm": 0.6604285222380256, "learning_rate": 4.806163828061638e-05, "loss": 0.6406, "step": 13860 }, { "epoch": 0.40481160858965565, "grad_norm": 0.5690281082172786, "learning_rate": 4.8058934847256016e-05, "loss": 0.6129, "step": 13865 }, { "epoch": 0.4049575918599728, "grad_norm": 0.5530667362191154, "learning_rate": 4.805623141389565e-05, "loss": 0.618, "step": 13870 }, { "epoch": 0.40510357513029005, "grad_norm": 0.635344585685735, "learning_rate": 4.8053527980535284e-05, "loss": 0.6295, "step": 13875 }, { "epoch": 0.4052495584006073, "grad_norm": 0.5885478681203666, "learning_rate": 4.805082454717492e-05, "loss": 0.633, "step": 13880 }, { "epoch": 0.4053955416709245, "grad_norm": 0.6491217014559412, "learning_rate": 4.8048121113814546e-05, "loss": 0.6259, "step": 13885 }, { "epoch": 0.40554152494124174, "grad_norm": 0.5464511357306445, "learning_rate": 4.804541768045418e-05, "loss": 0.5995, "step": 13890 }, { "epoch": 0.40568750821155897, "grad_norm": 0.5138669250484678, "learning_rate": 4.8042714247093814e-05, "loss": 0.6447, "step": 13895 }, { "epoch": 0.4058334914818762, "grad_norm": 0.5534219003363693, "learning_rate": 4.804001081373344e-05, "loss": 0.6202, "step": 13900 }, { "epoch": 0.4059794747521934, "grad_norm": 0.5896194444215137, "learning_rate": 4.8037307380373075e-05, "loss": 0.6244, "step": 13905 }, { "epoch": 0.4061254580225106, "grad_norm": 0.546727840099815, "learning_rate": 4.803460394701271e-05, "loss": 0.6125, "step": 13910 }, { "epoch": 0.40627144129282783, "grad_norm": 0.5915755959155173, "learning_rate": 4.8031900513652336e-05, "loss": 0.6295, "step": 13915 }, { "epoch": 0.40641742456314506, "grad_norm": 0.6167267909361001, "learning_rate": 4.802919708029197e-05, "loss": 0.6489, "step": 13920 }, { "epoch": 0.4065634078334623, "grad_norm": 0.6374198371179493, "learning_rate": 4.8026493646931604e-05, "loss": 0.6391, "step": 13925 }, { "epoch": 0.4067093911037795, "grad_norm": 0.5530045653454013, "learning_rate": 4.802379021357124e-05, "loss": 0.615, "step": 13930 }, { "epoch": 0.40685537437409675, "grad_norm": 0.5605993706710557, "learning_rate": 4.802108678021087e-05, "loss": 0.6372, "step": 13935 }, { "epoch": 0.407001357644414, "grad_norm": 1.273577645633241, "learning_rate": 4.8018383346850506e-05, "loss": 0.6315, "step": 13940 }, { "epoch": 0.40714734091473115, "grad_norm": 0.5737363090639885, "learning_rate": 4.8015679913490133e-05, "loss": 0.663, "step": 13945 }, { "epoch": 0.4072933241850484, "grad_norm": 0.6162128288089332, "learning_rate": 4.801297648012977e-05, "loss": 0.5957, "step": 13950 }, { "epoch": 0.4074393074553656, "grad_norm": 0.5544842436801466, "learning_rate": 4.80102730467694e-05, "loss": 0.6122, "step": 13955 }, { "epoch": 0.40758529072568284, "grad_norm": 0.6104654931156226, "learning_rate": 4.800756961340903e-05, "loss": 0.6146, "step": 13960 }, { "epoch": 0.40773127399600007, "grad_norm": 0.5842113609759672, "learning_rate": 4.800486618004866e-05, "loss": 0.5939, "step": 13965 }, { "epoch": 0.4078772572663173, "grad_norm": 0.6195490172136169, "learning_rate": 4.80021627466883e-05, "loss": 0.6105, "step": 13970 }, { "epoch": 0.4080232405366345, "grad_norm": 0.5634973403984403, "learning_rate": 4.7999459313327924e-05, "loss": 0.6039, "step": 13975 }, { "epoch": 0.4081692238069517, "grad_norm": 0.6104206677312637, "learning_rate": 4.799675587996756e-05, "loss": 0.6488, "step": 13980 }, { "epoch": 0.4083152070772689, "grad_norm": 0.5770614446261808, "learning_rate": 4.79940524466072e-05, "loss": 0.6593, "step": 13985 }, { "epoch": 0.40846119034758616, "grad_norm": 0.5643469584875525, "learning_rate": 4.7991349013246826e-05, "loss": 0.6671, "step": 13990 }, { "epoch": 0.4086071736179034, "grad_norm": 0.5742280553447636, "learning_rate": 4.798864557988646e-05, "loss": 0.6536, "step": 13995 }, { "epoch": 0.4087531568882206, "grad_norm": 0.492136826214384, "learning_rate": 4.7985942146526094e-05, "loss": 0.5879, "step": 14000 }, { "epoch": 0.40889914015853784, "grad_norm": 0.5545128660214425, "learning_rate": 4.798323871316572e-05, "loss": 0.6355, "step": 14005 }, { "epoch": 0.4090451234288551, "grad_norm": 0.5923179142250109, "learning_rate": 4.7980535279805355e-05, "loss": 0.5941, "step": 14010 }, { "epoch": 0.4091911066991723, "grad_norm": 0.5418349285269045, "learning_rate": 4.797783184644499e-05, "loss": 0.6212, "step": 14015 }, { "epoch": 0.4093370899694895, "grad_norm": 0.5169049193971016, "learning_rate": 4.7975128413084617e-05, "loss": 0.644, "step": 14020 }, { "epoch": 0.4094830732398067, "grad_norm": 0.5426999554364043, "learning_rate": 4.797242497972425e-05, "loss": 0.6272, "step": 14025 }, { "epoch": 0.40962905651012393, "grad_norm": 0.5398226205236281, "learning_rate": 4.7969721546363885e-05, "loss": 0.5964, "step": 14030 }, { "epoch": 0.40977503978044116, "grad_norm": 0.603507472541539, "learning_rate": 4.796701811300351e-05, "loss": 0.6168, "step": 14035 }, { "epoch": 0.4099210230507584, "grad_norm": 0.6019556448676615, "learning_rate": 4.7964314679643146e-05, "loss": 0.6285, "step": 14040 }, { "epoch": 0.4100670063210756, "grad_norm": 0.6017919944967465, "learning_rate": 4.7961611246282787e-05, "loss": 0.6098, "step": 14045 }, { "epoch": 0.41021298959139285, "grad_norm": 0.5530994915363676, "learning_rate": 4.7958907812922414e-05, "loss": 0.5975, "step": 14050 }, { "epoch": 0.41035897286171, "grad_norm": 0.6486457928506527, "learning_rate": 4.795620437956205e-05, "loss": 0.6283, "step": 14055 }, { "epoch": 0.41050495613202725, "grad_norm": 0.542977897642404, "learning_rate": 4.795350094620168e-05, "loss": 0.6257, "step": 14060 }, { "epoch": 0.4106509394023445, "grad_norm": 0.5640311894438893, "learning_rate": 4.795079751284131e-05, "loss": 0.6238, "step": 14065 }, { "epoch": 0.4107969226726617, "grad_norm": 0.5719389303898036, "learning_rate": 4.794809407948094e-05, "loss": 0.6088, "step": 14070 }, { "epoch": 0.41094290594297894, "grad_norm": 0.5445196495690764, "learning_rate": 4.794539064612058e-05, "loss": 0.6253, "step": 14075 }, { "epoch": 0.41108888921329617, "grad_norm": 0.5439791902601374, "learning_rate": 4.7942687212760204e-05, "loss": 0.6099, "step": 14080 }, { "epoch": 0.4112348724836134, "grad_norm": 0.5200705944556075, "learning_rate": 4.793998377939984e-05, "loss": 0.6238, "step": 14085 }, { "epoch": 0.4113808557539306, "grad_norm": 0.5735614927478834, "learning_rate": 4.793728034603947e-05, "loss": 0.6355, "step": 14090 }, { "epoch": 0.4115268390242478, "grad_norm": 0.5576876182090678, "learning_rate": 4.79345769126791e-05, "loss": 0.6233, "step": 14095 }, { "epoch": 0.41167282229456503, "grad_norm": 0.5380941182216373, "learning_rate": 4.793187347931874e-05, "loss": 0.6268, "step": 14100 }, { "epoch": 0.41181880556488226, "grad_norm": 0.6081117728987654, "learning_rate": 4.7929170045958374e-05, "loss": 0.6436, "step": 14105 }, { "epoch": 0.4119647888351995, "grad_norm": 0.5781878760622993, "learning_rate": 4.7926466612598e-05, "loss": 0.6117, "step": 14110 }, { "epoch": 0.4121107721055167, "grad_norm": 0.5495580043457678, "learning_rate": 4.7923763179237636e-05, "loss": 0.5999, "step": 14115 }, { "epoch": 0.41225675537583395, "grad_norm": 0.6207172386972359, "learning_rate": 4.792105974587727e-05, "loss": 0.6355, "step": 14120 }, { "epoch": 0.4124027386461511, "grad_norm": 0.6398626556738594, "learning_rate": 4.79183563125169e-05, "loss": 0.6376, "step": 14125 }, { "epoch": 0.41254872191646835, "grad_norm": 0.5636701271851832, "learning_rate": 4.791565287915653e-05, "loss": 0.6074, "step": 14130 }, { "epoch": 0.4126947051867856, "grad_norm": 0.5624112304109277, "learning_rate": 4.7912949445796165e-05, "loss": 0.6325, "step": 14135 }, { "epoch": 0.4128406884571028, "grad_norm": 0.5317561355448891, "learning_rate": 4.791024601243579e-05, "loss": 0.6336, "step": 14140 }, { "epoch": 0.41298667172742004, "grad_norm": 0.5301946625756764, "learning_rate": 4.7907542579075426e-05, "loss": 0.6137, "step": 14145 }, { "epoch": 0.41313265499773727, "grad_norm": 0.5758066704615487, "learning_rate": 4.790483914571506e-05, "loss": 0.6539, "step": 14150 }, { "epoch": 0.4132786382680545, "grad_norm": 0.5982848351957974, "learning_rate": 4.7902135712354694e-05, "loss": 0.5953, "step": 14155 }, { "epoch": 0.41342462153837173, "grad_norm": 0.5959391922591865, "learning_rate": 4.789943227899433e-05, "loss": 0.6408, "step": 14160 }, { "epoch": 0.4135706048086889, "grad_norm": 0.6552490044991649, "learning_rate": 4.789672884563396e-05, "loss": 0.6407, "step": 14165 }, { "epoch": 0.41371658807900613, "grad_norm": 0.670378416099931, "learning_rate": 4.789402541227359e-05, "loss": 0.6403, "step": 14170 }, { "epoch": 0.41386257134932336, "grad_norm": 0.5681557106291291, "learning_rate": 4.7891321978913223e-05, "loss": 0.6183, "step": 14175 }, { "epoch": 0.4140085546196406, "grad_norm": 0.5733117688607022, "learning_rate": 4.788861854555286e-05, "loss": 0.6454, "step": 14180 }, { "epoch": 0.4141545378899578, "grad_norm": 0.643859661033023, "learning_rate": 4.7885915112192485e-05, "loss": 0.6075, "step": 14185 }, { "epoch": 0.41430052116027505, "grad_norm": 0.5506475165232687, "learning_rate": 4.788321167883212e-05, "loss": 0.6017, "step": 14190 }, { "epoch": 0.4144465044305923, "grad_norm": 0.626252976198556, "learning_rate": 4.788050824547175e-05, "loss": 0.6411, "step": 14195 }, { "epoch": 0.41459248770090945, "grad_norm": 0.5958511444930751, "learning_rate": 4.787780481211138e-05, "loss": 0.6234, "step": 14200 }, { "epoch": 0.4147384709712267, "grad_norm": 0.5351148448484172, "learning_rate": 4.7875101378751014e-05, "loss": 0.6244, "step": 14205 }, { "epoch": 0.4148844542415439, "grad_norm": 0.5918132584355974, "learning_rate": 4.787239794539065e-05, "loss": 0.6372, "step": 14210 }, { "epoch": 0.41503043751186114, "grad_norm": 0.5909626075453321, "learning_rate": 4.786969451203028e-05, "loss": 0.633, "step": 14215 }, { "epoch": 0.41517642078217837, "grad_norm": 0.5219501236351763, "learning_rate": 4.7866991078669916e-05, "loss": 0.6037, "step": 14220 }, { "epoch": 0.4153224040524956, "grad_norm": 0.575328539393164, "learning_rate": 4.786428764530955e-05, "loss": 0.6329, "step": 14225 }, { "epoch": 0.4154683873228128, "grad_norm": 0.6773655232047358, "learning_rate": 4.786158421194918e-05, "loss": 0.639, "step": 14230 }, { "epoch": 0.41561437059313, "grad_norm": 0.5760503833566822, "learning_rate": 4.785888077858881e-05, "loss": 0.6454, "step": 14235 }, { "epoch": 0.41576035386344723, "grad_norm": 0.5950003096858704, "learning_rate": 4.7856177345228445e-05, "loss": 0.64, "step": 14240 }, { "epoch": 0.41590633713376446, "grad_norm": 0.5885681503069466, "learning_rate": 4.785347391186807e-05, "loss": 0.6633, "step": 14245 }, { "epoch": 0.4160523204040817, "grad_norm": 0.5678903347565218, "learning_rate": 4.7850770478507706e-05, "loss": 0.5934, "step": 14250 }, { "epoch": 0.4161983036743989, "grad_norm": 0.5282933494025543, "learning_rate": 4.784806704514734e-05, "loss": 0.5872, "step": 14255 }, { "epoch": 0.41634428694471615, "grad_norm": 0.6540934640966019, "learning_rate": 4.784536361178697e-05, "loss": 0.6151, "step": 14260 }, { "epoch": 0.4164902702150334, "grad_norm": 0.5533760624817811, "learning_rate": 4.78426601784266e-05, "loss": 0.6037, "step": 14265 }, { "epoch": 0.4166362534853506, "grad_norm": 0.5372531714701423, "learning_rate": 4.7839956745066236e-05, "loss": 0.6087, "step": 14270 }, { "epoch": 0.4167822367556678, "grad_norm": 0.5224895747060857, "learning_rate": 4.783725331170587e-05, "loss": 0.6335, "step": 14275 }, { "epoch": 0.416928220025985, "grad_norm": 0.6014149622174179, "learning_rate": 4.7834549878345504e-05, "loss": 0.6329, "step": 14280 }, { "epoch": 0.41707420329630224, "grad_norm": 0.5999889410745648, "learning_rate": 4.783184644498514e-05, "loss": 0.6251, "step": 14285 }, { "epoch": 0.41722018656661947, "grad_norm": 0.5362317932095414, "learning_rate": 4.7829143011624765e-05, "loss": 0.6089, "step": 14290 }, { "epoch": 0.4173661698369367, "grad_norm": 0.5819934085662117, "learning_rate": 4.78264395782644e-05, "loss": 0.6436, "step": 14295 }, { "epoch": 0.4175121531072539, "grad_norm": 0.5864607213051065, "learning_rate": 4.782373614490403e-05, "loss": 0.6411, "step": 14300 }, { "epoch": 0.41765813637757115, "grad_norm": 0.630612711115032, "learning_rate": 4.782103271154366e-05, "loss": 0.6527, "step": 14305 }, { "epoch": 0.41780411964788833, "grad_norm": 0.5590868223376916, "learning_rate": 4.7818329278183294e-05, "loss": 0.6047, "step": 14310 }, { "epoch": 0.41795010291820556, "grad_norm": 0.5663035333793045, "learning_rate": 4.781562584482293e-05, "loss": 0.6299, "step": 14315 }, { "epoch": 0.4180960861885228, "grad_norm": 0.6326837090966333, "learning_rate": 4.7812922411462556e-05, "loss": 0.6598, "step": 14320 }, { "epoch": 0.41824206945884, "grad_norm": 0.5950934964099677, "learning_rate": 4.7810218978102196e-05, "loss": 0.618, "step": 14325 }, { "epoch": 0.41838805272915724, "grad_norm": 0.6221417935147694, "learning_rate": 4.7807515544741824e-05, "loss": 0.6577, "step": 14330 }, { "epoch": 0.4185340359994745, "grad_norm": 0.5567674140249421, "learning_rate": 4.780481211138146e-05, "loss": 0.6229, "step": 14335 }, { "epoch": 0.4186800192697917, "grad_norm": 0.5552223949166325, "learning_rate": 4.780210867802109e-05, "loss": 0.6233, "step": 14340 }, { "epoch": 0.4188260025401089, "grad_norm": 0.5775856282207062, "learning_rate": 4.779940524466072e-05, "loss": 0.656, "step": 14345 }, { "epoch": 0.4189719858104261, "grad_norm": 0.7372780319047465, "learning_rate": 4.779670181130035e-05, "loss": 0.6495, "step": 14350 }, { "epoch": 0.41911796908074334, "grad_norm": 0.6014956671880775, "learning_rate": 4.779399837793999e-05, "loss": 0.6708, "step": 14355 }, { "epoch": 0.41926395235106056, "grad_norm": 0.6073466417856523, "learning_rate": 4.779129494457962e-05, "loss": 0.6488, "step": 14360 }, { "epoch": 0.4194099356213778, "grad_norm": 0.6136275214500743, "learning_rate": 4.778859151121925e-05, "loss": 0.621, "step": 14365 }, { "epoch": 0.419555918891695, "grad_norm": 0.5320966178341432, "learning_rate": 4.778588807785888e-05, "loss": 0.6186, "step": 14370 }, { "epoch": 0.41970190216201225, "grad_norm": 0.6369771296590356, "learning_rate": 4.7783184644498516e-05, "loss": 0.6443, "step": 14375 }, { "epoch": 0.4198478854323295, "grad_norm": 0.8087671895559168, "learning_rate": 4.778048121113814e-05, "loss": 0.613, "step": 14380 }, { "epoch": 0.41999386870264666, "grad_norm": 0.5530997373701292, "learning_rate": 4.7777777777777784e-05, "loss": 0.6563, "step": 14385 }, { "epoch": 0.4201398519729639, "grad_norm": 0.5546995221341589, "learning_rate": 4.777507434441741e-05, "loss": 0.6089, "step": 14390 }, { "epoch": 0.4202858352432811, "grad_norm": 0.5006443831648811, "learning_rate": 4.7772370911057045e-05, "loss": 0.5927, "step": 14395 }, { "epoch": 0.42043181851359834, "grad_norm": 0.5345581512685684, "learning_rate": 4.776966747769668e-05, "loss": 0.6163, "step": 14400 }, { "epoch": 0.4205778017839156, "grad_norm": 0.5332499205327512, "learning_rate": 4.776696404433631e-05, "loss": 0.5831, "step": 14405 }, { "epoch": 0.4207237850542328, "grad_norm": 0.564490785050572, "learning_rate": 4.776426061097594e-05, "loss": 0.5937, "step": 14410 }, { "epoch": 0.42086976832455003, "grad_norm": 0.5550784412403166, "learning_rate": 4.7761557177615575e-05, "loss": 0.6183, "step": 14415 }, { "epoch": 0.4210157515948672, "grad_norm": 0.5686258741773763, "learning_rate": 4.775885374425521e-05, "loss": 0.6178, "step": 14420 }, { "epoch": 0.42116173486518443, "grad_norm": 0.577465418665917, "learning_rate": 4.7756150310894836e-05, "loss": 0.5923, "step": 14425 }, { "epoch": 0.42130771813550166, "grad_norm": 0.546915649066711, "learning_rate": 4.775344687753447e-05, "loss": 0.6053, "step": 14430 }, { "epoch": 0.4214537014058189, "grad_norm": 0.595593335659836, "learning_rate": 4.7750743444174104e-05, "loss": 0.6568, "step": 14435 }, { "epoch": 0.4215996846761361, "grad_norm": 0.6427408161037369, "learning_rate": 4.774804001081374e-05, "loss": 0.6272, "step": 14440 }, { "epoch": 0.42174566794645335, "grad_norm": 0.5998278209621615, "learning_rate": 4.774533657745337e-05, "loss": 0.6065, "step": 14445 }, { "epoch": 0.4218916512167706, "grad_norm": 0.5774191545563878, "learning_rate": 4.7742633144093e-05, "loss": 0.6124, "step": 14450 }, { "epoch": 0.42203763448708775, "grad_norm": 0.704150279458017, "learning_rate": 4.773992971073263e-05, "loss": 0.6632, "step": 14455 }, { "epoch": 0.422183617757405, "grad_norm": 0.6461041452050003, "learning_rate": 4.773722627737227e-05, "loss": 0.619, "step": 14460 }, { "epoch": 0.4223296010277222, "grad_norm": 0.6348269712322392, "learning_rate": 4.7734522844011894e-05, "loss": 0.6261, "step": 14465 }, { "epoch": 0.42247558429803944, "grad_norm": 0.5925314788571437, "learning_rate": 4.773181941065153e-05, "loss": 0.6661, "step": 14470 }, { "epoch": 0.42262156756835667, "grad_norm": 0.6196697779962924, "learning_rate": 4.772911597729116e-05, "loss": 0.6083, "step": 14475 }, { "epoch": 0.4227675508386739, "grad_norm": 0.6111876389996954, "learning_rate": 4.772641254393079e-05, "loss": 0.616, "step": 14480 }, { "epoch": 0.42291353410899113, "grad_norm": 0.6538562718770257, "learning_rate": 4.7723709110570424e-05, "loss": 0.6545, "step": 14485 }, { "epoch": 0.42305951737930836, "grad_norm": 0.5717646753154614, "learning_rate": 4.772100567721006e-05, "loss": 0.6271, "step": 14490 }, { "epoch": 0.42320550064962553, "grad_norm": 0.5395934631205521, "learning_rate": 4.771830224384969e-05, "loss": 0.6023, "step": 14495 }, { "epoch": 0.42335148391994276, "grad_norm": 0.6137581178981804, "learning_rate": 4.7715598810489326e-05, "loss": 0.6654, "step": 14500 }, { "epoch": 0.42349746719026, "grad_norm": 1.2136568077016328, "learning_rate": 4.771289537712896e-05, "loss": 0.6116, "step": 14505 }, { "epoch": 0.4236434504605772, "grad_norm": 0.5772345084300267, "learning_rate": 4.771019194376859e-05, "loss": 0.6194, "step": 14510 }, { "epoch": 0.42378943373089445, "grad_norm": 0.5952845774984014, "learning_rate": 4.770748851040822e-05, "loss": 0.6478, "step": 14515 }, { "epoch": 0.4239354170012117, "grad_norm": 0.5726745964831638, "learning_rate": 4.7704785077047855e-05, "loss": 0.626, "step": 14520 }, { "epoch": 0.4240814002715289, "grad_norm": 0.62085907103269, "learning_rate": 4.770208164368748e-05, "loss": 0.6255, "step": 14525 }, { "epoch": 0.4242273835418461, "grad_norm": 0.5563473651655958, "learning_rate": 4.7699378210327116e-05, "loss": 0.6069, "step": 14530 }, { "epoch": 0.4243733668121633, "grad_norm": 0.5670589071122301, "learning_rate": 4.769667477696675e-05, "loss": 0.6208, "step": 14535 }, { "epoch": 0.42451935008248054, "grad_norm": 0.6095020706440947, "learning_rate": 4.769397134360638e-05, "loss": 0.631, "step": 14540 }, { "epoch": 0.42466533335279777, "grad_norm": 0.6092775011356596, "learning_rate": 4.769126791024601e-05, "loss": 0.6216, "step": 14545 }, { "epoch": 0.424811316623115, "grad_norm": 0.567945261419009, "learning_rate": 4.7688564476885646e-05, "loss": 0.6274, "step": 14550 }, { "epoch": 0.4249572998934322, "grad_norm": 0.5684867527899196, "learning_rate": 4.768586104352528e-05, "loss": 0.64, "step": 14555 }, { "epoch": 0.42510328316374946, "grad_norm": 0.507487417906064, "learning_rate": 4.7683157610164914e-05, "loss": 0.6088, "step": 14560 }, { "epoch": 0.42524926643406663, "grad_norm": 0.6018554843484353, "learning_rate": 4.768045417680455e-05, "loss": 0.606, "step": 14565 }, { "epoch": 0.42539524970438386, "grad_norm": 0.5475738306372401, "learning_rate": 4.7677750743444175e-05, "loss": 0.641, "step": 14570 }, { "epoch": 0.4255412329747011, "grad_norm": 0.5773356107899155, "learning_rate": 4.767504731008381e-05, "loss": 0.6073, "step": 14575 }, { "epoch": 0.4256872162450183, "grad_norm": 0.6750584527853102, "learning_rate": 4.767234387672344e-05, "loss": 0.6503, "step": 14580 }, { "epoch": 0.42583319951533555, "grad_norm": 0.6115976374012592, "learning_rate": 4.766964044336307e-05, "loss": 0.6048, "step": 14585 }, { "epoch": 0.4259791827856528, "grad_norm": 0.5393789243292255, "learning_rate": 4.7666937010002704e-05, "loss": 0.6317, "step": 14590 }, { "epoch": 0.42612516605597, "grad_norm": 0.5568482423338176, "learning_rate": 4.766423357664234e-05, "loss": 0.6445, "step": 14595 }, { "epoch": 0.4262711493262872, "grad_norm": 0.5332933023323138, "learning_rate": 4.7661530143281965e-05, "loss": 0.6376, "step": 14600 }, { "epoch": 0.4264171325966044, "grad_norm": 0.5294400387457988, "learning_rate": 4.76588267099216e-05, "loss": 0.5828, "step": 14605 }, { "epoch": 0.42656311586692164, "grad_norm": 0.5939282486186367, "learning_rate": 4.765612327656124e-05, "loss": 0.6449, "step": 14610 }, { "epoch": 0.42670909913723887, "grad_norm": 0.648142408355032, "learning_rate": 4.765341984320087e-05, "loss": 0.6206, "step": 14615 }, { "epoch": 0.4268550824075561, "grad_norm": 0.6011774898094062, "learning_rate": 4.76507164098405e-05, "loss": 0.6091, "step": 14620 }, { "epoch": 0.4270010656778733, "grad_norm": 0.5393605266845601, "learning_rate": 4.7648012976480135e-05, "loss": 0.6047, "step": 14625 }, { "epoch": 0.42714704894819056, "grad_norm": 0.5988901565246673, "learning_rate": 4.764530954311976e-05, "loss": 0.6327, "step": 14630 }, { "epoch": 0.4272930322185078, "grad_norm": 0.5286812080974206, "learning_rate": 4.7642606109759397e-05, "loss": 0.6073, "step": 14635 }, { "epoch": 0.42743901548882496, "grad_norm": 0.6086845013753541, "learning_rate": 4.763990267639903e-05, "loss": 0.6758, "step": 14640 }, { "epoch": 0.4275849987591422, "grad_norm": 0.5383638541235962, "learning_rate": 4.763719924303866e-05, "loss": 0.628, "step": 14645 }, { "epoch": 0.4277309820294594, "grad_norm": 0.5504631301694595, "learning_rate": 4.763449580967829e-05, "loss": 0.6131, "step": 14650 }, { "epoch": 0.42787696529977665, "grad_norm": 0.5346930546706193, "learning_rate": 4.7631792376317926e-05, "loss": 0.5793, "step": 14655 }, { "epoch": 0.4280229485700939, "grad_norm": 0.5502844505082993, "learning_rate": 4.762908894295755e-05, "loss": 0.6418, "step": 14660 }, { "epoch": 0.4281689318404111, "grad_norm": 0.5603008510387754, "learning_rate": 4.7626385509597194e-05, "loss": 0.6072, "step": 14665 }, { "epoch": 0.42831491511072833, "grad_norm": 0.9901796814725236, "learning_rate": 4.762368207623683e-05, "loss": 0.658, "step": 14670 }, { "epoch": 0.4284608983810455, "grad_norm": 0.5544606906079934, "learning_rate": 4.7620978642876455e-05, "loss": 0.5983, "step": 14675 }, { "epoch": 0.42860688165136274, "grad_norm": 0.6239517061221805, "learning_rate": 4.761827520951609e-05, "loss": 0.6546, "step": 14680 }, { "epoch": 0.42875286492167997, "grad_norm": 0.6291713455457486, "learning_rate": 4.761557177615572e-05, "loss": 0.6131, "step": 14685 }, { "epoch": 0.4288988481919972, "grad_norm": 0.6125084498373969, "learning_rate": 4.761286834279535e-05, "loss": 0.6329, "step": 14690 }, { "epoch": 0.4290448314623144, "grad_norm": 0.5658415960779482, "learning_rate": 4.7610164909434984e-05, "loss": 0.6218, "step": 14695 }, { "epoch": 0.42919081473263165, "grad_norm": 0.5550784000378393, "learning_rate": 4.760746147607462e-05, "loss": 0.625, "step": 14700 }, { "epoch": 0.4293367980029489, "grad_norm": 0.6076585616111062, "learning_rate": 4.7604758042714246e-05, "loss": 0.5785, "step": 14705 }, { "epoch": 0.42948278127326606, "grad_norm": 0.58625695064861, "learning_rate": 4.760205460935388e-05, "loss": 0.6166, "step": 14710 }, { "epoch": 0.4296287645435833, "grad_norm": 0.5566597573096869, "learning_rate": 4.7599351175993514e-05, "loss": 0.6186, "step": 14715 }, { "epoch": 0.4297747478139005, "grad_norm": 0.5474228757734869, "learning_rate": 4.759664774263314e-05, "loss": 0.6764, "step": 14720 }, { "epoch": 0.42992073108421774, "grad_norm": 0.5683114324817959, "learning_rate": 4.759394430927278e-05, "loss": 0.6469, "step": 14725 }, { "epoch": 0.430066714354535, "grad_norm": 0.8332405880928151, "learning_rate": 4.7591240875912416e-05, "loss": 0.6377, "step": 14730 }, { "epoch": 0.4302126976248522, "grad_norm": 0.5752128630581596, "learning_rate": 4.758853744255204e-05, "loss": 0.6117, "step": 14735 }, { "epoch": 0.43035868089516943, "grad_norm": 0.5227951798751092, "learning_rate": 4.758583400919168e-05, "loss": 0.6254, "step": 14740 }, { "epoch": 0.43050466416548666, "grad_norm": 0.5952212016269874, "learning_rate": 4.758313057583131e-05, "loss": 0.6186, "step": 14745 }, { "epoch": 0.43065064743580383, "grad_norm": 0.5557886621532497, "learning_rate": 4.758042714247094e-05, "loss": 0.5878, "step": 14750 }, { "epoch": 0.43079663070612106, "grad_norm": 0.707267722670665, "learning_rate": 4.757772370911057e-05, "loss": 0.6161, "step": 14755 }, { "epoch": 0.4309426139764383, "grad_norm": 0.5792752499844859, "learning_rate": 4.7575020275750206e-05, "loss": 0.5921, "step": 14760 }, { "epoch": 0.4310885972467555, "grad_norm": 0.5568813442307703, "learning_rate": 4.7572316842389833e-05, "loss": 0.6536, "step": 14765 }, { "epoch": 0.43123458051707275, "grad_norm": 0.583675683385802, "learning_rate": 4.756961340902947e-05, "loss": 0.6118, "step": 14770 }, { "epoch": 0.43138056378739, "grad_norm": 0.5293607527851778, "learning_rate": 4.75669099756691e-05, "loss": 0.6231, "step": 14775 }, { "epoch": 0.4315265470577072, "grad_norm": 0.5749944944052778, "learning_rate": 4.7564206542308736e-05, "loss": 0.6284, "step": 14780 }, { "epoch": 0.4316725303280244, "grad_norm": 0.5231554381885988, "learning_rate": 4.756150310894837e-05, "loss": 0.6491, "step": 14785 }, { "epoch": 0.4318185135983416, "grad_norm": 0.5671524859766144, "learning_rate": 4.7558799675588004e-05, "loss": 0.6137, "step": 14790 }, { "epoch": 0.43196449686865884, "grad_norm": 0.5813027171579915, "learning_rate": 4.755609624222763e-05, "loss": 0.6452, "step": 14795 }, { "epoch": 0.43211048013897607, "grad_norm": 0.6213944468438584, "learning_rate": 4.7553392808867265e-05, "loss": 0.6517, "step": 14800 }, { "epoch": 0.4322564634092933, "grad_norm": 0.6256087382516181, "learning_rate": 4.75506893755069e-05, "loss": 0.6463, "step": 14805 }, { "epoch": 0.43240244667961053, "grad_norm": 0.5861138553262848, "learning_rate": 4.7547985942146526e-05, "loss": 0.6046, "step": 14810 }, { "epoch": 0.43254842994992776, "grad_norm": 0.6144056158702217, "learning_rate": 4.754528250878616e-05, "loss": 0.6166, "step": 14815 }, { "epoch": 0.43269441322024493, "grad_norm": 0.5948953445740336, "learning_rate": 4.7542579075425794e-05, "loss": 0.6129, "step": 14820 }, { "epoch": 0.43284039649056216, "grad_norm": 0.5442103358676299, "learning_rate": 4.753987564206542e-05, "loss": 0.6319, "step": 14825 }, { "epoch": 0.4329863797608794, "grad_norm": 0.5759023875590016, "learning_rate": 4.7537172208705055e-05, "loss": 0.5804, "step": 14830 }, { "epoch": 0.4331323630311966, "grad_norm": 0.5867711137243031, "learning_rate": 4.753446877534469e-05, "loss": 0.645, "step": 14835 }, { "epoch": 0.43327834630151385, "grad_norm": 0.673915943578541, "learning_rate": 4.753176534198432e-05, "loss": 0.5794, "step": 14840 }, { "epoch": 0.4334243295718311, "grad_norm": 0.5804803347350066, "learning_rate": 4.752906190862396e-05, "loss": 0.6243, "step": 14845 }, { "epoch": 0.4335703128421483, "grad_norm": 0.5860025771473061, "learning_rate": 4.752635847526359e-05, "loss": 0.6098, "step": 14850 }, { "epoch": 0.43371629611246554, "grad_norm": 0.5259930202083696, "learning_rate": 4.752365504190322e-05, "loss": 0.6178, "step": 14855 }, { "epoch": 0.4338622793827827, "grad_norm": 0.5170465571142333, "learning_rate": 4.752095160854285e-05, "loss": 0.6369, "step": 14860 }, { "epoch": 0.43400826265309994, "grad_norm": 0.5205677710282668, "learning_rate": 4.7518248175182487e-05, "loss": 0.5856, "step": 14865 }, { "epoch": 0.43415424592341717, "grad_norm": 0.629815987126462, "learning_rate": 4.7515544741822114e-05, "loss": 0.6454, "step": 14870 }, { "epoch": 0.4343002291937344, "grad_norm": 0.5635609645700121, "learning_rate": 4.751284130846175e-05, "loss": 0.625, "step": 14875 }, { "epoch": 0.43444621246405163, "grad_norm": 0.5517170878715527, "learning_rate": 4.751013787510138e-05, "loss": 0.6031, "step": 14880 }, { "epoch": 0.43459219573436886, "grad_norm": 0.5169602061774705, "learning_rate": 4.750743444174101e-05, "loss": 0.5852, "step": 14885 }, { "epoch": 0.4347381790046861, "grad_norm": 0.5871365339841025, "learning_rate": 4.750473100838064e-05, "loss": 0.6231, "step": 14890 }, { "epoch": 0.43488416227500326, "grad_norm": 0.6317539373715709, "learning_rate": 4.750202757502028e-05, "loss": 0.6298, "step": 14895 }, { "epoch": 0.4350301455453205, "grad_norm": 0.512251902091586, "learning_rate": 4.749932414165991e-05, "loss": 0.5712, "step": 14900 }, { "epoch": 0.4351761288156377, "grad_norm": 0.5880661059012032, "learning_rate": 4.7496620708299545e-05, "loss": 0.6311, "step": 14905 }, { "epoch": 0.43532211208595495, "grad_norm": 0.58060593200023, "learning_rate": 4.749391727493918e-05, "loss": 0.6637, "step": 14910 }, { "epoch": 0.4354680953562722, "grad_norm": 0.5488811269988002, "learning_rate": 4.7491213841578806e-05, "loss": 0.5776, "step": 14915 }, { "epoch": 0.4356140786265894, "grad_norm": 0.5482297105524859, "learning_rate": 4.748851040821844e-05, "loss": 0.5832, "step": 14920 }, { "epoch": 0.43576006189690664, "grad_norm": 0.5467006511610535, "learning_rate": 4.7485806974858074e-05, "loss": 0.6437, "step": 14925 }, { "epoch": 0.4359060451672238, "grad_norm": 0.5174619243889135, "learning_rate": 4.74831035414977e-05, "loss": 0.5989, "step": 14930 }, { "epoch": 0.43605202843754104, "grad_norm": 0.542229800802304, "learning_rate": 4.7480400108137336e-05, "loss": 0.6167, "step": 14935 }, { "epoch": 0.43619801170785827, "grad_norm": 0.5225111929760029, "learning_rate": 4.747769667477697e-05, "loss": 0.569, "step": 14940 }, { "epoch": 0.4363439949781755, "grad_norm": 0.6021648707419298, "learning_rate": 4.74749932414166e-05, "loss": 0.6064, "step": 14945 }, { "epoch": 0.4364899782484927, "grad_norm": 0.5608644982773399, "learning_rate": 4.747228980805624e-05, "loss": 0.6138, "step": 14950 }, { "epoch": 0.43663596151880996, "grad_norm": 0.567693394978589, "learning_rate": 4.7469586374695865e-05, "loss": 0.6147, "step": 14955 }, { "epoch": 0.4367819447891272, "grad_norm": 0.49966830451586114, "learning_rate": 4.74668829413355e-05, "loss": 0.6285, "step": 14960 }, { "epoch": 0.4369279280594444, "grad_norm": 0.5331508892472621, "learning_rate": 4.746417950797513e-05, "loss": 0.6, "step": 14965 }, { "epoch": 0.4370739113297616, "grad_norm": 0.5845410655829496, "learning_rate": 4.746147607461476e-05, "loss": 0.6783, "step": 14970 }, { "epoch": 0.4372198946000788, "grad_norm": 0.5626190236802848, "learning_rate": 4.7458772641254394e-05, "loss": 0.6296, "step": 14975 }, { "epoch": 0.43736587787039605, "grad_norm": 0.5545807467070619, "learning_rate": 4.745606920789403e-05, "loss": 0.5989, "step": 14980 }, { "epoch": 0.4375118611407133, "grad_norm": 0.5949140280389832, "learning_rate": 4.745336577453366e-05, "loss": 0.6361, "step": 14985 }, { "epoch": 0.4376578444110305, "grad_norm": 0.6468709849850041, "learning_rate": 4.745066234117329e-05, "loss": 0.6486, "step": 14990 }, { "epoch": 0.43780382768134773, "grad_norm": 0.570250974875432, "learning_rate": 4.7447958907812923e-05, "loss": 0.5877, "step": 14995 }, { "epoch": 0.43794981095166496, "grad_norm": 0.5701599824783088, "learning_rate": 4.744525547445256e-05, "loss": 0.6077, "step": 15000 }, { "epoch": 0.43809579422198214, "grad_norm": 0.6094708085508597, "learning_rate": 4.744255204109219e-05, "loss": 0.63, "step": 15005 }, { "epoch": 0.43824177749229937, "grad_norm": 0.5837348012522444, "learning_rate": 4.7439848607731825e-05, "loss": 0.692, "step": 15010 }, { "epoch": 0.4383877607626166, "grad_norm": 0.5865173012574277, "learning_rate": 4.743714517437145e-05, "loss": 0.5793, "step": 15015 }, { "epoch": 0.4385337440329338, "grad_norm": 0.5526490692549908, "learning_rate": 4.743444174101109e-05, "loss": 0.6085, "step": 15020 }, { "epoch": 0.43867972730325105, "grad_norm": 0.5956116356547411, "learning_rate": 4.743173830765072e-05, "loss": 0.6034, "step": 15025 }, { "epoch": 0.4388257105735683, "grad_norm": 0.45225271827945196, "learning_rate": 4.742903487429035e-05, "loss": 0.5671, "step": 15030 }, { "epoch": 0.4389716938438855, "grad_norm": 0.5519019746921718, "learning_rate": 4.742633144092998e-05, "loss": 0.6119, "step": 15035 }, { "epoch": 0.4391176771142027, "grad_norm": 0.5560095645764966, "learning_rate": 4.7423628007569616e-05, "loss": 0.628, "step": 15040 }, { "epoch": 0.4392636603845199, "grad_norm": 0.6008477107917062, "learning_rate": 4.742092457420925e-05, "loss": 0.6263, "step": 15045 }, { "epoch": 0.43940964365483715, "grad_norm": 0.5673151146886368, "learning_rate": 4.741822114084888e-05, "loss": 0.6692, "step": 15050 }, { "epoch": 0.4395556269251544, "grad_norm": 0.5775239137627979, "learning_rate": 4.741551770748851e-05, "loss": 0.6079, "step": 15055 }, { "epoch": 0.4397016101954716, "grad_norm": 0.5554519608013776, "learning_rate": 4.7412814274128145e-05, "loss": 0.6105, "step": 15060 }, { "epoch": 0.43984759346578883, "grad_norm": 0.5633451872059699, "learning_rate": 4.741011084076778e-05, "loss": 0.5951, "step": 15065 }, { "epoch": 0.43999357673610606, "grad_norm": 0.6007746854538194, "learning_rate": 4.740740740740741e-05, "loss": 0.6253, "step": 15070 }, { "epoch": 0.4401395600064233, "grad_norm": 0.53672096067127, "learning_rate": 4.740470397404704e-05, "loss": 0.5901, "step": 15075 }, { "epoch": 0.44028554327674047, "grad_norm": 0.5853708991672343, "learning_rate": 4.7402000540686675e-05, "loss": 0.6501, "step": 15080 }, { "epoch": 0.4404315265470577, "grad_norm": 0.5456242991873035, "learning_rate": 4.739929710732631e-05, "loss": 0.6446, "step": 15085 }, { "epoch": 0.4405775098173749, "grad_norm": 0.5756199396825251, "learning_rate": 4.7396593673965936e-05, "loss": 0.6202, "step": 15090 }, { "epoch": 0.44072349308769215, "grad_norm": 0.5807434291536673, "learning_rate": 4.739389024060557e-05, "loss": 0.6517, "step": 15095 }, { "epoch": 0.4408694763580094, "grad_norm": 0.531421722398951, "learning_rate": 4.7391186807245204e-05, "loss": 0.6321, "step": 15100 }, { "epoch": 0.4410154596283266, "grad_norm": 0.5265379065308265, "learning_rate": 4.738848337388483e-05, "loss": 0.6299, "step": 15105 }, { "epoch": 0.44116144289864384, "grad_norm": 0.5217385484118593, "learning_rate": 4.7385779940524465e-05, "loss": 0.6132, "step": 15110 }, { "epoch": 0.441307426168961, "grad_norm": 0.5812968851261482, "learning_rate": 4.73830765071641e-05, "loss": 0.5943, "step": 15115 }, { "epoch": 0.44145340943927824, "grad_norm": 0.5374514046548683, "learning_rate": 4.738037307380373e-05, "loss": 0.5994, "step": 15120 }, { "epoch": 0.4415993927095955, "grad_norm": 0.5510964211281447, "learning_rate": 4.737766964044337e-05, "loss": 0.6459, "step": 15125 }, { "epoch": 0.4417453759799127, "grad_norm": 0.5473553478193733, "learning_rate": 4.7374966207083e-05, "loss": 0.5883, "step": 15130 }, { "epoch": 0.44189135925022993, "grad_norm": 0.5641722955934065, "learning_rate": 4.737226277372263e-05, "loss": 0.5955, "step": 15135 }, { "epoch": 0.44203734252054716, "grad_norm": 0.599332705910589, "learning_rate": 4.736955934036226e-05, "loss": 0.646, "step": 15140 }, { "epoch": 0.4421833257908644, "grad_norm": 0.6002782362507009, "learning_rate": 4.7366855907001896e-05, "loss": 0.6327, "step": 15145 }, { "epoch": 0.44232930906118156, "grad_norm": 0.5555091519892404, "learning_rate": 4.7364152473641524e-05, "loss": 0.6381, "step": 15150 }, { "epoch": 0.4424752923314988, "grad_norm": 0.54026183914682, "learning_rate": 4.736144904028116e-05, "loss": 0.6077, "step": 15155 }, { "epoch": 0.442621275601816, "grad_norm": 0.5389053888480392, "learning_rate": 4.735874560692079e-05, "loss": 0.595, "step": 15160 }, { "epoch": 0.44276725887213325, "grad_norm": 0.5789848383864082, "learning_rate": 4.735604217356042e-05, "loss": 0.6631, "step": 15165 }, { "epoch": 0.4429132421424505, "grad_norm": 0.5570329389851263, "learning_rate": 4.735333874020005e-05, "loss": 0.6228, "step": 15170 }, { "epoch": 0.4430592254127677, "grad_norm": 0.5677499809039627, "learning_rate": 4.7350635306839694e-05, "loss": 0.5981, "step": 15175 }, { "epoch": 0.44320520868308494, "grad_norm": 0.49744818769448645, "learning_rate": 4.734793187347932e-05, "loss": 0.5991, "step": 15180 }, { "epoch": 0.4433511919534021, "grad_norm": 0.6009032945324806, "learning_rate": 4.7345228440118955e-05, "loss": 0.6247, "step": 15185 }, { "epoch": 0.44349717522371934, "grad_norm": 0.5749816820252209, "learning_rate": 4.734252500675859e-05, "loss": 0.6532, "step": 15190 }, { "epoch": 0.44364315849403657, "grad_norm": 0.5485819173479449, "learning_rate": 4.7339821573398216e-05, "loss": 0.626, "step": 15195 }, { "epoch": 0.4437891417643538, "grad_norm": 0.5919646396095716, "learning_rate": 4.733711814003785e-05, "loss": 0.5969, "step": 15200 }, { "epoch": 0.44393512503467103, "grad_norm": 0.5431837218914498, "learning_rate": 4.7334414706677484e-05, "loss": 0.6055, "step": 15205 }, { "epoch": 0.44408110830498826, "grad_norm": 0.5035164042483142, "learning_rate": 4.733171127331711e-05, "loss": 0.5908, "step": 15210 }, { "epoch": 0.4442270915753055, "grad_norm": 0.5406009985516937, "learning_rate": 4.7329007839956745e-05, "loss": 0.6448, "step": 15215 }, { "epoch": 0.4443730748456227, "grad_norm": 0.5700195811298573, "learning_rate": 4.732630440659638e-05, "loss": 0.6046, "step": 15220 }, { "epoch": 0.4445190581159399, "grad_norm": 0.639486104657483, "learning_rate": 4.732360097323601e-05, "loss": 0.5954, "step": 15225 }, { "epoch": 0.4446650413862571, "grad_norm": 0.6566501163841857, "learning_rate": 4.732089753987564e-05, "loss": 0.6616, "step": 15230 }, { "epoch": 0.44481102465657435, "grad_norm": 0.549491903213852, "learning_rate": 4.731819410651528e-05, "loss": 0.5695, "step": 15235 }, { "epoch": 0.4449570079268916, "grad_norm": 0.5607182253131537, "learning_rate": 4.731549067315491e-05, "loss": 0.5885, "step": 15240 }, { "epoch": 0.4451029911972088, "grad_norm": 0.5455410564225572, "learning_rate": 4.731278723979454e-05, "loss": 0.6473, "step": 15245 }, { "epoch": 0.44524897446752604, "grad_norm": 0.5667293749230643, "learning_rate": 4.731008380643418e-05, "loss": 0.6197, "step": 15250 }, { "epoch": 0.44539495773784327, "grad_norm": 0.5349574339337645, "learning_rate": 4.7307380373073804e-05, "loss": 0.6184, "step": 15255 }, { "epoch": 0.44554094100816044, "grad_norm": 0.552431421366624, "learning_rate": 4.730467693971344e-05, "loss": 0.6353, "step": 15260 }, { "epoch": 0.44568692427847767, "grad_norm": 0.5064350398566154, "learning_rate": 4.730197350635307e-05, "loss": 0.6057, "step": 15265 }, { "epoch": 0.4458329075487949, "grad_norm": 0.5706968427564492, "learning_rate": 4.72992700729927e-05, "loss": 0.596, "step": 15270 }, { "epoch": 0.44597889081911213, "grad_norm": 0.5256465984065866, "learning_rate": 4.729656663963233e-05, "loss": 0.5993, "step": 15275 }, { "epoch": 0.44612487408942936, "grad_norm": 0.5192378033851354, "learning_rate": 4.729386320627197e-05, "loss": 0.6176, "step": 15280 }, { "epoch": 0.4462708573597466, "grad_norm": 0.5347363086752858, "learning_rate": 4.7291159772911594e-05, "loss": 0.6608, "step": 15285 }, { "epoch": 0.4464168406300638, "grad_norm": 0.5230463923405243, "learning_rate": 4.7288456339551235e-05, "loss": 0.6167, "step": 15290 }, { "epoch": 0.446562823900381, "grad_norm": 0.5464364657066977, "learning_rate": 4.728575290619087e-05, "loss": 0.6249, "step": 15295 }, { "epoch": 0.4467088071706982, "grad_norm": 0.5261397152615143, "learning_rate": 4.7283049472830496e-05, "loss": 0.62, "step": 15300 }, { "epoch": 0.44685479044101545, "grad_norm": 0.6209758720939637, "learning_rate": 4.728034603947013e-05, "loss": 0.6017, "step": 15305 }, { "epoch": 0.4470007737113327, "grad_norm": 0.6465779514312491, "learning_rate": 4.7277642606109765e-05, "loss": 0.6329, "step": 15310 }, { "epoch": 0.4471467569816499, "grad_norm": 0.5187160912758685, "learning_rate": 4.727493917274939e-05, "loss": 0.6189, "step": 15315 }, { "epoch": 0.44729274025196714, "grad_norm": 0.5564921109221025, "learning_rate": 4.7272235739389026e-05, "loss": 0.5919, "step": 15320 }, { "epoch": 0.44743872352228437, "grad_norm": 0.5628091261357, "learning_rate": 4.726953230602866e-05, "loss": 0.5777, "step": 15325 }, { "epoch": 0.4475847067926016, "grad_norm": 0.5925946715854808, "learning_rate": 4.726682887266829e-05, "loss": 0.6267, "step": 15330 }, { "epoch": 0.44773069006291877, "grad_norm": 0.5642438643580875, "learning_rate": 4.726412543930792e-05, "loss": 0.6262, "step": 15335 }, { "epoch": 0.447876673333236, "grad_norm": 0.5696343429888227, "learning_rate": 4.7261422005947555e-05, "loss": 0.6282, "step": 15340 }, { "epoch": 0.4480226566035532, "grad_norm": 0.5620041435373223, "learning_rate": 4.725871857258719e-05, "loss": 0.616, "step": 15345 }, { "epoch": 0.44816863987387046, "grad_norm": 0.57003207766856, "learning_rate": 4.725601513922682e-05, "loss": 0.6159, "step": 15350 }, { "epoch": 0.4483146231441877, "grad_norm": 0.5413359906830201, "learning_rate": 4.725331170586646e-05, "loss": 0.637, "step": 15355 }, { "epoch": 0.4484606064145049, "grad_norm": 0.5722811117823948, "learning_rate": 4.7250608272506084e-05, "loss": 0.6202, "step": 15360 }, { "epoch": 0.44860658968482214, "grad_norm": 0.5087793100204177, "learning_rate": 4.724790483914572e-05, "loss": 0.6059, "step": 15365 }, { "epoch": 0.4487525729551393, "grad_norm": 0.5397072737603043, "learning_rate": 4.724520140578535e-05, "loss": 0.6174, "step": 15370 }, { "epoch": 0.44889855622545655, "grad_norm": 0.5661603439571411, "learning_rate": 4.724249797242498e-05, "loss": 0.6216, "step": 15375 }, { "epoch": 0.4490445394957738, "grad_norm": 0.5871682648306185, "learning_rate": 4.7239794539064614e-05, "loss": 0.6072, "step": 15380 }, { "epoch": 0.449190522766091, "grad_norm": 0.5638585303135278, "learning_rate": 4.723709110570425e-05, "loss": 0.6113, "step": 15385 }, { "epoch": 0.44933650603640823, "grad_norm": 0.5532251482778174, "learning_rate": 4.7234387672343875e-05, "loss": 0.6206, "step": 15390 }, { "epoch": 0.44948248930672546, "grad_norm": 0.7570680756206447, "learning_rate": 4.723168423898351e-05, "loss": 0.6233, "step": 15395 }, { "epoch": 0.4496284725770427, "grad_norm": 0.5138196598231981, "learning_rate": 4.722898080562315e-05, "loss": 0.6131, "step": 15400 }, { "epoch": 0.44977445584735987, "grad_norm": 0.5487285636252611, "learning_rate": 4.722627737226278e-05, "loss": 0.6083, "step": 15405 }, { "epoch": 0.4499204391176771, "grad_norm": 0.5780523682793443, "learning_rate": 4.722357393890241e-05, "loss": 0.6451, "step": 15410 }, { "epoch": 0.4500664223879943, "grad_norm": 0.5549223188287931, "learning_rate": 4.7220870505542045e-05, "loss": 0.6236, "step": 15415 }, { "epoch": 0.45021240565831155, "grad_norm": 0.5498299281790255, "learning_rate": 4.721816707218167e-05, "loss": 0.6183, "step": 15420 }, { "epoch": 0.4503583889286288, "grad_norm": 0.5656845082927867, "learning_rate": 4.7215463638821306e-05, "loss": 0.6518, "step": 15425 }, { "epoch": 0.450504372198946, "grad_norm": 0.6366565270094132, "learning_rate": 4.721276020546094e-05, "loss": 0.6332, "step": 15430 }, { "epoch": 0.45065035546926324, "grad_norm": 0.5260753406781975, "learning_rate": 4.721005677210057e-05, "loss": 0.6186, "step": 15435 }, { "epoch": 0.45079633873958047, "grad_norm": 0.5200659048227143, "learning_rate": 4.72073533387402e-05, "loss": 0.5765, "step": 15440 }, { "epoch": 0.45094232200989764, "grad_norm": 0.5316151575545541, "learning_rate": 4.7204649905379835e-05, "loss": 0.5816, "step": 15445 }, { "epoch": 0.4510883052802149, "grad_norm": 0.5598949285230957, "learning_rate": 4.720194647201946e-05, "loss": 0.622, "step": 15450 }, { "epoch": 0.4512342885505321, "grad_norm": 0.5711139061484966, "learning_rate": 4.71992430386591e-05, "loss": 0.6002, "step": 15455 }, { "epoch": 0.45138027182084933, "grad_norm": 0.5202019951361655, "learning_rate": 4.719653960529874e-05, "loss": 0.6445, "step": 15460 }, { "epoch": 0.45152625509116656, "grad_norm": 0.5436875026481466, "learning_rate": 4.7193836171938365e-05, "loss": 0.6021, "step": 15465 }, { "epoch": 0.4516722383614838, "grad_norm": 0.5060585775031423, "learning_rate": 4.7191132738578e-05, "loss": 0.5772, "step": 15470 }, { "epoch": 0.451818221631801, "grad_norm": 0.720348907233305, "learning_rate": 4.718842930521763e-05, "loss": 0.5983, "step": 15475 }, { "epoch": 0.4519642049021182, "grad_norm": 0.5634930111145983, "learning_rate": 4.718572587185726e-05, "loss": 0.6385, "step": 15480 }, { "epoch": 0.4521101881724354, "grad_norm": 0.5372383574620718, "learning_rate": 4.7183022438496894e-05, "loss": 0.6145, "step": 15485 }, { "epoch": 0.45225617144275265, "grad_norm": 0.5042008766466329, "learning_rate": 4.718031900513653e-05, "loss": 0.5816, "step": 15490 }, { "epoch": 0.4524021547130699, "grad_norm": 0.5494861931203215, "learning_rate": 4.7177615571776155e-05, "loss": 0.5918, "step": 15495 }, { "epoch": 0.4525481379833871, "grad_norm": 0.5662358813611855, "learning_rate": 4.717491213841579e-05, "loss": 0.6189, "step": 15500 }, { "epoch": 0.45269412125370434, "grad_norm": 0.5612761240125937, "learning_rate": 4.717220870505542e-05, "loss": 0.6691, "step": 15505 }, { "epoch": 0.45284010452402157, "grad_norm": 0.6046111770847484, "learning_rate": 4.716950527169505e-05, "loss": 0.6283, "step": 15510 }, { "epoch": 0.45298608779433874, "grad_norm": 0.5389684646366926, "learning_rate": 4.716680183833469e-05, "loss": 0.5914, "step": 15515 }, { "epoch": 0.453132071064656, "grad_norm": 0.5518120051016374, "learning_rate": 4.716409840497432e-05, "loss": 0.6256, "step": 15520 }, { "epoch": 0.4532780543349732, "grad_norm": 0.5829072186451332, "learning_rate": 4.716139497161395e-05, "loss": 0.6146, "step": 15525 }, { "epoch": 0.45342403760529043, "grad_norm": 0.5641285944951848, "learning_rate": 4.7158691538253586e-05, "loss": 0.6129, "step": 15530 }, { "epoch": 0.45357002087560766, "grad_norm": 0.5377758781626534, "learning_rate": 4.715598810489322e-05, "loss": 0.6247, "step": 15535 }, { "epoch": 0.4537160041459249, "grad_norm": 0.5990590362603182, "learning_rate": 4.715328467153285e-05, "loss": 0.64, "step": 15540 }, { "epoch": 0.4538619874162421, "grad_norm": 0.5386331161554287, "learning_rate": 4.715058123817248e-05, "loss": 0.6024, "step": 15545 }, { "epoch": 0.45400797068655935, "grad_norm": 0.5454799894523141, "learning_rate": 4.7147877804812116e-05, "loss": 0.6086, "step": 15550 }, { "epoch": 0.4541539539568765, "grad_norm": 0.543075673757686, "learning_rate": 4.714517437145174e-05, "loss": 0.6169, "step": 15555 }, { "epoch": 0.45429993722719375, "grad_norm": 0.5378890919365913, "learning_rate": 4.714247093809138e-05, "loss": 0.6313, "step": 15560 }, { "epoch": 0.454445920497511, "grad_norm": 0.5330585985552906, "learning_rate": 4.713976750473101e-05, "loss": 0.6054, "step": 15565 }, { "epoch": 0.4545919037678282, "grad_norm": 0.5150075079239149, "learning_rate": 4.7137064071370645e-05, "loss": 0.6363, "step": 15570 }, { "epoch": 0.45473788703814544, "grad_norm": 0.5920492403460517, "learning_rate": 4.713436063801028e-05, "loss": 0.5957, "step": 15575 }, { "epoch": 0.45488387030846267, "grad_norm": 0.5282119092067251, "learning_rate": 4.7131657204649906e-05, "loss": 0.6216, "step": 15580 }, { "epoch": 0.4550298535787799, "grad_norm": 0.5299839541085969, "learning_rate": 4.712895377128954e-05, "loss": 0.6297, "step": 15585 }, { "epoch": 0.45517583684909707, "grad_norm": 0.6001720592716427, "learning_rate": 4.7126250337929174e-05, "loss": 0.6217, "step": 15590 }, { "epoch": 0.4553218201194143, "grad_norm": 0.5969921541378782, "learning_rate": 4.712354690456881e-05, "loss": 0.6134, "step": 15595 }, { "epoch": 0.45546780338973153, "grad_norm": 0.5809471665625311, "learning_rate": 4.7120843471208436e-05, "loss": 0.6143, "step": 15600 }, { "epoch": 0.45561378666004876, "grad_norm": 0.5717615291071074, "learning_rate": 4.711814003784807e-05, "loss": 0.6128, "step": 15605 }, { "epoch": 0.455759769930366, "grad_norm": 0.5119213868602907, "learning_rate": 4.7115436604487704e-05, "loss": 0.6332, "step": 15610 }, { "epoch": 0.4559057532006832, "grad_norm": 0.5788925115389725, "learning_rate": 4.711273317112733e-05, "loss": 0.6308, "step": 15615 }, { "epoch": 0.45605173647100045, "grad_norm": 0.5555378030259501, "learning_rate": 4.7110029737766965e-05, "loss": 0.6444, "step": 15620 }, { "epoch": 0.4561977197413176, "grad_norm": 0.5471198215725881, "learning_rate": 4.71073263044066e-05, "loss": 0.6048, "step": 15625 }, { "epoch": 0.45634370301163485, "grad_norm": 0.5525120209069239, "learning_rate": 4.710462287104623e-05, "loss": 0.6116, "step": 15630 }, { "epoch": 0.4564896862819521, "grad_norm": 0.6214254750538756, "learning_rate": 4.710191943768587e-05, "loss": 0.6138, "step": 15635 }, { "epoch": 0.4566356695522693, "grad_norm": 0.6001290872721866, "learning_rate": 4.7099216004325494e-05, "loss": 0.6099, "step": 15640 }, { "epoch": 0.45678165282258654, "grad_norm": 0.5525033340615649, "learning_rate": 4.709651257096513e-05, "loss": 0.6129, "step": 15645 }, { "epoch": 0.45692763609290377, "grad_norm": 0.5362994533216555, "learning_rate": 4.709380913760476e-05, "loss": 0.583, "step": 15650 }, { "epoch": 0.457073619363221, "grad_norm": 0.5827745373050004, "learning_rate": 4.709110570424439e-05, "loss": 0.6026, "step": 15655 }, { "epoch": 0.45721960263353817, "grad_norm": 0.5454085826474693, "learning_rate": 4.708840227088402e-05, "loss": 0.6291, "step": 15660 }, { "epoch": 0.4573655859038554, "grad_norm": 0.5539907090200854, "learning_rate": 4.708569883752366e-05, "loss": 0.5869, "step": 15665 }, { "epoch": 0.4575115691741726, "grad_norm": 0.4904539333354378, "learning_rate": 4.708299540416329e-05, "loss": 0.6311, "step": 15670 }, { "epoch": 0.45765755244448986, "grad_norm": 0.5507914209634887, "learning_rate": 4.708029197080292e-05, "loss": 0.5836, "step": 15675 }, { "epoch": 0.4578035357148071, "grad_norm": 0.5662277507811845, "learning_rate": 4.707758853744255e-05, "loss": 0.5935, "step": 15680 }, { "epoch": 0.4579495189851243, "grad_norm": 0.5748646588922051, "learning_rate": 4.707488510408219e-05, "loss": 0.5936, "step": 15685 }, { "epoch": 0.45809550225544154, "grad_norm": 0.5319514169455475, "learning_rate": 4.707218167072182e-05, "loss": 0.5889, "step": 15690 }, { "epoch": 0.4582414855257588, "grad_norm": 0.5850131848317295, "learning_rate": 4.7069478237361455e-05, "loss": 0.6175, "step": 15695 }, { "epoch": 0.45838746879607595, "grad_norm": 0.5537557125409989, "learning_rate": 4.706677480400108e-05, "loss": 0.6524, "step": 15700 }, { "epoch": 0.4585334520663932, "grad_norm": 0.5682081258154776, "learning_rate": 4.7064071370640716e-05, "loss": 0.5902, "step": 15705 }, { "epoch": 0.4586794353367104, "grad_norm": 0.5954861399326254, "learning_rate": 4.706136793728035e-05, "loss": 0.6051, "step": 15710 }, { "epoch": 0.45882541860702764, "grad_norm": 0.5790868334617216, "learning_rate": 4.705866450391998e-05, "loss": 0.6241, "step": 15715 }, { "epoch": 0.45897140187734486, "grad_norm": 0.5569720311501382, "learning_rate": 4.705596107055961e-05, "loss": 0.6576, "step": 15720 }, { "epoch": 0.4591173851476621, "grad_norm": 0.5656721874637045, "learning_rate": 4.7053257637199245e-05, "loss": 0.6252, "step": 15725 }, { "epoch": 0.4592633684179793, "grad_norm": 0.5314098684585375, "learning_rate": 4.705055420383888e-05, "loss": 0.5918, "step": 15730 }, { "epoch": 0.4594093516882965, "grad_norm": 0.6084686088580945, "learning_rate": 4.7047850770478506e-05, "loss": 0.6069, "step": 15735 }, { "epoch": 0.4595553349586137, "grad_norm": 0.5265888125630561, "learning_rate": 4.704514733711815e-05, "loss": 0.6255, "step": 15740 }, { "epoch": 0.45970131822893096, "grad_norm": 0.5466123390579329, "learning_rate": 4.7042443903757774e-05, "loss": 0.6191, "step": 15745 }, { "epoch": 0.4598473014992482, "grad_norm": 0.5514626259167926, "learning_rate": 4.703974047039741e-05, "loss": 0.6052, "step": 15750 }, { "epoch": 0.4599932847695654, "grad_norm": 0.5734246812173655, "learning_rate": 4.703703703703704e-05, "loss": 0.6577, "step": 15755 }, { "epoch": 0.46013926803988264, "grad_norm": 0.5973051239199958, "learning_rate": 4.703433360367667e-05, "loss": 0.6351, "step": 15760 }, { "epoch": 0.46028525131019987, "grad_norm": 0.5201472790322383, "learning_rate": 4.7031630170316304e-05, "loss": 0.6118, "step": 15765 }, { "epoch": 0.46043123458051705, "grad_norm": 0.5404520076435908, "learning_rate": 4.702892673695594e-05, "loss": 0.5968, "step": 15770 }, { "epoch": 0.4605772178508343, "grad_norm": 0.5312106034575116, "learning_rate": 4.7026223303595565e-05, "loss": 0.6107, "step": 15775 }, { "epoch": 0.4607232011211515, "grad_norm": 0.5592142567060285, "learning_rate": 4.70235198702352e-05, "loss": 0.6127, "step": 15780 }, { "epoch": 0.46086918439146873, "grad_norm": 0.5737657119400131, "learning_rate": 4.702081643687483e-05, "loss": 0.6549, "step": 15785 }, { "epoch": 0.46101516766178596, "grad_norm": 0.5486699551827647, "learning_rate": 4.701811300351446e-05, "loss": 0.6181, "step": 15790 }, { "epoch": 0.4611611509321032, "grad_norm": 0.534200252891127, "learning_rate": 4.7015409570154094e-05, "loss": 0.577, "step": 15795 }, { "epoch": 0.4613071342024204, "grad_norm": 0.5590245221715597, "learning_rate": 4.7012706136793735e-05, "loss": 0.5988, "step": 15800 }, { "epoch": 0.46145311747273765, "grad_norm": 0.5477556819460847, "learning_rate": 4.701000270343336e-05, "loss": 0.6289, "step": 15805 }, { "epoch": 0.4615991007430548, "grad_norm": 0.5383885346532785, "learning_rate": 4.7007299270072996e-05, "loss": 0.6317, "step": 15810 }, { "epoch": 0.46174508401337205, "grad_norm": 0.5518543724175755, "learning_rate": 4.700459583671263e-05, "loss": 0.589, "step": 15815 }, { "epoch": 0.4618910672836893, "grad_norm": 0.6046732497939296, "learning_rate": 4.700189240335226e-05, "loss": 0.6474, "step": 15820 }, { "epoch": 0.4620370505540065, "grad_norm": 0.5487672347137477, "learning_rate": 4.699918896999189e-05, "loss": 0.5659, "step": 15825 }, { "epoch": 0.46218303382432374, "grad_norm": 0.83287553232491, "learning_rate": 4.6996485536631526e-05, "loss": 0.6407, "step": 15830 }, { "epoch": 0.46232901709464097, "grad_norm": 0.5361693369507567, "learning_rate": 4.699378210327115e-05, "loss": 0.5811, "step": 15835 }, { "epoch": 0.4624750003649582, "grad_norm": 0.5160312980765558, "learning_rate": 4.699107866991079e-05, "loss": 0.5873, "step": 15840 }, { "epoch": 0.4626209836352754, "grad_norm": 0.5411362249683649, "learning_rate": 4.698837523655042e-05, "loss": 0.5812, "step": 15845 }, { "epoch": 0.4627669669055926, "grad_norm": 0.5733041342325128, "learning_rate": 4.698567180319005e-05, "loss": 0.5853, "step": 15850 }, { "epoch": 0.46291295017590983, "grad_norm": 0.5329862682273919, "learning_rate": 4.698296836982969e-05, "loss": 0.5933, "step": 15855 }, { "epoch": 0.46305893344622706, "grad_norm": 0.5535174241509717, "learning_rate": 4.698026493646932e-05, "loss": 0.5935, "step": 15860 }, { "epoch": 0.4632049167165443, "grad_norm": 0.6121605230013536, "learning_rate": 4.697756150310895e-05, "loss": 0.6318, "step": 15865 }, { "epoch": 0.4633508999868615, "grad_norm": 0.5136010419997492, "learning_rate": 4.6974858069748584e-05, "loss": 0.617, "step": 15870 }, { "epoch": 0.46349688325717875, "grad_norm": 0.6007578012027794, "learning_rate": 4.697215463638822e-05, "loss": 0.6378, "step": 15875 }, { "epoch": 0.4636428665274959, "grad_norm": 0.5065441603029421, "learning_rate": 4.6969451203027845e-05, "loss": 0.5984, "step": 15880 }, { "epoch": 0.46378884979781315, "grad_norm": 0.5124585757895133, "learning_rate": 4.696674776966748e-05, "loss": 0.5786, "step": 15885 }, { "epoch": 0.4639348330681304, "grad_norm": 0.5935445331071425, "learning_rate": 4.696404433630711e-05, "loss": 0.6165, "step": 15890 }, { "epoch": 0.4640808163384476, "grad_norm": 0.5870426451824831, "learning_rate": 4.696134090294674e-05, "loss": 0.6024, "step": 15895 }, { "epoch": 0.46422679960876484, "grad_norm": 0.5797026738724984, "learning_rate": 4.6958637469586375e-05, "loss": 0.6404, "step": 15900 }, { "epoch": 0.46437278287908207, "grad_norm": 0.551143377510831, "learning_rate": 4.695593403622601e-05, "loss": 0.586, "step": 15905 }, { "epoch": 0.4645187661493993, "grad_norm": 0.5152764233493159, "learning_rate": 4.695323060286564e-05, "loss": 0.6201, "step": 15910 }, { "epoch": 0.4646647494197165, "grad_norm": 0.5487264188154376, "learning_rate": 4.6950527169505277e-05, "loss": 0.6129, "step": 15915 }, { "epoch": 0.4648107326900337, "grad_norm": 0.5154144441545331, "learning_rate": 4.694782373614491e-05, "loss": 0.6069, "step": 15920 }, { "epoch": 0.46495671596035093, "grad_norm": 0.6016844188712887, "learning_rate": 4.694512030278454e-05, "loss": 0.6105, "step": 15925 }, { "epoch": 0.46510269923066816, "grad_norm": 0.5576486949840727, "learning_rate": 4.694241686942417e-05, "loss": 0.6192, "step": 15930 }, { "epoch": 0.4652486825009854, "grad_norm": 0.5599899590130991, "learning_rate": 4.6939713436063806e-05, "loss": 0.5842, "step": 15935 }, { "epoch": 0.4653946657713026, "grad_norm": 0.5693755065293508, "learning_rate": 4.693701000270343e-05, "loss": 0.6077, "step": 15940 }, { "epoch": 0.46554064904161985, "grad_norm": 0.5296903537824138, "learning_rate": 4.693430656934307e-05, "loss": 0.5912, "step": 15945 }, { "epoch": 0.4656866323119371, "grad_norm": 0.6439675128339943, "learning_rate": 4.69316031359827e-05, "loss": 0.6073, "step": 15950 }, { "epoch": 0.46583261558225425, "grad_norm": 0.5834437849423337, "learning_rate": 4.692889970262233e-05, "loss": 0.6371, "step": 15955 }, { "epoch": 0.4659785988525715, "grad_norm": 0.5618834639346143, "learning_rate": 4.692619626926196e-05, "loss": 0.6138, "step": 15960 }, { "epoch": 0.4661245821228887, "grad_norm": 0.546218702242409, "learning_rate": 4.6923492835901596e-05, "loss": 0.5859, "step": 15965 }, { "epoch": 0.46627056539320594, "grad_norm": 0.5525193815334988, "learning_rate": 4.692078940254123e-05, "loss": 0.6076, "step": 15970 }, { "epoch": 0.46641654866352317, "grad_norm": 0.6635850008138243, "learning_rate": 4.6918085969180864e-05, "loss": 0.6332, "step": 15975 }, { "epoch": 0.4665625319338404, "grad_norm": 0.584976120012212, "learning_rate": 4.69153825358205e-05, "loss": 0.6144, "step": 15980 }, { "epoch": 0.4667085152041576, "grad_norm": 0.5372376712944634, "learning_rate": 4.6912679102460126e-05, "loss": 0.5989, "step": 15985 }, { "epoch": 0.4668544984744748, "grad_norm": 0.5849985452804795, "learning_rate": 4.690997566909976e-05, "loss": 0.6102, "step": 15990 }, { "epoch": 0.46700048174479203, "grad_norm": 0.5630519709527289, "learning_rate": 4.6907272235739394e-05, "loss": 0.6065, "step": 15995 }, { "epoch": 0.46714646501510926, "grad_norm": 0.5309100396080108, "learning_rate": 4.690456880237902e-05, "loss": 0.5873, "step": 16000 }, { "epoch": 0.4672924482854265, "grad_norm": 0.5661700345357988, "learning_rate": 4.6901865369018655e-05, "loss": 0.6082, "step": 16005 }, { "epoch": 0.4674384315557437, "grad_norm": 0.5578651073826456, "learning_rate": 4.689916193565829e-05, "loss": 0.6514, "step": 16010 }, { "epoch": 0.46758441482606095, "grad_norm": 0.5526457346594763, "learning_rate": 4.6896458502297916e-05, "loss": 0.6127, "step": 16015 }, { "epoch": 0.4677303980963782, "grad_norm": 0.5388878221842137, "learning_rate": 4.689375506893755e-05, "loss": 0.626, "step": 16020 }, { "epoch": 0.4678763813666954, "grad_norm": 0.6143797117587122, "learning_rate": 4.689105163557719e-05, "loss": 0.6334, "step": 16025 }, { "epoch": 0.4680223646370126, "grad_norm": 0.5542945252256575, "learning_rate": 4.688834820221682e-05, "loss": 0.628, "step": 16030 }, { "epoch": 0.4681683479073298, "grad_norm": 0.5292452257922104, "learning_rate": 4.688564476885645e-05, "loss": 0.5984, "step": 16035 }, { "epoch": 0.46831433117764704, "grad_norm": 0.5469936052624728, "learning_rate": 4.6882941335496086e-05, "loss": 0.6318, "step": 16040 }, { "epoch": 0.46846031444796427, "grad_norm": 0.6468912024432483, "learning_rate": 4.6880237902135713e-05, "loss": 0.6657, "step": 16045 }, { "epoch": 0.4686062977182815, "grad_norm": 0.5251976147112034, "learning_rate": 4.687753446877535e-05, "loss": 0.6101, "step": 16050 }, { "epoch": 0.4687522809885987, "grad_norm": 0.5458265209600057, "learning_rate": 4.687483103541498e-05, "loss": 0.58, "step": 16055 }, { "epoch": 0.46889826425891595, "grad_norm": 0.6236739152866784, "learning_rate": 4.687212760205461e-05, "loss": 0.6032, "step": 16060 }, { "epoch": 0.4690442475292331, "grad_norm": 0.543132068284204, "learning_rate": 4.686942416869424e-05, "loss": 0.603, "step": 16065 }, { "epoch": 0.46919023079955036, "grad_norm": 0.4955388691061977, "learning_rate": 4.686672073533388e-05, "loss": 0.6115, "step": 16070 }, { "epoch": 0.4693362140698676, "grad_norm": 0.5790995888307855, "learning_rate": 4.6864017301973504e-05, "loss": 0.6139, "step": 16075 }, { "epoch": 0.4694821973401848, "grad_norm": 0.5607499791301838, "learning_rate": 4.6861313868613145e-05, "loss": 0.6506, "step": 16080 }, { "epoch": 0.46962818061050204, "grad_norm": 0.5493112846748571, "learning_rate": 4.685861043525278e-05, "loss": 0.6477, "step": 16085 }, { "epoch": 0.4697741638808193, "grad_norm": 0.5706092955129195, "learning_rate": 4.6855907001892406e-05, "loss": 0.635, "step": 16090 }, { "epoch": 0.4699201471511365, "grad_norm": 0.5327315380035479, "learning_rate": 4.685320356853204e-05, "loss": 0.6472, "step": 16095 }, { "epoch": 0.4700661304214537, "grad_norm": 0.5685729111766955, "learning_rate": 4.6850500135171674e-05, "loss": 0.5975, "step": 16100 }, { "epoch": 0.4702121136917709, "grad_norm": 0.5387252073597887, "learning_rate": 4.68477967018113e-05, "loss": 0.6179, "step": 16105 }, { "epoch": 0.47035809696208813, "grad_norm": 0.5258576202248014, "learning_rate": 4.6845093268450935e-05, "loss": 0.5965, "step": 16110 }, { "epoch": 0.47050408023240536, "grad_norm": 0.5168155111630452, "learning_rate": 4.684238983509057e-05, "loss": 0.5986, "step": 16115 }, { "epoch": 0.4706500635027226, "grad_norm": 0.6947708605808995, "learning_rate": 4.6839686401730197e-05, "loss": 0.6227, "step": 16120 }, { "epoch": 0.4707960467730398, "grad_norm": 0.5672202897858298, "learning_rate": 4.683698296836983e-05, "loss": 0.6202, "step": 16125 }, { "epoch": 0.47094203004335705, "grad_norm": 0.5273882168862528, "learning_rate": 4.6834279535009465e-05, "loss": 0.6219, "step": 16130 }, { "epoch": 0.4710880133136742, "grad_norm": 0.5712844614666192, "learning_rate": 4.683157610164909e-05, "loss": 0.6036, "step": 16135 }, { "epoch": 0.47123399658399145, "grad_norm": 0.6111079571098611, "learning_rate": 4.682887266828873e-05, "loss": 0.6351, "step": 16140 }, { "epoch": 0.4713799798543087, "grad_norm": 0.541007526184854, "learning_rate": 4.682616923492836e-05, "loss": 0.6162, "step": 16145 }, { "epoch": 0.4715259631246259, "grad_norm": 0.521545310243873, "learning_rate": 4.6823465801567994e-05, "loss": 0.6222, "step": 16150 }, { "epoch": 0.47167194639494314, "grad_norm": 0.5612439965972084, "learning_rate": 4.682076236820763e-05, "loss": 0.636, "step": 16155 }, { "epoch": 0.47181792966526037, "grad_norm": 0.5538329538687076, "learning_rate": 4.681805893484726e-05, "loss": 0.6142, "step": 16160 }, { "epoch": 0.4719639129355776, "grad_norm": 0.48459571194787654, "learning_rate": 4.681535550148689e-05, "loss": 0.6071, "step": 16165 }, { "epoch": 0.47210989620589483, "grad_norm": 0.5813897964419782, "learning_rate": 4.681265206812652e-05, "loss": 0.599, "step": 16170 }, { "epoch": 0.472255879476212, "grad_norm": 0.5166405089326996, "learning_rate": 4.680994863476616e-05, "loss": 0.6006, "step": 16175 }, { "epoch": 0.47240186274652923, "grad_norm": 0.5510317720793136, "learning_rate": 4.6807245201405784e-05, "loss": 0.6206, "step": 16180 }, { "epoch": 0.47254784601684646, "grad_norm": 0.5970015333857877, "learning_rate": 4.680454176804542e-05, "loss": 0.6219, "step": 16185 }, { "epoch": 0.4726938292871637, "grad_norm": 0.561037257051168, "learning_rate": 4.680183833468505e-05, "loss": 0.6102, "step": 16190 }, { "epoch": 0.4728398125574809, "grad_norm": 0.5689321509493627, "learning_rate": 4.6799134901324686e-05, "loss": 0.6103, "step": 16195 }, { "epoch": 0.47298579582779815, "grad_norm": 0.6156068469805882, "learning_rate": 4.679643146796432e-05, "loss": 0.5982, "step": 16200 }, { "epoch": 0.4731317790981154, "grad_norm": 0.5512131249022849, "learning_rate": 4.679372803460395e-05, "loss": 0.596, "step": 16205 }, { "epoch": 0.47327776236843255, "grad_norm": 0.5442497759449095, "learning_rate": 4.679102460124358e-05, "loss": 0.6161, "step": 16210 }, { "epoch": 0.4734237456387498, "grad_norm": 0.49522025220808036, "learning_rate": 4.6788321167883216e-05, "loss": 0.5949, "step": 16215 }, { "epoch": 0.473569728909067, "grad_norm": 0.5362415218814496, "learning_rate": 4.678561773452285e-05, "loss": 0.6075, "step": 16220 }, { "epoch": 0.47371571217938424, "grad_norm": 0.531934120295828, "learning_rate": 4.678291430116248e-05, "loss": 0.6337, "step": 16225 }, { "epoch": 0.47386169544970147, "grad_norm": 0.5322619176159046, "learning_rate": 4.678021086780211e-05, "loss": 0.6026, "step": 16230 }, { "epoch": 0.4740076787200187, "grad_norm": 0.5201468836575386, "learning_rate": 4.6777507434441745e-05, "loss": 0.6231, "step": 16235 }, { "epoch": 0.47415366199033593, "grad_norm": 0.5554420697123588, "learning_rate": 4.677480400108137e-05, "loss": 0.608, "step": 16240 }, { "epoch": 0.4742996452606531, "grad_norm": 0.5656648450013787, "learning_rate": 4.6772100567721006e-05, "loss": 0.637, "step": 16245 }, { "epoch": 0.47444562853097033, "grad_norm": 0.5374415238262449, "learning_rate": 4.676939713436064e-05, "loss": 0.6233, "step": 16250 }, { "epoch": 0.47459161180128756, "grad_norm": 0.5453467705393706, "learning_rate": 4.6766693701000274e-05, "loss": 0.6222, "step": 16255 }, { "epoch": 0.4747375950716048, "grad_norm": 0.5611112694123417, "learning_rate": 4.676399026763991e-05, "loss": 0.6119, "step": 16260 }, { "epoch": 0.474883578341922, "grad_norm": 0.5561265467687111, "learning_rate": 4.6761286834279535e-05, "loss": 0.6703, "step": 16265 }, { "epoch": 0.47502956161223925, "grad_norm": 0.5400843791009509, "learning_rate": 4.675858340091917e-05, "loss": 0.6025, "step": 16270 }, { "epoch": 0.4751755448825565, "grad_norm": 0.5706260307982078, "learning_rate": 4.6755879967558803e-05, "loss": 0.6458, "step": 16275 }, { "epoch": 0.4753215281528737, "grad_norm": 0.5612188715114244, "learning_rate": 4.675317653419843e-05, "loss": 0.6104, "step": 16280 }, { "epoch": 0.4754675114231909, "grad_norm": 0.6321560460580287, "learning_rate": 4.6750473100838065e-05, "loss": 0.618, "step": 16285 }, { "epoch": 0.4756134946935081, "grad_norm": 0.6397755798305966, "learning_rate": 4.67477696674777e-05, "loss": 0.6184, "step": 16290 }, { "epoch": 0.47575947796382534, "grad_norm": 0.5616661548770581, "learning_rate": 4.674506623411733e-05, "loss": 0.6098, "step": 16295 }, { "epoch": 0.47590546123414257, "grad_norm": 0.5721039345813077, "learning_rate": 4.674236280075696e-05, "loss": 0.6341, "step": 16300 }, { "epoch": 0.4760514445044598, "grad_norm": 0.5334671858515345, "learning_rate": 4.6739659367396594e-05, "loss": 0.6074, "step": 16305 }, { "epoch": 0.476197427774777, "grad_norm": 0.5614368934315533, "learning_rate": 4.673695593403623e-05, "loss": 0.622, "step": 16310 }, { "epoch": 0.47634341104509426, "grad_norm": 0.5697832817986224, "learning_rate": 4.673425250067586e-05, "loss": 0.6337, "step": 16315 }, { "epoch": 0.47648939431541143, "grad_norm": 0.6127279608289158, "learning_rate": 4.6731549067315496e-05, "loss": 0.6206, "step": 16320 }, { "epoch": 0.47663537758572866, "grad_norm": 0.5894527536572378, "learning_rate": 4.672884563395512e-05, "loss": 0.634, "step": 16325 }, { "epoch": 0.4767813608560459, "grad_norm": 0.5311194839019544, "learning_rate": 4.672614220059476e-05, "loss": 0.6074, "step": 16330 }, { "epoch": 0.4769273441263631, "grad_norm": 0.5422769123804063, "learning_rate": 4.672343876723439e-05, "loss": 0.6302, "step": 16335 }, { "epoch": 0.47707332739668035, "grad_norm": 0.5438902284296591, "learning_rate": 4.672073533387402e-05, "loss": 0.5729, "step": 16340 }, { "epoch": 0.4772193106669976, "grad_norm": 0.5329961478560854, "learning_rate": 4.671803190051365e-05, "loss": 0.5923, "step": 16345 }, { "epoch": 0.4773652939373148, "grad_norm": 0.566767223875555, "learning_rate": 4.6715328467153287e-05, "loss": 0.6182, "step": 16350 }, { "epoch": 0.477511277207632, "grad_norm": 0.5690780187515713, "learning_rate": 4.671262503379292e-05, "loss": 0.595, "step": 16355 }, { "epoch": 0.4776572604779492, "grad_norm": 0.5253660816965695, "learning_rate": 4.670992160043255e-05, "loss": 0.6021, "step": 16360 }, { "epoch": 0.47780324374826644, "grad_norm": 0.4977742088736971, "learning_rate": 4.670721816707219e-05, "loss": 0.6142, "step": 16365 }, { "epoch": 0.47794922701858367, "grad_norm": 0.5920708109877112, "learning_rate": 4.6704514733711816e-05, "loss": 0.6506, "step": 16370 }, { "epoch": 0.4780952102889009, "grad_norm": 0.5145938815877322, "learning_rate": 4.670181130035145e-05, "loss": 0.6109, "step": 16375 }, { "epoch": 0.4782411935592181, "grad_norm": 0.5541499494316863, "learning_rate": 4.6699107866991084e-05, "loss": 0.6677, "step": 16380 }, { "epoch": 0.47838717682953535, "grad_norm": 0.5410252123137367, "learning_rate": 4.669640443363071e-05, "loss": 0.5862, "step": 16385 }, { "epoch": 0.4785331600998526, "grad_norm": 0.5936483565514383, "learning_rate": 4.6693701000270345e-05, "loss": 0.6303, "step": 16390 }, { "epoch": 0.47867914337016976, "grad_norm": 0.6038113549907862, "learning_rate": 4.669099756690998e-05, "loss": 0.6218, "step": 16395 }, { "epoch": 0.478825126640487, "grad_norm": 0.4952092739121793, "learning_rate": 4.6688294133549606e-05, "loss": 0.6228, "step": 16400 }, { "epoch": 0.4789711099108042, "grad_norm": 0.5540974580328555, "learning_rate": 4.668559070018924e-05, "loss": 0.6447, "step": 16405 }, { "epoch": 0.47911709318112145, "grad_norm": 0.5755186210237012, "learning_rate": 4.6682887266828874e-05, "loss": 0.6114, "step": 16410 }, { "epoch": 0.4792630764514387, "grad_norm": 0.550886181815762, "learning_rate": 4.66801838334685e-05, "loss": 0.5856, "step": 16415 }, { "epoch": 0.4794090597217559, "grad_norm": 0.5742642186872743, "learning_rate": 4.667748040010814e-05, "loss": 0.6394, "step": 16420 }, { "epoch": 0.47955504299207313, "grad_norm": 0.5575765867586104, "learning_rate": 4.6674776966747776e-05, "loss": 0.636, "step": 16425 }, { "epoch": 0.4797010262623903, "grad_norm": 0.5501312182242082, "learning_rate": 4.6672073533387404e-05, "loss": 0.5998, "step": 16430 }, { "epoch": 0.47984700953270754, "grad_norm": 0.5302250074860058, "learning_rate": 4.666937010002704e-05, "loss": 0.5915, "step": 16435 }, { "epoch": 0.47999299280302477, "grad_norm": 0.5662099718316581, "learning_rate": 4.666666666666667e-05, "loss": 0.5919, "step": 16440 }, { "epoch": 0.480138976073342, "grad_norm": 0.5101808192104135, "learning_rate": 4.66639632333063e-05, "loss": 0.6152, "step": 16445 }, { "epoch": 0.4802849593436592, "grad_norm": 0.6107037360001379, "learning_rate": 4.666125979994593e-05, "loss": 0.5959, "step": 16450 }, { "epoch": 0.48043094261397645, "grad_norm": 0.5326363915175557, "learning_rate": 4.665855636658557e-05, "loss": 0.5783, "step": 16455 }, { "epoch": 0.4805769258842937, "grad_norm": 0.5376708435066619, "learning_rate": 4.6655852933225194e-05, "loss": 0.5607, "step": 16460 }, { "epoch": 0.48072290915461086, "grad_norm": 0.5382945470745575, "learning_rate": 4.665314949986483e-05, "loss": 0.6397, "step": 16465 }, { "epoch": 0.4808688924249281, "grad_norm": 0.5557978169114938, "learning_rate": 4.665044606650446e-05, "loss": 0.6459, "step": 16470 }, { "epoch": 0.4810148756952453, "grad_norm": 0.5141368717391156, "learning_rate": 4.664774263314409e-05, "loss": 0.5823, "step": 16475 }, { "epoch": 0.48116085896556254, "grad_norm": 0.5977387372772145, "learning_rate": 4.664503919978373e-05, "loss": 0.6175, "step": 16480 }, { "epoch": 0.4813068422358798, "grad_norm": 0.5397649303249606, "learning_rate": 4.6642335766423364e-05, "loss": 0.625, "step": 16485 }, { "epoch": 0.481452825506197, "grad_norm": 0.5193418914189558, "learning_rate": 4.663963233306299e-05, "loss": 0.6396, "step": 16490 }, { "epoch": 0.48159880877651423, "grad_norm": 0.5330659026974429, "learning_rate": 4.6636928899702625e-05, "loss": 0.5669, "step": 16495 }, { "epoch": 0.48174479204683146, "grad_norm": 0.5852731597454871, "learning_rate": 4.663422546634226e-05, "loss": 0.6349, "step": 16500 }, { "epoch": 0.48189077531714863, "grad_norm": 0.5697992429560276, "learning_rate": 4.663152203298189e-05, "loss": 0.5852, "step": 16505 }, { "epoch": 0.48203675858746586, "grad_norm": 0.5224976135542594, "learning_rate": 4.662881859962152e-05, "loss": 0.5933, "step": 16510 }, { "epoch": 0.4821827418577831, "grad_norm": 0.5231093604480924, "learning_rate": 4.6626115166261155e-05, "loss": 0.6078, "step": 16515 }, { "epoch": 0.4823287251281003, "grad_norm": 0.493374682721493, "learning_rate": 4.662341173290078e-05, "loss": 0.5516, "step": 16520 }, { "epoch": 0.48247470839841755, "grad_norm": 0.5795284504836861, "learning_rate": 4.6620708299540416e-05, "loss": 0.5966, "step": 16525 }, { "epoch": 0.4826206916687348, "grad_norm": 0.5104618520162245, "learning_rate": 4.661800486618005e-05, "loss": 0.5763, "step": 16530 }, { "epoch": 0.482766674939052, "grad_norm": 0.5220947814002097, "learning_rate": 4.6615301432819684e-05, "loss": 0.6223, "step": 16535 }, { "epoch": 0.4829126582093692, "grad_norm": 0.5673930808827273, "learning_rate": 4.661259799945932e-05, "loss": 0.6359, "step": 16540 }, { "epoch": 0.4830586414796864, "grad_norm": 0.5563744696150158, "learning_rate": 4.660989456609895e-05, "loss": 0.6418, "step": 16545 }, { "epoch": 0.48320462475000364, "grad_norm": 0.5514605154064898, "learning_rate": 4.660719113273858e-05, "loss": 0.5841, "step": 16550 }, { "epoch": 0.48335060802032087, "grad_norm": 0.5397745211750822, "learning_rate": 4.660448769937821e-05, "loss": 0.636, "step": 16555 }, { "epoch": 0.4834965912906381, "grad_norm": 0.6318420850848462, "learning_rate": 4.660178426601785e-05, "loss": 0.6026, "step": 16560 }, { "epoch": 0.48364257456095533, "grad_norm": 0.5720789098304183, "learning_rate": 4.6599080832657474e-05, "loss": 0.6324, "step": 16565 }, { "epoch": 0.48378855783127256, "grad_norm": 0.5297086520579678, "learning_rate": 4.659637739929711e-05, "loss": 0.6198, "step": 16570 }, { "epoch": 0.48393454110158973, "grad_norm": 0.528390089218424, "learning_rate": 4.659367396593674e-05, "loss": 0.638, "step": 16575 }, { "epoch": 0.48408052437190696, "grad_norm": 0.5735682427966877, "learning_rate": 4.659097053257637e-05, "loss": 0.6236, "step": 16580 }, { "epoch": 0.4842265076422242, "grad_norm": 0.5702738928634796, "learning_rate": 4.6588267099216004e-05, "loss": 0.6691, "step": 16585 }, { "epoch": 0.4843724909125414, "grad_norm": 0.5431803666221396, "learning_rate": 4.6585563665855645e-05, "loss": 0.6336, "step": 16590 }, { "epoch": 0.48451847418285865, "grad_norm": 0.5789885980536128, "learning_rate": 4.658286023249527e-05, "loss": 0.6189, "step": 16595 }, { "epoch": 0.4846644574531759, "grad_norm": 0.5597757906297751, "learning_rate": 4.6580156799134906e-05, "loss": 0.6031, "step": 16600 }, { "epoch": 0.4848104407234931, "grad_norm": 0.5707474614458734, "learning_rate": 4.657745336577454e-05, "loss": 0.5951, "step": 16605 }, { "epoch": 0.4849564239938103, "grad_norm": 0.5766954883416442, "learning_rate": 4.657474993241417e-05, "loss": 0.598, "step": 16610 }, { "epoch": 0.4851024072641275, "grad_norm": 0.6012629769097461, "learning_rate": 4.65720464990538e-05, "loss": 0.574, "step": 16615 }, { "epoch": 0.48524839053444474, "grad_norm": 0.5323285999580412, "learning_rate": 4.6569343065693435e-05, "loss": 0.5814, "step": 16620 }, { "epoch": 0.48539437380476197, "grad_norm": 0.5613908360945232, "learning_rate": 4.656663963233306e-05, "loss": 0.5942, "step": 16625 }, { "epoch": 0.4855403570750792, "grad_norm": 0.4987368233312124, "learning_rate": 4.6563936198972696e-05, "loss": 0.6032, "step": 16630 }, { "epoch": 0.48568634034539643, "grad_norm": 0.8179714732266726, "learning_rate": 4.656123276561233e-05, "loss": 0.6289, "step": 16635 }, { "epoch": 0.48583232361571366, "grad_norm": 0.5214850446475455, "learning_rate": 4.655852933225196e-05, "loss": 0.6149, "step": 16640 }, { "epoch": 0.4859783068860309, "grad_norm": 0.5688002053082798, "learning_rate": 4.655582589889159e-05, "loss": 0.6169, "step": 16645 }, { "epoch": 0.48612429015634806, "grad_norm": 0.5481733526624336, "learning_rate": 4.655312246553123e-05, "loss": 0.6243, "step": 16650 }, { "epoch": 0.4862702734266653, "grad_norm": 0.5067694727970572, "learning_rate": 4.655041903217086e-05, "loss": 0.5828, "step": 16655 }, { "epoch": 0.4864162566969825, "grad_norm": 0.5046630735361853, "learning_rate": 4.6547715598810494e-05, "loss": 0.5969, "step": 16660 }, { "epoch": 0.48656223996729975, "grad_norm": 0.5799267981671673, "learning_rate": 4.654501216545013e-05, "loss": 0.6012, "step": 16665 }, { "epoch": 0.486708223237617, "grad_norm": 0.6120763105129645, "learning_rate": 4.6542308732089755e-05, "loss": 0.6358, "step": 16670 }, { "epoch": 0.4868542065079342, "grad_norm": 0.5235368530786301, "learning_rate": 4.653960529872939e-05, "loss": 0.6004, "step": 16675 }, { "epoch": 0.48700018977825144, "grad_norm": 0.5631769312856789, "learning_rate": 4.653690186536902e-05, "loss": 0.6279, "step": 16680 }, { "epoch": 0.4871461730485686, "grad_norm": 0.5450115414745881, "learning_rate": 4.653419843200865e-05, "loss": 0.5988, "step": 16685 }, { "epoch": 0.48729215631888584, "grad_norm": 0.5355601760791703, "learning_rate": 4.6531494998648284e-05, "loss": 0.6397, "step": 16690 }, { "epoch": 0.48743813958920307, "grad_norm": 0.549874254823354, "learning_rate": 4.652879156528792e-05, "loss": 0.6114, "step": 16695 }, { "epoch": 0.4875841228595203, "grad_norm": 0.557799362159355, "learning_rate": 4.6526088131927545e-05, "loss": 0.6014, "step": 16700 }, { "epoch": 0.4877301061298375, "grad_norm": 0.5154648758871827, "learning_rate": 4.6523384698567186e-05, "loss": 0.5779, "step": 16705 }, { "epoch": 0.48787608940015476, "grad_norm": 0.5173114261290371, "learning_rate": 4.652068126520682e-05, "loss": 0.5661, "step": 16710 }, { "epoch": 0.488022072670472, "grad_norm": 0.6028114715362921, "learning_rate": 4.651797783184645e-05, "loss": 0.6145, "step": 16715 }, { "epoch": 0.48816805594078916, "grad_norm": 0.5790209251185404, "learning_rate": 4.651527439848608e-05, "loss": 0.5725, "step": 16720 }, { "epoch": 0.4883140392111064, "grad_norm": 0.6041495472185747, "learning_rate": 4.6512570965125715e-05, "loss": 0.572, "step": 16725 }, { "epoch": 0.4884600224814236, "grad_norm": 0.5336589142151101, "learning_rate": 4.650986753176534e-05, "loss": 0.5907, "step": 16730 }, { "epoch": 0.48860600575174085, "grad_norm": 0.5235487895215197, "learning_rate": 4.650716409840498e-05, "loss": 0.6189, "step": 16735 }, { "epoch": 0.4887519890220581, "grad_norm": 0.5490964036605158, "learning_rate": 4.650446066504461e-05, "loss": 0.6454, "step": 16740 }, { "epoch": 0.4888979722923753, "grad_norm": 0.5632830044255733, "learning_rate": 4.650175723168424e-05, "loss": 0.608, "step": 16745 }, { "epoch": 0.48904395556269253, "grad_norm": 0.5608471996584113, "learning_rate": 4.649905379832387e-05, "loss": 0.6398, "step": 16750 }, { "epoch": 0.48918993883300976, "grad_norm": 0.5891352056011373, "learning_rate": 4.6496350364963506e-05, "loss": 0.608, "step": 16755 }, { "epoch": 0.48933592210332694, "grad_norm": 0.5788698173155645, "learning_rate": 4.649364693160314e-05, "loss": 0.6386, "step": 16760 }, { "epoch": 0.48948190537364417, "grad_norm": 0.5267418473112014, "learning_rate": 4.6490943498242774e-05, "loss": 0.6078, "step": 16765 }, { "epoch": 0.4896278886439614, "grad_norm": 0.5398781325660236, "learning_rate": 4.648824006488241e-05, "loss": 0.5848, "step": 16770 }, { "epoch": 0.4897738719142786, "grad_norm": 0.545569929317801, "learning_rate": 4.6485536631522035e-05, "loss": 0.6002, "step": 16775 }, { "epoch": 0.48991985518459585, "grad_norm": 0.5843381893915467, "learning_rate": 4.648283319816167e-05, "loss": 0.6303, "step": 16780 }, { "epoch": 0.4900658384549131, "grad_norm": 0.502655572650046, "learning_rate": 4.64801297648013e-05, "loss": 0.6125, "step": 16785 }, { "epoch": 0.4902118217252303, "grad_norm": 0.5519121999575003, "learning_rate": 4.647742633144093e-05, "loss": 0.601, "step": 16790 }, { "epoch": 0.4903578049955475, "grad_norm": 0.5881643386544755, "learning_rate": 4.6474722898080564e-05, "loss": 0.6333, "step": 16795 }, { "epoch": 0.4905037882658647, "grad_norm": 0.5612430605480988, "learning_rate": 4.64720194647202e-05, "loss": 0.6013, "step": 16800 }, { "epoch": 0.49064977153618194, "grad_norm": 0.5726209063678781, "learning_rate": 4.6469316031359826e-05, "loss": 0.5995, "step": 16805 }, { "epoch": 0.4907957548064992, "grad_norm": 0.6132485402645615, "learning_rate": 4.646661259799946e-05, "loss": 0.6098, "step": 16810 }, { "epoch": 0.4909417380768164, "grad_norm": 0.5566813818911035, "learning_rate": 4.6463909164639094e-05, "loss": 0.5864, "step": 16815 }, { "epoch": 0.49108772134713363, "grad_norm": 0.5457564420895977, "learning_rate": 4.646120573127873e-05, "loss": 0.6046, "step": 16820 }, { "epoch": 0.49123370461745086, "grad_norm": 0.6000224310864479, "learning_rate": 4.645850229791836e-05, "loss": 0.657, "step": 16825 }, { "epoch": 0.49137968788776804, "grad_norm": 0.5303186564803849, "learning_rate": 4.645579886455799e-05, "loss": 0.6263, "step": 16830 }, { "epoch": 0.49152567115808526, "grad_norm": 0.49894877155346873, "learning_rate": 4.645309543119762e-05, "loss": 0.6006, "step": 16835 }, { "epoch": 0.4916716544284025, "grad_norm": 0.5136047838171374, "learning_rate": 4.645039199783726e-05, "loss": 0.5769, "step": 16840 }, { "epoch": 0.4918176376987197, "grad_norm": 0.5187079758847501, "learning_rate": 4.644768856447689e-05, "loss": 0.6324, "step": 16845 }, { "epoch": 0.49196362096903695, "grad_norm": 0.49869374945129197, "learning_rate": 4.644498513111652e-05, "loss": 0.6204, "step": 16850 }, { "epoch": 0.4921096042393542, "grad_norm": 0.5022799100264047, "learning_rate": 4.644228169775615e-05, "loss": 0.5895, "step": 16855 }, { "epoch": 0.4922555875096714, "grad_norm": 0.5482835546699515, "learning_rate": 4.6439578264395786e-05, "loss": 0.6263, "step": 16860 }, { "epoch": 0.49240157077998864, "grad_norm": 0.5345693425241459, "learning_rate": 4.6436874831035413e-05, "loss": 0.6176, "step": 16865 }, { "epoch": 0.4925475540503058, "grad_norm": 0.5462907479220529, "learning_rate": 4.643417139767505e-05, "loss": 0.5895, "step": 16870 }, { "epoch": 0.49269353732062304, "grad_norm": 0.5840497927254815, "learning_rate": 4.643146796431468e-05, "loss": 0.6276, "step": 16875 }, { "epoch": 0.49283952059094027, "grad_norm": 0.5540547922781122, "learning_rate": 4.6428764530954316e-05, "loss": 0.6338, "step": 16880 }, { "epoch": 0.4929855038612575, "grad_norm": 0.5823547511183615, "learning_rate": 4.642606109759395e-05, "loss": 0.6084, "step": 16885 }, { "epoch": 0.49313148713157473, "grad_norm": 0.546922256874686, "learning_rate": 4.642335766423358e-05, "loss": 0.6545, "step": 16890 }, { "epoch": 0.49327747040189196, "grad_norm": 0.5919135475992182, "learning_rate": 4.642065423087321e-05, "loss": 0.5661, "step": 16895 }, { "epoch": 0.4934234536722092, "grad_norm": 0.5876122439646296, "learning_rate": 4.6417950797512845e-05, "loss": 0.6079, "step": 16900 }, { "epoch": 0.49356943694252636, "grad_norm": 0.5600210074525301, "learning_rate": 4.641524736415248e-05, "loss": 0.5913, "step": 16905 }, { "epoch": 0.4937154202128436, "grad_norm": 0.5186253956908681, "learning_rate": 4.6412543930792106e-05, "loss": 0.5791, "step": 16910 }, { "epoch": 0.4938614034831608, "grad_norm": 0.5741871208331442, "learning_rate": 4.640984049743174e-05, "loss": 0.6156, "step": 16915 }, { "epoch": 0.49400738675347805, "grad_norm": 0.6145886842840658, "learning_rate": 4.6407137064071374e-05, "loss": 0.6299, "step": 16920 }, { "epoch": 0.4941533700237953, "grad_norm": 0.5736795110895258, "learning_rate": 4.6404433630711e-05, "loss": 0.6406, "step": 16925 }, { "epoch": 0.4942993532941125, "grad_norm": 0.5049524851453295, "learning_rate": 4.640173019735064e-05, "loss": 0.5837, "step": 16930 }, { "epoch": 0.49444533656442974, "grad_norm": 0.5329324724063282, "learning_rate": 4.639902676399027e-05, "loss": 0.6056, "step": 16935 }, { "epoch": 0.4945913198347469, "grad_norm": 0.5638936063094359, "learning_rate": 4.63963233306299e-05, "loss": 0.6024, "step": 16940 }, { "epoch": 0.49473730310506414, "grad_norm": 0.5620839288801913, "learning_rate": 4.639361989726954e-05, "loss": 0.6017, "step": 16945 }, { "epoch": 0.49488328637538137, "grad_norm": 0.545384570867283, "learning_rate": 4.6390916463909165e-05, "loss": 0.5758, "step": 16950 }, { "epoch": 0.4950292696456986, "grad_norm": 0.5087764280003269, "learning_rate": 4.63882130305488e-05, "loss": 0.6258, "step": 16955 }, { "epoch": 0.49517525291601583, "grad_norm": 0.5568851303803399, "learning_rate": 4.638550959718843e-05, "loss": 0.5848, "step": 16960 }, { "epoch": 0.49532123618633306, "grad_norm": 0.5612536858030446, "learning_rate": 4.638280616382806e-05, "loss": 0.6305, "step": 16965 }, { "epoch": 0.4954672194566503, "grad_norm": 0.5825372886819901, "learning_rate": 4.6380102730467694e-05, "loss": 0.6458, "step": 16970 }, { "epoch": 0.4956132027269675, "grad_norm": 0.6200084904873533, "learning_rate": 4.637739929710733e-05, "loss": 0.6096, "step": 16975 }, { "epoch": 0.4957591859972847, "grad_norm": 0.5544755815010598, "learning_rate": 4.637469586374696e-05, "loss": 0.628, "step": 16980 }, { "epoch": 0.4959051692676019, "grad_norm": 0.48782334597349136, "learning_rate": 4.637199243038659e-05, "loss": 0.6044, "step": 16985 }, { "epoch": 0.49605115253791915, "grad_norm": 0.5301694243008461, "learning_rate": 4.636928899702623e-05, "loss": 0.5977, "step": 16990 }, { "epoch": 0.4961971358082364, "grad_norm": 0.5419451216695622, "learning_rate": 4.636658556366586e-05, "loss": 0.5975, "step": 16995 }, { "epoch": 0.4963431190785536, "grad_norm": 0.5359323889345654, "learning_rate": 4.636388213030549e-05, "loss": 0.6254, "step": 17000 }, { "epoch": 0.49648910234887084, "grad_norm": 0.5311955617688976, "learning_rate": 4.6361178696945125e-05, "loss": 0.586, "step": 17005 }, { "epoch": 0.49663508561918807, "grad_norm": 0.593632747590338, "learning_rate": 4.635847526358475e-05, "loss": 0.676, "step": 17010 }, { "epoch": 0.49678106888950524, "grad_norm": 0.5647024702304798, "learning_rate": 4.6355771830224386e-05, "loss": 0.612, "step": 17015 }, { "epoch": 0.49692705215982247, "grad_norm": 0.5438621681160035, "learning_rate": 4.635306839686402e-05, "loss": 0.6242, "step": 17020 }, { "epoch": 0.4970730354301397, "grad_norm": 0.5330771080365081, "learning_rate": 4.635036496350365e-05, "loss": 0.621, "step": 17025 }, { "epoch": 0.4972190187004569, "grad_norm": 0.4906610844715358, "learning_rate": 4.634766153014328e-05, "loss": 0.6134, "step": 17030 }, { "epoch": 0.49736500197077416, "grad_norm": 0.47893419139393634, "learning_rate": 4.6344958096782916e-05, "loss": 0.5688, "step": 17035 }, { "epoch": 0.4975109852410914, "grad_norm": 0.5106037691104442, "learning_rate": 4.634225466342254e-05, "loss": 0.5764, "step": 17040 }, { "epoch": 0.4976569685114086, "grad_norm": 0.5588192768930121, "learning_rate": 4.6339551230062184e-05, "loss": 0.6243, "step": 17045 }, { "epoch": 0.4978029517817258, "grad_norm": 0.5281454251995449, "learning_rate": 4.633684779670182e-05, "loss": 0.6182, "step": 17050 }, { "epoch": 0.497948935052043, "grad_norm": 0.5784565132551404, "learning_rate": 4.6334144363341445e-05, "loss": 0.6221, "step": 17055 }, { "epoch": 0.49809491832236025, "grad_norm": 0.6490969097138366, "learning_rate": 4.633144092998108e-05, "loss": 0.601, "step": 17060 }, { "epoch": 0.4982409015926775, "grad_norm": 0.5205721464805123, "learning_rate": 4.632873749662071e-05, "loss": 0.5951, "step": 17065 }, { "epoch": 0.4983868848629947, "grad_norm": 0.5325099674031899, "learning_rate": 4.632603406326034e-05, "loss": 0.627, "step": 17070 }, { "epoch": 0.49853286813331193, "grad_norm": 0.5029247826661273, "learning_rate": 4.6323330629899974e-05, "loss": 0.5865, "step": 17075 }, { "epoch": 0.49867885140362916, "grad_norm": 0.5358040330056364, "learning_rate": 4.632062719653961e-05, "loss": 0.6184, "step": 17080 }, { "epoch": 0.4988248346739464, "grad_norm": 0.5311172057410292, "learning_rate": 4.6317923763179235e-05, "loss": 0.631, "step": 17085 }, { "epoch": 0.49897081794426357, "grad_norm": 0.5539693897898266, "learning_rate": 4.631522032981887e-05, "loss": 0.5991, "step": 17090 }, { "epoch": 0.4991168012145808, "grad_norm": 0.5928164912827334, "learning_rate": 4.6312516896458503e-05, "loss": 0.5928, "step": 17095 }, { "epoch": 0.499262784484898, "grad_norm": 0.5335359755850916, "learning_rate": 4.630981346309814e-05, "loss": 0.5934, "step": 17100 }, { "epoch": 0.49940876775521525, "grad_norm": 0.49167648875576764, "learning_rate": 4.630711002973777e-05, "loss": 0.5881, "step": 17105 }, { "epoch": 0.4995547510255325, "grad_norm": 0.5260818975491866, "learning_rate": 4.6304406596377406e-05, "loss": 0.6069, "step": 17110 }, { "epoch": 0.4997007342958497, "grad_norm": 0.4943475535951897, "learning_rate": 4.630170316301703e-05, "loss": 0.6155, "step": 17115 }, { "epoch": 0.49984671756616694, "grad_norm": 0.5449236916591248, "learning_rate": 4.629899972965667e-05, "loss": 0.6132, "step": 17120 }, { "epoch": 0.4999927008364841, "grad_norm": 0.5723634755357243, "learning_rate": 4.62962962962963e-05, "loss": 0.6296, "step": 17125 }, { "epoch": 0.5001386841068014, "grad_norm": 0.6727415060214523, "learning_rate": 4.629359286293593e-05, "loss": 0.6046, "step": 17130 }, { "epoch": 0.5002846673771186, "grad_norm": 0.6202962256064387, "learning_rate": 4.629088942957556e-05, "loss": 0.6079, "step": 17135 }, { "epoch": 0.5004306506474357, "grad_norm": 0.5338638709274104, "learning_rate": 4.6288185996215196e-05, "loss": 0.6312, "step": 17140 }, { "epoch": 0.500576633917753, "grad_norm": 0.5441117572584347, "learning_rate": 4.628548256285482e-05, "loss": 0.6273, "step": 17145 }, { "epoch": 0.5007226171880702, "grad_norm": 0.5428033993550863, "learning_rate": 4.628277912949446e-05, "loss": 0.6122, "step": 17150 }, { "epoch": 0.5008686004583874, "grad_norm": 0.5353359038502242, "learning_rate": 4.628007569613409e-05, "loss": 0.6361, "step": 17155 }, { "epoch": 0.5010145837287047, "grad_norm": 0.5885627900386893, "learning_rate": 4.6277372262773725e-05, "loss": 0.6123, "step": 17160 }, { "epoch": 0.5011605669990219, "grad_norm": 0.5623380590739617, "learning_rate": 4.627466882941336e-05, "loss": 0.61, "step": 17165 }, { "epoch": 0.5013065502693391, "grad_norm": 0.563716596664982, "learning_rate": 4.627196539605299e-05, "loss": 0.6115, "step": 17170 }, { "epoch": 0.5014525335396564, "grad_norm": 0.5068730763324885, "learning_rate": 4.626926196269262e-05, "loss": 0.6038, "step": 17175 }, { "epoch": 0.5015985168099736, "grad_norm": 0.5206778434869347, "learning_rate": 4.6266558529332255e-05, "loss": 0.6063, "step": 17180 }, { "epoch": 0.5017445000802908, "grad_norm": 0.5682142114686005, "learning_rate": 4.626385509597189e-05, "loss": 0.6028, "step": 17185 }, { "epoch": 0.501890483350608, "grad_norm": 0.5098951888212562, "learning_rate": 4.6261151662611516e-05, "loss": 0.6194, "step": 17190 }, { "epoch": 0.5020364666209253, "grad_norm": 0.7453708365982028, "learning_rate": 4.625844822925115e-05, "loss": 0.5766, "step": 17195 }, { "epoch": 0.5021824498912425, "grad_norm": 0.5563754958295535, "learning_rate": 4.6255744795890784e-05, "loss": 0.6112, "step": 17200 }, { "epoch": 0.5023284331615597, "grad_norm": 0.5036189185821675, "learning_rate": 4.625304136253041e-05, "loss": 0.5896, "step": 17205 }, { "epoch": 0.502474416431877, "grad_norm": 0.5007149160915846, "learning_rate": 4.6250337929170045e-05, "loss": 0.6028, "step": 17210 }, { "epoch": 0.5026203997021941, "grad_norm": 0.5233127713782786, "learning_rate": 4.6247634495809686e-05, "loss": 0.5985, "step": 17215 }, { "epoch": 0.5027663829725113, "grad_norm": 0.6811566299538412, "learning_rate": 4.624493106244931e-05, "loss": 0.6663, "step": 17220 }, { "epoch": 0.5029123662428285, "grad_norm": 0.5442043200794732, "learning_rate": 4.624222762908895e-05, "loss": 0.6108, "step": 17225 }, { "epoch": 0.5030583495131458, "grad_norm": 0.5239600775021512, "learning_rate": 4.623952419572858e-05, "loss": 0.626, "step": 17230 }, { "epoch": 0.503204332783463, "grad_norm": 0.5280195023988898, "learning_rate": 4.623682076236821e-05, "loss": 0.6092, "step": 17235 }, { "epoch": 0.5033503160537802, "grad_norm": 0.5268539640863878, "learning_rate": 4.623411732900784e-05, "loss": 0.5956, "step": 17240 }, { "epoch": 0.5034962993240975, "grad_norm": 1.1976308241609146, "learning_rate": 4.6231413895647476e-05, "loss": 0.6351, "step": 17245 }, { "epoch": 0.5036422825944147, "grad_norm": 0.5250112441588537, "learning_rate": 4.6228710462287104e-05, "loss": 0.6182, "step": 17250 }, { "epoch": 0.5037882658647319, "grad_norm": 0.5551221985508004, "learning_rate": 4.622600702892674e-05, "loss": 0.6037, "step": 17255 }, { "epoch": 0.5039342491350491, "grad_norm": 0.546052677428141, "learning_rate": 4.622330359556637e-05, "loss": 0.6021, "step": 17260 }, { "epoch": 0.5040802324053664, "grad_norm": 0.5712683148702355, "learning_rate": 4.6220600162206e-05, "loss": 0.6278, "step": 17265 }, { "epoch": 0.5042262156756836, "grad_norm": 0.5584500097165142, "learning_rate": 4.621789672884564e-05, "loss": 0.6017, "step": 17270 }, { "epoch": 0.5043721989460008, "grad_norm": 0.5487522461673299, "learning_rate": 4.6215193295485274e-05, "loss": 0.6145, "step": 17275 }, { "epoch": 0.5045181822163181, "grad_norm": 0.5465563509637834, "learning_rate": 4.62124898621249e-05, "loss": 0.6522, "step": 17280 }, { "epoch": 0.5046641654866353, "grad_norm": 0.522536825611309, "learning_rate": 4.6209786428764535e-05, "loss": 0.6, "step": 17285 }, { "epoch": 0.5048101487569524, "grad_norm": 0.48748490378821646, "learning_rate": 4.620708299540417e-05, "loss": 0.5952, "step": 17290 }, { "epoch": 0.5049561320272696, "grad_norm": 0.4921866433448234, "learning_rate": 4.6204379562043796e-05, "loss": 0.5906, "step": 17295 }, { "epoch": 0.5051021152975869, "grad_norm": 0.5187358612895876, "learning_rate": 4.620167612868343e-05, "loss": 0.5999, "step": 17300 }, { "epoch": 0.5052480985679041, "grad_norm": 0.5570235398147041, "learning_rate": 4.6198972695323064e-05, "loss": 0.6186, "step": 17305 }, { "epoch": 0.5053940818382213, "grad_norm": 0.5823527092590742, "learning_rate": 4.619626926196269e-05, "loss": 0.6253, "step": 17310 }, { "epoch": 0.5055400651085385, "grad_norm": 0.552217443804009, "learning_rate": 4.6193565828602325e-05, "loss": 0.5939, "step": 17315 }, { "epoch": 0.5056860483788558, "grad_norm": 0.5980300339055814, "learning_rate": 4.619086239524196e-05, "loss": 0.6239, "step": 17320 }, { "epoch": 0.505832031649173, "grad_norm": 0.558533975441857, "learning_rate": 4.618815896188159e-05, "loss": 0.6001, "step": 17325 }, { "epoch": 0.5059780149194902, "grad_norm": 0.5451445837550073, "learning_rate": 4.618545552852123e-05, "loss": 0.6412, "step": 17330 }, { "epoch": 0.5061239981898075, "grad_norm": 0.5632284848421168, "learning_rate": 4.618275209516086e-05, "loss": 0.6013, "step": 17335 }, { "epoch": 0.5062699814601247, "grad_norm": 0.5270583024196583, "learning_rate": 4.618004866180049e-05, "loss": 0.6124, "step": 17340 }, { "epoch": 0.5064159647304419, "grad_norm": 0.4890548156553372, "learning_rate": 4.617734522844012e-05, "loss": 0.581, "step": 17345 }, { "epoch": 0.5065619480007592, "grad_norm": 0.5354940469616136, "learning_rate": 4.617464179507976e-05, "loss": 0.604, "step": 17350 }, { "epoch": 0.5067079312710764, "grad_norm": 0.5948861488713441, "learning_rate": 4.6171938361719384e-05, "loss": 0.6257, "step": 17355 }, { "epoch": 0.5068539145413935, "grad_norm": 0.5032483565171315, "learning_rate": 4.616923492835902e-05, "loss": 0.6143, "step": 17360 }, { "epoch": 0.5069998978117107, "grad_norm": 0.5286900217643943, "learning_rate": 4.616653149499865e-05, "loss": 0.6266, "step": 17365 }, { "epoch": 0.507145881082028, "grad_norm": 0.6276722453378392, "learning_rate": 4.616382806163828e-05, "loss": 0.624, "step": 17370 }, { "epoch": 0.5072918643523452, "grad_norm": 0.5430405290625364, "learning_rate": 4.616112462827791e-05, "loss": 0.6296, "step": 17375 }, { "epoch": 0.5074378476226624, "grad_norm": 0.5256577754356891, "learning_rate": 4.615842119491755e-05, "loss": 0.6125, "step": 17380 }, { "epoch": 0.5075838308929796, "grad_norm": 0.5338162174466479, "learning_rate": 4.615571776155718e-05, "loss": 0.6036, "step": 17385 }, { "epoch": 0.5077298141632969, "grad_norm": 0.569450403632422, "learning_rate": 4.6153014328196815e-05, "loss": 0.5973, "step": 17390 }, { "epoch": 0.5078757974336141, "grad_norm": 0.5366034919664591, "learning_rate": 4.615031089483645e-05, "loss": 0.5942, "step": 17395 }, { "epoch": 0.5080217807039313, "grad_norm": 0.49345207935366087, "learning_rate": 4.6147607461476077e-05, "loss": 0.6026, "step": 17400 }, { "epoch": 0.5081677639742486, "grad_norm": 0.5599964645771868, "learning_rate": 4.614490402811571e-05, "loss": 0.6197, "step": 17405 }, { "epoch": 0.5083137472445658, "grad_norm": 0.6165053280192326, "learning_rate": 4.6142200594755345e-05, "loss": 0.614, "step": 17410 }, { "epoch": 0.508459730514883, "grad_norm": 0.5302647733309572, "learning_rate": 4.613949716139497e-05, "loss": 0.6453, "step": 17415 }, { "epoch": 0.5086057137852003, "grad_norm": 0.4970351984516791, "learning_rate": 4.6136793728034606e-05, "loss": 0.6016, "step": 17420 }, { "epoch": 0.5087516970555175, "grad_norm": 0.5731603946542199, "learning_rate": 4.613409029467424e-05, "loss": 0.6276, "step": 17425 }, { "epoch": 0.5088976803258347, "grad_norm": 0.5350094748149674, "learning_rate": 4.613138686131387e-05, "loss": 0.595, "step": 17430 }, { "epoch": 0.5090436635961518, "grad_norm": 0.5142250165193423, "learning_rate": 4.61286834279535e-05, "loss": 0.6374, "step": 17435 }, { "epoch": 0.5091896468664691, "grad_norm": 0.5073043969992144, "learning_rate": 4.6125979994593135e-05, "loss": 0.6233, "step": 17440 }, { "epoch": 0.5093356301367863, "grad_norm": 0.5329074412465812, "learning_rate": 4.612327656123277e-05, "loss": 0.5951, "step": 17445 }, { "epoch": 0.5094816134071035, "grad_norm": 0.5886793992956307, "learning_rate": 4.61205731278724e-05, "loss": 0.6261, "step": 17450 }, { "epoch": 0.5096275966774207, "grad_norm": 0.5653650042172803, "learning_rate": 4.611786969451203e-05, "loss": 0.6161, "step": 17455 }, { "epoch": 0.509773579947738, "grad_norm": 0.508840435600891, "learning_rate": 4.6115166261151664e-05, "loss": 0.5981, "step": 17460 }, { "epoch": 0.5099195632180552, "grad_norm": 0.5249137145216677, "learning_rate": 4.61124628277913e-05, "loss": 0.634, "step": 17465 }, { "epoch": 0.5100655464883724, "grad_norm": 0.553312401762271, "learning_rate": 4.610975939443093e-05, "loss": 0.6213, "step": 17470 }, { "epoch": 0.5102115297586897, "grad_norm": 0.639476669570359, "learning_rate": 4.610705596107056e-05, "loss": 0.6573, "step": 17475 }, { "epoch": 0.5103575130290069, "grad_norm": 0.5288494295798151, "learning_rate": 4.6104352527710194e-05, "loss": 0.5845, "step": 17480 }, { "epoch": 0.5105034962993241, "grad_norm": 0.5898911581838512, "learning_rate": 4.610164909434983e-05, "loss": 0.6114, "step": 17485 }, { "epoch": 0.5106494795696414, "grad_norm": 0.5323451920516473, "learning_rate": 4.6098945660989455e-05, "loss": 0.639, "step": 17490 }, { "epoch": 0.5107954628399586, "grad_norm": 0.5230043427934977, "learning_rate": 4.609624222762909e-05, "loss": 0.6085, "step": 17495 }, { "epoch": 0.5109414461102758, "grad_norm": 0.5262716339626886, "learning_rate": 4.609353879426872e-05, "loss": 0.6207, "step": 17500 }, { "epoch": 0.511087429380593, "grad_norm": 0.5391382541448773, "learning_rate": 4.609083536090836e-05, "loss": 0.5981, "step": 17505 }, { "epoch": 0.5112334126509102, "grad_norm": 0.5185067040296433, "learning_rate": 4.608813192754799e-05, "loss": 0.6061, "step": 17510 }, { "epoch": 0.5113793959212274, "grad_norm": 0.4984077503965733, "learning_rate": 4.608542849418762e-05, "loss": 0.5554, "step": 17515 }, { "epoch": 0.5115253791915446, "grad_norm": 0.5230442190927751, "learning_rate": 4.608272506082725e-05, "loss": 0.5655, "step": 17520 }, { "epoch": 0.5116713624618618, "grad_norm": 0.5291647862995698, "learning_rate": 4.6080021627466886e-05, "loss": 0.6391, "step": 17525 }, { "epoch": 0.5118173457321791, "grad_norm": 0.5117922746745421, "learning_rate": 4.607731819410652e-05, "loss": 0.6125, "step": 17530 }, { "epoch": 0.5119633290024963, "grad_norm": 0.5768777514604257, "learning_rate": 4.607461476074615e-05, "loss": 0.6113, "step": 17535 }, { "epoch": 0.5121093122728135, "grad_norm": 0.5342751918684056, "learning_rate": 4.607191132738578e-05, "loss": 0.6263, "step": 17540 }, { "epoch": 0.5122552955431308, "grad_norm": 0.5297774109479831, "learning_rate": 4.6069207894025415e-05, "loss": 0.5973, "step": 17545 }, { "epoch": 0.512401278813448, "grad_norm": 0.5398855664524839, "learning_rate": 4.606650446066504e-05, "loss": 0.6152, "step": 17550 }, { "epoch": 0.5125472620837652, "grad_norm": 0.5690326908114647, "learning_rate": 4.6063801027304683e-05, "loss": 0.6128, "step": 17555 }, { "epoch": 0.5126932453540824, "grad_norm": 0.5686524292299043, "learning_rate": 4.606109759394431e-05, "loss": 0.6017, "step": 17560 }, { "epoch": 0.5128392286243997, "grad_norm": 0.5532330273549457, "learning_rate": 4.6058394160583945e-05, "loss": 0.5809, "step": 17565 }, { "epoch": 0.5129852118947169, "grad_norm": 0.5485557096637803, "learning_rate": 4.605569072722358e-05, "loss": 0.6178, "step": 17570 }, { "epoch": 0.5131311951650341, "grad_norm": 0.5662659942452479, "learning_rate": 4.6052987293863206e-05, "loss": 0.6011, "step": 17575 }, { "epoch": 0.5132771784353513, "grad_norm": 0.5328815450040824, "learning_rate": 4.605028386050284e-05, "loss": 0.6038, "step": 17580 }, { "epoch": 0.5134231617056685, "grad_norm": 0.5515691402548247, "learning_rate": 4.6047580427142474e-05, "loss": 0.5992, "step": 17585 }, { "epoch": 0.5135691449759857, "grad_norm": 0.5387891550805294, "learning_rate": 4.60448769937821e-05, "loss": 0.5974, "step": 17590 }, { "epoch": 0.5137151282463029, "grad_norm": 0.5874981512096928, "learning_rate": 4.6042173560421735e-05, "loss": 0.6155, "step": 17595 }, { "epoch": 0.5138611115166202, "grad_norm": 0.4998480146370833, "learning_rate": 4.603947012706137e-05, "loss": 0.576, "step": 17600 }, { "epoch": 0.5140070947869374, "grad_norm": 0.5074870205741471, "learning_rate": 4.6036766693701e-05, "loss": 0.62, "step": 17605 }, { "epoch": 0.5141530780572546, "grad_norm": 0.5499618050555979, "learning_rate": 4.603406326034064e-05, "loss": 0.5809, "step": 17610 }, { "epoch": 0.5142990613275719, "grad_norm": 0.5545023506391402, "learning_rate": 4.603135982698027e-05, "loss": 0.6245, "step": 17615 }, { "epoch": 0.5144450445978891, "grad_norm": 0.497066345349247, "learning_rate": 4.60286563936199e-05, "loss": 0.5844, "step": 17620 }, { "epoch": 0.5145910278682063, "grad_norm": 0.7027309131511855, "learning_rate": 4.602595296025953e-05, "loss": 0.6404, "step": 17625 }, { "epoch": 0.5147370111385235, "grad_norm": 0.5333561288727651, "learning_rate": 4.6023249526899167e-05, "loss": 0.5771, "step": 17630 }, { "epoch": 0.5148829944088408, "grad_norm": 0.5153690866819969, "learning_rate": 4.6020546093538794e-05, "loss": 0.5814, "step": 17635 }, { "epoch": 0.515028977679158, "grad_norm": 0.5325392940836462, "learning_rate": 4.601784266017843e-05, "loss": 0.6157, "step": 17640 }, { "epoch": 0.5151749609494752, "grad_norm": 0.5239589529233041, "learning_rate": 4.601513922681806e-05, "loss": 0.6004, "step": 17645 }, { "epoch": 0.5153209442197925, "grad_norm": 0.5564196351344042, "learning_rate": 4.601243579345769e-05, "loss": 0.5984, "step": 17650 }, { "epoch": 0.5154669274901096, "grad_norm": 0.5263987201808064, "learning_rate": 4.600973236009732e-05, "loss": 0.581, "step": 17655 }, { "epoch": 0.5156129107604268, "grad_norm": 0.5954220140145401, "learning_rate": 4.600702892673696e-05, "loss": 0.6489, "step": 17660 }, { "epoch": 0.515758894030744, "grad_norm": 0.5481187142076669, "learning_rate": 4.600432549337659e-05, "loss": 0.6084, "step": 17665 }, { "epoch": 0.5159048773010613, "grad_norm": 0.5241575486300385, "learning_rate": 4.6001622060016225e-05, "loss": 0.6142, "step": 17670 }, { "epoch": 0.5160508605713785, "grad_norm": 0.5742275421739631, "learning_rate": 4.599891862665586e-05, "loss": 0.6225, "step": 17675 }, { "epoch": 0.5161968438416957, "grad_norm": 0.5764639931607461, "learning_rate": 4.5996215193295486e-05, "loss": 0.6058, "step": 17680 }, { "epoch": 0.516342827112013, "grad_norm": 0.54829786609596, "learning_rate": 4.599351175993512e-05, "loss": 0.5691, "step": 17685 }, { "epoch": 0.5164888103823302, "grad_norm": 0.5391860107248938, "learning_rate": 4.5990808326574754e-05, "loss": 0.6091, "step": 17690 }, { "epoch": 0.5166347936526474, "grad_norm": 0.5416441810011436, "learning_rate": 4.598810489321438e-05, "loss": 0.5817, "step": 17695 }, { "epoch": 0.5167807769229646, "grad_norm": 0.5870568834350448, "learning_rate": 4.5985401459854016e-05, "loss": 0.6365, "step": 17700 }, { "epoch": 0.5169267601932819, "grad_norm": 0.582964322573314, "learning_rate": 4.598269802649365e-05, "loss": 0.6448, "step": 17705 }, { "epoch": 0.5170727434635991, "grad_norm": 0.632778545939507, "learning_rate": 4.597999459313328e-05, "loss": 0.5991, "step": 17710 }, { "epoch": 0.5172187267339163, "grad_norm": 0.5512598449097826, "learning_rate": 4.597729115977291e-05, "loss": 0.5944, "step": 17715 }, { "epoch": 0.5173647100042336, "grad_norm": 0.530927743606321, "learning_rate": 4.5974587726412545e-05, "loss": 0.6298, "step": 17720 }, { "epoch": 0.5175106932745507, "grad_norm": 0.5156376762966991, "learning_rate": 4.597188429305218e-05, "loss": 0.5712, "step": 17725 }, { "epoch": 0.5176566765448679, "grad_norm": 0.5597431785125089, "learning_rate": 4.596918085969181e-05, "loss": 0.6139, "step": 17730 }, { "epoch": 0.5178026598151851, "grad_norm": 0.5393046220382341, "learning_rate": 4.596647742633145e-05, "loss": 0.5957, "step": 17735 }, { "epoch": 0.5179486430855024, "grad_norm": 0.5518130741734123, "learning_rate": 4.5963773992971074e-05, "loss": 0.6304, "step": 17740 }, { "epoch": 0.5180946263558196, "grad_norm": 0.5025734336533174, "learning_rate": 4.596107055961071e-05, "loss": 0.6022, "step": 17745 }, { "epoch": 0.5182406096261368, "grad_norm": 0.4995880118838702, "learning_rate": 4.595836712625034e-05, "loss": 0.5986, "step": 17750 }, { "epoch": 0.5183865928964541, "grad_norm": 0.5832547337072677, "learning_rate": 4.595566369288997e-05, "loss": 0.6288, "step": 17755 }, { "epoch": 0.5185325761667713, "grad_norm": 0.6021870753355774, "learning_rate": 4.59529602595296e-05, "loss": 0.6256, "step": 17760 }, { "epoch": 0.5186785594370885, "grad_norm": 0.5666516044585509, "learning_rate": 4.595025682616924e-05, "loss": 0.602, "step": 17765 }, { "epoch": 0.5188245427074057, "grad_norm": 0.5396728906061304, "learning_rate": 4.5947553392808865e-05, "loss": 0.6296, "step": 17770 }, { "epoch": 0.518970525977723, "grad_norm": 0.5930412315025229, "learning_rate": 4.59448499594485e-05, "loss": 0.6413, "step": 17775 }, { "epoch": 0.5191165092480402, "grad_norm": 0.5008870068548928, "learning_rate": 4.594214652608814e-05, "loss": 0.6186, "step": 17780 }, { "epoch": 0.5192624925183574, "grad_norm": 0.6134801019075921, "learning_rate": 4.593944309272777e-05, "loss": 0.5881, "step": 17785 }, { "epoch": 0.5194084757886747, "grad_norm": 0.4995411789485275, "learning_rate": 4.59367396593674e-05, "loss": 0.6404, "step": 17790 }, { "epoch": 0.5195544590589919, "grad_norm": 0.5524066008300738, "learning_rate": 4.5934036226007035e-05, "loss": 0.6692, "step": 17795 }, { "epoch": 0.519700442329309, "grad_norm": 0.5487462146482087, "learning_rate": 4.593133279264666e-05, "loss": 0.6288, "step": 17800 }, { "epoch": 0.5198464255996262, "grad_norm": 0.6294290137588404, "learning_rate": 4.5928629359286296e-05, "loss": 0.6304, "step": 17805 }, { "epoch": 0.5199924088699435, "grad_norm": 0.5205434650521948, "learning_rate": 4.592592592592593e-05, "loss": 0.5987, "step": 17810 }, { "epoch": 0.5201383921402607, "grad_norm": 0.5410761739555808, "learning_rate": 4.592322249256556e-05, "loss": 0.6069, "step": 17815 }, { "epoch": 0.5202843754105779, "grad_norm": 0.5885095459327325, "learning_rate": 4.592051905920519e-05, "loss": 0.635, "step": 17820 }, { "epoch": 0.5204303586808952, "grad_norm": 0.5021319291371769, "learning_rate": 4.5917815625844825e-05, "loss": 0.594, "step": 17825 }, { "epoch": 0.5205763419512124, "grad_norm": 0.49556114240055543, "learning_rate": 4.591511219248445e-05, "loss": 0.6267, "step": 17830 }, { "epoch": 0.5207223252215296, "grad_norm": 0.5927004251431888, "learning_rate": 4.591240875912409e-05, "loss": 0.6289, "step": 17835 }, { "epoch": 0.5208683084918468, "grad_norm": 0.5171439084762813, "learning_rate": 4.590970532576373e-05, "loss": 0.621, "step": 17840 }, { "epoch": 0.5210142917621641, "grad_norm": 0.5259660393792761, "learning_rate": 4.5907001892403354e-05, "loss": 0.6154, "step": 17845 }, { "epoch": 0.5211602750324813, "grad_norm": 0.5528694317161862, "learning_rate": 4.590429845904299e-05, "loss": 0.599, "step": 17850 }, { "epoch": 0.5213062583027985, "grad_norm": 0.5175774131403615, "learning_rate": 4.590159502568262e-05, "loss": 0.5844, "step": 17855 }, { "epoch": 0.5214522415731158, "grad_norm": 0.5585180462981081, "learning_rate": 4.589889159232225e-05, "loss": 0.6114, "step": 17860 }, { "epoch": 0.521598224843433, "grad_norm": 0.556671385464559, "learning_rate": 4.5896188158961884e-05, "loss": 0.6458, "step": 17865 }, { "epoch": 0.5217442081137502, "grad_norm": 0.5202709632685114, "learning_rate": 4.589348472560152e-05, "loss": 0.602, "step": 17870 }, { "epoch": 0.5218901913840673, "grad_norm": 0.5679002798063851, "learning_rate": 4.5890781292241145e-05, "loss": 0.6138, "step": 17875 }, { "epoch": 0.5220361746543846, "grad_norm": 0.5240335540875292, "learning_rate": 4.588807785888078e-05, "loss": 0.6276, "step": 17880 }, { "epoch": 0.5221821579247018, "grad_norm": 0.5298161595172741, "learning_rate": 4.588537442552041e-05, "loss": 0.6017, "step": 17885 }, { "epoch": 0.522328141195019, "grad_norm": 0.5730185088801922, "learning_rate": 4.588267099216004e-05, "loss": 0.5969, "step": 17890 }, { "epoch": 0.5224741244653363, "grad_norm": 0.5524020984592508, "learning_rate": 4.587996755879968e-05, "loss": 0.6033, "step": 17895 }, { "epoch": 0.5226201077356535, "grad_norm": 0.5218067571493095, "learning_rate": 4.5877264125439315e-05, "loss": 0.623, "step": 17900 }, { "epoch": 0.5227660910059707, "grad_norm": 0.5303986924407446, "learning_rate": 4.587456069207894e-05, "loss": 0.5824, "step": 17905 }, { "epoch": 0.5229120742762879, "grad_norm": 0.5379273967850846, "learning_rate": 4.5871857258718576e-05, "loss": 0.6138, "step": 17910 }, { "epoch": 0.5230580575466052, "grad_norm": 0.5994997279670321, "learning_rate": 4.586915382535821e-05, "loss": 0.6012, "step": 17915 }, { "epoch": 0.5232040408169224, "grad_norm": 0.5682110910254785, "learning_rate": 4.586645039199784e-05, "loss": 0.5908, "step": 17920 }, { "epoch": 0.5233500240872396, "grad_norm": 0.547042452595046, "learning_rate": 4.586374695863747e-05, "loss": 0.607, "step": 17925 }, { "epoch": 0.5234960073575569, "grad_norm": 0.5241925619535249, "learning_rate": 4.5861043525277106e-05, "loss": 0.5952, "step": 17930 }, { "epoch": 0.5236419906278741, "grad_norm": 0.541570178670958, "learning_rate": 4.585834009191673e-05, "loss": 0.617, "step": 17935 }, { "epoch": 0.5237879738981913, "grad_norm": 0.5791729539790017, "learning_rate": 4.585563665855637e-05, "loss": 0.6532, "step": 17940 }, { "epoch": 0.5239339571685084, "grad_norm": 0.5218281141752631, "learning_rate": 4.5852933225196e-05, "loss": 0.5978, "step": 17945 }, { "epoch": 0.5240799404388257, "grad_norm": 0.539409539589466, "learning_rate": 4.5850229791835635e-05, "loss": 0.6384, "step": 17950 }, { "epoch": 0.5242259237091429, "grad_norm": 0.5328364669679144, "learning_rate": 4.584752635847527e-05, "loss": 0.6351, "step": 17955 }, { "epoch": 0.5243719069794601, "grad_norm": 0.5299933172647429, "learning_rate": 4.58448229251149e-05, "loss": 0.6271, "step": 17960 }, { "epoch": 0.5245178902497774, "grad_norm": 0.5376781954081115, "learning_rate": 4.584211949175453e-05, "loss": 0.5911, "step": 17965 }, { "epoch": 0.5246638735200946, "grad_norm": 0.5940717471710094, "learning_rate": 4.5839416058394164e-05, "loss": 0.6065, "step": 17970 }, { "epoch": 0.5248098567904118, "grad_norm": 0.5214959933930681, "learning_rate": 4.58367126250338e-05, "loss": 0.6119, "step": 17975 }, { "epoch": 0.524955840060729, "grad_norm": 0.5237734200276871, "learning_rate": 4.5834009191673425e-05, "loss": 0.6095, "step": 17980 }, { "epoch": 0.5251018233310463, "grad_norm": 0.5003832926809073, "learning_rate": 4.583130575831306e-05, "loss": 0.611, "step": 17985 }, { "epoch": 0.5252478066013635, "grad_norm": 0.6314217048123303, "learning_rate": 4.582860232495269e-05, "loss": 0.6418, "step": 17990 }, { "epoch": 0.5253937898716807, "grad_norm": 0.5267366149338499, "learning_rate": 4.582589889159232e-05, "loss": 0.6245, "step": 17995 }, { "epoch": 0.525539773141998, "grad_norm": 0.5845447205585715, "learning_rate": 4.5823195458231955e-05, "loss": 0.6117, "step": 18000 }, { "epoch": 0.5256857564123152, "grad_norm": 0.5068082288860963, "learning_rate": 4.582049202487159e-05, "loss": 0.5892, "step": 18005 }, { "epoch": 0.5258317396826324, "grad_norm": 0.5948732841123204, "learning_rate": 4.581778859151122e-05, "loss": 0.6703, "step": 18010 }, { "epoch": 0.5259777229529496, "grad_norm": 0.550974099947845, "learning_rate": 4.581508515815086e-05, "loss": 0.5771, "step": 18015 }, { "epoch": 0.5261237062232668, "grad_norm": 0.5268185786437379, "learning_rate": 4.581238172479049e-05, "loss": 0.6115, "step": 18020 }, { "epoch": 0.526269689493584, "grad_norm": 0.5138993860138216, "learning_rate": 4.580967829143012e-05, "loss": 0.5756, "step": 18025 }, { "epoch": 0.5264156727639012, "grad_norm": 0.5157284404002512, "learning_rate": 4.580697485806975e-05, "loss": 0.5692, "step": 18030 }, { "epoch": 0.5265616560342185, "grad_norm": 0.5841214940568519, "learning_rate": 4.5804271424709386e-05, "loss": 0.6156, "step": 18035 }, { "epoch": 0.5267076393045357, "grad_norm": 0.49139064743594596, "learning_rate": 4.580156799134901e-05, "loss": 0.6393, "step": 18040 }, { "epoch": 0.5268536225748529, "grad_norm": 0.5710730266670754, "learning_rate": 4.579886455798865e-05, "loss": 0.5783, "step": 18045 }, { "epoch": 0.5269996058451701, "grad_norm": 0.5541380803672209, "learning_rate": 4.579616112462828e-05, "loss": 0.6205, "step": 18050 }, { "epoch": 0.5271455891154874, "grad_norm": 0.49766222310435615, "learning_rate": 4.579345769126791e-05, "loss": 0.6362, "step": 18055 }, { "epoch": 0.5272915723858046, "grad_norm": 0.5451275837391734, "learning_rate": 4.579075425790754e-05, "loss": 0.5813, "step": 18060 }, { "epoch": 0.5274375556561218, "grad_norm": 0.5053752736305893, "learning_rate": 4.5788050824547176e-05, "loss": 0.5847, "step": 18065 }, { "epoch": 0.527583538926439, "grad_norm": 0.5090999435158883, "learning_rate": 4.578534739118681e-05, "loss": 0.5876, "step": 18070 }, { "epoch": 0.5277295221967563, "grad_norm": 0.5940444876316726, "learning_rate": 4.5782643957826444e-05, "loss": 0.5983, "step": 18075 }, { "epoch": 0.5278755054670735, "grad_norm": 0.5284634442787537, "learning_rate": 4.577994052446607e-05, "loss": 0.5901, "step": 18080 }, { "epoch": 0.5280214887373907, "grad_norm": 0.5453890616759011, "learning_rate": 4.5777237091105706e-05, "loss": 0.6136, "step": 18085 }, { "epoch": 0.528167472007708, "grad_norm": 0.5127580605321781, "learning_rate": 4.577453365774534e-05, "loss": 0.5904, "step": 18090 }, { "epoch": 0.5283134552780251, "grad_norm": 0.543478366110841, "learning_rate": 4.5771830224384974e-05, "loss": 0.6424, "step": 18095 }, { "epoch": 0.5284594385483423, "grad_norm": 0.5066840271209221, "learning_rate": 4.57691267910246e-05, "loss": 0.6367, "step": 18100 }, { "epoch": 0.5286054218186595, "grad_norm": 0.5373026074815805, "learning_rate": 4.5766423357664235e-05, "loss": 0.6126, "step": 18105 }, { "epoch": 0.5287514050889768, "grad_norm": 0.619514182510262, "learning_rate": 4.576371992430387e-05, "loss": 0.5856, "step": 18110 }, { "epoch": 0.528897388359294, "grad_norm": 0.5521815995208244, "learning_rate": 4.5761016490943496e-05, "loss": 0.6457, "step": 18115 }, { "epoch": 0.5290433716296112, "grad_norm": 0.5274555258314673, "learning_rate": 4.575831305758314e-05, "loss": 0.5908, "step": 18120 }, { "epoch": 0.5291893548999285, "grad_norm": 0.5324803032331696, "learning_rate": 4.5755609624222764e-05, "loss": 0.5951, "step": 18125 }, { "epoch": 0.5293353381702457, "grad_norm": 0.5673115968734731, "learning_rate": 4.57529061908624e-05, "loss": 0.6191, "step": 18130 }, { "epoch": 0.5294813214405629, "grad_norm": 0.5057186027179493, "learning_rate": 4.575020275750203e-05, "loss": 0.5876, "step": 18135 }, { "epoch": 0.5296273047108802, "grad_norm": 0.5842217566579142, "learning_rate": 4.574749932414166e-05, "loss": 0.6192, "step": 18140 }, { "epoch": 0.5297732879811974, "grad_norm": 0.534108076024988, "learning_rate": 4.5744795890781293e-05, "loss": 0.5657, "step": 18145 }, { "epoch": 0.5299192712515146, "grad_norm": 0.5047794860465806, "learning_rate": 4.574209245742093e-05, "loss": 0.619, "step": 18150 }, { "epoch": 0.5300652545218318, "grad_norm": 0.4748730158924875, "learning_rate": 4.573938902406056e-05, "loss": 0.5827, "step": 18155 }, { "epoch": 0.5302112377921491, "grad_norm": 0.500226537762145, "learning_rate": 4.573668559070019e-05, "loss": 0.5924, "step": 18160 }, { "epoch": 0.5303572210624662, "grad_norm": 0.5241880235450748, "learning_rate": 4.573398215733982e-05, "loss": 0.6242, "step": 18165 }, { "epoch": 0.5305032043327834, "grad_norm": 0.5310066982909863, "learning_rate": 4.573127872397946e-05, "loss": 0.6315, "step": 18170 }, { "epoch": 0.5306491876031006, "grad_norm": 0.5325533597917129, "learning_rate": 4.572857529061909e-05, "loss": 0.5552, "step": 18175 }, { "epoch": 0.5307951708734179, "grad_norm": 0.5300681107675916, "learning_rate": 4.5725871857258725e-05, "loss": 0.5998, "step": 18180 }, { "epoch": 0.5309411541437351, "grad_norm": 0.5063864098226575, "learning_rate": 4.572316842389835e-05, "loss": 0.5895, "step": 18185 }, { "epoch": 0.5310871374140523, "grad_norm": 0.5495406170799971, "learning_rate": 4.5720464990537986e-05, "loss": 0.6011, "step": 18190 }, { "epoch": 0.5312331206843696, "grad_norm": 0.5222779647788617, "learning_rate": 4.571776155717762e-05, "loss": 0.5954, "step": 18195 }, { "epoch": 0.5313791039546868, "grad_norm": 0.5388198064740098, "learning_rate": 4.571505812381725e-05, "loss": 0.5893, "step": 18200 }, { "epoch": 0.531525087225004, "grad_norm": 0.49893217382103283, "learning_rate": 4.571235469045688e-05, "loss": 0.5912, "step": 18205 }, { "epoch": 0.5316710704953213, "grad_norm": 0.6752273432634167, "learning_rate": 4.5709651257096515e-05, "loss": 0.6343, "step": 18210 }, { "epoch": 0.5318170537656385, "grad_norm": 0.5996855576510569, "learning_rate": 4.570694782373614e-05, "loss": 0.6027, "step": 18215 }, { "epoch": 0.5319630370359557, "grad_norm": 0.5281054979976517, "learning_rate": 4.5704244390375777e-05, "loss": 0.6184, "step": 18220 }, { "epoch": 0.5321090203062729, "grad_norm": 0.5592107694748837, "learning_rate": 4.570154095701541e-05, "loss": 0.6104, "step": 18225 }, { "epoch": 0.5322550035765902, "grad_norm": 0.5298380520808182, "learning_rate": 4.5698837523655045e-05, "loss": 0.5928, "step": 18230 }, { "epoch": 0.5324009868469074, "grad_norm": 0.5454819178005015, "learning_rate": 4.569613409029468e-05, "loss": 0.6458, "step": 18235 }, { "epoch": 0.5325469701172245, "grad_norm": 0.5667959732397364, "learning_rate": 4.569343065693431e-05, "loss": 0.6192, "step": 18240 }, { "epoch": 0.5326929533875417, "grad_norm": 0.5403336956640895, "learning_rate": 4.569072722357394e-05, "loss": 0.6208, "step": 18245 }, { "epoch": 0.532838936657859, "grad_norm": 0.5306842276753847, "learning_rate": 4.5688023790213574e-05, "loss": 0.6184, "step": 18250 }, { "epoch": 0.5329849199281762, "grad_norm": 0.5303831789778535, "learning_rate": 4.568532035685321e-05, "loss": 0.5852, "step": 18255 }, { "epoch": 0.5331309031984934, "grad_norm": 0.5478184518560043, "learning_rate": 4.5682616923492835e-05, "loss": 0.6177, "step": 18260 }, { "epoch": 0.5332768864688107, "grad_norm": 0.5326075590880464, "learning_rate": 4.567991349013247e-05, "loss": 0.6479, "step": 18265 }, { "epoch": 0.5334228697391279, "grad_norm": 0.48938152275239055, "learning_rate": 4.56772100567721e-05, "loss": 0.6154, "step": 18270 }, { "epoch": 0.5335688530094451, "grad_norm": 0.5324027805270428, "learning_rate": 4.567450662341173e-05, "loss": 0.6103, "step": 18275 }, { "epoch": 0.5337148362797624, "grad_norm": 0.5044429630284076, "learning_rate": 4.5671803190051364e-05, "loss": 0.6024, "step": 18280 }, { "epoch": 0.5338608195500796, "grad_norm": 0.5557956068052521, "learning_rate": 4.5669099756691e-05, "loss": 0.6418, "step": 18285 }, { "epoch": 0.5340068028203968, "grad_norm": 0.5868760962575986, "learning_rate": 4.566639632333063e-05, "loss": 0.6262, "step": 18290 }, { "epoch": 0.534152786090714, "grad_norm": 0.5720381524163987, "learning_rate": 4.5663692889970266e-05, "loss": 0.618, "step": 18295 }, { "epoch": 0.5342987693610313, "grad_norm": 0.5178134416002489, "learning_rate": 4.56609894566099e-05, "loss": 0.5737, "step": 18300 }, { "epoch": 0.5344447526313485, "grad_norm": 0.5636083535815926, "learning_rate": 4.565828602324953e-05, "loss": 0.6109, "step": 18305 }, { "epoch": 0.5345907359016656, "grad_norm": 0.5206947824398082, "learning_rate": 4.565558258988916e-05, "loss": 0.5958, "step": 18310 }, { "epoch": 0.5347367191719828, "grad_norm": 0.5394766720919074, "learning_rate": 4.5652879156528796e-05, "loss": 0.6173, "step": 18315 }, { "epoch": 0.5348827024423001, "grad_norm": 0.5197426820472387, "learning_rate": 4.565017572316842e-05, "loss": 0.607, "step": 18320 }, { "epoch": 0.5350286857126173, "grad_norm": 0.5402116296659059, "learning_rate": 4.564747228980806e-05, "loss": 0.6012, "step": 18325 }, { "epoch": 0.5351746689829345, "grad_norm": 0.5486277779586328, "learning_rate": 4.564476885644769e-05, "loss": 0.6163, "step": 18330 }, { "epoch": 0.5353206522532518, "grad_norm": 0.5370760867302229, "learning_rate": 4.564206542308732e-05, "loss": 0.5999, "step": 18335 }, { "epoch": 0.535466635523569, "grad_norm": 0.5334222126823226, "learning_rate": 4.563936198972695e-05, "loss": 0.5935, "step": 18340 }, { "epoch": 0.5356126187938862, "grad_norm": 0.5344822928859627, "learning_rate": 4.563665855636659e-05, "loss": 0.5782, "step": 18345 }, { "epoch": 0.5357586020642034, "grad_norm": 0.6012189236745535, "learning_rate": 4.563395512300622e-05, "loss": 0.6169, "step": 18350 }, { "epoch": 0.5359045853345207, "grad_norm": 0.5418497191604659, "learning_rate": 4.5631251689645854e-05, "loss": 0.5998, "step": 18355 }, { "epoch": 0.5360505686048379, "grad_norm": 0.5510359631405447, "learning_rate": 4.562854825628549e-05, "loss": 0.6125, "step": 18360 }, { "epoch": 0.5361965518751551, "grad_norm": 0.582493215836978, "learning_rate": 4.5625844822925115e-05, "loss": 0.6184, "step": 18365 }, { "epoch": 0.5363425351454724, "grad_norm": 0.5279017752089963, "learning_rate": 4.562314138956475e-05, "loss": 0.6152, "step": 18370 }, { "epoch": 0.5364885184157896, "grad_norm": 0.5369791231527755, "learning_rate": 4.5620437956204383e-05, "loss": 0.6167, "step": 18375 }, { "epoch": 0.5366345016861068, "grad_norm": 0.5520558097536238, "learning_rate": 4.561773452284401e-05, "loss": 0.6083, "step": 18380 }, { "epoch": 0.5367804849564239, "grad_norm": 0.5170471286243362, "learning_rate": 4.5615031089483645e-05, "loss": 0.5999, "step": 18385 }, { "epoch": 0.5369264682267412, "grad_norm": 0.5394117331182121, "learning_rate": 4.561232765612328e-05, "loss": 0.6192, "step": 18390 }, { "epoch": 0.5370724514970584, "grad_norm": 0.5526186889114081, "learning_rate": 4.5609624222762906e-05, "loss": 0.6017, "step": 18395 }, { "epoch": 0.5372184347673756, "grad_norm": 0.5012070336061855, "learning_rate": 4.560692078940254e-05, "loss": 0.5974, "step": 18400 }, { "epoch": 0.5373644180376929, "grad_norm": 0.5415996844663135, "learning_rate": 4.560421735604218e-05, "loss": 0.6285, "step": 18405 }, { "epoch": 0.5375104013080101, "grad_norm": 0.5109350727965541, "learning_rate": 4.560151392268181e-05, "loss": 0.6277, "step": 18410 }, { "epoch": 0.5376563845783273, "grad_norm": 0.5117460303564554, "learning_rate": 4.559881048932144e-05, "loss": 0.6046, "step": 18415 }, { "epoch": 0.5378023678486445, "grad_norm": 0.529534815414338, "learning_rate": 4.5596107055961076e-05, "loss": 0.5676, "step": 18420 }, { "epoch": 0.5379483511189618, "grad_norm": 0.5591795601785178, "learning_rate": 4.55934036226007e-05, "loss": 0.6379, "step": 18425 }, { "epoch": 0.538094334389279, "grad_norm": 0.5171542071397226, "learning_rate": 4.559070018924034e-05, "loss": 0.6282, "step": 18430 }, { "epoch": 0.5382403176595962, "grad_norm": 0.5895908195697006, "learning_rate": 4.558799675587997e-05, "loss": 0.5932, "step": 18435 }, { "epoch": 0.5383863009299135, "grad_norm": 0.542178226110131, "learning_rate": 4.55852933225196e-05, "loss": 0.629, "step": 18440 }, { "epoch": 0.5385322842002307, "grad_norm": 0.5677293471826059, "learning_rate": 4.558258988915923e-05, "loss": 0.586, "step": 18445 }, { "epoch": 0.5386782674705479, "grad_norm": 0.554881402908094, "learning_rate": 4.5579886455798867e-05, "loss": 0.5876, "step": 18450 }, { "epoch": 0.5388242507408652, "grad_norm": 0.5421972234509463, "learning_rate": 4.5577183022438494e-05, "loss": 0.5923, "step": 18455 }, { "epoch": 0.5389702340111823, "grad_norm": 0.5060291515025914, "learning_rate": 4.5574479589078135e-05, "loss": 0.5857, "step": 18460 }, { "epoch": 0.5391162172814995, "grad_norm": 0.5469420011400953, "learning_rate": 4.557177615571777e-05, "loss": 0.5872, "step": 18465 }, { "epoch": 0.5392622005518167, "grad_norm": 0.5269671027288776, "learning_rate": 4.5569072722357396e-05, "loss": 0.6176, "step": 18470 }, { "epoch": 0.539408183822134, "grad_norm": 0.6563769192154402, "learning_rate": 4.556636928899703e-05, "loss": 0.6374, "step": 18475 }, { "epoch": 0.5395541670924512, "grad_norm": 0.5769416608872748, "learning_rate": 4.5563665855636664e-05, "loss": 0.651, "step": 18480 }, { "epoch": 0.5397001503627684, "grad_norm": 0.5213004743670057, "learning_rate": 4.556096242227629e-05, "loss": 0.6082, "step": 18485 }, { "epoch": 0.5398461336330856, "grad_norm": 0.5198590885248205, "learning_rate": 4.5558258988915925e-05, "loss": 0.5775, "step": 18490 }, { "epoch": 0.5399921169034029, "grad_norm": 0.5015313639427793, "learning_rate": 4.555555555555556e-05, "loss": 0.5891, "step": 18495 }, { "epoch": 0.5401381001737201, "grad_norm": 0.4769438867273908, "learning_rate": 4.5552852122195186e-05, "loss": 0.564, "step": 18500 }, { "epoch": 0.5402840834440373, "grad_norm": 0.5489432915898897, "learning_rate": 4.555014868883482e-05, "loss": 0.6021, "step": 18505 }, { "epoch": 0.5404300667143546, "grad_norm": 0.5113631511663604, "learning_rate": 4.5547445255474454e-05, "loss": 0.6114, "step": 18510 }, { "epoch": 0.5405760499846718, "grad_norm": 0.5098373918809664, "learning_rate": 4.554474182211409e-05, "loss": 0.5536, "step": 18515 }, { "epoch": 0.540722033254989, "grad_norm": 0.5356994193747002, "learning_rate": 4.554203838875372e-05, "loss": 0.6231, "step": 18520 }, { "epoch": 0.5408680165253062, "grad_norm": 0.526532711300977, "learning_rate": 4.5539334955393356e-05, "loss": 0.6053, "step": 18525 }, { "epoch": 0.5410139997956234, "grad_norm": 0.5484369154184798, "learning_rate": 4.5536631522032984e-05, "loss": 0.6415, "step": 18530 }, { "epoch": 0.5411599830659406, "grad_norm": 0.46785884210586204, "learning_rate": 4.553392808867262e-05, "loss": 0.5624, "step": 18535 }, { "epoch": 0.5413059663362578, "grad_norm": 0.535967354310144, "learning_rate": 4.553122465531225e-05, "loss": 0.6063, "step": 18540 }, { "epoch": 0.5414519496065751, "grad_norm": 0.5327791119087909, "learning_rate": 4.552852122195188e-05, "loss": 0.5737, "step": 18545 }, { "epoch": 0.5415979328768923, "grad_norm": 0.5637792038215595, "learning_rate": 4.552581778859151e-05, "loss": 0.5715, "step": 18550 }, { "epoch": 0.5417439161472095, "grad_norm": 0.5056246243294249, "learning_rate": 4.552311435523115e-05, "loss": 0.582, "step": 18555 }, { "epoch": 0.5418898994175267, "grad_norm": 0.5061726964109681, "learning_rate": 4.5520410921870774e-05, "loss": 0.6128, "step": 18560 }, { "epoch": 0.542035882687844, "grad_norm": 0.5170959900343682, "learning_rate": 4.551770748851041e-05, "loss": 0.6087, "step": 18565 }, { "epoch": 0.5421818659581612, "grad_norm": 0.5226074275917842, "learning_rate": 4.551500405515004e-05, "loss": 0.6254, "step": 18570 }, { "epoch": 0.5423278492284784, "grad_norm": 0.5087263323413437, "learning_rate": 4.5512300621789676e-05, "loss": 0.5782, "step": 18575 }, { "epoch": 0.5424738324987957, "grad_norm": 0.55027622759916, "learning_rate": 4.550959718842931e-05, "loss": 0.5949, "step": 18580 }, { "epoch": 0.5426198157691129, "grad_norm": 0.5505458852551283, "learning_rate": 4.5506893755068944e-05, "loss": 0.6076, "step": 18585 }, { "epoch": 0.5427657990394301, "grad_norm": 0.5109154501621297, "learning_rate": 4.550419032170857e-05, "loss": 0.5596, "step": 18590 }, { "epoch": 0.5429117823097473, "grad_norm": 0.53538655429689, "learning_rate": 4.5501486888348205e-05, "loss": 0.6167, "step": 18595 }, { "epoch": 0.5430577655800646, "grad_norm": 0.5569792660578002, "learning_rate": 4.549878345498784e-05, "loss": 0.5953, "step": 18600 }, { "epoch": 0.5432037488503817, "grad_norm": 0.525737078707082, "learning_rate": 4.549608002162747e-05, "loss": 0.5901, "step": 18605 }, { "epoch": 0.5433497321206989, "grad_norm": 0.5146648819045482, "learning_rate": 4.54933765882671e-05, "loss": 0.6146, "step": 18610 }, { "epoch": 0.5434957153910162, "grad_norm": 0.4928154509483728, "learning_rate": 4.5490673154906735e-05, "loss": 0.5903, "step": 18615 }, { "epoch": 0.5436416986613334, "grad_norm": 0.5095012750658364, "learning_rate": 4.548796972154636e-05, "loss": 0.582, "step": 18620 }, { "epoch": 0.5437876819316506, "grad_norm": 0.5421706802807564, "learning_rate": 4.5485266288185996e-05, "loss": 0.6279, "step": 18625 }, { "epoch": 0.5439336652019678, "grad_norm": 0.4929154336382529, "learning_rate": 4.548256285482563e-05, "loss": 0.5921, "step": 18630 }, { "epoch": 0.5440796484722851, "grad_norm": 0.5533280129093477, "learning_rate": 4.5479859421465264e-05, "loss": 0.6278, "step": 18635 }, { "epoch": 0.5442256317426023, "grad_norm": 0.5392461727161664, "learning_rate": 4.54771559881049e-05, "loss": 0.6349, "step": 18640 }, { "epoch": 0.5443716150129195, "grad_norm": 0.5432391437331467, "learning_rate": 4.547445255474453e-05, "loss": 0.6041, "step": 18645 }, { "epoch": 0.5445175982832368, "grad_norm": 0.5173935443833148, "learning_rate": 4.547174912138416e-05, "loss": 0.61, "step": 18650 }, { "epoch": 0.544663581553554, "grad_norm": 0.5273789618729712, "learning_rate": 4.546904568802379e-05, "loss": 0.5747, "step": 18655 }, { "epoch": 0.5448095648238712, "grad_norm": 0.5313448265033429, "learning_rate": 4.546634225466343e-05, "loss": 0.5876, "step": 18660 }, { "epoch": 0.5449555480941884, "grad_norm": 0.5859494019434554, "learning_rate": 4.5463638821303054e-05, "loss": 0.5935, "step": 18665 }, { "epoch": 0.5451015313645057, "grad_norm": 0.5608639385313327, "learning_rate": 4.546093538794269e-05, "loss": 0.6192, "step": 18670 }, { "epoch": 0.5452475146348228, "grad_norm": 0.5014010024786479, "learning_rate": 4.545823195458232e-05, "loss": 0.5591, "step": 18675 }, { "epoch": 0.54539349790514, "grad_norm": 0.5251372890374991, "learning_rate": 4.545552852122195e-05, "loss": 0.6074, "step": 18680 }, { "epoch": 0.5455394811754573, "grad_norm": 0.5000363380019294, "learning_rate": 4.545282508786159e-05, "loss": 0.641, "step": 18685 }, { "epoch": 0.5456854644457745, "grad_norm": 0.5277882594948369, "learning_rate": 4.545012165450122e-05, "loss": 0.6357, "step": 18690 }, { "epoch": 0.5458314477160917, "grad_norm": 0.512411540894182, "learning_rate": 4.544741822114085e-05, "loss": 0.608, "step": 18695 }, { "epoch": 0.5459774309864089, "grad_norm": 0.518642859215036, "learning_rate": 4.5444714787780486e-05, "loss": 0.5993, "step": 18700 }, { "epoch": 0.5461234142567262, "grad_norm": 0.5160734416693824, "learning_rate": 4.544201135442012e-05, "loss": 0.6008, "step": 18705 }, { "epoch": 0.5462693975270434, "grad_norm": 0.5089387829538504, "learning_rate": 4.543930792105975e-05, "loss": 0.5979, "step": 18710 }, { "epoch": 0.5464153807973606, "grad_norm": 0.4909091791072555, "learning_rate": 4.543660448769938e-05, "loss": 0.6421, "step": 18715 }, { "epoch": 0.5465613640676779, "grad_norm": 0.5868004488008307, "learning_rate": 4.5433901054339015e-05, "loss": 0.605, "step": 18720 }, { "epoch": 0.5467073473379951, "grad_norm": 0.5564706815196934, "learning_rate": 4.543119762097864e-05, "loss": 0.6297, "step": 18725 }, { "epoch": 0.5468533306083123, "grad_norm": 0.5575373912819331, "learning_rate": 4.5428494187618276e-05, "loss": 0.6377, "step": 18730 }, { "epoch": 0.5469993138786295, "grad_norm": 0.5290591842833527, "learning_rate": 4.542579075425791e-05, "loss": 0.6039, "step": 18735 }, { "epoch": 0.5471452971489468, "grad_norm": 0.5129118111154229, "learning_rate": 4.542308732089754e-05, "loss": 0.6189, "step": 18740 }, { "epoch": 0.547291280419264, "grad_norm": 0.5616657448858406, "learning_rate": 4.542038388753718e-05, "loss": 0.6299, "step": 18745 }, { "epoch": 0.5474372636895811, "grad_norm": 0.5586807438214056, "learning_rate": 4.5417680454176806e-05, "loss": 0.599, "step": 18750 }, { "epoch": 0.5475832469598984, "grad_norm": 0.5062869451998543, "learning_rate": 4.541497702081644e-05, "loss": 0.6459, "step": 18755 }, { "epoch": 0.5477292302302156, "grad_norm": 0.5517451594796408, "learning_rate": 4.5412273587456074e-05, "loss": 0.6107, "step": 18760 }, { "epoch": 0.5478752135005328, "grad_norm": 0.5248122041583071, "learning_rate": 4.54095701540957e-05, "loss": 0.6001, "step": 18765 }, { "epoch": 0.54802119677085, "grad_norm": 0.49174074437606086, "learning_rate": 4.5406866720735335e-05, "loss": 0.5992, "step": 18770 }, { "epoch": 0.5481671800411673, "grad_norm": 0.5578116233071242, "learning_rate": 4.540416328737497e-05, "loss": 0.6219, "step": 18775 }, { "epoch": 0.5483131633114845, "grad_norm": 0.5414785341000722, "learning_rate": 4.54014598540146e-05, "loss": 0.6294, "step": 18780 }, { "epoch": 0.5484591465818017, "grad_norm": 0.5323524518240859, "learning_rate": 4.539875642065423e-05, "loss": 0.6367, "step": 18785 }, { "epoch": 0.548605129852119, "grad_norm": 0.5676524864479695, "learning_rate": 4.5396052987293864e-05, "loss": 0.6051, "step": 18790 }, { "epoch": 0.5487511131224362, "grad_norm": 0.526998890060228, "learning_rate": 4.53933495539335e-05, "loss": 0.6064, "step": 18795 }, { "epoch": 0.5488970963927534, "grad_norm": 0.5485618878964378, "learning_rate": 4.539064612057313e-05, "loss": 0.5929, "step": 18800 }, { "epoch": 0.5490430796630706, "grad_norm": 0.5667194975571348, "learning_rate": 4.5387942687212766e-05, "loss": 0.595, "step": 18805 }, { "epoch": 0.5491890629333879, "grad_norm": 0.6017690869954873, "learning_rate": 4.538523925385239e-05, "loss": 0.6023, "step": 18810 }, { "epoch": 0.5493350462037051, "grad_norm": 0.5588598012273578, "learning_rate": 4.538253582049203e-05, "loss": 0.6244, "step": 18815 }, { "epoch": 0.5494810294740223, "grad_norm": 0.5559850540347822, "learning_rate": 4.537983238713166e-05, "loss": 0.6248, "step": 18820 }, { "epoch": 0.5496270127443394, "grad_norm": 0.5577520950104908, "learning_rate": 4.537712895377129e-05, "loss": 0.6116, "step": 18825 }, { "epoch": 0.5497729960146567, "grad_norm": 0.5205196952738224, "learning_rate": 4.537442552041092e-05, "loss": 0.6212, "step": 18830 }, { "epoch": 0.5499189792849739, "grad_norm": 0.5195066302597962, "learning_rate": 4.537172208705056e-05, "loss": 0.5892, "step": 18835 }, { "epoch": 0.5500649625552911, "grad_norm": 0.5457630865236552, "learning_rate": 4.536901865369019e-05, "loss": 0.6012, "step": 18840 }, { "epoch": 0.5502109458256084, "grad_norm": 0.5098308442139444, "learning_rate": 4.536631522032982e-05, "loss": 0.6197, "step": 18845 }, { "epoch": 0.5503569290959256, "grad_norm": 0.5341922117029696, "learning_rate": 4.536361178696945e-05, "loss": 0.5951, "step": 18850 }, { "epoch": 0.5505029123662428, "grad_norm": 0.512808483929326, "learning_rate": 4.5360908353609086e-05, "loss": 0.6279, "step": 18855 }, { "epoch": 0.55064889563656, "grad_norm": 0.5234882693151924, "learning_rate": 4.535820492024872e-05, "loss": 0.6183, "step": 18860 }, { "epoch": 0.5507948789068773, "grad_norm": 0.5699545544907847, "learning_rate": 4.5355501486888354e-05, "loss": 0.6475, "step": 18865 }, { "epoch": 0.5509408621771945, "grad_norm": 0.5064705915683005, "learning_rate": 4.535279805352798e-05, "loss": 0.6137, "step": 18870 }, { "epoch": 0.5510868454475117, "grad_norm": 0.5004410592987748, "learning_rate": 4.5350094620167615e-05, "loss": 0.5552, "step": 18875 }, { "epoch": 0.551232828717829, "grad_norm": 0.532855839122181, "learning_rate": 4.534739118680725e-05, "loss": 0.6316, "step": 18880 }, { "epoch": 0.5513788119881462, "grad_norm": 0.5279724474741516, "learning_rate": 4.5344687753446876e-05, "loss": 0.6613, "step": 18885 }, { "epoch": 0.5515247952584634, "grad_norm": 0.4718137303078297, "learning_rate": 4.534198432008651e-05, "loss": 0.5764, "step": 18890 }, { "epoch": 0.5516707785287805, "grad_norm": 0.4985821597513674, "learning_rate": 4.5339280886726144e-05, "loss": 0.5893, "step": 18895 }, { "epoch": 0.5518167617990978, "grad_norm": 0.5004359697116036, "learning_rate": 4.533657745336577e-05, "loss": 0.6152, "step": 18900 }, { "epoch": 0.551962745069415, "grad_norm": 0.5408368786833385, "learning_rate": 4.5333874020005406e-05, "loss": 0.6122, "step": 18905 }, { "epoch": 0.5521087283397322, "grad_norm": 0.5355120977006662, "learning_rate": 4.533117058664504e-05, "loss": 0.5943, "step": 18910 }, { "epoch": 0.5522547116100495, "grad_norm": 0.5710063441566376, "learning_rate": 4.5328467153284674e-05, "loss": 0.5638, "step": 18915 }, { "epoch": 0.5524006948803667, "grad_norm": 0.5640739379250156, "learning_rate": 4.532576371992431e-05, "loss": 0.6139, "step": 18920 }, { "epoch": 0.5525466781506839, "grad_norm": 0.5252617882321298, "learning_rate": 4.532306028656394e-05, "loss": 0.6007, "step": 18925 }, { "epoch": 0.5526926614210012, "grad_norm": 0.5738424570522028, "learning_rate": 4.532035685320357e-05, "loss": 0.6106, "step": 18930 }, { "epoch": 0.5528386446913184, "grad_norm": 0.5716386169304746, "learning_rate": 4.53176534198432e-05, "loss": 0.6097, "step": 18935 }, { "epoch": 0.5529846279616356, "grad_norm": 0.5188901078435065, "learning_rate": 4.531494998648284e-05, "loss": 0.5753, "step": 18940 }, { "epoch": 0.5531306112319528, "grad_norm": 0.6311576312783735, "learning_rate": 4.5312246553122464e-05, "loss": 0.5973, "step": 18945 }, { "epoch": 0.5532765945022701, "grad_norm": 0.5175951596680509, "learning_rate": 4.53095431197621e-05, "loss": 0.6157, "step": 18950 }, { "epoch": 0.5534225777725873, "grad_norm": 0.5151516700041762, "learning_rate": 4.530683968640173e-05, "loss": 0.6056, "step": 18955 }, { "epoch": 0.5535685610429045, "grad_norm": 0.5442250349390672, "learning_rate": 4.530413625304136e-05, "loss": 0.6053, "step": 18960 }, { "epoch": 0.5537145443132218, "grad_norm": 0.5342447507462039, "learning_rate": 4.5301432819680994e-05, "loss": 0.6029, "step": 18965 }, { "epoch": 0.5538605275835389, "grad_norm": 0.4935964517853512, "learning_rate": 4.5298729386320634e-05, "loss": 0.598, "step": 18970 }, { "epoch": 0.5540065108538561, "grad_norm": 0.5589138947029269, "learning_rate": 4.529602595296026e-05, "loss": 0.5884, "step": 18975 }, { "epoch": 0.5541524941241733, "grad_norm": 0.563388706600983, "learning_rate": 4.5293322519599896e-05, "loss": 0.6261, "step": 18980 }, { "epoch": 0.5542984773944906, "grad_norm": 0.5853817981476358, "learning_rate": 4.529061908623953e-05, "loss": 0.6132, "step": 18985 }, { "epoch": 0.5544444606648078, "grad_norm": 0.5351939615497852, "learning_rate": 4.528791565287916e-05, "loss": 0.6281, "step": 18990 }, { "epoch": 0.554590443935125, "grad_norm": 0.5308973953664892, "learning_rate": 4.528521221951879e-05, "loss": 0.5895, "step": 18995 }, { "epoch": 0.5547364272054423, "grad_norm": 0.5055587523157887, "learning_rate": 4.5282508786158425e-05, "loss": 0.5816, "step": 19000 }, { "epoch": 0.5548824104757595, "grad_norm": 0.5470558151714557, "learning_rate": 4.527980535279805e-05, "loss": 0.6148, "step": 19005 }, { "epoch": 0.5550283937460767, "grad_norm": 0.5498918621773083, "learning_rate": 4.5277101919437686e-05, "loss": 0.6152, "step": 19010 }, { "epoch": 0.5551743770163939, "grad_norm": 0.538380027313104, "learning_rate": 4.527439848607732e-05, "loss": 0.6231, "step": 19015 }, { "epoch": 0.5553203602867112, "grad_norm": 0.5316679727220168, "learning_rate": 4.527169505271695e-05, "loss": 0.6487, "step": 19020 }, { "epoch": 0.5554663435570284, "grad_norm": 0.5056607843722363, "learning_rate": 4.526899161935659e-05, "loss": 0.6199, "step": 19025 }, { "epoch": 0.5556123268273456, "grad_norm": 0.5279953493559938, "learning_rate": 4.526628818599622e-05, "loss": 0.6309, "step": 19030 }, { "epoch": 0.5557583100976629, "grad_norm": 0.5001026095282187, "learning_rate": 4.526358475263585e-05, "loss": 0.6206, "step": 19035 }, { "epoch": 0.5559042933679801, "grad_norm": 0.5275138741387942, "learning_rate": 4.526088131927548e-05, "loss": 0.6217, "step": 19040 }, { "epoch": 0.5560502766382972, "grad_norm": 0.5137639410634565, "learning_rate": 4.525817788591512e-05, "loss": 0.5906, "step": 19045 }, { "epoch": 0.5561962599086144, "grad_norm": 0.5222241393364797, "learning_rate": 4.5255474452554745e-05, "loss": 0.6036, "step": 19050 }, { "epoch": 0.5563422431789317, "grad_norm": 0.5311879835743336, "learning_rate": 4.525277101919438e-05, "loss": 0.6292, "step": 19055 }, { "epoch": 0.5564882264492489, "grad_norm": 0.5200411084701743, "learning_rate": 4.525006758583401e-05, "loss": 0.6251, "step": 19060 }, { "epoch": 0.5566342097195661, "grad_norm": 0.5425391350779043, "learning_rate": 4.524736415247364e-05, "loss": 0.6059, "step": 19065 }, { "epoch": 0.5567801929898833, "grad_norm": 0.4732448934776145, "learning_rate": 4.5244660719113274e-05, "loss": 0.5721, "step": 19070 }, { "epoch": 0.5569261762602006, "grad_norm": 0.5474848981368049, "learning_rate": 4.524195728575291e-05, "loss": 0.6249, "step": 19075 }, { "epoch": 0.5570721595305178, "grad_norm": 0.5195240227492052, "learning_rate": 4.5239253852392535e-05, "loss": 0.646, "step": 19080 }, { "epoch": 0.557218142800835, "grad_norm": 0.5059259551598296, "learning_rate": 4.5236550419032176e-05, "loss": 0.5755, "step": 19085 }, { "epoch": 0.5573641260711523, "grad_norm": 0.528661232406953, "learning_rate": 4.523384698567181e-05, "loss": 0.6207, "step": 19090 }, { "epoch": 0.5575101093414695, "grad_norm": 0.4956393856944702, "learning_rate": 4.523114355231144e-05, "loss": 0.5844, "step": 19095 }, { "epoch": 0.5576560926117867, "grad_norm": 0.5263918259342882, "learning_rate": 4.522844011895107e-05, "loss": 0.6245, "step": 19100 }, { "epoch": 0.557802075882104, "grad_norm": 0.5408088351545431, "learning_rate": 4.5225736685590705e-05, "loss": 0.5838, "step": 19105 }, { "epoch": 0.5579480591524212, "grad_norm": 0.5253702049872329, "learning_rate": 4.522303325223033e-05, "loss": 0.5929, "step": 19110 }, { "epoch": 0.5580940424227383, "grad_norm": 0.5155587710776086, "learning_rate": 4.5220329818869966e-05, "loss": 0.6263, "step": 19115 }, { "epoch": 0.5582400256930555, "grad_norm": 0.5425368334503023, "learning_rate": 4.52176263855096e-05, "loss": 0.6207, "step": 19120 }, { "epoch": 0.5583860089633728, "grad_norm": 0.6103198226232035, "learning_rate": 4.521492295214923e-05, "loss": 0.6309, "step": 19125 }, { "epoch": 0.55853199223369, "grad_norm": 0.4985430993615654, "learning_rate": 4.521221951878886e-05, "loss": 0.5862, "step": 19130 }, { "epoch": 0.5586779755040072, "grad_norm": 0.5234243101686614, "learning_rate": 4.5209516085428496e-05, "loss": 0.6016, "step": 19135 }, { "epoch": 0.5588239587743244, "grad_norm": 0.582988209942187, "learning_rate": 4.520681265206813e-05, "loss": 0.6035, "step": 19140 }, { "epoch": 0.5589699420446417, "grad_norm": 0.5651014357160041, "learning_rate": 4.5204109218707764e-05, "loss": 0.592, "step": 19145 }, { "epoch": 0.5591159253149589, "grad_norm": 0.5435826564629287, "learning_rate": 4.52014057853474e-05, "loss": 0.6208, "step": 19150 }, { "epoch": 0.5592619085852761, "grad_norm": 0.5655059711562558, "learning_rate": 4.5198702351987025e-05, "loss": 0.6369, "step": 19155 }, { "epoch": 0.5594078918555934, "grad_norm": 0.5258816220868879, "learning_rate": 4.519599891862666e-05, "loss": 0.6021, "step": 19160 }, { "epoch": 0.5595538751259106, "grad_norm": 0.4904636252209619, "learning_rate": 4.519329548526629e-05, "loss": 0.61, "step": 19165 }, { "epoch": 0.5596998583962278, "grad_norm": 0.5369995870558625, "learning_rate": 4.519059205190592e-05, "loss": 0.6316, "step": 19170 }, { "epoch": 0.559845841666545, "grad_norm": 0.5109220808644749, "learning_rate": 4.5187888618545554e-05, "loss": 0.5742, "step": 19175 }, { "epoch": 0.5599918249368623, "grad_norm": 0.5387438673684749, "learning_rate": 4.518518518518519e-05, "loss": 0.6307, "step": 19180 }, { "epoch": 0.5601378082071795, "grad_norm": 0.5198984099514699, "learning_rate": 4.5182481751824815e-05, "loss": 0.5879, "step": 19185 }, { "epoch": 0.5602837914774966, "grad_norm": 0.5647303438008358, "learning_rate": 4.517977831846445e-05, "loss": 0.6057, "step": 19190 }, { "epoch": 0.5604297747478139, "grad_norm": 0.5215307312042665, "learning_rate": 4.517707488510409e-05, "loss": 0.6151, "step": 19195 }, { "epoch": 0.5605757580181311, "grad_norm": 0.5140003074970235, "learning_rate": 4.517437145174372e-05, "loss": 0.6073, "step": 19200 }, { "epoch": 0.5607217412884483, "grad_norm": 0.5259238952323224, "learning_rate": 4.517166801838335e-05, "loss": 0.6048, "step": 19205 }, { "epoch": 0.5608677245587655, "grad_norm": 0.5539331003433391, "learning_rate": 4.5168964585022986e-05, "loss": 0.6259, "step": 19210 }, { "epoch": 0.5610137078290828, "grad_norm": 0.5345920471429909, "learning_rate": 4.516626115166261e-05, "loss": 0.6318, "step": 19215 }, { "epoch": 0.5611596910994, "grad_norm": 0.565094446104684, "learning_rate": 4.516355771830225e-05, "loss": 0.5966, "step": 19220 }, { "epoch": 0.5613056743697172, "grad_norm": 0.5363611394622307, "learning_rate": 4.516085428494188e-05, "loss": 0.5962, "step": 19225 }, { "epoch": 0.5614516576400345, "grad_norm": 0.5548101494521527, "learning_rate": 4.515815085158151e-05, "loss": 0.5968, "step": 19230 }, { "epoch": 0.5615976409103517, "grad_norm": 0.583161332331477, "learning_rate": 4.515544741822114e-05, "loss": 0.6423, "step": 19235 }, { "epoch": 0.5617436241806689, "grad_norm": 0.5333086229478412, "learning_rate": 4.5152743984860776e-05, "loss": 0.619, "step": 19240 }, { "epoch": 0.5618896074509862, "grad_norm": 0.618968886032447, "learning_rate": 4.51500405515004e-05, "loss": 0.6113, "step": 19245 }, { "epoch": 0.5620355907213034, "grad_norm": 0.5054397300347485, "learning_rate": 4.514733711814004e-05, "loss": 0.563, "step": 19250 }, { "epoch": 0.5621815739916206, "grad_norm": 0.5075846929276718, "learning_rate": 4.514463368477967e-05, "loss": 0.5913, "step": 19255 }, { "epoch": 0.5623275572619377, "grad_norm": 0.5558345673977165, "learning_rate": 4.5141930251419305e-05, "loss": 0.5867, "step": 19260 }, { "epoch": 0.562473540532255, "grad_norm": 0.5189415079861535, "learning_rate": 4.513922681805894e-05, "loss": 0.597, "step": 19265 }, { "epoch": 0.5626195238025722, "grad_norm": 0.5160973045192887, "learning_rate": 4.513652338469857e-05, "loss": 0.5871, "step": 19270 }, { "epoch": 0.5627655070728894, "grad_norm": 0.47553093514626416, "learning_rate": 4.51338199513382e-05, "loss": 0.6133, "step": 19275 }, { "epoch": 0.5629114903432066, "grad_norm": 0.5208606483213375, "learning_rate": 4.5131116517977835e-05, "loss": 0.6084, "step": 19280 }, { "epoch": 0.5630574736135239, "grad_norm": 0.5641480114687389, "learning_rate": 4.512841308461747e-05, "loss": 0.6016, "step": 19285 }, { "epoch": 0.5632034568838411, "grad_norm": 0.5015128435600417, "learning_rate": 4.5125709651257096e-05, "loss": 0.6039, "step": 19290 }, { "epoch": 0.5633494401541583, "grad_norm": 0.5462935410512028, "learning_rate": 4.512300621789673e-05, "loss": 0.5876, "step": 19295 }, { "epoch": 0.5634954234244756, "grad_norm": 0.5456127595129504, "learning_rate": 4.5120302784536364e-05, "loss": 0.6047, "step": 19300 }, { "epoch": 0.5636414066947928, "grad_norm": 0.5300929646231298, "learning_rate": 4.511759935117599e-05, "loss": 0.5703, "step": 19305 }, { "epoch": 0.56378738996511, "grad_norm": 0.4943685729744576, "learning_rate": 4.511489591781563e-05, "loss": 0.5968, "step": 19310 }, { "epoch": 0.5639333732354272, "grad_norm": 0.5026679339821392, "learning_rate": 4.511219248445526e-05, "loss": 0.6137, "step": 19315 }, { "epoch": 0.5640793565057445, "grad_norm": 0.485799857121107, "learning_rate": 4.510948905109489e-05, "loss": 0.6026, "step": 19320 }, { "epoch": 0.5642253397760617, "grad_norm": 0.4915117481875091, "learning_rate": 4.510678561773453e-05, "loss": 0.6077, "step": 19325 }, { "epoch": 0.5643713230463789, "grad_norm": 0.5167979189882679, "learning_rate": 4.510408218437416e-05, "loss": 0.6191, "step": 19330 }, { "epoch": 0.564517306316696, "grad_norm": 0.5211379192391135, "learning_rate": 4.510137875101379e-05, "loss": 0.5935, "step": 19335 }, { "epoch": 0.5646632895870133, "grad_norm": 0.4882343533007258, "learning_rate": 4.509867531765342e-05, "loss": 0.5786, "step": 19340 }, { "epoch": 0.5648092728573305, "grad_norm": 0.5517051744016799, "learning_rate": 4.5095971884293056e-05, "loss": 0.6446, "step": 19345 }, { "epoch": 0.5649552561276477, "grad_norm": 0.4945769081482207, "learning_rate": 4.5093268450932684e-05, "loss": 0.6093, "step": 19350 }, { "epoch": 0.565101239397965, "grad_norm": 0.5445760808835749, "learning_rate": 4.509056501757232e-05, "loss": 0.5957, "step": 19355 }, { "epoch": 0.5652472226682822, "grad_norm": 0.5617659401605355, "learning_rate": 4.508786158421195e-05, "loss": 0.6022, "step": 19360 }, { "epoch": 0.5653932059385994, "grad_norm": 0.56354575740454, "learning_rate": 4.5085158150851586e-05, "loss": 0.6375, "step": 19365 }, { "epoch": 0.5655391892089167, "grad_norm": 0.5150851440984527, "learning_rate": 4.508245471749122e-05, "loss": 0.5912, "step": 19370 }, { "epoch": 0.5656851724792339, "grad_norm": 0.5539867885908808, "learning_rate": 4.507975128413085e-05, "loss": 0.6027, "step": 19375 }, { "epoch": 0.5658311557495511, "grad_norm": 0.5559726812914333, "learning_rate": 4.507704785077048e-05, "loss": 0.5849, "step": 19380 }, { "epoch": 0.5659771390198683, "grad_norm": 0.557153156072408, "learning_rate": 4.5074344417410115e-05, "loss": 0.6004, "step": 19385 }, { "epoch": 0.5661231222901856, "grad_norm": 0.5298660781981641, "learning_rate": 4.507164098404974e-05, "loss": 0.5794, "step": 19390 }, { "epoch": 0.5662691055605028, "grad_norm": 0.5553289658019337, "learning_rate": 4.5068937550689376e-05, "loss": 0.5808, "step": 19395 }, { "epoch": 0.56641508883082, "grad_norm": 0.5622160217785903, "learning_rate": 4.506623411732901e-05, "loss": 0.6168, "step": 19400 }, { "epoch": 0.5665610721011373, "grad_norm": 0.5230199612502522, "learning_rate": 4.5063530683968644e-05, "loss": 0.6057, "step": 19405 }, { "epoch": 0.5667070553714544, "grad_norm": 0.5391114080184191, "learning_rate": 4.506082725060827e-05, "loss": 0.5993, "step": 19410 }, { "epoch": 0.5668530386417716, "grad_norm": 0.4898963172925197, "learning_rate": 4.5058123817247905e-05, "loss": 0.5919, "step": 19415 }, { "epoch": 0.5669990219120888, "grad_norm": 0.4977238933490844, "learning_rate": 4.505542038388754e-05, "loss": 0.5903, "step": 19420 }, { "epoch": 0.5671450051824061, "grad_norm": 0.524239082660867, "learning_rate": 4.5052716950527173e-05, "loss": 0.6107, "step": 19425 }, { "epoch": 0.5672909884527233, "grad_norm": 0.6806971985213145, "learning_rate": 4.505001351716681e-05, "loss": 0.5891, "step": 19430 }, { "epoch": 0.5674369717230405, "grad_norm": 0.49918478892596946, "learning_rate": 4.5047310083806435e-05, "loss": 0.6015, "step": 19435 }, { "epoch": 0.5675829549933578, "grad_norm": 0.5368034870421895, "learning_rate": 4.504460665044607e-05, "loss": 0.5868, "step": 19440 }, { "epoch": 0.567728938263675, "grad_norm": 0.5101208430313222, "learning_rate": 4.50419032170857e-05, "loss": 0.6067, "step": 19445 }, { "epoch": 0.5678749215339922, "grad_norm": 0.551934175967126, "learning_rate": 4.503919978372533e-05, "loss": 0.5919, "step": 19450 }, { "epoch": 0.5680209048043094, "grad_norm": 0.5194793293941657, "learning_rate": 4.5036496350364964e-05, "loss": 0.5775, "step": 19455 }, { "epoch": 0.5681668880746267, "grad_norm": 0.5610593309240872, "learning_rate": 4.50337929170046e-05, "loss": 0.6108, "step": 19460 }, { "epoch": 0.5683128713449439, "grad_norm": 0.535372192153917, "learning_rate": 4.503108948364423e-05, "loss": 0.5905, "step": 19465 }, { "epoch": 0.5684588546152611, "grad_norm": 0.5330860102717824, "learning_rate": 4.502838605028386e-05, "loss": 0.6123, "step": 19470 }, { "epoch": 0.5686048378855784, "grad_norm": 0.4909435437185811, "learning_rate": 4.502568261692349e-05, "loss": 0.6039, "step": 19475 }, { "epoch": 0.5687508211558955, "grad_norm": 0.5708584368859768, "learning_rate": 4.502297918356313e-05, "loss": 0.6168, "step": 19480 }, { "epoch": 0.5688968044262127, "grad_norm": 0.5451907328103331, "learning_rate": 4.502027575020276e-05, "loss": 0.6131, "step": 19485 }, { "epoch": 0.5690427876965299, "grad_norm": 0.5386318139007584, "learning_rate": 4.5017572316842395e-05, "loss": 0.6192, "step": 19490 }, { "epoch": 0.5691887709668472, "grad_norm": 0.5301751138711543, "learning_rate": 4.501486888348202e-05, "loss": 0.6354, "step": 19495 }, { "epoch": 0.5693347542371644, "grad_norm": 0.5310606618148789, "learning_rate": 4.5012165450121657e-05, "loss": 0.5922, "step": 19500 }, { "epoch": 0.5694807375074816, "grad_norm": 0.5422634650244126, "learning_rate": 4.500946201676129e-05, "loss": 0.6249, "step": 19505 }, { "epoch": 0.5696267207777989, "grad_norm": 0.5432122872165427, "learning_rate": 4.500675858340092e-05, "loss": 0.5896, "step": 19510 }, { "epoch": 0.5697727040481161, "grad_norm": 0.5273048180123165, "learning_rate": 4.500405515004055e-05, "loss": 0.5721, "step": 19515 }, { "epoch": 0.5699186873184333, "grad_norm": 0.5361465362073147, "learning_rate": 4.5001351716680186e-05, "loss": 0.6053, "step": 19520 }, { "epoch": 0.5700646705887505, "grad_norm": 0.5593338912740292, "learning_rate": 4.499864828331981e-05, "loss": 0.6212, "step": 19525 }, { "epoch": 0.5702106538590678, "grad_norm": 0.573530881004244, "learning_rate": 4.499594484995945e-05, "loss": 0.6231, "step": 19530 }, { "epoch": 0.570356637129385, "grad_norm": 0.5612311365032929, "learning_rate": 4.499324141659909e-05, "loss": 0.6505, "step": 19535 }, { "epoch": 0.5705026203997022, "grad_norm": 0.5294463801056656, "learning_rate": 4.4990537983238715e-05, "loss": 0.5614, "step": 19540 }, { "epoch": 0.5706486036700195, "grad_norm": 0.5069559322086844, "learning_rate": 4.498783454987835e-05, "loss": 0.6333, "step": 19545 }, { "epoch": 0.5707945869403367, "grad_norm": 0.5164126791590793, "learning_rate": 4.498513111651798e-05, "loss": 0.5782, "step": 19550 }, { "epoch": 0.5709405702106538, "grad_norm": 0.5669775227131665, "learning_rate": 4.498242768315761e-05, "loss": 0.6097, "step": 19555 }, { "epoch": 0.571086553480971, "grad_norm": 0.5349043302803141, "learning_rate": 4.4979724249797244e-05, "loss": 0.6167, "step": 19560 }, { "epoch": 0.5712325367512883, "grad_norm": 0.5108536745810422, "learning_rate": 4.497702081643688e-05, "loss": 0.5978, "step": 19565 }, { "epoch": 0.5713785200216055, "grad_norm": 0.5115750792547324, "learning_rate": 4.4974317383076506e-05, "loss": 0.6284, "step": 19570 }, { "epoch": 0.5715245032919227, "grad_norm": 0.5119541675288372, "learning_rate": 4.497161394971614e-05, "loss": 0.6062, "step": 19575 }, { "epoch": 0.57167048656224, "grad_norm": 0.5080551359112248, "learning_rate": 4.4968910516355774e-05, "loss": 0.629, "step": 19580 }, { "epoch": 0.5718164698325572, "grad_norm": 0.4915655463815255, "learning_rate": 4.49662070829954e-05, "loss": 0.5837, "step": 19585 }, { "epoch": 0.5719624531028744, "grad_norm": 0.5437395085625304, "learning_rate": 4.4963503649635035e-05, "loss": 0.6234, "step": 19590 }, { "epoch": 0.5721084363731916, "grad_norm": 0.5000983368624712, "learning_rate": 4.4960800216274676e-05, "loss": 0.6574, "step": 19595 }, { "epoch": 0.5722544196435089, "grad_norm": 0.5356124232663281, "learning_rate": 4.49580967829143e-05, "loss": 0.5981, "step": 19600 }, { "epoch": 0.5724004029138261, "grad_norm": 0.5600913072829055, "learning_rate": 4.495539334955394e-05, "loss": 0.6023, "step": 19605 }, { "epoch": 0.5725463861841433, "grad_norm": 0.5538040463584383, "learning_rate": 4.495268991619357e-05, "loss": 0.6092, "step": 19610 }, { "epoch": 0.5726923694544606, "grad_norm": 0.5638576825969278, "learning_rate": 4.49499864828332e-05, "loss": 0.6058, "step": 19615 }, { "epoch": 0.5728383527247778, "grad_norm": 0.5255944584668155, "learning_rate": 4.494728304947283e-05, "loss": 0.613, "step": 19620 }, { "epoch": 0.5729843359950949, "grad_norm": 0.5224880951008937, "learning_rate": 4.4944579616112466e-05, "loss": 0.6254, "step": 19625 }, { "epoch": 0.5731303192654121, "grad_norm": 0.5288748834731228, "learning_rate": 4.4941876182752093e-05, "loss": 0.5914, "step": 19630 }, { "epoch": 0.5732763025357294, "grad_norm": 0.7717502077191815, "learning_rate": 4.493917274939173e-05, "loss": 0.6028, "step": 19635 }, { "epoch": 0.5734222858060466, "grad_norm": 0.5136291343290488, "learning_rate": 4.493646931603136e-05, "loss": 0.5805, "step": 19640 }, { "epoch": 0.5735682690763638, "grad_norm": 0.5503803554453947, "learning_rate": 4.493376588267099e-05, "loss": 0.6557, "step": 19645 }, { "epoch": 0.573714252346681, "grad_norm": 0.528825142480543, "learning_rate": 4.493106244931063e-05, "loss": 0.605, "step": 19650 }, { "epoch": 0.5738602356169983, "grad_norm": 0.5520953731640457, "learning_rate": 4.4928359015950263e-05, "loss": 0.5676, "step": 19655 }, { "epoch": 0.5740062188873155, "grad_norm": 0.549290543591012, "learning_rate": 4.492565558258989e-05, "loss": 0.5793, "step": 19660 }, { "epoch": 0.5741522021576327, "grad_norm": 0.5429155039839156, "learning_rate": 4.4922952149229525e-05, "loss": 0.6206, "step": 19665 }, { "epoch": 0.57429818542795, "grad_norm": 0.5534442227160533, "learning_rate": 4.492024871586916e-05, "loss": 0.612, "step": 19670 }, { "epoch": 0.5744441686982672, "grad_norm": 0.4609273590041608, "learning_rate": 4.4917545282508786e-05, "loss": 0.5908, "step": 19675 }, { "epoch": 0.5745901519685844, "grad_norm": 0.552004900151352, "learning_rate": 4.491484184914842e-05, "loss": 0.601, "step": 19680 }, { "epoch": 0.5747361352389017, "grad_norm": 0.5574145129016108, "learning_rate": 4.4912138415788054e-05, "loss": 0.5972, "step": 19685 }, { "epoch": 0.5748821185092189, "grad_norm": 0.5026293980687788, "learning_rate": 4.490943498242768e-05, "loss": 0.5789, "step": 19690 }, { "epoch": 0.5750281017795361, "grad_norm": 0.5213011012742088, "learning_rate": 4.4906731549067315e-05, "loss": 0.6261, "step": 19695 }, { "epoch": 0.5751740850498532, "grad_norm": 0.5340756192453368, "learning_rate": 4.490402811570695e-05, "loss": 0.6018, "step": 19700 }, { "epoch": 0.5753200683201705, "grad_norm": 0.5401526623144691, "learning_rate": 4.490132468234658e-05, "loss": 0.5925, "step": 19705 }, { "epoch": 0.5754660515904877, "grad_norm": 0.5532033456828838, "learning_rate": 4.489862124898622e-05, "loss": 0.6433, "step": 19710 }, { "epoch": 0.5756120348608049, "grad_norm": 0.5246150294827149, "learning_rate": 4.489591781562585e-05, "loss": 0.6175, "step": 19715 }, { "epoch": 0.5757580181311222, "grad_norm": 0.5306394047904085, "learning_rate": 4.489321438226548e-05, "loss": 0.6048, "step": 19720 }, { "epoch": 0.5759040014014394, "grad_norm": 0.48782578319691094, "learning_rate": 4.489051094890511e-05, "loss": 0.578, "step": 19725 }, { "epoch": 0.5760499846717566, "grad_norm": 0.4948994118143281, "learning_rate": 4.4887807515544747e-05, "loss": 0.5477, "step": 19730 }, { "epoch": 0.5761959679420738, "grad_norm": 0.5300049825145906, "learning_rate": 4.4885104082184374e-05, "loss": 0.5765, "step": 19735 }, { "epoch": 0.5763419512123911, "grad_norm": 0.5091379742467375, "learning_rate": 4.488240064882401e-05, "loss": 0.5806, "step": 19740 }, { "epoch": 0.5764879344827083, "grad_norm": 0.520217484646599, "learning_rate": 4.487969721546364e-05, "loss": 0.5949, "step": 19745 }, { "epoch": 0.5766339177530255, "grad_norm": 0.5479034049670833, "learning_rate": 4.487699378210327e-05, "loss": 0.6028, "step": 19750 }, { "epoch": 0.5767799010233428, "grad_norm": 0.5655312267710766, "learning_rate": 4.48742903487429e-05, "loss": 0.5866, "step": 19755 }, { "epoch": 0.57692588429366, "grad_norm": 0.4543642125645842, "learning_rate": 4.487158691538254e-05, "loss": 0.5401, "step": 19760 }, { "epoch": 0.5770718675639772, "grad_norm": 0.5502952330629199, "learning_rate": 4.486888348202217e-05, "loss": 0.6137, "step": 19765 }, { "epoch": 0.5772178508342944, "grad_norm": 0.48464540252447263, "learning_rate": 4.4866180048661805e-05, "loss": 0.5905, "step": 19770 }, { "epoch": 0.5773638341046116, "grad_norm": 0.4416555866504117, "learning_rate": 4.486347661530144e-05, "loss": 0.5659, "step": 19775 }, { "epoch": 0.5775098173749288, "grad_norm": 0.5089931394965442, "learning_rate": 4.4860773181941066e-05, "loss": 0.582, "step": 19780 }, { "epoch": 0.577655800645246, "grad_norm": 0.5924042333344081, "learning_rate": 4.48580697485807e-05, "loss": 0.6257, "step": 19785 }, { "epoch": 0.5778017839155632, "grad_norm": 0.5322137346205769, "learning_rate": 4.4855366315220334e-05, "loss": 0.6106, "step": 19790 }, { "epoch": 0.5779477671858805, "grad_norm": 0.5639416419320503, "learning_rate": 4.485266288185996e-05, "loss": 0.615, "step": 19795 }, { "epoch": 0.5780937504561977, "grad_norm": 0.5067910790764664, "learning_rate": 4.4849959448499596e-05, "loss": 0.6088, "step": 19800 }, { "epoch": 0.5782397337265149, "grad_norm": 0.512498581390303, "learning_rate": 4.484725601513923e-05, "loss": 0.6075, "step": 19805 }, { "epoch": 0.5783857169968322, "grad_norm": 0.5187950264450112, "learning_rate": 4.484455258177886e-05, "loss": 0.5914, "step": 19810 }, { "epoch": 0.5785317002671494, "grad_norm": 0.5168466410119198, "learning_rate": 4.484184914841849e-05, "loss": 0.5803, "step": 19815 }, { "epoch": 0.5786776835374666, "grad_norm": 0.4938639072704367, "learning_rate": 4.483914571505813e-05, "loss": 0.6487, "step": 19820 }, { "epoch": 0.5788236668077839, "grad_norm": 0.494421556085233, "learning_rate": 4.483644228169776e-05, "loss": 0.5729, "step": 19825 }, { "epoch": 0.5789696500781011, "grad_norm": 0.49107714608309555, "learning_rate": 4.483373884833739e-05, "loss": 0.5939, "step": 19830 }, { "epoch": 0.5791156333484183, "grad_norm": 0.5366442440848106, "learning_rate": 4.483103541497703e-05, "loss": 0.6219, "step": 19835 }, { "epoch": 0.5792616166187355, "grad_norm": 0.5066980984605385, "learning_rate": 4.4828331981616654e-05, "loss": 0.6071, "step": 19840 }, { "epoch": 0.5794075998890527, "grad_norm": 0.608744877163204, "learning_rate": 4.482562854825629e-05, "loss": 0.6363, "step": 19845 }, { "epoch": 0.5795535831593699, "grad_norm": 0.5038100595888345, "learning_rate": 4.482292511489592e-05, "loss": 0.6009, "step": 19850 }, { "epoch": 0.5796995664296871, "grad_norm": 0.50651728977615, "learning_rate": 4.482022168153555e-05, "loss": 0.5999, "step": 19855 }, { "epoch": 0.5798455497000043, "grad_norm": 0.6652620104694111, "learning_rate": 4.4817518248175183e-05, "loss": 0.5502, "step": 19860 }, { "epoch": 0.5799915329703216, "grad_norm": 0.5323203873143298, "learning_rate": 4.481481481481482e-05, "loss": 0.6043, "step": 19865 }, { "epoch": 0.5801375162406388, "grad_norm": 0.5553655052786117, "learning_rate": 4.4812111381454445e-05, "loss": 0.5773, "step": 19870 }, { "epoch": 0.580283499510956, "grad_norm": 0.6130339590173922, "learning_rate": 4.4809407948094085e-05, "loss": 0.6205, "step": 19875 }, { "epoch": 0.5804294827812733, "grad_norm": 0.5201589876758402, "learning_rate": 4.480670451473372e-05, "loss": 0.5829, "step": 19880 }, { "epoch": 0.5805754660515905, "grad_norm": 0.5080401656352185, "learning_rate": 4.480400108137335e-05, "loss": 0.5754, "step": 19885 }, { "epoch": 0.5807214493219077, "grad_norm": 0.5297085282357092, "learning_rate": 4.480129764801298e-05, "loss": 0.5941, "step": 19890 }, { "epoch": 0.580867432592225, "grad_norm": 0.499955776744573, "learning_rate": 4.4798594214652615e-05, "loss": 0.5894, "step": 19895 }, { "epoch": 0.5810134158625422, "grad_norm": 0.5150147161323556, "learning_rate": 4.479589078129224e-05, "loss": 0.6206, "step": 19900 }, { "epoch": 0.5811593991328594, "grad_norm": 0.5150854285470204, "learning_rate": 4.4793187347931876e-05, "loss": 0.6493, "step": 19905 }, { "epoch": 0.5813053824031766, "grad_norm": 0.5065345276224908, "learning_rate": 4.479048391457151e-05, "loss": 0.5618, "step": 19910 }, { "epoch": 0.5814513656734939, "grad_norm": 0.49853230989811925, "learning_rate": 4.478778048121114e-05, "loss": 0.5502, "step": 19915 }, { "epoch": 0.581597348943811, "grad_norm": 0.5217009567060807, "learning_rate": 4.478507704785077e-05, "loss": 0.5754, "step": 19920 }, { "epoch": 0.5817433322141282, "grad_norm": 0.4889312470829242, "learning_rate": 4.4782373614490405e-05, "loss": 0.5909, "step": 19925 }, { "epoch": 0.5818893154844454, "grad_norm": 0.5008049533228965, "learning_rate": 4.477967018113003e-05, "loss": 0.5945, "step": 19930 }, { "epoch": 0.5820352987547627, "grad_norm": 0.47625434577895304, "learning_rate": 4.477696674776967e-05, "loss": 0.6168, "step": 19935 }, { "epoch": 0.5821812820250799, "grad_norm": 0.5496863868291497, "learning_rate": 4.47742633144093e-05, "loss": 0.6215, "step": 19940 }, { "epoch": 0.5823272652953971, "grad_norm": 0.5036117903376038, "learning_rate": 4.4771559881048934e-05, "loss": 0.5707, "step": 19945 }, { "epoch": 0.5824732485657144, "grad_norm": 0.5463223532506648, "learning_rate": 4.476885644768857e-05, "loss": 0.6137, "step": 19950 }, { "epoch": 0.5826192318360316, "grad_norm": 0.49879098080382056, "learning_rate": 4.47661530143282e-05, "loss": 0.601, "step": 19955 }, { "epoch": 0.5827652151063488, "grad_norm": 0.5294124229234316, "learning_rate": 4.476344958096783e-05, "loss": 0.617, "step": 19960 }, { "epoch": 0.582911198376666, "grad_norm": 0.5762176421649561, "learning_rate": 4.4760746147607464e-05, "loss": 0.5989, "step": 19965 }, { "epoch": 0.5830571816469833, "grad_norm": 0.5697402594736615, "learning_rate": 4.47580427142471e-05, "loss": 0.6432, "step": 19970 }, { "epoch": 0.5832031649173005, "grad_norm": 0.5277274386699345, "learning_rate": 4.4755339280886725e-05, "loss": 0.5775, "step": 19975 }, { "epoch": 0.5833491481876177, "grad_norm": 0.5531513491572233, "learning_rate": 4.475263584752636e-05, "loss": 0.6106, "step": 19980 }, { "epoch": 0.583495131457935, "grad_norm": 0.5543413751265274, "learning_rate": 4.474993241416599e-05, "loss": 0.5967, "step": 19985 }, { "epoch": 0.5836411147282522, "grad_norm": 0.5148658920287462, "learning_rate": 4.474722898080563e-05, "loss": 0.5744, "step": 19990 }, { "epoch": 0.5837870979985693, "grad_norm": 0.5326008438661827, "learning_rate": 4.474452554744526e-05, "loss": 0.6133, "step": 19995 }, { "epoch": 0.5839330812688865, "grad_norm": 0.5242767907258993, "learning_rate": 4.474182211408489e-05, "loss": 0.6163, "step": 20000 }, { "epoch": 0.5840790645392038, "grad_norm": 0.5367243773442244, "learning_rate": 4.473911868072452e-05, "loss": 0.5349, "step": 20005 }, { "epoch": 0.584225047809521, "grad_norm": 0.5590852669110019, "learning_rate": 4.4736415247364156e-05, "loss": 0.6128, "step": 20010 }, { "epoch": 0.5843710310798382, "grad_norm": 0.581369855905517, "learning_rate": 4.473371181400379e-05, "loss": 0.5854, "step": 20015 }, { "epoch": 0.5845170143501555, "grad_norm": 0.5582974073527456, "learning_rate": 4.473100838064342e-05, "loss": 0.6278, "step": 20020 }, { "epoch": 0.5846629976204727, "grad_norm": 0.507512753536092, "learning_rate": 4.472830494728305e-05, "loss": 0.5849, "step": 20025 }, { "epoch": 0.5848089808907899, "grad_norm": 0.5161934534757127, "learning_rate": 4.4725601513922686e-05, "loss": 0.5614, "step": 20030 }, { "epoch": 0.5849549641611071, "grad_norm": 0.5185609880995263, "learning_rate": 4.472289808056231e-05, "loss": 0.6107, "step": 20035 }, { "epoch": 0.5851009474314244, "grad_norm": 0.5698084779986311, "learning_rate": 4.472019464720195e-05, "loss": 0.5864, "step": 20040 }, { "epoch": 0.5852469307017416, "grad_norm": 0.5366184961682068, "learning_rate": 4.471749121384158e-05, "loss": 0.5918, "step": 20045 }, { "epoch": 0.5853929139720588, "grad_norm": 0.48120547925137025, "learning_rate": 4.4714787780481215e-05, "loss": 0.578, "step": 20050 }, { "epoch": 0.5855388972423761, "grad_norm": 0.5531447477210907, "learning_rate": 4.471208434712085e-05, "loss": 0.604, "step": 20055 }, { "epoch": 0.5856848805126933, "grad_norm": 0.4996036267134929, "learning_rate": 4.4709380913760476e-05, "loss": 0.6151, "step": 20060 }, { "epoch": 0.5858308637830104, "grad_norm": 0.5024942076804834, "learning_rate": 4.470667748040011e-05, "loss": 0.5953, "step": 20065 }, { "epoch": 0.5859768470533276, "grad_norm": 0.5173145281237388, "learning_rate": 4.4703974047039744e-05, "loss": 0.5921, "step": 20070 }, { "epoch": 0.5861228303236449, "grad_norm": 0.5594464297691722, "learning_rate": 4.470127061367937e-05, "loss": 0.6253, "step": 20075 }, { "epoch": 0.5862688135939621, "grad_norm": 0.5059751038210885, "learning_rate": 4.4698567180319005e-05, "loss": 0.578, "step": 20080 }, { "epoch": 0.5864147968642793, "grad_norm": 0.5605769047944181, "learning_rate": 4.469586374695864e-05, "loss": 0.5904, "step": 20085 }, { "epoch": 0.5865607801345966, "grad_norm": 0.4950760368222179, "learning_rate": 4.469316031359827e-05, "loss": 0.5784, "step": 20090 }, { "epoch": 0.5867067634049138, "grad_norm": 0.48762344011751385, "learning_rate": 4.46904568802379e-05, "loss": 0.6213, "step": 20095 }, { "epoch": 0.586852746675231, "grad_norm": 0.5543069746800924, "learning_rate": 4.4687753446877535e-05, "loss": 0.5686, "step": 20100 }, { "epoch": 0.5869987299455482, "grad_norm": 0.5111204709153353, "learning_rate": 4.468505001351717e-05, "loss": 0.573, "step": 20105 }, { "epoch": 0.5871447132158655, "grad_norm": 0.5328559108757386, "learning_rate": 4.46823465801568e-05, "loss": 0.5947, "step": 20110 }, { "epoch": 0.5872906964861827, "grad_norm": 0.4984912615252644, "learning_rate": 4.467964314679644e-05, "loss": 0.5897, "step": 20115 }, { "epoch": 0.5874366797564999, "grad_norm": 0.5698138579675339, "learning_rate": 4.4676939713436064e-05, "loss": 0.6156, "step": 20120 }, { "epoch": 0.5875826630268172, "grad_norm": 0.47604298588143124, "learning_rate": 4.46742362800757e-05, "loss": 0.5531, "step": 20125 }, { "epoch": 0.5877286462971344, "grad_norm": 0.49365500800999684, "learning_rate": 4.467153284671533e-05, "loss": 0.5688, "step": 20130 }, { "epoch": 0.5878746295674516, "grad_norm": 0.5525762227279142, "learning_rate": 4.466882941335496e-05, "loss": 0.6435, "step": 20135 }, { "epoch": 0.5880206128377687, "grad_norm": 0.49893036909713795, "learning_rate": 4.466612597999459e-05, "loss": 0.596, "step": 20140 }, { "epoch": 0.588166596108086, "grad_norm": 0.5256121007553529, "learning_rate": 4.466342254663423e-05, "loss": 0.6001, "step": 20145 }, { "epoch": 0.5883125793784032, "grad_norm": 0.5043540818698576, "learning_rate": 4.466071911327386e-05, "loss": 0.5733, "step": 20150 }, { "epoch": 0.5884585626487204, "grad_norm": 0.5697384436243159, "learning_rate": 4.465801567991349e-05, "loss": 0.6182, "step": 20155 }, { "epoch": 0.5886045459190377, "grad_norm": 0.519044491075997, "learning_rate": 4.465531224655313e-05, "loss": 0.5769, "step": 20160 }, { "epoch": 0.5887505291893549, "grad_norm": 0.5382844429365499, "learning_rate": 4.4652608813192756e-05, "loss": 0.595, "step": 20165 }, { "epoch": 0.5888965124596721, "grad_norm": 0.49129274332244616, "learning_rate": 4.464990537983239e-05, "loss": 0.6058, "step": 20170 }, { "epoch": 0.5890424957299893, "grad_norm": 0.548661866198551, "learning_rate": 4.4647201946472024e-05, "loss": 0.581, "step": 20175 }, { "epoch": 0.5891884790003066, "grad_norm": 0.5307569863115063, "learning_rate": 4.464449851311165e-05, "loss": 0.6194, "step": 20180 }, { "epoch": 0.5893344622706238, "grad_norm": 0.5290375313290941, "learning_rate": 4.4641795079751286e-05, "loss": 0.5598, "step": 20185 }, { "epoch": 0.589480445540941, "grad_norm": 0.48878304825049196, "learning_rate": 4.463909164639092e-05, "loss": 0.6119, "step": 20190 }, { "epoch": 0.5896264288112583, "grad_norm": 0.4979775523140539, "learning_rate": 4.463638821303055e-05, "loss": 0.5867, "step": 20195 }, { "epoch": 0.5897724120815755, "grad_norm": 0.48950475242151675, "learning_rate": 4.463368477967018e-05, "loss": 0.6061, "step": 20200 }, { "epoch": 0.5899183953518927, "grad_norm": 0.5379579297067267, "learning_rate": 4.4630981346309815e-05, "loss": 0.5774, "step": 20205 }, { "epoch": 0.5900643786222098, "grad_norm": 0.5127987212896161, "learning_rate": 4.462827791294944e-05, "loss": 0.5938, "step": 20210 }, { "epoch": 0.5902103618925271, "grad_norm": 0.5476157503300966, "learning_rate": 4.462557447958908e-05, "loss": 0.6052, "step": 20215 }, { "epoch": 0.5903563451628443, "grad_norm": 0.505278773173354, "learning_rate": 4.462287104622872e-05, "loss": 0.5852, "step": 20220 }, { "epoch": 0.5905023284331615, "grad_norm": 0.5303212560484518, "learning_rate": 4.4620167612868344e-05, "loss": 0.6018, "step": 20225 }, { "epoch": 0.5906483117034788, "grad_norm": 0.4996260349641142, "learning_rate": 4.461746417950798e-05, "loss": 0.5761, "step": 20230 }, { "epoch": 0.590794294973796, "grad_norm": 0.5238221293945687, "learning_rate": 4.461476074614761e-05, "loss": 0.5996, "step": 20235 }, { "epoch": 0.5909402782441132, "grad_norm": 0.5060294923299958, "learning_rate": 4.461205731278724e-05, "loss": 0.584, "step": 20240 }, { "epoch": 0.5910862615144304, "grad_norm": 0.5484436295566004, "learning_rate": 4.4609353879426874e-05, "loss": 0.5955, "step": 20245 }, { "epoch": 0.5912322447847477, "grad_norm": 0.5070934994517512, "learning_rate": 4.460665044606651e-05, "loss": 0.5856, "step": 20250 }, { "epoch": 0.5913782280550649, "grad_norm": 0.5324677546495523, "learning_rate": 4.4603947012706135e-05, "loss": 0.5898, "step": 20255 }, { "epoch": 0.5915242113253821, "grad_norm": 0.4922901800488298, "learning_rate": 4.460124357934577e-05, "loss": 0.5829, "step": 20260 }, { "epoch": 0.5916701945956994, "grad_norm": 0.47345946792669436, "learning_rate": 4.45985401459854e-05, "loss": 0.578, "step": 20265 }, { "epoch": 0.5918161778660166, "grad_norm": 0.48320890591362814, "learning_rate": 4.459583671262503e-05, "loss": 0.5675, "step": 20270 }, { "epoch": 0.5919621611363338, "grad_norm": 0.53749437797301, "learning_rate": 4.459313327926467e-05, "loss": 0.6168, "step": 20275 }, { "epoch": 0.592108144406651, "grad_norm": 0.4917271633666933, "learning_rate": 4.4590429845904305e-05, "loss": 0.5574, "step": 20280 }, { "epoch": 0.5922541276769682, "grad_norm": 0.47337356831856203, "learning_rate": 4.458772641254393e-05, "loss": 0.6194, "step": 20285 }, { "epoch": 0.5924001109472854, "grad_norm": 0.5013933383451289, "learning_rate": 4.4585022979183566e-05, "loss": 0.6371, "step": 20290 }, { "epoch": 0.5925460942176026, "grad_norm": 0.5036056533127974, "learning_rate": 4.45823195458232e-05, "loss": 0.592, "step": 20295 }, { "epoch": 0.5926920774879199, "grad_norm": 0.5004563366309044, "learning_rate": 4.457961611246283e-05, "loss": 0.5746, "step": 20300 }, { "epoch": 0.5928380607582371, "grad_norm": 0.49078623606597266, "learning_rate": 4.457691267910246e-05, "loss": 0.5926, "step": 20305 }, { "epoch": 0.5929840440285543, "grad_norm": 0.5144880329228497, "learning_rate": 4.4574209245742095e-05, "loss": 0.5953, "step": 20310 }, { "epoch": 0.5931300272988715, "grad_norm": 0.4768351321542231, "learning_rate": 4.457150581238172e-05, "loss": 0.5833, "step": 20315 }, { "epoch": 0.5932760105691888, "grad_norm": 0.49860870278015396, "learning_rate": 4.4568802379021357e-05, "loss": 0.6077, "step": 20320 }, { "epoch": 0.593421993839506, "grad_norm": 0.738156109993412, "learning_rate": 4.456609894566099e-05, "loss": 0.632, "step": 20325 }, { "epoch": 0.5935679771098232, "grad_norm": 0.5166564206404991, "learning_rate": 4.4563395512300625e-05, "loss": 0.5941, "step": 20330 }, { "epoch": 0.5937139603801405, "grad_norm": 0.5419017050419267, "learning_rate": 4.456069207894026e-05, "loss": 0.6072, "step": 20335 }, { "epoch": 0.5938599436504577, "grad_norm": 0.5312754615659102, "learning_rate": 4.455798864557989e-05, "loss": 0.6223, "step": 20340 }, { "epoch": 0.5940059269207749, "grad_norm": 0.5364633062337076, "learning_rate": 4.455528521221952e-05, "loss": 0.5986, "step": 20345 }, { "epoch": 0.5941519101910921, "grad_norm": 0.5167289711667304, "learning_rate": 4.4552581778859154e-05, "loss": 0.5958, "step": 20350 }, { "epoch": 0.5942978934614094, "grad_norm": 0.5443934787787127, "learning_rate": 4.454987834549879e-05, "loss": 0.6032, "step": 20355 }, { "epoch": 0.5944438767317265, "grad_norm": 0.5200823382919331, "learning_rate": 4.4547174912138415e-05, "loss": 0.6229, "step": 20360 }, { "epoch": 0.5945898600020437, "grad_norm": 0.5282776715739831, "learning_rate": 4.454447147877805e-05, "loss": 0.5834, "step": 20365 }, { "epoch": 0.594735843272361, "grad_norm": 0.5232842587435024, "learning_rate": 4.454176804541768e-05, "loss": 0.6254, "step": 20370 }, { "epoch": 0.5948818265426782, "grad_norm": 0.47058120232350353, "learning_rate": 4.453906461205731e-05, "loss": 0.6139, "step": 20375 }, { "epoch": 0.5950278098129954, "grad_norm": 0.5474110478536591, "learning_rate": 4.4536361178696944e-05, "loss": 0.6025, "step": 20380 }, { "epoch": 0.5951737930833126, "grad_norm": 0.48481339993167005, "learning_rate": 4.4533657745336585e-05, "loss": 0.588, "step": 20385 }, { "epoch": 0.5953197763536299, "grad_norm": 0.539304388628375, "learning_rate": 4.453095431197621e-05, "loss": 0.5853, "step": 20390 }, { "epoch": 0.5954657596239471, "grad_norm": 0.5432341175628194, "learning_rate": 4.4528250878615846e-05, "loss": 0.6217, "step": 20395 }, { "epoch": 0.5956117428942643, "grad_norm": 0.5533820130885913, "learning_rate": 4.452554744525548e-05, "loss": 0.6123, "step": 20400 }, { "epoch": 0.5957577261645816, "grad_norm": 0.5619110212760727, "learning_rate": 4.452284401189511e-05, "loss": 0.622, "step": 20405 }, { "epoch": 0.5959037094348988, "grad_norm": 0.584524306818633, "learning_rate": 4.452014057853474e-05, "loss": 0.6338, "step": 20410 }, { "epoch": 0.596049692705216, "grad_norm": 0.5171584828319725, "learning_rate": 4.4517437145174376e-05, "loss": 0.6004, "step": 20415 }, { "epoch": 0.5961956759755332, "grad_norm": 0.529101836112362, "learning_rate": 4.4514733711814e-05, "loss": 0.6001, "step": 20420 }, { "epoch": 0.5963416592458505, "grad_norm": 0.539386355428286, "learning_rate": 4.451203027845364e-05, "loss": 0.5993, "step": 20425 }, { "epoch": 0.5964876425161676, "grad_norm": 0.5502585975266654, "learning_rate": 4.450932684509327e-05, "loss": 0.6211, "step": 20430 }, { "epoch": 0.5966336257864848, "grad_norm": 0.5316797155321757, "learning_rate": 4.45066234117329e-05, "loss": 0.5962, "step": 20435 }, { "epoch": 0.596779609056802, "grad_norm": 0.5308927352751485, "learning_rate": 4.450391997837254e-05, "loss": 0.6299, "step": 20440 }, { "epoch": 0.5969255923271193, "grad_norm": 0.542937176328066, "learning_rate": 4.450121654501217e-05, "loss": 0.6088, "step": 20445 }, { "epoch": 0.5970715755974365, "grad_norm": 0.5137465817091661, "learning_rate": 4.44985131116518e-05, "loss": 0.6024, "step": 20450 }, { "epoch": 0.5972175588677537, "grad_norm": 0.5286171329203107, "learning_rate": 4.4495809678291434e-05, "loss": 0.561, "step": 20455 }, { "epoch": 0.597363542138071, "grad_norm": 0.5254991703361784, "learning_rate": 4.449310624493107e-05, "loss": 0.6126, "step": 20460 }, { "epoch": 0.5975095254083882, "grad_norm": 0.5215895917409588, "learning_rate": 4.4490402811570695e-05, "loss": 0.6196, "step": 20465 }, { "epoch": 0.5976555086787054, "grad_norm": 0.5458978125026239, "learning_rate": 4.448769937821033e-05, "loss": 0.5861, "step": 20470 }, { "epoch": 0.5978014919490227, "grad_norm": 0.5396312357996192, "learning_rate": 4.4484995944849964e-05, "loss": 0.6153, "step": 20475 }, { "epoch": 0.5979474752193399, "grad_norm": 0.4718841261079546, "learning_rate": 4.448229251148959e-05, "loss": 0.5889, "step": 20480 }, { "epoch": 0.5980934584896571, "grad_norm": 0.5071864497361676, "learning_rate": 4.4479589078129225e-05, "loss": 0.5906, "step": 20485 }, { "epoch": 0.5982394417599743, "grad_norm": 0.52497094332014, "learning_rate": 4.447688564476886e-05, "loss": 0.6043, "step": 20490 }, { "epoch": 0.5983854250302916, "grad_norm": 0.5565559291252802, "learning_rate": 4.4474182211408486e-05, "loss": 0.6023, "step": 20495 }, { "epoch": 0.5985314083006088, "grad_norm": 0.4879308749318239, "learning_rate": 4.447147877804813e-05, "loss": 0.6112, "step": 20500 }, { "epoch": 0.5986773915709259, "grad_norm": 0.5279044691055472, "learning_rate": 4.446877534468776e-05, "loss": 0.6068, "step": 20505 }, { "epoch": 0.5988233748412432, "grad_norm": 0.5390560579456158, "learning_rate": 4.446607191132739e-05, "loss": 0.6063, "step": 20510 }, { "epoch": 0.5989693581115604, "grad_norm": 0.5358240321600899, "learning_rate": 4.446336847796702e-05, "loss": 0.6335, "step": 20515 }, { "epoch": 0.5991153413818776, "grad_norm": 0.5182107610488383, "learning_rate": 4.4460665044606656e-05, "loss": 0.6445, "step": 20520 }, { "epoch": 0.5992613246521948, "grad_norm": 0.5090714903373653, "learning_rate": 4.445796161124628e-05, "loss": 0.5816, "step": 20525 }, { "epoch": 0.5994073079225121, "grad_norm": 0.5198032375864369, "learning_rate": 4.445525817788592e-05, "loss": 0.6092, "step": 20530 }, { "epoch": 0.5995532911928293, "grad_norm": 0.5140070724726874, "learning_rate": 4.445255474452555e-05, "loss": 0.5876, "step": 20535 }, { "epoch": 0.5996992744631465, "grad_norm": 0.5057544872039856, "learning_rate": 4.444985131116518e-05, "loss": 0.6019, "step": 20540 }, { "epoch": 0.5998452577334638, "grad_norm": 0.47040841099166647, "learning_rate": 4.444714787780481e-05, "loss": 0.6011, "step": 20545 }, { "epoch": 0.599991241003781, "grad_norm": 0.5523570556596858, "learning_rate": 4.4444444444444447e-05, "loss": 0.6198, "step": 20550 }, { "epoch": 0.6001372242740982, "grad_norm": 0.5212417427561986, "learning_rate": 4.444174101108408e-05, "loss": 0.5744, "step": 20555 }, { "epoch": 0.6002832075444154, "grad_norm": 0.46756194150961383, "learning_rate": 4.4439037577723715e-05, "loss": 0.5806, "step": 20560 }, { "epoch": 0.6004291908147327, "grad_norm": 0.49554211860830466, "learning_rate": 4.443633414436334e-05, "loss": 0.6124, "step": 20565 }, { "epoch": 0.6005751740850499, "grad_norm": 0.5208937029659171, "learning_rate": 4.4433630711002976e-05, "loss": 0.6066, "step": 20570 }, { "epoch": 0.6007211573553671, "grad_norm": 0.5375256500068133, "learning_rate": 4.443092727764261e-05, "loss": 0.5913, "step": 20575 }, { "epoch": 0.6008671406256842, "grad_norm": 0.5487597533441363, "learning_rate": 4.4428223844282244e-05, "loss": 0.6411, "step": 20580 }, { "epoch": 0.6010131238960015, "grad_norm": 0.5379633763575016, "learning_rate": 4.442552041092187e-05, "loss": 0.5945, "step": 20585 }, { "epoch": 0.6011591071663187, "grad_norm": 0.48660290650286564, "learning_rate": 4.4422816977561505e-05, "loss": 0.5876, "step": 20590 }, { "epoch": 0.6013050904366359, "grad_norm": 0.5004852541319809, "learning_rate": 4.442011354420114e-05, "loss": 0.5797, "step": 20595 }, { "epoch": 0.6014510737069532, "grad_norm": 0.563574265169442, "learning_rate": 4.4417410110840766e-05, "loss": 0.5979, "step": 20600 }, { "epoch": 0.6015970569772704, "grad_norm": 0.4843977162026039, "learning_rate": 4.44147066774804e-05, "loss": 0.5814, "step": 20605 }, { "epoch": 0.6017430402475876, "grad_norm": 0.5389781072455642, "learning_rate": 4.4412003244120034e-05, "loss": 0.6051, "step": 20610 }, { "epoch": 0.6018890235179049, "grad_norm": 0.5113656476876339, "learning_rate": 4.440929981075967e-05, "loss": 0.6041, "step": 20615 }, { "epoch": 0.6020350067882221, "grad_norm": 0.49915922031475857, "learning_rate": 4.44065963773993e-05, "loss": 0.6097, "step": 20620 }, { "epoch": 0.6021809900585393, "grad_norm": 0.5364947213110085, "learning_rate": 4.440389294403893e-05, "loss": 0.6243, "step": 20625 }, { "epoch": 0.6023269733288565, "grad_norm": 0.5262224632896941, "learning_rate": 4.4401189510678564e-05, "loss": 0.6166, "step": 20630 }, { "epoch": 0.6024729565991738, "grad_norm": 0.5174001243434854, "learning_rate": 4.43984860773182e-05, "loss": 0.6182, "step": 20635 }, { "epoch": 0.602618939869491, "grad_norm": 0.5214001840152089, "learning_rate": 4.439578264395783e-05, "loss": 0.5914, "step": 20640 }, { "epoch": 0.6027649231398082, "grad_norm": 0.4858824087821569, "learning_rate": 4.439307921059746e-05, "loss": 0.5835, "step": 20645 }, { "epoch": 0.6029109064101253, "grad_norm": 0.5483592364140324, "learning_rate": 4.439037577723709e-05, "loss": 0.6044, "step": 20650 }, { "epoch": 0.6030568896804426, "grad_norm": 0.5180987096234814, "learning_rate": 4.438767234387673e-05, "loss": 0.5915, "step": 20655 }, { "epoch": 0.6032028729507598, "grad_norm": 0.5155135303175828, "learning_rate": 4.4384968910516354e-05, "loss": 0.6528, "step": 20660 }, { "epoch": 0.603348856221077, "grad_norm": 0.5399306895078531, "learning_rate": 4.438226547715599e-05, "loss": 0.5943, "step": 20665 }, { "epoch": 0.6034948394913943, "grad_norm": 0.514999388828853, "learning_rate": 4.437956204379562e-05, "loss": 0.5884, "step": 20670 }, { "epoch": 0.6036408227617115, "grad_norm": 0.7178753882492682, "learning_rate": 4.4376858610435256e-05, "loss": 0.6076, "step": 20675 }, { "epoch": 0.6037868060320287, "grad_norm": 0.5221980035308166, "learning_rate": 4.437415517707489e-05, "loss": 0.6138, "step": 20680 }, { "epoch": 0.603932789302346, "grad_norm": 0.5137050680199421, "learning_rate": 4.437145174371452e-05, "loss": 0.5939, "step": 20685 }, { "epoch": 0.6040787725726632, "grad_norm": 0.5091719652154596, "learning_rate": 4.436874831035415e-05, "loss": 0.5759, "step": 20690 }, { "epoch": 0.6042247558429804, "grad_norm": 0.5094391297175037, "learning_rate": 4.4366044876993785e-05, "loss": 0.591, "step": 20695 }, { "epoch": 0.6043707391132976, "grad_norm": 0.48911013887540894, "learning_rate": 4.436334144363341e-05, "loss": 0.5999, "step": 20700 }, { "epoch": 0.6045167223836149, "grad_norm": 0.5279191174613918, "learning_rate": 4.436063801027305e-05, "loss": 0.5918, "step": 20705 }, { "epoch": 0.6046627056539321, "grad_norm": 0.5202190049377718, "learning_rate": 4.435793457691268e-05, "loss": 0.6169, "step": 20710 }, { "epoch": 0.6048086889242493, "grad_norm": 0.49500060488518965, "learning_rate": 4.4355231143552315e-05, "loss": 0.586, "step": 20715 }, { "epoch": 0.6049546721945666, "grad_norm": 0.49963282795881186, "learning_rate": 4.435252771019194e-05, "loss": 0.6164, "step": 20720 }, { "epoch": 0.6051006554648837, "grad_norm": 0.5550540853202938, "learning_rate": 4.434982427683158e-05, "loss": 0.6146, "step": 20725 }, { "epoch": 0.6052466387352009, "grad_norm": 0.5008760437348664, "learning_rate": 4.434712084347121e-05, "loss": 0.5696, "step": 20730 }, { "epoch": 0.6053926220055181, "grad_norm": 0.48754413055070367, "learning_rate": 4.4344417410110844e-05, "loss": 0.5908, "step": 20735 }, { "epoch": 0.6055386052758354, "grad_norm": 0.5377099325549712, "learning_rate": 4.434171397675048e-05, "loss": 0.5796, "step": 20740 }, { "epoch": 0.6056845885461526, "grad_norm": 0.47970002673329576, "learning_rate": 4.4339010543390105e-05, "loss": 0.6003, "step": 20745 }, { "epoch": 0.6058305718164698, "grad_norm": 0.5303179703427112, "learning_rate": 4.433630711002974e-05, "loss": 0.6283, "step": 20750 }, { "epoch": 0.605976555086787, "grad_norm": 0.5440558832939429, "learning_rate": 4.433360367666937e-05, "loss": 0.6064, "step": 20755 }, { "epoch": 0.6061225383571043, "grad_norm": 0.5744483527644229, "learning_rate": 4.4330900243309e-05, "loss": 0.5793, "step": 20760 }, { "epoch": 0.6062685216274215, "grad_norm": 0.5075998138043175, "learning_rate": 4.4328196809948635e-05, "loss": 0.5868, "step": 20765 }, { "epoch": 0.6064145048977387, "grad_norm": 0.5138860697012193, "learning_rate": 4.432549337658827e-05, "loss": 0.6196, "step": 20770 }, { "epoch": 0.606560488168056, "grad_norm": 0.5654867280170687, "learning_rate": 4.43227899432279e-05, "loss": 0.605, "step": 20775 }, { "epoch": 0.6067064714383732, "grad_norm": 0.5494805256196598, "learning_rate": 4.4320086509867537e-05, "loss": 0.5859, "step": 20780 }, { "epoch": 0.6068524547086904, "grad_norm": 0.4482620171364903, "learning_rate": 4.431738307650717e-05, "loss": 0.5529, "step": 20785 }, { "epoch": 0.6069984379790077, "grad_norm": 0.5420859613960199, "learning_rate": 4.43146796431468e-05, "loss": 0.6215, "step": 20790 }, { "epoch": 0.6071444212493248, "grad_norm": 0.5095708215724094, "learning_rate": 4.431197620978643e-05, "loss": 0.6152, "step": 20795 }, { "epoch": 0.607290404519642, "grad_norm": 0.5661139739037725, "learning_rate": 4.4309272776426066e-05, "loss": 0.5754, "step": 20800 }, { "epoch": 0.6074363877899592, "grad_norm": 0.5277879341375875, "learning_rate": 4.430656934306569e-05, "loss": 0.5916, "step": 20805 }, { "epoch": 0.6075823710602765, "grad_norm": 0.5515367203244125, "learning_rate": 4.430386590970533e-05, "loss": 0.6154, "step": 20810 }, { "epoch": 0.6077283543305937, "grad_norm": 0.530164173969362, "learning_rate": 4.430116247634496e-05, "loss": 0.6312, "step": 20815 }, { "epoch": 0.6078743376009109, "grad_norm": 0.4905274602501183, "learning_rate": 4.429845904298459e-05, "loss": 0.5688, "step": 20820 }, { "epoch": 0.6080203208712281, "grad_norm": 0.5192029192312869, "learning_rate": 4.429575560962422e-05, "loss": 0.5827, "step": 20825 }, { "epoch": 0.6081663041415454, "grad_norm": 0.4928899782830867, "learning_rate": 4.4293052176263856e-05, "loss": 0.6127, "step": 20830 }, { "epoch": 0.6083122874118626, "grad_norm": 0.5025629869431767, "learning_rate": 4.4290348742903484e-05, "loss": 0.6254, "step": 20835 }, { "epoch": 0.6084582706821798, "grad_norm": 0.49865537446458796, "learning_rate": 4.4287645309543124e-05, "loss": 0.6415, "step": 20840 }, { "epoch": 0.6086042539524971, "grad_norm": 0.5691812162020967, "learning_rate": 4.428494187618276e-05, "loss": 0.619, "step": 20845 }, { "epoch": 0.6087502372228143, "grad_norm": 0.5006360261840475, "learning_rate": 4.4282238442822386e-05, "loss": 0.6329, "step": 20850 }, { "epoch": 0.6088962204931315, "grad_norm": 0.48383612990606556, "learning_rate": 4.427953500946202e-05, "loss": 0.5738, "step": 20855 }, { "epoch": 0.6090422037634488, "grad_norm": 0.46195313267111127, "learning_rate": 4.4276831576101654e-05, "loss": 0.5987, "step": 20860 }, { "epoch": 0.609188187033766, "grad_norm": 0.529998244068242, "learning_rate": 4.427412814274128e-05, "loss": 0.6063, "step": 20865 }, { "epoch": 0.6093341703040831, "grad_norm": 0.5162451346796754, "learning_rate": 4.4271424709380915e-05, "loss": 0.5805, "step": 20870 }, { "epoch": 0.6094801535744003, "grad_norm": 0.47820218620789806, "learning_rate": 4.426872127602055e-05, "loss": 0.5821, "step": 20875 }, { "epoch": 0.6096261368447176, "grad_norm": 0.48729168947060475, "learning_rate": 4.4266017842660176e-05, "loss": 0.5853, "step": 20880 }, { "epoch": 0.6097721201150348, "grad_norm": 0.49462801488171554, "learning_rate": 4.426331440929981e-05, "loss": 0.5894, "step": 20885 }, { "epoch": 0.609918103385352, "grad_norm": 0.5522745239670972, "learning_rate": 4.4260610975939444e-05, "loss": 0.6402, "step": 20890 }, { "epoch": 0.6100640866556692, "grad_norm": 0.5640565283475651, "learning_rate": 4.425790754257908e-05, "loss": 0.6089, "step": 20895 }, { "epoch": 0.6102100699259865, "grad_norm": 0.48466467440940125, "learning_rate": 4.425520410921871e-05, "loss": 0.5777, "step": 20900 }, { "epoch": 0.6103560531963037, "grad_norm": 0.5249766174452263, "learning_rate": 4.4252500675858346e-05, "loss": 0.6246, "step": 20905 }, { "epoch": 0.6105020364666209, "grad_norm": 0.5817823645098399, "learning_rate": 4.4249797242497973e-05, "loss": 0.6248, "step": 20910 }, { "epoch": 0.6106480197369382, "grad_norm": 0.5420074921414026, "learning_rate": 4.424709380913761e-05, "loss": 0.6192, "step": 20915 }, { "epoch": 0.6107940030072554, "grad_norm": 0.4972005609705799, "learning_rate": 4.424439037577724e-05, "loss": 0.6108, "step": 20920 }, { "epoch": 0.6109399862775726, "grad_norm": 0.5456948149781626, "learning_rate": 4.424168694241687e-05, "loss": 0.6437, "step": 20925 }, { "epoch": 0.6110859695478899, "grad_norm": 0.5311538425550065, "learning_rate": 4.42389835090565e-05, "loss": 0.5947, "step": 20930 }, { "epoch": 0.6112319528182071, "grad_norm": 0.5010594171338583, "learning_rate": 4.423628007569614e-05, "loss": 0.5723, "step": 20935 }, { "epoch": 0.6113779360885243, "grad_norm": 0.5133312815770233, "learning_rate": 4.4233576642335764e-05, "loss": 0.5895, "step": 20940 }, { "epoch": 0.6115239193588414, "grad_norm": 0.4854802561329222, "learning_rate": 4.42308732089754e-05, "loss": 0.5467, "step": 20945 }, { "epoch": 0.6116699026291587, "grad_norm": 0.5576868392777251, "learning_rate": 4.422816977561504e-05, "loss": 0.6009, "step": 20950 }, { "epoch": 0.6118158858994759, "grad_norm": 0.5538278782573239, "learning_rate": 4.4225466342254666e-05, "loss": 0.5984, "step": 20955 }, { "epoch": 0.6119618691697931, "grad_norm": 0.5214315251088358, "learning_rate": 4.42227629088943e-05, "loss": 0.592, "step": 20960 }, { "epoch": 0.6121078524401103, "grad_norm": 0.4960522045693009, "learning_rate": 4.4220059475533934e-05, "loss": 0.5782, "step": 20965 }, { "epoch": 0.6122538357104276, "grad_norm": 0.4917182421293515, "learning_rate": 4.421735604217356e-05, "loss": 0.6027, "step": 20970 }, { "epoch": 0.6123998189807448, "grad_norm": 0.5230371959991441, "learning_rate": 4.4214652608813195e-05, "loss": 0.623, "step": 20975 }, { "epoch": 0.612545802251062, "grad_norm": 0.5010042850075599, "learning_rate": 4.421194917545283e-05, "loss": 0.6178, "step": 20980 }, { "epoch": 0.6126917855213793, "grad_norm": 0.5430953417628727, "learning_rate": 4.4209245742092456e-05, "loss": 0.6267, "step": 20985 }, { "epoch": 0.6128377687916965, "grad_norm": 0.49942775689164953, "learning_rate": 4.420654230873209e-05, "loss": 0.606, "step": 20990 }, { "epoch": 0.6129837520620137, "grad_norm": 0.5022437535582666, "learning_rate": 4.4203838875371725e-05, "loss": 0.5976, "step": 20995 }, { "epoch": 0.613129735332331, "grad_norm": 0.532162097436244, "learning_rate": 4.420113544201135e-05, "loss": 0.5948, "step": 21000 }, { "epoch": 0.6132757186026482, "grad_norm": 0.4768521452375743, "learning_rate": 4.4198432008650986e-05, "loss": 0.5904, "step": 21005 }, { "epoch": 0.6134217018729654, "grad_norm": 0.5041044376771066, "learning_rate": 4.4195728575290627e-05, "loss": 0.5855, "step": 21010 }, { "epoch": 0.6135676851432825, "grad_norm": 0.5098164535026258, "learning_rate": 4.4193025141930254e-05, "loss": 0.5995, "step": 21015 }, { "epoch": 0.6137136684135998, "grad_norm": 0.5713526353868127, "learning_rate": 4.419032170856989e-05, "loss": 0.5935, "step": 21020 }, { "epoch": 0.613859651683917, "grad_norm": 0.5143169582956499, "learning_rate": 4.418761827520952e-05, "loss": 0.6138, "step": 21025 }, { "epoch": 0.6140056349542342, "grad_norm": 0.5099916758586557, "learning_rate": 4.418491484184915e-05, "loss": 0.6144, "step": 21030 }, { "epoch": 0.6141516182245514, "grad_norm": 0.5938312913727845, "learning_rate": 4.418221140848878e-05, "loss": 0.6199, "step": 21035 }, { "epoch": 0.6142976014948687, "grad_norm": 0.5039208762533423, "learning_rate": 4.417950797512842e-05, "loss": 0.6026, "step": 21040 }, { "epoch": 0.6144435847651859, "grad_norm": 0.5044740757760828, "learning_rate": 4.4176804541768044e-05, "loss": 0.5905, "step": 21045 }, { "epoch": 0.6145895680355031, "grad_norm": 0.5568241447992995, "learning_rate": 4.417410110840768e-05, "loss": 0.5882, "step": 21050 }, { "epoch": 0.6147355513058204, "grad_norm": 0.4967137094417688, "learning_rate": 4.417139767504731e-05, "loss": 0.6012, "step": 21055 }, { "epoch": 0.6148815345761376, "grad_norm": 0.5650671800633594, "learning_rate": 4.416869424168694e-05, "loss": 0.603, "step": 21060 }, { "epoch": 0.6150275178464548, "grad_norm": 0.5101640278066437, "learning_rate": 4.416599080832658e-05, "loss": 0.6118, "step": 21065 }, { "epoch": 0.615173501116772, "grad_norm": 0.5360608749410234, "learning_rate": 4.4163287374966214e-05, "loss": 0.5582, "step": 21070 }, { "epoch": 0.6153194843870893, "grad_norm": 0.4999837154179035, "learning_rate": 4.416058394160584e-05, "loss": 0.6175, "step": 21075 }, { "epoch": 0.6154654676574065, "grad_norm": 0.5138177304732291, "learning_rate": 4.4157880508245476e-05, "loss": 0.5875, "step": 21080 }, { "epoch": 0.6156114509277237, "grad_norm": 0.4887784568480807, "learning_rate": 4.415517707488511e-05, "loss": 0.645, "step": 21085 }, { "epoch": 0.6157574341980409, "grad_norm": 0.4907766140358094, "learning_rate": 4.415247364152474e-05, "loss": 0.6083, "step": 21090 }, { "epoch": 0.6159034174683581, "grad_norm": 0.5595208122321178, "learning_rate": 4.414977020816437e-05, "loss": 0.5996, "step": 21095 }, { "epoch": 0.6160494007386753, "grad_norm": 0.4871128280119613, "learning_rate": 4.4147066774804005e-05, "loss": 0.59, "step": 21100 }, { "epoch": 0.6161953840089925, "grad_norm": 0.5363553909938078, "learning_rate": 4.414436334144363e-05, "loss": 0.6229, "step": 21105 }, { "epoch": 0.6163413672793098, "grad_norm": 0.5164375576768699, "learning_rate": 4.4141659908083266e-05, "loss": 0.6073, "step": 21110 }, { "epoch": 0.616487350549627, "grad_norm": 0.523233112049689, "learning_rate": 4.41389564747229e-05, "loss": 0.6049, "step": 21115 }, { "epoch": 0.6166333338199442, "grad_norm": 0.5167057655303416, "learning_rate": 4.4136253041362534e-05, "loss": 0.601, "step": 21120 }, { "epoch": 0.6167793170902615, "grad_norm": 0.49693106603957443, "learning_rate": 4.413354960800217e-05, "loss": 0.5679, "step": 21125 }, { "epoch": 0.6169253003605787, "grad_norm": 0.4767488102964855, "learning_rate": 4.41308461746418e-05, "loss": 0.5874, "step": 21130 }, { "epoch": 0.6170712836308959, "grad_norm": 0.5197591074340119, "learning_rate": 4.412814274128143e-05, "loss": 0.5953, "step": 21135 }, { "epoch": 0.6172172669012131, "grad_norm": 0.514304892697331, "learning_rate": 4.4125439307921063e-05, "loss": 0.5984, "step": 21140 }, { "epoch": 0.6173632501715304, "grad_norm": 0.4694461317714241, "learning_rate": 4.41227358745607e-05, "loss": 0.5927, "step": 21145 }, { "epoch": 0.6175092334418476, "grad_norm": 0.5238676983744883, "learning_rate": 4.4120032441200325e-05, "loss": 0.6126, "step": 21150 }, { "epoch": 0.6176552167121648, "grad_norm": 0.5130892734252291, "learning_rate": 4.411732900783996e-05, "loss": 0.6059, "step": 21155 }, { "epoch": 0.617801199982482, "grad_norm": 0.5032789822008664, "learning_rate": 4.411462557447959e-05, "loss": 0.5773, "step": 21160 }, { "epoch": 0.6179471832527992, "grad_norm": 0.47745695295721935, "learning_rate": 4.411192214111922e-05, "loss": 0.5655, "step": 21165 }, { "epoch": 0.6180931665231164, "grad_norm": 0.4988558205202525, "learning_rate": 4.4109218707758854e-05, "loss": 0.6145, "step": 21170 }, { "epoch": 0.6182391497934336, "grad_norm": 0.8200207530055998, "learning_rate": 4.410651527439849e-05, "loss": 0.5832, "step": 21175 }, { "epoch": 0.6183851330637509, "grad_norm": 0.5516218263645802, "learning_rate": 4.410381184103812e-05, "loss": 0.5806, "step": 21180 }, { "epoch": 0.6185311163340681, "grad_norm": 0.48162537694982394, "learning_rate": 4.4101108407677756e-05, "loss": 0.574, "step": 21185 }, { "epoch": 0.6186770996043853, "grad_norm": 0.5002104580927912, "learning_rate": 4.409840497431739e-05, "loss": 0.6049, "step": 21190 }, { "epoch": 0.6188230828747026, "grad_norm": 0.5302332781389664, "learning_rate": 4.409570154095702e-05, "loss": 0.5975, "step": 21195 }, { "epoch": 0.6189690661450198, "grad_norm": 0.534750973393743, "learning_rate": 4.409299810759665e-05, "loss": 0.61, "step": 21200 }, { "epoch": 0.619115049415337, "grad_norm": 0.46425025005792964, "learning_rate": 4.4090294674236285e-05, "loss": 0.6009, "step": 21205 }, { "epoch": 0.6192610326856542, "grad_norm": 0.4803585366481212, "learning_rate": 4.408759124087591e-05, "loss": 0.5826, "step": 21210 }, { "epoch": 0.6194070159559715, "grad_norm": 0.5010556643630893, "learning_rate": 4.4084887807515546e-05, "loss": 0.5657, "step": 21215 }, { "epoch": 0.6195529992262887, "grad_norm": 0.5090228871832398, "learning_rate": 4.408218437415518e-05, "loss": 0.5974, "step": 21220 }, { "epoch": 0.6196989824966059, "grad_norm": 0.49463659975283847, "learning_rate": 4.407948094079481e-05, "loss": 0.5927, "step": 21225 }, { "epoch": 0.6198449657669232, "grad_norm": 0.4810922383842335, "learning_rate": 4.407677750743444e-05, "loss": 0.6, "step": 21230 }, { "epoch": 0.6199909490372403, "grad_norm": 0.5252574882142202, "learning_rate": 4.4074074074074076e-05, "loss": 0.5811, "step": 21235 }, { "epoch": 0.6201369323075575, "grad_norm": 0.525744473455599, "learning_rate": 4.407137064071371e-05, "loss": 0.5882, "step": 21240 }, { "epoch": 0.6202829155778747, "grad_norm": 0.564223028439012, "learning_rate": 4.4068667207353344e-05, "loss": 0.6011, "step": 21245 }, { "epoch": 0.620428898848192, "grad_norm": 0.5238641179613267, "learning_rate": 4.406596377399297e-05, "loss": 0.5944, "step": 21250 }, { "epoch": 0.6205748821185092, "grad_norm": 0.543686533669037, "learning_rate": 4.4063260340632605e-05, "loss": 0.5725, "step": 21255 }, { "epoch": 0.6207208653888264, "grad_norm": 0.5649517014043418, "learning_rate": 4.406055690727224e-05, "loss": 0.6024, "step": 21260 }, { "epoch": 0.6208668486591437, "grad_norm": 0.5055659426848336, "learning_rate": 4.405785347391187e-05, "loss": 0.6073, "step": 21265 }, { "epoch": 0.6210128319294609, "grad_norm": 0.5436256036167315, "learning_rate": 4.40551500405515e-05, "loss": 0.6334, "step": 21270 }, { "epoch": 0.6211588151997781, "grad_norm": 0.5242524692102498, "learning_rate": 4.4052446607191134e-05, "loss": 0.5778, "step": 21275 }, { "epoch": 0.6213047984700953, "grad_norm": 0.5872555472841748, "learning_rate": 4.404974317383077e-05, "loss": 0.6252, "step": 21280 }, { "epoch": 0.6214507817404126, "grad_norm": 0.4971657951307337, "learning_rate": 4.4047039740470396e-05, "loss": 0.5796, "step": 21285 }, { "epoch": 0.6215967650107298, "grad_norm": 0.4839563192352177, "learning_rate": 4.4044336307110036e-05, "loss": 0.5566, "step": 21290 }, { "epoch": 0.621742748281047, "grad_norm": 0.5333155237124771, "learning_rate": 4.4041632873749664e-05, "loss": 0.5929, "step": 21295 }, { "epoch": 0.6218887315513643, "grad_norm": 0.47886690340209653, "learning_rate": 4.40389294403893e-05, "loss": 0.5965, "step": 21300 }, { "epoch": 0.6220347148216815, "grad_norm": 0.5069899705104559, "learning_rate": 4.403622600702893e-05, "loss": 0.5653, "step": 21305 }, { "epoch": 0.6221806980919986, "grad_norm": 0.5818089431952413, "learning_rate": 4.403352257366856e-05, "loss": 0.6075, "step": 21310 }, { "epoch": 0.6223266813623158, "grad_norm": 0.5162811879506826, "learning_rate": 4.403081914030819e-05, "loss": 0.6089, "step": 21315 }, { "epoch": 0.6224726646326331, "grad_norm": 0.5075209213360083, "learning_rate": 4.402811570694783e-05, "loss": 0.6024, "step": 21320 }, { "epoch": 0.6226186479029503, "grad_norm": 0.541641891308563, "learning_rate": 4.402541227358746e-05, "loss": 0.5763, "step": 21325 }, { "epoch": 0.6227646311732675, "grad_norm": 0.5320396167549213, "learning_rate": 4.402270884022709e-05, "loss": 0.6063, "step": 21330 }, { "epoch": 0.6229106144435848, "grad_norm": 0.5451618867756806, "learning_rate": 4.402000540686672e-05, "loss": 0.6161, "step": 21335 }, { "epoch": 0.623056597713902, "grad_norm": 0.5057019471792489, "learning_rate": 4.4017301973506356e-05, "loss": 0.5816, "step": 21340 }, { "epoch": 0.6232025809842192, "grad_norm": 0.4879198966216065, "learning_rate": 4.401459854014598e-05, "loss": 0.6123, "step": 21345 }, { "epoch": 0.6233485642545364, "grad_norm": 0.5588819020372462, "learning_rate": 4.4011895106785624e-05, "loss": 0.578, "step": 21350 }, { "epoch": 0.6234945475248537, "grad_norm": 0.4724703470171056, "learning_rate": 4.400919167342525e-05, "loss": 0.571, "step": 21355 }, { "epoch": 0.6236405307951709, "grad_norm": 0.5049071974051081, "learning_rate": 4.4006488240064885e-05, "loss": 0.5967, "step": 21360 }, { "epoch": 0.6237865140654881, "grad_norm": 0.5160116255633587, "learning_rate": 4.400378480670452e-05, "loss": 0.5939, "step": 21365 }, { "epoch": 0.6239324973358054, "grad_norm": 0.5244353887542182, "learning_rate": 4.4001081373344147e-05, "loss": 0.6207, "step": 21370 }, { "epoch": 0.6240784806061226, "grad_norm": 0.5466991621524058, "learning_rate": 4.399837793998378e-05, "loss": 0.5654, "step": 21375 }, { "epoch": 0.6242244638764397, "grad_norm": 0.49212875277479173, "learning_rate": 4.3995674506623415e-05, "loss": 0.6159, "step": 21380 }, { "epoch": 0.6243704471467569, "grad_norm": 0.5333492318489355, "learning_rate": 4.399297107326304e-05, "loss": 0.5957, "step": 21385 }, { "epoch": 0.6245164304170742, "grad_norm": 0.49538087163504163, "learning_rate": 4.3990267639902676e-05, "loss": 0.6317, "step": 21390 }, { "epoch": 0.6246624136873914, "grad_norm": 0.5080433168784253, "learning_rate": 4.398756420654231e-05, "loss": 0.6384, "step": 21395 }, { "epoch": 0.6248083969577086, "grad_norm": 0.5080808539801239, "learning_rate": 4.3984860773181944e-05, "loss": 0.5742, "step": 21400 }, { "epoch": 0.6249543802280259, "grad_norm": 0.4933792789422375, "learning_rate": 4.398215733982158e-05, "loss": 0.575, "step": 21405 }, { "epoch": 0.6251003634983431, "grad_norm": 0.46374814612167203, "learning_rate": 4.397945390646121e-05, "loss": 0.5604, "step": 21410 }, { "epoch": 0.6252463467686603, "grad_norm": 0.5140624783154042, "learning_rate": 4.397675047310084e-05, "loss": 0.5655, "step": 21415 }, { "epoch": 0.6253923300389775, "grad_norm": 0.4903458561444517, "learning_rate": 4.397404703974047e-05, "loss": 0.621, "step": 21420 }, { "epoch": 0.6255383133092948, "grad_norm": 0.5090799531066662, "learning_rate": 4.397134360638011e-05, "loss": 0.5886, "step": 21425 }, { "epoch": 0.625684296579612, "grad_norm": 0.4987433735754847, "learning_rate": 4.3968640173019734e-05, "loss": 0.5667, "step": 21430 }, { "epoch": 0.6258302798499292, "grad_norm": 0.5071354389167887, "learning_rate": 4.396593673965937e-05, "loss": 0.5829, "step": 21435 }, { "epoch": 0.6259762631202465, "grad_norm": 0.5423142084350709, "learning_rate": 4.3963233306299e-05, "loss": 0.6076, "step": 21440 }, { "epoch": 0.6261222463905637, "grad_norm": 0.5194197372255185, "learning_rate": 4.396052987293863e-05, "loss": 0.6199, "step": 21445 }, { "epoch": 0.6262682296608809, "grad_norm": 0.4746611343454845, "learning_rate": 4.3957826439578264e-05, "loss": 0.6074, "step": 21450 }, { "epoch": 0.626414212931198, "grad_norm": 0.49229288954037437, "learning_rate": 4.39551230062179e-05, "loss": 0.5814, "step": 21455 }, { "epoch": 0.6265601962015153, "grad_norm": 0.4798646004287722, "learning_rate": 4.395241957285753e-05, "loss": 0.5764, "step": 21460 }, { "epoch": 0.6267061794718325, "grad_norm": 0.49029341647590663, "learning_rate": 4.3949716139497166e-05, "loss": 0.575, "step": 21465 }, { "epoch": 0.6268521627421497, "grad_norm": 0.5043858722721078, "learning_rate": 4.39470127061368e-05, "loss": 0.6255, "step": 21470 }, { "epoch": 0.626998146012467, "grad_norm": 0.5162870187222338, "learning_rate": 4.394430927277643e-05, "loss": 0.6339, "step": 21475 }, { "epoch": 0.6271441292827842, "grad_norm": 0.5253947048321964, "learning_rate": 4.394160583941606e-05, "loss": 0.5969, "step": 21480 }, { "epoch": 0.6272901125531014, "grad_norm": 0.5389385130073833, "learning_rate": 4.3938902406055695e-05, "loss": 0.6301, "step": 21485 }, { "epoch": 0.6274360958234186, "grad_norm": 0.5024873214216337, "learning_rate": 4.393619897269532e-05, "loss": 0.5776, "step": 21490 }, { "epoch": 0.6275820790937359, "grad_norm": 0.5114522818930629, "learning_rate": 4.3933495539334956e-05, "loss": 0.6251, "step": 21495 }, { "epoch": 0.6277280623640531, "grad_norm": 0.49686442586487606, "learning_rate": 4.393079210597459e-05, "loss": 0.557, "step": 21500 }, { "epoch": 0.6278740456343703, "grad_norm": 0.46364107069090715, "learning_rate": 4.392808867261422e-05, "loss": 0.5953, "step": 21505 }, { "epoch": 0.6280200289046876, "grad_norm": 0.4732362381349413, "learning_rate": 4.392538523925385e-05, "loss": 0.5796, "step": 21510 }, { "epoch": 0.6281660121750048, "grad_norm": 0.5127593222129654, "learning_rate": 4.3922681805893486e-05, "loss": 0.6081, "step": 21515 }, { "epoch": 0.628311995445322, "grad_norm": 0.4934708408496504, "learning_rate": 4.391997837253312e-05, "loss": 0.5919, "step": 21520 }, { "epoch": 0.6284579787156392, "grad_norm": 0.5090539845173194, "learning_rate": 4.3917274939172754e-05, "loss": 0.6008, "step": 21525 }, { "epoch": 0.6286039619859564, "grad_norm": 0.5147930015214242, "learning_rate": 4.391457150581239e-05, "loss": 0.6051, "step": 21530 }, { "epoch": 0.6287499452562736, "grad_norm": 0.6441234174520556, "learning_rate": 4.3911868072452015e-05, "loss": 0.6203, "step": 21535 }, { "epoch": 0.6288959285265908, "grad_norm": 0.48458229596322905, "learning_rate": 4.390916463909165e-05, "loss": 0.5855, "step": 21540 }, { "epoch": 0.629041911796908, "grad_norm": 0.47868292663542905, "learning_rate": 4.390646120573128e-05, "loss": 0.576, "step": 21545 }, { "epoch": 0.6291878950672253, "grad_norm": 0.49934172780838176, "learning_rate": 4.390375777237091e-05, "loss": 0.6037, "step": 21550 }, { "epoch": 0.6293338783375425, "grad_norm": 0.521363308354067, "learning_rate": 4.3901054339010544e-05, "loss": 0.62, "step": 21555 }, { "epoch": 0.6294798616078597, "grad_norm": 0.5554578892506042, "learning_rate": 4.389835090565018e-05, "loss": 0.6315, "step": 21560 }, { "epoch": 0.629625844878177, "grad_norm": 0.5182143519281445, "learning_rate": 4.3895647472289805e-05, "loss": 0.5978, "step": 21565 }, { "epoch": 0.6297718281484942, "grad_norm": 0.517875122399466, "learning_rate": 4.389294403892944e-05, "loss": 0.5718, "step": 21570 }, { "epoch": 0.6299178114188114, "grad_norm": 0.5731017798149856, "learning_rate": 4.389024060556908e-05, "loss": 0.6219, "step": 21575 }, { "epoch": 0.6300637946891287, "grad_norm": 0.4911146143164229, "learning_rate": 4.388753717220871e-05, "loss": 0.5947, "step": 21580 }, { "epoch": 0.6302097779594459, "grad_norm": 0.5918123063294309, "learning_rate": 4.388483373884834e-05, "loss": 0.6133, "step": 21585 }, { "epoch": 0.6303557612297631, "grad_norm": 0.5574386801074545, "learning_rate": 4.3882130305487975e-05, "loss": 0.5859, "step": 21590 }, { "epoch": 0.6305017445000803, "grad_norm": 0.4685148372619283, "learning_rate": 4.38794268721276e-05, "loss": 0.5704, "step": 21595 }, { "epoch": 0.6306477277703975, "grad_norm": 0.5126279288530653, "learning_rate": 4.3876723438767237e-05, "loss": 0.5986, "step": 21600 }, { "epoch": 0.6307937110407147, "grad_norm": 0.475113618666446, "learning_rate": 4.387402000540687e-05, "loss": 0.6027, "step": 21605 }, { "epoch": 0.6309396943110319, "grad_norm": 0.5092442439299681, "learning_rate": 4.38713165720465e-05, "loss": 0.6205, "step": 21610 }, { "epoch": 0.6310856775813491, "grad_norm": 0.5210534868594731, "learning_rate": 4.386861313868613e-05, "loss": 0.6047, "step": 21615 }, { "epoch": 0.6312316608516664, "grad_norm": 0.5217579834701513, "learning_rate": 4.3865909705325766e-05, "loss": 0.6096, "step": 21620 }, { "epoch": 0.6313776441219836, "grad_norm": 0.4641744737354479, "learning_rate": 4.386320627196539e-05, "loss": 0.5995, "step": 21625 }, { "epoch": 0.6315236273923008, "grad_norm": 0.5026050919519836, "learning_rate": 4.3860502838605034e-05, "loss": 0.5978, "step": 21630 }, { "epoch": 0.6316696106626181, "grad_norm": 0.5608392683967498, "learning_rate": 4.385779940524467e-05, "loss": 0.6184, "step": 21635 }, { "epoch": 0.6318155939329353, "grad_norm": 0.5662956929138933, "learning_rate": 4.3855095971884295e-05, "loss": 0.6134, "step": 21640 }, { "epoch": 0.6319615772032525, "grad_norm": 0.5272253309868543, "learning_rate": 4.385239253852393e-05, "loss": 0.6141, "step": 21645 }, { "epoch": 0.6321075604735698, "grad_norm": 0.47915197071478965, "learning_rate": 4.384968910516356e-05, "loss": 0.6078, "step": 21650 }, { "epoch": 0.632253543743887, "grad_norm": 0.4957634341519332, "learning_rate": 4.384698567180319e-05, "loss": 0.5873, "step": 21655 }, { "epoch": 0.6323995270142042, "grad_norm": 0.5170124306931336, "learning_rate": 4.3844282238442824e-05, "loss": 0.5848, "step": 21660 }, { "epoch": 0.6325455102845214, "grad_norm": 0.5459361638882158, "learning_rate": 4.384157880508246e-05, "loss": 0.6241, "step": 21665 }, { "epoch": 0.6326914935548387, "grad_norm": 0.5524418006329656, "learning_rate": 4.3838875371722086e-05, "loss": 0.598, "step": 21670 }, { "epoch": 0.6328374768251558, "grad_norm": 0.5094106889133538, "learning_rate": 4.383617193836172e-05, "loss": 0.5636, "step": 21675 }, { "epoch": 0.632983460095473, "grad_norm": 0.5568076459910555, "learning_rate": 4.3833468505001354e-05, "loss": 0.5825, "step": 21680 }, { "epoch": 0.6331294433657902, "grad_norm": 0.5087748174504653, "learning_rate": 4.383076507164098e-05, "loss": 0.6104, "step": 21685 }, { "epoch": 0.6332754266361075, "grad_norm": 0.49802618487727407, "learning_rate": 4.382806163828062e-05, "loss": 0.5529, "step": 21690 }, { "epoch": 0.6334214099064247, "grad_norm": 0.49390455355164437, "learning_rate": 4.3825358204920256e-05, "loss": 0.6011, "step": 21695 }, { "epoch": 0.6335673931767419, "grad_norm": 0.5432212148000506, "learning_rate": 4.382265477155988e-05, "loss": 0.5915, "step": 21700 }, { "epoch": 0.6337133764470592, "grad_norm": 0.4555401443720345, "learning_rate": 4.381995133819952e-05, "loss": 0.6023, "step": 21705 }, { "epoch": 0.6338593597173764, "grad_norm": 0.5084944684959715, "learning_rate": 4.381724790483915e-05, "loss": 0.59, "step": 21710 }, { "epoch": 0.6340053429876936, "grad_norm": 0.4737221950219613, "learning_rate": 4.381454447147878e-05, "loss": 0.5604, "step": 21715 }, { "epoch": 0.6341513262580109, "grad_norm": 0.4917515598905969, "learning_rate": 4.381184103811841e-05, "loss": 0.5644, "step": 21720 }, { "epoch": 0.6342973095283281, "grad_norm": 0.5349939111413642, "learning_rate": 4.3809137604758046e-05, "loss": 0.613, "step": 21725 }, { "epoch": 0.6344432927986453, "grad_norm": 0.5954328465569032, "learning_rate": 4.3806434171397673e-05, "loss": 0.6518, "step": 21730 }, { "epoch": 0.6345892760689625, "grad_norm": 0.5278700589972037, "learning_rate": 4.380373073803731e-05, "loss": 0.6008, "step": 21735 }, { "epoch": 0.6347352593392798, "grad_norm": 0.506232654087078, "learning_rate": 4.380102730467694e-05, "loss": 0.5572, "step": 21740 }, { "epoch": 0.6348812426095969, "grad_norm": 0.4887573902506107, "learning_rate": 4.3798323871316575e-05, "loss": 0.5771, "step": 21745 }, { "epoch": 0.6350272258799141, "grad_norm": 0.6232801371447464, "learning_rate": 4.379562043795621e-05, "loss": 0.6423, "step": 21750 }, { "epoch": 0.6351732091502313, "grad_norm": 0.48027296954216475, "learning_rate": 4.3792917004595844e-05, "loss": 0.5811, "step": 21755 }, { "epoch": 0.6353191924205486, "grad_norm": 0.7194573166918629, "learning_rate": 4.379021357123547e-05, "loss": 0.5776, "step": 21760 }, { "epoch": 0.6354651756908658, "grad_norm": 0.5129255401566802, "learning_rate": 4.3787510137875105e-05, "loss": 0.5688, "step": 21765 }, { "epoch": 0.635611158961183, "grad_norm": 0.47510609518556735, "learning_rate": 4.378480670451474e-05, "loss": 0.6007, "step": 21770 }, { "epoch": 0.6357571422315003, "grad_norm": 0.4934475383147565, "learning_rate": 4.3782103271154366e-05, "loss": 0.5771, "step": 21775 }, { "epoch": 0.6359031255018175, "grad_norm": 0.49263782619487967, "learning_rate": 4.3779399837794e-05, "loss": 0.5907, "step": 21780 }, { "epoch": 0.6360491087721347, "grad_norm": 0.5318199347241628, "learning_rate": 4.3776696404433634e-05, "loss": 0.5977, "step": 21785 }, { "epoch": 0.636195092042452, "grad_norm": 0.4659070365082045, "learning_rate": 4.377399297107326e-05, "loss": 0.5763, "step": 21790 }, { "epoch": 0.6363410753127692, "grad_norm": 0.5319077356409924, "learning_rate": 4.3771289537712895e-05, "loss": 0.6004, "step": 21795 }, { "epoch": 0.6364870585830864, "grad_norm": 0.46909493225539084, "learning_rate": 4.376858610435253e-05, "loss": 0.5949, "step": 21800 }, { "epoch": 0.6366330418534036, "grad_norm": 0.523254682664049, "learning_rate": 4.376588267099216e-05, "loss": 0.6256, "step": 21805 }, { "epoch": 0.6367790251237209, "grad_norm": 0.5005314272085429, "learning_rate": 4.37631792376318e-05, "loss": 0.622, "step": 21810 }, { "epoch": 0.6369250083940381, "grad_norm": 0.4984825442318205, "learning_rate": 4.376047580427143e-05, "loss": 0.5945, "step": 21815 }, { "epoch": 0.6370709916643552, "grad_norm": 0.4854655110953347, "learning_rate": 4.375777237091106e-05, "loss": 0.5649, "step": 21820 }, { "epoch": 0.6372169749346724, "grad_norm": 0.46653990523475125, "learning_rate": 4.375506893755069e-05, "loss": 0.588, "step": 21825 }, { "epoch": 0.6373629582049897, "grad_norm": 0.515979360811768, "learning_rate": 4.3752365504190327e-05, "loss": 0.5896, "step": 21830 }, { "epoch": 0.6375089414753069, "grad_norm": 0.5011088793277492, "learning_rate": 4.3749662070829954e-05, "loss": 0.5542, "step": 21835 }, { "epoch": 0.6376549247456241, "grad_norm": 0.5450807440946768, "learning_rate": 4.374695863746959e-05, "loss": 0.6347, "step": 21840 }, { "epoch": 0.6378009080159414, "grad_norm": 0.4991002494601637, "learning_rate": 4.374425520410922e-05, "loss": 0.5832, "step": 21845 }, { "epoch": 0.6379468912862586, "grad_norm": 0.5298555196694309, "learning_rate": 4.374155177074885e-05, "loss": 0.6015, "step": 21850 }, { "epoch": 0.6380928745565758, "grad_norm": 0.4681114827743195, "learning_rate": 4.373884833738848e-05, "loss": 0.5567, "step": 21855 }, { "epoch": 0.638238857826893, "grad_norm": 0.48665006403815364, "learning_rate": 4.373614490402812e-05, "loss": 0.5829, "step": 21860 }, { "epoch": 0.6383848410972103, "grad_norm": 0.5023777945011445, "learning_rate": 4.373344147066775e-05, "loss": 0.5913, "step": 21865 }, { "epoch": 0.6385308243675275, "grad_norm": 1.132316170868049, "learning_rate": 4.3730738037307385e-05, "loss": 0.5939, "step": 21870 }, { "epoch": 0.6386768076378447, "grad_norm": 0.5404268229649342, "learning_rate": 4.372803460394701e-05, "loss": 0.5822, "step": 21875 }, { "epoch": 0.638822790908162, "grad_norm": 0.5065953442063328, "learning_rate": 4.3725331170586646e-05, "loss": 0.6302, "step": 21880 }, { "epoch": 0.6389687741784792, "grad_norm": 0.5689406670784682, "learning_rate": 4.372262773722628e-05, "loss": 0.6172, "step": 21885 }, { "epoch": 0.6391147574487964, "grad_norm": 0.4640610474776451, "learning_rate": 4.3719924303865914e-05, "loss": 0.5995, "step": 21890 }, { "epoch": 0.6392607407191135, "grad_norm": 0.5575700304838176, "learning_rate": 4.371722087050554e-05, "loss": 0.5864, "step": 21895 }, { "epoch": 0.6394067239894308, "grad_norm": 0.5136035227616439, "learning_rate": 4.3714517437145176e-05, "loss": 0.5349, "step": 21900 }, { "epoch": 0.639552707259748, "grad_norm": 0.46563246985382767, "learning_rate": 4.371181400378481e-05, "loss": 0.5896, "step": 21905 }, { "epoch": 0.6396986905300652, "grad_norm": 0.5339181014130245, "learning_rate": 4.370911057042444e-05, "loss": 0.62, "step": 21910 }, { "epoch": 0.6398446738003825, "grad_norm": 0.5708069949898202, "learning_rate": 4.370640713706408e-05, "loss": 0.5838, "step": 21915 }, { "epoch": 0.6399906570706997, "grad_norm": 0.5034606261155822, "learning_rate": 4.3703703703703705e-05, "loss": 0.6187, "step": 21920 }, { "epoch": 0.6401366403410169, "grad_norm": 0.5827707220005974, "learning_rate": 4.370100027034334e-05, "loss": 0.5734, "step": 21925 }, { "epoch": 0.6402826236113341, "grad_norm": 0.4923798314022353, "learning_rate": 4.369829683698297e-05, "loss": 0.6305, "step": 21930 }, { "epoch": 0.6404286068816514, "grad_norm": 0.471700170648044, "learning_rate": 4.36955934036226e-05, "loss": 0.5808, "step": 21935 }, { "epoch": 0.6405745901519686, "grad_norm": 0.5394450619324556, "learning_rate": 4.3692889970262234e-05, "loss": 0.6179, "step": 21940 }, { "epoch": 0.6407205734222858, "grad_norm": 0.48130175776787315, "learning_rate": 4.369018653690187e-05, "loss": 0.597, "step": 21945 }, { "epoch": 0.6408665566926031, "grad_norm": 0.5540910935413217, "learning_rate": 4.36874831035415e-05, "loss": 0.5898, "step": 21950 }, { "epoch": 0.6410125399629203, "grad_norm": 0.516838043930565, "learning_rate": 4.368477967018113e-05, "loss": 0.618, "step": 21955 }, { "epoch": 0.6411585232332375, "grad_norm": 0.5010629876136309, "learning_rate": 4.3682076236820763e-05, "loss": 0.6301, "step": 21960 }, { "epoch": 0.6413045065035546, "grad_norm": 0.5332251852521052, "learning_rate": 4.36793728034604e-05, "loss": 0.5761, "step": 21965 }, { "epoch": 0.6414504897738719, "grad_norm": 0.486231478660478, "learning_rate": 4.367666937010003e-05, "loss": 0.6031, "step": 21970 }, { "epoch": 0.6415964730441891, "grad_norm": 0.45953430833905773, "learning_rate": 4.3673965936739665e-05, "loss": 0.5842, "step": 21975 }, { "epoch": 0.6417424563145063, "grad_norm": 0.5294557704784209, "learning_rate": 4.367126250337929e-05, "loss": 0.5858, "step": 21980 }, { "epoch": 0.6418884395848236, "grad_norm": 0.5149929102947086, "learning_rate": 4.366855907001893e-05, "loss": 0.6105, "step": 21985 }, { "epoch": 0.6420344228551408, "grad_norm": 0.5104008541555459, "learning_rate": 4.366585563665856e-05, "loss": 0.5968, "step": 21990 }, { "epoch": 0.642180406125458, "grad_norm": 0.5760101966434107, "learning_rate": 4.366315220329819e-05, "loss": 0.5983, "step": 21995 }, { "epoch": 0.6423263893957752, "grad_norm": 0.4737427603531942, "learning_rate": 4.366044876993782e-05, "loss": 0.5628, "step": 22000 }, { "epoch": 0.6424723726660925, "grad_norm": 0.5135618207655134, "learning_rate": 4.3657745336577456e-05, "loss": 0.5891, "step": 22005 }, { "epoch": 0.6426183559364097, "grad_norm": 0.5010186191495247, "learning_rate": 4.365504190321708e-05, "loss": 0.596, "step": 22010 }, { "epoch": 0.6427643392067269, "grad_norm": 0.45116463579317206, "learning_rate": 4.365233846985672e-05, "loss": 0.5746, "step": 22015 }, { "epoch": 0.6429103224770442, "grad_norm": 0.5012068462207038, "learning_rate": 4.364963503649635e-05, "loss": 0.6008, "step": 22020 }, { "epoch": 0.6430563057473614, "grad_norm": 0.5052857672058968, "learning_rate": 4.3646931603135985e-05, "loss": 0.5804, "step": 22025 }, { "epoch": 0.6432022890176786, "grad_norm": 0.5259035572579368, "learning_rate": 4.364422816977562e-05, "loss": 0.6151, "step": 22030 }, { "epoch": 0.6433482722879958, "grad_norm": 0.5293267940554692, "learning_rate": 4.364152473641525e-05, "loss": 0.6293, "step": 22035 }, { "epoch": 0.643494255558313, "grad_norm": 0.6535397378282995, "learning_rate": 4.363882130305488e-05, "loss": 0.6058, "step": 22040 }, { "epoch": 0.6436402388286302, "grad_norm": 0.48716440901772146, "learning_rate": 4.3636117869694515e-05, "loss": 0.5449, "step": 22045 }, { "epoch": 0.6437862220989474, "grad_norm": 0.5411210691448763, "learning_rate": 4.363341443633415e-05, "loss": 0.6193, "step": 22050 }, { "epoch": 0.6439322053692647, "grad_norm": 0.6100394569472088, "learning_rate": 4.3630711002973776e-05, "loss": 0.6358, "step": 22055 }, { "epoch": 0.6440781886395819, "grad_norm": 0.4599899474994026, "learning_rate": 4.362800756961341e-05, "loss": 0.5896, "step": 22060 }, { "epoch": 0.6442241719098991, "grad_norm": 0.5123073043200793, "learning_rate": 4.3625304136253044e-05, "loss": 0.5678, "step": 22065 }, { "epoch": 0.6443701551802163, "grad_norm": 0.5268977580851368, "learning_rate": 4.362260070289267e-05, "loss": 0.5916, "step": 22070 }, { "epoch": 0.6445161384505336, "grad_norm": 0.5235464590089322, "learning_rate": 4.3619897269532305e-05, "loss": 0.5885, "step": 22075 }, { "epoch": 0.6446621217208508, "grad_norm": 0.5145423247695387, "learning_rate": 4.361719383617194e-05, "loss": 0.5717, "step": 22080 }, { "epoch": 0.644808104991168, "grad_norm": 0.5554466895113358, "learning_rate": 4.361449040281157e-05, "loss": 0.5801, "step": 22085 }, { "epoch": 0.6449540882614853, "grad_norm": 0.5824394608862217, "learning_rate": 4.361178696945121e-05, "loss": 0.586, "step": 22090 }, { "epoch": 0.6451000715318025, "grad_norm": 0.5593635638165525, "learning_rate": 4.360908353609084e-05, "loss": 0.5828, "step": 22095 }, { "epoch": 0.6452460548021197, "grad_norm": 0.5119767424985117, "learning_rate": 4.360638010273047e-05, "loss": 0.5832, "step": 22100 }, { "epoch": 0.645392038072437, "grad_norm": 0.5094380562892491, "learning_rate": 4.36036766693701e-05, "loss": 0.5876, "step": 22105 }, { "epoch": 0.6455380213427542, "grad_norm": 0.5031442370381579, "learning_rate": 4.3600973236009736e-05, "loss": 0.5972, "step": 22110 }, { "epoch": 0.6456840046130713, "grad_norm": 0.48951938237632076, "learning_rate": 4.3598269802649364e-05, "loss": 0.6155, "step": 22115 }, { "epoch": 0.6458299878833885, "grad_norm": 0.5374838335844454, "learning_rate": 4.3595566369289e-05, "loss": 0.5769, "step": 22120 }, { "epoch": 0.6459759711537058, "grad_norm": 0.5074150997588974, "learning_rate": 4.359286293592863e-05, "loss": 0.5763, "step": 22125 }, { "epoch": 0.646121954424023, "grad_norm": 0.5125303742530414, "learning_rate": 4.359015950256826e-05, "loss": 0.6142, "step": 22130 }, { "epoch": 0.6462679376943402, "grad_norm": 0.4917779957632573, "learning_rate": 4.358745606920789e-05, "loss": 0.5936, "step": 22135 }, { "epoch": 0.6464139209646574, "grad_norm": 0.49102718845670934, "learning_rate": 4.3584752635847534e-05, "loss": 0.5826, "step": 22140 }, { "epoch": 0.6465599042349747, "grad_norm": 0.5117012189903137, "learning_rate": 4.358204920248716e-05, "loss": 0.5985, "step": 22145 }, { "epoch": 0.6467058875052919, "grad_norm": 0.5397098253373828, "learning_rate": 4.3579345769126795e-05, "loss": 0.6209, "step": 22150 }, { "epoch": 0.6468518707756091, "grad_norm": 0.5232456647157022, "learning_rate": 4.357664233576643e-05, "loss": 0.5878, "step": 22155 }, { "epoch": 0.6469978540459264, "grad_norm": 0.5006674205544104, "learning_rate": 4.3573938902406056e-05, "loss": 0.5724, "step": 22160 }, { "epoch": 0.6471438373162436, "grad_norm": 0.49777258908723343, "learning_rate": 4.357123546904569e-05, "loss": 0.6245, "step": 22165 }, { "epoch": 0.6472898205865608, "grad_norm": 0.5555258688999966, "learning_rate": 4.3568532035685324e-05, "loss": 0.6089, "step": 22170 }, { "epoch": 0.647435803856878, "grad_norm": 0.4873670468200062, "learning_rate": 4.356582860232495e-05, "loss": 0.574, "step": 22175 }, { "epoch": 0.6475817871271953, "grad_norm": 0.4882463848032438, "learning_rate": 4.3563125168964585e-05, "loss": 0.608, "step": 22180 }, { "epoch": 0.6477277703975124, "grad_norm": 0.508613345010145, "learning_rate": 4.356042173560422e-05, "loss": 0.5979, "step": 22185 }, { "epoch": 0.6478737536678296, "grad_norm": 0.5000037549259247, "learning_rate": 4.355771830224385e-05, "loss": 0.5727, "step": 22190 }, { "epoch": 0.6480197369381469, "grad_norm": 0.5373773344629542, "learning_rate": 4.355501486888348e-05, "loss": 0.6301, "step": 22195 }, { "epoch": 0.6481657202084641, "grad_norm": 0.4837009085850819, "learning_rate": 4.355231143552312e-05, "loss": 0.5933, "step": 22200 }, { "epoch": 0.6483117034787813, "grad_norm": 0.48541899770626323, "learning_rate": 4.354960800216275e-05, "loss": 0.5602, "step": 22205 }, { "epoch": 0.6484576867490985, "grad_norm": 0.4858639594189527, "learning_rate": 4.354690456880238e-05, "loss": 0.5658, "step": 22210 }, { "epoch": 0.6486036700194158, "grad_norm": 0.5470833230704419, "learning_rate": 4.354420113544202e-05, "loss": 0.6277, "step": 22215 }, { "epoch": 0.648749653289733, "grad_norm": 0.5043591716240171, "learning_rate": 4.3541497702081644e-05, "loss": 0.6073, "step": 22220 }, { "epoch": 0.6488956365600502, "grad_norm": 0.5169416243527857, "learning_rate": 4.353879426872128e-05, "loss": 0.5706, "step": 22225 }, { "epoch": 0.6490416198303675, "grad_norm": 0.5157254130178931, "learning_rate": 4.353609083536091e-05, "loss": 0.6144, "step": 22230 }, { "epoch": 0.6491876031006847, "grad_norm": 0.5118382316267784, "learning_rate": 4.353338740200054e-05, "loss": 0.5906, "step": 22235 }, { "epoch": 0.6493335863710019, "grad_norm": 0.5570321920638107, "learning_rate": 4.353068396864017e-05, "loss": 0.605, "step": 22240 }, { "epoch": 0.6494795696413191, "grad_norm": 0.5143851215442816, "learning_rate": 4.352798053527981e-05, "loss": 0.6183, "step": 22245 }, { "epoch": 0.6496255529116364, "grad_norm": 0.502533209324842, "learning_rate": 4.3525277101919434e-05, "loss": 0.5796, "step": 22250 }, { "epoch": 0.6497715361819536, "grad_norm": 0.5051841815655126, "learning_rate": 4.3522573668559075e-05, "loss": 0.587, "step": 22255 }, { "epoch": 0.6499175194522707, "grad_norm": 0.45290545081389494, "learning_rate": 4.351987023519871e-05, "loss": 0.5629, "step": 22260 }, { "epoch": 0.650063502722588, "grad_norm": 0.45669488879583503, "learning_rate": 4.3517166801838336e-05, "loss": 0.5604, "step": 22265 }, { "epoch": 0.6502094859929052, "grad_norm": 0.47983727730418024, "learning_rate": 4.351446336847797e-05, "loss": 0.5713, "step": 22270 }, { "epoch": 0.6503554692632224, "grad_norm": 0.4958548616249592, "learning_rate": 4.3511759935117605e-05, "loss": 0.5952, "step": 22275 }, { "epoch": 0.6505014525335396, "grad_norm": 0.5475469839794989, "learning_rate": 4.350905650175723e-05, "loss": 0.6114, "step": 22280 }, { "epoch": 0.6506474358038569, "grad_norm": 0.48741721089762396, "learning_rate": 4.3506353068396866e-05, "loss": 0.6168, "step": 22285 }, { "epoch": 0.6507934190741741, "grad_norm": 0.4875611794682844, "learning_rate": 4.35036496350365e-05, "loss": 0.5948, "step": 22290 }, { "epoch": 0.6509394023444913, "grad_norm": 0.47875441665843355, "learning_rate": 4.350094620167613e-05, "loss": 0.5717, "step": 22295 }, { "epoch": 0.6510853856148086, "grad_norm": 0.5120449674103997, "learning_rate": 4.349824276831576e-05, "loss": 0.5983, "step": 22300 }, { "epoch": 0.6512313688851258, "grad_norm": 0.5241716944592394, "learning_rate": 4.3495539334955395e-05, "loss": 0.6023, "step": 22305 }, { "epoch": 0.651377352155443, "grad_norm": 0.4880444677000666, "learning_rate": 4.349283590159503e-05, "loss": 0.6207, "step": 22310 }, { "epoch": 0.6515233354257602, "grad_norm": 0.5093035511943902, "learning_rate": 4.349013246823466e-05, "loss": 0.591, "step": 22315 }, { "epoch": 0.6516693186960775, "grad_norm": 0.5605623979463151, "learning_rate": 4.34874290348743e-05, "loss": 0.6212, "step": 22320 }, { "epoch": 0.6518153019663947, "grad_norm": 0.5446759899800708, "learning_rate": 4.3484725601513924e-05, "loss": 0.6082, "step": 22325 }, { "epoch": 0.6519612852367118, "grad_norm": 0.5202624008781559, "learning_rate": 4.348202216815356e-05, "loss": 0.634, "step": 22330 }, { "epoch": 0.652107268507029, "grad_norm": 0.5409021985303363, "learning_rate": 4.347931873479319e-05, "loss": 0.5839, "step": 22335 }, { "epoch": 0.6522532517773463, "grad_norm": 0.5543540549235492, "learning_rate": 4.347661530143282e-05, "loss": 0.6137, "step": 22340 }, { "epoch": 0.6523992350476635, "grad_norm": 0.47089796547981144, "learning_rate": 4.3473911868072454e-05, "loss": 0.5641, "step": 22345 }, { "epoch": 0.6525452183179807, "grad_norm": 0.47306076145319126, "learning_rate": 4.347120843471209e-05, "loss": 0.5929, "step": 22350 }, { "epoch": 0.652691201588298, "grad_norm": 0.4969515757078861, "learning_rate": 4.3468505001351715e-05, "loss": 0.5884, "step": 22355 }, { "epoch": 0.6528371848586152, "grad_norm": 0.4617577305569009, "learning_rate": 4.346580156799135e-05, "loss": 0.5772, "step": 22360 }, { "epoch": 0.6529831681289324, "grad_norm": 0.5451779023664122, "learning_rate": 4.346309813463098e-05, "loss": 0.616, "step": 22365 }, { "epoch": 0.6531291513992497, "grad_norm": 0.5225374088323332, "learning_rate": 4.346039470127062e-05, "loss": 0.5866, "step": 22370 }, { "epoch": 0.6532751346695669, "grad_norm": 0.537736073775073, "learning_rate": 4.345769126791025e-05, "loss": 0.6221, "step": 22375 }, { "epoch": 0.6534211179398841, "grad_norm": 0.5180313779583554, "learning_rate": 4.3454987834549885e-05, "loss": 0.6054, "step": 22380 }, { "epoch": 0.6535671012102013, "grad_norm": 0.5027179720551066, "learning_rate": 4.345228440118951e-05, "loss": 0.5991, "step": 22385 }, { "epoch": 0.6537130844805186, "grad_norm": 0.4997978154956223, "learning_rate": 4.3449580967829146e-05, "loss": 0.6037, "step": 22390 }, { "epoch": 0.6538590677508358, "grad_norm": 0.43492172477990515, "learning_rate": 4.344687753446878e-05, "loss": 0.576, "step": 22395 }, { "epoch": 0.654005051021153, "grad_norm": 0.5228236608075424, "learning_rate": 4.344417410110841e-05, "loss": 0.5814, "step": 22400 }, { "epoch": 0.6541510342914701, "grad_norm": 0.5247050811998804, "learning_rate": 4.344147066774804e-05, "loss": 0.6023, "step": 22405 }, { "epoch": 0.6542970175617874, "grad_norm": 0.5641509316822305, "learning_rate": 4.3438767234387675e-05, "loss": 0.5846, "step": 22410 }, { "epoch": 0.6544430008321046, "grad_norm": 0.4695551043675555, "learning_rate": 4.34360638010273e-05, "loss": 0.5805, "step": 22415 }, { "epoch": 0.6545889841024218, "grad_norm": 0.5402356798842614, "learning_rate": 4.343336036766694e-05, "loss": 0.5719, "step": 22420 }, { "epoch": 0.6547349673727391, "grad_norm": 0.4778905335586447, "learning_rate": 4.343065693430657e-05, "loss": 0.5661, "step": 22425 }, { "epoch": 0.6548809506430563, "grad_norm": 0.5036239999660168, "learning_rate": 4.3427953500946205e-05, "loss": 0.6009, "step": 22430 }, { "epoch": 0.6550269339133735, "grad_norm": 0.5246246077486364, "learning_rate": 4.342525006758584e-05, "loss": 0.6054, "step": 22435 }, { "epoch": 0.6551729171836908, "grad_norm": 0.5301829335747995, "learning_rate": 4.342254663422547e-05, "loss": 0.6128, "step": 22440 }, { "epoch": 0.655318900454008, "grad_norm": 0.5177574357133079, "learning_rate": 4.34198432008651e-05, "loss": 0.5817, "step": 22445 }, { "epoch": 0.6554648837243252, "grad_norm": 0.5015872255739631, "learning_rate": 4.3417139767504734e-05, "loss": 0.5625, "step": 22450 }, { "epoch": 0.6556108669946424, "grad_norm": 0.543745671490297, "learning_rate": 4.341443633414437e-05, "loss": 0.5919, "step": 22455 }, { "epoch": 0.6557568502649597, "grad_norm": 0.46218193122050577, "learning_rate": 4.3411732900783995e-05, "loss": 0.5734, "step": 22460 }, { "epoch": 0.6559028335352769, "grad_norm": 0.47444950355572, "learning_rate": 4.340902946742363e-05, "loss": 0.6078, "step": 22465 }, { "epoch": 0.6560488168055941, "grad_norm": 0.48959415875902595, "learning_rate": 4.340632603406326e-05, "loss": 0.5654, "step": 22470 }, { "epoch": 0.6561948000759114, "grad_norm": 0.445985181961194, "learning_rate": 4.340362260070289e-05, "loss": 0.5788, "step": 22475 }, { "epoch": 0.6563407833462285, "grad_norm": 0.45585282510403324, "learning_rate": 4.340091916734253e-05, "loss": 0.5697, "step": 22480 }, { "epoch": 0.6564867666165457, "grad_norm": 0.5077410773181978, "learning_rate": 4.339821573398216e-05, "loss": 0.5769, "step": 22485 }, { "epoch": 0.6566327498868629, "grad_norm": 0.46576252858606015, "learning_rate": 4.339551230062179e-05, "loss": 0.5812, "step": 22490 }, { "epoch": 0.6567787331571802, "grad_norm": 0.5079612023862357, "learning_rate": 4.3392808867261426e-05, "loss": 0.5842, "step": 22495 }, { "epoch": 0.6569247164274974, "grad_norm": 0.48198138676064434, "learning_rate": 4.339010543390106e-05, "loss": 0.56, "step": 22500 }, { "epoch": 0.6570706996978146, "grad_norm": 0.595431447310875, "learning_rate": 4.338740200054069e-05, "loss": 0.585, "step": 22505 }, { "epoch": 0.6572166829681318, "grad_norm": 0.4976601630679748, "learning_rate": 4.338469856718032e-05, "loss": 0.5706, "step": 22510 }, { "epoch": 0.6573626662384491, "grad_norm": 0.5249307705948296, "learning_rate": 4.3381995133819956e-05, "loss": 0.5668, "step": 22515 }, { "epoch": 0.6575086495087663, "grad_norm": 0.539553574183211, "learning_rate": 4.337929170045958e-05, "loss": 0.5686, "step": 22520 }, { "epoch": 0.6576546327790835, "grad_norm": 0.5413543565099158, "learning_rate": 4.337658826709922e-05, "loss": 0.6246, "step": 22525 }, { "epoch": 0.6578006160494008, "grad_norm": 0.49032058168884807, "learning_rate": 4.337388483373885e-05, "loss": 0.6015, "step": 22530 }, { "epoch": 0.657946599319718, "grad_norm": 0.5559787089932348, "learning_rate": 4.337118140037848e-05, "loss": 0.5946, "step": 22535 }, { "epoch": 0.6580925825900352, "grad_norm": 0.46691844425986245, "learning_rate": 4.336847796701812e-05, "loss": 0.5727, "step": 22540 }, { "epoch": 0.6582385658603525, "grad_norm": 0.5296272578925959, "learning_rate": 4.3365774533657746e-05, "loss": 0.5882, "step": 22545 }, { "epoch": 0.6583845491306696, "grad_norm": 0.49206997966360555, "learning_rate": 4.336307110029738e-05, "loss": 0.5821, "step": 22550 }, { "epoch": 0.6585305324009868, "grad_norm": 0.5164388104282279, "learning_rate": 4.3360367666937014e-05, "loss": 0.5645, "step": 22555 }, { "epoch": 0.658676515671304, "grad_norm": 0.46317650278747197, "learning_rate": 4.335766423357664e-05, "loss": 0.5743, "step": 22560 }, { "epoch": 0.6588224989416213, "grad_norm": 0.5287335346471912, "learning_rate": 4.3354960800216276e-05, "loss": 0.6111, "step": 22565 }, { "epoch": 0.6589684822119385, "grad_norm": 0.504574213430752, "learning_rate": 4.335225736685591e-05, "loss": 0.5608, "step": 22570 }, { "epoch": 0.6591144654822557, "grad_norm": 0.5029105226884455, "learning_rate": 4.3349553933495544e-05, "loss": 0.6194, "step": 22575 }, { "epoch": 0.659260448752573, "grad_norm": 0.5330142517943516, "learning_rate": 4.334685050013517e-05, "loss": 0.6036, "step": 22580 }, { "epoch": 0.6594064320228902, "grad_norm": 0.48205981502938133, "learning_rate": 4.3344147066774805e-05, "loss": 0.5265, "step": 22585 }, { "epoch": 0.6595524152932074, "grad_norm": 0.5397347703718524, "learning_rate": 4.334144363341444e-05, "loss": 0.6404, "step": 22590 }, { "epoch": 0.6596983985635246, "grad_norm": 0.48327251701407026, "learning_rate": 4.333874020005407e-05, "loss": 0.5699, "step": 22595 }, { "epoch": 0.6598443818338419, "grad_norm": 0.5155874269140619, "learning_rate": 4.333603676669371e-05, "loss": 0.6086, "step": 22600 }, { "epoch": 0.6599903651041591, "grad_norm": 0.48769532479707356, "learning_rate": 4.3333333333333334e-05, "loss": 0.6004, "step": 22605 }, { "epoch": 0.6601363483744763, "grad_norm": 0.522236275441733, "learning_rate": 4.333062989997297e-05, "loss": 0.6301, "step": 22610 }, { "epoch": 0.6602823316447936, "grad_norm": 0.5211785844695608, "learning_rate": 4.33279264666126e-05, "loss": 0.5897, "step": 22615 }, { "epoch": 0.6604283149151108, "grad_norm": 0.5541233034457543, "learning_rate": 4.332522303325223e-05, "loss": 0.5779, "step": 22620 }, { "epoch": 0.6605742981854279, "grad_norm": 0.547403598380857, "learning_rate": 4.332251959989186e-05, "loss": 0.5925, "step": 22625 }, { "epoch": 0.6607202814557451, "grad_norm": 0.493989590570126, "learning_rate": 4.33198161665315e-05, "loss": 0.6231, "step": 22630 }, { "epoch": 0.6608662647260624, "grad_norm": 0.4971138769513478, "learning_rate": 4.3317112733171125e-05, "loss": 0.6036, "step": 22635 }, { "epoch": 0.6610122479963796, "grad_norm": 0.5288431619795567, "learning_rate": 4.331440929981076e-05, "loss": 0.6023, "step": 22640 }, { "epoch": 0.6611582312666968, "grad_norm": 0.5092054393927272, "learning_rate": 4.331170586645039e-05, "loss": 0.5841, "step": 22645 }, { "epoch": 0.661304214537014, "grad_norm": 0.5116634447374865, "learning_rate": 4.3309002433090027e-05, "loss": 0.5836, "step": 22650 }, { "epoch": 0.6614501978073313, "grad_norm": 0.6156578861966584, "learning_rate": 4.330629899972966e-05, "loss": 0.5977, "step": 22655 }, { "epoch": 0.6615961810776485, "grad_norm": 0.4967375990954817, "learning_rate": 4.3303595566369295e-05, "loss": 0.5907, "step": 22660 }, { "epoch": 0.6617421643479657, "grad_norm": 0.493936498377812, "learning_rate": 4.330089213300892e-05, "loss": 0.6522, "step": 22665 }, { "epoch": 0.661888147618283, "grad_norm": 0.47593500862844057, "learning_rate": 4.3298188699648556e-05, "loss": 0.5921, "step": 22670 }, { "epoch": 0.6620341308886002, "grad_norm": 0.5050152555901787, "learning_rate": 4.329548526628819e-05, "loss": 0.556, "step": 22675 }, { "epoch": 0.6621801141589174, "grad_norm": 0.5106953915442861, "learning_rate": 4.329278183292782e-05, "loss": 0.5871, "step": 22680 }, { "epoch": 0.6623260974292347, "grad_norm": 0.5894019693748481, "learning_rate": 4.329007839956745e-05, "loss": 0.6164, "step": 22685 }, { "epoch": 0.6624720806995519, "grad_norm": 0.511998897083347, "learning_rate": 4.3287374966207085e-05, "loss": 0.6011, "step": 22690 }, { "epoch": 0.662618063969869, "grad_norm": 0.5557610702490646, "learning_rate": 4.328467153284671e-05, "loss": 0.6224, "step": 22695 }, { "epoch": 0.6627640472401862, "grad_norm": 0.5266721296894519, "learning_rate": 4.3281968099486346e-05, "loss": 0.6057, "step": 22700 }, { "epoch": 0.6629100305105035, "grad_norm": 0.5265316126987901, "learning_rate": 4.327926466612598e-05, "loss": 0.5828, "step": 22705 }, { "epoch": 0.6630560137808207, "grad_norm": 0.5054451683024811, "learning_rate": 4.3276561232765614e-05, "loss": 0.5955, "step": 22710 }, { "epoch": 0.6632019970511379, "grad_norm": 0.5063505313240245, "learning_rate": 4.327385779940525e-05, "loss": 0.618, "step": 22715 }, { "epoch": 0.6633479803214551, "grad_norm": 0.5586187417069278, "learning_rate": 4.327115436604488e-05, "loss": 0.591, "step": 22720 }, { "epoch": 0.6634939635917724, "grad_norm": 0.4596766107726623, "learning_rate": 4.326845093268451e-05, "loss": 0.5597, "step": 22725 }, { "epoch": 0.6636399468620896, "grad_norm": 0.503220397293367, "learning_rate": 4.3265747499324144e-05, "loss": 0.5797, "step": 22730 }, { "epoch": 0.6637859301324068, "grad_norm": 0.5157575509479801, "learning_rate": 4.326304406596378e-05, "loss": 0.5862, "step": 22735 }, { "epoch": 0.6639319134027241, "grad_norm": 0.4517846837746402, "learning_rate": 4.3260340632603405e-05, "loss": 0.5651, "step": 22740 }, { "epoch": 0.6640778966730413, "grad_norm": 0.49065181204417685, "learning_rate": 4.325763719924304e-05, "loss": 0.6089, "step": 22745 }, { "epoch": 0.6642238799433585, "grad_norm": 0.468718754187443, "learning_rate": 4.325493376588267e-05, "loss": 0.6038, "step": 22750 }, { "epoch": 0.6643698632136757, "grad_norm": 0.48898519289923664, "learning_rate": 4.32522303325223e-05, "loss": 0.6044, "step": 22755 }, { "epoch": 0.664515846483993, "grad_norm": 0.5166513418172338, "learning_rate": 4.3249526899161934e-05, "loss": 0.5718, "step": 22760 }, { "epoch": 0.6646618297543102, "grad_norm": 0.5214860725214686, "learning_rate": 4.3246823465801575e-05, "loss": 0.6031, "step": 22765 }, { "epoch": 0.6648078130246273, "grad_norm": 0.538906506704743, "learning_rate": 4.32441200324412e-05, "loss": 0.6294, "step": 22770 }, { "epoch": 0.6649537962949446, "grad_norm": 0.554771089458384, "learning_rate": 4.3241416599080836e-05, "loss": 0.6035, "step": 22775 }, { "epoch": 0.6650997795652618, "grad_norm": 0.49739653196510714, "learning_rate": 4.323871316572047e-05, "loss": 0.5465, "step": 22780 }, { "epoch": 0.665245762835579, "grad_norm": 0.5136916080840117, "learning_rate": 4.32360097323601e-05, "loss": 0.5585, "step": 22785 }, { "epoch": 0.6653917461058962, "grad_norm": 0.48268891194246144, "learning_rate": 4.323330629899973e-05, "loss": 0.598, "step": 22790 }, { "epoch": 0.6655377293762135, "grad_norm": 0.49325006817622674, "learning_rate": 4.3230602865639365e-05, "loss": 0.564, "step": 22795 }, { "epoch": 0.6656837126465307, "grad_norm": 0.44052421319444973, "learning_rate": 4.322789943227899e-05, "loss": 0.5541, "step": 22800 }, { "epoch": 0.6658296959168479, "grad_norm": 0.4980446823768374, "learning_rate": 4.322519599891863e-05, "loss": 0.5896, "step": 22805 }, { "epoch": 0.6659756791871652, "grad_norm": 0.48339024055405166, "learning_rate": 4.322249256555826e-05, "loss": 0.5465, "step": 22810 }, { "epoch": 0.6661216624574824, "grad_norm": 0.5133311097642806, "learning_rate": 4.321978913219789e-05, "loss": 0.5996, "step": 22815 }, { "epoch": 0.6662676457277996, "grad_norm": 0.4868854320358918, "learning_rate": 4.321708569883753e-05, "loss": 0.5571, "step": 22820 }, { "epoch": 0.6664136289981168, "grad_norm": 0.5370548957877267, "learning_rate": 4.321438226547716e-05, "loss": 0.5859, "step": 22825 }, { "epoch": 0.6665596122684341, "grad_norm": 0.47356093215918593, "learning_rate": 4.321167883211679e-05, "loss": 0.5928, "step": 22830 }, { "epoch": 0.6667055955387513, "grad_norm": 0.531784606689246, "learning_rate": 4.3208975398756424e-05, "loss": 0.6325, "step": 22835 }, { "epoch": 0.6668515788090685, "grad_norm": 0.49505541047122803, "learning_rate": 4.320627196539606e-05, "loss": 0.554, "step": 22840 }, { "epoch": 0.6669975620793857, "grad_norm": 0.547676555116812, "learning_rate": 4.3203568532035685e-05, "loss": 0.6268, "step": 22845 }, { "epoch": 0.6671435453497029, "grad_norm": 0.573472639909631, "learning_rate": 4.320086509867532e-05, "loss": 0.6494, "step": 22850 }, { "epoch": 0.6672895286200201, "grad_norm": 0.530248804375472, "learning_rate": 4.319816166531495e-05, "loss": 0.5895, "step": 22855 }, { "epoch": 0.6674355118903373, "grad_norm": 0.5102252676430022, "learning_rate": 4.319545823195458e-05, "loss": 0.6151, "step": 22860 }, { "epoch": 0.6675814951606546, "grad_norm": 0.4721513518648234, "learning_rate": 4.3192754798594215e-05, "loss": 0.6016, "step": 22865 }, { "epoch": 0.6677274784309718, "grad_norm": 0.4973330103496064, "learning_rate": 4.319005136523385e-05, "loss": 0.5836, "step": 22870 }, { "epoch": 0.667873461701289, "grad_norm": 0.5192885227825254, "learning_rate": 4.318734793187348e-05, "loss": 0.5963, "step": 22875 }, { "epoch": 0.6680194449716063, "grad_norm": 0.5140020408597148, "learning_rate": 4.3184644498513117e-05, "loss": 0.5545, "step": 22880 }, { "epoch": 0.6681654282419235, "grad_norm": 0.4869992134843907, "learning_rate": 4.318194106515275e-05, "loss": 0.5965, "step": 22885 }, { "epoch": 0.6683114115122407, "grad_norm": 0.5032416417416508, "learning_rate": 4.317923763179238e-05, "loss": 0.6165, "step": 22890 }, { "epoch": 0.668457394782558, "grad_norm": 0.43789167850674243, "learning_rate": 4.317653419843201e-05, "loss": 0.5708, "step": 22895 }, { "epoch": 0.6686033780528752, "grad_norm": 0.49884964874001075, "learning_rate": 4.3173830765071646e-05, "loss": 0.5816, "step": 22900 }, { "epoch": 0.6687493613231924, "grad_norm": 0.47778480067322177, "learning_rate": 4.317112733171127e-05, "loss": 0.5331, "step": 22905 }, { "epoch": 0.6688953445935096, "grad_norm": 0.5072108614760451, "learning_rate": 4.316842389835091e-05, "loss": 0.6077, "step": 22910 }, { "epoch": 0.6690413278638268, "grad_norm": 0.48710538232120854, "learning_rate": 4.316572046499054e-05, "loss": 0.5974, "step": 22915 }, { "epoch": 0.669187311134144, "grad_norm": 0.4632679979908139, "learning_rate": 4.316301703163017e-05, "loss": 0.5784, "step": 22920 }, { "epoch": 0.6693332944044612, "grad_norm": 0.5074783252939064, "learning_rate": 4.31603135982698e-05, "loss": 0.5937, "step": 22925 }, { "epoch": 0.6694792776747784, "grad_norm": 0.49593387747080964, "learning_rate": 4.3157610164909436e-05, "loss": 0.5894, "step": 22930 }, { "epoch": 0.6696252609450957, "grad_norm": 0.44083987579202294, "learning_rate": 4.315490673154907e-05, "loss": 0.5434, "step": 22935 }, { "epoch": 0.6697712442154129, "grad_norm": 0.5047644190436665, "learning_rate": 4.3152203298188704e-05, "loss": 0.6052, "step": 22940 }, { "epoch": 0.6699172274857301, "grad_norm": 0.5252915033979207, "learning_rate": 4.314949986482834e-05, "loss": 0.5884, "step": 22945 }, { "epoch": 0.6700632107560474, "grad_norm": 0.4921705179234317, "learning_rate": 4.3146796431467966e-05, "loss": 0.562, "step": 22950 }, { "epoch": 0.6702091940263646, "grad_norm": 0.5245285160742825, "learning_rate": 4.31440929981076e-05, "loss": 0.578, "step": 22955 }, { "epoch": 0.6703551772966818, "grad_norm": 0.47906598928554184, "learning_rate": 4.3141389564747234e-05, "loss": 0.5881, "step": 22960 }, { "epoch": 0.670501160566999, "grad_norm": 0.5310224105149995, "learning_rate": 4.313868613138686e-05, "loss": 0.5794, "step": 22965 }, { "epoch": 0.6706471438373163, "grad_norm": 0.4418630353849803, "learning_rate": 4.3135982698026495e-05, "loss": 0.5689, "step": 22970 }, { "epoch": 0.6707931271076335, "grad_norm": 0.514003963417989, "learning_rate": 4.313327926466613e-05, "loss": 0.5796, "step": 22975 }, { "epoch": 0.6709391103779507, "grad_norm": 0.48523369797316074, "learning_rate": 4.3130575831305756e-05, "loss": 0.5984, "step": 22980 }, { "epoch": 0.671085093648268, "grad_norm": 0.5382222593327456, "learning_rate": 4.312787239794539e-05, "loss": 0.6328, "step": 22985 }, { "epoch": 0.6712310769185851, "grad_norm": 0.5865521610664913, "learning_rate": 4.312516896458503e-05, "loss": 0.5955, "step": 22990 }, { "epoch": 0.6713770601889023, "grad_norm": 0.4952002688722003, "learning_rate": 4.312246553122466e-05, "loss": 0.598, "step": 22995 }, { "epoch": 0.6715230434592195, "grad_norm": 0.5190234807812214, "learning_rate": 4.311976209786429e-05, "loss": 0.6019, "step": 23000 }, { "epoch": 0.6716690267295368, "grad_norm": 0.5013005709745415, "learning_rate": 4.3117058664503926e-05, "loss": 0.5941, "step": 23005 }, { "epoch": 0.671815009999854, "grad_norm": 0.4305155511968599, "learning_rate": 4.3114355231143553e-05, "loss": 0.5958, "step": 23010 }, { "epoch": 0.6719609932701712, "grad_norm": 0.4839636708449907, "learning_rate": 4.311165179778319e-05, "loss": 0.6346, "step": 23015 }, { "epoch": 0.6721069765404885, "grad_norm": 0.5235535971536655, "learning_rate": 4.310894836442282e-05, "loss": 0.5745, "step": 23020 }, { "epoch": 0.6722529598108057, "grad_norm": 0.49586917155197485, "learning_rate": 4.310624493106245e-05, "loss": 0.5719, "step": 23025 }, { "epoch": 0.6723989430811229, "grad_norm": 0.49098210673823117, "learning_rate": 4.310354149770208e-05, "loss": 0.5774, "step": 23030 }, { "epoch": 0.6725449263514401, "grad_norm": 0.5227407144923117, "learning_rate": 4.310083806434172e-05, "loss": 0.5824, "step": 23035 }, { "epoch": 0.6726909096217574, "grad_norm": 0.48934289044736456, "learning_rate": 4.3098134630981344e-05, "loss": 0.5724, "step": 23040 }, { "epoch": 0.6728368928920746, "grad_norm": 0.5173958995301869, "learning_rate": 4.3095431197620985e-05, "loss": 0.5953, "step": 23045 }, { "epoch": 0.6729828761623918, "grad_norm": 0.49540224578481923, "learning_rate": 4.309272776426061e-05, "loss": 0.5565, "step": 23050 }, { "epoch": 0.6731288594327091, "grad_norm": 0.5845132354021741, "learning_rate": 4.3090024330900246e-05, "loss": 0.5967, "step": 23055 }, { "epoch": 0.6732748427030263, "grad_norm": 0.48682682854421994, "learning_rate": 4.308732089753988e-05, "loss": 0.5707, "step": 23060 }, { "epoch": 0.6734208259733434, "grad_norm": 0.5015077087613873, "learning_rate": 4.3084617464179514e-05, "loss": 0.5596, "step": 23065 }, { "epoch": 0.6735668092436606, "grad_norm": 0.5720442364889317, "learning_rate": 4.308191403081914e-05, "loss": 0.5864, "step": 23070 }, { "epoch": 0.6737127925139779, "grad_norm": 0.49402597657610314, "learning_rate": 4.3079210597458775e-05, "loss": 0.582, "step": 23075 }, { "epoch": 0.6738587757842951, "grad_norm": 0.47918339409466937, "learning_rate": 4.307650716409841e-05, "loss": 0.5696, "step": 23080 }, { "epoch": 0.6740047590546123, "grad_norm": 0.5036589208260568, "learning_rate": 4.3073803730738037e-05, "loss": 0.5745, "step": 23085 }, { "epoch": 0.6741507423249296, "grad_norm": 0.5168273606847471, "learning_rate": 4.307110029737767e-05, "loss": 0.5816, "step": 23090 }, { "epoch": 0.6742967255952468, "grad_norm": 0.45455148735580264, "learning_rate": 4.3068396864017305e-05, "loss": 0.6032, "step": 23095 }, { "epoch": 0.674442708865564, "grad_norm": 0.5208017617551851, "learning_rate": 4.306569343065693e-05, "loss": 0.5888, "step": 23100 }, { "epoch": 0.6745886921358812, "grad_norm": 0.5517344106965926, "learning_rate": 4.306298999729657e-05, "loss": 0.6174, "step": 23105 }, { "epoch": 0.6747346754061985, "grad_norm": 0.46349590905685484, "learning_rate": 4.30602865639362e-05, "loss": 0.5774, "step": 23110 }, { "epoch": 0.6748806586765157, "grad_norm": 0.5091212434360558, "learning_rate": 4.3057583130575834e-05, "loss": 0.5743, "step": 23115 }, { "epoch": 0.6750266419468329, "grad_norm": 0.48255983452492557, "learning_rate": 4.305487969721547e-05, "loss": 0.5689, "step": 23120 }, { "epoch": 0.6751726252171502, "grad_norm": 0.4880189377003426, "learning_rate": 4.30521762638551e-05, "loss": 0.5801, "step": 23125 }, { "epoch": 0.6753186084874674, "grad_norm": 0.4895211736198182, "learning_rate": 4.304947283049473e-05, "loss": 0.6119, "step": 23130 }, { "epoch": 0.6754645917577845, "grad_norm": 0.4909701596052639, "learning_rate": 4.304676939713436e-05, "loss": 0.5452, "step": 23135 }, { "epoch": 0.6756105750281017, "grad_norm": 0.49967709758576095, "learning_rate": 4.3044065963774e-05, "loss": 0.5733, "step": 23140 }, { "epoch": 0.675756558298419, "grad_norm": 0.5412673458744964, "learning_rate": 4.3041362530413624e-05, "loss": 0.5879, "step": 23145 }, { "epoch": 0.6759025415687362, "grad_norm": 0.47741796242557477, "learning_rate": 4.303865909705326e-05, "loss": 0.5681, "step": 23150 }, { "epoch": 0.6760485248390534, "grad_norm": 0.6609268600550628, "learning_rate": 4.303595566369289e-05, "loss": 0.6169, "step": 23155 }, { "epoch": 0.6761945081093707, "grad_norm": 0.4917695816281971, "learning_rate": 4.3033252230332526e-05, "loss": 0.5678, "step": 23160 }, { "epoch": 0.6763404913796879, "grad_norm": 0.506737384987777, "learning_rate": 4.303054879697216e-05, "loss": 0.602, "step": 23165 }, { "epoch": 0.6764864746500051, "grad_norm": 0.4804998106090561, "learning_rate": 4.302784536361179e-05, "loss": 0.5968, "step": 23170 }, { "epoch": 0.6766324579203223, "grad_norm": 0.5097667323348968, "learning_rate": 4.302514193025142e-05, "loss": 0.5924, "step": 23175 }, { "epoch": 0.6767784411906396, "grad_norm": 0.48113043979542847, "learning_rate": 4.3022438496891056e-05, "loss": 0.5728, "step": 23180 }, { "epoch": 0.6769244244609568, "grad_norm": 0.4751265127436648, "learning_rate": 4.301973506353068e-05, "loss": 0.5846, "step": 23185 }, { "epoch": 0.677070407731274, "grad_norm": 0.4815879332916174, "learning_rate": 4.301703163017032e-05, "loss": 0.558, "step": 23190 }, { "epoch": 0.6772163910015913, "grad_norm": 0.4924459784083334, "learning_rate": 4.301432819680995e-05, "loss": 0.5941, "step": 23195 }, { "epoch": 0.6773623742719085, "grad_norm": 0.5147758566827881, "learning_rate": 4.3011624763449585e-05, "loss": 0.6226, "step": 23200 }, { "epoch": 0.6775083575422257, "grad_norm": 0.47012460373654386, "learning_rate": 4.300892133008921e-05, "loss": 0.5656, "step": 23205 }, { "epoch": 0.6776543408125428, "grad_norm": 0.5002720415029052, "learning_rate": 4.3006217896728846e-05, "loss": 0.5737, "step": 23210 }, { "epoch": 0.6778003240828601, "grad_norm": 0.46553048724183016, "learning_rate": 4.300351446336848e-05, "loss": 0.5574, "step": 23215 }, { "epoch": 0.6779463073531773, "grad_norm": 0.5217572583134442, "learning_rate": 4.3000811030008114e-05, "loss": 0.6063, "step": 23220 }, { "epoch": 0.6780922906234945, "grad_norm": 0.5256392676073491, "learning_rate": 4.299810759664775e-05, "loss": 0.5823, "step": 23225 }, { "epoch": 0.6782382738938117, "grad_norm": 0.4933171221837321, "learning_rate": 4.2995404163287375e-05, "loss": 0.6114, "step": 23230 }, { "epoch": 0.678384257164129, "grad_norm": 0.520123222546254, "learning_rate": 4.299270072992701e-05, "loss": 0.6194, "step": 23235 }, { "epoch": 0.6785302404344462, "grad_norm": 0.5193070585574907, "learning_rate": 4.2989997296566643e-05, "loss": 0.5887, "step": 23240 }, { "epoch": 0.6786762237047634, "grad_norm": 0.5580634449288198, "learning_rate": 4.298729386320627e-05, "loss": 0.5802, "step": 23245 }, { "epoch": 0.6788222069750807, "grad_norm": 0.5870320533373135, "learning_rate": 4.2984590429845905e-05, "loss": 0.571, "step": 23250 }, { "epoch": 0.6789681902453979, "grad_norm": 0.5630569049513182, "learning_rate": 4.298188699648554e-05, "loss": 0.6023, "step": 23255 }, { "epoch": 0.6791141735157151, "grad_norm": 0.5110085048355419, "learning_rate": 4.297918356312517e-05, "loss": 0.6123, "step": 23260 }, { "epoch": 0.6792601567860324, "grad_norm": 0.47680972752270595, "learning_rate": 4.29764801297648e-05, "loss": 0.5757, "step": 23265 }, { "epoch": 0.6794061400563496, "grad_norm": 0.5089038160172575, "learning_rate": 4.2973776696404434e-05, "loss": 0.6151, "step": 23270 }, { "epoch": 0.6795521233266668, "grad_norm": 0.5170228860070384, "learning_rate": 4.297107326304407e-05, "loss": 0.6017, "step": 23275 }, { "epoch": 0.6796981065969839, "grad_norm": 0.5376827107421502, "learning_rate": 4.29683698296837e-05, "loss": 0.6136, "step": 23280 }, { "epoch": 0.6798440898673012, "grad_norm": 0.4213128290280589, "learning_rate": 4.2965666396323336e-05, "loss": 0.5613, "step": 23285 }, { "epoch": 0.6799900731376184, "grad_norm": 0.48439503375839466, "learning_rate": 4.296296296296296e-05, "loss": 0.5541, "step": 23290 }, { "epoch": 0.6801360564079356, "grad_norm": 0.47312088618378606, "learning_rate": 4.29602595296026e-05, "loss": 0.5959, "step": 23295 }, { "epoch": 0.6802820396782528, "grad_norm": 0.5586835797694238, "learning_rate": 4.295755609624223e-05, "loss": 0.6044, "step": 23300 }, { "epoch": 0.6804280229485701, "grad_norm": 0.5737482664453281, "learning_rate": 4.295485266288186e-05, "loss": 0.618, "step": 23305 }, { "epoch": 0.6805740062188873, "grad_norm": 0.4907929353969015, "learning_rate": 4.295214922952149e-05, "loss": 0.5961, "step": 23310 }, { "epoch": 0.6807199894892045, "grad_norm": 0.4944132764740443, "learning_rate": 4.2949445796161126e-05, "loss": 0.5557, "step": 23315 }, { "epoch": 0.6808659727595218, "grad_norm": 0.5362274968269389, "learning_rate": 4.2946742362800754e-05, "loss": 0.6001, "step": 23320 }, { "epoch": 0.681011956029839, "grad_norm": 0.45981685872039135, "learning_rate": 4.294403892944039e-05, "loss": 0.5812, "step": 23325 }, { "epoch": 0.6811579393001562, "grad_norm": 0.49980353542180744, "learning_rate": 4.294133549608003e-05, "loss": 0.6044, "step": 23330 }, { "epoch": 0.6813039225704735, "grad_norm": 0.5270271240463508, "learning_rate": 4.2938632062719656e-05, "loss": 0.6173, "step": 23335 }, { "epoch": 0.6814499058407907, "grad_norm": 0.5058245300445126, "learning_rate": 4.293592862935929e-05, "loss": 0.5694, "step": 23340 }, { "epoch": 0.6815958891111079, "grad_norm": 0.500800667007394, "learning_rate": 4.2933225195998924e-05, "loss": 0.6075, "step": 23345 }, { "epoch": 0.6817418723814251, "grad_norm": 0.5269133877012658, "learning_rate": 4.293052176263855e-05, "loss": 0.5928, "step": 23350 }, { "epoch": 0.6818878556517423, "grad_norm": 0.4788249557505845, "learning_rate": 4.2927818329278185e-05, "loss": 0.6279, "step": 23355 }, { "epoch": 0.6820338389220595, "grad_norm": 0.4893456970963172, "learning_rate": 4.292511489591782e-05, "loss": 0.5647, "step": 23360 }, { "epoch": 0.6821798221923767, "grad_norm": 0.49046208932627156, "learning_rate": 4.2922411462557446e-05, "loss": 0.5838, "step": 23365 }, { "epoch": 0.682325805462694, "grad_norm": 0.5433479570931766, "learning_rate": 4.291970802919708e-05, "loss": 0.5771, "step": 23370 }, { "epoch": 0.6824717887330112, "grad_norm": 0.5467779924069324, "learning_rate": 4.2917004595836714e-05, "loss": 0.6292, "step": 23375 }, { "epoch": 0.6826177720033284, "grad_norm": 0.4985531305115974, "learning_rate": 4.291430116247634e-05, "loss": 0.5958, "step": 23380 }, { "epoch": 0.6827637552736456, "grad_norm": 0.49285021665538054, "learning_rate": 4.291159772911598e-05, "loss": 0.554, "step": 23385 }, { "epoch": 0.6829097385439629, "grad_norm": 0.5281338425652986, "learning_rate": 4.2908894295755616e-05, "loss": 0.6065, "step": 23390 }, { "epoch": 0.6830557218142801, "grad_norm": 0.46990372096215605, "learning_rate": 4.2906190862395244e-05, "loss": 0.6113, "step": 23395 }, { "epoch": 0.6832017050845973, "grad_norm": 0.5056753000756204, "learning_rate": 4.290348742903488e-05, "loss": 0.5829, "step": 23400 }, { "epoch": 0.6833476883549146, "grad_norm": 0.49452308281405555, "learning_rate": 4.290078399567451e-05, "loss": 0.5985, "step": 23405 }, { "epoch": 0.6834936716252318, "grad_norm": 0.4757477146076932, "learning_rate": 4.289808056231414e-05, "loss": 0.6029, "step": 23410 }, { "epoch": 0.683639654895549, "grad_norm": 0.5224176905092699, "learning_rate": 4.289537712895377e-05, "loss": 0.6138, "step": 23415 }, { "epoch": 0.6837856381658662, "grad_norm": 0.5327516498393585, "learning_rate": 4.289267369559341e-05, "loss": 0.6024, "step": 23420 }, { "epoch": 0.6839316214361835, "grad_norm": 0.504716897573718, "learning_rate": 4.2889970262233034e-05, "loss": 0.5995, "step": 23425 }, { "epoch": 0.6840776047065006, "grad_norm": 0.48751817623634325, "learning_rate": 4.288726682887267e-05, "loss": 0.6153, "step": 23430 }, { "epoch": 0.6842235879768178, "grad_norm": 0.4911307711065635, "learning_rate": 4.28845633955123e-05, "loss": 0.5633, "step": 23435 }, { "epoch": 0.684369571247135, "grad_norm": 0.5344477818032757, "learning_rate": 4.288185996215193e-05, "loss": 0.6191, "step": 23440 }, { "epoch": 0.6845155545174523, "grad_norm": 0.5358624725411008, "learning_rate": 4.287915652879157e-05, "loss": 0.5614, "step": 23445 }, { "epoch": 0.6846615377877695, "grad_norm": 0.5008541001453902, "learning_rate": 4.2876453095431204e-05, "loss": 0.598, "step": 23450 }, { "epoch": 0.6848075210580867, "grad_norm": 0.4814564109717988, "learning_rate": 4.287374966207083e-05, "loss": 0.5998, "step": 23455 }, { "epoch": 0.684953504328404, "grad_norm": 0.4839489756795287, "learning_rate": 4.2871046228710465e-05, "loss": 0.5975, "step": 23460 }, { "epoch": 0.6850994875987212, "grad_norm": 0.4885239122523488, "learning_rate": 4.28683427953501e-05, "loss": 0.6094, "step": 23465 }, { "epoch": 0.6852454708690384, "grad_norm": 0.5138387205298492, "learning_rate": 4.286563936198973e-05, "loss": 0.5913, "step": 23470 }, { "epoch": 0.6853914541393556, "grad_norm": 0.4727453866733554, "learning_rate": 4.286293592862936e-05, "loss": 0.6027, "step": 23475 }, { "epoch": 0.6855374374096729, "grad_norm": 0.48347727888998504, "learning_rate": 4.2860232495268995e-05, "loss": 0.5828, "step": 23480 }, { "epoch": 0.6856834206799901, "grad_norm": 0.486381841478446, "learning_rate": 4.285752906190862e-05, "loss": 0.5836, "step": 23485 }, { "epoch": 0.6858294039503073, "grad_norm": 0.4647075102780251, "learning_rate": 4.2854825628548256e-05, "loss": 0.5695, "step": 23490 }, { "epoch": 0.6859753872206246, "grad_norm": 0.46898139403596667, "learning_rate": 4.285212219518789e-05, "loss": 0.5884, "step": 23495 }, { "epoch": 0.6861213704909417, "grad_norm": 0.4789941507938579, "learning_rate": 4.2849418761827524e-05, "loss": 0.589, "step": 23500 }, { "epoch": 0.6862673537612589, "grad_norm": 0.5475382695096058, "learning_rate": 4.284671532846716e-05, "loss": 0.5793, "step": 23505 }, { "epoch": 0.6864133370315761, "grad_norm": 0.4788200570413279, "learning_rate": 4.284401189510679e-05, "loss": 0.6256, "step": 23510 }, { "epoch": 0.6865593203018934, "grad_norm": 0.4791382538388406, "learning_rate": 4.284130846174642e-05, "loss": 0.564, "step": 23515 }, { "epoch": 0.6867053035722106, "grad_norm": 0.4852717487546066, "learning_rate": 4.283860502838605e-05, "loss": 0.5923, "step": 23520 }, { "epoch": 0.6868512868425278, "grad_norm": 0.49154443156829847, "learning_rate": 4.283590159502569e-05, "loss": 0.6147, "step": 23525 }, { "epoch": 0.6869972701128451, "grad_norm": 0.504750777851779, "learning_rate": 4.2833198161665314e-05, "loss": 0.6042, "step": 23530 }, { "epoch": 0.6871432533831623, "grad_norm": 0.48014533350954797, "learning_rate": 4.283049472830495e-05, "loss": 0.5612, "step": 23535 }, { "epoch": 0.6872892366534795, "grad_norm": 0.4621613767308754, "learning_rate": 4.282779129494458e-05, "loss": 0.6104, "step": 23540 }, { "epoch": 0.6874352199237967, "grad_norm": 0.5207078994148193, "learning_rate": 4.282508786158421e-05, "loss": 0.6047, "step": 23545 }, { "epoch": 0.687581203194114, "grad_norm": 0.4922517091088245, "learning_rate": 4.2822384428223844e-05, "loss": 0.5775, "step": 23550 }, { "epoch": 0.6877271864644312, "grad_norm": 0.4651659953510336, "learning_rate": 4.2819680994863485e-05, "loss": 0.5754, "step": 23555 }, { "epoch": 0.6878731697347484, "grad_norm": 0.48079061510424964, "learning_rate": 4.281697756150311e-05, "loss": 0.6063, "step": 23560 }, { "epoch": 0.6880191530050657, "grad_norm": 0.46507488755808807, "learning_rate": 4.2814274128142746e-05, "loss": 0.5932, "step": 23565 }, { "epoch": 0.6881651362753829, "grad_norm": 0.5139379317186278, "learning_rate": 4.281157069478238e-05, "loss": 0.6219, "step": 23570 }, { "epoch": 0.6883111195457, "grad_norm": 0.5051393086708174, "learning_rate": 4.280886726142201e-05, "loss": 0.5949, "step": 23575 }, { "epoch": 0.6884571028160172, "grad_norm": 0.5117178430511604, "learning_rate": 4.280616382806164e-05, "loss": 0.6085, "step": 23580 }, { "epoch": 0.6886030860863345, "grad_norm": 0.5538659388073374, "learning_rate": 4.2803460394701275e-05, "loss": 0.6138, "step": 23585 }, { "epoch": 0.6887490693566517, "grad_norm": 1.19733328797858, "learning_rate": 4.28007569613409e-05, "loss": 0.5912, "step": 23590 }, { "epoch": 0.6888950526269689, "grad_norm": 0.5201540889649926, "learning_rate": 4.2798053527980536e-05, "loss": 0.5994, "step": 23595 }, { "epoch": 0.6890410358972862, "grad_norm": 0.5139942269684099, "learning_rate": 4.279535009462017e-05, "loss": 0.5547, "step": 23600 }, { "epoch": 0.6891870191676034, "grad_norm": 0.49322116452776116, "learning_rate": 4.27926466612598e-05, "loss": 0.604, "step": 23605 }, { "epoch": 0.6893330024379206, "grad_norm": 0.5096996748454711, "learning_rate": 4.278994322789943e-05, "loss": 0.5813, "step": 23610 }, { "epoch": 0.6894789857082378, "grad_norm": 0.5113421078648859, "learning_rate": 4.278723979453907e-05, "loss": 0.5648, "step": 23615 }, { "epoch": 0.6896249689785551, "grad_norm": 0.46621996957708545, "learning_rate": 4.27845363611787e-05, "loss": 0.5849, "step": 23620 }, { "epoch": 0.6897709522488723, "grad_norm": 0.499450431089948, "learning_rate": 4.2781832927818334e-05, "loss": 0.5967, "step": 23625 }, { "epoch": 0.6899169355191895, "grad_norm": 0.5530843535469151, "learning_rate": 4.277912949445797e-05, "loss": 0.6242, "step": 23630 }, { "epoch": 0.6900629187895068, "grad_norm": 0.47519698508294284, "learning_rate": 4.2776426061097595e-05, "loss": 0.6282, "step": 23635 }, { "epoch": 0.690208902059824, "grad_norm": 0.5030295301532421, "learning_rate": 4.277372262773723e-05, "loss": 0.5845, "step": 23640 }, { "epoch": 0.6903548853301411, "grad_norm": 0.5073777703801022, "learning_rate": 4.277101919437686e-05, "loss": 0.5839, "step": 23645 }, { "epoch": 0.6905008686004583, "grad_norm": 0.5246200565009572, "learning_rate": 4.276831576101649e-05, "loss": 0.5954, "step": 23650 }, { "epoch": 0.6906468518707756, "grad_norm": 0.5286035801865206, "learning_rate": 4.2765612327656124e-05, "loss": 0.5711, "step": 23655 }, { "epoch": 0.6907928351410928, "grad_norm": 0.5002902760356718, "learning_rate": 4.276290889429576e-05, "loss": 0.5959, "step": 23660 }, { "epoch": 0.69093881841141, "grad_norm": 0.5625865436129635, "learning_rate": 4.2760205460935385e-05, "loss": 0.5961, "step": 23665 }, { "epoch": 0.6910848016817273, "grad_norm": 0.4652341593200503, "learning_rate": 4.2757502027575026e-05, "loss": 0.5565, "step": 23670 }, { "epoch": 0.6912307849520445, "grad_norm": 0.4800859802746593, "learning_rate": 4.275479859421465e-05, "loss": 0.5728, "step": 23675 }, { "epoch": 0.6913767682223617, "grad_norm": 0.5121980241658275, "learning_rate": 4.275209516085429e-05, "loss": 0.614, "step": 23680 }, { "epoch": 0.6915227514926789, "grad_norm": 0.511562834999966, "learning_rate": 4.274939172749392e-05, "loss": 0.5757, "step": 23685 }, { "epoch": 0.6916687347629962, "grad_norm": 0.48488820643918107, "learning_rate": 4.2746688294133555e-05, "loss": 0.697, "step": 23690 }, { "epoch": 0.6918147180333134, "grad_norm": 0.49532952441113803, "learning_rate": 4.274398486077318e-05, "loss": 0.5896, "step": 23695 }, { "epoch": 0.6919607013036306, "grad_norm": 0.4778799876681683, "learning_rate": 4.274128142741282e-05, "loss": 0.563, "step": 23700 }, { "epoch": 0.6921066845739479, "grad_norm": 0.5028771836193389, "learning_rate": 4.273857799405245e-05, "loss": 0.5856, "step": 23705 }, { "epoch": 0.6922526678442651, "grad_norm": 0.4939173356834327, "learning_rate": 4.273587456069208e-05, "loss": 0.534, "step": 23710 }, { "epoch": 0.6923986511145823, "grad_norm": 0.5227206472338902, "learning_rate": 4.273317112733171e-05, "loss": 0.6143, "step": 23715 }, { "epoch": 0.6925446343848994, "grad_norm": 0.5044555677898882, "learning_rate": 4.2730467693971346e-05, "loss": 0.6107, "step": 23720 }, { "epoch": 0.6926906176552167, "grad_norm": 0.5184307705145021, "learning_rate": 4.272776426061098e-05, "loss": 0.5972, "step": 23725 }, { "epoch": 0.6928366009255339, "grad_norm": 0.49228422192122057, "learning_rate": 4.2725060827250614e-05, "loss": 0.6157, "step": 23730 }, { "epoch": 0.6929825841958511, "grad_norm": 0.5117801413315834, "learning_rate": 4.272235739389024e-05, "loss": 0.5973, "step": 23735 }, { "epoch": 0.6931285674661684, "grad_norm": 0.5070190941916283, "learning_rate": 4.2719653960529875e-05, "loss": 0.5691, "step": 23740 }, { "epoch": 0.6932745507364856, "grad_norm": 0.5379426901400092, "learning_rate": 4.271695052716951e-05, "loss": 0.5619, "step": 23745 }, { "epoch": 0.6934205340068028, "grad_norm": 0.5089471437902058, "learning_rate": 4.271424709380914e-05, "loss": 0.6242, "step": 23750 }, { "epoch": 0.69356651727712, "grad_norm": 0.47556707336367776, "learning_rate": 4.271154366044877e-05, "loss": 0.5932, "step": 23755 }, { "epoch": 0.6937125005474373, "grad_norm": 0.5469950267878292, "learning_rate": 4.2708840227088404e-05, "loss": 0.603, "step": 23760 }, { "epoch": 0.6938584838177545, "grad_norm": 0.5285854877163993, "learning_rate": 4.270613679372804e-05, "loss": 0.6381, "step": 23765 }, { "epoch": 0.6940044670880717, "grad_norm": 0.473893111691428, "learning_rate": 4.2703433360367666e-05, "loss": 0.5817, "step": 23770 }, { "epoch": 0.694150450358389, "grad_norm": 0.5308266427323016, "learning_rate": 4.27007299270073e-05, "loss": 0.6095, "step": 23775 }, { "epoch": 0.6942964336287062, "grad_norm": 0.4957947266398436, "learning_rate": 4.2698026493646934e-05, "loss": 0.5907, "step": 23780 }, { "epoch": 0.6944424168990234, "grad_norm": 0.6138321711409171, "learning_rate": 4.269532306028657e-05, "loss": 0.6114, "step": 23785 }, { "epoch": 0.6945884001693406, "grad_norm": 0.5039152898543285, "learning_rate": 4.26926196269262e-05, "loss": 0.6106, "step": 23790 }, { "epoch": 0.6947343834396578, "grad_norm": 0.45394597209405774, "learning_rate": 4.268991619356583e-05, "loss": 0.5585, "step": 23795 }, { "epoch": 0.694880366709975, "grad_norm": 0.45595400862994434, "learning_rate": 4.268721276020546e-05, "loss": 0.5626, "step": 23800 }, { "epoch": 0.6950263499802922, "grad_norm": 0.7489840625849268, "learning_rate": 4.26845093268451e-05, "loss": 0.5646, "step": 23805 }, { "epoch": 0.6951723332506095, "grad_norm": 0.4813535528086252, "learning_rate": 4.2681805893484724e-05, "loss": 0.6206, "step": 23810 }, { "epoch": 0.6953183165209267, "grad_norm": 0.5283824740053781, "learning_rate": 4.267910246012436e-05, "loss": 0.5929, "step": 23815 }, { "epoch": 0.6954642997912439, "grad_norm": 0.508388707526176, "learning_rate": 4.267639902676399e-05, "loss": 0.6258, "step": 23820 }, { "epoch": 0.6956102830615611, "grad_norm": 0.523332810388329, "learning_rate": 4.2673695593403626e-05, "loss": 0.5873, "step": 23825 }, { "epoch": 0.6957562663318784, "grad_norm": 0.5017412463236778, "learning_rate": 4.2670992160043253e-05, "loss": 0.5769, "step": 23830 }, { "epoch": 0.6959022496021956, "grad_norm": 0.5339916094619968, "learning_rate": 4.266828872668289e-05, "loss": 0.5801, "step": 23835 }, { "epoch": 0.6960482328725128, "grad_norm": 0.5125428996854745, "learning_rate": 4.266558529332252e-05, "loss": 0.5807, "step": 23840 }, { "epoch": 0.6961942161428301, "grad_norm": 0.48055254462671754, "learning_rate": 4.2662881859962156e-05, "loss": 0.5899, "step": 23845 }, { "epoch": 0.6963401994131473, "grad_norm": 0.5280870865989936, "learning_rate": 4.266017842660179e-05, "loss": 0.5794, "step": 23850 }, { "epoch": 0.6964861826834645, "grad_norm": 0.47829270967683263, "learning_rate": 4.265747499324142e-05, "loss": 0.5708, "step": 23855 }, { "epoch": 0.6966321659537817, "grad_norm": 0.514965588014678, "learning_rate": 4.265477155988105e-05, "loss": 0.6106, "step": 23860 }, { "epoch": 0.6967781492240989, "grad_norm": 0.5313348605379428, "learning_rate": 4.2652068126520685e-05, "loss": 0.584, "step": 23865 }, { "epoch": 0.6969241324944161, "grad_norm": 0.48794774071634056, "learning_rate": 4.264936469316031e-05, "loss": 0.5785, "step": 23870 }, { "epoch": 0.6970701157647333, "grad_norm": 0.5067381217061098, "learning_rate": 4.2646661259799946e-05, "loss": 0.6057, "step": 23875 }, { "epoch": 0.6972160990350506, "grad_norm": 0.5137357256725691, "learning_rate": 4.264395782643958e-05, "loss": 0.5631, "step": 23880 }, { "epoch": 0.6973620823053678, "grad_norm": 0.5152062242979465, "learning_rate": 4.2641254393079214e-05, "loss": 0.5304, "step": 23885 }, { "epoch": 0.697508065575685, "grad_norm": 0.5002400849826495, "learning_rate": 4.263855095971884e-05, "loss": 0.5487, "step": 23890 }, { "epoch": 0.6976540488460022, "grad_norm": 0.5079502723957606, "learning_rate": 4.263584752635848e-05, "loss": 0.5925, "step": 23895 }, { "epoch": 0.6978000321163195, "grad_norm": 0.5087971019669811, "learning_rate": 4.263314409299811e-05, "loss": 0.6017, "step": 23900 }, { "epoch": 0.6979460153866367, "grad_norm": 0.5231817409372247, "learning_rate": 4.263044065963774e-05, "loss": 0.6245, "step": 23905 }, { "epoch": 0.6980919986569539, "grad_norm": 0.46355729008042346, "learning_rate": 4.262773722627738e-05, "loss": 0.5604, "step": 23910 }, { "epoch": 0.6982379819272712, "grad_norm": 0.512530364660406, "learning_rate": 4.2625033792917005e-05, "loss": 0.5769, "step": 23915 }, { "epoch": 0.6983839651975884, "grad_norm": 0.46848831374568606, "learning_rate": 4.262233035955664e-05, "loss": 0.5824, "step": 23920 }, { "epoch": 0.6985299484679056, "grad_norm": 0.48161738509405794, "learning_rate": 4.261962692619627e-05, "loss": 0.586, "step": 23925 }, { "epoch": 0.6986759317382228, "grad_norm": 0.4765232483598049, "learning_rate": 4.26169234928359e-05, "loss": 0.5779, "step": 23930 }, { "epoch": 0.6988219150085401, "grad_norm": 0.42610460245564263, "learning_rate": 4.2614220059475534e-05, "loss": 0.5548, "step": 23935 }, { "epoch": 0.6989678982788572, "grad_norm": 0.5392976219647954, "learning_rate": 4.261151662611517e-05, "loss": 0.6066, "step": 23940 }, { "epoch": 0.6991138815491744, "grad_norm": 0.4960060825275455, "learning_rate": 4.2608813192754795e-05, "loss": 0.6103, "step": 23945 }, { "epoch": 0.6992598648194916, "grad_norm": 0.48561076259803815, "learning_rate": 4.260610975939443e-05, "loss": 0.5993, "step": 23950 }, { "epoch": 0.6994058480898089, "grad_norm": 0.49285327244102306, "learning_rate": 4.260340632603407e-05, "loss": 0.55, "step": 23955 }, { "epoch": 0.6995518313601261, "grad_norm": 0.490407281043911, "learning_rate": 4.26007028926737e-05, "loss": 0.5838, "step": 23960 }, { "epoch": 0.6996978146304433, "grad_norm": 0.5265446294930562, "learning_rate": 4.259799945931333e-05, "loss": 0.5852, "step": 23965 }, { "epoch": 0.6998437979007606, "grad_norm": 0.5167936782881754, "learning_rate": 4.2595296025952965e-05, "loss": 0.6033, "step": 23970 }, { "epoch": 0.6999897811710778, "grad_norm": 0.5106525337474122, "learning_rate": 4.259259259259259e-05, "loss": 0.6192, "step": 23975 }, { "epoch": 0.700135764441395, "grad_norm": 0.49804282126739974, "learning_rate": 4.2589889159232226e-05, "loss": 0.5634, "step": 23980 }, { "epoch": 0.7002817477117123, "grad_norm": 0.5331362077064751, "learning_rate": 4.258718572587186e-05, "loss": 0.602, "step": 23985 }, { "epoch": 0.7004277309820295, "grad_norm": 0.46902020824653445, "learning_rate": 4.258448229251149e-05, "loss": 0.565, "step": 23990 }, { "epoch": 0.7005737142523467, "grad_norm": 0.5175592312685985, "learning_rate": 4.258177885915112e-05, "loss": 0.5869, "step": 23995 }, { "epoch": 0.7007196975226639, "grad_norm": 0.49437811561593853, "learning_rate": 4.2579075425790756e-05, "loss": 0.6072, "step": 24000 }, { "epoch": 0.7008656807929812, "grad_norm": 0.4730443760049917, "learning_rate": 4.257637199243038e-05, "loss": 0.5796, "step": 24005 }, { "epoch": 0.7010116640632984, "grad_norm": 0.5059096835536243, "learning_rate": 4.2573668559070024e-05, "loss": 0.6057, "step": 24010 }, { "epoch": 0.7011576473336155, "grad_norm": 0.46288952828260543, "learning_rate": 4.257096512570966e-05, "loss": 0.5642, "step": 24015 }, { "epoch": 0.7013036306039327, "grad_norm": 0.5243278714011069, "learning_rate": 4.2568261692349285e-05, "loss": 0.6072, "step": 24020 }, { "epoch": 0.70144961387425, "grad_norm": 0.4671655069405424, "learning_rate": 4.256555825898892e-05, "loss": 0.55, "step": 24025 }, { "epoch": 0.7015955971445672, "grad_norm": 0.5106877522318314, "learning_rate": 4.256285482562855e-05, "loss": 0.5666, "step": 24030 }, { "epoch": 0.7017415804148844, "grad_norm": 0.47969120648642466, "learning_rate": 4.256015139226818e-05, "loss": 0.5711, "step": 24035 }, { "epoch": 0.7018875636852017, "grad_norm": 0.4831422542718738, "learning_rate": 4.2557447958907814e-05, "loss": 0.5917, "step": 24040 }, { "epoch": 0.7020335469555189, "grad_norm": 0.47231653042737765, "learning_rate": 4.255474452554745e-05, "loss": 0.6123, "step": 24045 }, { "epoch": 0.7021795302258361, "grad_norm": 0.47931027595585335, "learning_rate": 4.2552041092187075e-05, "loss": 0.6025, "step": 24050 }, { "epoch": 0.7023255134961534, "grad_norm": 0.515542659523576, "learning_rate": 4.254933765882671e-05, "loss": 0.5877, "step": 24055 }, { "epoch": 0.7024714967664706, "grad_norm": 0.4615385013868041, "learning_rate": 4.2546634225466343e-05, "loss": 0.5502, "step": 24060 }, { "epoch": 0.7026174800367878, "grad_norm": 0.45807436469602403, "learning_rate": 4.254393079210598e-05, "loss": 0.6063, "step": 24065 }, { "epoch": 0.702763463307105, "grad_norm": 0.4864409790450951, "learning_rate": 4.254122735874561e-05, "loss": 0.5824, "step": 24070 }, { "epoch": 0.7029094465774223, "grad_norm": 0.5344407041375028, "learning_rate": 4.2538523925385245e-05, "loss": 0.6076, "step": 24075 }, { "epoch": 0.7030554298477395, "grad_norm": 0.48816161457198104, "learning_rate": 4.253582049202487e-05, "loss": 0.5715, "step": 24080 }, { "epoch": 0.7032014131180566, "grad_norm": 0.481932479130352, "learning_rate": 4.253311705866451e-05, "loss": 0.5759, "step": 24085 }, { "epoch": 0.7033473963883738, "grad_norm": 0.5455092611450545, "learning_rate": 4.253041362530414e-05, "loss": 0.5903, "step": 24090 }, { "epoch": 0.7034933796586911, "grad_norm": 0.47892566521329827, "learning_rate": 4.252771019194377e-05, "loss": 0.6031, "step": 24095 }, { "epoch": 0.7036393629290083, "grad_norm": 0.5250069833186936, "learning_rate": 4.25250067585834e-05, "loss": 0.5851, "step": 24100 }, { "epoch": 0.7037853461993255, "grad_norm": 0.5264233412181362, "learning_rate": 4.2522303325223036e-05, "loss": 0.6209, "step": 24105 }, { "epoch": 0.7039313294696428, "grad_norm": 0.5169995973895997, "learning_rate": 4.251959989186266e-05, "loss": 0.6001, "step": 24110 }, { "epoch": 0.70407731273996, "grad_norm": 0.5232356419915986, "learning_rate": 4.25168964585023e-05, "loss": 0.63, "step": 24115 }, { "epoch": 0.7042232960102772, "grad_norm": 0.5071623987031951, "learning_rate": 4.251419302514193e-05, "loss": 0.5487, "step": 24120 }, { "epoch": 0.7043692792805945, "grad_norm": 0.5064000598856209, "learning_rate": 4.2511489591781565e-05, "loss": 0.5656, "step": 24125 }, { "epoch": 0.7045152625509117, "grad_norm": 0.4967644672316962, "learning_rate": 4.25087861584212e-05, "loss": 0.5885, "step": 24130 }, { "epoch": 0.7046612458212289, "grad_norm": 0.5211128721289517, "learning_rate": 4.250608272506083e-05, "loss": 0.5892, "step": 24135 }, { "epoch": 0.7048072290915461, "grad_norm": 0.5062200674333561, "learning_rate": 4.250337929170046e-05, "loss": 0.5973, "step": 24140 }, { "epoch": 0.7049532123618634, "grad_norm": 0.46870526361754494, "learning_rate": 4.2500675858340095e-05, "loss": 0.5933, "step": 24145 }, { "epoch": 0.7050991956321806, "grad_norm": 0.4900003879786673, "learning_rate": 4.249797242497973e-05, "loss": 0.6097, "step": 24150 }, { "epoch": 0.7052451789024978, "grad_norm": 0.5185838647992442, "learning_rate": 4.2495268991619356e-05, "loss": 0.617, "step": 24155 }, { "epoch": 0.705391162172815, "grad_norm": 0.5121005793773791, "learning_rate": 4.249256555825899e-05, "loss": 0.554, "step": 24160 }, { "epoch": 0.7055371454431322, "grad_norm": 0.49445057393492853, "learning_rate": 4.2489862124898624e-05, "loss": 0.6073, "step": 24165 }, { "epoch": 0.7056831287134494, "grad_norm": 0.500812558453981, "learning_rate": 4.248715869153825e-05, "loss": 0.5965, "step": 24170 }, { "epoch": 0.7058291119837666, "grad_norm": 0.5256112402957168, "learning_rate": 4.2484455258177885e-05, "loss": 0.5885, "step": 24175 }, { "epoch": 0.7059750952540839, "grad_norm": 0.5036373637085261, "learning_rate": 4.2481751824817526e-05, "loss": 0.6217, "step": 24180 }, { "epoch": 0.7061210785244011, "grad_norm": 0.5619988452379278, "learning_rate": 4.247904839145715e-05, "loss": 0.6214, "step": 24185 }, { "epoch": 0.7062670617947183, "grad_norm": 0.5367207650956979, "learning_rate": 4.247634495809679e-05, "loss": 0.5952, "step": 24190 }, { "epoch": 0.7064130450650355, "grad_norm": 0.48214609806681047, "learning_rate": 4.247364152473642e-05, "loss": 0.5938, "step": 24195 }, { "epoch": 0.7065590283353528, "grad_norm": 0.5165275266744674, "learning_rate": 4.247093809137605e-05, "loss": 0.6161, "step": 24200 }, { "epoch": 0.70670501160567, "grad_norm": 0.48417661617329555, "learning_rate": 4.246823465801568e-05, "loss": 0.5868, "step": 24205 }, { "epoch": 0.7068509948759872, "grad_norm": 0.5172743044116466, "learning_rate": 4.2465531224655316e-05, "loss": 0.6134, "step": 24210 }, { "epoch": 0.7069969781463045, "grad_norm": 0.4974831380153558, "learning_rate": 4.2462827791294944e-05, "loss": 0.6157, "step": 24215 }, { "epoch": 0.7071429614166217, "grad_norm": 0.5228566232931773, "learning_rate": 4.246012435793458e-05, "loss": 0.6131, "step": 24220 }, { "epoch": 0.7072889446869389, "grad_norm": 0.5353407403340003, "learning_rate": 4.245742092457421e-05, "loss": 0.5949, "step": 24225 }, { "epoch": 0.707434927957256, "grad_norm": 0.49430892662326203, "learning_rate": 4.245471749121384e-05, "loss": 0.5774, "step": 24230 }, { "epoch": 0.7075809112275733, "grad_norm": 0.549998495611076, "learning_rate": 4.245201405785348e-05, "loss": 0.6241, "step": 24235 }, { "epoch": 0.7077268944978905, "grad_norm": 0.4805698870026231, "learning_rate": 4.2449310624493114e-05, "loss": 0.574, "step": 24240 }, { "epoch": 0.7078728777682077, "grad_norm": 0.48404788578566815, "learning_rate": 4.244660719113274e-05, "loss": 0.5821, "step": 24245 }, { "epoch": 0.708018861038525, "grad_norm": 0.48775832788211493, "learning_rate": 4.2443903757772375e-05, "loss": 0.5612, "step": 24250 }, { "epoch": 0.7081648443088422, "grad_norm": 0.5403939846761175, "learning_rate": 4.244120032441201e-05, "loss": 0.6303, "step": 24255 }, { "epoch": 0.7083108275791594, "grad_norm": 0.5014369842743176, "learning_rate": 4.2438496891051636e-05, "loss": 0.577, "step": 24260 }, { "epoch": 0.7084568108494766, "grad_norm": 0.5011521730378313, "learning_rate": 4.243579345769127e-05, "loss": 0.591, "step": 24265 }, { "epoch": 0.7086027941197939, "grad_norm": 0.5121189365461407, "learning_rate": 4.2433090024330904e-05, "loss": 0.5943, "step": 24270 }, { "epoch": 0.7087487773901111, "grad_norm": 0.4788888606631133, "learning_rate": 4.243038659097053e-05, "loss": 0.5723, "step": 24275 }, { "epoch": 0.7088947606604283, "grad_norm": 0.4891601448163312, "learning_rate": 4.2427683157610165e-05, "loss": 0.6164, "step": 24280 }, { "epoch": 0.7090407439307456, "grad_norm": 0.4717973164278389, "learning_rate": 4.24249797242498e-05, "loss": 0.5643, "step": 24285 }, { "epoch": 0.7091867272010628, "grad_norm": 0.49952843753707177, "learning_rate": 4.242227629088943e-05, "loss": 0.6083, "step": 24290 }, { "epoch": 0.70933271047138, "grad_norm": 0.48235666388326803, "learning_rate": 4.241957285752907e-05, "loss": 0.6015, "step": 24295 }, { "epoch": 0.7094786937416973, "grad_norm": 0.46369340835701867, "learning_rate": 4.24168694241687e-05, "loss": 0.6032, "step": 24300 }, { "epoch": 0.7096246770120144, "grad_norm": 0.47488143659611387, "learning_rate": 4.241416599080833e-05, "loss": 0.5634, "step": 24305 }, { "epoch": 0.7097706602823316, "grad_norm": 0.48915129466377266, "learning_rate": 4.241146255744796e-05, "loss": 0.5911, "step": 24310 }, { "epoch": 0.7099166435526488, "grad_norm": 0.565852410416949, "learning_rate": 4.24087591240876e-05, "loss": 0.5819, "step": 24315 }, { "epoch": 0.7100626268229661, "grad_norm": 0.4910981850267739, "learning_rate": 4.2406055690727224e-05, "loss": 0.5806, "step": 24320 }, { "epoch": 0.7102086100932833, "grad_norm": 0.5082849082327537, "learning_rate": 4.240335225736686e-05, "loss": 0.5799, "step": 24325 }, { "epoch": 0.7103545933636005, "grad_norm": 0.5036691957805317, "learning_rate": 4.240064882400649e-05, "loss": 0.5931, "step": 24330 }, { "epoch": 0.7105005766339177, "grad_norm": 0.46145249424117946, "learning_rate": 4.239794539064612e-05, "loss": 0.5863, "step": 24335 }, { "epoch": 0.710646559904235, "grad_norm": 0.5078041370684122, "learning_rate": 4.239524195728575e-05, "loss": 0.5837, "step": 24340 }, { "epoch": 0.7107925431745522, "grad_norm": 0.49416330891203863, "learning_rate": 4.239253852392539e-05, "loss": 0.6156, "step": 24345 }, { "epoch": 0.7109385264448694, "grad_norm": 0.45965389960503994, "learning_rate": 4.238983509056502e-05, "loss": 0.5968, "step": 24350 }, { "epoch": 0.7110845097151867, "grad_norm": 0.5399629001025091, "learning_rate": 4.2387131657204655e-05, "loss": 0.569, "step": 24355 }, { "epoch": 0.7112304929855039, "grad_norm": 0.4876703412253951, "learning_rate": 4.238442822384428e-05, "loss": 0.5738, "step": 24360 }, { "epoch": 0.7113764762558211, "grad_norm": 0.49672612572764363, "learning_rate": 4.2381724790483917e-05, "loss": 0.6052, "step": 24365 }, { "epoch": 0.7115224595261384, "grad_norm": 0.5299023935244859, "learning_rate": 4.237902135712355e-05, "loss": 0.5799, "step": 24370 }, { "epoch": 0.7116684427964556, "grad_norm": 0.521288711691925, "learning_rate": 4.2376317923763185e-05, "loss": 0.5886, "step": 24375 }, { "epoch": 0.7118144260667727, "grad_norm": 0.5036240178676327, "learning_rate": 4.237361449040281e-05, "loss": 0.6241, "step": 24380 }, { "epoch": 0.7119604093370899, "grad_norm": 0.494335147386674, "learning_rate": 4.2370911057042446e-05, "loss": 0.6025, "step": 24385 }, { "epoch": 0.7121063926074072, "grad_norm": 0.4824011254966117, "learning_rate": 4.236820762368208e-05, "loss": 0.6032, "step": 24390 }, { "epoch": 0.7122523758777244, "grad_norm": 0.48444966968670383, "learning_rate": 4.236550419032171e-05, "loss": 0.5162, "step": 24395 }, { "epoch": 0.7123983591480416, "grad_norm": 0.5174049317552577, "learning_rate": 4.236280075696134e-05, "loss": 0.5897, "step": 24400 }, { "epoch": 0.7125443424183588, "grad_norm": 0.5064256820363239, "learning_rate": 4.2360097323600975e-05, "loss": 0.5749, "step": 24405 }, { "epoch": 0.7126903256886761, "grad_norm": 0.47280024203058085, "learning_rate": 4.235739389024061e-05, "loss": 0.5954, "step": 24410 }, { "epoch": 0.7128363089589933, "grad_norm": 0.4667003966102043, "learning_rate": 4.235469045688024e-05, "loss": 0.5741, "step": 24415 }, { "epoch": 0.7129822922293105, "grad_norm": 0.46509749222084545, "learning_rate": 4.235198702351987e-05, "loss": 0.5664, "step": 24420 }, { "epoch": 0.7131282754996278, "grad_norm": 0.47003320325522174, "learning_rate": 4.2349283590159504e-05, "loss": 0.6017, "step": 24425 }, { "epoch": 0.713274258769945, "grad_norm": 0.47283593635950943, "learning_rate": 4.234658015679914e-05, "loss": 0.5838, "step": 24430 }, { "epoch": 0.7134202420402622, "grad_norm": 0.4879342105103843, "learning_rate": 4.234387672343877e-05, "loss": 0.6043, "step": 24435 }, { "epoch": 0.7135662253105794, "grad_norm": 0.5035436017539641, "learning_rate": 4.23411732900784e-05, "loss": 0.5683, "step": 24440 }, { "epoch": 0.7137122085808967, "grad_norm": 0.4679848890484655, "learning_rate": 4.2338469856718034e-05, "loss": 0.5551, "step": 24445 }, { "epoch": 0.7138581918512138, "grad_norm": 0.5015933629935313, "learning_rate": 4.233576642335767e-05, "loss": 0.5696, "step": 24450 }, { "epoch": 0.714004175121531, "grad_norm": 0.4772785365268378, "learning_rate": 4.2333062989997295e-05, "loss": 0.6104, "step": 24455 }, { "epoch": 0.7141501583918483, "grad_norm": 0.4676465321446306, "learning_rate": 4.233035955663693e-05, "loss": 0.5906, "step": 24460 }, { "epoch": 0.7142961416621655, "grad_norm": 0.4679315719656869, "learning_rate": 4.232765612327656e-05, "loss": 0.5954, "step": 24465 }, { "epoch": 0.7144421249324827, "grad_norm": 0.4702134410943967, "learning_rate": 4.23249526899162e-05, "loss": 0.5715, "step": 24470 }, { "epoch": 0.7145881082027999, "grad_norm": 0.5031674681791997, "learning_rate": 4.232224925655583e-05, "loss": 0.5847, "step": 24475 }, { "epoch": 0.7147340914731172, "grad_norm": 0.5333346269366778, "learning_rate": 4.231954582319546e-05, "loss": 0.6112, "step": 24480 }, { "epoch": 0.7148800747434344, "grad_norm": 0.48311819758748464, "learning_rate": 4.231684238983509e-05, "loss": 0.5713, "step": 24485 }, { "epoch": 0.7150260580137516, "grad_norm": 0.46222081377456886, "learning_rate": 4.2314138956474726e-05, "loss": 0.5942, "step": 24490 }, { "epoch": 0.7151720412840689, "grad_norm": 0.49170377431198675, "learning_rate": 4.231143552311435e-05, "loss": 0.5921, "step": 24495 }, { "epoch": 0.7153180245543861, "grad_norm": 0.4644550007094843, "learning_rate": 4.230873208975399e-05, "loss": 0.5859, "step": 24500 }, { "epoch": 0.7154640078247033, "grad_norm": 0.4603653809707847, "learning_rate": 4.230602865639362e-05, "loss": 0.5863, "step": 24505 }, { "epoch": 0.7156099910950205, "grad_norm": 0.4783596124488796, "learning_rate": 4.2303325223033255e-05, "loss": 0.5898, "step": 24510 }, { "epoch": 0.7157559743653378, "grad_norm": 0.5119321535252713, "learning_rate": 4.230062178967288e-05, "loss": 0.6205, "step": 24515 }, { "epoch": 0.715901957635655, "grad_norm": 0.4932638085477284, "learning_rate": 4.2297918356312523e-05, "loss": 0.5823, "step": 24520 }, { "epoch": 0.7160479409059721, "grad_norm": 0.46229065643423434, "learning_rate": 4.229521492295215e-05, "loss": 0.5633, "step": 24525 }, { "epoch": 0.7161939241762894, "grad_norm": 0.5277493772977507, "learning_rate": 4.2292511489591785e-05, "loss": 0.5679, "step": 24530 }, { "epoch": 0.7163399074466066, "grad_norm": 0.5152019281889486, "learning_rate": 4.228980805623142e-05, "loss": 0.5828, "step": 24535 }, { "epoch": 0.7164858907169238, "grad_norm": 0.48653260321553743, "learning_rate": 4.2287104622871046e-05, "loss": 0.612, "step": 24540 }, { "epoch": 0.716631873987241, "grad_norm": 0.476572279699694, "learning_rate": 4.228440118951068e-05, "loss": 0.5906, "step": 24545 }, { "epoch": 0.7167778572575583, "grad_norm": 0.47481282898979127, "learning_rate": 4.2281697756150314e-05, "loss": 0.5888, "step": 24550 }, { "epoch": 0.7169238405278755, "grad_norm": 0.43443404783205286, "learning_rate": 4.227899432278994e-05, "loss": 0.5503, "step": 24555 }, { "epoch": 0.7170698237981927, "grad_norm": 0.5085266911565729, "learning_rate": 4.2276290889429575e-05, "loss": 0.6198, "step": 24560 }, { "epoch": 0.71721580706851, "grad_norm": 0.9198094976847773, "learning_rate": 4.227358745606921e-05, "loss": 0.5865, "step": 24565 }, { "epoch": 0.7173617903388272, "grad_norm": 0.47642568790127454, "learning_rate": 4.227088402270884e-05, "loss": 0.5832, "step": 24570 }, { "epoch": 0.7175077736091444, "grad_norm": 0.5093829623064422, "learning_rate": 4.226818058934848e-05, "loss": 0.5871, "step": 24575 }, { "epoch": 0.7176537568794616, "grad_norm": 0.4992524740389062, "learning_rate": 4.226547715598811e-05, "loss": 0.6031, "step": 24580 }, { "epoch": 0.7177997401497789, "grad_norm": 0.5401730443567955, "learning_rate": 4.226277372262774e-05, "loss": 0.608, "step": 24585 }, { "epoch": 0.7179457234200961, "grad_norm": 0.5347493301202451, "learning_rate": 4.226007028926737e-05, "loss": 0.5792, "step": 24590 }, { "epoch": 0.7180917066904133, "grad_norm": 0.4754799449614113, "learning_rate": 4.2257366855907006e-05, "loss": 0.6167, "step": 24595 }, { "epoch": 0.7182376899607305, "grad_norm": 0.5147289291307234, "learning_rate": 4.2254663422546634e-05, "loss": 0.5998, "step": 24600 }, { "epoch": 0.7183836732310477, "grad_norm": 0.5120209785838661, "learning_rate": 4.225195998918627e-05, "loss": 0.6094, "step": 24605 }, { "epoch": 0.7185296565013649, "grad_norm": 0.5255669581945015, "learning_rate": 4.22492565558259e-05, "loss": 0.5948, "step": 24610 }, { "epoch": 0.7186756397716821, "grad_norm": 0.4218206503994872, "learning_rate": 4.224655312246553e-05, "loss": 0.5785, "step": 24615 }, { "epoch": 0.7188216230419994, "grad_norm": 0.46630542538281056, "learning_rate": 4.224384968910516e-05, "loss": 0.626, "step": 24620 }, { "epoch": 0.7189676063123166, "grad_norm": 0.46410908223777025, "learning_rate": 4.22411462557448e-05, "loss": 0.5715, "step": 24625 }, { "epoch": 0.7191135895826338, "grad_norm": 0.4865415479503874, "learning_rate": 4.2238442822384424e-05, "loss": 0.6102, "step": 24630 }, { "epoch": 0.7192595728529511, "grad_norm": 0.45876189044058596, "learning_rate": 4.2235739389024065e-05, "loss": 0.5513, "step": 24635 }, { "epoch": 0.7194055561232683, "grad_norm": 0.49238344543452855, "learning_rate": 4.22330359556637e-05, "loss": 0.6017, "step": 24640 }, { "epoch": 0.7195515393935855, "grad_norm": 0.5339893958937911, "learning_rate": 4.2230332522303326e-05, "loss": 0.6182, "step": 24645 }, { "epoch": 0.7196975226639027, "grad_norm": 0.5054291460921172, "learning_rate": 4.222762908894296e-05, "loss": 0.606, "step": 24650 }, { "epoch": 0.71984350593422, "grad_norm": 0.517347075829241, "learning_rate": 4.2224925655582594e-05, "loss": 0.5464, "step": 24655 }, { "epoch": 0.7199894892045372, "grad_norm": 0.5047165754321212, "learning_rate": 4.222222222222222e-05, "loss": 0.6088, "step": 24660 }, { "epoch": 0.7201354724748544, "grad_norm": 0.4765624958923576, "learning_rate": 4.2219518788861856e-05, "loss": 0.568, "step": 24665 }, { "epoch": 0.7202814557451716, "grad_norm": 0.4711149379271621, "learning_rate": 4.221681535550149e-05, "loss": 0.6172, "step": 24670 }, { "epoch": 0.7204274390154888, "grad_norm": 0.4897649947506698, "learning_rate": 4.221411192214112e-05, "loss": 0.6233, "step": 24675 }, { "epoch": 0.720573422285806, "grad_norm": 0.48918066089970946, "learning_rate": 4.221140848878075e-05, "loss": 0.5634, "step": 24680 }, { "epoch": 0.7207194055561232, "grad_norm": 0.550303242190007, "learning_rate": 4.2208705055420385e-05, "loss": 0.6268, "step": 24685 }, { "epoch": 0.7208653888264405, "grad_norm": 0.493313528499143, "learning_rate": 4.220600162206002e-05, "loss": 0.6161, "step": 24690 }, { "epoch": 0.7210113720967577, "grad_norm": 0.47334533365772935, "learning_rate": 4.220329818869965e-05, "loss": 0.5968, "step": 24695 }, { "epoch": 0.7211573553670749, "grad_norm": 0.4896269484042467, "learning_rate": 4.220059475533929e-05, "loss": 0.5776, "step": 24700 }, { "epoch": 0.7213033386373922, "grad_norm": 0.5830331014839358, "learning_rate": 4.2197891321978914e-05, "loss": 0.6155, "step": 24705 }, { "epoch": 0.7214493219077094, "grad_norm": 0.465536864126198, "learning_rate": 4.219518788861855e-05, "loss": 0.5525, "step": 24710 }, { "epoch": 0.7215953051780266, "grad_norm": 0.47760665173050165, "learning_rate": 4.219248445525818e-05, "loss": 0.5845, "step": 24715 }, { "epoch": 0.7217412884483438, "grad_norm": 0.4848553318909875, "learning_rate": 4.218978102189781e-05, "loss": 0.5804, "step": 24720 }, { "epoch": 0.7218872717186611, "grad_norm": 0.5078080962089863, "learning_rate": 4.218707758853744e-05, "loss": 0.6056, "step": 24725 }, { "epoch": 0.7220332549889783, "grad_norm": 0.4728520168628585, "learning_rate": 4.218437415517708e-05, "loss": 0.6182, "step": 24730 }, { "epoch": 0.7221792382592955, "grad_norm": 0.4466511057065132, "learning_rate": 4.2181670721816705e-05, "loss": 0.5858, "step": 24735 }, { "epoch": 0.7223252215296128, "grad_norm": 0.46790903155476626, "learning_rate": 4.217896728845634e-05, "loss": 0.6007, "step": 24740 }, { "epoch": 0.7224712047999299, "grad_norm": 0.5242922454868851, "learning_rate": 4.217626385509598e-05, "loss": 0.6051, "step": 24745 }, { "epoch": 0.7226171880702471, "grad_norm": 0.5294501358949844, "learning_rate": 4.217356042173561e-05, "loss": 0.6102, "step": 24750 }, { "epoch": 0.7227631713405643, "grad_norm": 0.5066461733866396, "learning_rate": 4.217085698837524e-05, "loss": 0.6412, "step": 24755 }, { "epoch": 0.7229091546108816, "grad_norm": 0.5113678698885814, "learning_rate": 4.2168153555014875e-05, "loss": 0.5831, "step": 24760 }, { "epoch": 0.7230551378811988, "grad_norm": 0.48224806813209486, "learning_rate": 4.21654501216545e-05, "loss": 0.5644, "step": 24765 }, { "epoch": 0.723201121151516, "grad_norm": 0.46309459134139036, "learning_rate": 4.2162746688294136e-05, "loss": 0.5983, "step": 24770 }, { "epoch": 0.7233471044218333, "grad_norm": 0.4795629726809725, "learning_rate": 4.216004325493377e-05, "loss": 0.5737, "step": 24775 }, { "epoch": 0.7234930876921505, "grad_norm": 0.4703714988732238, "learning_rate": 4.21573398215734e-05, "loss": 0.6004, "step": 24780 }, { "epoch": 0.7236390709624677, "grad_norm": 0.47950603720706364, "learning_rate": 4.215463638821303e-05, "loss": 0.5792, "step": 24785 }, { "epoch": 0.7237850542327849, "grad_norm": 0.5206738297944766, "learning_rate": 4.2151932954852665e-05, "loss": 0.532, "step": 24790 }, { "epoch": 0.7239310375031022, "grad_norm": 0.5961971920945681, "learning_rate": 4.214922952149229e-05, "loss": 0.6164, "step": 24795 }, { "epoch": 0.7240770207734194, "grad_norm": 0.5524257622405722, "learning_rate": 4.2146526088131926e-05, "loss": 0.6206, "step": 24800 }, { "epoch": 0.7242230040437366, "grad_norm": 0.4986468526153715, "learning_rate": 4.214382265477157e-05, "loss": 0.5639, "step": 24805 }, { "epoch": 0.7243689873140539, "grad_norm": 0.4921065361342108, "learning_rate": 4.2141119221411194e-05, "loss": 0.6014, "step": 24810 }, { "epoch": 0.724514970584371, "grad_norm": 0.8082876843565175, "learning_rate": 4.213841578805083e-05, "loss": 0.6267, "step": 24815 }, { "epoch": 0.7246609538546882, "grad_norm": 0.556894566686059, "learning_rate": 4.213571235469046e-05, "loss": 0.6071, "step": 24820 }, { "epoch": 0.7248069371250054, "grad_norm": 0.5066805442456899, "learning_rate": 4.213300892133009e-05, "loss": 0.5909, "step": 24825 }, { "epoch": 0.7249529203953227, "grad_norm": 0.4646873634741852, "learning_rate": 4.2130305487969724e-05, "loss": 0.5877, "step": 24830 }, { "epoch": 0.7250989036656399, "grad_norm": 0.4545916431936843, "learning_rate": 4.212760205460936e-05, "loss": 0.5816, "step": 24835 }, { "epoch": 0.7252448869359571, "grad_norm": 0.5161281193526286, "learning_rate": 4.2124898621248985e-05, "loss": 0.5796, "step": 24840 }, { "epoch": 0.7253908702062744, "grad_norm": 0.5114288400169454, "learning_rate": 4.212219518788862e-05, "loss": 0.6056, "step": 24845 }, { "epoch": 0.7255368534765916, "grad_norm": 0.4811026825487915, "learning_rate": 4.211949175452825e-05, "loss": 0.5789, "step": 24850 }, { "epoch": 0.7256828367469088, "grad_norm": 0.48856997986821443, "learning_rate": 4.211678832116788e-05, "loss": 0.582, "step": 24855 }, { "epoch": 0.725828820017226, "grad_norm": 0.5099327649187019, "learning_rate": 4.211408488780752e-05, "loss": 0.5832, "step": 24860 }, { "epoch": 0.7259748032875433, "grad_norm": 0.5605781385481567, "learning_rate": 4.2111381454447155e-05, "loss": 0.5806, "step": 24865 }, { "epoch": 0.7261207865578605, "grad_norm": 0.45737453518803173, "learning_rate": 4.210867802108678e-05, "loss": 0.5586, "step": 24870 }, { "epoch": 0.7262667698281777, "grad_norm": 0.500848321075906, "learning_rate": 4.2105974587726416e-05, "loss": 0.5555, "step": 24875 }, { "epoch": 0.726412753098495, "grad_norm": 0.5188143149500105, "learning_rate": 4.210327115436605e-05, "loss": 0.5789, "step": 24880 }, { "epoch": 0.7265587363688122, "grad_norm": 0.4989351402336927, "learning_rate": 4.210056772100568e-05, "loss": 0.5797, "step": 24885 }, { "epoch": 0.7267047196391293, "grad_norm": 0.5687643699576566, "learning_rate": 4.209786428764531e-05, "loss": 0.6007, "step": 24890 }, { "epoch": 0.7268507029094465, "grad_norm": 0.479701850565727, "learning_rate": 4.2095160854284946e-05, "loss": 0.6288, "step": 24895 }, { "epoch": 0.7269966861797638, "grad_norm": 0.48788358918166463, "learning_rate": 4.209245742092457e-05, "loss": 0.5612, "step": 24900 }, { "epoch": 0.727142669450081, "grad_norm": 0.47890880472787617, "learning_rate": 4.208975398756421e-05, "loss": 0.5804, "step": 24905 }, { "epoch": 0.7272886527203982, "grad_norm": 0.4845609067273198, "learning_rate": 4.208705055420384e-05, "loss": 0.5751, "step": 24910 }, { "epoch": 0.7274346359907155, "grad_norm": 0.4894841711423139, "learning_rate": 4.2084347120843475e-05, "loss": 0.5952, "step": 24915 }, { "epoch": 0.7275806192610327, "grad_norm": 0.5311321538927718, "learning_rate": 4.208164368748311e-05, "loss": 0.5797, "step": 24920 }, { "epoch": 0.7277266025313499, "grad_norm": 0.5003481329573921, "learning_rate": 4.207894025412274e-05, "loss": 0.5893, "step": 24925 }, { "epoch": 0.7278725858016671, "grad_norm": 0.5256376404658065, "learning_rate": 4.207623682076237e-05, "loss": 0.5958, "step": 24930 }, { "epoch": 0.7280185690719844, "grad_norm": 0.5047654702159768, "learning_rate": 4.2073533387402004e-05, "loss": 0.5679, "step": 24935 }, { "epoch": 0.7281645523423016, "grad_norm": 0.4827130730130578, "learning_rate": 4.207082995404164e-05, "loss": 0.5598, "step": 24940 }, { "epoch": 0.7283105356126188, "grad_norm": 0.4356712229996355, "learning_rate": 4.2068126520681265e-05, "loss": 0.608, "step": 24945 }, { "epoch": 0.728456518882936, "grad_norm": 0.48878671303201204, "learning_rate": 4.20654230873209e-05, "loss": 0.6141, "step": 24950 }, { "epoch": 0.7286025021532533, "grad_norm": 0.4674936215120945, "learning_rate": 4.206271965396053e-05, "loss": 0.597, "step": 24955 }, { "epoch": 0.7287484854235705, "grad_norm": 0.45269510882163844, "learning_rate": 4.206001622060016e-05, "loss": 0.5713, "step": 24960 }, { "epoch": 0.7288944686938876, "grad_norm": 0.517676761935618, "learning_rate": 4.2057312787239795e-05, "loss": 0.6042, "step": 24965 }, { "epoch": 0.7290404519642049, "grad_norm": 0.5416172685908904, "learning_rate": 4.205460935387943e-05, "loss": 0.6277, "step": 24970 }, { "epoch": 0.7291864352345221, "grad_norm": 0.5181290667663853, "learning_rate": 4.205190592051906e-05, "loss": 0.5324, "step": 24975 }, { "epoch": 0.7293324185048393, "grad_norm": 0.4629351371227857, "learning_rate": 4.20492024871587e-05, "loss": 0.5697, "step": 24980 }, { "epoch": 0.7294784017751565, "grad_norm": 0.4633002736780155, "learning_rate": 4.2046499053798324e-05, "loss": 0.5448, "step": 24985 }, { "epoch": 0.7296243850454738, "grad_norm": 0.5536655652631722, "learning_rate": 4.204379562043796e-05, "loss": 0.6107, "step": 24990 }, { "epoch": 0.729770368315791, "grad_norm": 0.5105171269847276, "learning_rate": 4.204109218707759e-05, "loss": 0.6018, "step": 24995 }, { "epoch": 0.7299163515861082, "grad_norm": 0.5222841752906228, "learning_rate": 4.2038388753717226e-05, "loss": 0.5924, "step": 25000 }, { "epoch": 0.7300623348564255, "grad_norm": 0.4838999287643946, "learning_rate": 4.203568532035685e-05, "loss": 0.5718, "step": 25005 }, { "epoch": 0.7302083181267427, "grad_norm": 0.5271006308828909, "learning_rate": 4.203298188699649e-05, "loss": 0.6174, "step": 25010 }, { "epoch": 0.7303543013970599, "grad_norm": 0.528538415644494, "learning_rate": 4.203027845363612e-05, "loss": 0.5929, "step": 25015 }, { "epoch": 0.7305002846673772, "grad_norm": 0.5107852573184941, "learning_rate": 4.202757502027575e-05, "loss": 0.5746, "step": 25020 }, { "epoch": 0.7306462679376944, "grad_norm": 0.46631108669676924, "learning_rate": 4.202487158691538e-05, "loss": 0.5897, "step": 25025 }, { "epoch": 0.7307922512080116, "grad_norm": 0.48810530942008235, "learning_rate": 4.2022168153555016e-05, "loss": 0.5761, "step": 25030 }, { "epoch": 0.7309382344783287, "grad_norm": 0.5063959066093876, "learning_rate": 4.201946472019465e-05, "loss": 0.605, "step": 25035 }, { "epoch": 0.731084217748646, "grad_norm": 0.5494186725346651, "learning_rate": 4.2016761286834284e-05, "loss": 0.6099, "step": 25040 }, { "epoch": 0.7312302010189632, "grad_norm": 0.46887795844732816, "learning_rate": 4.201405785347391e-05, "loss": 0.6038, "step": 25045 }, { "epoch": 0.7313761842892804, "grad_norm": 0.44541032833317656, "learning_rate": 4.2011354420113546e-05, "loss": 0.5732, "step": 25050 }, { "epoch": 0.7315221675595976, "grad_norm": 0.49733331408682385, "learning_rate": 4.200865098675318e-05, "loss": 0.5711, "step": 25055 }, { "epoch": 0.7316681508299149, "grad_norm": 0.4770728612336073, "learning_rate": 4.2005947553392814e-05, "loss": 0.5637, "step": 25060 }, { "epoch": 0.7318141341002321, "grad_norm": 0.4565499049159063, "learning_rate": 4.200324412003244e-05, "loss": 0.5623, "step": 25065 }, { "epoch": 0.7319601173705493, "grad_norm": 0.5087457996909577, "learning_rate": 4.2000540686672075e-05, "loss": 0.5745, "step": 25070 }, { "epoch": 0.7321061006408666, "grad_norm": 0.48009267325043015, "learning_rate": 4.199783725331171e-05, "loss": 0.575, "step": 25075 }, { "epoch": 0.7322520839111838, "grad_norm": 0.47186509204929206, "learning_rate": 4.1995133819951336e-05, "loss": 0.6156, "step": 25080 }, { "epoch": 0.732398067181501, "grad_norm": 0.473218452929245, "learning_rate": 4.199243038659098e-05, "loss": 0.5887, "step": 25085 }, { "epoch": 0.7325440504518183, "grad_norm": 0.4778551517890215, "learning_rate": 4.1989726953230604e-05, "loss": 0.5899, "step": 25090 }, { "epoch": 0.7326900337221355, "grad_norm": 0.49457111127731485, "learning_rate": 4.198702351987024e-05, "loss": 0.5422, "step": 25095 }, { "epoch": 0.7328360169924527, "grad_norm": 0.48580742098652097, "learning_rate": 4.198432008650987e-05, "loss": 0.6059, "step": 25100 }, { "epoch": 0.7329820002627699, "grad_norm": 0.487473482322961, "learning_rate": 4.19816166531495e-05, "loss": 0.5877, "step": 25105 }, { "epoch": 0.7331279835330871, "grad_norm": 0.5658906510775153, "learning_rate": 4.1978913219789133e-05, "loss": 0.6172, "step": 25110 }, { "epoch": 0.7332739668034043, "grad_norm": 0.49917909513023007, "learning_rate": 4.197620978642877e-05, "loss": 0.5845, "step": 25115 }, { "epoch": 0.7334199500737215, "grad_norm": 0.47674995678298443, "learning_rate": 4.1973506353068395e-05, "loss": 0.5915, "step": 25120 }, { "epoch": 0.7335659333440387, "grad_norm": 0.4643416141327242, "learning_rate": 4.197080291970803e-05, "loss": 0.6074, "step": 25125 }, { "epoch": 0.733711916614356, "grad_norm": 0.5012953031156562, "learning_rate": 4.196809948634766e-05, "loss": 0.5518, "step": 25130 }, { "epoch": 0.7338578998846732, "grad_norm": 0.4880785566886002, "learning_rate": 4.19653960529873e-05, "loss": 0.6005, "step": 25135 }, { "epoch": 0.7340038831549904, "grad_norm": 0.4657222768091891, "learning_rate": 4.1962692619626924e-05, "loss": 0.5874, "step": 25140 }, { "epoch": 0.7341498664253077, "grad_norm": 0.4673303491301424, "learning_rate": 4.1959989186266565e-05, "loss": 0.5785, "step": 25145 }, { "epoch": 0.7342958496956249, "grad_norm": 0.5394812006323773, "learning_rate": 4.195728575290619e-05, "loss": 0.5724, "step": 25150 }, { "epoch": 0.7344418329659421, "grad_norm": 0.4679327870263159, "learning_rate": 4.1954582319545826e-05, "loss": 0.6159, "step": 25155 }, { "epoch": 0.7345878162362593, "grad_norm": 0.5344134084266441, "learning_rate": 4.195187888618546e-05, "loss": 0.6017, "step": 25160 }, { "epoch": 0.7347337995065766, "grad_norm": 0.5680163469842961, "learning_rate": 4.194917545282509e-05, "loss": 0.5861, "step": 25165 }, { "epoch": 0.7348797827768938, "grad_norm": 0.510367615601694, "learning_rate": 4.194647201946472e-05, "loss": 0.5745, "step": 25170 }, { "epoch": 0.735025766047211, "grad_norm": 0.4582318912169463, "learning_rate": 4.1943768586104355e-05, "loss": 0.5913, "step": 25175 }, { "epoch": 0.7351717493175282, "grad_norm": 0.5489791707451228, "learning_rate": 4.194106515274398e-05, "loss": 0.6344, "step": 25180 }, { "epoch": 0.7353177325878454, "grad_norm": 0.5440647591760162, "learning_rate": 4.1938361719383617e-05, "loss": 0.6008, "step": 25185 }, { "epoch": 0.7354637158581626, "grad_norm": 0.5060208090251899, "learning_rate": 4.193565828602325e-05, "loss": 0.5896, "step": 25190 }, { "epoch": 0.7356096991284798, "grad_norm": 0.5522810067755312, "learning_rate": 4.1932954852662885e-05, "loss": 0.612, "step": 25195 }, { "epoch": 0.7357556823987971, "grad_norm": 0.4946664351308666, "learning_rate": 4.193025141930252e-05, "loss": 0.5835, "step": 25200 }, { "epoch": 0.7359016656691143, "grad_norm": 0.4611264019203606, "learning_rate": 4.192754798594215e-05, "loss": 0.6131, "step": 25205 }, { "epoch": 0.7360476489394315, "grad_norm": 0.5232454661593609, "learning_rate": 4.192484455258178e-05, "loss": 0.5795, "step": 25210 }, { "epoch": 0.7361936322097488, "grad_norm": 0.49268789660629736, "learning_rate": 4.1922141119221414e-05, "loss": 0.6077, "step": 25215 }, { "epoch": 0.736339615480066, "grad_norm": 0.49416817567328775, "learning_rate": 4.191943768586105e-05, "loss": 0.5926, "step": 25220 }, { "epoch": 0.7364855987503832, "grad_norm": 0.48417369844777597, "learning_rate": 4.1916734252500675e-05, "loss": 0.5881, "step": 25225 }, { "epoch": 0.7366315820207004, "grad_norm": 0.5401895563169338, "learning_rate": 4.191403081914031e-05, "loss": 0.5994, "step": 25230 }, { "epoch": 0.7367775652910177, "grad_norm": 0.5402545198609529, "learning_rate": 4.191132738577994e-05, "loss": 0.5689, "step": 25235 }, { "epoch": 0.7369235485613349, "grad_norm": 0.47734543297752063, "learning_rate": 4.190862395241957e-05, "loss": 0.5929, "step": 25240 }, { "epoch": 0.7370695318316521, "grad_norm": 0.47001002417439086, "learning_rate": 4.1905920519059204e-05, "loss": 0.5737, "step": 25245 }, { "epoch": 0.7372155151019694, "grad_norm": 0.5114369459683783, "learning_rate": 4.190321708569884e-05, "loss": 0.6072, "step": 25250 }, { "epoch": 0.7373614983722865, "grad_norm": 0.46405468597182525, "learning_rate": 4.190051365233847e-05, "loss": 0.5789, "step": 25255 }, { "epoch": 0.7375074816426037, "grad_norm": 0.4999228018058665, "learning_rate": 4.1897810218978106e-05, "loss": 0.6285, "step": 25260 }, { "epoch": 0.7376534649129209, "grad_norm": 0.4572836403916218, "learning_rate": 4.189510678561774e-05, "loss": 0.5814, "step": 25265 }, { "epoch": 0.7377994481832382, "grad_norm": 0.5094608642441321, "learning_rate": 4.189240335225737e-05, "loss": 0.6035, "step": 25270 }, { "epoch": 0.7379454314535554, "grad_norm": 0.46503816397383163, "learning_rate": 4.1889699918897e-05, "loss": 0.5935, "step": 25275 }, { "epoch": 0.7380914147238726, "grad_norm": 0.5106605583979468, "learning_rate": 4.1886996485536636e-05, "loss": 0.6004, "step": 25280 }, { "epoch": 0.7382373979941899, "grad_norm": 0.459404825187436, "learning_rate": 4.188429305217626e-05, "loss": 0.5875, "step": 25285 }, { "epoch": 0.7383833812645071, "grad_norm": 0.44697066703596017, "learning_rate": 4.18815896188159e-05, "loss": 0.5846, "step": 25290 }, { "epoch": 0.7385293645348243, "grad_norm": 0.4824074580963812, "learning_rate": 4.187888618545553e-05, "loss": 0.5555, "step": 25295 }, { "epoch": 0.7386753478051415, "grad_norm": 0.4955340219886804, "learning_rate": 4.187618275209516e-05, "loss": 0.6048, "step": 25300 }, { "epoch": 0.7388213310754588, "grad_norm": 0.4894361163383171, "learning_rate": 4.187347931873479e-05, "loss": 0.5808, "step": 25305 }, { "epoch": 0.738967314345776, "grad_norm": 0.4529686407260849, "learning_rate": 4.187077588537443e-05, "loss": 0.5403, "step": 25310 }, { "epoch": 0.7391132976160932, "grad_norm": 0.5037848414863443, "learning_rate": 4.186807245201406e-05, "loss": 0.6105, "step": 25315 }, { "epoch": 0.7392592808864105, "grad_norm": 0.49552978436069567, "learning_rate": 4.1865369018653694e-05, "loss": 0.5898, "step": 25320 }, { "epoch": 0.7394052641567277, "grad_norm": 0.4879364467513596, "learning_rate": 4.186266558529333e-05, "loss": 0.5737, "step": 25325 }, { "epoch": 0.7395512474270448, "grad_norm": 0.5377695245032945, "learning_rate": 4.1859962151932955e-05, "loss": 0.562, "step": 25330 }, { "epoch": 0.739697230697362, "grad_norm": 0.5104951632586677, "learning_rate": 4.185725871857259e-05, "loss": 0.5838, "step": 25335 }, { "epoch": 0.7398432139676793, "grad_norm": 0.5297845194283689, "learning_rate": 4.1854555285212223e-05, "loss": 0.5937, "step": 25340 }, { "epoch": 0.7399891972379965, "grad_norm": 0.48541897689152674, "learning_rate": 4.185185185185185e-05, "loss": 0.5864, "step": 25345 }, { "epoch": 0.7401351805083137, "grad_norm": 0.46891835292811995, "learning_rate": 4.1849148418491485e-05, "loss": 0.5878, "step": 25350 }, { "epoch": 0.740281163778631, "grad_norm": 0.5063913214388721, "learning_rate": 4.184644498513112e-05, "loss": 0.5612, "step": 25355 }, { "epoch": 0.7404271470489482, "grad_norm": 0.49046245702472135, "learning_rate": 4.1843741551770746e-05, "loss": 0.5811, "step": 25360 }, { "epoch": 0.7405731303192654, "grad_norm": 0.4858788805563198, "learning_rate": 4.184103811841038e-05, "loss": 0.5634, "step": 25365 }, { "epoch": 0.7407191135895826, "grad_norm": 0.4906738873693868, "learning_rate": 4.183833468505002e-05, "loss": 0.5938, "step": 25370 }, { "epoch": 0.7408650968598999, "grad_norm": 0.5264052354511224, "learning_rate": 4.183563125168965e-05, "loss": 0.5968, "step": 25375 }, { "epoch": 0.7410110801302171, "grad_norm": 0.4950018182333427, "learning_rate": 4.183292781832928e-05, "loss": 0.594, "step": 25380 }, { "epoch": 0.7411570634005343, "grad_norm": 0.48525724002946385, "learning_rate": 4.1830224384968916e-05, "loss": 0.5992, "step": 25385 }, { "epoch": 0.7413030466708516, "grad_norm": 0.5072500473497461, "learning_rate": 4.182752095160854e-05, "loss": 0.5904, "step": 25390 }, { "epoch": 0.7414490299411688, "grad_norm": 0.5251322729833232, "learning_rate": 4.182481751824818e-05, "loss": 0.5803, "step": 25395 }, { "epoch": 0.7415950132114859, "grad_norm": 0.45371356348719527, "learning_rate": 4.182211408488781e-05, "loss": 0.5736, "step": 25400 }, { "epoch": 0.7417409964818031, "grad_norm": 0.48240448791303203, "learning_rate": 4.181941065152744e-05, "loss": 0.6052, "step": 25405 }, { "epoch": 0.7418869797521204, "grad_norm": 0.5162072995512325, "learning_rate": 4.181670721816707e-05, "loss": 0.5952, "step": 25410 }, { "epoch": 0.7420329630224376, "grad_norm": 0.540608054443836, "learning_rate": 4.1814003784806707e-05, "loss": 0.6002, "step": 25415 }, { "epoch": 0.7421789462927548, "grad_norm": 0.5170972947404973, "learning_rate": 4.1811300351446334e-05, "loss": 0.5537, "step": 25420 }, { "epoch": 0.7423249295630721, "grad_norm": 0.4469997545985801, "learning_rate": 4.1808596918085975e-05, "loss": 0.547, "step": 25425 }, { "epoch": 0.7424709128333893, "grad_norm": 0.5121257212619679, "learning_rate": 4.180589348472561e-05, "loss": 0.5683, "step": 25430 }, { "epoch": 0.7426168961037065, "grad_norm": 0.522359600320485, "learning_rate": 4.1803190051365236e-05, "loss": 0.5761, "step": 25435 }, { "epoch": 0.7427628793740237, "grad_norm": 0.47174983272183263, "learning_rate": 4.180048661800487e-05, "loss": 0.5946, "step": 25440 }, { "epoch": 0.742908862644341, "grad_norm": 0.5068665804597389, "learning_rate": 4.1797783184644504e-05, "loss": 0.6198, "step": 25445 }, { "epoch": 0.7430548459146582, "grad_norm": 0.4477821180983576, "learning_rate": 4.179507975128413e-05, "loss": 0.5571, "step": 25450 }, { "epoch": 0.7432008291849754, "grad_norm": 0.5265217607500585, "learning_rate": 4.1792376317923765e-05, "loss": 0.5899, "step": 25455 }, { "epoch": 0.7433468124552927, "grad_norm": 0.42954627220753977, "learning_rate": 4.17896728845634e-05, "loss": 0.5662, "step": 25460 }, { "epoch": 0.7434927957256099, "grad_norm": 0.4846068444132874, "learning_rate": 4.1786969451203026e-05, "loss": 0.602, "step": 25465 }, { "epoch": 0.7436387789959271, "grad_norm": 0.484249740387604, "learning_rate": 4.178426601784266e-05, "loss": 0.5758, "step": 25470 }, { "epoch": 0.7437847622662442, "grad_norm": 0.48243583941571816, "learning_rate": 4.1781562584482294e-05, "loss": 0.5939, "step": 25475 }, { "epoch": 0.7439307455365615, "grad_norm": 0.541726695522443, "learning_rate": 4.177885915112193e-05, "loss": 0.5893, "step": 25480 }, { "epoch": 0.7440767288068787, "grad_norm": 0.5072879345162659, "learning_rate": 4.177615571776156e-05, "loss": 0.6017, "step": 25485 }, { "epoch": 0.7442227120771959, "grad_norm": 0.49600622845067605, "learning_rate": 4.1773452284401196e-05, "loss": 0.574, "step": 25490 }, { "epoch": 0.7443686953475132, "grad_norm": 0.5270587244713041, "learning_rate": 4.1770748851040824e-05, "loss": 0.6512, "step": 25495 }, { "epoch": 0.7445146786178304, "grad_norm": 0.5108400981990427, "learning_rate": 4.176804541768046e-05, "loss": 0.5845, "step": 25500 }, { "epoch": 0.7446606618881476, "grad_norm": 0.5131971175876903, "learning_rate": 4.176534198432009e-05, "loss": 0.5773, "step": 25505 }, { "epoch": 0.7448066451584648, "grad_norm": 0.5338230310376951, "learning_rate": 4.176263855095972e-05, "loss": 0.5667, "step": 25510 }, { "epoch": 0.7449526284287821, "grad_norm": 0.4812907161516587, "learning_rate": 4.175993511759935e-05, "loss": 0.5865, "step": 25515 }, { "epoch": 0.7450986116990993, "grad_norm": 0.5428175250960217, "learning_rate": 4.175723168423899e-05, "loss": 0.5953, "step": 25520 }, { "epoch": 0.7452445949694165, "grad_norm": 0.49241845486493124, "learning_rate": 4.1754528250878614e-05, "loss": 0.5493, "step": 25525 }, { "epoch": 0.7453905782397338, "grad_norm": 0.4649850148202702, "learning_rate": 4.175182481751825e-05, "loss": 0.5782, "step": 25530 }, { "epoch": 0.745536561510051, "grad_norm": 0.48707562478504535, "learning_rate": 4.174912138415788e-05, "loss": 0.6029, "step": 25535 }, { "epoch": 0.7456825447803682, "grad_norm": 0.5634262325316857, "learning_rate": 4.1746417950797516e-05, "loss": 0.5912, "step": 25540 }, { "epoch": 0.7458285280506854, "grad_norm": 0.5449476497448914, "learning_rate": 4.174371451743715e-05, "loss": 0.5929, "step": 25545 }, { "epoch": 0.7459745113210026, "grad_norm": 0.482414462250719, "learning_rate": 4.1741011084076784e-05, "loss": 0.6383, "step": 25550 }, { "epoch": 0.7461204945913198, "grad_norm": 0.48819321773488006, "learning_rate": 4.173830765071641e-05, "loss": 0.575, "step": 25555 }, { "epoch": 0.746266477861637, "grad_norm": 0.4684996848936785, "learning_rate": 4.1735604217356045e-05, "loss": 0.601, "step": 25560 }, { "epoch": 0.7464124611319543, "grad_norm": 0.4902006146971998, "learning_rate": 4.173290078399568e-05, "loss": 0.5929, "step": 25565 }, { "epoch": 0.7465584444022715, "grad_norm": 0.5035969635322302, "learning_rate": 4.173019735063531e-05, "loss": 0.5775, "step": 25570 }, { "epoch": 0.7467044276725887, "grad_norm": 0.5146677003196944, "learning_rate": 4.172749391727494e-05, "loss": 0.5715, "step": 25575 }, { "epoch": 0.7468504109429059, "grad_norm": 0.49745186963612076, "learning_rate": 4.1724790483914575e-05, "loss": 0.5683, "step": 25580 }, { "epoch": 0.7469963942132232, "grad_norm": 0.4442777861657574, "learning_rate": 4.17220870505542e-05, "loss": 0.5784, "step": 25585 }, { "epoch": 0.7471423774835404, "grad_norm": 0.4862864292324429, "learning_rate": 4.1719383617193836e-05, "loss": 0.5632, "step": 25590 }, { "epoch": 0.7472883607538576, "grad_norm": 0.4544775945847276, "learning_rate": 4.171668018383347e-05, "loss": 0.5631, "step": 25595 }, { "epoch": 0.7474343440241749, "grad_norm": 0.4684243489517284, "learning_rate": 4.1713976750473104e-05, "loss": 0.5996, "step": 25600 }, { "epoch": 0.7475803272944921, "grad_norm": 0.5639540403700919, "learning_rate": 4.171127331711274e-05, "loss": 0.6055, "step": 25605 }, { "epoch": 0.7477263105648093, "grad_norm": 0.4912807361103324, "learning_rate": 4.170856988375237e-05, "loss": 0.5728, "step": 25610 }, { "epoch": 0.7478722938351265, "grad_norm": 0.46680123350724745, "learning_rate": 4.1705866450392e-05, "loss": 0.6057, "step": 25615 }, { "epoch": 0.7480182771054437, "grad_norm": 0.4804941253541106, "learning_rate": 4.170316301703163e-05, "loss": 0.5781, "step": 25620 }, { "epoch": 0.7481642603757609, "grad_norm": 0.5103122449670606, "learning_rate": 4.170045958367127e-05, "loss": 0.5893, "step": 25625 }, { "epoch": 0.7483102436460781, "grad_norm": 0.5263325994029855, "learning_rate": 4.1697756150310894e-05, "loss": 0.5649, "step": 25630 }, { "epoch": 0.7484562269163954, "grad_norm": 0.5457228085786663, "learning_rate": 4.169505271695053e-05, "loss": 0.6073, "step": 25635 }, { "epoch": 0.7486022101867126, "grad_norm": 0.5130507388049087, "learning_rate": 4.169234928359016e-05, "loss": 0.5865, "step": 25640 }, { "epoch": 0.7487481934570298, "grad_norm": 0.5093765892825546, "learning_rate": 4.168964585022979e-05, "loss": 0.5948, "step": 25645 }, { "epoch": 0.748894176727347, "grad_norm": 0.46617742621276814, "learning_rate": 4.168694241686943e-05, "loss": 0.5761, "step": 25650 }, { "epoch": 0.7490401599976643, "grad_norm": 0.48105441532322374, "learning_rate": 4.168423898350906e-05, "loss": 0.5838, "step": 25655 }, { "epoch": 0.7491861432679815, "grad_norm": 0.5139881393111243, "learning_rate": 4.168153555014869e-05, "loss": 0.5918, "step": 25660 }, { "epoch": 0.7493321265382987, "grad_norm": 0.4858566409384557, "learning_rate": 4.1678832116788326e-05, "loss": 0.6043, "step": 25665 }, { "epoch": 0.749478109808616, "grad_norm": 0.5002585735482595, "learning_rate": 4.167612868342795e-05, "loss": 0.6042, "step": 25670 }, { "epoch": 0.7496240930789332, "grad_norm": 0.5436984140670557, "learning_rate": 4.167342525006759e-05, "loss": 0.5628, "step": 25675 }, { "epoch": 0.7497700763492504, "grad_norm": 0.5100673079373046, "learning_rate": 4.167072181670722e-05, "loss": 0.5566, "step": 25680 }, { "epoch": 0.7499160596195676, "grad_norm": 0.49171101227957725, "learning_rate": 4.1668018383346855e-05, "loss": 0.5684, "step": 25685 }, { "epoch": 0.7500620428898849, "grad_norm": 0.5055320020454558, "learning_rate": 4.166531494998648e-05, "loss": 0.5852, "step": 25690 }, { "epoch": 0.750208026160202, "grad_norm": 0.4956557080671682, "learning_rate": 4.1662611516626116e-05, "loss": 0.6274, "step": 25695 }, { "epoch": 0.7503540094305192, "grad_norm": 0.4889553105792674, "learning_rate": 4.165990808326575e-05, "loss": 0.5959, "step": 25700 }, { "epoch": 0.7504999927008364, "grad_norm": 0.5011732316671158, "learning_rate": 4.165720464990538e-05, "loss": 0.5723, "step": 25705 }, { "epoch": 0.7506459759711537, "grad_norm": 0.46527674404769903, "learning_rate": 4.165450121654502e-05, "loss": 0.5863, "step": 25710 }, { "epoch": 0.7507919592414709, "grad_norm": 0.4913058580278416, "learning_rate": 4.1651797783184646e-05, "loss": 0.5952, "step": 25715 }, { "epoch": 0.7509379425117881, "grad_norm": 0.491339709828053, "learning_rate": 4.164909434982428e-05, "loss": 0.5718, "step": 25720 }, { "epoch": 0.7510839257821054, "grad_norm": 0.5232459594953106, "learning_rate": 4.1646390916463914e-05, "loss": 0.5993, "step": 25725 }, { "epoch": 0.7512299090524226, "grad_norm": 0.49953766128929694, "learning_rate": 4.164368748310354e-05, "loss": 0.6141, "step": 25730 }, { "epoch": 0.7513758923227398, "grad_norm": 0.4720820565681962, "learning_rate": 4.1640984049743175e-05, "loss": 0.5959, "step": 25735 }, { "epoch": 0.751521875593057, "grad_norm": 0.5227086698383504, "learning_rate": 4.163828061638281e-05, "loss": 0.6122, "step": 25740 }, { "epoch": 0.7516678588633743, "grad_norm": 0.4789021368755548, "learning_rate": 4.163557718302244e-05, "loss": 0.5864, "step": 25745 }, { "epoch": 0.7518138421336915, "grad_norm": 0.5068097726049455, "learning_rate": 4.163287374966207e-05, "loss": 0.5825, "step": 25750 }, { "epoch": 0.7519598254040087, "grad_norm": 0.44897514427046903, "learning_rate": 4.1630170316301704e-05, "loss": 0.574, "step": 25755 }, { "epoch": 0.752105808674326, "grad_norm": 0.6336978250337851, "learning_rate": 4.162746688294134e-05, "loss": 0.6183, "step": 25760 }, { "epoch": 0.7522517919446431, "grad_norm": 0.4820728210288141, "learning_rate": 4.162476344958097e-05, "loss": 0.5852, "step": 25765 }, { "epoch": 0.7523977752149603, "grad_norm": 0.5211779139905074, "learning_rate": 4.1622060016220606e-05, "loss": 0.6038, "step": 25770 }, { "epoch": 0.7525437584852775, "grad_norm": 0.510403538244019, "learning_rate": 4.161935658286023e-05, "loss": 0.6277, "step": 25775 }, { "epoch": 0.7526897417555948, "grad_norm": 0.49527576727251094, "learning_rate": 4.161665314949987e-05, "loss": 0.5684, "step": 25780 }, { "epoch": 0.752835725025912, "grad_norm": 0.47886496433034514, "learning_rate": 4.16139497161395e-05, "loss": 0.5747, "step": 25785 }, { "epoch": 0.7529817082962292, "grad_norm": 0.5266101310901199, "learning_rate": 4.161124628277913e-05, "loss": 0.594, "step": 25790 }, { "epoch": 0.7531276915665465, "grad_norm": 0.5145723876828383, "learning_rate": 4.160854284941876e-05, "loss": 0.5496, "step": 25795 }, { "epoch": 0.7532736748368637, "grad_norm": 0.5138544054908135, "learning_rate": 4.16058394160584e-05, "loss": 0.577, "step": 25800 }, { "epoch": 0.7534196581071809, "grad_norm": 0.5273928667990163, "learning_rate": 4.1603135982698024e-05, "loss": 0.593, "step": 25805 }, { "epoch": 0.7535656413774982, "grad_norm": 0.5604103603146062, "learning_rate": 4.160043254933766e-05, "loss": 0.6023, "step": 25810 }, { "epoch": 0.7537116246478154, "grad_norm": 0.4590813776487784, "learning_rate": 4.159772911597729e-05, "loss": 0.5693, "step": 25815 }, { "epoch": 0.7538576079181326, "grad_norm": 0.4844316664895673, "learning_rate": 4.1595025682616926e-05, "loss": 0.5586, "step": 25820 }, { "epoch": 0.7540035911884498, "grad_norm": 0.51100266771022, "learning_rate": 4.159232224925656e-05, "loss": 0.5468, "step": 25825 }, { "epoch": 0.7541495744587671, "grad_norm": 0.5418768389798215, "learning_rate": 4.1589618815896194e-05, "loss": 0.5845, "step": 25830 }, { "epoch": 0.7542955577290843, "grad_norm": 0.5267004245495662, "learning_rate": 4.158691538253582e-05, "loss": 0.6021, "step": 25835 }, { "epoch": 0.7544415409994014, "grad_norm": 0.4962692696831332, "learning_rate": 4.1584211949175455e-05, "loss": 0.5717, "step": 25840 }, { "epoch": 0.7545875242697186, "grad_norm": 0.5229117446123686, "learning_rate": 4.158150851581509e-05, "loss": 0.5837, "step": 25845 }, { "epoch": 0.7547335075400359, "grad_norm": 0.46267701741950534, "learning_rate": 4.1578805082454716e-05, "loss": 0.5492, "step": 25850 }, { "epoch": 0.7548794908103531, "grad_norm": 0.4945220866276884, "learning_rate": 4.157610164909435e-05, "loss": 0.5834, "step": 25855 }, { "epoch": 0.7550254740806703, "grad_norm": 0.5060746555730417, "learning_rate": 4.1573398215733984e-05, "loss": 0.6084, "step": 25860 }, { "epoch": 0.7551714573509876, "grad_norm": 0.5079350278186799, "learning_rate": 4.157069478237361e-05, "loss": 0.5848, "step": 25865 }, { "epoch": 0.7553174406213048, "grad_norm": 0.43815583818381004, "learning_rate": 4.1567991349013246e-05, "loss": 0.5748, "step": 25870 }, { "epoch": 0.755463423891622, "grad_norm": 0.5036979778702373, "learning_rate": 4.156528791565288e-05, "loss": 0.5641, "step": 25875 }, { "epoch": 0.7556094071619393, "grad_norm": 0.49701862290131943, "learning_rate": 4.1562584482292514e-05, "loss": 0.5793, "step": 25880 }, { "epoch": 0.7557553904322565, "grad_norm": 0.5024628930974073, "learning_rate": 4.155988104893215e-05, "loss": 0.5656, "step": 25885 }, { "epoch": 0.7559013737025737, "grad_norm": 0.4616673390596318, "learning_rate": 4.155717761557178e-05, "loss": 0.5772, "step": 25890 }, { "epoch": 0.7560473569728909, "grad_norm": 0.5156096002373076, "learning_rate": 4.155447418221141e-05, "loss": 0.5685, "step": 25895 }, { "epoch": 0.7561933402432082, "grad_norm": 0.48848948178995527, "learning_rate": 4.155177074885104e-05, "loss": 0.5926, "step": 25900 }, { "epoch": 0.7563393235135254, "grad_norm": 0.530747696050957, "learning_rate": 4.154906731549068e-05, "loss": 0.576, "step": 25905 }, { "epoch": 0.7564853067838426, "grad_norm": 0.45860004594737735, "learning_rate": 4.1546363882130304e-05, "loss": 0.5853, "step": 25910 }, { "epoch": 0.7566312900541597, "grad_norm": 0.517897681563022, "learning_rate": 4.154366044876994e-05, "loss": 0.6119, "step": 25915 }, { "epoch": 0.756777273324477, "grad_norm": 0.4806237519809067, "learning_rate": 4.154095701540957e-05, "loss": 0.5776, "step": 25920 }, { "epoch": 0.7569232565947942, "grad_norm": 0.49834200661545475, "learning_rate": 4.15382535820492e-05, "loss": 0.5603, "step": 25925 }, { "epoch": 0.7570692398651114, "grad_norm": 0.5144325076733146, "learning_rate": 4.1535550148688834e-05, "loss": 0.5676, "step": 25930 }, { "epoch": 0.7572152231354287, "grad_norm": 0.4941684524746624, "learning_rate": 4.1532846715328474e-05, "loss": 0.5643, "step": 25935 }, { "epoch": 0.7573612064057459, "grad_norm": 0.44633401156860225, "learning_rate": 4.15301432819681e-05, "loss": 0.5752, "step": 25940 }, { "epoch": 0.7575071896760631, "grad_norm": 0.4902938648435875, "learning_rate": 4.1527439848607736e-05, "loss": 0.5814, "step": 25945 }, { "epoch": 0.7576531729463803, "grad_norm": 0.4927917144609313, "learning_rate": 4.152473641524737e-05, "loss": 0.5786, "step": 25950 }, { "epoch": 0.7577991562166976, "grad_norm": 0.5291347534728377, "learning_rate": 4.1522032981887e-05, "loss": 0.5725, "step": 25955 }, { "epoch": 0.7579451394870148, "grad_norm": 0.5589032331936836, "learning_rate": 4.151932954852663e-05, "loss": 0.6171, "step": 25960 }, { "epoch": 0.758091122757332, "grad_norm": 0.4651436930386596, "learning_rate": 4.1516626115166265e-05, "loss": 0.5689, "step": 25965 }, { "epoch": 0.7582371060276493, "grad_norm": 0.4721745951195607, "learning_rate": 4.151392268180589e-05, "loss": 0.5815, "step": 25970 }, { "epoch": 0.7583830892979665, "grad_norm": 0.5174122614039061, "learning_rate": 4.1511219248445526e-05, "loss": 0.6021, "step": 25975 }, { "epoch": 0.7585290725682837, "grad_norm": 0.4854255186294267, "learning_rate": 4.150851581508516e-05, "loss": 0.5837, "step": 25980 }, { "epoch": 0.7586750558386008, "grad_norm": 0.4967581508882233, "learning_rate": 4.150581238172479e-05, "loss": 0.5574, "step": 25985 }, { "epoch": 0.7588210391089181, "grad_norm": 0.48187168579445633, "learning_rate": 4.150310894836443e-05, "loss": 0.5747, "step": 25990 }, { "epoch": 0.7589670223792353, "grad_norm": 0.49290269933736336, "learning_rate": 4.150040551500406e-05, "loss": 0.6086, "step": 25995 }, { "epoch": 0.7591130056495525, "grad_norm": 0.46469528794545684, "learning_rate": 4.149770208164369e-05, "loss": 0.5768, "step": 26000 }, { "epoch": 0.7592589889198698, "grad_norm": 0.49280296388048495, "learning_rate": 4.149499864828332e-05, "loss": 0.5842, "step": 26005 }, { "epoch": 0.759404972190187, "grad_norm": 0.5043721244806632, "learning_rate": 4.149229521492296e-05, "loss": 0.5834, "step": 26010 }, { "epoch": 0.7595509554605042, "grad_norm": 0.4780790173168188, "learning_rate": 4.1489591781562585e-05, "loss": 0.5804, "step": 26015 }, { "epoch": 0.7596969387308214, "grad_norm": 0.46518806876123675, "learning_rate": 4.148688834820222e-05, "loss": 0.5811, "step": 26020 }, { "epoch": 0.7598429220011387, "grad_norm": 0.5254486776440798, "learning_rate": 4.148418491484185e-05, "loss": 0.5992, "step": 26025 }, { "epoch": 0.7599889052714559, "grad_norm": 0.5055542514911764, "learning_rate": 4.148148148148148e-05, "loss": 0.5674, "step": 26030 }, { "epoch": 0.7601348885417731, "grad_norm": 0.5170220773759411, "learning_rate": 4.1478778048121114e-05, "loss": 0.6014, "step": 26035 }, { "epoch": 0.7602808718120904, "grad_norm": 0.422587591004047, "learning_rate": 4.147607461476075e-05, "loss": 0.5503, "step": 26040 }, { "epoch": 0.7604268550824076, "grad_norm": 0.5327815326034259, "learning_rate": 4.1473371181400375e-05, "loss": 0.5911, "step": 26045 }, { "epoch": 0.7605728383527248, "grad_norm": 0.5086931474458067, "learning_rate": 4.1470667748040016e-05, "loss": 0.561, "step": 26050 }, { "epoch": 0.760718821623042, "grad_norm": 0.48256869415693104, "learning_rate": 4.146796431467965e-05, "loss": 0.5706, "step": 26055 }, { "epoch": 0.7608648048933592, "grad_norm": 0.47960047209748097, "learning_rate": 4.146526088131928e-05, "loss": 0.572, "step": 26060 }, { "epoch": 0.7610107881636764, "grad_norm": 0.5054330313999934, "learning_rate": 4.146255744795891e-05, "loss": 0.5927, "step": 26065 }, { "epoch": 0.7611567714339936, "grad_norm": 0.5184217353560107, "learning_rate": 4.1459854014598545e-05, "loss": 0.5851, "step": 26070 }, { "epoch": 0.7613027547043109, "grad_norm": 0.497416058427207, "learning_rate": 4.145715058123817e-05, "loss": 0.5732, "step": 26075 }, { "epoch": 0.7614487379746281, "grad_norm": 0.49589441062035017, "learning_rate": 4.1454447147877806e-05, "loss": 0.5821, "step": 26080 }, { "epoch": 0.7615947212449453, "grad_norm": 0.5378329087822431, "learning_rate": 4.145174371451744e-05, "loss": 0.5773, "step": 26085 }, { "epoch": 0.7617407045152625, "grad_norm": 0.503145126451262, "learning_rate": 4.144904028115707e-05, "loss": 0.5959, "step": 26090 }, { "epoch": 0.7618866877855798, "grad_norm": 0.4635182524198598, "learning_rate": 4.14463368477967e-05, "loss": 0.5575, "step": 26095 }, { "epoch": 0.762032671055897, "grad_norm": 0.5422451079400671, "learning_rate": 4.1443633414436336e-05, "loss": 0.5853, "step": 26100 }, { "epoch": 0.7621786543262142, "grad_norm": 0.4347912850708402, "learning_rate": 4.144092998107597e-05, "loss": 0.5181, "step": 26105 }, { "epoch": 0.7623246375965315, "grad_norm": 0.4922138552543473, "learning_rate": 4.1438226547715604e-05, "loss": 0.592, "step": 26110 }, { "epoch": 0.7624706208668487, "grad_norm": 0.4448497561692818, "learning_rate": 4.143552311435524e-05, "loss": 0.5654, "step": 26115 }, { "epoch": 0.7626166041371659, "grad_norm": 0.4463025995291245, "learning_rate": 4.1432819680994865e-05, "loss": 0.5483, "step": 26120 }, { "epoch": 0.7627625874074832, "grad_norm": 0.4951662044378599, "learning_rate": 4.14301162476345e-05, "loss": 0.592, "step": 26125 }, { "epoch": 0.7629085706778004, "grad_norm": 0.4809861023952368, "learning_rate": 4.142741281427413e-05, "loss": 0.5731, "step": 26130 }, { "epoch": 0.7630545539481175, "grad_norm": 0.468014738565548, "learning_rate": 4.142470938091376e-05, "loss": 0.5783, "step": 26135 }, { "epoch": 0.7632005372184347, "grad_norm": 0.5267291713855691, "learning_rate": 4.1422005947553394e-05, "loss": 0.5778, "step": 26140 }, { "epoch": 0.763346520488752, "grad_norm": 0.5079059478723338, "learning_rate": 4.141930251419303e-05, "loss": 0.5552, "step": 26145 }, { "epoch": 0.7634925037590692, "grad_norm": 0.4960868979138642, "learning_rate": 4.1416599080832655e-05, "loss": 0.5851, "step": 26150 }, { "epoch": 0.7636384870293864, "grad_norm": 0.49611489088058636, "learning_rate": 4.141389564747229e-05, "loss": 0.5591, "step": 26155 }, { "epoch": 0.7637844702997036, "grad_norm": 0.4956513348557756, "learning_rate": 4.1411192214111923e-05, "loss": 0.5611, "step": 26160 }, { "epoch": 0.7639304535700209, "grad_norm": 0.4787444589593543, "learning_rate": 4.140848878075156e-05, "loss": 0.5553, "step": 26165 }, { "epoch": 0.7640764368403381, "grad_norm": 0.5272523572863869, "learning_rate": 4.140578534739119e-05, "loss": 0.6023, "step": 26170 }, { "epoch": 0.7642224201106553, "grad_norm": 0.5180393680895348, "learning_rate": 4.1403081914030826e-05, "loss": 0.5984, "step": 26175 }, { "epoch": 0.7643684033809726, "grad_norm": 0.4752549956926962, "learning_rate": 4.140037848067045e-05, "loss": 0.5446, "step": 26180 }, { "epoch": 0.7645143866512898, "grad_norm": 0.5491368124752726, "learning_rate": 4.139767504731009e-05, "loss": 0.5893, "step": 26185 }, { "epoch": 0.764660369921607, "grad_norm": 0.5209329047140406, "learning_rate": 4.139497161394972e-05, "loss": 0.6334, "step": 26190 }, { "epoch": 0.7648063531919242, "grad_norm": 0.5186631721272277, "learning_rate": 4.139226818058935e-05, "loss": 0.6139, "step": 26195 }, { "epoch": 0.7649523364622415, "grad_norm": 0.4810759817459812, "learning_rate": 4.138956474722898e-05, "loss": 0.5863, "step": 26200 }, { "epoch": 0.7650983197325586, "grad_norm": 0.5400145784350999, "learning_rate": 4.1386861313868616e-05, "loss": 0.6087, "step": 26205 }, { "epoch": 0.7652443030028758, "grad_norm": 0.536714499852345, "learning_rate": 4.138415788050824e-05, "loss": 0.6061, "step": 26210 }, { "epoch": 0.765390286273193, "grad_norm": 0.48550336752784173, "learning_rate": 4.138145444714788e-05, "loss": 0.5862, "step": 26215 }, { "epoch": 0.7655362695435103, "grad_norm": 0.5049704441908872, "learning_rate": 4.137875101378751e-05, "loss": 0.5765, "step": 26220 }, { "epoch": 0.7656822528138275, "grad_norm": 0.4461808437166648, "learning_rate": 4.1376047580427145e-05, "loss": 0.5699, "step": 26225 }, { "epoch": 0.7658282360841447, "grad_norm": 0.5035359530787099, "learning_rate": 4.137334414706678e-05, "loss": 0.6172, "step": 26230 }, { "epoch": 0.765974219354462, "grad_norm": 0.5100161656968669, "learning_rate": 4.137064071370641e-05, "loss": 0.611, "step": 26235 }, { "epoch": 0.7661202026247792, "grad_norm": 0.4616744632446721, "learning_rate": 4.136793728034604e-05, "loss": 0.5718, "step": 26240 }, { "epoch": 0.7662661858950964, "grad_norm": 0.4540095619220706, "learning_rate": 4.1365233846985675e-05, "loss": 0.5665, "step": 26245 }, { "epoch": 0.7664121691654137, "grad_norm": 0.46777968319183305, "learning_rate": 4.136253041362531e-05, "loss": 0.5702, "step": 26250 }, { "epoch": 0.7665581524357309, "grad_norm": 0.4825706562279013, "learning_rate": 4.1359826980264936e-05, "loss": 0.5951, "step": 26255 }, { "epoch": 0.7667041357060481, "grad_norm": 0.5097790267104445, "learning_rate": 4.135712354690457e-05, "loss": 0.598, "step": 26260 }, { "epoch": 0.7668501189763653, "grad_norm": 0.4901041919390578, "learning_rate": 4.1354420113544204e-05, "loss": 0.608, "step": 26265 }, { "epoch": 0.7669961022466826, "grad_norm": 0.4736400588504933, "learning_rate": 4.135171668018383e-05, "loss": 0.5779, "step": 26270 }, { "epoch": 0.7671420855169998, "grad_norm": 0.5126012744673945, "learning_rate": 4.134901324682347e-05, "loss": 0.549, "step": 26275 }, { "epoch": 0.7672880687873169, "grad_norm": 0.4478995934786695, "learning_rate": 4.13463098134631e-05, "loss": 0.5852, "step": 26280 }, { "epoch": 0.7674340520576342, "grad_norm": 0.4791743507112136, "learning_rate": 4.134360638010273e-05, "loss": 0.563, "step": 26285 }, { "epoch": 0.7675800353279514, "grad_norm": 0.4774520920053946, "learning_rate": 4.134090294674237e-05, "loss": 0.589, "step": 26290 }, { "epoch": 0.7677260185982686, "grad_norm": 0.4971380418470573, "learning_rate": 4.1338199513381994e-05, "loss": 0.6154, "step": 26295 }, { "epoch": 0.7678720018685858, "grad_norm": 0.4849252292007612, "learning_rate": 4.133549608002163e-05, "loss": 0.5946, "step": 26300 }, { "epoch": 0.7680179851389031, "grad_norm": 0.4541839034082382, "learning_rate": 4.133279264666126e-05, "loss": 0.6325, "step": 26305 }, { "epoch": 0.7681639684092203, "grad_norm": 0.4709660548484722, "learning_rate": 4.1330089213300896e-05, "loss": 0.6175, "step": 26310 }, { "epoch": 0.7683099516795375, "grad_norm": 0.4656797113987024, "learning_rate": 4.1327385779940524e-05, "loss": 0.5697, "step": 26315 }, { "epoch": 0.7684559349498548, "grad_norm": 0.4967254717970566, "learning_rate": 4.132468234658016e-05, "loss": 0.5662, "step": 26320 }, { "epoch": 0.768601918220172, "grad_norm": 0.48920799021521966, "learning_rate": 4.132197891321979e-05, "loss": 0.5588, "step": 26325 }, { "epoch": 0.7687479014904892, "grad_norm": 0.5094085192201111, "learning_rate": 4.1319275479859426e-05, "loss": 0.5736, "step": 26330 }, { "epoch": 0.7688938847608064, "grad_norm": 0.5167126627003312, "learning_rate": 4.131657204649906e-05, "loss": 0.5718, "step": 26335 }, { "epoch": 0.7690398680311237, "grad_norm": 0.5200277107240768, "learning_rate": 4.131386861313869e-05, "loss": 0.5819, "step": 26340 }, { "epoch": 0.7691858513014409, "grad_norm": 0.5101520036355122, "learning_rate": 4.131116517977832e-05, "loss": 0.5837, "step": 26345 }, { "epoch": 0.769331834571758, "grad_norm": 0.49364855688356024, "learning_rate": 4.1308461746417955e-05, "loss": 0.5986, "step": 26350 }, { "epoch": 0.7694778178420753, "grad_norm": 0.5989039190591418, "learning_rate": 4.130575831305758e-05, "loss": 0.613, "step": 26355 }, { "epoch": 0.7696238011123925, "grad_norm": 1.024139094206328, "learning_rate": 4.1303054879697216e-05, "loss": 0.644, "step": 26360 }, { "epoch": 0.7697697843827097, "grad_norm": 0.5138553321073329, "learning_rate": 4.130035144633685e-05, "loss": 0.5934, "step": 26365 }, { "epoch": 0.7699157676530269, "grad_norm": 0.528321744455043, "learning_rate": 4.1297648012976484e-05, "loss": 0.5978, "step": 26370 }, { "epoch": 0.7700617509233442, "grad_norm": 0.4540808699923226, "learning_rate": 4.129494457961611e-05, "loss": 0.5654, "step": 26375 }, { "epoch": 0.7702077341936614, "grad_norm": 0.45147628260068573, "learning_rate": 4.1292241146255745e-05, "loss": 0.6135, "step": 26380 }, { "epoch": 0.7703537174639786, "grad_norm": 0.49080841288156357, "learning_rate": 4.128953771289538e-05, "loss": 0.6086, "step": 26385 }, { "epoch": 0.7704997007342959, "grad_norm": 0.46260898382743687, "learning_rate": 4.1286834279535013e-05, "loss": 0.5694, "step": 26390 }, { "epoch": 0.7706456840046131, "grad_norm": 0.5230915450937775, "learning_rate": 4.128413084617465e-05, "loss": 0.5885, "step": 26395 }, { "epoch": 0.7707916672749303, "grad_norm": 0.5321354499245736, "learning_rate": 4.1281427412814275e-05, "loss": 0.597, "step": 26400 }, { "epoch": 0.7709376505452475, "grad_norm": 0.4970574816323529, "learning_rate": 4.127872397945391e-05, "loss": 0.5778, "step": 26405 }, { "epoch": 0.7710836338155648, "grad_norm": 0.47161316880291826, "learning_rate": 4.127602054609354e-05, "loss": 0.6107, "step": 26410 }, { "epoch": 0.771229617085882, "grad_norm": 0.5546582399411953, "learning_rate": 4.127331711273317e-05, "loss": 0.6411, "step": 26415 }, { "epoch": 0.7713756003561992, "grad_norm": 0.4972396068255572, "learning_rate": 4.1270613679372804e-05, "loss": 0.5811, "step": 26420 }, { "epoch": 0.7715215836265163, "grad_norm": 0.4900427566029866, "learning_rate": 4.126791024601244e-05, "loss": 0.5912, "step": 26425 }, { "epoch": 0.7716675668968336, "grad_norm": 0.4862495267855322, "learning_rate": 4.1265206812652065e-05, "loss": 0.6013, "step": 26430 }, { "epoch": 0.7718135501671508, "grad_norm": 0.47288843904239064, "learning_rate": 4.12625033792917e-05, "loss": 0.5862, "step": 26435 }, { "epoch": 0.771959533437468, "grad_norm": 0.47679961401927623, "learning_rate": 4.125979994593133e-05, "loss": 0.5998, "step": 26440 }, { "epoch": 0.7721055167077853, "grad_norm": 0.4937380968646177, "learning_rate": 4.125709651257097e-05, "loss": 0.5661, "step": 26445 }, { "epoch": 0.7722514999781025, "grad_norm": 0.474004835079195, "learning_rate": 4.12543930792106e-05, "loss": 0.5856, "step": 26450 }, { "epoch": 0.7723974832484197, "grad_norm": 0.5099525475717221, "learning_rate": 4.1251689645850235e-05, "loss": 0.5952, "step": 26455 }, { "epoch": 0.772543466518737, "grad_norm": 0.5077217599269555, "learning_rate": 4.124898621248986e-05, "loss": 0.6115, "step": 26460 }, { "epoch": 0.7726894497890542, "grad_norm": 0.4866110168300675, "learning_rate": 4.1246282779129497e-05, "loss": 0.574, "step": 26465 }, { "epoch": 0.7728354330593714, "grad_norm": 0.43499619126573624, "learning_rate": 4.124357934576913e-05, "loss": 0.5411, "step": 26470 }, { "epoch": 0.7729814163296886, "grad_norm": 0.4950385146975815, "learning_rate": 4.124087591240876e-05, "loss": 0.6025, "step": 26475 }, { "epoch": 0.7731273996000059, "grad_norm": 0.4537002901460953, "learning_rate": 4.123817247904839e-05, "loss": 0.6038, "step": 26480 }, { "epoch": 0.7732733828703231, "grad_norm": 0.5457854281756505, "learning_rate": 4.1235469045688026e-05, "loss": 0.6021, "step": 26485 }, { "epoch": 0.7734193661406403, "grad_norm": 0.4942558841367732, "learning_rate": 4.123276561232765e-05, "loss": 0.5996, "step": 26490 }, { "epoch": 0.7735653494109576, "grad_norm": 0.5047381542189757, "learning_rate": 4.123006217896729e-05, "loss": 0.5998, "step": 26495 }, { "epoch": 0.7737113326812747, "grad_norm": 0.4743244561361349, "learning_rate": 4.122735874560693e-05, "loss": 0.5966, "step": 26500 }, { "epoch": 0.7738573159515919, "grad_norm": 0.4868152749883291, "learning_rate": 4.1224655312246555e-05, "loss": 0.5526, "step": 26505 }, { "epoch": 0.7740032992219091, "grad_norm": 0.4719899485857622, "learning_rate": 4.122195187888619e-05, "loss": 0.5773, "step": 26510 }, { "epoch": 0.7741492824922264, "grad_norm": 0.4792953352130204, "learning_rate": 4.121924844552582e-05, "loss": 0.6009, "step": 26515 }, { "epoch": 0.7742952657625436, "grad_norm": 0.46521367661257707, "learning_rate": 4.121654501216545e-05, "loss": 0.5733, "step": 26520 }, { "epoch": 0.7744412490328608, "grad_norm": 0.4625308207324747, "learning_rate": 4.1213841578805084e-05, "loss": 0.5365, "step": 26525 }, { "epoch": 0.774587232303178, "grad_norm": 0.4966090531194868, "learning_rate": 4.121113814544472e-05, "loss": 0.5647, "step": 26530 }, { "epoch": 0.7747332155734953, "grad_norm": 0.48507011085554785, "learning_rate": 4.1208434712084346e-05, "loss": 0.562, "step": 26535 }, { "epoch": 0.7748791988438125, "grad_norm": 0.5079479226150256, "learning_rate": 4.120573127872398e-05, "loss": 0.5763, "step": 26540 }, { "epoch": 0.7750251821141297, "grad_norm": 0.4660849058720176, "learning_rate": 4.1203027845363614e-05, "loss": 0.5944, "step": 26545 }, { "epoch": 0.775171165384447, "grad_norm": 0.45160968297598275, "learning_rate": 4.120032441200324e-05, "loss": 0.5795, "step": 26550 }, { "epoch": 0.7753171486547642, "grad_norm": 0.5151768115072224, "learning_rate": 4.1197620978642875e-05, "loss": 0.5753, "step": 26555 }, { "epoch": 0.7754631319250814, "grad_norm": 0.5377828585440398, "learning_rate": 4.1194917545282516e-05, "loss": 0.5964, "step": 26560 }, { "epoch": 0.7756091151953987, "grad_norm": 0.4646937789932469, "learning_rate": 4.119221411192214e-05, "loss": 0.5696, "step": 26565 }, { "epoch": 0.7757550984657158, "grad_norm": 0.4984318446493698, "learning_rate": 4.118951067856178e-05, "loss": 0.5686, "step": 26570 }, { "epoch": 0.775901081736033, "grad_norm": 0.5051681057293651, "learning_rate": 4.118680724520141e-05, "loss": 0.5476, "step": 26575 }, { "epoch": 0.7760470650063502, "grad_norm": 0.5203483739469353, "learning_rate": 4.118410381184104e-05, "loss": 0.5975, "step": 26580 }, { "epoch": 0.7761930482766675, "grad_norm": 0.6706028441866515, "learning_rate": 4.118140037848067e-05, "loss": 0.6013, "step": 26585 }, { "epoch": 0.7763390315469847, "grad_norm": 0.4561078753488426, "learning_rate": 4.1178696945120306e-05, "loss": 0.5685, "step": 26590 }, { "epoch": 0.7764850148173019, "grad_norm": 0.5038944294497936, "learning_rate": 4.1175993511759933e-05, "loss": 0.5947, "step": 26595 }, { "epoch": 0.7766309980876192, "grad_norm": 0.5026261036261371, "learning_rate": 4.117329007839957e-05, "loss": 0.6017, "step": 26600 }, { "epoch": 0.7767769813579364, "grad_norm": 0.5183175652324039, "learning_rate": 4.11705866450392e-05, "loss": 0.6054, "step": 26605 }, { "epoch": 0.7769229646282536, "grad_norm": 0.5021755438295313, "learning_rate": 4.116788321167883e-05, "loss": 0.5761, "step": 26610 }, { "epoch": 0.7770689478985708, "grad_norm": 0.4941012396666856, "learning_rate": 4.116517977831847e-05, "loss": 0.587, "step": 26615 }, { "epoch": 0.7772149311688881, "grad_norm": 0.446324610987899, "learning_rate": 4.1162476344958103e-05, "loss": 0.577, "step": 26620 }, { "epoch": 0.7773609144392053, "grad_norm": 0.5172059099204198, "learning_rate": 4.115977291159773e-05, "loss": 0.6258, "step": 26625 }, { "epoch": 0.7775068977095225, "grad_norm": 0.45875545189029965, "learning_rate": 4.1157069478237365e-05, "loss": 0.5455, "step": 26630 }, { "epoch": 0.7776528809798398, "grad_norm": 0.4838554418374402, "learning_rate": 4.1154366044877e-05, "loss": 0.6196, "step": 26635 }, { "epoch": 0.777798864250157, "grad_norm": 0.48587917513229173, "learning_rate": 4.1151662611516626e-05, "loss": 0.5695, "step": 26640 }, { "epoch": 0.7779448475204741, "grad_norm": 0.4944452836563637, "learning_rate": 4.114895917815626e-05, "loss": 0.5877, "step": 26645 }, { "epoch": 0.7780908307907913, "grad_norm": 0.47813965476868164, "learning_rate": 4.1146255744795894e-05, "loss": 0.6047, "step": 26650 }, { "epoch": 0.7782368140611086, "grad_norm": 0.5035952747855948, "learning_rate": 4.114355231143552e-05, "loss": 0.6032, "step": 26655 }, { "epoch": 0.7783827973314258, "grad_norm": 0.45333031381409833, "learning_rate": 4.1140848878075155e-05, "loss": 0.5914, "step": 26660 }, { "epoch": 0.778528780601743, "grad_norm": 0.5025718289409312, "learning_rate": 4.113814544471479e-05, "loss": 0.5727, "step": 26665 }, { "epoch": 0.7786747638720602, "grad_norm": 0.5137929224023079, "learning_rate": 4.113544201135442e-05, "loss": 0.5907, "step": 26670 }, { "epoch": 0.7788207471423775, "grad_norm": 0.4937558134355779, "learning_rate": 4.113273857799406e-05, "loss": 0.5786, "step": 26675 }, { "epoch": 0.7789667304126947, "grad_norm": 0.46575630806158236, "learning_rate": 4.113003514463369e-05, "loss": 0.5937, "step": 26680 }, { "epoch": 0.7791127136830119, "grad_norm": 0.5430420894724125, "learning_rate": 4.112733171127332e-05, "loss": 0.5894, "step": 26685 }, { "epoch": 0.7792586969533292, "grad_norm": 0.5104787565265856, "learning_rate": 4.112462827791295e-05, "loss": 0.6042, "step": 26690 }, { "epoch": 0.7794046802236464, "grad_norm": 0.5157264141085577, "learning_rate": 4.1121924844552587e-05, "loss": 0.5642, "step": 26695 }, { "epoch": 0.7795506634939636, "grad_norm": 0.5179020751985601, "learning_rate": 4.1119221411192214e-05, "loss": 0.6087, "step": 26700 }, { "epoch": 0.7796966467642809, "grad_norm": 0.4599949026533868, "learning_rate": 4.111651797783185e-05, "loss": 0.5925, "step": 26705 }, { "epoch": 0.7798426300345981, "grad_norm": 0.5146831684765418, "learning_rate": 4.111381454447148e-05, "loss": 0.5718, "step": 26710 }, { "epoch": 0.7799886133049152, "grad_norm": 0.47208734518289364, "learning_rate": 4.111111111111111e-05, "loss": 0.5761, "step": 26715 }, { "epoch": 0.7801345965752324, "grad_norm": 0.4731785051823638, "learning_rate": 4.110840767775074e-05, "loss": 0.6379, "step": 26720 }, { "epoch": 0.7802805798455497, "grad_norm": 0.49414991243618694, "learning_rate": 4.110570424439038e-05, "loss": 0.5384, "step": 26725 }, { "epoch": 0.7804265631158669, "grad_norm": 0.4700574545405978, "learning_rate": 4.110300081103001e-05, "loss": 0.5637, "step": 26730 }, { "epoch": 0.7805725463861841, "grad_norm": 0.4838083879030604, "learning_rate": 4.1100297377669645e-05, "loss": 0.5802, "step": 26735 }, { "epoch": 0.7807185296565013, "grad_norm": 0.4897627260605992, "learning_rate": 4.109759394430928e-05, "loss": 0.6044, "step": 26740 }, { "epoch": 0.7808645129268186, "grad_norm": 0.49568891513737295, "learning_rate": 4.1094890510948906e-05, "loss": 0.5877, "step": 26745 }, { "epoch": 0.7810104961971358, "grad_norm": 0.4537175517937365, "learning_rate": 4.109218707758854e-05, "loss": 0.5591, "step": 26750 }, { "epoch": 0.781156479467453, "grad_norm": 0.5043250292726043, "learning_rate": 4.1089483644228174e-05, "loss": 0.589, "step": 26755 }, { "epoch": 0.7813024627377703, "grad_norm": 0.4606202687020646, "learning_rate": 4.10867802108678e-05, "loss": 0.5553, "step": 26760 }, { "epoch": 0.7814484460080875, "grad_norm": 0.49461085340287747, "learning_rate": 4.1084076777507436e-05, "loss": 0.563, "step": 26765 }, { "epoch": 0.7815944292784047, "grad_norm": 0.46716737333952546, "learning_rate": 4.108137334414707e-05, "loss": 0.5366, "step": 26770 }, { "epoch": 0.781740412548722, "grad_norm": 0.5008990553932091, "learning_rate": 4.10786699107867e-05, "loss": 0.5785, "step": 26775 }, { "epoch": 0.7818863958190392, "grad_norm": 0.49959192340910824, "learning_rate": 4.107596647742633e-05, "loss": 0.5803, "step": 26780 }, { "epoch": 0.7820323790893564, "grad_norm": 0.4535638644945004, "learning_rate": 4.107326304406597e-05, "loss": 0.5602, "step": 26785 }, { "epoch": 0.7821783623596735, "grad_norm": 0.4764392727006467, "learning_rate": 4.10705596107056e-05, "loss": 0.6006, "step": 26790 }, { "epoch": 0.7823243456299908, "grad_norm": 0.5301565631398865, "learning_rate": 4.106785617734523e-05, "loss": 0.5822, "step": 26795 }, { "epoch": 0.782470328900308, "grad_norm": 0.4700065785636922, "learning_rate": 4.106515274398487e-05, "loss": 0.5556, "step": 26800 }, { "epoch": 0.7826163121706252, "grad_norm": 0.4764638804080247, "learning_rate": 4.1062449310624494e-05, "loss": 0.5857, "step": 26805 }, { "epoch": 0.7827622954409424, "grad_norm": 0.5095677307570382, "learning_rate": 4.105974587726413e-05, "loss": 0.5664, "step": 26810 }, { "epoch": 0.7829082787112597, "grad_norm": 0.5151746944896161, "learning_rate": 4.105704244390376e-05, "loss": 0.612, "step": 26815 }, { "epoch": 0.7830542619815769, "grad_norm": 0.5128445218673919, "learning_rate": 4.105433901054339e-05, "loss": 0.6191, "step": 26820 }, { "epoch": 0.7832002452518941, "grad_norm": 0.49447245201258766, "learning_rate": 4.105163557718302e-05, "loss": 0.5634, "step": 26825 }, { "epoch": 0.7833462285222114, "grad_norm": 0.4728922961057886, "learning_rate": 4.104893214382266e-05, "loss": 0.5747, "step": 26830 }, { "epoch": 0.7834922117925286, "grad_norm": 0.5093172647499249, "learning_rate": 4.1046228710462285e-05, "loss": 0.606, "step": 26835 }, { "epoch": 0.7836381950628458, "grad_norm": 0.5312042417540828, "learning_rate": 4.1043525277101925e-05, "loss": 0.5928, "step": 26840 }, { "epoch": 0.783784178333163, "grad_norm": 0.48873611757011737, "learning_rate": 4.104082184374155e-05, "loss": 0.5968, "step": 26845 }, { "epoch": 0.7839301616034803, "grad_norm": 0.49403852302740664, "learning_rate": 4.103811841038119e-05, "loss": 0.6128, "step": 26850 }, { "epoch": 0.7840761448737975, "grad_norm": 0.48611411860006076, "learning_rate": 4.103541497702082e-05, "loss": 0.5542, "step": 26855 }, { "epoch": 0.7842221281441147, "grad_norm": 0.5131368583741147, "learning_rate": 4.1032711543660455e-05, "loss": 0.5863, "step": 26860 }, { "epoch": 0.7843681114144319, "grad_norm": 0.584443346499358, "learning_rate": 4.103000811030008e-05, "loss": 0.6338, "step": 26865 }, { "epoch": 0.7845140946847491, "grad_norm": 0.5263988057927067, "learning_rate": 4.1027304676939716e-05, "loss": 0.5483, "step": 26870 }, { "epoch": 0.7846600779550663, "grad_norm": 0.47364211746443524, "learning_rate": 4.102460124357935e-05, "loss": 0.5519, "step": 26875 }, { "epoch": 0.7848060612253835, "grad_norm": 0.4679740938053427, "learning_rate": 4.102189781021898e-05, "loss": 0.5763, "step": 26880 }, { "epoch": 0.7849520444957008, "grad_norm": 0.5216094005201407, "learning_rate": 4.101919437685861e-05, "loss": 0.5783, "step": 26885 }, { "epoch": 0.785098027766018, "grad_norm": 0.48720916267562214, "learning_rate": 4.1016490943498245e-05, "loss": 0.577, "step": 26890 }, { "epoch": 0.7852440110363352, "grad_norm": 0.48165744338721495, "learning_rate": 4.101378751013787e-05, "loss": 0.5938, "step": 26895 }, { "epoch": 0.7853899943066525, "grad_norm": 0.4992736955324328, "learning_rate": 4.101108407677751e-05, "loss": 0.5614, "step": 26900 }, { "epoch": 0.7855359775769697, "grad_norm": 0.527357388555085, "learning_rate": 4.100838064341714e-05, "loss": 0.6076, "step": 26905 }, { "epoch": 0.7856819608472869, "grad_norm": 0.501409999473325, "learning_rate": 4.1005677210056774e-05, "loss": 0.596, "step": 26910 }, { "epoch": 0.7858279441176041, "grad_norm": 0.5091277285225726, "learning_rate": 4.100297377669641e-05, "loss": 0.5874, "step": 26915 }, { "epoch": 0.7859739273879214, "grad_norm": 0.48349074131964775, "learning_rate": 4.100027034333604e-05, "loss": 0.6126, "step": 26920 }, { "epoch": 0.7861199106582386, "grad_norm": 0.4935615376503723, "learning_rate": 4.099756690997567e-05, "loss": 0.5862, "step": 26925 }, { "epoch": 0.7862658939285558, "grad_norm": 0.48487623096081445, "learning_rate": 4.0994863476615304e-05, "loss": 0.554, "step": 26930 }, { "epoch": 0.786411877198873, "grad_norm": 0.5109613454760664, "learning_rate": 4.099216004325494e-05, "loss": 0.5672, "step": 26935 }, { "epoch": 0.7865578604691902, "grad_norm": 0.4994408881423538, "learning_rate": 4.0989456609894565e-05, "loss": 0.5523, "step": 26940 }, { "epoch": 0.7867038437395074, "grad_norm": 0.46222458696609925, "learning_rate": 4.09867531765342e-05, "loss": 0.5708, "step": 26945 }, { "epoch": 0.7868498270098246, "grad_norm": 0.46890709039165795, "learning_rate": 4.098404974317383e-05, "loss": 0.5736, "step": 26950 }, { "epoch": 0.7869958102801419, "grad_norm": 0.459864729766314, "learning_rate": 4.098134630981347e-05, "loss": 0.5886, "step": 26955 }, { "epoch": 0.7871417935504591, "grad_norm": 0.4671501705040721, "learning_rate": 4.09786428764531e-05, "loss": 0.5692, "step": 26960 }, { "epoch": 0.7872877768207763, "grad_norm": 0.4664493146079479, "learning_rate": 4.097593944309273e-05, "loss": 0.5405, "step": 26965 }, { "epoch": 0.7874337600910936, "grad_norm": 0.5228473297234643, "learning_rate": 4.097323600973236e-05, "loss": 0.6196, "step": 26970 }, { "epoch": 0.7875797433614108, "grad_norm": 0.4903061096723968, "learning_rate": 4.0970532576371996e-05, "loss": 0.5708, "step": 26975 }, { "epoch": 0.787725726631728, "grad_norm": 0.4846717930576652, "learning_rate": 4.0967829143011624e-05, "loss": 0.6026, "step": 26980 }, { "epoch": 0.7878717099020452, "grad_norm": 0.4927420187709962, "learning_rate": 4.096512570965126e-05, "loss": 0.6261, "step": 26985 }, { "epoch": 0.7880176931723625, "grad_norm": 0.48209570165607735, "learning_rate": 4.096242227629089e-05, "loss": 0.6162, "step": 26990 }, { "epoch": 0.7881636764426797, "grad_norm": 0.4759008591822909, "learning_rate": 4.0959718842930526e-05, "loss": 0.5642, "step": 26995 }, { "epoch": 0.7883096597129969, "grad_norm": 0.46837547032507476, "learning_rate": 4.095701540957015e-05, "loss": 0.5519, "step": 27000 }, { "epoch": 0.7884556429833142, "grad_norm": 0.45437306528609467, "learning_rate": 4.095431197620979e-05, "loss": 0.5877, "step": 27005 }, { "epoch": 0.7886016262536313, "grad_norm": 0.45976811166298065, "learning_rate": 4.095160854284942e-05, "loss": 0.5848, "step": 27010 }, { "epoch": 0.7887476095239485, "grad_norm": 0.5037670459059458, "learning_rate": 4.0948905109489055e-05, "loss": 0.6147, "step": 27015 }, { "epoch": 0.7888935927942657, "grad_norm": 0.4365966785006211, "learning_rate": 4.094620167612869e-05, "loss": 0.5473, "step": 27020 }, { "epoch": 0.789039576064583, "grad_norm": 0.4912343373539898, "learning_rate": 4.0943498242768316e-05, "loss": 0.5916, "step": 27025 }, { "epoch": 0.7891855593349002, "grad_norm": 0.5213046067006752, "learning_rate": 4.094079480940795e-05, "loss": 0.5906, "step": 27030 }, { "epoch": 0.7893315426052174, "grad_norm": 0.5196262505640383, "learning_rate": 4.0938091376047584e-05, "loss": 0.6367, "step": 27035 }, { "epoch": 0.7894775258755347, "grad_norm": 0.4956925003868305, "learning_rate": 4.093538794268721e-05, "loss": 0.5967, "step": 27040 }, { "epoch": 0.7896235091458519, "grad_norm": 0.49951969753449815, "learning_rate": 4.0932684509326845e-05, "loss": 0.5876, "step": 27045 }, { "epoch": 0.7897694924161691, "grad_norm": 0.49333911198687525, "learning_rate": 4.092998107596648e-05, "loss": 0.5781, "step": 27050 }, { "epoch": 0.7899154756864863, "grad_norm": 0.4589201935718093, "learning_rate": 4.0927277642606107e-05, "loss": 0.536, "step": 27055 }, { "epoch": 0.7900614589568036, "grad_norm": 0.4638763344258726, "learning_rate": 4.092457420924574e-05, "loss": 0.5544, "step": 27060 }, { "epoch": 0.7902074422271208, "grad_norm": 0.4902404338282721, "learning_rate": 4.0921870775885375e-05, "loss": 0.556, "step": 27065 }, { "epoch": 0.790353425497438, "grad_norm": 0.5295549247121253, "learning_rate": 4.091916734252501e-05, "loss": 0.6061, "step": 27070 }, { "epoch": 0.7904994087677553, "grad_norm": 0.45432689158759115, "learning_rate": 4.091646390916464e-05, "loss": 0.5494, "step": 27075 }, { "epoch": 0.7906453920380725, "grad_norm": 0.4953698315640033, "learning_rate": 4.091376047580428e-05, "loss": 0.5903, "step": 27080 }, { "epoch": 0.7907913753083896, "grad_norm": 0.5696899253037816, "learning_rate": 4.0911057042443904e-05, "loss": 0.6093, "step": 27085 }, { "epoch": 0.7909373585787068, "grad_norm": 0.44080501401804784, "learning_rate": 4.090835360908354e-05, "loss": 0.5474, "step": 27090 }, { "epoch": 0.7910833418490241, "grad_norm": 0.49789088190183867, "learning_rate": 4.090565017572317e-05, "loss": 0.6027, "step": 27095 }, { "epoch": 0.7912293251193413, "grad_norm": 0.4349260561684147, "learning_rate": 4.09029467423628e-05, "loss": 0.5397, "step": 27100 }, { "epoch": 0.7913753083896585, "grad_norm": 0.49471022232376777, "learning_rate": 4.090024330900243e-05, "loss": 0.5586, "step": 27105 }, { "epoch": 0.7915212916599758, "grad_norm": 0.4747250719934095, "learning_rate": 4.089753987564207e-05, "loss": 0.5749, "step": 27110 }, { "epoch": 0.791667274930293, "grad_norm": 0.5028120741374295, "learning_rate": 4.0894836442281694e-05, "loss": 0.5716, "step": 27115 }, { "epoch": 0.7918132582006102, "grad_norm": 0.482728866761072, "learning_rate": 4.089213300892133e-05, "loss": 0.5908, "step": 27120 }, { "epoch": 0.7919592414709274, "grad_norm": 0.46427935492644906, "learning_rate": 4.088942957556097e-05, "loss": 0.5641, "step": 27125 }, { "epoch": 0.7921052247412447, "grad_norm": 0.4632270710694138, "learning_rate": 4.0886726142200596e-05, "loss": 0.5515, "step": 27130 }, { "epoch": 0.7922512080115619, "grad_norm": 0.45440226910383613, "learning_rate": 4.088402270884023e-05, "loss": 0.5624, "step": 27135 }, { "epoch": 0.7923971912818791, "grad_norm": 0.5054591708700616, "learning_rate": 4.0881319275479864e-05, "loss": 0.5656, "step": 27140 }, { "epoch": 0.7925431745521964, "grad_norm": 0.49253684834514494, "learning_rate": 4.087861584211949e-05, "loss": 0.5704, "step": 27145 }, { "epoch": 0.7926891578225136, "grad_norm": 0.4958760908980027, "learning_rate": 4.0875912408759126e-05, "loss": 0.5816, "step": 27150 }, { "epoch": 0.7928351410928307, "grad_norm": 0.4965368530619694, "learning_rate": 4.087320897539876e-05, "loss": 0.5487, "step": 27155 }, { "epoch": 0.7929811243631479, "grad_norm": 0.4744084215234379, "learning_rate": 4.087050554203839e-05, "loss": 0.5897, "step": 27160 }, { "epoch": 0.7931271076334652, "grad_norm": 0.4883155252053708, "learning_rate": 4.086780210867802e-05, "loss": 0.5659, "step": 27165 }, { "epoch": 0.7932730909037824, "grad_norm": 0.4964269937844207, "learning_rate": 4.0865098675317655e-05, "loss": 0.5813, "step": 27170 }, { "epoch": 0.7934190741740996, "grad_norm": 0.4765646553393713, "learning_rate": 4.086239524195728e-05, "loss": 0.5718, "step": 27175 }, { "epoch": 0.7935650574444169, "grad_norm": 0.5029009907124055, "learning_rate": 4.085969180859692e-05, "loss": 0.5723, "step": 27180 }, { "epoch": 0.7937110407147341, "grad_norm": 0.4763404515479181, "learning_rate": 4.085698837523656e-05, "loss": 0.5817, "step": 27185 }, { "epoch": 0.7938570239850513, "grad_norm": 0.4659099552249324, "learning_rate": 4.0854284941876184e-05, "loss": 0.581, "step": 27190 }, { "epoch": 0.7940030072553685, "grad_norm": 0.5225917658288965, "learning_rate": 4.085158150851582e-05, "loss": 0.5947, "step": 27195 }, { "epoch": 0.7941489905256858, "grad_norm": 0.4970606372011653, "learning_rate": 4.084887807515545e-05, "loss": 0.5763, "step": 27200 }, { "epoch": 0.794294973796003, "grad_norm": 0.4514982850150082, "learning_rate": 4.084617464179508e-05, "loss": 0.5903, "step": 27205 }, { "epoch": 0.7944409570663202, "grad_norm": 0.45708747070764427, "learning_rate": 4.0843471208434714e-05, "loss": 0.563, "step": 27210 }, { "epoch": 0.7945869403366375, "grad_norm": 0.4745852221637267, "learning_rate": 4.084076777507435e-05, "loss": 0.5728, "step": 27215 }, { "epoch": 0.7947329236069547, "grad_norm": 0.5022887919795657, "learning_rate": 4.0838064341713975e-05, "loss": 0.573, "step": 27220 }, { "epoch": 0.7948789068772719, "grad_norm": 0.4681625638200986, "learning_rate": 4.083536090835361e-05, "loss": 0.554, "step": 27225 }, { "epoch": 0.795024890147589, "grad_norm": 0.5177440619782766, "learning_rate": 4.083265747499324e-05, "loss": 0.57, "step": 27230 }, { "epoch": 0.7951708734179063, "grad_norm": 0.5185225336288514, "learning_rate": 4.082995404163287e-05, "loss": 0.5838, "step": 27235 }, { "epoch": 0.7953168566882235, "grad_norm": 0.5052299118727553, "learning_rate": 4.082725060827251e-05, "loss": 0.5876, "step": 27240 }, { "epoch": 0.7954628399585407, "grad_norm": 0.5684799323416815, "learning_rate": 4.0824547174912145e-05, "loss": 0.5693, "step": 27245 }, { "epoch": 0.795608823228858, "grad_norm": 0.45616556966246974, "learning_rate": 4.082184374155177e-05, "loss": 0.5753, "step": 27250 }, { "epoch": 0.7957548064991752, "grad_norm": 0.48117974457867324, "learning_rate": 4.0819140308191406e-05, "loss": 0.5807, "step": 27255 }, { "epoch": 0.7959007897694924, "grad_norm": 0.4700498788130877, "learning_rate": 4.081643687483104e-05, "loss": 0.5395, "step": 27260 }, { "epoch": 0.7960467730398096, "grad_norm": 0.4276996076971122, "learning_rate": 4.081373344147067e-05, "loss": 0.5781, "step": 27265 }, { "epoch": 0.7961927563101269, "grad_norm": 0.4979570007268738, "learning_rate": 4.08110300081103e-05, "loss": 0.5977, "step": 27270 }, { "epoch": 0.7963387395804441, "grad_norm": 0.44547516951971566, "learning_rate": 4.0808326574749935e-05, "loss": 0.5667, "step": 27275 }, { "epoch": 0.7964847228507613, "grad_norm": 0.5197037938574597, "learning_rate": 4.080562314138956e-05, "loss": 0.6071, "step": 27280 }, { "epoch": 0.7966307061210786, "grad_norm": 0.48851978557895637, "learning_rate": 4.0802919708029197e-05, "loss": 0.5535, "step": 27285 }, { "epoch": 0.7967766893913958, "grad_norm": 0.5627114326432273, "learning_rate": 4.080021627466883e-05, "loss": 0.6067, "step": 27290 }, { "epoch": 0.796922672661713, "grad_norm": 0.45249315704502735, "learning_rate": 4.0797512841308465e-05, "loss": 0.5354, "step": 27295 }, { "epoch": 0.7970686559320301, "grad_norm": 0.48087308533401585, "learning_rate": 4.07948094079481e-05, "loss": 0.5447, "step": 27300 }, { "epoch": 0.7972146392023474, "grad_norm": 0.4999292674782079, "learning_rate": 4.079210597458773e-05, "loss": 0.5955, "step": 27305 }, { "epoch": 0.7973606224726646, "grad_norm": 0.5209407145135003, "learning_rate": 4.078940254122736e-05, "loss": 0.589, "step": 27310 }, { "epoch": 0.7975066057429818, "grad_norm": 0.4829146943261909, "learning_rate": 4.0786699107866994e-05, "loss": 0.5848, "step": 27315 }, { "epoch": 0.797652589013299, "grad_norm": 0.461314255270558, "learning_rate": 4.078399567450663e-05, "loss": 0.5784, "step": 27320 }, { "epoch": 0.7977985722836163, "grad_norm": 0.45601534240211766, "learning_rate": 4.0781292241146255e-05, "loss": 0.5963, "step": 27325 }, { "epoch": 0.7979445555539335, "grad_norm": 0.5385631282736644, "learning_rate": 4.077858880778589e-05, "loss": 0.5794, "step": 27330 }, { "epoch": 0.7980905388242507, "grad_norm": 0.5097570577646348, "learning_rate": 4.077588537442552e-05, "loss": 0.5429, "step": 27335 }, { "epoch": 0.798236522094568, "grad_norm": 0.46913621071306766, "learning_rate": 4.077318194106515e-05, "loss": 0.5883, "step": 27340 }, { "epoch": 0.7983825053648852, "grad_norm": 0.5837148732175633, "learning_rate": 4.0770478507704784e-05, "loss": 0.5944, "step": 27345 }, { "epoch": 0.7985284886352024, "grad_norm": 0.49878209587535083, "learning_rate": 4.0767775074344425e-05, "loss": 0.5946, "step": 27350 }, { "epoch": 0.7986744719055197, "grad_norm": 0.6069179395710912, "learning_rate": 4.076507164098405e-05, "loss": 0.5319, "step": 27355 }, { "epoch": 0.7988204551758369, "grad_norm": 0.523114646759243, "learning_rate": 4.0762368207623686e-05, "loss": 0.5638, "step": 27360 }, { "epoch": 0.7989664384461541, "grad_norm": 0.4846862683281243, "learning_rate": 4.075966477426332e-05, "loss": 0.5734, "step": 27365 }, { "epoch": 0.7991124217164713, "grad_norm": 0.48977665390757413, "learning_rate": 4.075696134090295e-05, "loss": 0.6115, "step": 27370 }, { "epoch": 0.7992584049867885, "grad_norm": 0.4682064291973858, "learning_rate": 4.075425790754258e-05, "loss": 0.5829, "step": 27375 }, { "epoch": 0.7994043882571057, "grad_norm": 0.5050366820817758, "learning_rate": 4.0751554474182216e-05, "loss": 0.6008, "step": 27380 }, { "epoch": 0.7995503715274229, "grad_norm": 0.46302824520309804, "learning_rate": 4.074885104082184e-05, "loss": 0.5258, "step": 27385 }, { "epoch": 0.7996963547977401, "grad_norm": 0.5045869516824611, "learning_rate": 4.074614760746148e-05, "loss": 0.5815, "step": 27390 }, { "epoch": 0.7998423380680574, "grad_norm": 0.4894078692281068, "learning_rate": 4.074344417410111e-05, "loss": 0.5552, "step": 27395 }, { "epoch": 0.7999883213383746, "grad_norm": 0.48454084108437295, "learning_rate": 4.074074074074074e-05, "loss": 0.5809, "step": 27400 }, { "epoch": 0.8001343046086918, "grad_norm": 0.45872968793395613, "learning_rate": 4.073803730738037e-05, "loss": 0.5596, "step": 27405 }, { "epoch": 0.8002802878790091, "grad_norm": 0.5168896507406826, "learning_rate": 4.073533387402001e-05, "loss": 0.5746, "step": 27410 }, { "epoch": 0.8004262711493263, "grad_norm": 0.527345156768531, "learning_rate": 4.073263044065964e-05, "loss": 0.5796, "step": 27415 }, { "epoch": 0.8005722544196435, "grad_norm": 0.4561275129883638, "learning_rate": 4.0729927007299274e-05, "loss": 0.5885, "step": 27420 }, { "epoch": 0.8007182376899608, "grad_norm": 0.47380564359518007, "learning_rate": 4.072722357393891e-05, "loss": 0.5279, "step": 27425 }, { "epoch": 0.800864220960278, "grad_norm": 0.5232152134946648, "learning_rate": 4.0724520140578535e-05, "loss": 0.562, "step": 27430 }, { "epoch": 0.8010102042305952, "grad_norm": 0.46941527536912375, "learning_rate": 4.072181670721817e-05, "loss": 0.5898, "step": 27435 }, { "epoch": 0.8011561875009124, "grad_norm": 0.49799630194464245, "learning_rate": 4.0719113273857803e-05, "loss": 0.5718, "step": 27440 }, { "epoch": 0.8013021707712297, "grad_norm": 0.48470782978191446, "learning_rate": 4.071640984049743e-05, "loss": 0.5837, "step": 27445 }, { "epoch": 0.8014481540415468, "grad_norm": 0.4594737730874377, "learning_rate": 4.0713706407137065e-05, "loss": 0.5568, "step": 27450 }, { "epoch": 0.801594137311864, "grad_norm": 0.44240822145313424, "learning_rate": 4.07110029737767e-05, "loss": 0.5495, "step": 27455 }, { "epoch": 0.8017401205821812, "grad_norm": 0.5066302076752157, "learning_rate": 4.0708299540416326e-05, "loss": 0.5806, "step": 27460 }, { "epoch": 0.8018861038524985, "grad_norm": 0.4592748016943781, "learning_rate": 4.070559610705597e-05, "loss": 0.5629, "step": 27465 }, { "epoch": 0.8020320871228157, "grad_norm": 0.5178809793926084, "learning_rate": 4.0702892673695594e-05, "loss": 0.6015, "step": 27470 }, { "epoch": 0.8021780703931329, "grad_norm": 0.47838282063500887, "learning_rate": 4.070018924033523e-05, "loss": 0.5884, "step": 27475 }, { "epoch": 0.8023240536634502, "grad_norm": 0.4593299533778625, "learning_rate": 4.069748580697486e-05, "loss": 0.5544, "step": 27480 }, { "epoch": 0.8024700369337674, "grad_norm": 0.49221171978969597, "learning_rate": 4.0694782373614496e-05, "loss": 0.5894, "step": 27485 }, { "epoch": 0.8026160202040846, "grad_norm": 0.49316168672521754, "learning_rate": 4.069207894025412e-05, "loss": 0.6134, "step": 27490 }, { "epoch": 0.8027620034744019, "grad_norm": 0.5043710236620317, "learning_rate": 4.068937550689376e-05, "loss": 0.5553, "step": 27495 }, { "epoch": 0.8029079867447191, "grad_norm": 0.47192465661925803, "learning_rate": 4.068667207353339e-05, "loss": 0.596, "step": 27500 }, { "epoch": 0.8030539700150363, "grad_norm": 0.47760560319288203, "learning_rate": 4.068396864017302e-05, "loss": 0.5733, "step": 27505 }, { "epoch": 0.8031999532853535, "grad_norm": 0.48817471959525416, "learning_rate": 4.068126520681265e-05, "loss": 0.5835, "step": 27510 }, { "epoch": 0.8033459365556708, "grad_norm": 0.49770596780637616, "learning_rate": 4.0678561773452287e-05, "loss": 0.6116, "step": 27515 }, { "epoch": 0.8034919198259879, "grad_norm": 0.476939908186038, "learning_rate": 4.067585834009192e-05, "loss": 0.6217, "step": 27520 }, { "epoch": 0.8036379030963051, "grad_norm": 0.5232276918654158, "learning_rate": 4.0673154906731555e-05, "loss": 0.5943, "step": 27525 }, { "epoch": 0.8037838863666223, "grad_norm": 0.44668127062633256, "learning_rate": 4.067045147337118e-05, "loss": 0.5761, "step": 27530 }, { "epoch": 0.8039298696369396, "grad_norm": 0.5392347055197886, "learning_rate": 4.0667748040010816e-05, "loss": 0.6143, "step": 27535 }, { "epoch": 0.8040758529072568, "grad_norm": 0.5366190675731877, "learning_rate": 4.066504460665045e-05, "loss": 0.5789, "step": 27540 }, { "epoch": 0.804221836177574, "grad_norm": 0.49444680169908195, "learning_rate": 4.0662341173290084e-05, "loss": 0.5863, "step": 27545 }, { "epoch": 0.8043678194478913, "grad_norm": 0.4934836667367059, "learning_rate": 4.065963773992971e-05, "loss": 0.5942, "step": 27550 }, { "epoch": 0.8045138027182085, "grad_norm": 0.4726268176076868, "learning_rate": 4.0656934306569345e-05, "loss": 0.5727, "step": 27555 }, { "epoch": 0.8046597859885257, "grad_norm": 0.46625415257931474, "learning_rate": 4.065423087320898e-05, "loss": 0.6072, "step": 27560 }, { "epoch": 0.804805769258843, "grad_norm": 0.45107347295931915, "learning_rate": 4.0651527439848606e-05, "loss": 0.5791, "step": 27565 }, { "epoch": 0.8049517525291602, "grad_norm": 0.47898367418762355, "learning_rate": 4.064882400648824e-05, "loss": 0.5832, "step": 27570 }, { "epoch": 0.8050977357994774, "grad_norm": 0.4725292444156725, "learning_rate": 4.0646120573127874e-05, "loss": 0.5668, "step": 27575 }, { "epoch": 0.8052437190697946, "grad_norm": 0.5304329528052952, "learning_rate": 4.064341713976751e-05, "loss": 0.6036, "step": 27580 }, { "epoch": 0.8053897023401119, "grad_norm": 0.4721424972764196, "learning_rate": 4.064071370640714e-05, "loss": 0.6133, "step": 27585 }, { "epoch": 0.8055356856104291, "grad_norm": 0.47576416702613933, "learning_rate": 4.063801027304677e-05, "loss": 0.5518, "step": 27590 }, { "epoch": 0.8056816688807462, "grad_norm": 0.48322688552618925, "learning_rate": 4.0635306839686404e-05, "loss": 0.5685, "step": 27595 }, { "epoch": 0.8058276521510634, "grad_norm": 0.4575121462401775, "learning_rate": 4.063260340632604e-05, "loss": 0.5834, "step": 27600 }, { "epoch": 0.8059736354213807, "grad_norm": 0.41665950635189697, "learning_rate": 4.0629899972965665e-05, "loss": 0.5529, "step": 27605 }, { "epoch": 0.8061196186916979, "grad_norm": 0.5402544280261707, "learning_rate": 4.06271965396053e-05, "loss": 0.6068, "step": 27610 }, { "epoch": 0.8062656019620151, "grad_norm": 0.48490783928493253, "learning_rate": 4.062449310624493e-05, "loss": 0.573, "step": 27615 }, { "epoch": 0.8064115852323324, "grad_norm": 0.47090871986012833, "learning_rate": 4.062178967288457e-05, "loss": 0.6061, "step": 27620 }, { "epoch": 0.8065575685026496, "grad_norm": 0.4615097866767794, "learning_rate": 4.0619086239524194e-05, "loss": 0.5661, "step": 27625 }, { "epoch": 0.8067035517729668, "grad_norm": 0.5536744563292417, "learning_rate": 4.061638280616383e-05, "loss": 0.5987, "step": 27630 }, { "epoch": 0.806849535043284, "grad_norm": 0.4731816598480753, "learning_rate": 4.061367937280346e-05, "loss": 0.5517, "step": 27635 }, { "epoch": 0.8069955183136013, "grad_norm": 0.5237347957825649, "learning_rate": 4.0610975939443096e-05, "loss": 0.5995, "step": 27640 }, { "epoch": 0.8071415015839185, "grad_norm": 0.457671481326379, "learning_rate": 4.060827250608273e-05, "loss": 0.541, "step": 27645 }, { "epoch": 0.8072874848542357, "grad_norm": 0.4816759885375045, "learning_rate": 4.060556907272236e-05, "loss": 0.5881, "step": 27650 }, { "epoch": 0.807433468124553, "grad_norm": 0.47255656510456917, "learning_rate": 4.060286563936199e-05, "loss": 0.5646, "step": 27655 }, { "epoch": 0.8075794513948702, "grad_norm": 0.46602983241875, "learning_rate": 4.0600162206001625e-05, "loss": 0.5663, "step": 27660 }, { "epoch": 0.8077254346651873, "grad_norm": 0.4971907766357267, "learning_rate": 4.059745877264125e-05, "loss": 0.5661, "step": 27665 }, { "epoch": 0.8078714179355045, "grad_norm": 0.456913920258753, "learning_rate": 4.059475533928089e-05, "loss": 0.5932, "step": 27670 }, { "epoch": 0.8080174012058218, "grad_norm": 0.4731996906681795, "learning_rate": 4.059205190592052e-05, "loss": 0.5703, "step": 27675 }, { "epoch": 0.808163384476139, "grad_norm": 0.47769660445300327, "learning_rate": 4.0589348472560155e-05, "loss": 0.5655, "step": 27680 }, { "epoch": 0.8083093677464562, "grad_norm": 0.5136249729862641, "learning_rate": 4.058664503919978e-05, "loss": 0.5962, "step": 27685 }, { "epoch": 0.8084553510167735, "grad_norm": 0.4999501436965939, "learning_rate": 4.058394160583942e-05, "loss": 0.5676, "step": 27690 }, { "epoch": 0.8086013342870907, "grad_norm": 0.4954542935577133, "learning_rate": 4.058123817247905e-05, "loss": 0.5856, "step": 27695 }, { "epoch": 0.8087473175574079, "grad_norm": 0.4807414339056041, "learning_rate": 4.0578534739118684e-05, "loss": 0.596, "step": 27700 }, { "epoch": 0.8088933008277251, "grad_norm": 0.5041769049721065, "learning_rate": 4.057583130575832e-05, "loss": 0.5718, "step": 27705 }, { "epoch": 0.8090392840980424, "grad_norm": 0.47567785310655675, "learning_rate": 4.0573127872397945e-05, "loss": 0.5348, "step": 27710 }, { "epoch": 0.8091852673683596, "grad_norm": 0.500023628677916, "learning_rate": 4.057042443903758e-05, "loss": 0.5839, "step": 27715 }, { "epoch": 0.8093312506386768, "grad_norm": 0.4740631617915734, "learning_rate": 4.056772100567721e-05, "loss": 0.5552, "step": 27720 }, { "epoch": 0.8094772339089941, "grad_norm": 0.5255734107378278, "learning_rate": 4.056501757231684e-05, "loss": 0.5699, "step": 27725 }, { "epoch": 0.8096232171793113, "grad_norm": 0.4732480280705173, "learning_rate": 4.0562314138956475e-05, "loss": 0.5778, "step": 27730 }, { "epoch": 0.8097692004496285, "grad_norm": 0.4824536307330423, "learning_rate": 4.055961070559611e-05, "loss": 0.6268, "step": 27735 }, { "epoch": 0.8099151837199456, "grad_norm": 0.5140331860910724, "learning_rate": 4.0556907272235736e-05, "loss": 0.5783, "step": 27740 }, { "epoch": 0.8100611669902629, "grad_norm": 0.4977934108282735, "learning_rate": 4.055420383887537e-05, "loss": 0.6203, "step": 27745 }, { "epoch": 0.8102071502605801, "grad_norm": 0.47007719833286266, "learning_rate": 4.055150040551501e-05, "loss": 0.5572, "step": 27750 }, { "epoch": 0.8103531335308973, "grad_norm": 0.4673527285700454, "learning_rate": 4.054879697215464e-05, "loss": 0.5593, "step": 27755 }, { "epoch": 0.8104991168012146, "grad_norm": 0.4559092008943759, "learning_rate": 4.054609353879427e-05, "loss": 0.5728, "step": 27760 }, { "epoch": 0.8106451000715318, "grad_norm": 0.48087357058895386, "learning_rate": 4.0543390105433906e-05, "loss": 0.6337, "step": 27765 }, { "epoch": 0.810791083341849, "grad_norm": 0.46531240000427976, "learning_rate": 4.054068667207353e-05, "loss": 0.6094, "step": 27770 }, { "epoch": 0.8109370666121662, "grad_norm": 0.4834740062812958, "learning_rate": 4.053798323871317e-05, "loss": 0.5926, "step": 27775 }, { "epoch": 0.8110830498824835, "grad_norm": 0.523402584696972, "learning_rate": 4.05352798053528e-05, "loss": 0.588, "step": 27780 }, { "epoch": 0.8112290331528007, "grad_norm": 0.6375683928415372, "learning_rate": 4.053257637199243e-05, "loss": 0.5626, "step": 27785 }, { "epoch": 0.8113750164231179, "grad_norm": 0.5539573062158402, "learning_rate": 4.052987293863206e-05, "loss": 0.6216, "step": 27790 }, { "epoch": 0.8115209996934352, "grad_norm": 0.4823663305522676, "learning_rate": 4.0527169505271696e-05, "loss": 0.5691, "step": 27795 }, { "epoch": 0.8116669829637524, "grad_norm": 0.48572587765651676, "learning_rate": 4.0524466071911324e-05, "loss": 0.6143, "step": 27800 }, { "epoch": 0.8118129662340696, "grad_norm": 0.4991182887496647, "learning_rate": 4.0521762638550964e-05, "loss": 0.6112, "step": 27805 }, { "epoch": 0.8119589495043869, "grad_norm": 0.49774996947308636, "learning_rate": 4.05190592051906e-05, "loss": 0.5465, "step": 27810 }, { "epoch": 0.812104932774704, "grad_norm": 0.4671600106550813, "learning_rate": 4.0516355771830226e-05, "loss": 0.5799, "step": 27815 }, { "epoch": 0.8122509160450212, "grad_norm": 0.4762757118516118, "learning_rate": 4.051365233846986e-05, "loss": 0.5743, "step": 27820 }, { "epoch": 0.8123968993153384, "grad_norm": 0.4964255186721996, "learning_rate": 4.0510948905109494e-05, "loss": 0.5815, "step": 27825 }, { "epoch": 0.8125428825856557, "grad_norm": 0.4767827329400308, "learning_rate": 4.050824547174912e-05, "loss": 0.5741, "step": 27830 }, { "epoch": 0.8126888658559729, "grad_norm": 0.5022158814328759, "learning_rate": 4.0505542038388755e-05, "loss": 0.5511, "step": 27835 }, { "epoch": 0.8128348491262901, "grad_norm": 0.49161728698802615, "learning_rate": 4.050283860502839e-05, "loss": 0.5881, "step": 27840 }, { "epoch": 0.8129808323966073, "grad_norm": 0.46622891137466876, "learning_rate": 4.0500135171668016e-05, "loss": 0.591, "step": 27845 }, { "epoch": 0.8131268156669246, "grad_norm": 0.4984766825849544, "learning_rate": 4.049743173830765e-05, "loss": 0.5715, "step": 27850 }, { "epoch": 0.8132727989372418, "grad_norm": 0.49243987608827644, "learning_rate": 4.0494728304947284e-05, "loss": 0.5943, "step": 27855 }, { "epoch": 0.813418782207559, "grad_norm": 0.45648343964568777, "learning_rate": 4.049202487158692e-05, "loss": 0.5971, "step": 27860 }, { "epoch": 0.8135647654778763, "grad_norm": 0.5159797307994959, "learning_rate": 4.048932143822655e-05, "loss": 0.5866, "step": 27865 }, { "epoch": 0.8137107487481935, "grad_norm": 0.4599343549595533, "learning_rate": 4.0486618004866186e-05, "loss": 0.5853, "step": 27870 }, { "epoch": 0.8138567320185107, "grad_norm": 0.5039175291408546, "learning_rate": 4.0483914571505813e-05, "loss": 0.5939, "step": 27875 }, { "epoch": 0.814002715288828, "grad_norm": 0.5646557811391917, "learning_rate": 4.048121113814545e-05, "loss": 0.5726, "step": 27880 }, { "epoch": 0.8141486985591451, "grad_norm": 0.468288823046827, "learning_rate": 4.047850770478508e-05, "loss": 0.54, "step": 27885 }, { "epoch": 0.8142946818294623, "grad_norm": 0.48066469545034807, "learning_rate": 4.047580427142471e-05, "loss": 0.6182, "step": 27890 }, { "epoch": 0.8144406650997795, "grad_norm": 0.4942995022898227, "learning_rate": 4.047310083806434e-05, "loss": 0.5564, "step": 27895 }, { "epoch": 0.8145866483700968, "grad_norm": 0.48854683374966884, "learning_rate": 4.047039740470398e-05, "loss": 0.5898, "step": 27900 }, { "epoch": 0.814732631640414, "grad_norm": 0.5339175264769809, "learning_rate": 4.0467693971343604e-05, "loss": 0.6339, "step": 27905 }, { "epoch": 0.8148786149107312, "grad_norm": 0.4647396981183384, "learning_rate": 4.046499053798324e-05, "loss": 0.5938, "step": 27910 }, { "epoch": 0.8150245981810484, "grad_norm": 0.47823309510575135, "learning_rate": 4.046228710462288e-05, "loss": 0.6054, "step": 27915 }, { "epoch": 0.8151705814513657, "grad_norm": 0.46961759142942555, "learning_rate": 4.0459583671262506e-05, "loss": 0.5341, "step": 27920 }, { "epoch": 0.8153165647216829, "grad_norm": 0.44406889811064953, "learning_rate": 4.045688023790214e-05, "loss": 0.5703, "step": 27925 }, { "epoch": 0.8154625479920001, "grad_norm": 0.483967211104462, "learning_rate": 4.0454176804541774e-05, "loss": 0.576, "step": 27930 }, { "epoch": 0.8156085312623174, "grad_norm": 0.470930202866237, "learning_rate": 4.04514733711814e-05, "loss": 0.5677, "step": 27935 }, { "epoch": 0.8157545145326346, "grad_norm": 0.45611196075656857, "learning_rate": 4.0448769937821035e-05, "loss": 0.5636, "step": 27940 }, { "epoch": 0.8159004978029518, "grad_norm": 0.45032187511561983, "learning_rate": 4.044606650446067e-05, "loss": 0.5751, "step": 27945 }, { "epoch": 0.816046481073269, "grad_norm": 0.4610516573052287, "learning_rate": 4.0443363071100296e-05, "loss": 0.6225, "step": 27950 }, { "epoch": 0.8161924643435863, "grad_norm": 0.5015237365106742, "learning_rate": 4.044065963773993e-05, "loss": 0.5926, "step": 27955 }, { "epoch": 0.8163384476139034, "grad_norm": 0.49921890609647374, "learning_rate": 4.0437956204379564e-05, "loss": 0.5866, "step": 27960 }, { "epoch": 0.8164844308842206, "grad_norm": 0.4881131239387303, "learning_rate": 4.043525277101919e-05, "loss": 0.5709, "step": 27965 }, { "epoch": 0.8166304141545379, "grad_norm": 0.48587249589109754, "learning_rate": 4.0432549337658826e-05, "loss": 0.5788, "step": 27970 }, { "epoch": 0.8167763974248551, "grad_norm": 0.5115471163259758, "learning_rate": 4.0429845904298467e-05, "loss": 0.5956, "step": 27975 }, { "epoch": 0.8169223806951723, "grad_norm": 0.4739242377569629, "learning_rate": 4.0427142470938094e-05, "loss": 0.572, "step": 27980 }, { "epoch": 0.8170683639654895, "grad_norm": 0.48250846409221343, "learning_rate": 4.042443903757773e-05, "loss": 0.571, "step": 27985 }, { "epoch": 0.8172143472358068, "grad_norm": 0.4649030252859327, "learning_rate": 4.042173560421736e-05, "loss": 0.5961, "step": 27990 }, { "epoch": 0.817360330506124, "grad_norm": 0.48233607059882266, "learning_rate": 4.041903217085699e-05, "loss": 0.5357, "step": 27995 }, { "epoch": 0.8175063137764412, "grad_norm": 0.4835025869512256, "learning_rate": 4.041632873749662e-05, "loss": 0.5844, "step": 28000 }, { "epoch": 0.8176522970467585, "grad_norm": 0.47486420196391965, "learning_rate": 4.041362530413626e-05, "loss": 0.5958, "step": 28005 }, { "epoch": 0.8177982803170757, "grad_norm": 0.49705293287147967, "learning_rate": 4.0410921870775884e-05, "loss": 0.5872, "step": 28010 }, { "epoch": 0.8179442635873929, "grad_norm": 0.46269341187366503, "learning_rate": 4.040821843741552e-05, "loss": 0.5939, "step": 28015 }, { "epoch": 0.8180902468577101, "grad_norm": 0.46117169803362323, "learning_rate": 4.040551500405515e-05, "loss": 0.5394, "step": 28020 }, { "epoch": 0.8182362301280274, "grad_norm": 0.49067701106302286, "learning_rate": 4.040281157069478e-05, "loss": 0.5308, "step": 28025 }, { "epoch": 0.8183822133983446, "grad_norm": 0.5120971312618192, "learning_rate": 4.040010813733442e-05, "loss": 0.6099, "step": 28030 }, { "epoch": 0.8185281966686617, "grad_norm": 0.49903824553271287, "learning_rate": 4.0397404703974054e-05, "loss": 0.612, "step": 28035 }, { "epoch": 0.818674179938979, "grad_norm": 0.4726547972636938, "learning_rate": 4.039470127061368e-05, "loss": 0.5825, "step": 28040 }, { "epoch": 0.8188201632092962, "grad_norm": 0.49951767505470085, "learning_rate": 4.0391997837253316e-05, "loss": 0.6106, "step": 28045 }, { "epoch": 0.8189661464796134, "grad_norm": 0.49355056478585896, "learning_rate": 4.038929440389295e-05, "loss": 0.6171, "step": 28050 }, { "epoch": 0.8191121297499306, "grad_norm": 0.4776685252098006, "learning_rate": 4.038659097053258e-05, "loss": 0.6027, "step": 28055 }, { "epoch": 0.8192581130202479, "grad_norm": 0.5055914075847829, "learning_rate": 4.038388753717221e-05, "loss": 0.6143, "step": 28060 }, { "epoch": 0.8194040962905651, "grad_norm": 0.4838244000601444, "learning_rate": 4.0381184103811845e-05, "loss": 0.5476, "step": 28065 }, { "epoch": 0.8195500795608823, "grad_norm": 0.458571376947412, "learning_rate": 4.037848067045147e-05, "loss": 0.561, "step": 28070 }, { "epoch": 0.8196960628311996, "grad_norm": 0.5046779825562312, "learning_rate": 4.0375777237091106e-05, "loss": 0.588, "step": 28075 }, { "epoch": 0.8198420461015168, "grad_norm": 0.496715926484546, "learning_rate": 4.037307380373074e-05, "loss": 0.5988, "step": 28080 }, { "epoch": 0.819988029371834, "grad_norm": 0.5059852119660432, "learning_rate": 4.0370370370370374e-05, "loss": 0.5638, "step": 28085 }, { "epoch": 0.8201340126421512, "grad_norm": 0.48935244830015034, "learning_rate": 4.036766693701001e-05, "loss": 0.6154, "step": 28090 }, { "epoch": 0.8202799959124685, "grad_norm": 0.4553183934115889, "learning_rate": 4.0364963503649635e-05, "loss": 0.5915, "step": 28095 }, { "epoch": 0.8204259791827857, "grad_norm": 0.5362246251289265, "learning_rate": 4.036226007028927e-05, "loss": 0.5353, "step": 28100 }, { "epoch": 0.8205719624531028, "grad_norm": 0.4830525697688783, "learning_rate": 4.03595566369289e-05, "loss": 0.5472, "step": 28105 }, { "epoch": 0.82071794572342, "grad_norm": 0.48638367806697314, "learning_rate": 4.035685320356854e-05, "loss": 0.591, "step": 28110 }, { "epoch": 0.8208639289937373, "grad_norm": 0.5661970315234035, "learning_rate": 4.0354149770208165e-05, "loss": 0.5944, "step": 28115 }, { "epoch": 0.8210099122640545, "grad_norm": 0.44859351038202766, "learning_rate": 4.03514463368478e-05, "loss": 0.5685, "step": 28120 }, { "epoch": 0.8211558955343717, "grad_norm": 0.49816504660583816, "learning_rate": 4.034874290348743e-05, "loss": 0.5844, "step": 28125 }, { "epoch": 0.821301878804689, "grad_norm": 0.5484434539677394, "learning_rate": 4.034603947012706e-05, "loss": 0.5945, "step": 28130 }, { "epoch": 0.8214478620750062, "grad_norm": 0.4863401834048787, "learning_rate": 4.0343336036766694e-05, "loss": 0.5737, "step": 28135 }, { "epoch": 0.8215938453453234, "grad_norm": 0.49489422085136053, "learning_rate": 4.034063260340633e-05, "loss": 0.579, "step": 28140 }, { "epoch": 0.8217398286156407, "grad_norm": 0.48270134367472917, "learning_rate": 4.033792917004596e-05, "loss": 0.5531, "step": 28145 }, { "epoch": 0.8218858118859579, "grad_norm": 0.5097704778719525, "learning_rate": 4.0335225736685596e-05, "loss": 0.543, "step": 28150 }, { "epoch": 0.8220317951562751, "grad_norm": 0.5096365786520803, "learning_rate": 4.033252230332522e-05, "loss": 0.5757, "step": 28155 }, { "epoch": 0.8221777784265923, "grad_norm": 0.4683342882821541, "learning_rate": 4.032981886996486e-05, "loss": 0.5698, "step": 28160 }, { "epoch": 0.8223237616969096, "grad_norm": 0.4597986820659409, "learning_rate": 4.032711543660449e-05, "loss": 0.5394, "step": 28165 }, { "epoch": 0.8224697449672268, "grad_norm": 0.46077745789734315, "learning_rate": 4.0324412003244125e-05, "loss": 0.5566, "step": 28170 }, { "epoch": 0.822615728237544, "grad_norm": 0.5069202075733661, "learning_rate": 4.032170856988375e-05, "loss": 0.5993, "step": 28175 }, { "epoch": 0.8227617115078611, "grad_norm": 0.5508630601434928, "learning_rate": 4.0319005136523386e-05, "loss": 0.6078, "step": 28180 }, { "epoch": 0.8229076947781784, "grad_norm": 0.5011387393655753, "learning_rate": 4.031630170316302e-05, "loss": 0.591, "step": 28185 }, { "epoch": 0.8230536780484956, "grad_norm": 0.4948647059368962, "learning_rate": 4.031359826980265e-05, "loss": 0.5843, "step": 28190 }, { "epoch": 0.8231996613188128, "grad_norm": 0.4978244344520603, "learning_rate": 4.031089483644228e-05, "loss": 0.5564, "step": 28195 }, { "epoch": 0.8233456445891301, "grad_norm": 0.519821020791866, "learning_rate": 4.0308191403081916e-05, "loss": 0.5608, "step": 28200 }, { "epoch": 0.8234916278594473, "grad_norm": 0.5425324711454332, "learning_rate": 4.030548796972155e-05, "loss": 0.5764, "step": 28205 }, { "epoch": 0.8236376111297645, "grad_norm": 0.4575986651722688, "learning_rate": 4.0302784536361184e-05, "loss": 0.5461, "step": 28210 }, { "epoch": 0.8237835944000818, "grad_norm": 0.4887986366986915, "learning_rate": 4.030008110300081e-05, "loss": 0.5691, "step": 28215 }, { "epoch": 0.823929577670399, "grad_norm": 0.44825126541133437, "learning_rate": 4.0297377669640445e-05, "loss": 0.5741, "step": 28220 }, { "epoch": 0.8240755609407162, "grad_norm": 0.47405148459741986, "learning_rate": 4.029467423628008e-05, "loss": 0.5512, "step": 28225 }, { "epoch": 0.8242215442110334, "grad_norm": 0.5084099728272499, "learning_rate": 4.0291970802919706e-05, "loss": 0.5911, "step": 28230 }, { "epoch": 0.8243675274813507, "grad_norm": 0.48242199933139807, "learning_rate": 4.028926736955934e-05, "loss": 0.5859, "step": 28235 }, { "epoch": 0.8245135107516679, "grad_norm": 0.5146185570797571, "learning_rate": 4.0286563936198974e-05, "loss": 0.5893, "step": 28240 }, { "epoch": 0.8246594940219851, "grad_norm": 0.49480883379165436, "learning_rate": 4.028386050283861e-05, "loss": 0.5515, "step": 28245 }, { "epoch": 0.8248054772923022, "grad_norm": 0.4931406186255318, "learning_rate": 4.0281157069478235e-05, "loss": 0.5516, "step": 28250 }, { "epoch": 0.8249514605626195, "grad_norm": 0.5049953465629915, "learning_rate": 4.0278453636117876e-05, "loss": 0.5454, "step": 28255 }, { "epoch": 0.8250974438329367, "grad_norm": 0.5980091784903072, "learning_rate": 4.0275750202757504e-05, "loss": 0.5995, "step": 28260 }, { "epoch": 0.8252434271032539, "grad_norm": 0.4883942421893215, "learning_rate": 4.027304676939714e-05, "loss": 0.6232, "step": 28265 }, { "epoch": 0.8253894103735712, "grad_norm": 0.5323940015053302, "learning_rate": 4.027034333603677e-05, "loss": 0.613, "step": 28270 }, { "epoch": 0.8255353936438884, "grad_norm": 0.4829480653561637, "learning_rate": 4.02676399026764e-05, "loss": 0.5682, "step": 28275 }, { "epoch": 0.8256813769142056, "grad_norm": 0.482327042630906, "learning_rate": 4.026493646931603e-05, "loss": 0.6021, "step": 28280 }, { "epoch": 0.8258273601845229, "grad_norm": 0.5026177230268795, "learning_rate": 4.026223303595567e-05, "loss": 0.5731, "step": 28285 }, { "epoch": 0.8259733434548401, "grad_norm": 0.5055215910951167, "learning_rate": 4.0259529602595294e-05, "loss": 0.6049, "step": 28290 }, { "epoch": 0.8261193267251573, "grad_norm": 0.4770702751232095, "learning_rate": 4.025682616923493e-05, "loss": 0.5858, "step": 28295 }, { "epoch": 0.8262653099954745, "grad_norm": 0.5050043065572174, "learning_rate": 4.025412273587456e-05, "loss": 0.5861, "step": 28300 }, { "epoch": 0.8264112932657918, "grad_norm": 0.42049294556749484, "learning_rate": 4.0251419302514196e-05, "loss": 0.551, "step": 28305 }, { "epoch": 0.826557276536109, "grad_norm": 0.47808267435968993, "learning_rate": 4.024871586915382e-05, "loss": 0.5583, "step": 28310 }, { "epoch": 0.8267032598064262, "grad_norm": 0.4423887277496576, "learning_rate": 4.0246012435793464e-05, "loss": 0.5476, "step": 28315 }, { "epoch": 0.8268492430767435, "grad_norm": 0.4876969490185113, "learning_rate": 4.024330900243309e-05, "loss": 0.5619, "step": 28320 }, { "epoch": 0.8269952263470606, "grad_norm": 0.47463443093095375, "learning_rate": 4.0240605569072725e-05, "loss": 0.5842, "step": 28325 }, { "epoch": 0.8271412096173778, "grad_norm": 0.4424815185795845, "learning_rate": 4.023790213571236e-05, "loss": 0.564, "step": 28330 }, { "epoch": 0.827287192887695, "grad_norm": 0.488433056638473, "learning_rate": 4.0235198702351987e-05, "loss": 0.614, "step": 28335 }, { "epoch": 0.8274331761580123, "grad_norm": 0.4872649705643328, "learning_rate": 4.023249526899162e-05, "loss": 0.608, "step": 28340 }, { "epoch": 0.8275791594283295, "grad_norm": 0.46451792985037, "learning_rate": 4.0229791835631255e-05, "loss": 0.5644, "step": 28345 }, { "epoch": 0.8277251426986467, "grad_norm": 0.4668067217135776, "learning_rate": 4.022708840227088e-05, "loss": 0.5688, "step": 28350 }, { "epoch": 0.827871125968964, "grad_norm": 0.4869580212634317, "learning_rate": 4.0224384968910516e-05, "loss": 0.5891, "step": 28355 }, { "epoch": 0.8280171092392812, "grad_norm": 0.4758925963191965, "learning_rate": 4.022168153555015e-05, "loss": 0.5851, "step": 28360 }, { "epoch": 0.8281630925095984, "grad_norm": 0.47927803841297734, "learning_rate": 4.021897810218978e-05, "loss": 0.572, "step": 28365 }, { "epoch": 0.8283090757799156, "grad_norm": 0.47518211544440736, "learning_rate": 4.021627466882942e-05, "loss": 0.5586, "step": 28370 }, { "epoch": 0.8284550590502329, "grad_norm": 0.5161481934114257, "learning_rate": 4.021357123546905e-05, "loss": 0.6155, "step": 28375 }, { "epoch": 0.8286010423205501, "grad_norm": 0.500711310409762, "learning_rate": 4.021086780210868e-05, "loss": 0.6218, "step": 28380 }, { "epoch": 0.8287470255908673, "grad_norm": 0.4960431115131775, "learning_rate": 4.020816436874831e-05, "loss": 0.6062, "step": 28385 }, { "epoch": 0.8288930088611846, "grad_norm": 0.4725831580246436, "learning_rate": 4.020546093538795e-05, "loss": 0.6059, "step": 28390 }, { "epoch": 0.8290389921315018, "grad_norm": 0.4636209576139614, "learning_rate": 4.0202757502027574e-05, "loss": 0.5741, "step": 28395 }, { "epoch": 0.8291849754018189, "grad_norm": 0.4594204655756444, "learning_rate": 4.020005406866721e-05, "loss": 0.5623, "step": 28400 }, { "epoch": 0.8293309586721361, "grad_norm": 0.4727660493446217, "learning_rate": 4.019735063530684e-05, "loss": 0.5936, "step": 28405 }, { "epoch": 0.8294769419424534, "grad_norm": 0.4897824528336723, "learning_rate": 4.019464720194647e-05, "loss": 0.5822, "step": 28410 }, { "epoch": 0.8296229252127706, "grad_norm": 0.4788980082760175, "learning_rate": 4.0191943768586104e-05, "loss": 0.5553, "step": 28415 }, { "epoch": 0.8297689084830878, "grad_norm": 0.48767643145282247, "learning_rate": 4.018924033522574e-05, "loss": 0.6034, "step": 28420 }, { "epoch": 0.829914891753405, "grad_norm": 0.44535234047926264, "learning_rate": 4.018653690186537e-05, "loss": 0.5456, "step": 28425 }, { "epoch": 0.8300608750237223, "grad_norm": 1.477690240917796, "learning_rate": 4.0183833468505006e-05, "loss": 0.5997, "step": 28430 }, { "epoch": 0.8302068582940395, "grad_norm": 0.5185438796986946, "learning_rate": 4.018113003514464e-05, "loss": 0.5961, "step": 28435 }, { "epoch": 0.8303528415643567, "grad_norm": 0.45319739941593695, "learning_rate": 4.017842660178427e-05, "loss": 0.5527, "step": 28440 }, { "epoch": 0.830498824834674, "grad_norm": 0.5232760379763035, "learning_rate": 4.01757231684239e-05, "loss": 0.5893, "step": 28445 }, { "epoch": 0.8306448081049912, "grad_norm": 0.5165059095855616, "learning_rate": 4.0173019735063535e-05, "loss": 0.5876, "step": 28450 }, { "epoch": 0.8307907913753084, "grad_norm": 0.4698510551426366, "learning_rate": 4.017031630170316e-05, "loss": 0.5751, "step": 28455 }, { "epoch": 0.8309367746456257, "grad_norm": 0.4727987386518304, "learning_rate": 4.0167612868342796e-05, "loss": 0.5849, "step": 28460 }, { "epoch": 0.8310827579159429, "grad_norm": 0.4534023140460517, "learning_rate": 4.016490943498243e-05, "loss": 0.5399, "step": 28465 }, { "epoch": 0.83122874118626, "grad_norm": 0.46162603271224456, "learning_rate": 4.016220600162206e-05, "loss": 0.5533, "step": 28470 }, { "epoch": 0.8313747244565772, "grad_norm": 0.5072357107360351, "learning_rate": 4.015950256826169e-05, "loss": 0.5789, "step": 28475 }, { "epoch": 0.8315207077268945, "grad_norm": 0.48389983141379556, "learning_rate": 4.0156799134901325e-05, "loss": 0.5791, "step": 28480 }, { "epoch": 0.8316666909972117, "grad_norm": 0.4487272080867481, "learning_rate": 4.015409570154096e-05, "loss": 0.5486, "step": 28485 }, { "epoch": 0.8318126742675289, "grad_norm": 0.4742215668277903, "learning_rate": 4.0151392268180594e-05, "loss": 0.6027, "step": 28490 }, { "epoch": 0.8319586575378461, "grad_norm": 0.47493481722349845, "learning_rate": 4.014868883482023e-05, "loss": 0.5917, "step": 28495 }, { "epoch": 0.8321046408081634, "grad_norm": 0.5143882839833381, "learning_rate": 4.0145985401459855e-05, "loss": 0.5905, "step": 28500 }, { "epoch": 0.8322506240784806, "grad_norm": 0.5211391328064631, "learning_rate": 4.014328196809949e-05, "loss": 0.6079, "step": 28505 }, { "epoch": 0.8323966073487978, "grad_norm": 0.49054968799062265, "learning_rate": 4.014057853473912e-05, "loss": 0.5884, "step": 28510 }, { "epoch": 0.8325425906191151, "grad_norm": 0.47995004693327925, "learning_rate": 4.013787510137875e-05, "loss": 0.5644, "step": 28515 }, { "epoch": 0.8326885738894323, "grad_norm": 0.4598170571573396, "learning_rate": 4.0135171668018384e-05, "loss": 0.5546, "step": 28520 }, { "epoch": 0.8328345571597495, "grad_norm": 0.5439797782623154, "learning_rate": 4.013246823465802e-05, "loss": 0.6107, "step": 28525 }, { "epoch": 0.8329805404300668, "grad_norm": 0.47024531229892264, "learning_rate": 4.0129764801297645e-05, "loss": 0.5962, "step": 28530 }, { "epoch": 0.833126523700384, "grad_norm": 0.4780314657043609, "learning_rate": 4.012706136793728e-05, "loss": 0.5974, "step": 28535 }, { "epoch": 0.8332725069707012, "grad_norm": 0.43860918820395733, "learning_rate": 4.012435793457692e-05, "loss": 0.5978, "step": 28540 }, { "epoch": 0.8334184902410183, "grad_norm": 0.46616203517008364, "learning_rate": 4.012165450121655e-05, "loss": 0.5519, "step": 28545 }, { "epoch": 0.8335644735113356, "grad_norm": 0.45337614553397754, "learning_rate": 4.011895106785618e-05, "loss": 0.5746, "step": 28550 }, { "epoch": 0.8337104567816528, "grad_norm": 0.4890864174467048, "learning_rate": 4.0116247634495815e-05, "loss": 0.5962, "step": 28555 }, { "epoch": 0.83385644005197, "grad_norm": 0.4854832763109584, "learning_rate": 4.011354420113544e-05, "loss": 0.5831, "step": 28560 }, { "epoch": 0.8340024233222872, "grad_norm": 0.4628858235066753, "learning_rate": 4.0110840767775077e-05, "loss": 0.5608, "step": 28565 }, { "epoch": 0.8341484065926045, "grad_norm": 0.4845061104051395, "learning_rate": 4.010813733441471e-05, "loss": 0.5702, "step": 28570 }, { "epoch": 0.8342943898629217, "grad_norm": 0.5101556314605219, "learning_rate": 4.010543390105434e-05, "loss": 0.596, "step": 28575 }, { "epoch": 0.8344403731332389, "grad_norm": 0.531579493938886, "learning_rate": 4.010273046769397e-05, "loss": 0.5802, "step": 28580 }, { "epoch": 0.8345863564035562, "grad_norm": 0.4184340525077409, "learning_rate": 4.0100027034333606e-05, "loss": 0.556, "step": 28585 }, { "epoch": 0.8347323396738734, "grad_norm": 0.5075698944080029, "learning_rate": 4.009732360097323e-05, "loss": 0.5959, "step": 28590 }, { "epoch": 0.8348783229441906, "grad_norm": 0.5022786570430575, "learning_rate": 4.0094620167612874e-05, "loss": 0.5881, "step": 28595 }, { "epoch": 0.8350243062145078, "grad_norm": 0.46775578247712946, "learning_rate": 4.009191673425251e-05, "loss": 0.5941, "step": 28600 }, { "epoch": 0.8351702894848251, "grad_norm": 0.4645395541290761, "learning_rate": 4.0089213300892135e-05, "loss": 0.5392, "step": 28605 }, { "epoch": 0.8353162727551423, "grad_norm": 0.5115038202642025, "learning_rate": 4.008650986753177e-05, "loss": 0.5835, "step": 28610 }, { "epoch": 0.8354622560254595, "grad_norm": 0.47925039797795804, "learning_rate": 4.00838064341714e-05, "loss": 0.5812, "step": 28615 }, { "epoch": 0.8356082392957767, "grad_norm": 0.4602598883191342, "learning_rate": 4.008110300081103e-05, "loss": 0.5782, "step": 28620 }, { "epoch": 0.8357542225660939, "grad_norm": 0.47513331826025956, "learning_rate": 4.0078399567450664e-05, "loss": 0.5673, "step": 28625 }, { "epoch": 0.8359002058364111, "grad_norm": 0.44945437519847115, "learning_rate": 4.00756961340903e-05, "loss": 0.581, "step": 28630 }, { "epoch": 0.8360461891067283, "grad_norm": 0.48201616808257935, "learning_rate": 4.0072992700729926e-05, "loss": 0.5963, "step": 28635 }, { "epoch": 0.8361921723770456, "grad_norm": 0.4861621856547835, "learning_rate": 4.007028926736956e-05, "loss": 0.5493, "step": 28640 }, { "epoch": 0.8363381556473628, "grad_norm": 0.4996218155772707, "learning_rate": 4.0067585834009194e-05, "loss": 0.5766, "step": 28645 }, { "epoch": 0.83648413891768, "grad_norm": 0.47771943491258284, "learning_rate": 4.006488240064882e-05, "loss": 0.5506, "step": 28650 }, { "epoch": 0.8366301221879973, "grad_norm": 0.47115949893330056, "learning_rate": 4.006217896728846e-05, "loss": 0.5866, "step": 28655 }, { "epoch": 0.8367761054583145, "grad_norm": 0.49203788968476053, "learning_rate": 4.0059475533928096e-05, "loss": 0.5567, "step": 28660 }, { "epoch": 0.8369220887286317, "grad_norm": 0.5849812790043319, "learning_rate": 4.005677210056772e-05, "loss": 0.5855, "step": 28665 }, { "epoch": 0.837068071998949, "grad_norm": 0.5029016304573511, "learning_rate": 4.005406866720736e-05, "loss": 0.5712, "step": 28670 }, { "epoch": 0.8372140552692662, "grad_norm": 0.4820269965501686, "learning_rate": 4.005136523384699e-05, "loss": 0.5547, "step": 28675 }, { "epoch": 0.8373600385395834, "grad_norm": 0.4534829208396593, "learning_rate": 4.004866180048662e-05, "loss": 0.5733, "step": 28680 }, { "epoch": 0.8375060218099006, "grad_norm": 0.47848503636329504, "learning_rate": 4.004595836712625e-05, "loss": 0.5777, "step": 28685 }, { "epoch": 0.8376520050802178, "grad_norm": 0.5302090052617993, "learning_rate": 4.0043254933765886e-05, "loss": 0.6094, "step": 28690 }, { "epoch": 0.837797988350535, "grad_norm": 0.4816471396648606, "learning_rate": 4.0040551500405513e-05, "loss": 0.5594, "step": 28695 }, { "epoch": 0.8379439716208522, "grad_norm": 0.42073525903643866, "learning_rate": 4.003784806704515e-05, "loss": 0.5596, "step": 28700 }, { "epoch": 0.8380899548911694, "grad_norm": 0.4817776562823615, "learning_rate": 4.003514463368478e-05, "loss": 0.5678, "step": 28705 }, { "epoch": 0.8382359381614867, "grad_norm": 0.4944204247253353, "learning_rate": 4.0032441200324415e-05, "loss": 0.5816, "step": 28710 }, { "epoch": 0.8383819214318039, "grad_norm": 0.49147084464023205, "learning_rate": 4.002973776696405e-05, "loss": 0.563, "step": 28715 }, { "epoch": 0.8385279047021211, "grad_norm": 0.48928264130355564, "learning_rate": 4.0027034333603683e-05, "loss": 0.5682, "step": 28720 }, { "epoch": 0.8386738879724384, "grad_norm": 0.49452909296346276, "learning_rate": 4.002433090024331e-05, "loss": 0.5872, "step": 28725 }, { "epoch": 0.8388198712427556, "grad_norm": 0.46062850718905973, "learning_rate": 4.0021627466882945e-05, "loss": 0.5946, "step": 28730 }, { "epoch": 0.8389658545130728, "grad_norm": 0.5204975289278757, "learning_rate": 4.001892403352258e-05, "loss": 0.5682, "step": 28735 }, { "epoch": 0.83911183778339, "grad_norm": 0.49850617109475504, "learning_rate": 4.0016220600162206e-05, "loss": 0.583, "step": 28740 }, { "epoch": 0.8392578210537073, "grad_norm": 0.5007266327313666, "learning_rate": 4.001351716680184e-05, "loss": 0.5727, "step": 28745 }, { "epoch": 0.8394038043240245, "grad_norm": 0.48991777220682425, "learning_rate": 4.0010813733441474e-05, "loss": 0.5687, "step": 28750 }, { "epoch": 0.8395497875943417, "grad_norm": 0.5095062288693117, "learning_rate": 4.00081103000811e-05, "loss": 0.5764, "step": 28755 }, { "epoch": 0.839695770864659, "grad_norm": 0.4695184127943562, "learning_rate": 4.0005406866720735e-05, "loss": 0.6006, "step": 28760 }, { "epoch": 0.8398417541349761, "grad_norm": 0.5105447419387168, "learning_rate": 4.000270343336037e-05, "loss": 0.5978, "step": 28765 }, { "epoch": 0.8399877374052933, "grad_norm": 0.47984227370997545, "learning_rate": 4e-05, "loss": 0.5886, "step": 28770 }, { "epoch": 0.8401337206756105, "grad_norm": 0.5420748312706574, "learning_rate": 3.999729656663964e-05, "loss": 0.5725, "step": 28775 }, { "epoch": 0.8402797039459278, "grad_norm": 0.5435884196004735, "learning_rate": 3.9994593133279265e-05, "loss": 0.6341, "step": 28780 }, { "epoch": 0.840425687216245, "grad_norm": 0.4460806319695271, "learning_rate": 3.99918896999189e-05, "loss": 0.5447, "step": 28785 }, { "epoch": 0.8405716704865622, "grad_norm": 0.49278163456801655, "learning_rate": 3.998918626655853e-05, "loss": 0.563, "step": 28790 }, { "epoch": 0.8407176537568795, "grad_norm": 0.47735160263882753, "learning_rate": 3.9986482833198167e-05, "loss": 0.6084, "step": 28795 }, { "epoch": 0.8408636370271967, "grad_norm": 0.5397652708752317, "learning_rate": 3.9983779399837794e-05, "loss": 0.5829, "step": 28800 }, { "epoch": 0.8410096202975139, "grad_norm": 0.47678683901748564, "learning_rate": 3.998107596647743e-05, "loss": 0.5588, "step": 28805 }, { "epoch": 0.8411556035678311, "grad_norm": 0.4509283316858051, "learning_rate": 3.997837253311706e-05, "loss": 0.5558, "step": 28810 }, { "epoch": 0.8413015868381484, "grad_norm": 0.47145078491161774, "learning_rate": 3.997566909975669e-05, "loss": 0.5842, "step": 28815 }, { "epoch": 0.8414475701084656, "grad_norm": 0.5087743641455881, "learning_rate": 3.997296566639632e-05, "loss": 0.5749, "step": 28820 }, { "epoch": 0.8415935533787828, "grad_norm": 0.5078241643639, "learning_rate": 3.997026223303596e-05, "loss": 0.5514, "step": 28825 }, { "epoch": 0.8417395366491001, "grad_norm": 0.4540983029952141, "learning_rate": 3.996755879967559e-05, "loss": 0.5784, "step": 28830 }, { "epoch": 0.8418855199194172, "grad_norm": 0.4800275997477469, "learning_rate": 3.9964855366315225e-05, "loss": 0.5997, "step": 28835 }, { "epoch": 0.8420315031897344, "grad_norm": 0.5343309388880075, "learning_rate": 3.996215193295485e-05, "loss": 0.6007, "step": 28840 }, { "epoch": 0.8421774864600516, "grad_norm": 0.49565754760944764, "learning_rate": 3.9959448499594486e-05, "loss": 0.5634, "step": 28845 }, { "epoch": 0.8423234697303689, "grad_norm": 0.4621942332202745, "learning_rate": 3.995674506623412e-05, "loss": 0.575, "step": 28850 }, { "epoch": 0.8424694530006861, "grad_norm": 0.49414737376211626, "learning_rate": 3.9954041632873754e-05, "loss": 0.6001, "step": 28855 }, { "epoch": 0.8426154362710033, "grad_norm": 0.4353945309176783, "learning_rate": 3.995133819951338e-05, "loss": 0.5802, "step": 28860 }, { "epoch": 0.8427614195413206, "grad_norm": 0.5047653516154431, "learning_rate": 3.9948634766153016e-05, "loss": 0.5784, "step": 28865 }, { "epoch": 0.8429074028116378, "grad_norm": 0.46641238729285783, "learning_rate": 3.994593133279265e-05, "loss": 0.5768, "step": 28870 }, { "epoch": 0.843053386081955, "grad_norm": 0.46218809855198356, "learning_rate": 3.994322789943228e-05, "loss": 0.5533, "step": 28875 }, { "epoch": 0.8431993693522722, "grad_norm": 0.48130758807819296, "learning_rate": 3.994052446607192e-05, "loss": 0.5584, "step": 28880 }, { "epoch": 0.8433453526225895, "grad_norm": 0.49426571718676215, "learning_rate": 3.9937821032711545e-05, "loss": 0.5795, "step": 28885 }, { "epoch": 0.8434913358929067, "grad_norm": 0.4699900223782631, "learning_rate": 3.993511759935118e-05, "loss": 0.5988, "step": 28890 }, { "epoch": 0.8436373191632239, "grad_norm": 0.5180702947497549, "learning_rate": 3.993241416599081e-05, "loss": 0.5821, "step": 28895 }, { "epoch": 0.8437833024335412, "grad_norm": 0.4886346143709685, "learning_rate": 3.992971073263044e-05, "loss": 0.5756, "step": 28900 }, { "epoch": 0.8439292857038584, "grad_norm": 0.4384612732832848, "learning_rate": 3.9927007299270074e-05, "loss": 0.562, "step": 28905 }, { "epoch": 0.8440752689741755, "grad_norm": 0.4940959203004717, "learning_rate": 3.992430386590971e-05, "loss": 0.5654, "step": 28910 }, { "epoch": 0.8442212522444927, "grad_norm": 0.48681473209806075, "learning_rate": 3.9921600432549335e-05, "loss": 0.5852, "step": 28915 }, { "epoch": 0.84436723551481, "grad_norm": 0.4773426461725239, "learning_rate": 3.991889699918897e-05, "loss": 0.5838, "step": 28920 }, { "epoch": 0.8445132187851272, "grad_norm": 0.4479496639655724, "learning_rate": 3.9916193565828603e-05, "loss": 0.6067, "step": 28925 }, { "epoch": 0.8446592020554444, "grad_norm": 0.48625563282707973, "learning_rate": 3.991349013246824e-05, "loss": 0.5652, "step": 28930 }, { "epoch": 0.8448051853257617, "grad_norm": 0.49561450914788097, "learning_rate": 3.991078669910787e-05, "loss": 0.5818, "step": 28935 }, { "epoch": 0.8449511685960789, "grad_norm": 0.46027743290311807, "learning_rate": 3.9908083265747505e-05, "loss": 0.5877, "step": 28940 }, { "epoch": 0.8450971518663961, "grad_norm": 0.43028119493990774, "learning_rate": 3.990537983238713e-05, "loss": 0.5615, "step": 28945 }, { "epoch": 0.8452431351367133, "grad_norm": 0.4709884322951424, "learning_rate": 3.990267639902677e-05, "loss": 0.5844, "step": 28950 }, { "epoch": 0.8453891184070306, "grad_norm": 0.4623991773284739, "learning_rate": 3.98999729656664e-05, "loss": 0.5954, "step": 28955 }, { "epoch": 0.8455351016773478, "grad_norm": 0.45729001398985203, "learning_rate": 3.989726953230603e-05, "loss": 0.5875, "step": 28960 }, { "epoch": 0.845681084947665, "grad_norm": 0.5242850895937472, "learning_rate": 3.989456609894566e-05, "loss": 0.5761, "step": 28965 }, { "epoch": 0.8458270682179823, "grad_norm": 0.4972458574084099, "learning_rate": 3.9891862665585296e-05, "loss": 0.5677, "step": 28970 }, { "epoch": 0.8459730514882995, "grad_norm": 0.47018957883842455, "learning_rate": 3.988915923222492e-05, "loss": 0.5664, "step": 28975 }, { "epoch": 0.8461190347586167, "grad_norm": 0.4802711660179394, "learning_rate": 3.988645579886456e-05, "loss": 0.5717, "step": 28980 }, { "epoch": 0.8462650180289338, "grad_norm": 0.43314552735303663, "learning_rate": 3.988375236550419e-05, "loss": 0.5934, "step": 28985 }, { "epoch": 0.8464110012992511, "grad_norm": 0.5213277200300156, "learning_rate": 3.9881048932143825e-05, "loss": 0.6198, "step": 28990 }, { "epoch": 0.8465569845695683, "grad_norm": 0.4673204739031356, "learning_rate": 3.987834549878346e-05, "loss": 0.5894, "step": 28995 }, { "epoch": 0.8467029678398855, "grad_norm": 0.48777622244548285, "learning_rate": 3.987564206542309e-05, "loss": 0.5569, "step": 29000 }, { "epoch": 0.8468489511102028, "grad_norm": 0.4894743995771581, "learning_rate": 3.987293863206272e-05, "loss": 0.5768, "step": 29005 }, { "epoch": 0.84699493438052, "grad_norm": 0.47451815338510334, "learning_rate": 3.9870235198702355e-05, "loss": 0.5715, "step": 29010 }, { "epoch": 0.8471409176508372, "grad_norm": 0.49184042893703533, "learning_rate": 3.986753176534199e-05, "loss": 0.5876, "step": 29015 }, { "epoch": 0.8472869009211544, "grad_norm": 0.4451807712610259, "learning_rate": 3.9864828331981616e-05, "loss": 0.544, "step": 29020 }, { "epoch": 0.8474328841914717, "grad_norm": 0.47808611994511996, "learning_rate": 3.986212489862125e-05, "loss": 0.5653, "step": 29025 }, { "epoch": 0.8475788674617889, "grad_norm": 0.48412079966260424, "learning_rate": 3.9859421465260884e-05, "loss": 0.5665, "step": 29030 }, { "epoch": 0.8477248507321061, "grad_norm": 0.46720827140534293, "learning_rate": 3.985671803190051e-05, "loss": 0.5528, "step": 29035 }, { "epoch": 0.8478708340024234, "grad_norm": 0.48453730814738133, "learning_rate": 3.9854014598540145e-05, "loss": 0.5933, "step": 29040 }, { "epoch": 0.8480168172727406, "grad_norm": 0.537756533322573, "learning_rate": 3.985131116517978e-05, "loss": 0.6109, "step": 29045 }, { "epoch": 0.8481628005430578, "grad_norm": 0.5100555888599442, "learning_rate": 3.984860773181941e-05, "loss": 0.5625, "step": 29050 }, { "epoch": 0.8483087838133749, "grad_norm": 0.4932250585903912, "learning_rate": 3.984590429845905e-05, "loss": 0.5553, "step": 29055 }, { "epoch": 0.8484547670836922, "grad_norm": 0.49335396198457554, "learning_rate": 3.984320086509868e-05, "loss": 0.561, "step": 29060 }, { "epoch": 0.8486007503540094, "grad_norm": 0.4758045753385531, "learning_rate": 3.984049743173831e-05, "loss": 0.5874, "step": 29065 }, { "epoch": 0.8487467336243266, "grad_norm": 0.4840358108990501, "learning_rate": 3.983779399837794e-05, "loss": 0.57, "step": 29070 }, { "epoch": 0.8488927168946439, "grad_norm": 0.49668804231686126, "learning_rate": 3.9835090565017576e-05, "loss": 0.571, "step": 29075 }, { "epoch": 0.8490387001649611, "grad_norm": 0.49516189717072756, "learning_rate": 3.9832387131657204e-05, "loss": 0.6088, "step": 29080 }, { "epoch": 0.8491846834352783, "grad_norm": 0.47826120218971446, "learning_rate": 3.982968369829684e-05, "loss": 0.5757, "step": 29085 }, { "epoch": 0.8493306667055955, "grad_norm": 0.49273270739439934, "learning_rate": 3.982698026493647e-05, "loss": 0.5346, "step": 29090 }, { "epoch": 0.8494766499759128, "grad_norm": 0.5091851803325381, "learning_rate": 3.98242768315761e-05, "loss": 0.59, "step": 29095 }, { "epoch": 0.84962263324623, "grad_norm": 0.47504373874571887, "learning_rate": 3.982157339821573e-05, "loss": 0.6078, "step": 29100 }, { "epoch": 0.8497686165165472, "grad_norm": 0.5666104280000597, "learning_rate": 3.9818869964855374e-05, "loss": 0.5785, "step": 29105 }, { "epoch": 0.8499145997868645, "grad_norm": 0.4848194577527432, "learning_rate": 3.9816166531495e-05, "loss": 0.6288, "step": 29110 }, { "epoch": 0.8500605830571817, "grad_norm": 0.4828223141035297, "learning_rate": 3.9813463098134635e-05, "loss": 0.5861, "step": 29115 }, { "epoch": 0.8502065663274989, "grad_norm": 0.5032652009009405, "learning_rate": 3.981075966477427e-05, "loss": 0.5816, "step": 29120 }, { "epoch": 0.8503525495978161, "grad_norm": 0.48825931040560716, "learning_rate": 3.9808056231413896e-05, "loss": 0.556, "step": 29125 }, { "epoch": 0.8504985328681333, "grad_norm": 0.5265536200355085, "learning_rate": 3.980535279805353e-05, "loss": 0.5928, "step": 29130 }, { "epoch": 0.8506445161384505, "grad_norm": 0.4818517325752457, "learning_rate": 3.9802649364693164e-05, "loss": 0.5907, "step": 29135 }, { "epoch": 0.8507904994087677, "grad_norm": 0.4218179756047507, "learning_rate": 3.979994593133279e-05, "loss": 0.5486, "step": 29140 }, { "epoch": 0.850936482679085, "grad_norm": 0.42838005828384296, "learning_rate": 3.9797242497972425e-05, "loss": 0.5642, "step": 29145 }, { "epoch": 0.8510824659494022, "grad_norm": 0.5126316029104664, "learning_rate": 3.979453906461206e-05, "loss": 0.6213, "step": 29150 }, { "epoch": 0.8512284492197194, "grad_norm": 0.49063308005929235, "learning_rate": 3.979183563125169e-05, "loss": 0.6027, "step": 29155 }, { "epoch": 0.8513744324900366, "grad_norm": 0.4459030159654386, "learning_rate": 3.978913219789132e-05, "loss": 0.5551, "step": 29160 }, { "epoch": 0.8515204157603539, "grad_norm": 0.48208950695530767, "learning_rate": 3.978642876453096e-05, "loss": 0.5729, "step": 29165 }, { "epoch": 0.8516663990306711, "grad_norm": 0.437678061578468, "learning_rate": 3.978372533117059e-05, "loss": 0.5578, "step": 29170 }, { "epoch": 0.8518123823009883, "grad_norm": 0.4540459575625549, "learning_rate": 3.978102189781022e-05, "loss": 0.5622, "step": 29175 }, { "epoch": 0.8519583655713056, "grad_norm": 0.48896598909130107, "learning_rate": 3.977831846444986e-05, "loss": 0.5726, "step": 29180 }, { "epoch": 0.8521043488416228, "grad_norm": 0.47622854550701205, "learning_rate": 3.9775615031089484e-05, "loss": 0.5667, "step": 29185 }, { "epoch": 0.85225033211194, "grad_norm": 0.48830065719213256, "learning_rate": 3.977291159772912e-05, "loss": 0.5671, "step": 29190 }, { "epoch": 0.8523963153822572, "grad_norm": 0.4760116110582343, "learning_rate": 3.977020816436875e-05, "loss": 0.571, "step": 29195 }, { "epoch": 0.8525422986525744, "grad_norm": 0.46411741953484137, "learning_rate": 3.976750473100838e-05, "loss": 0.5688, "step": 29200 }, { "epoch": 0.8526882819228916, "grad_norm": 0.4609446618567447, "learning_rate": 3.976480129764801e-05, "loss": 0.5981, "step": 29205 }, { "epoch": 0.8528342651932088, "grad_norm": 0.47618057798518754, "learning_rate": 3.976209786428765e-05, "loss": 0.5661, "step": 29210 }, { "epoch": 0.852980248463526, "grad_norm": 0.5294284051944449, "learning_rate": 3.9759394430927274e-05, "loss": 0.5266, "step": 29215 }, { "epoch": 0.8531262317338433, "grad_norm": 0.4709709042645848, "learning_rate": 3.9756690997566915e-05, "loss": 0.5829, "step": 29220 }, { "epoch": 0.8532722150041605, "grad_norm": 0.4923253603062297, "learning_rate": 3.975398756420655e-05, "loss": 0.5634, "step": 29225 }, { "epoch": 0.8534181982744777, "grad_norm": 0.5408822831703869, "learning_rate": 3.9751284130846176e-05, "loss": 0.602, "step": 29230 }, { "epoch": 0.853564181544795, "grad_norm": 0.4802028720245997, "learning_rate": 3.974858069748581e-05, "loss": 0.6177, "step": 29235 }, { "epoch": 0.8537101648151122, "grad_norm": 0.501306476892069, "learning_rate": 3.9745877264125444e-05, "loss": 0.5956, "step": 29240 }, { "epoch": 0.8538561480854294, "grad_norm": 0.46561447801585865, "learning_rate": 3.974317383076507e-05, "loss": 0.5619, "step": 29245 }, { "epoch": 0.8540021313557467, "grad_norm": 0.4344703546430763, "learning_rate": 3.9740470397404706e-05, "loss": 0.5776, "step": 29250 }, { "epoch": 0.8541481146260639, "grad_norm": 0.49409670237128545, "learning_rate": 3.973776696404434e-05, "loss": 0.6024, "step": 29255 }, { "epoch": 0.8542940978963811, "grad_norm": 0.46440335197093724, "learning_rate": 3.973506353068397e-05, "loss": 0.5662, "step": 29260 }, { "epoch": 0.8544400811666983, "grad_norm": 0.4732069311815957, "learning_rate": 3.97323600973236e-05, "loss": 0.5625, "step": 29265 }, { "epoch": 0.8545860644370156, "grad_norm": 0.47768378621576996, "learning_rate": 3.9729656663963235e-05, "loss": 0.5673, "step": 29270 }, { "epoch": 0.8547320477073327, "grad_norm": 0.4870415567120779, "learning_rate": 3.972695323060287e-05, "loss": 0.5571, "step": 29275 }, { "epoch": 0.8548780309776499, "grad_norm": 0.49127319040271106, "learning_rate": 3.97242497972425e-05, "loss": 0.5905, "step": 29280 }, { "epoch": 0.8550240142479671, "grad_norm": 0.4786414575473011, "learning_rate": 3.972154636388214e-05, "loss": 0.5351, "step": 29285 }, { "epoch": 0.8551699975182844, "grad_norm": 0.4986470012854601, "learning_rate": 3.9718842930521764e-05, "loss": 0.6119, "step": 29290 }, { "epoch": 0.8553159807886016, "grad_norm": 0.48274076434108615, "learning_rate": 3.97161394971614e-05, "loss": 0.5856, "step": 29295 }, { "epoch": 0.8554619640589188, "grad_norm": 0.482189577275938, "learning_rate": 3.971343606380103e-05, "loss": 0.5868, "step": 29300 }, { "epoch": 0.8556079473292361, "grad_norm": 0.4869206571715154, "learning_rate": 3.971073263044066e-05, "loss": 0.5923, "step": 29305 }, { "epoch": 0.8557539305995533, "grad_norm": 0.4761358063317685, "learning_rate": 3.9708029197080294e-05, "loss": 0.6073, "step": 29310 }, { "epoch": 0.8558999138698705, "grad_norm": 0.5284205171245578, "learning_rate": 3.970532576371993e-05, "loss": 0.6114, "step": 29315 }, { "epoch": 0.8560458971401878, "grad_norm": 0.4492458771170567, "learning_rate": 3.9702622330359555e-05, "loss": 0.5827, "step": 29320 }, { "epoch": 0.856191880410505, "grad_norm": 0.42862928887156565, "learning_rate": 3.969991889699919e-05, "loss": 0.5495, "step": 29325 }, { "epoch": 0.8563378636808222, "grad_norm": 0.48553975615901396, "learning_rate": 3.969721546363882e-05, "loss": 0.562, "step": 29330 }, { "epoch": 0.8564838469511394, "grad_norm": 0.5198395620138435, "learning_rate": 3.969451203027846e-05, "loss": 0.589, "step": 29335 }, { "epoch": 0.8566298302214567, "grad_norm": 0.4615990463448464, "learning_rate": 3.969180859691809e-05, "loss": 0.6016, "step": 29340 }, { "epoch": 0.8567758134917739, "grad_norm": 0.4360417743361931, "learning_rate": 3.9689105163557725e-05, "loss": 0.5728, "step": 29345 }, { "epoch": 0.856921796762091, "grad_norm": 0.5378959343848485, "learning_rate": 3.968640173019735e-05, "loss": 0.5807, "step": 29350 }, { "epoch": 0.8570677800324082, "grad_norm": 0.453234870034994, "learning_rate": 3.9683698296836986e-05, "loss": 0.565, "step": 29355 }, { "epoch": 0.8572137633027255, "grad_norm": 0.478396209675199, "learning_rate": 3.968099486347662e-05, "loss": 0.5629, "step": 29360 }, { "epoch": 0.8573597465730427, "grad_norm": 0.4368587056938086, "learning_rate": 3.967829143011625e-05, "loss": 0.5604, "step": 29365 }, { "epoch": 0.8575057298433599, "grad_norm": 0.47029627443550714, "learning_rate": 3.967558799675588e-05, "loss": 0.5753, "step": 29370 }, { "epoch": 0.8576517131136772, "grad_norm": 0.5236954515213528, "learning_rate": 3.9672884563395515e-05, "loss": 0.5917, "step": 29375 }, { "epoch": 0.8577976963839944, "grad_norm": 0.4761929106244914, "learning_rate": 3.967018113003514e-05, "loss": 0.5751, "step": 29380 }, { "epoch": 0.8579436796543116, "grad_norm": 0.49935885721267, "learning_rate": 3.9667477696674777e-05, "loss": 0.6035, "step": 29385 }, { "epoch": 0.8580896629246288, "grad_norm": 0.4880219232957977, "learning_rate": 3.966477426331441e-05, "loss": 0.5602, "step": 29390 }, { "epoch": 0.8582356461949461, "grad_norm": 0.49297800880425013, "learning_rate": 3.9662070829954045e-05, "loss": 0.5989, "step": 29395 }, { "epoch": 0.8583816294652633, "grad_norm": 0.4954176311768601, "learning_rate": 3.965936739659368e-05, "loss": 0.5825, "step": 29400 }, { "epoch": 0.8585276127355805, "grad_norm": 0.4566516845922177, "learning_rate": 3.9656663963233306e-05, "loss": 0.5828, "step": 29405 }, { "epoch": 0.8586735960058978, "grad_norm": 0.49877251097106146, "learning_rate": 3.965396052987294e-05, "loss": 0.5687, "step": 29410 }, { "epoch": 0.858819579276215, "grad_norm": 0.5235745235935856, "learning_rate": 3.9651257096512574e-05, "loss": 0.5506, "step": 29415 }, { "epoch": 0.8589655625465321, "grad_norm": 0.45575122198661444, "learning_rate": 3.964855366315221e-05, "loss": 0.5559, "step": 29420 }, { "epoch": 0.8591115458168493, "grad_norm": 0.4505389047631396, "learning_rate": 3.9645850229791835e-05, "loss": 0.5608, "step": 29425 }, { "epoch": 0.8592575290871666, "grad_norm": 0.463396884258521, "learning_rate": 3.964314679643147e-05, "loss": 0.5453, "step": 29430 }, { "epoch": 0.8594035123574838, "grad_norm": 0.5028001319705203, "learning_rate": 3.96404433630711e-05, "loss": 0.5554, "step": 29435 }, { "epoch": 0.859549495627801, "grad_norm": 0.4777578119584414, "learning_rate": 3.963773992971073e-05, "loss": 0.5768, "step": 29440 }, { "epoch": 0.8596954788981183, "grad_norm": 0.42841805882666334, "learning_rate": 3.963503649635037e-05, "loss": 0.5309, "step": 29445 }, { "epoch": 0.8598414621684355, "grad_norm": 0.525386849897949, "learning_rate": 3.963233306299e-05, "loss": 0.598, "step": 29450 }, { "epoch": 0.8599874454387527, "grad_norm": 0.47275715217242537, "learning_rate": 3.962962962962963e-05, "loss": 0.6043, "step": 29455 }, { "epoch": 0.86013342870907, "grad_norm": 0.4902169688580318, "learning_rate": 3.9626926196269266e-05, "loss": 0.59, "step": 29460 }, { "epoch": 0.8602794119793872, "grad_norm": 0.5151120896089763, "learning_rate": 3.9624222762908894e-05, "loss": 0.5663, "step": 29465 }, { "epoch": 0.8604253952497044, "grad_norm": 0.5115523439392369, "learning_rate": 3.962151932954853e-05, "loss": 0.5733, "step": 29470 }, { "epoch": 0.8605713785200216, "grad_norm": 0.5340325132502854, "learning_rate": 3.961881589618816e-05, "loss": 0.6223, "step": 29475 }, { "epoch": 0.8607173617903389, "grad_norm": 0.5089281350103794, "learning_rate": 3.9616112462827796e-05, "loss": 0.5765, "step": 29480 }, { "epoch": 0.8608633450606561, "grad_norm": 0.43950835390122067, "learning_rate": 3.961340902946742e-05, "loss": 0.5793, "step": 29485 }, { "epoch": 0.8610093283309733, "grad_norm": 0.49777299601774566, "learning_rate": 3.961070559610706e-05, "loss": 0.5809, "step": 29490 }, { "epoch": 0.8611553116012904, "grad_norm": 0.5290581562946255, "learning_rate": 3.960800216274669e-05, "loss": 0.6154, "step": 29495 }, { "epoch": 0.8613012948716077, "grad_norm": 0.4886418010079302, "learning_rate": 3.960529872938632e-05, "loss": 0.5616, "step": 29500 }, { "epoch": 0.8614472781419249, "grad_norm": 0.4770354541457698, "learning_rate": 3.960259529602596e-05, "loss": 0.5967, "step": 29505 }, { "epoch": 0.8615932614122421, "grad_norm": 0.4995952605652485, "learning_rate": 3.9599891862665586e-05, "loss": 0.5792, "step": 29510 }, { "epoch": 0.8617392446825594, "grad_norm": 0.4769625719719412, "learning_rate": 3.959718842930522e-05, "loss": 0.6115, "step": 29515 }, { "epoch": 0.8618852279528766, "grad_norm": 0.5540320068618946, "learning_rate": 3.9594484995944854e-05, "loss": 0.6044, "step": 29520 }, { "epoch": 0.8620312112231938, "grad_norm": 0.47246254096974183, "learning_rate": 3.959178156258448e-05, "loss": 0.5696, "step": 29525 }, { "epoch": 0.862177194493511, "grad_norm": 0.43223697430437363, "learning_rate": 3.9589078129224115e-05, "loss": 0.589, "step": 29530 }, { "epoch": 0.8623231777638283, "grad_norm": 0.45089329982290777, "learning_rate": 3.958637469586375e-05, "loss": 0.5748, "step": 29535 }, { "epoch": 0.8624691610341455, "grad_norm": 0.49698732688582525, "learning_rate": 3.958367126250338e-05, "loss": 0.6131, "step": 29540 }, { "epoch": 0.8626151443044627, "grad_norm": 0.4746506658140945, "learning_rate": 3.958096782914301e-05, "loss": 0.5729, "step": 29545 }, { "epoch": 0.86276112757478, "grad_norm": 0.46313647835000055, "learning_rate": 3.9578264395782645e-05, "loss": 0.5488, "step": 29550 }, { "epoch": 0.8629071108450972, "grad_norm": 0.46647992381591247, "learning_rate": 3.957556096242228e-05, "loss": 0.5927, "step": 29555 }, { "epoch": 0.8630530941154144, "grad_norm": 0.5439156869089042, "learning_rate": 3.957285752906191e-05, "loss": 0.5877, "step": 29560 }, { "epoch": 0.8631990773857317, "grad_norm": 0.44826647771189027, "learning_rate": 3.957015409570155e-05, "loss": 0.5408, "step": 29565 }, { "epoch": 0.8633450606560488, "grad_norm": 0.4569664622599756, "learning_rate": 3.9567450662341174e-05, "loss": 0.5333, "step": 29570 }, { "epoch": 0.863491043926366, "grad_norm": 0.5266374164001499, "learning_rate": 3.956474722898081e-05, "loss": 0.5419, "step": 29575 }, { "epoch": 0.8636370271966832, "grad_norm": 0.49233141382007556, "learning_rate": 3.956204379562044e-05, "loss": 0.5696, "step": 29580 }, { "epoch": 0.8637830104670005, "grad_norm": 0.44767947372966005, "learning_rate": 3.955934036226007e-05, "loss": 0.5446, "step": 29585 }, { "epoch": 0.8639289937373177, "grad_norm": 0.49182751428331084, "learning_rate": 3.95566369288997e-05, "loss": 0.5592, "step": 29590 }, { "epoch": 0.8640749770076349, "grad_norm": 0.49442085844578404, "learning_rate": 3.955393349553934e-05, "loss": 0.5595, "step": 29595 }, { "epoch": 0.8642209602779521, "grad_norm": 0.5041585438221503, "learning_rate": 3.9551230062178965e-05, "loss": 0.5764, "step": 29600 }, { "epoch": 0.8643669435482694, "grad_norm": 0.45467820266413206, "learning_rate": 3.95485266288186e-05, "loss": 0.5779, "step": 29605 }, { "epoch": 0.8645129268185866, "grad_norm": 0.47432447848222303, "learning_rate": 3.954582319545823e-05, "loss": 0.5316, "step": 29610 }, { "epoch": 0.8646589100889038, "grad_norm": 0.49763910424421054, "learning_rate": 3.9543119762097867e-05, "loss": 0.5763, "step": 29615 }, { "epoch": 0.8648048933592211, "grad_norm": 0.518190761764184, "learning_rate": 3.95404163287375e-05, "loss": 0.5786, "step": 29620 }, { "epoch": 0.8649508766295383, "grad_norm": 0.46478406765729147, "learning_rate": 3.9537712895377135e-05, "loss": 0.5995, "step": 29625 }, { "epoch": 0.8650968598998555, "grad_norm": 0.49147421146790377, "learning_rate": 3.953500946201676e-05, "loss": 0.5879, "step": 29630 }, { "epoch": 0.8652428431701727, "grad_norm": 0.5082687959987251, "learning_rate": 3.9532306028656396e-05, "loss": 0.5957, "step": 29635 }, { "epoch": 0.8653888264404899, "grad_norm": 0.4578898948536882, "learning_rate": 3.952960259529603e-05, "loss": 0.5794, "step": 29640 }, { "epoch": 0.8655348097108071, "grad_norm": 0.45816526904405674, "learning_rate": 3.952689916193566e-05, "loss": 0.5679, "step": 29645 }, { "epoch": 0.8656807929811243, "grad_norm": 0.4247149837682434, "learning_rate": 3.952419572857529e-05, "loss": 0.5478, "step": 29650 }, { "epoch": 0.8658267762514416, "grad_norm": 0.45158803388559754, "learning_rate": 3.9521492295214925e-05, "loss": 0.5473, "step": 29655 }, { "epoch": 0.8659727595217588, "grad_norm": 0.4671701491838432, "learning_rate": 3.951878886185455e-05, "loss": 0.5277, "step": 29660 }, { "epoch": 0.866118742792076, "grad_norm": 0.4435815089176342, "learning_rate": 3.9516085428494186e-05, "loss": 0.5642, "step": 29665 }, { "epoch": 0.8662647260623932, "grad_norm": 0.45204375361434196, "learning_rate": 3.951338199513382e-05, "loss": 0.5375, "step": 29670 }, { "epoch": 0.8664107093327105, "grad_norm": 0.5099190193207381, "learning_rate": 3.9510678561773454e-05, "loss": 0.5981, "step": 29675 }, { "epoch": 0.8665566926030277, "grad_norm": 0.4883036744682386, "learning_rate": 3.950797512841309e-05, "loss": 0.5817, "step": 29680 }, { "epoch": 0.8667026758733449, "grad_norm": 0.4769614314056276, "learning_rate": 3.950527169505272e-05, "loss": 0.5695, "step": 29685 }, { "epoch": 0.8668486591436622, "grad_norm": 0.4332060045337015, "learning_rate": 3.950256826169235e-05, "loss": 0.5339, "step": 29690 }, { "epoch": 0.8669946424139794, "grad_norm": 0.4769830333403917, "learning_rate": 3.9499864828331984e-05, "loss": 0.5943, "step": 29695 }, { "epoch": 0.8671406256842966, "grad_norm": 0.4680475342313199, "learning_rate": 3.949716139497162e-05, "loss": 0.5745, "step": 29700 }, { "epoch": 0.8672866089546138, "grad_norm": 0.48286784446694186, "learning_rate": 3.9494457961611245e-05, "loss": 0.5435, "step": 29705 }, { "epoch": 0.8674325922249311, "grad_norm": 0.4971673283337019, "learning_rate": 3.949175452825088e-05, "loss": 0.5799, "step": 29710 }, { "epoch": 0.8675785754952482, "grad_norm": 0.47473424911651046, "learning_rate": 3.948905109489051e-05, "loss": 0.5466, "step": 29715 }, { "epoch": 0.8677245587655654, "grad_norm": 0.47248373318551307, "learning_rate": 3.948634766153014e-05, "loss": 0.5431, "step": 29720 }, { "epoch": 0.8678705420358827, "grad_norm": 0.5137213898117505, "learning_rate": 3.9483644228169774e-05, "loss": 0.5812, "step": 29725 }, { "epoch": 0.8680165253061999, "grad_norm": 0.4887479584365219, "learning_rate": 3.9480940794809415e-05, "loss": 0.5613, "step": 29730 }, { "epoch": 0.8681625085765171, "grad_norm": 0.4599095570159044, "learning_rate": 3.947823736144904e-05, "loss": 0.5763, "step": 29735 }, { "epoch": 0.8683084918468343, "grad_norm": 0.4585479615052006, "learning_rate": 3.9475533928088676e-05, "loss": 0.5371, "step": 29740 }, { "epoch": 0.8684544751171516, "grad_norm": 0.43777201448502545, "learning_rate": 3.947283049472831e-05, "loss": 0.5576, "step": 29745 }, { "epoch": 0.8686004583874688, "grad_norm": 0.44810220380316274, "learning_rate": 3.947012706136794e-05, "loss": 0.5349, "step": 29750 }, { "epoch": 0.868746441657786, "grad_norm": 0.5047894758724182, "learning_rate": 3.946742362800757e-05, "loss": 0.5909, "step": 29755 }, { "epoch": 0.8688924249281033, "grad_norm": 0.48159797757530654, "learning_rate": 3.9464720194647205e-05, "loss": 0.5865, "step": 29760 }, { "epoch": 0.8690384081984205, "grad_norm": 0.46458414132396314, "learning_rate": 3.946201676128683e-05, "loss": 0.6238, "step": 29765 }, { "epoch": 0.8691843914687377, "grad_norm": 0.5394435873947705, "learning_rate": 3.945931332792647e-05, "loss": 0.5689, "step": 29770 }, { "epoch": 0.869330374739055, "grad_norm": 0.47943250933284415, "learning_rate": 3.94566098945661e-05, "loss": 0.5826, "step": 29775 }, { "epoch": 0.8694763580093722, "grad_norm": 0.4617584006683329, "learning_rate": 3.945390646120573e-05, "loss": 0.5317, "step": 29780 }, { "epoch": 0.8696223412796893, "grad_norm": 0.4856966361753008, "learning_rate": 3.945120302784537e-05, "loss": 0.5908, "step": 29785 }, { "epoch": 0.8697683245500065, "grad_norm": 0.4996518125780748, "learning_rate": 3.9448499594485e-05, "loss": 0.6189, "step": 29790 }, { "epoch": 0.8699143078203238, "grad_norm": 0.4589696569093018, "learning_rate": 3.944579616112463e-05, "loss": 0.5634, "step": 29795 }, { "epoch": 0.870060291090641, "grad_norm": 0.49098158622605176, "learning_rate": 3.9443092727764264e-05, "loss": 0.5937, "step": 29800 }, { "epoch": 0.8702062743609582, "grad_norm": 0.49699294663074217, "learning_rate": 3.94403892944039e-05, "loss": 0.5842, "step": 29805 }, { "epoch": 0.8703522576312754, "grad_norm": 0.5010726701449361, "learning_rate": 3.9437685861043525e-05, "loss": 0.5937, "step": 29810 }, { "epoch": 0.8704982409015927, "grad_norm": 0.5085280275574062, "learning_rate": 3.943498242768316e-05, "loss": 0.5637, "step": 29815 }, { "epoch": 0.8706442241719099, "grad_norm": 0.4753779748402008, "learning_rate": 3.943227899432279e-05, "loss": 0.5622, "step": 29820 }, { "epoch": 0.8707902074422271, "grad_norm": 0.4583825124227837, "learning_rate": 3.942957556096242e-05, "loss": 0.5497, "step": 29825 }, { "epoch": 0.8709361907125444, "grad_norm": 0.4990901812469766, "learning_rate": 3.9426872127602055e-05, "loss": 0.5761, "step": 29830 }, { "epoch": 0.8710821739828616, "grad_norm": 0.4720181561436168, "learning_rate": 3.942416869424169e-05, "loss": 0.5688, "step": 29835 }, { "epoch": 0.8712281572531788, "grad_norm": 0.4928225493908683, "learning_rate": 3.9421465260881316e-05, "loss": 0.5589, "step": 29840 }, { "epoch": 0.871374140523496, "grad_norm": 0.4889845622999799, "learning_rate": 3.9418761827520957e-05, "loss": 0.5741, "step": 29845 }, { "epoch": 0.8715201237938133, "grad_norm": 0.4850614494003717, "learning_rate": 3.941605839416059e-05, "loss": 0.54, "step": 29850 }, { "epoch": 0.8716661070641305, "grad_norm": 0.4719080763028767, "learning_rate": 3.941335496080022e-05, "loss": 0.5656, "step": 29855 }, { "epoch": 0.8718120903344476, "grad_norm": 0.4748475624916677, "learning_rate": 3.941065152743985e-05, "loss": 0.5837, "step": 29860 }, { "epoch": 0.8719580736047648, "grad_norm": 0.5149153549244904, "learning_rate": 3.9407948094079486e-05, "loss": 0.5439, "step": 29865 }, { "epoch": 0.8721040568750821, "grad_norm": 0.5198494335634075, "learning_rate": 3.940524466071911e-05, "loss": 0.5781, "step": 29870 }, { "epoch": 0.8722500401453993, "grad_norm": 0.4876101782363857, "learning_rate": 3.940254122735875e-05, "loss": 0.5606, "step": 29875 }, { "epoch": 0.8723960234157165, "grad_norm": 0.5008510011306484, "learning_rate": 3.939983779399838e-05, "loss": 0.5746, "step": 29880 }, { "epoch": 0.8725420066860338, "grad_norm": 0.4553114027584063, "learning_rate": 3.939713436063801e-05, "loss": 0.6153, "step": 29885 }, { "epoch": 0.872687989956351, "grad_norm": 0.5111145148268411, "learning_rate": 3.939443092727764e-05, "loss": 0.5687, "step": 29890 }, { "epoch": 0.8728339732266682, "grad_norm": 0.5120315504784334, "learning_rate": 3.9391727493917276e-05, "loss": 0.6036, "step": 29895 }, { "epoch": 0.8729799564969855, "grad_norm": 0.46127778745079234, "learning_rate": 3.938902406055691e-05, "loss": 0.5531, "step": 29900 }, { "epoch": 0.8731259397673027, "grad_norm": 0.4773446028433243, "learning_rate": 3.9386320627196544e-05, "loss": 0.579, "step": 29905 }, { "epoch": 0.8732719230376199, "grad_norm": 0.4829138784728173, "learning_rate": 3.938361719383618e-05, "loss": 0.6115, "step": 29910 }, { "epoch": 0.8734179063079371, "grad_norm": 0.47819324583198325, "learning_rate": 3.9380913760475806e-05, "loss": 0.6032, "step": 29915 }, { "epoch": 0.8735638895782544, "grad_norm": 0.48333314590160753, "learning_rate": 3.937821032711544e-05, "loss": 0.555, "step": 29920 }, { "epoch": 0.8737098728485716, "grad_norm": 0.47629789412232676, "learning_rate": 3.9375506893755074e-05, "loss": 0.5599, "step": 29925 }, { "epoch": 0.8738558561188888, "grad_norm": 0.4472839056284742, "learning_rate": 3.93728034603947e-05, "loss": 0.5139, "step": 29930 }, { "epoch": 0.874001839389206, "grad_norm": 0.4552834555235537, "learning_rate": 3.9370100027034335e-05, "loss": 0.5653, "step": 29935 }, { "epoch": 0.8741478226595232, "grad_norm": 0.48568429751741277, "learning_rate": 3.936739659367397e-05, "loss": 0.5923, "step": 29940 }, { "epoch": 0.8742938059298404, "grad_norm": 0.4863397493401043, "learning_rate": 3.9364693160313596e-05, "loss": 0.513, "step": 29945 }, { "epoch": 0.8744397892001576, "grad_norm": 0.4439598843762937, "learning_rate": 3.936198972695323e-05, "loss": 0.5406, "step": 29950 }, { "epoch": 0.8745857724704749, "grad_norm": 0.4965774311961464, "learning_rate": 3.9359286293592864e-05, "loss": 0.5747, "step": 29955 }, { "epoch": 0.8747317557407921, "grad_norm": 0.49302915355683996, "learning_rate": 3.93565828602325e-05, "loss": 0.5748, "step": 29960 }, { "epoch": 0.8748777390111093, "grad_norm": 0.496255404184037, "learning_rate": 3.935387942687213e-05, "loss": 0.538, "step": 29965 }, { "epoch": 0.8750237222814266, "grad_norm": 0.4752975883693477, "learning_rate": 3.9351175993511766e-05, "loss": 0.5673, "step": 29970 }, { "epoch": 0.8751697055517438, "grad_norm": 0.48229436331788145, "learning_rate": 3.9348472560151393e-05, "loss": 0.5917, "step": 29975 }, { "epoch": 0.875315688822061, "grad_norm": 0.44069091746246575, "learning_rate": 3.934576912679103e-05, "loss": 0.5615, "step": 29980 }, { "epoch": 0.8754616720923782, "grad_norm": 0.5086031302709648, "learning_rate": 3.934306569343066e-05, "loss": 0.5927, "step": 29985 }, { "epoch": 0.8756076553626955, "grad_norm": 0.49357011864474903, "learning_rate": 3.934036226007029e-05, "loss": 0.5533, "step": 29990 }, { "epoch": 0.8757536386330127, "grad_norm": 0.4896150297866562, "learning_rate": 3.933765882670992e-05, "loss": 0.5628, "step": 29995 }, { "epoch": 0.8758996219033299, "grad_norm": 0.7189027706504203, "learning_rate": 3.933495539334956e-05, "loss": 0.5539, "step": 30000 }, { "epoch": 0.876045605173647, "grad_norm": 0.4873386989925254, "learning_rate": 3.9332251959989184e-05, "loss": 0.5819, "step": 30005 }, { "epoch": 0.8761915884439643, "grad_norm": 0.4475029177544232, "learning_rate": 3.932954852662882e-05, "loss": 0.6076, "step": 30010 }, { "epoch": 0.8763375717142815, "grad_norm": 0.42963901083743855, "learning_rate": 3.932684509326845e-05, "loss": 0.5953, "step": 30015 }, { "epoch": 0.8764835549845987, "grad_norm": 0.5364117516949453, "learning_rate": 3.9324141659908086e-05, "loss": 0.5824, "step": 30020 }, { "epoch": 0.876629538254916, "grad_norm": 0.47885352347631116, "learning_rate": 3.932143822654772e-05, "loss": 0.5624, "step": 30025 }, { "epoch": 0.8767755215252332, "grad_norm": 0.4769447467850553, "learning_rate": 3.9318734793187354e-05, "loss": 0.5529, "step": 30030 }, { "epoch": 0.8769215047955504, "grad_norm": 0.44084076441044523, "learning_rate": 3.931603135982698e-05, "loss": 0.6074, "step": 30035 }, { "epoch": 0.8770674880658677, "grad_norm": 0.4309703818110764, "learning_rate": 3.9313327926466615e-05, "loss": 0.55, "step": 30040 }, { "epoch": 0.8772134713361849, "grad_norm": 0.514547098186897, "learning_rate": 3.931062449310625e-05, "loss": 0.5725, "step": 30045 }, { "epoch": 0.8773594546065021, "grad_norm": 0.4800443376873459, "learning_rate": 3.9307921059745876e-05, "loss": 0.5708, "step": 30050 }, { "epoch": 0.8775054378768193, "grad_norm": 0.4436419443452591, "learning_rate": 3.930521762638551e-05, "loss": 0.5694, "step": 30055 }, { "epoch": 0.8776514211471366, "grad_norm": 0.4675615049670133, "learning_rate": 3.9302514193025145e-05, "loss": 0.5535, "step": 30060 }, { "epoch": 0.8777974044174538, "grad_norm": 0.45155020582905026, "learning_rate": 3.929981075966477e-05, "loss": 0.5754, "step": 30065 }, { "epoch": 0.877943387687771, "grad_norm": 0.4680160438620846, "learning_rate": 3.929710732630441e-05, "loss": 0.5664, "step": 30070 }, { "epoch": 0.8780893709580883, "grad_norm": 0.48372373472648306, "learning_rate": 3.929440389294404e-05, "loss": 0.5871, "step": 30075 }, { "epoch": 0.8782353542284054, "grad_norm": 0.474749759515907, "learning_rate": 3.9291700459583674e-05, "loss": 0.5754, "step": 30080 }, { "epoch": 0.8783813374987226, "grad_norm": 0.49998541263835555, "learning_rate": 3.928899702622331e-05, "loss": 0.5429, "step": 30085 }, { "epoch": 0.8785273207690398, "grad_norm": 0.44375803705619193, "learning_rate": 3.9286293592862935e-05, "loss": 0.5568, "step": 30090 }, { "epoch": 0.8786733040393571, "grad_norm": 0.4944363339828379, "learning_rate": 3.928359015950257e-05, "loss": 0.5664, "step": 30095 }, { "epoch": 0.8788192873096743, "grad_norm": 0.46712165130817623, "learning_rate": 3.92808867261422e-05, "loss": 0.569, "step": 30100 }, { "epoch": 0.8789652705799915, "grad_norm": 0.47418321665757185, "learning_rate": 3.927818329278184e-05, "loss": 0.572, "step": 30105 }, { "epoch": 0.8791112538503087, "grad_norm": 0.4795471442181984, "learning_rate": 3.9275479859421464e-05, "loss": 0.5735, "step": 30110 }, { "epoch": 0.879257237120626, "grad_norm": 0.47743826150687296, "learning_rate": 3.92727764260611e-05, "loss": 0.543, "step": 30115 }, { "epoch": 0.8794032203909432, "grad_norm": 0.4412462745073568, "learning_rate": 3.927007299270073e-05, "loss": 0.6035, "step": 30120 }, { "epoch": 0.8795492036612604, "grad_norm": 0.4652815489282728, "learning_rate": 3.9267369559340366e-05, "loss": 0.5594, "step": 30125 }, { "epoch": 0.8796951869315777, "grad_norm": 0.45760253302649306, "learning_rate": 3.926466612598e-05, "loss": 0.59, "step": 30130 }, { "epoch": 0.8798411702018949, "grad_norm": 0.4704927055892414, "learning_rate": 3.926196269261963e-05, "loss": 0.5723, "step": 30135 }, { "epoch": 0.8799871534722121, "grad_norm": 0.47423087542201675, "learning_rate": 3.925925925925926e-05, "loss": 0.5635, "step": 30140 }, { "epoch": 0.8801331367425294, "grad_norm": 0.48392622659436907, "learning_rate": 3.9256555825898896e-05, "loss": 0.5733, "step": 30145 }, { "epoch": 0.8802791200128466, "grad_norm": 0.5301008153422581, "learning_rate": 3.925385239253852e-05, "loss": 0.5771, "step": 30150 }, { "epoch": 0.8804251032831637, "grad_norm": 0.40391557092642344, "learning_rate": 3.925114895917816e-05, "loss": 0.5316, "step": 30155 }, { "epoch": 0.8805710865534809, "grad_norm": 0.4666964286512551, "learning_rate": 3.924844552581779e-05, "loss": 0.5431, "step": 30160 }, { "epoch": 0.8807170698237982, "grad_norm": 0.41298086724902666, "learning_rate": 3.9245742092457425e-05, "loss": 0.5862, "step": 30165 }, { "epoch": 0.8808630530941154, "grad_norm": 0.478923954167632, "learning_rate": 3.924303865909705e-05, "loss": 0.5593, "step": 30170 }, { "epoch": 0.8810090363644326, "grad_norm": 0.45166323864923846, "learning_rate": 3.9240335225736686e-05, "loss": 0.5767, "step": 30175 }, { "epoch": 0.8811550196347498, "grad_norm": 0.4584281707677801, "learning_rate": 3.923763179237632e-05, "loss": 0.5703, "step": 30180 }, { "epoch": 0.8813010029050671, "grad_norm": 0.4756838524275658, "learning_rate": 3.9234928359015954e-05, "loss": 0.5889, "step": 30185 }, { "epoch": 0.8814469861753843, "grad_norm": 0.4796231037967516, "learning_rate": 3.923222492565559e-05, "loss": 0.6036, "step": 30190 }, { "epoch": 0.8815929694457015, "grad_norm": 0.4836751629695918, "learning_rate": 3.9229521492295215e-05, "loss": 0.5991, "step": 30195 }, { "epoch": 0.8817389527160188, "grad_norm": 0.4970675854832711, "learning_rate": 3.922681805893485e-05, "loss": 0.5665, "step": 30200 }, { "epoch": 0.881884935986336, "grad_norm": 0.4816079626515954, "learning_rate": 3.9224114625574483e-05, "loss": 0.5908, "step": 30205 }, { "epoch": 0.8820309192566532, "grad_norm": 0.5055857772307443, "learning_rate": 3.922141119221411e-05, "loss": 0.5784, "step": 30210 }, { "epoch": 0.8821769025269705, "grad_norm": 0.5304989835614209, "learning_rate": 3.9218707758853745e-05, "loss": 0.5955, "step": 30215 }, { "epoch": 0.8823228857972877, "grad_norm": 0.4893636002640536, "learning_rate": 3.921600432549338e-05, "loss": 0.5885, "step": 30220 }, { "epoch": 0.8824688690676048, "grad_norm": 0.47474284510253967, "learning_rate": 3.9213300892133006e-05, "loss": 0.5756, "step": 30225 }, { "epoch": 0.882614852337922, "grad_norm": 0.43741938597375574, "learning_rate": 3.921059745877264e-05, "loss": 0.5452, "step": 30230 }, { "epoch": 0.8827608356082393, "grad_norm": 0.4845211115742234, "learning_rate": 3.9207894025412274e-05, "loss": 0.6101, "step": 30235 }, { "epoch": 0.8829068188785565, "grad_norm": 0.45591537275837585, "learning_rate": 3.920519059205191e-05, "loss": 0.5669, "step": 30240 }, { "epoch": 0.8830528021488737, "grad_norm": 0.4736775656225697, "learning_rate": 3.920248715869154e-05, "loss": 0.5743, "step": 30245 }, { "epoch": 0.883198785419191, "grad_norm": 0.4688879234252337, "learning_rate": 3.9199783725331176e-05, "loss": 0.599, "step": 30250 }, { "epoch": 0.8833447686895082, "grad_norm": 0.5245030144822924, "learning_rate": 3.91970802919708e-05, "loss": 0.584, "step": 30255 }, { "epoch": 0.8834907519598254, "grad_norm": 0.47242258887262606, "learning_rate": 3.919437685861044e-05, "loss": 0.5618, "step": 30260 }, { "epoch": 0.8836367352301426, "grad_norm": 0.5054882039049581, "learning_rate": 3.919167342525007e-05, "loss": 0.6034, "step": 30265 }, { "epoch": 0.8837827185004599, "grad_norm": 0.49080037244015645, "learning_rate": 3.91889699918897e-05, "loss": 0.6012, "step": 30270 }, { "epoch": 0.8839287017707771, "grad_norm": 0.4939380483747007, "learning_rate": 3.918626655852933e-05, "loss": 0.5739, "step": 30275 }, { "epoch": 0.8840746850410943, "grad_norm": 0.44868436094675146, "learning_rate": 3.9183563125168966e-05, "loss": 0.5724, "step": 30280 }, { "epoch": 0.8842206683114116, "grad_norm": 0.4639035384251814, "learning_rate": 3.9180859691808594e-05, "loss": 0.5541, "step": 30285 }, { "epoch": 0.8843666515817288, "grad_norm": 0.4771363938350773, "learning_rate": 3.917815625844823e-05, "loss": 0.5578, "step": 30290 }, { "epoch": 0.884512634852046, "grad_norm": 0.47218467511397993, "learning_rate": 3.917545282508787e-05, "loss": 0.5784, "step": 30295 }, { "epoch": 0.8846586181223631, "grad_norm": 0.4590301464029332, "learning_rate": 3.9172749391727496e-05, "loss": 0.5586, "step": 30300 }, { "epoch": 0.8848046013926804, "grad_norm": 0.4558791052393592, "learning_rate": 3.917004595836713e-05, "loss": 0.589, "step": 30305 }, { "epoch": 0.8849505846629976, "grad_norm": 0.4998357094926016, "learning_rate": 3.9167342525006764e-05, "loss": 0.5877, "step": 30310 }, { "epoch": 0.8850965679333148, "grad_norm": 0.49557543446822383, "learning_rate": 3.916463909164639e-05, "loss": 0.5725, "step": 30315 }, { "epoch": 0.885242551203632, "grad_norm": 0.5054046185907176, "learning_rate": 3.9161935658286025e-05, "loss": 0.5775, "step": 30320 }, { "epoch": 0.8853885344739493, "grad_norm": 0.4595965326384013, "learning_rate": 3.915923222492566e-05, "loss": 0.5694, "step": 30325 }, { "epoch": 0.8855345177442665, "grad_norm": 0.424713969516107, "learning_rate": 3.9156528791565286e-05, "loss": 0.5325, "step": 30330 }, { "epoch": 0.8856805010145837, "grad_norm": 0.5013953783442424, "learning_rate": 3.915382535820492e-05, "loss": 0.5954, "step": 30335 }, { "epoch": 0.885826484284901, "grad_norm": 0.48716575931736483, "learning_rate": 3.9151121924844554e-05, "loss": 0.5761, "step": 30340 }, { "epoch": 0.8859724675552182, "grad_norm": 0.4972354292153659, "learning_rate": 3.914841849148418e-05, "loss": 0.5767, "step": 30345 }, { "epoch": 0.8861184508255354, "grad_norm": 0.5450753301876761, "learning_rate": 3.914571505812382e-05, "loss": 0.5989, "step": 30350 }, { "epoch": 0.8862644340958526, "grad_norm": 0.46979539384196234, "learning_rate": 3.9143011624763456e-05, "loss": 0.5827, "step": 30355 }, { "epoch": 0.8864104173661699, "grad_norm": 0.48219778654152834, "learning_rate": 3.9140308191403084e-05, "loss": 0.5435, "step": 30360 }, { "epoch": 0.8865564006364871, "grad_norm": 0.45577426048195085, "learning_rate": 3.913760475804272e-05, "loss": 0.5572, "step": 30365 }, { "epoch": 0.8867023839068042, "grad_norm": 0.49698149455486546, "learning_rate": 3.913490132468235e-05, "loss": 0.6284, "step": 30370 }, { "epoch": 0.8868483671771215, "grad_norm": 0.5064185066680967, "learning_rate": 3.913219789132198e-05, "loss": 0.5917, "step": 30375 }, { "epoch": 0.8869943504474387, "grad_norm": 0.47814364908953505, "learning_rate": 3.912949445796161e-05, "loss": 0.5745, "step": 30380 }, { "epoch": 0.8871403337177559, "grad_norm": 0.4845716160928787, "learning_rate": 3.912679102460125e-05, "loss": 0.5683, "step": 30385 }, { "epoch": 0.8872863169880731, "grad_norm": 0.5237231069700994, "learning_rate": 3.9124087591240874e-05, "loss": 0.579, "step": 30390 }, { "epoch": 0.8874323002583904, "grad_norm": 0.45526650443535394, "learning_rate": 3.912138415788051e-05, "loss": 0.5843, "step": 30395 }, { "epoch": 0.8875782835287076, "grad_norm": 0.4695821933395041, "learning_rate": 3.911868072452014e-05, "loss": 0.5709, "step": 30400 }, { "epoch": 0.8877242667990248, "grad_norm": 0.46069081983591365, "learning_rate": 3.911597729115977e-05, "loss": 0.5433, "step": 30405 }, { "epoch": 0.8878702500693421, "grad_norm": 0.48409931887461094, "learning_rate": 3.911327385779941e-05, "loss": 0.5726, "step": 30410 }, { "epoch": 0.8880162333396593, "grad_norm": 0.5055551485489163, "learning_rate": 3.9110570424439044e-05, "loss": 0.5392, "step": 30415 }, { "epoch": 0.8881622166099765, "grad_norm": 0.4469576236092897, "learning_rate": 3.910786699107867e-05, "loss": 0.5773, "step": 30420 }, { "epoch": 0.8883081998802937, "grad_norm": 0.47641864403581874, "learning_rate": 3.9105163557718305e-05, "loss": 0.5539, "step": 30425 }, { "epoch": 0.888454183150611, "grad_norm": 0.4762324462466633, "learning_rate": 3.910246012435794e-05, "loss": 0.5721, "step": 30430 }, { "epoch": 0.8886001664209282, "grad_norm": 0.4511830792199617, "learning_rate": 3.909975669099757e-05, "loss": 0.5625, "step": 30435 }, { "epoch": 0.8887461496912454, "grad_norm": 0.5882726479720399, "learning_rate": 3.90970532576372e-05, "loss": 0.6265, "step": 30440 }, { "epoch": 0.8888921329615626, "grad_norm": 0.4658220944696342, "learning_rate": 3.9094349824276835e-05, "loss": 0.5664, "step": 30445 }, { "epoch": 0.8890381162318798, "grad_norm": 0.41958128539288697, "learning_rate": 3.909164639091646e-05, "loss": 0.5493, "step": 30450 }, { "epoch": 0.889184099502197, "grad_norm": 0.4619066007642949, "learning_rate": 3.9088942957556096e-05, "loss": 0.5854, "step": 30455 }, { "epoch": 0.8893300827725142, "grad_norm": 0.4870923275112942, "learning_rate": 3.908623952419573e-05, "loss": 0.62, "step": 30460 }, { "epoch": 0.8894760660428315, "grad_norm": 0.4871235940144329, "learning_rate": 3.9083536090835364e-05, "loss": 0.5896, "step": 30465 }, { "epoch": 0.8896220493131487, "grad_norm": 0.43827214199368975, "learning_rate": 3.9080832657475e-05, "loss": 0.5815, "step": 30470 }, { "epoch": 0.8897680325834659, "grad_norm": 0.4517726065067876, "learning_rate": 3.907812922411463e-05, "loss": 0.5623, "step": 30475 }, { "epoch": 0.8899140158537832, "grad_norm": 0.4567788132255237, "learning_rate": 3.907542579075426e-05, "loss": 0.5599, "step": 30480 }, { "epoch": 0.8900599991241004, "grad_norm": 0.4672848322617956, "learning_rate": 3.907272235739389e-05, "loss": 0.5794, "step": 30485 }, { "epoch": 0.8902059823944176, "grad_norm": 0.46660655571959503, "learning_rate": 3.907001892403353e-05, "loss": 0.5616, "step": 30490 }, { "epoch": 0.8903519656647348, "grad_norm": 0.44611666293985486, "learning_rate": 3.9067315490673154e-05, "loss": 0.5665, "step": 30495 }, { "epoch": 0.8904979489350521, "grad_norm": 0.4986241927957542, "learning_rate": 3.906461205731279e-05, "loss": 0.6084, "step": 30500 }, { "epoch": 0.8906439322053693, "grad_norm": 0.4741928825028336, "learning_rate": 3.906190862395242e-05, "loss": 0.5471, "step": 30505 }, { "epoch": 0.8907899154756865, "grad_norm": 0.46496860359108977, "learning_rate": 3.905920519059205e-05, "loss": 0.5487, "step": 30510 }, { "epoch": 0.8909358987460038, "grad_norm": 0.4807214595747047, "learning_rate": 3.9056501757231684e-05, "loss": 0.5597, "step": 30515 }, { "epoch": 0.8910818820163209, "grad_norm": 0.46308375797784923, "learning_rate": 3.9053798323871324e-05, "loss": 0.5606, "step": 30520 }, { "epoch": 0.8912278652866381, "grad_norm": 0.5194346809309658, "learning_rate": 3.905109489051095e-05, "loss": 0.6152, "step": 30525 }, { "epoch": 0.8913738485569553, "grad_norm": 0.643738215073285, "learning_rate": 3.9048391457150586e-05, "loss": 0.5848, "step": 30530 }, { "epoch": 0.8915198318272726, "grad_norm": 0.46317602952002607, "learning_rate": 3.904568802379022e-05, "loss": 0.5578, "step": 30535 }, { "epoch": 0.8916658150975898, "grad_norm": 0.46318267534808377, "learning_rate": 3.904298459042985e-05, "loss": 0.5856, "step": 30540 }, { "epoch": 0.891811798367907, "grad_norm": 0.4506416319309265, "learning_rate": 3.904028115706948e-05, "loss": 0.5621, "step": 30545 }, { "epoch": 0.8919577816382243, "grad_norm": 0.45763007552521123, "learning_rate": 3.9037577723709115e-05, "loss": 0.5724, "step": 30550 }, { "epoch": 0.8921037649085415, "grad_norm": 0.4919785697753527, "learning_rate": 3.903487429034874e-05, "loss": 0.5678, "step": 30555 }, { "epoch": 0.8922497481788587, "grad_norm": 0.49401932185320685, "learning_rate": 3.9032170856988376e-05, "loss": 0.5658, "step": 30560 }, { "epoch": 0.8923957314491759, "grad_norm": 0.48839871632241155, "learning_rate": 3.902946742362801e-05, "loss": 0.5654, "step": 30565 }, { "epoch": 0.8925417147194932, "grad_norm": 0.47550838023598674, "learning_rate": 3.902676399026764e-05, "loss": 0.5871, "step": 30570 }, { "epoch": 0.8926876979898104, "grad_norm": 0.46563755208393853, "learning_rate": 3.902406055690727e-05, "loss": 0.583, "step": 30575 }, { "epoch": 0.8928336812601276, "grad_norm": 0.4430557969428061, "learning_rate": 3.9021357123546906e-05, "loss": 0.567, "step": 30580 }, { "epoch": 0.8929796645304449, "grad_norm": 0.44282783757590044, "learning_rate": 3.901865369018654e-05, "loss": 0.5299, "step": 30585 }, { "epoch": 0.893125647800762, "grad_norm": 0.5046382572788634, "learning_rate": 3.9015950256826174e-05, "loss": 0.6029, "step": 30590 }, { "epoch": 0.8932716310710792, "grad_norm": 0.4414425439467984, "learning_rate": 3.901324682346581e-05, "loss": 0.5694, "step": 30595 }, { "epoch": 0.8934176143413964, "grad_norm": 0.42113220668119855, "learning_rate": 3.9010543390105435e-05, "loss": 0.5694, "step": 30600 }, { "epoch": 0.8935635976117137, "grad_norm": 0.5283138551333877, "learning_rate": 3.900783995674507e-05, "loss": 0.6173, "step": 30605 }, { "epoch": 0.8937095808820309, "grad_norm": 0.5446959400000998, "learning_rate": 3.90051365233847e-05, "loss": 0.6101, "step": 30610 }, { "epoch": 0.8938555641523481, "grad_norm": 0.43500506700318586, "learning_rate": 3.900243309002433e-05, "loss": 0.5426, "step": 30615 }, { "epoch": 0.8940015474226654, "grad_norm": 0.44608739345875864, "learning_rate": 3.8999729656663964e-05, "loss": 0.5734, "step": 30620 }, { "epoch": 0.8941475306929826, "grad_norm": 0.4467002826045946, "learning_rate": 3.89970262233036e-05, "loss": 0.5573, "step": 30625 }, { "epoch": 0.8942935139632998, "grad_norm": 0.47214574071146326, "learning_rate": 3.8994322789943225e-05, "loss": 0.5681, "step": 30630 }, { "epoch": 0.894439497233617, "grad_norm": 0.44790243440384325, "learning_rate": 3.8991619356582866e-05, "loss": 0.5595, "step": 30635 }, { "epoch": 0.8945854805039343, "grad_norm": 0.45809970198545624, "learning_rate": 3.898891592322249e-05, "loss": 0.5789, "step": 30640 }, { "epoch": 0.8947314637742515, "grad_norm": 0.4584432555040814, "learning_rate": 3.898621248986213e-05, "loss": 0.5853, "step": 30645 }, { "epoch": 0.8948774470445687, "grad_norm": 0.4920084219907532, "learning_rate": 3.898350905650176e-05, "loss": 0.5707, "step": 30650 }, { "epoch": 0.895023430314886, "grad_norm": 0.4836553607407794, "learning_rate": 3.8980805623141395e-05, "loss": 0.5915, "step": 30655 }, { "epoch": 0.8951694135852032, "grad_norm": 0.45979187644828645, "learning_rate": 3.897810218978102e-05, "loss": 0.5965, "step": 30660 }, { "epoch": 0.8953153968555203, "grad_norm": 0.4876888789702917, "learning_rate": 3.8975398756420657e-05, "loss": 0.5709, "step": 30665 }, { "epoch": 0.8954613801258375, "grad_norm": 0.48196365914914857, "learning_rate": 3.897269532306029e-05, "loss": 0.5583, "step": 30670 }, { "epoch": 0.8956073633961548, "grad_norm": 0.48041401562298636, "learning_rate": 3.896999188969992e-05, "loss": 0.58, "step": 30675 }, { "epoch": 0.895753346666472, "grad_norm": 0.42102873175390304, "learning_rate": 3.896728845633955e-05, "loss": 0.5138, "step": 30680 }, { "epoch": 0.8958993299367892, "grad_norm": 0.4420656907910973, "learning_rate": 3.8964585022979186e-05, "loss": 0.5541, "step": 30685 }, { "epoch": 0.8960453132071065, "grad_norm": 0.5245128712125899, "learning_rate": 3.896188158961882e-05, "loss": 0.5854, "step": 30690 }, { "epoch": 0.8961912964774237, "grad_norm": 0.4718175128904984, "learning_rate": 3.8959178156258454e-05, "loss": 0.6229, "step": 30695 }, { "epoch": 0.8963372797477409, "grad_norm": 0.4665339984552741, "learning_rate": 3.895647472289808e-05, "loss": 0.5696, "step": 30700 }, { "epoch": 0.8964832630180581, "grad_norm": 0.4308458255619208, "learning_rate": 3.8953771289537715e-05, "loss": 0.565, "step": 30705 }, { "epoch": 0.8966292462883754, "grad_norm": 0.5059905823836095, "learning_rate": 3.895106785617735e-05, "loss": 0.5838, "step": 30710 }, { "epoch": 0.8967752295586926, "grad_norm": 0.49410865870828297, "learning_rate": 3.8948364422816976e-05, "loss": 0.5393, "step": 30715 }, { "epoch": 0.8969212128290098, "grad_norm": 0.4907313379699967, "learning_rate": 3.894566098945661e-05, "loss": 0.6196, "step": 30720 }, { "epoch": 0.8970671960993271, "grad_norm": 0.4806600551836275, "learning_rate": 3.8942957556096244e-05, "loss": 0.5803, "step": 30725 }, { "epoch": 0.8972131793696443, "grad_norm": 0.45845469618481344, "learning_rate": 3.894025412273588e-05, "loss": 0.5579, "step": 30730 }, { "epoch": 0.8973591626399614, "grad_norm": 0.4883288984748315, "learning_rate": 3.8937550689375506e-05, "loss": 0.5677, "step": 30735 }, { "epoch": 0.8975051459102786, "grad_norm": 0.4497016503477842, "learning_rate": 3.893484725601514e-05, "loss": 0.5748, "step": 30740 }, { "epoch": 0.8976511291805959, "grad_norm": 0.47375275302589975, "learning_rate": 3.8932143822654774e-05, "loss": 0.561, "step": 30745 }, { "epoch": 0.8977971124509131, "grad_norm": 0.4397015494815666, "learning_rate": 3.892944038929441e-05, "loss": 0.5563, "step": 30750 }, { "epoch": 0.8979430957212303, "grad_norm": 0.4785110377009117, "learning_rate": 3.892673695593404e-05, "loss": 0.6107, "step": 30755 }, { "epoch": 0.8980890789915476, "grad_norm": 0.4996120439492506, "learning_rate": 3.892403352257367e-05, "loss": 0.6171, "step": 30760 }, { "epoch": 0.8982350622618648, "grad_norm": 0.4558001137973362, "learning_rate": 3.89213300892133e-05, "loss": 0.5328, "step": 30765 }, { "epoch": 0.898381045532182, "grad_norm": 0.4545612222546475, "learning_rate": 3.891862665585294e-05, "loss": 0.5955, "step": 30770 }, { "epoch": 0.8985270288024992, "grad_norm": 0.48259308591473343, "learning_rate": 3.8915923222492564e-05, "loss": 0.5904, "step": 30775 }, { "epoch": 0.8986730120728165, "grad_norm": 0.4729581849085214, "learning_rate": 3.89132197891322e-05, "loss": 0.566, "step": 30780 }, { "epoch": 0.8988189953431337, "grad_norm": 0.4895607530506439, "learning_rate": 3.891051635577183e-05, "loss": 0.5555, "step": 30785 }, { "epoch": 0.8989649786134509, "grad_norm": 0.5289075949941602, "learning_rate": 3.8907812922411466e-05, "loss": 0.5504, "step": 30790 }, { "epoch": 0.8991109618837682, "grad_norm": 0.43760589522738097, "learning_rate": 3.8905109489051093e-05, "loss": 0.5782, "step": 30795 }, { "epoch": 0.8992569451540854, "grad_norm": 0.5328029378209453, "learning_rate": 3.890240605569073e-05, "loss": 0.5966, "step": 30800 }, { "epoch": 0.8994029284244026, "grad_norm": 0.5103306201294991, "learning_rate": 3.889970262233036e-05, "loss": 0.6199, "step": 30805 }, { "epoch": 0.8995489116947197, "grad_norm": 0.5078019782926267, "learning_rate": 3.8896999188969995e-05, "loss": 0.5903, "step": 30810 }, { "epoch": 0.899694894965037, "grad_norm": 0.5256253639867279, "learning_rate": 3.889429575560963e-05, "loss": 0.5826, "step": 30815 }, { "epoch": 0.8998408782353542, "grad_norm": 0.4319832274565892, "learning_rate": 3.889159232224926e-05, "loss": 0.5487, "step": 30820 }, { "epoch": 0.8999868615056714, "grad_norm": 0.48324639807461006, "learning_rate": 3.888888888888889e-05, "loss": 0.5712, "step": 30825 }, { "epoch": 0.9001328447759886, "grad_norm": 0.4820267940253546, "learning_rate": 3.8886185455528525e-05, "loss": 0.5767, "step": 30830 }, { "epoch": 0.9002788280463059, "grad_norm": 0.4665938077384041, "learning_rate": 3.888348202216815e-05, "loss": 0.5947, "step": 30835 }, { "epoch": 0.9004248113166231, "grad_norm": 0.4673170743557373, "learning_rate": 3.8880778588807786e-05, "loss": 0.5758, "step": 30840 }, { "epoch": 0.9005707945869403, "grad_norm": 0.44653848596341517, "learning_rate": 3.887807515544742e-05, "loss": 0.5742, "step": 30845 }, { "epoch": 0.9007167778572576, "grad_norm": 0.46495855645765005, "learning_rate": 3.887537172208705e-05, "loss": 0.5593, "step": 30850 }, { "epoch": 0.9008627611275748, "grad_norm": 0.449443970511038, "learning_rate": 3.887266828872668e-05, "loss": 0.5877, "step": 30855 }, { "epoch": 0.901008744397892, "grad_norm": 0.46526016640412277, "learning_rate": 3.886996485536632e-05, "loss": 0.5823, "step": 30860 }, { "epoch": 0.9011547276682093, "grad_norm": 0.46486887343297245, "learning_rate": 3.886726142200595e-05, "loss": 0.573, "step": 30865 }, { "epoch": 0.9013007109385265, "grad_norm": 0.45042146309714987, "learning_rate": 3.886455798864558e-05, "loss": 0.5917, "step": 30870 }, { "epoch": 0.9014466942088437, "grad_norm": 0.5031564342276683, "learning_rate": 3.886185455528522e-05, "loss": 0.58, "step": 30875 }, { "epoch": 0.9015926774791609, "grad_norm": 0.47059800611361624, "learning_rate": 3.8859151121924845e-05, "loss": 0.5279, "step": 30880 }, { "epoch": 0.9017386607494781, "grad_norm": 0.503062933174236, "learning_rate": 3.885644768856448e-05, "loss": 0.5741, "step": 30885 }, { "epoch": 0.9018846440197953, "grad_norm": 0.49372205019871784, "learning_rate": 3.885374425520411e-05, "loss": 0.5823, "step": 30890 }, { "epoch": 0.9020306272901125, "grad_norm": 0.47501622775752705, "learning_rate": 3.885104082184374e-05, "loss": 0.5813, "step": 30895 }, { "epoch": 0.9021766105604297, "grad_norm": 0.4959902068296029, "learning_rate": 3.8848337388483374e-05, "loss": 0.5553, "step": 30900 }, { "epoch": 0.902322593830747, "grad_norm": 0.446995609285626, "learning_rate": 3.884563395512301e-05, "loss": 0.578, "step": 30905 }, { "epoch": 0.9024685771010642, "grad_norm": 0.45709640109313926, "learning_rate": 3.8842930521762635e-05, "loss": 0.5754, "step": 30910 }, { "epoch": 0.9026145603713814, "grad_norm": 0.45241599689287293, "learning_rate": 3.884022708840227e-05, "loss": 0.5607, "step": 30915 }, { "epoch": 0.9027605436416987, "grad_norm": 0.44495629393116254, "learning_rate": 3.883752365504191e-05, "loss": 0.5425, "step": 30920 }, { "epoch": 0.9029065269120159, "grad_norm": 0.5201785935204445, "learning_rate": 3.883482022168154e-05, "loss": 0.5907, "step": 30925 }, { "epoch": 0.9030525101823331, "grad_norm": 0.4350925481941831, "learning_rate": 3.883211678832117e-05, "loss": 0.5283, "step": 30930 }, { "epoch": 0.9031984934526504, "grad_norm": 0.47719302498887634, "learning_rate": 3.8829413354960805e-05, "loss": 0.5756, "step": 30935 }, { "epoch": 0.9033444767229676, "grad_norm": 0.46365356948535774, "learning_rate": 3.882670992160043e-05, "loss": 0.5402, "step": 30940 }, { "epoch": 0.9034904599932848, "grad_norm": 0.4889280697505459, "learning_rate": 3.8824006488240066e-05, "loss": 0.5751, "step": 30945 }, { "epoch": 0.903636443263602, "grad_norm": 0.49914672238130514, "learning_rate": 3.88213030548797e-05, "loss": 0.5653, "step": 30950 }, { "epoch": 0.9037824265339192, "grad_norm": 0.47264883529958196, "learning_rate": 3.881859962151933e-05, "loss": 0.5724, "step": 30955 }, { "epoch": 0.9039284098042364, "grad_norm": 0.5127390971011734, "learning_rate": 3.881589618815896e-05, "loss": 0.5763, "step": 30960 }, { "epoch": 0.9040743930745536, "grad_norm": 0.4667226787941501, "learning_rate": 3.8813192754798596e-05, "loss": 0.5601, "step": 30965 }, { "epoch": 0.9042203763448708, "grad_norm": 0.4640998452289242, "learning_rate": 3.881048932143822e-05, "loss": 0.5599, "step": 30970 }, { "epoch": 0.9043663596151881, "grad_norm": 0.5087803654256606, "learning_rate": 3.8807785888077864e-05, "loss": 0.597, "step": 30975 }, { "epoch": 0.9045123428855053, "grad_norm": 0.4993089126494738, "learning_rate": 3.88050824547175e-05, "loss": 0.5953, "step": 30980 }, { "epoch": 0.9046583261558225, "grad_norm": 0.4897616164257748, "learning_rate": 3.8802379021357125e-05, "loss": 0.5734, "step": 30985 }, { "epoch": 0.9048043094261398, "grad_norm": 0.502939651959479, "learning_rate": 3.879967558799676e-05, "loss": 0.5687, "step": 30990 }, { "epoch": 0.904950292696457, "grad_norm": 0.4732697450571393, "learning_rate": 3.879697215463639e-05, "loss": 0.5598, "step": 30995 }, { "epoch": 0.9050962759667742, "grad_norm": 0.49902952214643703, "learning_rate": 3.879426872127602e-05, "loss": 0.5727, "step": 31000 }, { "epoch": 0.9052422592370915, "grad_norm": 0.4795161165540716, "learning_rate": 3.8791565287915654e-05, "loss": 0.6008, "step": 31005 }, { "epoch": 0.9053882425074087, "grad_norm": 0.5088450691391357, "learning_rate": 3.878886185455529e-05, "loss": 0.5871, "step": 31010 }, { "epoch": 0.9055342257777259, "grad_norm": 0.4993990244172543, "learning_rate": 3.8786158421194915e-05, "loss": 0.6163, "step": 31015 }, { "epoch": 0.9056802090480431, "grad_norm": 0.4266638660815988, "learning_rate": 3.878345498783455e-05, "loss": 0.5525, "step": 31020 }, { "epoch": 0.9058261923183604, "grad_norm": 0.5513442457590388, "learning_rate": 3.8780751554474183e-05, "loss": 0.6205, "step": 31025 }, { "epoch": 0.9059721755886775, "grad_norm": 0.5006159143698173, "learning_rate": 3.877804812111382e-05, "loss": 0.5602, "step": 31030 }, { "epoch": 0.9061181588589947, "grad_norm": 0.5090232545780927, "learning_rate": 3.877534468775345e-05, "loss": 0.6005, "step": 31035 }, { "epoch": 0.906264142129312, "grad_norm": 0.506015360010983, "learning_rate": 3.8772641254393085e-05, "loss": 0.5579, "step": 31040 }, { "epoch": 0.9064101253996292, "grad_norm": 0.4265414538165359, "learning_rate": 3.876993782103271e-05, "loss": 0.5325, "step": 31045 }, { "epoch": 0.9065561086699464, "grad_norm": 0.45502627438061927, "learning_rate": 3.876723438767235e-05, "loss": 0.5588, "step": 31050 }, { "epoch": 0.9067020919402636, "grad_norm": 0.507448093914595, "learning_rate": 3.876453095431198e-05, "loss": 0.5814, "step": 31055 }, { "epoch": 0.9068480752105809, "grad_norm": 0.49910191588093916, "learning_rate": 3.876182752095161e-05, "loss": 0.5743, "step": 31060 }, { "epoch": 0.9069940584808981, "grad_norm": 0.46915364593304343, "learning_rate": 3.875912408759124e-05, "loss": 0.56, "step": 31065 }, { "epoch": 0.9071400417512153, "grad_norm": 0.46416266149487767, "learning_rate": 3.8756420654230876e-05, "loss": 0.6153, "step": 31070 }, { "epoch": 0.9072860250215325, "grad_norm": 0.4599171169927186, "learning_rate": 3.87537172208705e-05, "loss": 0.5317, "step": 31075 }, { "epoch": 0.9074320082918498, "grad_norm": 0.43662117204159406, "learning_rate": 3.875101378751014e-05, "loss": 0.5278, "step": 31080 }, { "epoch": 0.907577991562167, "grad_norm": 0.48087612443332534, "learning_rate": 3.874831035414977e-05, "loss": 0.5579, "step": 31085 }, { "epoch": 0.9077239748324842, "grad_norm": 0.47513007754030623, "learning_rate": 3.8745606920789405e-05, "loss": 0.5725, "step": 31090 }, { "epoch": 0.9078699581028015, "grad_norm": 0.49406157378071097, "learning_rate": 3.874290348742904e-05, "loss": 0.5473, "step": 31095 }, { "epoch": 0.9080159413731187, "grad_norm": 0.4620764357535784, "learning_rate": 3.874020005406867e-05, "loss": 0.5258, "step": 31100 }, { "epoch": 0.9081619246434358, "grad_norm": 0.48578637510700184, "learning_rate": 3.87374966207083e-05, "loss": 0.6111, "step": 31105 }, { "epoch": 0.908307907913753, "grad_norm": 0.4757314347960328, "learning_rate": 3.8734793187347935e-05, "loss": 0.5785, "step": 31110 }, { "epoch": 0.9084538911840703, "grad_norm": 0.49387279015328184, "learning_rate": 3.873208975398757e-05, "loss": 0.5889, "step": 31115 }, { "epoch": 0.9085998744543875, "grad_norm": 0.4984943973571385, "learning_rate": 3.8729386320627196e-05, "loss": 0.5666, "step": 31120 }, { "epoch": 0.9087458577247047, "grad_norm": 0.48254684105822093, "learning_rate": 3.872668288726683e-05, "loss": 0.539, "step": 31125 }, { "epoch": 0.908891840995022, "grad_norm": 0.49423124993339806, "learning_rate": 3.8723979453906464e-05, "loss": 0.5626, "step": 31130 }, { "epoch": 0.9090378242653392, "grad_norm": 0.48561979603386257, "learning_rate": 3.872127602054609e-05, "loss": 0.5483, "step": 31135 }, { "epoch": 0.9091838075356564, "grad_norm": 0.45524330118509093, "learning_rate": 3.8718572587185725e-05, "loss": 0.5958, "step": 31140 }, { "epoch": 0.9093297908059736, "grad_norm": 0.4529634709079136, "learning_rate": 3.8715869153825366e-05, "loss": 0.5894, "step": 31145 }, { "epoch": 0.9094757740762909, "grad_norm": 0.4733540565580116, "learning_rate": 3.871316572046499e-05, "loss": 0.567, "step": 31150 }, { "epoch": 0.9096217573466081, "grad_norm": 0.45263613321213764, "learning_rate": 3.871046228710463e-05, "loss": 0.5523, "step": 31155 }, { "epoch": 0.9097677406169253, "grad_norm": 0.48201188005306667, "learning_rate": 3.870775885374426e-05, "loss": 0.5937, "step": 31160 }, { "epoch": 0.9099137238872426, "grad_norm": 0.44062814573021336, "learning_rate": 3.870505542038389e-05, "loss": 0.5841, "step": 31165 }, { "epoch": 0.9100597071575598, "grad_norm": 0.493055438452785, "learning_rate": 3.870235198702352e-05, "loss": 0.5933, "step": 31170 }, { "epoch": 0.9102056904278769, "grad_norm": 0.517385551665104, "learning_rate": 3.8699648553663156e-05, "loss": 0.579, "step": 31175 }, { "epoch": 0.9103516736981941, "grad_norm": 0.460443354602874, "learning_rate": 3.8696945120302784e-05, "loss": 0.5865, "step": 31180 }, { "epoch": 0.9104976569685114, "grad_norm": 0.4795050049357554, "learning_rate": 3.869424168694242e-05, "loss": 0.5473, "step": 31185 }, { "epoch": 0.9106436402388286, "grad_norm": 0.45831842931645017, "learning_rate": 3.869153825358205e-05, "loss": 0.5959, "step": 31190 }, { "epoch": 0.9107896235091458, "grad_norm": 0.45105342991730657, "learning_rate": 3.868883482022168e-05, "loss": 0.5655, "step": 31195 }, { "epoch": 0.9109356067794631, "grad_norm": 0.4678125270926409, "learning_rate": 3.868613138686132e-05, "loss": 0.5804, "step": 31200 }, { "epoch": 0.9110815900497803, "grad_norm": 0.47739501491465797, "learning_rate": 3.8683427953500954e-05, "loss": 0.6099, "step": 31205 }, { "epoch": 0.9112275733200975, "grad_norm": 0.4481225851258337, "learning_rate": 3.868072452014058e-05, "loss": 0.549, "step": 31210 }, { "epoch": 0.9113735565904147, "grad_norm": 0.482037536306798, "learning_rate": 3.8678021086780215e-05, "loss": 0.6073, "step": 31215 }, { "epoch": 0.911519539860732, "grad_norm": 0.4597105447809946, "learning_rate": 3.867531765341985e-05, "loss": 0.5877, "step": 31220 }, { "epoch": 0.9116655231310492, "grad_norm": 0.5499135918605164, "learning_rate": 3.8672614220059476e-05, "loss": 0.5737, "step": 31225 }, { "epoch": 0.9118115064013664, "grad_norm": 0.46090524932129623, "learning_rate": 3.866991078669911e-05, "loss": 0.5454, "step": 31230 }, { "epoch": 0.9119574896716837, "grad_norm": 0.4620932798729212, "learning_rate": 3.8667207353338744e-05, "loss": 0.5503, "step": 31235 }, { "epoch": 0.9121034729420009, "grad_norm": 0.49288814756930466, "learning_rate": 3.866450391997837e-05, "loss": 0.5876, "step": 31240 }, { "epoch": 0.9122494562123181, "grad_norm": 0.41343010794927615, "learning_rate": 3.8661800486618005e-05, "loss": 0.5469, "step": 31245 }, { "epoch": 0.9123954394826352, "grad_norm": 0.49387999308266206, "learning_rate": 3.865909705325764e-05, "loss": 0.565, "step": 31250 }, { "epoch": 0.9125414227529525, "grad_norm": 0.4709370477099422, "learning_rate": 3.865639361989727e-05, "loss": 0.5897, "step": 31255 }, { "epoch": 0.9126874060232697, "grad_norm": 0.4081831585475442, "learning_rate": 3.865369018653691e-05, "loss": 0.5661, "step": 31260 }, { "epoch": 0.9128333892935869, "grad_norm": 0.5128970921660383, "learning_rate": 3.8650986753176535e-05, "loss": 0.6089, "step": 31265 }, { "epoch": 0.9129793725639042, "grad_norm": 0.43597724599725174, "learning_rate": 3.864828331981617e-05, "loss": 0.5597, "step": 31270 }, { "epoch": 0.9131253558342214, "grad_norm": 0.4650106364660108, "learning_rate": 3.86455798864558e-05, "loss": 0.5351, "step": 31275 }, { "epoch": 0.9132713391045386, "grad_norm": 0.4834101898293038, "learning_rate": 3.864287645309544e-05, "loss": 0.5601, "step": 31280 }, { "epoch": 0.9134173223748558, "grad_norm": 0.46551581646371887, "learning_rate": 3.8640173019735064e-05, "loss": 0.5357, "step": 31285 }, { "epoch": 0.9135633056451731, "grad_norm": 0.4606568195952739, "learning_rate": 3.86374695863747e-05, "loss": 0.5504, "step": 31290 }, { "epoch": 0.9137092889154903, "grad_norm": 0.4354545140262311, "learning_rate": 3.863476615301433e-05, "loss": 0.5569, "step": 31295 }, { "epoch": 0.9138552721858075, "grad_norm": 0.48842255404521273, "learning_rate": 3.863206271965396e-05, "loss": 0.5881, "step": 31300 }, { "epoch": 0.9140012554561248, "grad_norm": 0.48149551026996035, "learning_rate": 3.862935928629359e-05, "loss": 0.5772, "step": 31305 }, { "epoch": 0.914147238726442, "grad_norm": 0.4717813750003367, "learning_rate": 3.862665585293323e-05, "loss": 0.5704, "step": 31310 }, { "epoch": 0.9142932219967592, "grad_norm": 0.48936512275260846, "learning_rate": 3.862395241957286e-05, "loss": 0.5863, "step": 31315 }, { "epoch": 0.9144392052670763, "grad_norm": 0.4491674186188653, "learning_rate": 3.8621248986212495e-05, "loss": 0.5582, "step": 31320 }, { "epoch": 0.9145851885373936, "grad_norm": 0.45474480318010785, "learning_rate": 3.861854555285212e-05, "loss": 0.5618, "step": 31325 }, { "epoch": 0.9147311718077108, "grad_norm": 0.4965875017422041, "learning_rate": 3.8615842119491756e-05, "loss": 0.562, "step": 31330 }, { "epoch": 0.914877155078028, "grad_norm": 0.5124288631798865, "learning_rate": 3.861313868613139e-05, "loss": 0.5717, "step": 31335 }, { "epoch": 0.9150231383483453, "grad_norm": 0.4951213703716861, "learning_rate": 3.8610435252771025e-05, "loss": 0.5686, "step": 31340 }, { "epoch": 0.9151691216186625, "grad_norm": 0.4627556320065069, "learning_rate": 3.860773181941065e-05, "loss": 0.5994, "step": 31345 }, { "epoch": 0.9153151048889797, "grad_norm": 0.4224189433654764, "learning_rate": 3.8605028386050286e-05, "loss": 0.5458, "step": 31350 }, { "epoch": 0.9154610881592969, "grad_norm": 0.44714010266822957, "learning_rate": 3.860232495268992e-05, "loss": 0.5177, "step": 31355 }, { "epoch": 0.9156070714296142, "grad_norm": 0.498374240155192, "learning_rate": 3.859962151932955e-05, "loss": 0.5831, "step": 31360 }, { "epoch": 0.9157530546999314, "grad_norm": 0.5345251019754816, "learning_rate": 3.859691808596918e-05, "loss": 0.6088, "step": 31365 }, { "epoch": 0.9158990379702486, "grad_norm": 0.43366716602199784, "learning_rate": 3.8594214652608815e-05, "loss": 0.5474, "step": 31370 }, { "epoch": 0.9160450212405659, "grad_norm": 0.4471059445413396, "learning_rate": 3.859151121924845e-05, "loss": 0.5602, "step": 31375 }, { "epoch": 0.9161910045108831, "grad_norm": 0.46511876623762044, "learning_rate": 3.858880778588808e-05, "loss": 0.5847, "step": 31380 }, { "epoch": 0.9163369877812003, "grad_norm": 0.467307874047773, "learning_rate": 3.858610435252771e-05, "loss": 0.5648, "step": 31385 }, { "epoch": 0.9164829710515175, "grad_norm": 0.4594079720843788, "learning_rate": 3.8583400919167344e-05, "loss": 0.5677, "step": 31390 }, { "epoch": 0.9166289543218347, "grad_norm": 0.4827868963581638, "learning_rate": 3.858069748580698e-05, "loss": 0.5608, "step": 31395 }, { "epoch": 0.9167749375921519, "grad_norm": 0.4717135554695514, "learning_rate": 3.8577994052446606e-05, "loss": 0.573, "step": 31400 }, { "epoch": 0.9169209208624691, "grad_norm": 0.4871553177128392, "learning_rate": 3.857529061908624e-05, "loss": 0.5936, "step": 31405 }, { "epoch": 0.9170669041327864, "grad_norm": 0.4283757830131854, "learning_rate": 3.8572587185725874e-05, "loss": 0.5591, "step": 31410 }, { "epoch": 0.9172128874031036, "grad_norm": 0.47573926924775745, "learning_rate": 3.856988375236551e-05, "loss": 0.5676, "step": 31415 }, { "epoch": 0.9173588706734208, "grad_norm": 0.4132543161112747, "learning_rate": 3.8567180319005135e-05, "loss": 0.5565, "step": 31420 }, { "epoch": 0.917504853943738, "grad_norm": 0.4776675115867321, "learning_rate": 3.856447688564477e-05, "loss": 0.5811, "step": 31425 }, { "epoch": 0.9176508372140553, "grad_norm": 0.566408605504653, "learning_rate": 3.85617734522844e-05, "loss": 0.5878, "step": 31430 }, { "epoch": 0.9177968204843725, "grad_norm": 0.45919801894940643, "learning_rate": 3.855907001892404e-05, "loss": 0.5365, "step": 31435 }, { "epoch": 0.9179428037546897, "grad_norm": 0.4490022929452355, "learning_rate": 3.855636658556367e-05, "loss": 0.5491, "step": 31440 }, { "epoch": 0.918088787025007, "grad_norm": 0.47559388827400917, "learning_rate": 3.85536631522033e-05, "loss": 0.5958, "step": 31445 }, { "epoch": 0.9182347702953242, "grad_norm": 0.47917792208606874, "learning_rate": 3.855095971884293e-05, "loss": 0.555, "step": 31450 }, { "epoch": 0.9183807535656414, "grad_norm": 0.592142523612366, "learning_rate": 3.8548256285482566e-05, "loss": 0.5758, "step": 31455 }, { "epoch": 0.9185267368359586, "grad_norm": 0.49323897809989314, "learning_rate": 3.854555285212219e-05, "loss": 0.5899, "step": 31460 }, { "epoch": 0.9186727201062759, "grad_norm": 0.4744917011947816, "learning_rate": 3.854284941876183e-05, "loss": 0.5647, "step": 31465 }, { "epoch": 0.918818703376593, "grad_norm": 0.47124148912740527, "learning_rate": 3.854014598540146e-05, "loss": 0.5692, "step": 31470 }, { "epoch": 0.9189646866469102, "grad_norm": 0.44246842108436, "learning_rate": 3.853744255204109e-05, "loss": 0.5692, "step": 31475 }, { "epoch": 0.9191106699172275, "grad_norm": 0.45204156437930954, "learning_rate": 3.853473911868072e-05, "loss": 0.5529, "step": 31480 }, { "epoch": 0.9192566531875447, "grad_norm": 0.49404670644301574, "learning_rate": 3.8532035685320363e-05, "loss": 0.5722, "step": 31485 }, { "epoch": 0.9194026364578619, "grad_norm": 0.48021399801486114, "learning_rate": 3.852933225195999e-05, "loss": 0.5816, "step": 31490 }, { "epoch": 0.9195486197281791, "grad_norm": 0.473589302283552, "learning_rate": 3.8526628818599625e-05, "loss": 0.5364, "step": 31495 }, { "epoch": 0.9196946029984964, "grad_norm": 0.5045525505370416, "learning_rate": 3.852392538523926e-05, "loss": 0.6041, "step": 31500 }, { "epoch": 0.9198405862688136, "grad_norm": 0.4662958866157352, "learning_rate": 3.8521221951878886e-05, "loss": 0.5975, "step": 31505 }, { "epoch": 0.9199865695391308, "grad_norm": 0.5901845204102646, "learning_rate": 3.851851851851852e-05, "loss": 0.5815, "step": 31510 }, { "epoch": 0.9201325528094481, "grad_norm": 0.4623204043997674, "learning_rate": 3.8515815085158154e-05, "loss": 0.5533, "step": 31515 }, { "epoch": 0.9202785360797653, "grad_norm": 0.48022072951270117, "learning_rate": 3.851311165179778e-05, "loss": 0.5487, "step": 31520 }, { "epoch": 0.9204245193500825, "grad_norm": 0.4340534480631207, "learning_rate": 3.8510408218437415e-05, "loss": 0.5475, "step": 31525 }, { "epoch": 0.9205705026203997, "grad_norm": 0.466987976738713, "learning_rate": 3.850770478507705e-05, "loss": 0.5625, "step": 31530 }, { "epoch": 0.920716485890717, "grad_norm": 0.5092361092945439, "learning_rate": 3.8505001351716676e-05, "loss": 0.5955, "step": 31535 }, { "epoch": 0.9208624691610341, "grad_norm": 0.4500449576544778, "learning_rate": 3.850229791835632e-05, "loss": 0.5452, "step": 31540 }, { "epoch": 0.9210084524313513, "grad_norm": 0.4618958019429292, "learning_rate": 3.849959448499595e-05, "loss": 0.5725, "step": 31545 }, { "epoch": 0.9211544357016686, "grad_norm": 0.49423074748697127, "learning_rate": 3.849689105163558e-05, "loss": 0.6356, "step": 31550 }, { "epoch": 0.9213004189719858, "grad_norm": 0.4611305019006141, "learning_rate": 3.849418761827521e-05, "loss": 0.5502, "step": 31555 }, { "epoch": 0.921446402242303, "grad_norm": 0.46530768704752007, "learning_rate": 3.8491484184914846e-05, "loss": 0.6014, "step": 31560 }, { "epoch": 0.9215923855126202, "grad_norm": 0.44062935111675694, "learning_rate": 3.8488780751554474e-05, "loss": 0.5525, "step": 31565 }, { "epoch": 0.9217383687829375, "grad_norm": 0.48582517194393177, "learning_rate": 3.848607731819411e-05, "loss": 0.5804, "step": 31570 }, { "epoch": 0.9218843520532547, "grad_norm": 0.45393385507837064, "learning_rate": 3.848337388483374e-05, "loss": 0.5389, "step": 31575 }, { "epoch": 0.9220303353235719, "grad_norm": 0.4945387189179343, "learning_rate": 3.848067045147337e-05, "loss": 0.578, "step": 31580 }, { "epoch": 0.9221763185938892, "grad_norm": 0.46458405832957983, "learning_rate": 3.8477967018113e-05, "loss": 0.5801, "step": 31585 }, { "epoch": 0.9223223018642064, "grad_norm": 0.4371255818731531, "learning_rate": 3.847526358475264e-05, "loss": 0.5531, "step": 31590 }, { "epoch": 0.9224682851345236, "grad_norm": 0.479447169670483, "learning_rate": 3.8472560151392264e-05, "loss": 0.5494, "step": 31595 }, { "epoch": 0.9226142684048408, "grad_norm": 0.4875175148590992, "learning_rate": 3.8469856718031905e-05, "loss": 0.5582, "step": 31600 }, { "epoch": 0.9227602516751581, "grad_norm": 0.4769643092249807, "learning_rate": 3.846715328467154e-05, "loss": 0.5718, "step": 31605 }, { "epoch": 0.9229062349454753, "grad_norm": 0.4406054891425822, "learning_rate": 3.8464449851311166e-05, "loss": 0.5393, "step": 31610 }, { "epoch": 0.9230522182157924, "grad_norm": 0.46974920447586493, "learning_rate": 3.84617464179508e-05, "loss": 0.5651, "step": 31615 }, { "epoch": 0.9231982014861096, "grad_norm": 0.4739078920983294, "learning_rate": 3.8459042984590434e-05, "loss": 0.592, "step": 31620 }, { "epoch": 0.9233441847564269, "grad_norm": 0.46474368226558416, "learning_rate": 3.845633955123006e-05, "loss": 0.5669, "step": 31625 }, { "epoch": 0.9234901680267441, "grad_norm": 0.5004173030628581, "learning_rate": 3.8453636117869696e-05, "loss": 0.5848, "step": 31630 }, { "epoch": 0.9236361512970613, "grad_norm": 0.4602458033962826, "learning_rate": 3.845093268450933e-05, "loss": 0.5804, "step": 31635 }, { "epoch": 0.9237821345673786, "grad_norm": 0.45386743342159536, "learning_rate": 3.844822925114896e-05, "loss": 0.5842, "step": 31640 }, { "epoch": 0.9239281178376958, "grad_norm": 0.4894148852886433, "learning_rate": 3.844552581778859e-05, "loss": 0.5641, "step": 31645 }, { "epoch": 0.924074101108013, "grad_norm": 0.4509531263671512, "learning_rate": 3.8442822384428225e-05, "loss": 0.5411, "step": 31650 }, { "epoch": 0.9242200843783303, "grad_norm": 0.5002462582334966, "learning_rate": 3.844011895106786e-05, "loss": 0.5687, "step": 31655 }, { "epoch": 0.9243660676486475, "grad_norm": 0.498265779337789, "learning_rate": 3.843741551770749e-05, "loss": 0.5713, "step": 31660 }, { "epoch": 0.9245120509189647, "grad_norm": 0.46714844404415384, "learning_rate": 3.843471208434713e-05, "loss": 0.5817, "step": 31665 }, { "epoch": 0.9246580341892819, "grad_norm": 0.5035992873262191, "learning_rate": 3.8432008650986754e-05, "loss": 0.5596, "step": 31670 }, { "epoch": 0.9248040174595992, "grad_norm": 0.42066106361750893, "learning_rate": 3.842930521762639e-05, "loss": 0.5475, "step": 31675 }, { "epoch": 0.9249500007299164, "grad_norm": 0.4847779317907286, "learning_rate": 3.842660178426602e-05, "loss": 0.5966, "step": 31680 }, { "epoch": 0.9250959840002336, "grad_norm": 0.45550959587724954, "learning_rate": 3.842389835090565e-05, "loss": 0.5558, "step": 31685 }, { "epoch": 0.9252419672705507, "grad_norm": 0.45535467717088035, "learning_rate": 3.842119491754528e-05, "loss": 0.5695, "step": 31690 }, { "epoch": 0.925387950540868, "grad_norm": 0.43930623318085454, "learning_rate": 3.841849148418492e-05, "loss": 0.5454, "step": 31695 }, { "epoch": 0.9255339338111852, "grad_norm": 0.5204593831362917, "learning_rate": 3.8415788050824545e-05, "loss": 0.5869, "step": 31700 }, { "epoch": 0.9256799170815024, "grad_norm": 0.4846822750293604, "learning_rate": 3.841308461746418e-05, "loss": 0.5664, "step": 31705 }, { "epoch": 0.9258259003518197, "grad_norm": 0.48399528026380667, "learning_rate": 3.841038118410382e-05, "loss": 0.6234, "step": 31710 }, { "epoch": 0.9259718836221369, "grad_norm": 0.49049162075790365, "learning_rate": 3.840767775074345e-05, "loss": 0.5709, "step": 31715 }, { "epoch": 0.9261178668924541, "grad_norm": 0.45609268004655174, "learning_rate": 3.840497431738308e-05, "loss": 0.563, "step": 31720 }, { "epoch": 0.9262638501627714, "grad_norm": 0.4788242191319684, "learning_rate": 3.8402270884022715e-05, "loss": 0.5965, "step": 31725 }, { "epoch": 0.9264098334330886, "grad_norm": 0.5222079928548844, "learning_rate": 3.839956745066234e-05, "loss": 0.5832, "step": 31730 }, { "epoch": 0.9265558167034058, "grad_norm": 0.4544467038309609, "learning_rate": 3.8396864017301976e-05, "loss": 0.5752, "step": 31735 }, { "epoch": 0.926701799973723, "grad_norm": 0.5209770268525218, "learning_rate": 3.839416058394161e-05, "loss": 0.5723, "step": 31740 }, { "epoch": 0.9268477832440403, "grad_norm": 0.48110960397915464, "learning_rate": 3.839145715058124e-05, "loss": 0.5646, "step": 31745 }, { "epoch": 0.9269937665143575, "grad_norm": 0.5057471868330301, "learning_rate": 3.838875371722087e-05, "loss": 0.5844, "step": 31750 }, { "epoch": 0.9271397497846747, "grad_norm": 0.48940397742693936, "learning_rate": 3.8386050283860505e-05, "loss": 0.5965, "step": 31755 }, { "epoch": 0.9272857330549918, "grad_norm": 0.4944798297120638, "learning_rate": 3.838334685050013e-05, "loss": 0.5963, "step": 31760 }, { "epoch": 0.9274317163253091, "grad_norm": 0.5251939700617779, "learning_rate": 3.8380643417139766e-05, "loss": 0.5927, "step": 31765 }, { "epoch": 0.9275776995956263, "grad_norm": 0.5206287282605663, "learning_rate": 3.837793998377941e-05, "loss": 0.5531, "step": 31770 }, { "epoch": 0.9277236828659435, "grad_norm": 0.4411213604511076, "learning_rate": 3.8375236550419034e-05, "loss": 0.5548, "step": 31775 }, { "epoch": 0.9278696661362608, "grad_norm": 0.45533773365925007, "learning_rate": 3.837253311705867e-05, "loss": 0.5494, "step": 31780 }, { "epoch": 0.928015649406578, "grad_norm": 0.44290528981579574, "learning_rate": 3.83698296836983e-05, "loss": 0.5634, "step": 31785 }, { "epoch": 0.9281616326768952, "grad_norm": 0.4556690343493923, "learning_rate": 3.836712625033793e-05, "loss": 0.5998, "step": 31790 }, { "epoch": 0.9283076159472125, "grad_norm": 0.4815487272580662, "learning_rate": 3.8364422816977564e-05, "loss": 0.5288, "step": 31795 }, { "epoch": 0.9284535992175297, "grad_norm": 0.48638739875394205, "learning_rate": 3.83617193836172e-05, "loss": 0.5663, "step": 31800 }, { "epoch": 0.9285995824878469, "grad_norm": 0.4579550064468035, "learning_rate": 3.8359015950256825e-05, "loss": 0.5511, "step": 31805 }, { "epoch": 0.9287455657581641, "grad_norm": 0.4553312155347089, "learning_rate": 3.835631251689646e-05, "loss": 0.5635, "step": 31810 }, { "epoch": 0.9288915490284814, "grad_norm": 0.4678560071336632, "learning_rate": 3.835360908353609e-05, "loss": 0.5829, "step": 31815 }, { "epoch": 0.9290375322987986, "grad_norm": 0.46615505695401266, "learning_rate": 3.835090565017572e-05, "loss": 0.557, "step": 31820 }, { "epoch": 0.9291835155691158, "grad_norm": 0.4730774055613684, "learning_rate": 3.834820221681536e-05, "loss": 0.571, "step": 31825 }, { "epoch": 0.929329498839433, "grad_norm": 0.5010064794626535, "learning_rate": 3.8345498783454995e-05, "loss": 0.6131, "step": 31830 }, { "epoch": 0.9294754821097502, "grad_norm": 0.4801984691433079, "learning_rate": 3.834279535009462e-05, "loss": 0.58, "step": 31835 }, { "epoch": 0.9296214653800674, "grad_norm": 0.47025864661267486, "learning_rate": 3.8340091916734256e-05, "loss": 0.5696, "step": 31840 }, { "epoch": 0.9297674486503846, "grad_norm": 0.48975774804064365, "learning_rate": 3.833738848337389e-05, "loss": 0.5904, "step": 31845 }, { "epoch": 0.9299134319207019, "grad_norm": 0.4602513561982308, "learning_rate": 3.833468505001352e-05, "loss": 0.5463, "step": 31850 }, { "epoch": 0.9300594151910191, "grad_norm": 0.48630029446992806, "learning_rate": 3.833198161665315e-05, "loss": 0.5606, "step": 31855 }, { "epoch": 0.9302053984613363, "grad_norm": 0.4604788154137727, "learning_rate": 3.8329278183292786e-05, "loss": 0.5656, "step": 31860 }, { "epoch": 0.9303513817316535, "grad_norm": 0.5235111309276402, "learning_rate": 3.832657474993241e-05, "loss": 0.5608, "step": 31865 }, { "epoch": 0.9304973650019708, "grad_norm": 0.4727612692330279, "learning_rate": 3.832387131657205e-05, "loss": 0.5575, "step": 31870 }, { "epoch": 0.930643348272288, "grad_norm": 0.4744644917174992, "learning_rate": 3.832116788321168e-05, "loss": 0.5707, "step": 31875 }, { "epoch": 0.9307893315426052, "grad_norm": 0.4583708805566461, "learning_rate": 3.8318464449851315e-05, "loss": 0.5815, "step": 31880 }, { "epoch": 0.9309353148129225, "grad_norm": 0.498330064692592, "learning_rate": 3.831576101649095e-05, "loss": 0.5912, "step": 31885 }, { "epoch": 0.9310812980832397, "grad_norm": 0.45720682214209596, "learning_rate": 3.8313057583130576e-05, "loss": 0.539, "step": 31890 }, { "epoch": 0.9312272813535569, "grad_norm": 0.48038367799251314, "learning_rate": 3.831035414977021e-05, "loss": 0.5651, "step": 31895 }, { "epoch": 0.9313732646238742, "grad_norm": 0.4720929625488102, "learning_rate": 3.8307650716409844e-05, "loss": 0.5672, "step": 31900 }, { "epoch": 0.9315192478941913, "grad_norm": 0.4276483973710787, "learning_rate": 3.830494728304948e-05, "loss": 0.5499, "step": 31905 }, { "epoch": 0.9316652311645085, "grad_norm": 0.47924354029155763, "learning_rate": 3.8302243849689105e-05, "loss": 0.5794, "step": 31910 }, { "epoch": 0.9318112144348257, "grad_norm": 0.46292443168103525, "learning_rate": 3.829954041632874e-05, "loss": 0.5639, "step": 31915 }, { "epoch": 0.931957197705143, "grad_norm": 0.5238349002471503, "learning_rate": 3.829683698296837e-05, "loss": 0.5881, "step": 31920 }, { "epoch": 0.9321031809754602, "grad_norm": 0.5046006821366644, "learning_rate": 3.8294133549608e-05, "loss": 0.5779, "step": 31925 }, { "epoch": 0.9322491642457774, "grad_norm": 0.4723303580221788, "learning_rate": 3.8291430116247635e-05, "loss": 0.5794, "step": 31930 }, { "epoch": 0.9323951475160946, "grad_norm": 0.4610481102193912, "learning_rate": 3.828872668288727e-05, "loss": 0.5571, "step": 31935 }, { "epoch": 0.9325411307864119, "grad_norm": 0.4665568480983696, "learning_rate": 3.82860232495269e-05, "loss": 0.5626, "step": 31940 }, { "epoch": 0.9326871140567291, "grad_norm": 0.46771509549297235, "learning_rate": 3.8283319816166537e-05, "loss": 0.5614, "step": 31945 }, { "epoch": 0.9328330973270463, "grad_norm": 0.46472565273399896, "learning_rate": 3.8280616382806164e-05, "loss": 0.5316, "step": 31950 }, { "epoch": 0.9329790805973636, "grad_norm": 0.47064136841046433, "learning_rate": 3.82779129494458e-05, "loss": 0.6168, "step": 31955 }, { "epoch": 0.9331250638676808, "grad_norm": 0.45180468219520326, "learning_rate": 3.827520951608543e-05, "loss": 0.5537, "step": 31960 }, { "epoch": 0.933271047137998, "grad_norm": 0.47522890507471127, "learning_rate": 3.8272506082725066e-05, "loss": 0.5767, "step": 31965 }, { "epoch": 0.9334170304083153, "grad_norm": 0.48229875911715836, "learning_rate": 3.826980264936469e-05, "loss": 0.5895, "step": 31970 }, { "epoch": 0.9335630136786325, "grad_norm": 0.5100858011118261, "learning_rate": 3.826709921600433e-05, "loss": 0.5626, "step": 31975 }, { "epoch": 0.9337089969489496, "grad_norm": 0.44500257178117614, "learning_rate": 3.826439578264396e-05, "loss": 0.5587, "step": 31980 }, { "epoch": 0.9338549802192668, "grad_norm": 0.4804511692913755, "learning_rate": 3.826169234928359e-05, "loss": 0.5911, "step": 31985 }, { "epoch": 0.9340009634895841, "grad_norm": 0.4905711153380234, "learning_rate": 3.825898891592322e-05, "loss": 0.5577, "step": 31990 }, { "epoch": 0.9341469467599013, "grad_norm": 0.45928552560816455, "learning_rate": 3.8256285482562856e-05, "loss": 0.553, "step": 31995 }, { "epoch": 0.9342929300302185, "grad_norm": 0.4637270413217214, "learning_rate": 3.825358204920249e-05, "loss": 0.5871, "step": 32000 }, { "epoch": 0.9344389133005357, "grad_norm": 0.4393924320303899, "learning_rate": 3.8250878615842124e-05, "loss": 0.5356, "step": 32005 }, { "epoch": 0.934584896570853, "grad_norm": 0.4556867065799245, "learning_rate": 3.824817518248175e-05, "loss": 0.546, "step": 32010 }, { "epoch": 0.9347308798411702, "grad_norm": 0.4769539905728759, "learning_rate": 3.8245471749121386e-05, "loss": 0.5611, "step": 32015 }, { "epoch": 0.9348768631114874, "grad_norm": 0.5139302770975858, "learning_rate": 3.824276831576102e-05, "loss": 0.5757, "step": 32020 }, { "epoch": 0.9350228463818047, "grad_norm": 0.44076735781806614, "learning_rate": 3.824006488240065e-05, "loss": 0.5604, "step": 32025 }, { "epoch": 0.9351688296521219, "grad_norm": 0.47452691040594, "learning_rate": 3.823736144904028e-05, "loss": 0.5592, "step": 32030 }, { "epoch": 0.9353148129224391, "grad_norm": 0.48633379986461256, "learning_rate": 3.8234658015679915e-05, "loss": 0.5665, "step": 32035 }, { "epoch": 0.9354607961927563, "grad_norm": 0.46303719384691466, "learning_rate": 3.823195458231955e-05, "loss": 0.565, "step": 32040 }, { "epoch": 0.9356067794630736, "grad_norm": 0.5024630024754876, "learning_rate": 3.8229251148959176e-05, "loss": 0.5644, "step": 32045 }, { "epoch": 0.9357527627333908, "grad_norm": 0.47305341868801676, "learning_rate": 3.822654771559882e-05, "loss": 0.6043, "step": 32050 }, { "epoch": 0.9358987460037079, "grad_norm": 0.4744034590110301, "learning_rate": 3.8223844282238444e-05, "loss": 0.5586, "step": 32055 }, { "epoch": 0.9360447292740252, "grad_norm": 0.47342683905004784, "learning_rate": 3.822114084887808e-05, "loss": 0.5636, "step": 32060 }, { "epoch": 0.9361907125443424, "grad_norm": 0.4837477553082139, "learning_rate": 3.821843741551771e-05, "loss": 0.5706, "step": 32065 }, { "epoch": 0.9363366958146596, "grad_norm": 0.491584188316606, "learning_rate": 3.821573398215734e-05, "loss": 0.5982, "step": 32070 }, { "epoch": 0.9364826790849768, "grad_norm": 0.5120031903936308, "learning_rate": 3.8213030548796973e-05, "loss": 0.5974, "step": 32075 }, { "epoch": 0.9366286623552941, "grad_norm": 0.4742243462612837, "learning_rate": 3.821032711543661e-05, "loss": 0.6145, "step": 32080 }, { "epoch": 0.9367746456256113, "grad_norm": 0.49515976041332477, "learning_rate": 3.8207623682076235e-05, "loss": 0.5785, "step": 32085 }, { "epoch": 0.9369206288959285, "grad_norm": 0.5071980382359971, "learning_rate": 3.820492024871587e-05, "loss": 0.5928, "step": 32090 }, { "epoch": 0.9370666121662458, "grad_norm": 0.5139031138795164, "learning_rate": 3.82022168153555e-05, "loss": 0.6024, "step": 32095 }, { "epoch": 0.937212595436563, "grad_norm": 0.46066774345521244, "learning_rate": 3.819951338199514e-05, "loss": 0.5589, "step": 32100 }, { "epoch": 0.9373585787068802, "grad_norm": 0.4761880631484803, "learning_rate": 3.8196809948634764e-05, "loss": 0.5824, "step": 32105 }, { "epoch": 0.9375045619771974, "grad_norm": 0.4614264988866443, "learning_rate": 3.8194106515274405e-05, "loss": 0.599, "step": 32110 }, { "epoch": 0.9376505452475147, "grad_norm": 0.461242168553452, "learning_rate": 3.819140308191403e-05, "loss": 0.5464, "step": 32115 }, { "epoch": 0.9377965285178319, "grad_norm": 0.49749066987880786, "learning_rate": 3.8188699648553666e-05, "loss": 0.5849, "step": 32120 }, { "epoch": 0.937942511788149, "grad_norm": 0.4853005411185301, "learning_rate": 3.81859962151933e-05, "loss": 0.5536, "step": 32125 }, { "epoch": 0.9380884950584663, "grad_norm": 0.4677478618238052, "learning_rate": 3.818329278183293e-05, "loss": 0.5901, "step": 32130 }, { "epoch": 0.9382344783287835, "grad_norm": 0.48265952492720515, "learning_rate": 3.818058934847256e-05, "loss": 0.6002, "step": 32135 }, { "epoch": 0.9383804615991007, "grad_norm": 0.4722194651653722, "learning_rate": 3.8177885915112195e-05, "loss": 0.5431, "step": 32140 }, { "epoch": 0.9385264448694179, "grad_norm": 0.5246727303530843, "learning_rate": 3.817518248175182e-05, "loss": 0.625, "step": 32145 }, { "epoch": 0.9386724281397352, "grad_norm": 0.4670602076653776, "learning_rate": 3.8172479048391457e-05, "loss": 0.5725, "step": 32150 }, { "epoch": 0.9388184114100524, "grad_norm": 0.4489446388220147, "learning_rate": 3.816977561503109e-05, "loss": 0.5393, "step": 32155 }, { "epoch": 0.9389643946803696, "grad_norm": 0.45753265645213925, "learning_rate": 3.816707218167072e-05, "loss": 0.5498, "step": 32160 }, { "epoch": 0.9391103779506869, "grad_norm": 0.5187144666396146, "learning_rate": 3.816436874831036e-05, "loss": 0.5896, "step": 32165 }, { "epoch": 0.9392563612210041, "grad_norm": 0.4363369003766718, "learning_rate": 3.816166531494999e-05, "loss": 0.5363, "step": 32170 }, { "epoch": 0.9394023444913213, "grad_norm": 0.5132624927808663, "learning_rate": 3.815896188158962e-05, "loss": 0.6047, "step": 32175 }, { "epoch": 0.9395483277616385, "grad_norm": 0.4604253686112979, "learning_rate": 3.8156258448229254e-05, "loss": 0.5506, "step": 32180 }, { "epoch": 0.9396943110319558, "grad_norm": 0.45196402966920174, "learning_rate": 3.815355501486889e-05, "loss": 0.5798, "step": 32185 }, { "epoch": 0.939840294302273, "grad_norm": 0.48622705861717175, "learning_rate": 3.8150851581508515e-05, "loss": 0.5572, "step": 32190 }, { "epoch": 0.9399862775725902, "grad_norm": 0.44326074252005243, "learning_rate": 3.814814814814815e-05, "loss": 0.584, "step": 32195 }, { "epoch": 0.9401322608429074, "grad_norm": 0.4622925815821642, "learning_rate": 3.814544471478778e-05, "loss": 0.5453, "step": 32200 }, { "epoch": 0.9402782441132246, "grad_norm": 0.4278220782463683, "learning_rate": 3.814274128142741e-05, "loss": 0.5856, "step": 32205 }, { "epoch": 0.9404242273835418, "grad_norm": 0.4976573920135692, "learning_rate": 3.8140037848067044e-05, "loss": 0.5736, "step": 32210 }, { "epoch": 0.940570210653859, "grad_norm": 0.45215300760399385, "learning_rate": 3.813733441470668e-05, "loss": 0.5697, "step": 32215 }, { "epoch": 0.9407161939241763, "grad_norm": 0.4467564522854091, "learning_rate": 3.813463098134631e-05, "loss": 0.5766, "step": 32220 }, { "epoch": 0.9408621771944935, "grad_norm": 0.44578787286307564, "learning_rate": 3.8131927547985946e-05, "loss": 0.599, "step": 32225 }, { "epoch": 0.9410081604648107, "grad_norm": 0.45083506332275985, "learning_rate": 3.812922411462558e-05, "loss": 0.5696, "step": 32230 }, { "epoch": 0.941154143735128, "grad_norm": 0.48553014233750014, "learning_rate": 3.812652068126521e-05, "loss": 0.6193, "step": 32235 }, { "epoch": 0.9413001270054452, "grad_norm": 0.47692925873346415, "learning_rate": 3.812381724790484e-05, "loss": 0.5828, "step": 32240 }, { "epoch": 0.9414461102757624, "grad_norm": 0.48173536239850734, "learning_rate": 3.8121113814544476e-05, "loss": 0.5708, "step": 32245 }, { "epoch": 0.9415920935460796, "grad_norm": 0.4816639444238, "learning_rate": 3.81184103811841e-05, "loss": 0.5836, "step": 32250 }, { "epoch": 0.9417380768163969, "grad_norm": 0.443204311003816, "learning_rate": 3.811570694782374e-05, "loss": 0.549, "step": 32255 }, { "epoch": 0.9418840600867141, "grad_norm": 0.4566836627019246, "learning_rate": 3.811300351446337e-05, "loss": 0.5969, "step": 32260 }, { "epoch": 0.9420300433570313, "grad_norm": 0.4868039934192803, "learning_rate": 3.8110300081103e-05, "loss": 0.5586, "step": 32265 }, { "epoch": 0.9421760266273485, "grad_norm": 0.4691404847144425, "learning_rate": 3.810759664774263e-05, "loss": 0.5907, "step": 32270 }, { "epoch": 0.9423220098976657, "grad_norm": 0.4683096838987568, "learning_rate": 3.8104893214382266e-05, "loss": 0.5578, "step": 32275 }, { "epoch": 0.9424679931679829, "grad_norm": 0.4589164576926967, "learning_rate": 3.81021897810219e-05, "loss": 0.5781, "step": 32280 }, { "epoch": 0.9426139764383001, "grad_norm": 0.4962979607388475, "learning_rate": 3.8099486347661534e-05, "loss": 0.5654, "step": 32285 }, { "epoch": 0.9427599597086174, "grad_norm": 0.5550824038317972, "learning_rate": 3.809678291430117e-05, "loss": 0.6015, "step": 32290 }, { "epoch": 0.9429059429789346, "grad_norm": 0.47247581381320447, "learning_rate": 3.8094079480940795e-05, "loss": 0.5562, "step": 32295 }, { "epoch": 0.9430519262492518, "grad_norm": 0.44474921126236805, "learning_rate": 3.809137604758043e-05, "loss": 0.5784, "step": 32300 }, { "epoch": 0.943197909519569, "grad_norm": 0.48351465784850456, "learning_rate": 3.8088672614220063e-05, "loss": 0.5965, "step": 32305 }, { "epoch": 0.9433438927898863, "grad_norm": 0.494712458911376, "learning_rate": 3.808596918085969e-05, "loss": 0.5474, "step": 32310 }, { "epoch": 0.9434898760602035, "grad_norm": 0.4451678100510814, "learning_rate": 3.8083265747499325e-05, "loss": 0.5827, "step": 32315 }, { "epoch": 0.9436358593305207, "grad_norm": 0.49501543772662226, "learning_rate": 3.808056231413896e-05, "loss": 0.5929, "step": 32320 }, { "epoch": 0.943781842600838, "grad_norm": 0.4794325098892284, "learning_rate": 3.8077858880778586e-05, "loss": 0.5738, "step": 32325 }, { "epoch": 0.9439278258711552, "grad_norm": 0.435039941302272, "learning_rate": 3.807515544741822e-05, "loss": 0.5546, "step": 32330 }, { "epoch": 0.9440738091414724, "grad_norm": 0.42130037363615774, "learning_rate": 3.807245201405786e-05, "loss": 0.5466, "step": 32335 }, { "epoch": 0.9442197924117897, "grad_norm": 0.48326932533520983, "learning_rate": 3.806974858069749e-05, "loss": 0.6001, "step": 32340 }, { "epoch": 0.9443657756821068, "grad_norm": 0.450427113746496, "learning_rate": 3.806704514733712e-05, "loss": 0.5784, "step": 32345 }, { "epoch": 0.944511758952424, "grad_norm": 0.4387013100910361, "learning_rate": 3.8064341713976756e-05, "loss": 0.5248, "step": 32350 }, { "epoch": 0.9446577422227412, "grad_norm": 0.45559459475598535, "learning_rate": 3.806163828061638e-05, "loss": 0.5526, "step": 32355 }, { "epoch": 0.9448037254930585, "grad_norm": 0.47128720870157975, "learning_rate": 3.805893484725602e-05, "loss": 0.5819, "step": 32360 }, { "epoch": 0.9449497087633757, "grad_norm": 0.46066842734812424, "learning_rate": 3.805623141389565e-05, "loss": 0.5612, "step": 32365 }, { "epoch": 0.9450956920336929, "grad_norm": 0.48022386873043404, "learning_rate": 3.805352798053528e-05, "loss": 0.5913, "step": 32370 }, { "epoch": 0.9452416753040102, "grad_norm": 0.4712735865576934, "learning_rate": 3.805082454717491e-05, "loss": 0.5329, "step": 32375 }, { "epoch": 0.9453876585743274, "grad_norm": 0.493272723003161, "learning_rate": 3.8048121113814547e-05, "loss": 0.5951, "step": 32380 }, { "epoch": 0.9455336418446446, "grad_norm": 0.4930691103385476, "learning_rate": 3.8045417680454174e-05, "loss": 0.5461, "step": 32385 }, { "epoch": 0.9456796251149618, "grad_norm": 0.4670619814977357, "learning_rate": 3.8042714247093815e-05, "loss": 0.5651, "step": 32390 }, { "epoch": 0.9458256083852791, "grad_norm": 0.48683010310774877, "learning_rate": 3.804001081373345e-05, "loss": 0.543, "step": 32395 }, { "epoch": 0.9459715916555963, "grad_norm": 0.48235822110302995, "learning_rate": 3.8037307380373076e-05, "loss": 0.6166, "step": 32400 }, { "epoch": 0.9461175749259135, "grad_norm": 0.46547860889575476, "learning_rate": 3.803460394701271e-05, "loss": 0.6071, "step": 32405 }, { "epoch": 0.9462635581962308, "grad_norm": 0.4573984030792724, "learning_rate": 3.8031900513652344e-05, "loss": 0.5861, "step": 32410 }, { "epoch": 0.946409541466548, "grad_norm": 0.49697294879552595, "learning_rate": 3.802919708029197e-05, "loss": 0.5654, "step": 32415 }, { "epoch": 0.9465555247368651, "grad_norm": 0.49390171175961894, "learning_rate": 3.8026493646931605e-05, "loss": 0.5829, "step": 32420 }, { "epoch": 0.9467015080071823, "grad_norm": 0.5121958237493454, "learning_rate": 3.802379021357124e-05, "loss": 0.5868, "step": 32425 }, { "epoch": 0.9468474912774996, "grad_norm": 0.4173906614327878, "learning_rate": 3.8021086780210866e-05, "loss": 0.5429, "step": 32430 }, { "epoch": 0.9469934745478168, "grad_norm": 0.47700057490235775, "learning_rate": 3.80183833468505e-05, "loss": 0.5439, "step": 32435 }, { "epoch": 0.947139457818134, "grad_norm": 0.4543462021958043, "learning_rate": 3.8015679913490134e-05, "loss": 0.558, "step": 32440 }, { "epoch": 0.9472854410884513, "grad_norm": 0.417193992441115, "learning_rate": 3.801297648012976e-05, "loss": 0.5466, "step": 32445 }, { "epoch": 0.9474314243587685, "grad_norm": 0.47021545731306535, "learning_rate": 3.80102730467694e-05, "loss": 0.5815, "step": 32450 }, { "epoch": 0.9475774076290857, "grad_norm": 0.4759601831501499, "learning_rate": 3.8007569613409036e-05, "loss": 0.5511, "step": 32455 }, { "epoch": 0.9477233908994029, "grad_norm": 0.45904545038700595, "learning_rate": 3.8004866180048664e-05, "loss": 0.5649, "step": 32460 }, { "epoch": 0.9478693741697202, "grad_norm": 0.46363533583545047, "learning_rate": 3.80021627466883e-05, "loss": 0.5945, "step": 32465 }, { "epoch": 0.9480153574400374, "grad_norm": 0.4654144310501924, "learning_rate": 3.799945931332793e-05, "loss": 0.569, "step": 32470 }, { "epoch": 0.9481613407103546, "grad_norm": 0.5347602726317761, "learning_rate": 3.799675587996756e-05, "loss": 0.5774, "step": 32475 }, { "epoch": 0.9483073239806719, "grad_norm": 0.533962080817506, "learning_rate": 3.799405244660719e-05, "loss": 0.5698, "step": 32480 }, { "epoch": 0.9484533072509891, "grad_norm": 0.5109848041305457, "learning_rate": 3.799134901324683e-05, "loss": 0.6081, "step": 32485 }, { "epoch": 0.9485992905213062, "grad_norm": 0.4751568840486144, "learning_rate": 3.7988645579886454e-05, "loss": 0.5677, "step": 32490 }, { "epoch": 0.9487452737916234, "grad_norm": 0.43339154368561394, "learning_rate": 3.798594214652609e-05, "loss": 0.5439, "step": 32495 }, { "epoch": 0.9488912570619407, "grad_norm": 0.46457338622316685, "learning_rate": 3.798323871316572e-05, "loss": 0.5907, "step": 32500 }, { "epoch": 0.9490372403322579, "grad_norm": 0.45964340798593956, "learning_rate": 3.7980535279805356e-05, "loss": 0.5727, "step": 32505 }, { "epoch": 0.9491832236025751, "grad_norm": 0.4340316185375223, "learning_rate": 3.797783184644499e-05, "loss": 0.5584, "step": 32510 }, { "epoch": 0.9493292068728924, "grad_norm": 0.40587787203494535, "learning_rate": 3.7975128413084624e-05, "loss": 0.533, "step": 32515 }, { "epoch": 0.9494751901432096, "grad_norm": 0.43209889489675485, "learning_rate": 3.797242497972425e-05, "loss": 0.5798, "step": 32520 }, { "epoch": 0.9496211734135268, "grad_norm": 0.4975798830864513, "learning_rate": 3.7969721546363885e-05, "loss": 0.61, "step": 32525 }, { "epoch": 0.949767156683844, "grad_norm": 0.4631302268291443, "learning_rate": 3.796701811300352e-05, "loss": 0.5611, "step": 32530 }, { "epoch": 0.9499131399541613, "grad_norm": 0.44177238764497906, "learning_rate": 3.796431467964315e-05, "loss": 0.5851, "step": 32535 }, { "epoch": 0.9500591232244785, "grad_norm": 0.46643985145444555, "learning_rate": 3.796161124628278e-05, "loss": 0.5738, "step": 32540 }, { "epoch": 0.9502051064947957, "grad_norm": 0.4484022874904198, "learning_rate": 3.7958907812922415e-05, "loss": 0.5407, "step": 32545 }, { "epoch": 0.950351089765113, "grad_norm": 0.47114192782426645, "learning_rate": 3.795620437956204e-05, "loss": 0.5876, "step": 32550 }, { "epoch": 0.9504970730354302, "grad_norm": 0.4768316821207964, "learning_rate": 3.7953500946201676e-05, "loss": 0.5929, "step": 32555 }, { "epoch": 0.9506430563057474, "grad_norm": 0.4388689583692413, "learning_rate": 3.795079751284131e-05, "loss": 0.5456, "step": 32560 }, { "epoch": 0.9507890395760645, "grad_norm": 0.46758424653856, "learning_rate": 3.7948094079480944e-05, "loss": 0.563, "step": 32565 }, { "epoch": 0.9509350228463818, "grad_norm": 0.4490281163363607, "learning_rate": 3.794539064612058e-05, "loss": 0.5838, "step": 32570 }, { "epoch": 0.951081006116699, "grad_norm": 0.46020695893602065, "learning_rate": 3.7942687212760205e-05, "loss": 0.5649, "step": 32575 }, { "epoch": 0.9512269893870162, "grad_norm": 0.4747246095734574, "learning_rate": 3.793998377939984e-05, "loss": 0.5577, "step": 32580 }, { "epoch": 0.9513729726573334, "grad_norm": 0.4733628737240806, "learning_rate": 3.793728034603947e-05, "loss": 0.5597, "step": 32585 }, { "epoch": 0.9515189559276507, "grad_norm": 0.488182544743457, "learning_rate": 3.793457691267911e-05, "loss": 0.5726, "step": 32590 }, { "epoch": 0.9516649391979679, "grad_norm": 0.6069914249381018, "learning_rate": 3.7931873479318734e-05, "loss": 0.5758, "step": 32595 }, { "epoch": 0.9518109224682851, "grad_norm": 0.4723541789678376, "learning_rate": 3.792917004595837e-05, "loss": 0.5676, "step": 32600 }, { "epoch": 0.9519569057386024, "grad_norm": 0.4090769257531056, "learning_rate": 3.7926466612598e-05, "loss": 0.5569, "step": 32605 }, { "epoch": 0.9521028890089196, "grad_norm": 0.4666867677700621, "learning_rate": 3.792376317923763e-05, "loss": 0.5635, "step": 32610 }, { "epoch": 0.9522488722792368, "grad_norm": 0.4112201591040885, "learning_rate": 3.7921059745877264e-05, "loss": 0.5841, "step": 32615 }, { "epoch": 0.952394855549554, "grad_norm": 0.4423362036789691, "learning_rate": 3.79183563125169e-05, "loss": 0.5706, "step": 32620 }, { "epoch": 0.9525408388198713, "grad_norm": 0.4812371282265279, "learning_rate": 3.791565287915653e-05, "loss": 0.5764, "step": 32625 }, { "epoch": 0.9526868220901885, "grad_norm": 0.4657872978658252, "learning_rate": 3.7912949445796166e-05, "loss": 0.5564, "step": 32630 }, { "epoch": 0.9528328053605057, "grad_norm": 0.47429180096223017, "learning_rate": 3.791024601243579e-05, "loss": 0.5785, "step": 32635 }, { "epoch": 0.9529787886308229, "grad_norm": 0.463984375236927, "learning_rate": 3.790754257907543e-05, "loss": 0.5546, "step": 32640 }, { "epoch": 0.9531247719011401, "grad_norm": 0.4724434631635152, "learning_rate": 3.790483914571506e-05, "loss": 0.6, "step": 32645 }, { "epoch": 0.9532707551714573, "grad_norm": 0.47434221903480533, "learning_rate": 3.790213571235469e-05, "loss": 0.5552, "step": 32650 }, { "epoch": 0.9534167384417745, "grad_norm": 0.4677318283491978, "learning_rate": 3.789943227899432e-05, "loss": 0.5523, "step": 32655 }, { "epoch": 0.9535627217120918, "grad_norm": 0.5172283461275475, "learning_rate": 3.7896728845633956e-05, "loss": 0.5925, "step": 32660 }, { "epoch": 0.953708704982409, "grad_norm": 0.4287030371326491, "learning_rate": 3.789402541227359e-05, "loss": 0.5265, "step": 32665 }, { "epoch": 0.9538546882527262, "grad_norm": 0.4600758865586244, "learning_rate": 3.789132197891322e-05, "loss": 0.548, "step": 32670 }, { "epoch": 0.9540006715230435, "grad_norm": 0.48452114806120616, "learning_rate": 3.788861854555286e-05, "loss": 0.596, "step": 32675 }, { "epoch": 0.9541466547933607, "grad_norm": 0.5096959223484697, "learning_rate": 3.7885915112192486e-05, "loss": 0.5839, "step": 32680 }, { "epoch": 0.9542926380636779, "grad_norm": 0.4655949118835975, "learning_rate": 3.788321167883212e-05, "loss": 0.5652, "step": 32685 }, { "epoch": 0.9544386213339952, "grad_norm": 0.4247829988718031, "learning_rate": 3.7880508245471754e-05, "loss": 0.5571, "step": 32690 }, { "epoch": 0.9545846046043124, "grad_norm": 0.44591378002529997, "learning_rate": 3.787780481211138e-05, "loss": 0.5394, "step": 32695 }, { "epoch": 0.9547305878746296, "grad_norm": 0.4912261525277832, "learning_rate": 3.7875101378751015e-05, "loss": 0.5742, "step": 32700 }, { "epoch": 0.9548765711449468, "grad_norm": 0.5042998938179982, "learning_rate": 3.787239794539065e-05, "loss": 0.5671, "step": 32705 }, { "epoch": 0.955022554415264, "grad_norm": 0.4831755867704204, "learning_rate": 3.7869694512030276e-05, "loss": 0.5586, "step": 32710 }, { "epoch": 0.9551685376855812, "grad_norm": 0.5067846263793472, "learning_rate": 3.786699107866991e-05, "loss": 0.5594, "step": 32715 }, { "epoch": 0.9553145209558984, "grad_norm": 0.5053943524446165, "learning_rate": 3.7864287645309544e-05, "loss": 0.5934, "step": 32720 }, { "epoch": 0.9554605042262156, "grad_norm": 0.45026539042899616, "learning_rate": 3.786158421194918e-05, "loss": 0.5558, "step": 32725 }, { "epoch": 0.9556064874965329, "grad_norm": 0.487657083413096, "learning_rate": 3.785888077858881e-05, "loss": 0.5557, "step": 32730 }, { "epoch": 0.9557524707668501, "grad_norm": 0.4683220566004751, "learning_rate": 3.7856177345228446e-05, "loss": 0.5387, "step": 32735 }, { "epoch": 0.9558984540371673, "grad_norm": 0.45699584318160413, "learning_rate": 3.785347391186807e-05, "loss": 0.5638, "step": 32740 }, { "epoch": 0.9560444373074846, "grad_norm": 0.47711867661509955, "learning_rate": 3.785077047850771e-05, "loss": 0.5566, "step": 32745 }, { "epoch": 0.9561904205778018, "grad_norm": 0.4674586819033579, "learning_rate": 3.784806704514734e-05, "loss": 0.5815, "step": 32750 }, { "epoch": 0.956336403848119, "grad_norm": 0.47110011988391937, "learning_rate": 3.784536361178697e-05, "loss": 0.5815, "step": 32755 }, { "epoch": 0.9564823871184363, "grad_norm": 0.438839718012457, "learning_rate": 3.78426601784266e-05, "loss": 0.6094, "step": 32760 }, { "epoch": 0.9566283703887535, "grad_norm": 0.5064481445503217, "learning_rate": 3.783995674506624e-05, "loss": 0.5981, "step": 32765 }, { "epoch": 0.9567743536590707, "grad_norm": 0.48403296288273406, "learning_rate": 3.7837253311705864e-05, "loss": 0.5771, "step": 32770 }, { "epoch": 0.9569203369293879, "grad_norm": 0.528366981699215, "learning_rate": 3.78345498783455e-05, "loss": 0.5665, "step": 32775 }, { "epoch": 0.9570663201997052, "grad_norm": 0.49251853027136017, "learning_rate": 3.783184644498513e-05, "loss": 0.5634, "step": 32780 }, { "epoch": 0.9572123034700223, "grad_norm": 0.43991798454300496, "learning_rate": 3.7829143011624766e-05, "loss": 0.5674, "step": 32785 }, { "epoch": 0.9573582867403395, "grad_norm": 0.4684479164674067, "learning_rate": 3.78264395782644e-05, "loss": 0.5554, "step": 32790 }, { "epoch": 0.9575042700106567, "grad_norm": 0.49932505722623205, "learning_rate": 3.7823736144904034e-05, "loss": 0.5945, "step": 32795 }, { "epoch": 0.957650253280974, "grad_norm": 0.46881027150912763, "learning_rate": 3.782103271154366e-05, "loss": 0.5556, "step": 32800 }, { "epoch": 0.9577962365512912, "grad_norm": 0.4948871762367225, "learning_rate": 3.7818329278183295e-05, "loss": 0.5662, "step": 32805 }, { "epoch": 0.9579422198216084, "grad_norm": 0.45506792540527985, "learning_rate": 3.781562584482293e-05, "loss": 0.5557, "step": 32810 }, { "epoch": 0.9580882030919257, "grad_norm": 0.45300248878718213, "learning_rate": 3.7812922411462556e-05, "loss": 0.5694, "step": 32815 }, { "epoch": 0.9582341863622429, "grad_norm": 0.4442055361660497, "learning_rate": 3.781021897810219e-05, "loss": 0.5606, "step": 32820 }, { "epoch": 0.9583801696325601, "grad_norm": 0.47274281047877537, "learning_rate": 3.7807515544741824e-05, "loss": 0.5847, "step": 32825 }, { "epoch": 0.9585261529028773, "grad_norm": 0.4681720357288999, "learning_rate": 3.780481211138145e-05, "loss": 0.5037, "step": 32830 }, { "epoch": 0.9586721361731946, "grad_norm": 0.5081181157332509, "learning_rate": 3.7802108678021086e-05, "loss": 0.5885, "step": 32835 }, { "epoch": 0.9588181194435118, "grad_norm": 0.4522125113852953, "learning_rate": 3.779940524466072e-05, "loss": 0.5408, "step": 32840 }, { "epoch": 0.958964102713829, "grad_norm": 0.4521539950503471, "learning_rate": 3.7796701811300354e-05, "loss": 0.5687, "step": 32845 }, { "epoch": 0.9591100859841463, "grad_norm": 0.44345007246591556, "learning_rate": 3.779399837793999e-05, "loss": 0.5771, "step": 32850 }, { "epoch": 0.9592560692544634, "grad_norm": 0.45210759875688555, "learning_rate": 3.779129494457962e-05, "loss": 0.5902, "step": 32855 }, { "epoch": 0.9594020525247806, "grad_norm": 0.5208394496146997, "learning_rate": 3.778859151121925e-05, "loss": 0.6044, "step": 32860 }, { "epoch": 0.9595480357950978, "grad_norm": 0.5084826452969737, "learning_rate": 3.778588807785888e-05, "loss": 0.5788, "step": 32865 }, { "epoch": 0.9596940190654151, "grad_norm": 0.44309368545366773, "learning_rate": 3.778318464449852e-05, "loss": 0.5945, "step": 32870 }, { "epoch": 0.9598400023357323, "grad_norm": 0.47211496562177896, "learning_rate": 3.7780481211138144e-05, "loss": 0.573, "step": 32875 }, { "epoch": 0.9599859856060495, "grad_norm": 0.49146554674978327, "learning_rate": 3.777777777777778e-05, "loss": 0.564, "step": 32880 }, { "epoch": 0.9601319688763668, "grad_norm": 0.4578168073749175, "learning_rate": 3.777507434441741e-05, "loss": 0.5525, "step": 32885 }, { "epoch": 0.960277952146684, "grad_norm": 0.513870990209352, "learning_rate": 3.777237091105704e-05, "loss": 0.5617, "step": 32890 }, { "epoch": 0.9604239354170012, "grad_norm": 0.45333370837879694, "learning_rate": 3.7769667477696673e-05, "loss": 0.5556, "step": 32895 }, { "epoch": 0.9605699186873184, "grad_norm": 0.49452051365771216, "learning_rate": 3.7766964044336314e-05, "loss": 0.5754, "step": 32900 }, { "epoch": 0.9607159019576357, "grad_norm": 0.4343115351780136, "learning_rate": 3.776426061097594e-05, "loss": 0.5217, "step": 32905 }, { "epoch": 0.9608618852279529, "grad_norm": 0.46949183730864674, "learning_rate": 3.7761557177615576e-05, "loss": 0.548, "step": 32910 }, { "epoch": 0.9610078684982701, "grad_norm": 0.41262780635595536, "learning_rate": 3.775885374425521e-05, "loss": 0.5213, "step": 32915 }, { "epoch": 0.9611538517685874, "grad_norm": 0.4602290264167597, "learning_rate": 3.775615031089484e-05, "loss": 0.55, "step": 32920 }, { "epoch": 0.9612998350389046, "grad_norm": 0.4762021982013752, "learning_rate": 3.775344687753447e-05, "loss": 0.5798, "step": 32925 }, { "epoch": 0.9614458183092217, "grad_norm": 0.46783997988331716, "learning_rate": 3.7750743444174105e-05, "loss": 0.5626, "step": 32930 }, { "epoch": 0.9615918015795389, "grad_norm": 0.5050431842444346, "learning_rate": 3.774804001081373e-05, "loss": 0.6088, "step": 32935 }, { "epoch": 0.9617377848498562, "grad_norm": 0.4454371129872889, "learning_rate": 3.7745336577453366e-05, "loss": 0.5653, "step": 32940 }, { "epoch": 0.9618837681201734, "grad_norm": 0.4040001016607152, "learning_rate": 3.7742633144093e-05, "loss": 0.5409, "step": 32945 }, { "epoch": 0.9620297513904906, "grad_norm": 0.4608251911678811, "learning_rate": 3.773992971073263e-05, "loss": 0.5834, "step": 32950 }, { "epoch": 0.9621757346608079, "grad_norm": 0.5130747872545067, "learning_rate": 3.773722627737227e-05, "loss": 0.5589, "step": 32955 }, { "epoch": 0.9623217179311251, "grad_norm": 0.4798561057983575, "learning_rate": 3.77345228440119e-05, "loss": 0.5794, "step": 32960 }, { "epoch": 0.9624677012014423, "grad_norm": 0.45843818198359565, "learning_rate": 3.773181941065153e-05, "loss": 0.5698, "step": 32965 }, { "epoch": 0.9626136844717595, "grad_norm": 0.49533873143152796, "learning_rate": 3.772911597729116e-05, "loss": 0.5739, "step": 32970 }, { "epoch": 0.9627596677420768, "grad_norm": 0.4455524533833788, "learning_rate": 3.77264125439308e-05, "loss": 0.5454, "step": 32975 }, { "epoch": 0.962905651012394, "grad_norm": 0.5435978417820891, "learning_rate": 3.7723709110570425e-05, "loss": 0.5505, "step": 32980 }, { "epoch": 0.9630516342827112, "grad_norm": 0.45098825856220903, "learning_rate": 3.772100567721006e-05, "loss": 0.5718, "step": 32985 }, { "epoch": 0.9631976175530285, "grad_norm": 0.4779757211077594, "learning_rate": 3.771830224384969e-05, "loss": 0.5568, "step": 32990 }, { "epoch": 0.9633436008233457, "grad_norm": 0.5088058721476798, "learning_rate": 3.771559881048932e-05, "loss": 0.5895, "step": 32995 }, { "epoch": 0.9634895840936629, "grad_norm": 0.4909493704296209, "learning_rate": 3.7712895377128954e-05, "loss": 0.5799, "step": 33000 }, { "epoch": 0.96363556736398, "grad_norm": 0.5404806270431952, "learning_rate": 3.771019194376859e-05, "loss": 0.5816, "step": 33005 }, { "epoch": 0.9637815506342973, "grad_norm": 0.44660532548004966, "learning_rate": 3.7707488510408215e-05, "loss": 0.547, "step": 33010 }, { "epoch": 0.9639275339046145, "grad_norm": 0.5523696437637052, "learning_rate": 3.7704785077047856e-05, "loss": 0.5381, "step": 33015 }, { "epoch": 0.9640735171749317, "grad_norm": 0.43833762370509183, "learning_rate": 3.770208164368749e-05, "loss": 0.5567, "step": 33020 }, { "epoch": 0.964219500445249, "grad_norm": 0.5159740951439643, "learning_rate": 3.769937821032712e-05, "loss": 0.604, "step": 33025 }, { "epoch": 0.9643654837155662, "grad_norm": 0.47433983499369964, "learning_rate": 3.769667477696675e-05, "loss": 0.5487, "step": 33030 }, { "epoch": 0.9645114669858834, "grad_norm": 0.5069549733269717, "learning_rate": 3.7693971343606385e-05, "loss": 0.5819, "step": 33035 }, { "epoch": 0.9646574502562006, "grad_norm": 0.4756447960414476, "learning_rate": 3.769126791024601e-05, "loss": 0.5944, "step": 33040 }, { "epoch": 0.9648034335265179, "grad_norm": 0.4798535948171558, "learning_rate": 3.7688564476885646e-05, "loss": 0.5457, "step": 33045 }, { "epoch": 0.9649494167968351, "grad_norm": 0.47965026851851666, "learning_rate": 3.768586104352528e-05, "loss": 0.5885, "step": 33050 }, { "epoch": 0.9650954000671523, "grad_norm": 0.4482002612721211, "learning_rate": 3.768315761016491e-05, "loss": 0.5509, "step": 33055 }, { "epoch": 0.9652413833374696, "grad_norm": 0.4968316634202179, "learning_rate": 3.768045417680454e-05, "loss": 0.5862, "step": 33060 }, { "epoch": 0.9653873666077868, "grad_norm": 0.4653494472202166, "learning_rate": 3.7677750743444176e-05, "loss": 0.5636, "step": 33065 }, { "epoch": 0.965533349878104, "grad_norm": 0.46086882789510036, "learning_rate": 3.767504731008381e-05, "loss": 0.5418, "step": 33070 }, { "epoch": 0.9656793331484211, "grad_norm": 0.448925238815938, "learning_rate": 3.7672343876723444e-05, "loss": 0.5587, "step": 33075 }, { "epoch": 0.9658253164187384, "grad_norm": 0.4452405610844193, "learning_rate": 3.766964044336308e-05, "loss": 0.5251, "step": 33080 }, { "epoch": 0.9659712996890556, "grad_norm": 0.43340616966342893, "learning_rate": 3.7666937010002705e-05, "loss": 0.58, "step": 33085 }, { "epoch": 0.9661172829593728, "grad_norm": 0.42868806606178655, "learning_rate": 3.766423357664234e-05, "loss": 0.5365, "step": 33090 }, { "epoch": 0.96626326622969, "grad_norm": 0.4761560137828698, "learning_rate": 3.766153014328197e-05, "loss": 0.567, "step": 33095 }, { "epoch": 0.9664092495000073, "grad_norm": 0.4442850248815893, "learning_rate": 3.76588267099216e-05, "loss": 0.5822, "step": 33100 }, { "epoch": 0.9665552327703245, "grad_norm": 0.4767863099964627, "learning_rate": 3.7656123276561234e-05, "loss": 0.5484, "step": 33105 }, { "epoch": 0.9667012160406417, "grad_norm": 0.4386294919047128, "learning_rate": 3.765341984320087e-05, "loss": 0.5905, "step": 33110 }, { "epoch": 0.966847199310959, "grad_norm": 0.4949752217725106, "learning_rate": 3.7650716409840495e-05, "loss": 0.5864, "step": 33115 }, { "epoch": 0.9669931825812762, "grad_norm": 0.4472293297309314, "learning_rate": 3.764801297648013e-05, "loss": 0.5607, "step": 33120 }, { "epoch": 0.9671391658515934, "grad_norm": 0.472905570827807, "learning_rate": 3.7645309543119763e-05, "loss": 0.574, "step": 33125 }, { "epoch": 0.9672851491219107, "grad_norm": 0.47060522426853185, "learning_rate": 3.76426061097594e-05, "loss": 0.5591, "step": 33130 }, { "epoch": 0.9674311323922279, "grad_norm": 0.4746127579842705, "learning_rate": 3.763990267639903e-05, "loss": 0.5637, "step": 33135 }, { "epoch": 0.9675771156625451, "grad_norm": 0.4665628296082251, "learning_rate": 3.7637199243038666e-05, "loss": 0.5906, "step": 33140 }, { "epoch": 0.9677230989328623, "grad_norm": 0.4602600535317185, "learning_rate": 3.763449580967829e-05, "loss": 0.5362, "step": 33145 }, { "epoch": 0.9678690822031795, "grad_norm": 0.43440789077232245, "learning_rate": 3.763179237631793e-05, "loss": 0.5908, "step": 33150 }, { "epoch": 0.9680150654734967, "grad_norm": 0.4300219527412103, "learning_rate": 3.762908894295756e-05, "loss": 0.5564, "step": 33155 }, { "epoch": 0.9681610487438139, "grad_norm": 0.47331954193550685, "learning_rate": 3.762638550959719e-05, "loss": 0.5653, "step": 33160 }, { "epoch": 0.9683070320141312, "grad_norm": 0.48141568613169544, "learning_rate": 3.762368207623682e-05, "loss": 0.5726, "step": 33165 }, { "epoch": 0.9684530152844484, "grad_norm": 0.4793133463696762, "learning_rate": 3.7620978642876456e-05, "loss": 0.575, "step": 33170 }, { "epoch": 0.9685989985547656, "grad_norm": 0.45136181278048626, "learning_rate": 3.761827520951608e-05, "loss": 0.5341, "step": 33175 }, { "epoch": 0.9687449818250828, "grad_norm": 0.49240198429350895, "learning_rate": 3.761557177615572e-05, "loss": 0.5891, "step": 33180 }, { "epoch": 0.9688909650954001, "grad_norm": 0.5377286209297172, "learning_rate": 3.761286834279535e-05, "loss": 0.5753, "step": 33185 }, { "epoch": 0.9690369483657173, "grad_norm": 0.4542604650009564, "learning_rate": 3.7610164909434985e-05, "loss": 0.5974, "step": 33190 }, { "epoch": 0.9691829316360345, "grad_norm": 0.5007931576860043, "learning_rate": 3.760746147607462e-05, "loss": 0.567, "step": 33195 }, { "epoch": 0.9693289149063518, "grad_norm": 0.4535726828164218, "learning_rate": 3.7604758042714247e-05, "loss": 0.5462, "step": 33200 }, { "epoch": 0.969474898176669, "grad_norm": 0.4796867319636271, "learning_rate": 3.760205460935388e-05, "loss": 0.5732, "step": 33205 }, { "epoch": 0.9696208814469862, "grad_norm": 0.46897701650836204, "learning_rate": 3.7599351175993515e-05, "loss": 0.564, "step": 33210 }, { "epoch": 0.9697668647173034, "grad_norm": 0.4851878615000867, "learning_rate": 3.759664774263315e-05, "loss": 0.5963, "step": 33215 }, { "epoch": 0.9699128479876206, "grad_norm": 0.4484659866935233, "learning_rate": 3.7593944309272776e-05, "loss": 0.5992, "step": 33220 }, { "epoch": 0.9700588312579378, "grad_norm": 0.4241148343277649, "learning_rate": 3.759124087591241e-05, "loss": 0.5618, "step": 33225 }, { "epoch": 0.970204814528255, "grad_norm": 0.47670590151571923, "learning_rate": 3.7588537442552044e-05, "loss": 0.5859, "step": 33230 }, { "epoch": 0.9703507977985723, "grad_norm": 0.48986758964592564, "learning_rate": 3.758583400919167e-05, "loss": 0.5731, "step": 33235 }, { "epoch": 0.9704967810688895, "grad_norm": 0.4466703805455658, "learning_rate": 3.758313057583131e-05, "loss": 0.5645, "step": 33240 }, { "epoch": 0.9706427643392067, "grad_norm": 0.4732647800626199, "learning_rate": 3.758042714247094e-05, "loss": 0.5748, "step": 33245 }, { "epoch": 0.9707887476095239, "grad_norm": 0.45590638680775847, "learning_rate": 3.757772370911057e-05, "loss": 0.5476, "step": 33250 }, { "epoch": 0.9709347308798412, "grad_norm": 0.4802638441557381, "learning_rate": 3.757502027575021e-05, "loss": 0.5706, "step": 33255 }, { "epoch": 0.9710807141501584, "grad_norm": 0.4662545023040575, "learning_rate": 3.7572316842389834e-05, "loss": 0.5535, "step": 33260 }, { "epoch": 0.9712266974204756, "grad_norm": 0.48512235653843666, "learning_rate": 3.756961340902947e-05, "loss": 0.596, "step": 33265 }, { "epoch": 0.9713726806907929, "grad_norm": 0.4291334208449322, "learning_rate": 3.75669099756691e-05, "loss": 0.5753, "step": 33270 }, { "epoch": 0.9715186639611101, "grad_norm": 0.46706556963802764, "learning_rate": 3.7564206542308736e-05, "loss": 0.5233, "step": 33275 }, { "epoch": 0.9716646472314273, "grad_norm": 0.4831696851028622, "learning_rate": 3.7561503108948364e-05, "loss": 0.5706, "step": 33280 }, { "epoch": 0.9718106305017445, "grad_norm": 0.49039225040153833, "learning_rate": 3.7558799675588e-05, "loss": 0.563, "step": 33285 }, { "epoch": 0.9719566137720618, "grad_norm": 0.549785301525253, "learning_rate": 3.755609624222763e-05, "loss": 0.5948, "step": 33290 }, { "epoch": 0.9721025970423789, "grad_norm": 0.47850503689756774, "learning_rate": 3.7553392808867266e-05, "loss": 0.5757, "step": 33295 }, { "epoch": 0.9722485803126961, "grad_norm": 0.44040642433213484, "learning_rate": 3.75506893755069e-05, "loss": 0.5849, "step": 33300 }, { "epoch": 0.9723945635830133, "grad_norm": 0.4476875019835539, "learning_rate": 3.754798594214653e-05, "loss": 0.5786, "step": 33305 }, { "epoch": 0.9725405468533306, "grad_norm": 0.4559610288620949, "learning_rate": 3.754528250878616e-05, "loss": 0.5415, "step": 33310 }, { "epoch": 0.9726865301236478, "grad_norm": 0.5024539253693963, "learning_rate": 3.7542579075425795e-05, "loss": 0.5784, "step": 33315 }, { "epoch": 0.972832513393965, "grad_norm": 0.5086848588022637, "learning_rate": 3.753987564206542e-05, "loss": 0.5617, "step": 33320 }, { "epoch": 0.9729784966642823, "grad_norm": 0.43361611253911775, "learning_rate": 3.7537172208705056e-05, "loss": 0.5789, "step": 33325 }, { "epoch": 0.9731244799345995, "grad_norm": 0.47468696457316023, "learning_rate": 3.753446877534469e-05, "loss": 0.5592, "step": 33330 }, { "epoch": 0.9732704632049167, "grad_norm": 0.45867314263470393, "learning_rate": 3.753176534198432e-05, "loss": 0.5667, "step": 33335 }, { "epoch": 0.973416446475234, "grad_norm": 0.479531234781738, "learning_rate": 3.752906190862395e-05, "loss": 0.5558, "step": 33340 }, { "epoch": 0.9735624297455512, "grad_norm": 0.4894092734564813, "learning_rate": 3.7526358475263585e-05, "loss": 0.5841, "step": 33345 }, { "epoch": 0.9737084130158684, "grad_norm": 0.4572614270965544, "learning_rate": 3.752365504190322e-05, "loss": 0.5316, "step": 33350 }, { "epoch": 0.9738543962861856, "grad_norm": 0.4244942630457049, "learning_rate": 3.7520951608542853e-05, "loss": 0.5372, "step": 33355 }, { "epoch": 0.9740003795565029, "grad_norm": 0.4712510008690695, "learning_rate": 3.751824817518249e-05, "loss": 0.5572, "step": 33360 }, { "epoch": 0.9741463628268201, "grad_norm": 0.4848594801143829, "learning_rate": 3.7515544741822115e-05, "loss": 0.5301, "step": 33365 }, { "epoch": 0.9742923460971372, "grad_norm": 0.46047025647210804, "learning_rate": 3.751284130846175e-05, "loss": 0.5347, "step": 33370 }, { "epoch": 0.9744383293674544, "grad_norm": 0.48504521998033284, "learning_rate": 3.751013787510138e-05, "loss": 0.5483, "step": 33375 }, { "epoch": 0.9745843126377717, "grad_norm": 0.4532792683484141, "learning_rate": 3.750743444174101e-05, "loss": 0.5815, "step": 33380 }, { "epoch": 0.9747302959080889, "grad_norm": 0.4765822270948112, "learning_rate": 3.7504731008380644e-05, "loss": 0.5778, "step": 33385 }, { "epoch": 0.9748762791784061, "grad_norm": 0.48712041271640766, "learning_rate": 3.750202757502028e-05, "loss": 0.5902, "step": 33390 }, { "epoch": 0.9750222624487234, "grad_norm": 0.42377862090657203, "learning_rate": 3.7499324141659905e-05, "loss": 0.5709, "step": 33395 }, { "epoch": 0.9751682457190406, "grad_norm": 0.5303064795665391, "learning_rate": 3.749662070829954e-05, "loss": 0.5988, "step": 33400 }, { "epoch": 0.9753142289893578, "grad_norm": 0.4415711835816463, "learning_rate": 3.749391727493917e-05, "loss": 0.5306, "step": 33405 }, { "epoch": 0.975460212259675, "grad_norm": 0.47611583957403875, "learning_rate": 3.749121384157881e-05, "loss": 0.6007, "step": 33410 }, { "epoch": 0.9756061955299923, "grad_norm": 0.4201031767419556, "learning_rate": 3.748851040821844e-05, "loss": 0.5648, "step": 33415 }, { "epoch": 0.9757521788003095, "grad_norm": 0.5194356570061721, "learning_rate": 3.7485806974858075e-05, "loss": 0.5804, "step": 33420 }, { "epoch": 0.9758981620706267, "grad_norm": 0.44926605649141715, "learning_rate": 3.74831035414977e-05, "loss": 0.5724, "step": 33425 }, { "epoch": 0.976044145340944, "grad_norm": 0.4866935790864556, "learning_rate": 3.7480400108137337e-05, "loss": 0.5798, "step": 33430 }, { "epoch": 0.9761901286112612, "grad_norm": 0.4947928879358275, "learning_rate": 3.747769667477697e-05, "loss": 0.5815, "step": 33435 }, { "epoch": 0.9763361118815783, "grad_norm": 0.48232077917801003, "learning_rate": 3.74749932414166e-05, "loss": 0.54, "step": 33440 }, { "epoch": 0.9764820951518955, "grad_norm": 0.48182390704099365, "learning_rate": 3.747228980805623e-05, "loss": 0.5477, "step": 33445 }, { "epoch": 0.9766280784222128, "grad_norm": 0.4829189127227331, "learning_rate": 3.7469586374695866e-05, "loss": 0.5833, "step": 33450 }, { "epoch": 0.97677406169253, "grad_norm": 0.47686326055412154, "learning_rate": 3.746688294133549e-05, "loss": 0.5493, "step": 33455 }, { "epoch": 0.9769200449628472, "grad_norm": 0.44047042451972784, "learning_rate": 3.746417950797513e-05, "loss": 0.5858, "step": 33460 }, { "epoch": 0.9770660282331645, "grad_norm": 0.4467579905799918, "learning_rate": 3.746147607461477e-05, "loss": 0.5238, "step": 33465 }, { "epoch": 0.9772120115034817, "grad_norm": 0.4824602455975395, "learning_rate": 3.7458772641254395e-05, "loss": 0.5814, "step": 33470 }, { "epoch": 0.9773579947737989, "grad_norm": 0.43042199062513187, "learning_rate": 3.745606920789403e-05, "loss": 0.5652, "step": 33475 }, { "epoch": 0.9775039780441162, "grad_norm": 0.6863109293095658, "learning_rate": 3.745336577453366e-05, "loss": 0.5622, "step": 33480 }, { "epoch": 0.9776499613144334, "grad_norm": 0.5089128930103103, "learning_rate": 3.745066234117329e-05, "loss": 0.5818, "step": 33485 }, { "epoch": 0.9777959445847506, "grad_norm": 0.44326950170107976, "learning_rate": 3.7447958907812924e-05, "loss": 0.557, "step": 33490 }, { "epoch": 0.9779419278550678, "grad_norm": 0.47100050750367745, "learning_rate": 3.744525547445256e-05, "loss": 0.5809, "step": 33495 }, { "epoch": 0.9780879111253851, "grad_norm": 0.4843673840817632, "learning_rate": 3.7442552041092186e-05, "loss": 0.5858, "step": 33500 }, { "epoch": 0.9782338943957023, "grad_norm": 0.47831380950088587, "learning_rate": 3.743984860773182e-05, "loss": 0.5846, "step": 33505 }, { "epoch": 0.9783798776660195, "grad_norm": 0.4856433735865436, "learning_rate": 3.7437145174371454e-05, "loss": 0.5639, "step": 33510 }, { "epoch": 0.9785258609363366, "grad_norm": 0.5041567671900923, "learning_rate": 3.743444174101108e-05, "loss": 0.5773, "step": 33515 }, { "epoch": 0.9786718442066539, "grad_norm": 0.43631825619976083, "learning_rate": 3.7431738307650715e-05, "loss": 0.5423, "step": 33520 }, { "epoch": 0.9788178274769711, "grad_norm": 0.49920259058012206, "learning_rate": 3.7429034874290356e-05, "loss": 0.5719, "step": 33525 }, { "epoch": 0.9789638107472883, "grad_norm": 0.4500455749129426, "learning_rate": 3.742633144092998e-05, "loss": 0.5512, "step": 33530 }, { "epoch": 0.9791097940176056, "grad_norm": 0.4331858986489004, "learning_rate": 3.742362800756962e-05, "loss": 0.5969, "step": 33535 }, { "epoch": 0.9792557772879228, "grad_norm": 0.46815501757422556, "learning_rate": 3.742092457420925e-05, "loss": 0.5618, "step": 33540 }, { "epoch": 0.97940176055824, "grad_norm": 0.4802766277262043, "learning_rate": 3.741822114084888e-05, "loss": 0.5644, "step": 33545 }, { "epoch": 0.9795477438285572, "grad_norm": 0.4943008035052169, "learning_rate": 3.741551770748851e-05, "loss": 0.6045, "step": 33550 }, { "epoch": 0.9796937270988745, "grad_norm": 0.4443916888618698, "learning_rate": 3.7412814274128146e-05, "loss": 0.5667, "step": 33555 }, { "epoch": 0.9798397103691917, "grad_norm": 0.47326246044504033, "learning_rate": 3.741011084076777e-05, "loss": 0.5396, "step": 33560 }, { "epoch": 0.9799856936395089, "grad_norm": 0.4793389287204649, "learning_rate": 3.740740740740741e-05, "loss": 0.5622, "step": 33565 }, { "epoch": 0.9801316769098262, "grad_norm": 0.488505491774776, "learning_rate": 3.740470397404704e-05, "loss": 0.5861, "step": 33570 }, { "epoch": 0.9802776601801434, "grad_norm": 0.4495408131883136, "learning_rate": 3.740200054068667e-05, "loss": 0.5388, "step": 33575 }, { "epoch": 0.9804236434504606, "grad_norm": 0.5376953557425664, "learning_rate": 3.739929710732631e-05, "loss": 0.5654, "step": 33580 }, { "epoch": 0.9805696267207779, "grad_norm": 0.4599892636609691, "learning_rate": 3.7396593673965943e-05, "loss": 0.5827, "step": 33585 }, { "epoch": 0.980715609991095, "grad_norm": 0.46052237498669973, "learning_rate": 3.739389024060557e-05, "loss": 0.5448, "step": 33590 }, { "epoch": 0.9808615932614122, "grad_norm": 0.4578208892755288, "learning_rate": 3.7391186807245205e-05, "loss": 0.5715, "step": 33595 }, { "epoch": 0.9810075765317294, "grad_norm": 0.4896174569234465, "learning_rate": 3.738848337388484e-05, "loss": 0.5901, "step": 33600 }, { "epoch": 0.9811535598020467, "grad_norm": 0.5001433356448425, "learning_rate": 3.7385779940524466e-05, "loss": 0.5685, "step": 33605 }, { "epoch": 0.9812995430723639, "grad_norm": 0.48238540629138266, "learning_rate": 3.73830765071641e-05, "loss": 0.5638, "step": 33610 }, { "epoch": 0.9814455263426811, "grad_norm": 0.44303458248211935, "learning_rate": 3.7380373073803734e-05, "loss": 0.5414, "step": 33615 }, { "epoch": 0.9815915096129983, "grad_norm": 0.44756258987863895, "learning_rate": 3.737766964044336e-05, "loss": 0.5577, "step": 33620 }, { "epoch": 0.9817374928833156, "grad_norm": 0.47221729789696915, "learning_rate": 3.7374966207082995e-05, "loss": 0.5654, "step": 33625 }, { "epoch": 0.9818834761536328, "grad_norm": 0.48078898356189004, "learning_rate": 3.737226277372263e-05, "loss": 0.578, "step": 33630 }, { "epoch": 0.98202945942395, "grad_norm": 0.49686689631089354, "learning_rate": 3.736955934036226e-05, "loss": 0.5783, "step": 33635 }, { "epoch": 0.9821754426942673, "grad_norm": 0.47456520548973335, "learning_rate": 3.73668559070019e-05, "loss": 0.5319, "step": 33640 }, { "epoch": 0.9823214259645845, "grad_norm": 0.522390071100121, "learning_rate": 3.736415247364153e-05, "loss": 0.6106, "step": 33645 }, { "epoch": 0.9824674092349017, "grad_norm": 0.5070504579643909, "learning_rate": 3.736144904028116e-05, "loss": 0.5906, "step": 33650 }, { "epoch": 0.982613392505219, "grad_norm": 0.455243346208376, "learning_rate": 3.735874560692079e-05, "loss": 0.5817, "step": 33655 }, { "epoch": 0.9827593757755361, "grad_norm": 0.506561082673123, "learning_rate": 3.7356042173560427e-05, "loss": 0.5978, "step": 33660 }, { "epoch": 0.9829053590458533, "grad_norm": 0.5026454467083626, "learning_rate": 3.7353338740200054e-05, "loss": 0.559, "step": 33665 }, { "epoch": 0.9830513423161705, "grad_norm": 0.43230912508169267, "learning_rate": 3.735063530683969e-05, "loss": 0.5557, "step": 33670 }, { "epoch": 0.9831973255864878, "grad_norm": 0.46919376612939095, "learning_rate": 3.734793187347932e-05, "loss": 0.5517, "step": 33675 }, { "epoch": 0.983343308856805, "grad_norm": 0.4978275026936467, "learning_rate": 3.734522844011895e-05, "loss": 0.5543, "step": 33680 }, { "epoch": 0.9834892921271222, "grad_norm": 0.47276715704523004, "learning_rate": 3.734252500675858e-05, "loss": 0.5325, "step": 33685 }, { "epoch": 0.9836352753974394, "grad_norm": 0.4573791462398445, "learning_rate": 3.733982157339822e-05, "loss": 0.5871, "step": 33690 }, { "epoch": 0.9837812586677567, "grad_norm": 0.4343141946696522, "learning_rate": 3.733711814003785e-05, "loss": 0.5401, "step": 33695 }, { "epoch": 0.9839272419380739, "grad_norm": 0.4948085810231094, "learning_rate": 3.7334414706677485e-05, "loss": 0.5314, "step": 33700 }, { "epoch": 0.9840732252083911, "grad_norm": 0.4308322777572328, "learning_rate": 3.733171127331712e-05, "loss": 0.5497, "step": 33705 }, { "epoch": 0.9842192084787084, "grad_norm": 0.4244210340697082, "learning_rate": 3.7329007839956746e-05, "loss": 0.5625, "step": 33710 }, { "epoch": 0.9843651917490256, "grad_norm": 0.4505449463022645, "learning_rate": 3.732630440659638e-05, "loss": 0.5744, "step": 33715 }, { "epoch": 0.9845111750193428, "grad_norm": 0.46855098815736523, "learning_rate": 3.7323600973236014e-05, "loss": 0.5862, "step": 33720 }, { "epoch": 0.98465715828966, "grad_norm": 0.4590673534938166, "learning_rate": 3.732089753987564e-05, "loss": 0.5664, "step": 33725 }, { "epoch": 0.9848031415599773, "grad_norm": 0.46816184784797166, "learning_rate": 3.7318194106515276e-05, "loss": 0.5638, "step": 33730 }, { "epoch": 0.9849491248302944, "grad_norm": 0.4583193565273547, "learning_rate": 3.731549067315491e-05, "loss": 0.5607, "step": 33735 }, { "epoch": 0.9850951081006116, "grad_norm": 0.47795942507743105, "learning_rate": 3.731278723979454e-05, "loss": 0.566, "step": 33740 }, { "epoch": 0.9852410913709289, "grad_norm": 0.46424221270133936, "learning_rate": 3.731008380643417e-05, "loss": 0.5697, "step": 33745 }, { "epoch": 0.9853870746412461, "grad_norm": 0.45672127488813496, "learning_rate": 3.7307380373073805e-05, "loss": 0.5506, "step": 33750 }, { "epoch": 0.9855330579115633, "grad_norm": 0.4535270952717809, "learning_rate": 3.730467693971344e-05, "loss": 0.5612, "step": 33755 }, { "epoch": 0.9856790411818805, "grad_norm": 0.45802951191560665, "learning_rate": 3.730197350635307e-05, "loss": 0.5437, "step": 33760 }, { "epoch": 0.9858250244521978, "grad_norm": 0.4654958304930099, "learning_rate": 3.729927007299271e-05, "loss": 0.5821, "step": 33765 }, { "epoch": 0.985971007722515, "grad_norm": 0.4811825308359082, "learning_rate": 3.7296566639632334e-05, "loss": 0.5703, "step": 33770 }, { "epoch": 0.9861169909928322, "grad_norm": 0.4905888762369477, "learning_rate": 3.729386320627197e-05, "loss": 0.574, "step": 33775 }, { "epoch": 0.9862629742631495, "grad_norm": 0.45584816072397927, "learning_rate": 3.72911597729116e-05, "loss": 0.5564, "step": 33780 }, { "epoch": 0.9864089575334667, "grad_norm": 0.4405481998761997, "learning_rate": 3.728845633955123e-05, "loss": 0.5605, "step": 33785 }, { "epoch": 0.9865549408037839, "grad_norm": 0.47161655174298256, "learning_rate": 3.728575290619086e-05, "loss": 0.5886, "step": 33790 }, { "epoch": 0.9867009240741011, "grad_norm": 0.4778619909791718, "learning_rate": 3.72830494728305e-05, "loss": 0.5773, "step": 33795 }, { "epoch": 0.9868469073444184, "grad_norm": 0.47387606634769036, "learning_rate": 3.7280346039470125e-05, "loss": 0.553, "step": 33800 }, { "epoch": 0.9869928906147355, "grad_norm": 0.45459750527548715, "learning_rate": 3.7277642606109765e-05, "loss": 0.5669, "step": 33805 }, { "epoch": 0.9871388738850527, "grad_norm": 0.4508311491942028, "learning_rate": 3.727493917274939e-05, "loss": 0.5439, "step": 33810 }, { "epoch": 0.98728485715537, "grad_norm": 0.46704645077488177, "learning_rate": 3.727223573938903e-05, "loss": 0.5634, "step": 33815 }, { "epoch": 0.9874308404256872, "grad_norm": 0.4770967942665187, "learning_rate": 3.726953230602866e-05, "loss": 0.5588, "step": 33820 }, { "epoch": 0.9875768236960044, "grad_norm": 0.44936551618615006, "learning_rate": 3.726682887266829e-05, "loss": 0.5341, "step": 33825 }, { "epoch": 0.9877228069663216, "grad_norm": 0.4733220958502046, "learning_rate": 3.726412543930792e-05, "loss": 0.5496, "step": 33830 }, { "epoch": 0.9878687902366389, "grad_norm": 0.5092449905607249, "learning_rate": 3.7261422005947556e-05, "loss": 0.5921, "step": 33835 }, { "epoch": 0.9880147735069561, "grad_norm": 0.4314775169381782, "learning_rate": 3.725871857258719e-05, "loss": 0.549, "step": 33840 }, { "epoch": 0.9881607567772733, "grad_norm": 0.4990886934605731, "learning_rate": 3.725601513922682e-05, "loss": 0.5893, "step": 33845 }, { "epoch": 0.9883067400475906, "grad_norm": 0.48458946170791883, "learning_rate": 3.725331170586645e-05, "loss": 0.5518, "step": 33850 }, { "epoch": 0.9884527233179078, "grad_norm": 0.464057981000171, "learning_rate": 3.7250608272506085e-05, "loss": 0.547, "step": 33855 }, { "epoch": 0.988598706588225, "grad_norm": 0.4280636610181043, "learning_rate": 3.724790483914571e-05, "loss": 0.5479, "step": 33860 }, { "epoch": 0.9887446898585422, "grad_norm": 0.4695810819177006, "learning_rate": 3.724520140578535e-05, "loss": 0.5682, "step": 33865 }, { "epoch": 0.9888906731288595, "grad_norm": 0.48077930178541795, "learning_rate": 3.724249797242498e-05, "loss": 0.5523, "step": 33870 }, { "epoch": 0.9890366563991767, "grad_norm": 0.44607299102749026, "learning_rate": 3.7239794539064614e-05, "loss": 0.5871, "step": 33875 }, { "epoch": 0.9891826396694938, "grad_norm": 0.47460555515913044, "learning_rate": 3.723709110570425e-05, "loss": 0.5653, "step": 33880 }, { "epoch": 0.989328622939811, "grad_norm": 0.4473256710643278, "learning_rate": 3.7234387672343876e-05, "loss": 0.5562, "step": 33885 }, { "epoch": 0.9894746062101283, "grad_norm": 0.47145036241799804, "learning_rate": 3.723168423898351e-05, "loss": 0.577, "step": 33890 }, { "epoch": 0.9896205894804455, "grad_norm": 0.5392855223646005, "learning_rate": 3.7228980805623144e-05, "loss": 0.6182, "step": 33895 }, { "epoch": 0.9897665727507627, "grad_norm": 0.45009025161441135, "learning_rate": 3.722627737226278e-05, "loss": 0.5512, "step": 33900 }, { "epoch": 0.98991255602108, "grad_norm": 0.4875419895925797, "learning_rate": 3.7223573938902405e-05, "loss": 0.617, "step": 33905 }, { "epoch": 0.9900585392913972, "grad_norm": 0.5332425737620585, "learning_rate": 3.722087050554204e-05, "loss": 0.6162, "step": 33910 }, { "epoch": 0.9902045225617144, "grad_norm": 0.46590418139614087, "learning_rate": 3.721816707218167e-05, "loss": 0.5687, "step": 33915 }, { "epoch": 0.9903505058320317, "grad_norm": 0.4288668747252994, "learning_rate": 3.721546363882131e-05, "loss": 0.5486, "step": 33920 }, { "epoch": 0.9904964891023489, "grad_norm": 0.4338030444050101, "learning_rate": 3.721276020546094e-05, "loss": 0.5365, "step": 33925 }, { "epoch": 0.9906424723726661, "grad_norm": 0.48713981348121677, "learning_rate": 3.721005677210057e-05, "loss": 0.5986, "step": 33930 }, { "epoch": 0.9907884556429833, "grad_norm": 0.48525425239642783, "learning_rate": 3.72073533387402e-05, "loss": 0.5606, "step": 33935 }, { "epoch": 0.9909344389133006, "grad_norm": 0.4638557624327638, "learning_rate": 3.7204649905379836e-05, "loss": 0.541, "step": 33940 }, { "epoch": 0.9910804221836178, "grad_norm": 0.46597203275670956, "learning_rate": 3.7201946472019464e-05, "loss": 0.5593, "step": 33945 }, { "epoch": 0.991226405453935, "grad_norm": 0.4798965731218837, "learning_rate": 3.71992430386591e-05, "loss": 0.6119, "step": 33950 }, { "epoch": 0.9913723887242522, "grad_norm": 0.48333297715177465, "learning_rate": 3.719653960529873e-05, "loss": 0.56, "step": 33955 }, { "epoch": 0.9915183719945694, "grad_norm": 0.5200551528004189, "learning_rate": 3.719383617193836e-05, "loss": 0.584, "step": 33960 }, { "epoch": 0.9916643552648866, "grad_norm": 0.4694385260456287, "learning_rate": 3.719113273857799e-05, "loss": 0.5519, "step": 33965 }, { "epoch": 0.9918103385352038, "grad_norm": 0.4959316163013148, "learning_rate": 3.718842930521763e-05, "loss": 0.5692, "step": 33970 }, { "epoch": 0.9919563218055211, "grad_norm": 0.5183317972666293, "learning_rate": 3.718572587185726e-05, "loss": 0.5594, "step": 33975 }, { "epoch": 0.9921023050758383, "grad_norm": 0.4774941382796974, "learning_rate": 3.7183022438496895e-05, "loss": 0.5451, "step": 33980 }, { "epoch": 0.9922482883461555, "grad_norm": 0.4553284756291157, "learning_rate": 3.718031900513653e-05, "loss": 0.5442, "step": 33985 }, { "epoch": 0.9923942716164728, "grad_norm": 0.4810106179596436, "learning_rate": 3.7177615571776156e-05, "loss": 0.5478, "step": 33990 }, { "epoch": 0.99254025488679, "grad_norm": 0.4845447637614768, "learning_rate": 3.717491213841579e-05, "loss": 0.5467, "step": 33995 }, { "epoch": 0.9926862381571072, "grad_norm": 0.5025814009424223, "learning_rate": 3.7172208705055424e-05, "loss": 0.5567, "step": 34000 }, { "epoch": 0.9928322214274244, "grad_norm": 0.47789674760074763, "learning_rate": 3.716950527169505e-05, "loss": 0.5712, "step": 34005 }, { "epoch": 0.9929782046977417, "grad_norm": 0.4576769793331926, "learning_rate": 3.7166801838334685e-05, "loss": 0.5709, "step": 34010 }, { "epoch": 0.9931241879680589, "grad_norm": 0.4685708248853744, "learning_rate": 3.716409840497432e-05, "loss": 0.5587, "step": 34015 }, { "epoch": 0.9932701712383761, "grad_norm": 0.5277300456746963, "learning_rate": 3.7161394971613947e-05, "loss": 0.5884, "step": 34020 }, { "epoch": 0.9934161545086932, "grad_norm": 0.48624948184111433, "learning_rate": 3.715869153825358e-05, "loss": 0.5612, "step": 34025 }, { "epoch": 0.9935621377790105, "grad_norm": 0.46118197425980867, "learning_rate": 3.7155988104893215e-05, "loss": 0.5778, "step": 34030 }, { "epoch": 0.9937081210493277, "grad_norm": 0.4471663850808587, "learning_rate": 3.715328467153285e-05, "loss": 0.5566, "step": 34035 }, { "epoch": 0.9938541043196449, "grad_norm": 0.4833195830797912, "learning_rate": 3.715058123817248e-05, "loss": 0.5807, "step": 34040 }, { "epoch": 0.9940000875899622, "grad_norm": 0.42296131429857825, "learning_rate": 3.714787780481212e-05, "loss": 0.5781, "step": 34045 }, { "epoch": 0.9941460708602794, "grad_norm": 0.4353489158089301, "learning_rate": 3.7145174371451744e-05, "loss": 0.5255, "step": 34050 }, { "epoch": 0.9942920541305966, "grad_norm": 0.4582220049304152, "learning_rate": 3.714247093809138e-05, "loss": 0.5445, "step": 34055 }, { "epoch": 0.9944380374009139, "grad_norm": 0.4476731013693613, "learning_rate": 3.713976750473101e-05, "loss": 0.5728, "step": 34060 }, { "epoch": 0.9945840206712311, "grad_norm": 0.47421087819335, "learning_rate": 3.713706407137064e-05, "loss": 0.5646, "step": 34065 }, { "epoch": 0.9947300039415483, "grad_norm": 0.46597274225794594, "learning_rate": 3.713436063801027e-05, "loss": 0.5473, "step": 34070 }, { "epoch": 0.9948759872118655, "grad_norm": 0.4805155781192358, "learning_rate": 3.713165720464991e-05, "loss": 0.5599, "step": 34075 }, { "epoch": 0.9950219704821828, "grad_norm": 0.4698170960023596, "learning_rate": 3.7128953771289534e-05, "loss": 0.5805, "step": 34080 }, { "epoch": 0.9951679537525, "grad_norm": 0.49286348675140085, "learning_rate": 3.712625033792917e-05, "loss": 0.5959, "step": 34085 }, { "epoch": 0.9953139370228172, "grad_norm": 0.48809012103159827, "learning_rate": 3.712354690456881e-05, "loss": 0.5637, "step": 34090 }, { "epoch": 0.9954599202931345, "grad_norm": 0.4807463246765356, "learning_rate": 3.7120843471208436e-05, "loss": 0.5908, "step": 34095 }, { "epoch": 0.9956059035634516, "grad_norm": 0.4586056732032637, "learning_rate": 3.711814003784807e-05, "loss": 0.5539, "step": 34100 }, { "epoch": 0.9957518868337688, "grad_norm": 0.46517398951840555, "learning_rate": 3.7115436604487704e-05, "loss": 0.5524, "step": 34105 }, { "epoch": 0.995897870104086, "grad_norm": 0.425156956116527, "learning_rate": 3.711273317112733e-05, "loss": 0.5641, "step": 34110 }, { "epoch": 0.9960438533744033, "grad_norm": 0.4867699038102081, "learning_rate": 3.7110029737766966e-05, "loss": 0.5654, "step": 34115 }, { "epoch": 0.9961898366447205, "grad_norm": 0.4477817729658998, "learning_rate": 3.71073263044066e-05, "loss": 0.5354, "step": 34120 }, { "epoch": 0.9963358199150377, "grad_norm": 0.47017162072576274, "learning_rate": 3.710462287104623e-05, "loss": 0.6029, "step": 34125 }, { "epoch": 0.996481803185355, "grad_norm": 0.47473342216294645, "learning_rate": 3.710191943768586e-05, "loss": 0.5826, "step": 34130 }, { "epoch": 0.9966277864556722, "grad_norm": 0.478898701564807, "learning_rate": 3.7099216004325495e-05, "loss": 0.5482, "step": 34135 }, { "epoch": 0.9967737697259894, "grad_norm": 0.46516017823973377, "learning_rate": 3.709651257096512e-05, "loss": 0.567, "step": 34140 }, { "epoch": 0.9969197529963066, "grad_norm": 0.47480910966891093, "learning_rate": 3.709380913760476e-05, "loss": 0.5557, "step": 34145 }, { "epoch": 0.9970657362666239, "grad_norm": 0.45815891599866454, "learning_rate": 3.70911057042444e-05, "loss": 0.5533, "step": 34150 }, { "epoch": 0.9972117195369411, "grad_norm": 0.4778048365975155, "learning_rate": 3.7088402270884024e-05, "loss": 0.5789, "step": 34155 }, { "epoch": 0.9973577028072583, "grad_norm": 0.45263622873140213, "learning_rate": 3.708569883752366e-05, "loss": 0.5842, "step": 34160 }, { "epoch": 0.9975036860775756, "grad_norm": 0.5161799176410966, "learning_rate": 3.708299540416329e-05, "loss": 0.6036, "step": 34165 }, { "epoch": 0.9976496693478928, "grad_norm": 0.48788973935616525, "learning_rate": 3.708029197080292e-05, "loss": 0.5657, "step": 34170 }, { "epoch": 0.9977956526182099, "grad_norm": 0.4735039136422654, "learning_rate": 3.7077588537442553e-05, "loss": 0.6071, "step": 34175 }, { "epoch": 0.9979416358885271, "grad_norm": 0.4458146066380976, "learning_rate": 3.707488510408219e-05, "loss": 0.5726, "step": 34180 }, { "epoch": 0.9980876191588444, "grad_norm": 0.4691236682275335, "learning_rate": 3.7072181670721815e-05, "loss": 0.5609, "step": 34185 }, { "epoch": 0.9982336024291616, "grad_norm": 0.4351981084840898, "learning_rate": 3.706947823736145e-05, "loss": 0.5293, "step": 34190 }, { "epoch": 0.9983795856994788, "grad_norm": 0.46452363973594507, "learning_rate": 3.706677480400108e-05, "loss": 0.5798, "step": 34195 }, { "epoch": 0.998525568969796, "grad_norm": 0.4953484522006862, "learning_rate": 3.706407137064071e-05, "loss": 0.5869, "step": 34200 }, { "epoch": 0.9986715522401133, "grad_norm": 0.5010950074955607, "learning_rate": 3.706136793728035e-05, "loss": 0.5571, "step": 34205 }, { "epoch": 0.9988175355104305, "grad_norm": 0.4661950316235704, "learning_rate": 3.7058664503919985e-05, "loss": 0.5507, "step": 34210 }, { "epoch": 0.9989635187807477, "grad_norm": 0.4363488312848607, "learning_rate": 3.705596107055961e-05, "loss": 0.5871, "step": 34215 }, { "epoch": 0.999109502051065, "grad_norm": 0.48639952246926393, "learning_rate": 3.7053257637199246e-05, "loss": 0.6058, "step": 34220 }, { "epoch": 0.9992554853213822, "grad_norm": 0.45469408323741645, "learning_rate": 3.705055420383888e-05, "loss": 0.5453, "step": 34225 }, { "epoch": 0.9994014685916994, "grad_norm": 0.5002990638399945, "learning_rate": 3.704785077047851e-05, "loss": 0.5819, "step": 34230 }, { "epoch": 0.9995474518620167, "grad_norm": 0.4800525141650515, "learning_rate": 3.704514733711814e-05, "loss": 0.5443, "step": 34235 }, { "epoch": 0.9996934351323339, "grad_norm": 0.44731668021326193, "learning_rate": 3.7042443903757775e-05, "loss": 0.5577, "step": 34240 }, { "epoch": 0.999839418402651, "grad_norm": 0.4830505904439273, "learning_rate": 3.70397404703974e-05, "loss": 0.5704, "step": 34245 }, { "epoch": 0.9999854016729682, "grad_norm": 0.4860566500838918, "learning_rate": 3.7037037037037037e-05, "loss": 0.5856, "step": 34250 }, { "epoch": 1.0001167866162537, "grad_norm": 0.4645943321051691, "learning_rate": 3.703433360367667e-05, "loss": 0.5274, "step": 34255 }, { "epoch": 1.000262769886571, "grad_norm": 0.41454141157675906, "learning_rate": 3.7031630170316305e-05, "loss": 0.4218, "step": 34260 }, { "epoch": 1.0004087531568882, "grad_norm": 0.4338090414581318, "learning_rate": 3.702892673695594e-05, "loss": 0.4097, "step": 34265 }, { "epoch": 1.0005547364272054, "grad_norm": 0.500799004878826, "learning_rate": 3.702622330359557e-05, "loss": 0.45, "step": 34270 }, { "epoch": 1.0007007196975226, "grad_norm": 0.47404503563176836, "learning_rate": 3.70235198702352e-05, "loss": 0.452, "step": 34275 }, { "epoch": 1.0008467029678398, "grad_norm": 0.4621882019217227, "learning_rate": 3.7020816436874834e-05, "loss": 0.4327, "step": 34280 }, { "epoch": 1.000992686238157, "grad_norm": 0.4084892919608579, "learning_rate": 3.701811300351447e-05, "loss": 0.4396, "step": 34285 }, { "epoch": 1.0011386695084743, "grad_norm": 0.468014466121082, "learning_rate": 3.7015409570154095e-05, "loss": 0.4257, "step": 34290 }, { "epoch": 1.0012846527787915, "grad_norm": 0.4991847256475515, "learning_rate": 3.701270613679373e-05, "loss": 0.4441, "step": 34295 }, { "epoch": 1.0014306360491088, "grad_norm": 0.45194430799368174, "learning_rate": 3.701000270343336e-05, "loss": 0.4565, "step": 34300 }, { "epoch": 1.001576619319426, "grad_norm": 0.484069269700306, "learning_rate": 3.700729927007299e-05, "loss": 0.4504, "step": 34305 }, { "epoch": 1.0017226025897432, "grad_norm": 0.4829026037882965, "learning_rate": 3.7004595836712624e-05, "loss": 0.4539, "step": 34310 }, { "epoch": 1.0018685858600604, "grad_norm": 0.5134527940207546, "learning_rate": 3.7001892403352265e-05, "loss": 0.4575, "step": 34315 }, { "epoch": 1.0020145691303777, "grad_norm": 0.485056054975411, "learning_rate": 3.699918896999189e-05, "loss": 0.4633, "step": 34320 }, { "epoch": 1.002160552400695, "grad_norm": 0.4021715667013184, "learning_rate": 3.6996485536631526e-05, "loss": 0.421, "step": 34325 }, { "epoch": 1.0023065356710121, "grad_norm": 0.4469788544365214, "learning_rate": 3.699378210327116e-05, "loss": 0.4485, "step": 34330 }, { "epoch": 1.0024525189413294, "grad_norm": 0.5448474216872771, "learning_rate": 3.699107866991079e-05, "loss": 0.4366, "step": 34335 }, { "epoch": 1.0025985022116466, "grad_norm": 0.4976872906100235, "learning_rate": 3.698837523655042e-05, "loss": 0.4591, "step": 34340 }, { "epoch": 1.0027444854819638, "grad_norm": 0.4593536469608403, "learning_rate": 3.6985671803190056e-05, "loss": 0.4246, "step": 34345 }, { "epoch": 1.002890468752281, "grad_norm": 0.527239383635679, "learning_rate": 3.698296836982968e-05, "loss": 0.4427, "step": 34350 }, { "epoch": 1.0030364520225983, "grad_norm": 0.5288115427288171, "learning_rate": 3.698026493646932e-05, "loss": 0.4536, "step": 34355 }, { "epoch": 1.0031824352929155, "grad_norm": 0.41902371753434625, "learning_rate": 3.697756150310895e-05, "loss": 0.4518, "step": 34360 }, { "epoch": 1.0033284185632327, "grad_norm": 0.4792901410641968, "learning_rate": 3.697485806974858e-05, "loss": 0.4615, "step": 34365 }, { "epoch": 1.00347440183355, "grad_norm": 0.464431310299233, "learning_rate": 3.697215463638821e-05, "loss": 0.4367, "step": 34370 }, { "epoch": 1.0036203851038672, "grad_norm": 0.46124522995349737, "learning_rate": 3.6969451203027846e-05, "loss": 0.4469, "step": 34375 }, { "epoch": 1.0037663683741844, "grad_norm": 0.4559807033068156, "learning_rate": 3.696674776966748e-05, "loss": 0.4604, "step": 34380 }, { "epoch": 1.0039123516445014, "grad_norm": 0.4758522513373093, "learning_rate": 3.6964044336307114e-05, "loss": 0.433, "step": 34385 }, { "epoch": 1.0040583349148187, "grad_norm": 0.5540925771948949, "learning_rate": 3.696134090294675e-05, "loss": 0.4749, "step": 34390 }, { "epoch": 1.004204318185136, "grad_norm": 0.5546528252428068, "learning_rate": 3.6958637469586375e-05, "loss": 0.4643, "step": 34395 }, { "epoch": 1.0043503014554531, "grad_norm": 0.4748017637474818, "learning_rate": 3.695593403622601e-05, "loss": 0.4415, "step": 34400 }, { "epoch": 1.0044962847257703, "grad_norm": 0.4609351345227812, "learning_rate": 3.6953230602865643e-05, "loss": 0.4546, "step": 34405 }, { "epoch": 1.0046422679960876, "grad_norm": 0.44871885479808077, "learning_rate": 3.695052716950527e-05, "loss": 0.4195, "step": 34410 }, { "epoch": 1.0047882512664048, "grad_norm": 0.48464131639978275, "learning_rate": 3.6947823736144905e-05, "loss": 0.4561, "step": 34415 }, { "epoch": 1.004934234536722, "grad_norm": 0.47529328520136915, "learning_rate": 3.694512030278454e-05, "loss": 0.4418, "step": 34420 }, { "epoch": 1.0050802178070393, "grad_norm": 0.46904168270753954, "learning_rate": 3.6942416869424166e-05, "loss": 0.4498, "step": 34425 }, { "epoch": 1.0052262010773565, "grad_norm": 0.48984986215906073, "learning_rate": 3.693971343606381e-05, "loss": 0.4399, "step": 34430 }, { "epoch": 1.0053721843476737, "grad_norm": 0.49626672654144255, "learning_rate": 3.6937010002703434e-05, "loss": 0.4331, "step": 34435 }, { "epoch": 1.005518167617991, "grad_norm": 0.46863706329434446, "learning_rate": 3.693430656934307e-05, "loss": 0.4408, "step": 34440 }, { "epoch": 1.0056641508883082, "grad_norm": 0.4971312078048271, "learning_rate": 3.69316031359827e-05, "loss": 0.4645, "step": 34445 }, { "epoch": 1.0058101341586254, "grad_norm": 0.4682909795741274, "learning_rate": 3.6928899702622336e-05, "loss": 0.4597, "step": 34450 }, { "epoch": 1.0059561174289426, "grad_norm": 0.4434029109346255, "learning_rate": 3.692619626926196e-05, "loss": 0.4377, "step": 34455 }, { "epoch": 1.0061021006992599, "grad_norm": 0.4796937021037689, "learning_rate": 3.69234928359016e-05, "loss": 0.4526, "step": 34460 }, { "epoch": 1.006248083969577, "grad_norm": 0.5528587283308294, "learning_rate": 3.692078940254123e-05, "loss": 0.4585, "step": 34465 }, { "epoch": 1.0063940672398943, "grad_norm": 0.46124235731280666, "learning_rate": 3.691808596918086e-05, "loss": 0.4768, "step": 34470 }, { "epoch": 1.0065400505102116, "grad_norm": 0.7667851358749745, "learning_rate": 3.691538253582049e-05, "loss": 0.4487, "step": 34475 }, { "epoch": 1.0066860337805288, "grad_norm": 0.515620198082316, "learning_rate": 3.6912679102460127e-05, "loss": 0.4435, "step": 34480 }, { "epoch": 1.006832017050846, "grad_norm": 0.45004630025514925, "learning_rate": 3.690997566909976e-05, "loss": 0.4322, "step": 34485 }, { "epoch": 1.0069780003211632, "grad_norm": 0.498161844069613, "learning_rate": 3.6907272235739395e-05, "loss": 0.4653, "step": 34490 }, { "epoch": 1.0071239835914805, "grad_norm": 0.4929069335519687, "learning_rate": 3.690456880237902e-05, "loss": 0.4485, "step": 34495 }, { "epoch": 1.0072699668617977, "grad_norm": 0.5198326652022379, "learning_rate": 3.6901865369018656e-05, "loss": 0.4249, "step": 34500 }, { "epoch": 1.007415950132115, "grad_norm": 0.48955575682143554, "learning_rate": 3.689916193565829e-05, "loss": 0.443, "step": 34505 }, { "epoch": 1.0075619334024322, "grad_norm": 0.48391689167327706, "learning_rate": 3.689645850229792e-05, "loss": 0.4477, "step": 34510 }, { "epoch": 1.0077079166727494, "grad_norm": 0.5198736423062387, "learning_rate": 3.689375506893755e-05, "loss": 0.4337, "step": 34515 }, { "epoch": 1.0078538999430666, "grad_norm": 0.4814393859487624, "learning_rate": 3.6891051635577185e-05, "loss": 0.4561, "step": 34520 }, { "epoch": 1.0079998832133839, "grad_norm": 0.4737505463009846, "learning_rate": 3.688834820221682e-05, "loss": 0.422, "step": 34525 }, { "epoch": 1.0081458664837009, "grad_norm": 0.4631364140258957, "learning_rate": 3.6885644768856446e-05, "loss": 0.4617, "step": 34530 }, { "epoch": 1.008291849754018, "grad_norm": 0.49870502937200445, "learning_rate": 3.688294133549608e-05, "loss": 0.4522, "step": 34535 }, { "epoch": 1.0084378330243353, "grad_norm": 0.5064281039599199, "learning_rate": 3.6880237902135714e-05, "loss": 0.4351, "step": 34540 }, { "epoch": 1.0085838162946525, "grad_norm": 0.47085805336449055, "learning_rate": 3.687753446877535e-05, "loss": 0.441, "step": 34545 }, { "epoch": 1.0087297995649698, "grad_norm": 0.5003956234143895, "learning_rate": 3.687483103541498e-05, "loss": 0.4449, "step": 34550 }, { "epoch": 1.008875782835287, "grad_norm": 0.45989485661357443, "learning_rate": 3.687212760205461e-05, "loss": 0.4447, "step": 34555 }, { "epoch": 1.0090217661056042, "grad_norm": 0.5001976072296277, "learning_rate": 3.6869424168694244e-05, "loss": 0.4693, "step": 34560 }, { "epoch": 1.0091677493759215, "grad_norm": 0.49697641035402573, "learning_rate": 3.686672073533388e-05, "loss": 0.437, "step": 34565 }, { "epoch": 1.0093137326462387, "grad_norm": 0.5016235624729062, "learning_rate": 3.6864017301973505e-05, "loss": 0.4506, "step": 34570 }, { "epoch": 1.009459715916556, "grad_norm": 0.5090209101606463, "learning_rate": 3.686131386861314e-05, "loss": 0.4504, "step": 34575 }, { "epoch": 1.0096056991868732, "grad_norm": 0.4866202687290929, "learning_rate": 3.685861043525277e-05, "loss": 0.4663, "step": 34580 }, { "epoch": 1.0097516824571904, "grad_norm": 0.5205519291900402, "learning_rate": 3.685590700189241e-05, "loss": 0.475, "step": 34585 }, { "epoch": 1.0098976657275076, "grad_norm": 0.47238023290804415, "learning_rate": 3.6853203568532034e-05, "loss": 0.4518, "step": 34590 }, { "epoch": 1.0100436489978248, "grad_norm": 0.5090571473207964, "learning_rate": 3.685050013517167e-05, "loss": 0.4884, "step": 34595 }, { "epoch": 1.010189632268142, "grad_norm": 0.43623019175458333, "learning_rate": 3.68477967018113e-05, "loss": 0.4509, "step": 34600 }, { "epoch": 1.0103356155384593, "grad_norm": 0.48318718001298416, "learning_rate": 3.6845093268450936e-05, "loss": 0.4391, "step": 34605 }, { "epoch": 1.0104815988087765, "grad_norm": 0.49508277740093515, "learning_rate": 3.684238983509057e-05, "loss": 0.4005, "step": 34610 }, { "epoch": 1.0106275820790938, "grad_norm": 0.48142333340231086, "learning_rate": 3.68396864017302e-05, "loss": 0.4657, "step": 34615 }, { "epoch": 1.010773565349411, "grad_norm": 0.4936203739291091, "learning_rate": 3.683698296836983e-05, "loss": 0.4491, "step": 34620 }, { "epoch": 1.0109195486197282, "grad_norm": 0.46599268885724915, "learning_rate": 3.6834279535009465e-05, "loss": 0.4509, "step": 34625 }, { "epoch": 1.0110655318900454, "grad_norm": 0.5587311643778018, "learning_rate": 3.683157610164909e-05, "loss": 0.4116, "step": 34630 }, { "epoch": 1.0112115151603627, "grad_norm": 0.4807846031738309, "learning_rate": 3.682887266828873e-05, "loss": 0.4638, "step": 34635 }, { "epoch": 1.01135749843068, "grad_norm": 0.5197934128726915, "learning_rate": 3.682616923492836e-05, "loss": 0.4521, "step": 34640 }, { "epoch": 1.0115034817009971, "grad_norm": 0.5183150162272621, "learning_rate": 3.682346580156799e-05, "loss": 0.448, "step": 34645 }, { "epoch": 1.0116494649713144, "grad_norm": 0.524425780403762, "learning_rate": 3.682076236820762e-05, "loss": 0.4259, "step": 34650 }, { "epoch": 1.0117954482416316, "grad_norm": 0.45199768433063775, "learning_rate": 3.681805893484726e-05, "loss": 0.4223, "step": 34655 }, { "epoch": 1.0119414315119488, "grad_norm": 0.4388862760134689, "learning_rate": 3.681535550148689e-05, "loss": 0.4244, "step": 34660 }, { "epoch": 1.012087414782266, "grad_norm": 0.47932568277462395, "learning_rate": 3.6812652068126524e-05, "loss": 0.4321, "step": 34665 }, { "epoch": 1.0122333980525833, "grad_norm": 0.538112358051248, "learning_rate": 3.680994863476616e-05, "loss": 0.4685, "step": 34670 }, { "epoch": 1.0123793813229005, "grad_norm": 0.5048510962774407, "learning_rate": 3.6807245201405785e-05, "loss": 0.4427, "step": 34675 }, { "epoch": 1.0125253645932175, "grad_norm": 0.5022074993591062, "learning_rate": 3.680454176804542e-05, "loss": 0.4284, "step": 34680 }, { "epoch": 1.0126713478635347, "grad_norm": 0.45628124477353627, "learning_rate": 3.680183833468505e-05, "loss": 0.4593, "step": 34685 }, { "epoch": 1.012817331133852, "grad_norm": 0.5312157802037527, "learning_rate": 3.679913490132468e-05, "loss": 0.4513, "step": 34690 }, { "epoch": 1.0129633144041692, "grad_norm": 0.4759275982880759, "learning_rate": 3.6796431467964314e-05, "loss": 0.4398, "step": 34695 }, { "epoch": 1.0131092976744864, "grad_norm": 0.5003918167338688, "learning_rate": 3.679372803460395e-05, "loss": 0.4505, "step": 34700 }, { "epoch": 1.0132552809448037, "grad_norm": 0.4653070342446947, "learning_rate": 3.6791024601243576e-05, "loss": 0.4499, "step": 34705 }, { "epoch": 1.013401264215121, "grad_norm": 0.47074330008287674, "learning_rate": 3.678832116788321e-05, "loss": 0.4585, "step": 34710 }, { "epoch": 1.0135472474854381, "grad_norm": 0.5167363246716666, "learning_rate": 3.678561773452285e-05, "loss": 0.4458, "step": 34715 }, { "epoch": 1.0136932307557553, "grad_norm": 0.4741809700739694, "learning_rate": 3.678291430116248e-05, "loss": 0.4491, "step": 34720 }, { "epoch": 1.0138392140260726, "grad_norm": 0.46230805119311497, "learning_rate": 3.678021086780211e-05, "loss": 0.4105, "step": 34725 }, { "epoch": 1.0139851972963898, "grad_norm": 0.4573207018877494, "learning_rate": 3.6777507434441746e-05, "loss": 0.4324, "step": 34730 }, { "epoch": 1.014131180566707, "grad_norm": 0.4398720324639679, "learning_rate": 3.677480400108137e-05, "loss": 0.4357, "step": 34735 }, { "epoch": 1.0142771638370243, "grad_norm": 0.43056908386366305, "learning_rate": 3.677210056772101e-05, "loss": 0.4266, "step": 34740 }, { "epoch": 1.0144231471073415, "grad_norm": 0.46447191382287817, "learning_rate": 3.676939713436064e-05, "loss": 0.4399, "step": 34745 }, { "epoch": 1.0145691303776587, "grad_norm": 0.5246586065862548, "learning_rate": 3.676669370100027e-05, "loss": 0.4514, "step": 34750 }, { "epoch": 1.014715113647976, "grad_norm": 0.43460534445684884, "learning_rate": 3.67639902676399e-05, "loss": 0.4701, "step": 34755 }, { "epoch": 1.0148610969182932, "grad_norm": 0.4928592004229817, "learning_rate": 3.6761286834279536e-05, "loss": 0.4529, "step": 34760 }, { "epoch": 1.0150070801886104, "grad_norm": 0.4271758418060749, "learning_rate": 3.6758583400919164e-05, "loss": 0.4371, "step": 34765 }, { "epoch": 1.0151530634589276, "grad_norm": 0.47595952916358836, "learning_rate": 3.6755879967558804e-05, "loss": 0.4357, "step": 34770 }, { "epoch": 1.0152990467292449, "grad_norm": 0.448871761372388, "learning_rate": 3.675317653419844e-05, "loss": 0.4211, "step": 34775 }, { "epoch": 1.015445029999562, "grad_norm": 0.5016179844661256, "learning_rate": 3.6750473100838066e-05, "loss": 0.4495, "step": 34780 }, { "epoch": 1.0155910132698793, "grad_norm": 0.4684363618364832, "learning_rate": 3.67477696674777e-05, "loss": 0.4421, "step": 34785 }, { "epoch": 1.0157369965401966, "grad_norm": 0.5636586252054696, "learning_rate": 3.6745066234117334e-05, "loss": 0.4905, "step": 34790 }, { "epoch": 1.0158829798105138, "grad_norm": 0.4788967415708988, "learning_rate": 3.674236280075696e-05, "loss": 0.437, "step": 34795 }, { "epoch": 1.016028963080831, "grad_norm": 0.484884925742909, "learning_rate": 3.6739659367396595e-05, "loss": 0.4415, "step": 34800 }, { "epoch": 1.0161749463511482, "grad_norm": 0.4646265806200349, "learning_rate": 3.673695593403623e-05, "loss": 0.4611, "step": 34805 }, { "epoch": 1.0163209296214655, "grad_norm": 0.4874669636422879, "learning_rate": 3.6734252500675856e-05, "loss": 0.4337, "step": 34810 }, { "epoch": 1.0164669128917827, "grad_norm": 0.44444113673435465, "learning_rate": 3.673154906731549e-05, "loss": 0.4121, "step": 34815 }, { "epoch": 1.0166128961621, "grad_norm": 0.4920499505299594, "learning_rate": 3.6728845633955124e-05, "loss": 0.4104, "step": 34820 }, { "epoch": 1.016758879432417, "grad_norm": 0.49686491513341113, "learning_rate": 3.672614220059476e-05, "loss": 0.4618, "step": 34825 }, { "epoch": 1.0169048627027342, "grad_norm": 0.4607250958733793, "learning_rate": 3.672343876723439e-05, "loss": 0.4325, "step": 34830 }, { "epoch": 1.0170508459730514, "grad_norm": 0.534247975956093, "learning_rate": 3.6720735333874026e-05, "loss": 0.4657, "step": 34835 }, { "epoch": 1.0171968292433686, "grad_norm": 0.4638164168878654, "learning_rate": 3.671803190051365e-05, "loss": 0.4545, "step": 34840 }, { "epoch": 1.0173428125136859, "grad_norm": 0.5150549526681757, "learning_rate": 3.671532846715329e-05, "loss": 0.4878, "step": 34845 }, { "epoch": 1.017488795784003, "grad_norm": 0.4907583668944653, "learning_rate": 3.671262503379292e-05, "loss": 0.4419, "step": 34850 }, { "epoch": 1.0176347790543203, "grad_norm": 0.4992357644419203, "learning_rate": 3.670992160043255e-05, "loss": 0.4527, "step": 34855 }, { "epoch": 1.0177807623246375, "grad_norm": 0.49065763213099234, "learning_rate": 3.670721816707218e-05, "loss": 0.4406, "step": 34860 }, { "epoch": 1.0179267455949548, "grad_norm": 0.5139345135438788, "learning_rate": 3.670451473371182e-05, "loss": 0.4253, "step": 34865 }, { "epoch": 1.018072728865272, "grad_norm": 0.5046532264247187, "learning_rate": 3.6701811300351444e-05, "loss": 0.4538, "step": 34870 }, { "epoch": 1.0182187121355892, "grad_norm": 0.5016736026529524, "learning_rate": 3.669910786699108e-05, "loss": 0.4526, "step": 34875 }, { "epoch": 1.0183646954059065, "grad_norm": 0.4573509933042238, "learning_rate": 3.669640443363071e-05, "loss": 0.4402, "step": 34880 }, { "epoch": 1.0185106786762237, "grad_norm": 0.46596102858858485, "learning_rate": 3.6693701000270346e-05, "loss": 0.4323, "step": 34885 }, { "epoch": 1.018656661946541, "grad_norm": 0.48372717742614246, "learning_rate": 3.669099756690998e-05, "loss": 0.447, "step": 34890 }, { "epoch": 1.0188026452168581, "grad_norm": 0.4986214132435557, "learning_rate": 3.6688294133549614e-05, "loss": 0.4659, "step": 34895 }, { "epoch": 1.0189486284871754, "grad_norm": 0.5209945319587861, "learning_rate": 3.668559070018924e-05, "loss": 0.4583, "step": 34900 }, { "epoch": 1.0190946117574926, "grad_norm": 0.45404407248842915, "learning_rate": 3.6682887266828875e-05, "loss": 0.4473, "step": 34905 }, { "epoch": 1.0192405950278098, "grad_norm": 0.4877262075596995, "learning_rate": 3.668018383346851e-05, "loss": 0.446, "step": 34910 }, { "epoch": 1.019386578298127, "grad_norm": 0.4635908757956665, "learning_rate": 3.6677480400108136e-05, "loss": 0.4731, "step": 34915 }, { "epoch": 1.0195325615684443, "grad_norm": 0.48205587195560784, "learning_rate": 3.667477696674777e-05, "loss": 0.4538, "step": 34920 }, { "epoch": 1.0196785448387615, "grad_norm": 0.49463179441758487, "learning_rate": 3.6672073533387404e-05, "loss": 0.4699, "step": 34925 }, { "epoch": 1.0198245281090788, "grad_norm": 0.46734028949312006, "learning_rate": 3.666937010002703e-05, "loss": 0.4302, "step": 34930 }, { "epoch": 1.019970511379396, "grad_norm": 0.5196376152410818, "learning_rate": 3.6666666666666666e-05, "loss": 0.4786, "step": 34935 }, { "epoch": 1.0201164946497132, "grad_norm": 0.48908259603827176, "learning_rate": 3.6663963233306307e-05, "loss": 0.4123, "step": 34940 }, { "epoch": 1.0202624779200304, "grad_norm": 0.46775087140470945, "learning_rate": 3.6661259799945934e-05, "loss": 0.4067, "step": 34945 }, { "epoch": 1.0204084611903477, "grad_norm": 0.504341652548316, "learning_rate": 3.665855636658557e-05, "loss": 0.4585, "step": 34950 }, { "epoch": 1.020554444460665, "grad_norm": 0.5138246909388102, "learning_rate": 3.66558529332252e-05, "loss": 0.4476, "step": 34955 }, { "epoch": 1.0207004277309821, "grad_norm": 0.4700848362709616, "learning_rate": 3.665314949986483e-05, "loss": 0.4647, "step": 34960 }, { "epoch": 1.0208464110012994, "grad_norm": 0.5121768238580952, "learning_rate": 3.665044606650446e-05, "loss": 0.4469, "step": 34965 }, { "epoch": 1.0209923942716164, "grad_norm": 0.4984297446912709, "learning_rate": 3.66477426331441e-05, "loss": 0.4618, "step": 34970 }, { "epoch": 1.0211383775419336, "grad_norm": 0.4621825077040563, "learning_rate": 3.6645039199783724e-05, "loss": 0.444, "step": 34975 }, { "epoch": 1.0212843608122508, "grad_norm": 0.48202508680680317, "learning_rate": 3.664233576642336e-05, "loss": 0.4585, "step": 34980 }, { "epoch": 1.021430344082568, "grad_norm": 0.4784452363800074, "learning_rate": 3.663963233306299e-05, "loss": 0.4414, "step": 34985 }, { "epoch": 1.0215763273528853, "grad_norm": 0.5274696034045429, "learning_rate": 3.663692889970262e-05, "loss": 0.4552, "step": 34990 }, { "epoch": 1.0217223106232025, "grad_norm": 0.5131886681512174, "learning_rate": 3.663422546634226e-05, "loss": 0.4574, "step": 34995 }, { "epoch": 1.0218682938935197, "grad_norm": 0.5235107844728817, "learning_rate": 3.663152203298189e-05, "loss": 0.4757, "step": 35000 }, { "epoch": 1.022014277163837, "grad_norm": 0.5091150110462868, "learning_rate": 3.662881859962152e-05, "loss": 0.4467, "step": 35005 }, { "epoch": 1.0221602604341542, "grad_norm": 0.4687812308241603, "learning_rate": 3.6626115166261156e-05, "loss": 0.4171, "step": 35010 }, { "epoch": 1.0223062437044714, "grad_norm": 0.4897097980189667, "learning_rate": 3.662341173290079e-05, "loss": 0.4413, "step": 35015 }, { "epoch": 1.0224522269747887, "grad_norm": 0.4998764288615226, "learning_rate": 3.662070829954042e-05, "loss": 0.4399, "step": 35020 }, { "epoch": 1.0225982102451059, "grad_norm": 0.4574930598960751, "learning_rate": 3.661800486618005e-05, "loss": 0.4544, "step": 35025 }, { "epoch": 1.0227441935154231, "grad_norm": 0.509433287329657, "learning_rate": 3.6615301432819685e-05, "loss": 0.4392, "step": 35030 }, { "epoch": 1.0228901767857403, "grad_norm": 0.5287560042181183, "learning_rate": 3.661259799945931e-05, "loss": 0.4615, "step": 35035 }, { "epoch": 1.0230361600560576, "grad_norm": 0.49371254521179203, "learning_rate": 3.6609894566098946e-05, "loss": 0.4346, "step": 35040 }, { "epoch": 1.0231821433263748, "grad_norm": 0.5170980228895914, "learning_rate": 3.660719113273858e-05, "loss": 0.4336, "step": 35045 }, { "epoch": 1.023328126596692, "grad_norm": 0.4572453916717076, "learning_rate": 3.660448769937821e-05, "loss": 0.4455, "step": 35050 }, { "epoch": 1.0234741098670093, "grad_norm": 0.4906007743691841, "learning_rate": 3.660178426601785e-05, "loss": 0.4423, "step": 35055 }, { "epoch": 1.0236200931373265, "grad_norm": 0.5194318264150688, "learning_rate": 3.6599080832657475e-05, "loss": 0.4563, "step": 35060 }, { "epoch": 1.0237660764076437, "grad_norm": 0.4788058145312078, "learning_rate": 3.659637739929711e-05, "loss": 0.4376, "step": 35065 }, { "epoch": 1.023912059677961, "grad_norm": 0.47255578615627575, "learning_rate": 3.659367396593674e-05, "loss": 0.4392, "step": 35070 }, { "epoch": 1.0240580429482782, "grad_norm": 0.5580525850384284, "learning_rate": 3.659097053257638e-05, "loss": 0.4395, "step": 35075 }, { "epoch": 1.0242040262185954, "grad_norm": 0.517082248894776, "learning_rate": 3.6588267099216005e-05, "loss": 0.4563, "step": 35080 }, { "epoch": 1.0243500094889126, "grad_norm": 0.4726015913611309, "learning_rate": 3.658556366585564e-05, "loss": 0.4598, "step": 35085 }, { "epoch": 1.0244959927592299, "grad_norm": 0.483510227751084, "learning_rate": 3.658286023249527e-05, "loss": 0.4656, "step": 35090 }, { "epoch": 1.024641976029547, "grad_norm": 0.475524805642634, "learning_rate": 3.65801567991349e-05, "loss": 0.4318, "step": 35095 }, { "epoch": 1.0247879592998643, "grad_norm": 0.5188332715544547, "learning_rate": 3.6577453365774534e-05, "loss": 0.5147, "step": 35100 }, { "epoch": 1.0249339425701816, "grad_norm": 0.4720234795648167, "learning_rate": 3.657474993241417e-05, "loss": 0.4648, "step": 35105 }, { "epoch": 1.0250799258404988, "grad_norm": 0.4479689467276955, "learning_rate": 3.65720464990538e-05, "loss": 0.4454, "step": 35110 }, { "epoch": 1.0252259091108158, "grad_norm": 0.5010112407961402, "learning_rate": 3.6569343065693436e-05, "loss": 0.465, "step": 35115 }, { "epoch": 1.025371892381133, "grad_norm": 0.48069099780061936, "learning_rate": 3.656663963233306e-05, "loss": 0.4413, "step": 35120 }, { "epoch": 1.0255178756514503, "grad_norm": 0.44247080308599995, "learning_rate": 3.65639361989727e-05, "loss": 0.4432, "step": 35125 }, { "epoch": 1.0256638589217675, "grad_norm": 0.4732925507866004, "learning_rate": 3.656123276561233e-05, "loss": 0.4328, "step": 35130 }, { "epoch": 1.0258098421920847, "grad_norm": 0.4970454969722106, "learning_rate": 3.655852933225196e-05, "loss": 0.461, "step": 35135 }, { "epoch": 1.025955825462402, "grad_norm": 0.48173887066540844, "learning_rate": 3.655582589889159e-05, "loss": 0.4446, "step": 35140 }, { "epoch": 1.0261018087327192, "grad_norm": 0.4844884851742542, "learning_rate": 3.6553122465531226e-05, "loss": 0.4432, "step": 35145 }, { "epoch": 1.0262477920030364, "grad_norm": 0.4975022067497281, "learning_rate": 3.655041903217086e-05, "loss": 0.4562, "step": 35150 }, { "epoch": 1.0263937752733536, "grad_norm": 0.4778387651258652, "learning_rate": 3.654771559881049e-05, "loss": 0.4359, "step": 35155 }, { "epoch": 1.0265397585436709, "grad_norm": 0.48191335812498376, "learning_rate": 3.654501216545012e-05, "loss": 0.4591, "step": 35160 }, { "epoch": 1.026685741813988, "grad_norm": 0.4645779386465807, "learning_rate": 3.6542308732089756e-05, "loss": 0.4162, "step": 35165 }, { "epoch": 1.0268317250843053, "grad_norm": 0.5024417745541955, "learning_rate": 3.653960529872939e-05, "loss": 0.4666, "step": 35170 }, { "epoch": 1.0269777083546225, "grad_norm": 0.46217290625756735, "learning_rate": 3.6536901865369024e-05, "loss": 0.4624, "step": 35175 }, { "epoch": 1.0271236916249398, "grad_norm": 0.46342439825221804, "learning_rate": 3.653419843200865e-05, "loss": 0.4454, "step": 35180 }, { "epoch": 1.027269674895257, "grad_norm": 0.4698150313440131, "learning_rate": 3.6531494998648285e-05, "loss": 0.5045, "step": 35185 }, { "epoch": 1.0274156581655742, "grad_norm": 0.5096624043679229, "learning_rate": 3.652879156528792e-05, "loss": 0.4426, "step": 35190 }, { "epoch": 1.0275616414358915, "grad_norm": 0.484464801424897, "learning_rate": 3.6526088131927546e-05, "loss": 0.4364, "step": 35195 }, { "epoch": 1.0277076247062087, "grad_norm": 0.5069611294435313, "learning_rate": 3.652338469856718e-05, "loss": 0.4676, "step": 35200 }, { "epoch": 1.027853607976526, "grad_norm": 0.42474844975150083, "learning_rate": 3.6520681265206814e-05, "loss": 0.4533, "step": 35205 }, { "epoch": 1.0279995912468431, "grad_norm": 0.49616139334407167, "learning_rate": 3.651797783184645e-05, "loss": 0.465, "step": 35210 }, { "epoch": 1.0281455745171604, "grad_norm": 0.4844364208536649, "learning_rate": 3.6515274398486075e-05, "loss": 0.4523, "step": 35215 }, { "epoch": 1.0282915577874776, "grad_norm": 0.5091447527386784, "learning_rate": 3.651257096512571e-05, "loss": 0.4355, "step": 35220 }, { "epoch": 1.0284375410577948, "grad_norm": 0.5108595622977898, "learning_rate": 3.6509867531765344e-05, "loss": 0.4204, "step": 35225 }, { "epoch": 1.028583524328112, "grad_norm": 0.4963279064721689, "learning_rate": 3.650716409840498e-05, "loss": 0.4133, "step": 35230 }, { "epoch": 1.0287295075984293, "grad_norm": 0.4576739585449746, "learning_rate": 3.650446066504461e-05, "loss": 0.4441, "step": 35235 }, { "epoch": 1.0288754908687465, "grad_norm": 0.49804618222543884, "learning_rate": 3.650175723168424e-05, "loss": 0.4688, "step": 35240 }, { "epoch": 1.0290214741390638, "grad_norm": 0.4824793543795803, "learning_rate": 3.649905379832387e-05, "loss": 0.4417, "step": 35245 }, { "epoch": 1.029167457409381, "grad_norm": 0.48647214006128214, "learning_rate": 3.649635036496351e-05, "loss": 0.4769, "step": 35250 }, { "epoch": 1.0293134406796982, "grad_norm": 0.46791759258880306, "learning_rate": 3.6493646931603134e-05, "loss": 0.444, "step": 35255 }, { "epoch": 1.0294594239500152, "grad_norm": 0.46378868356797387, "learning_rate": 3.649094349824277e-05, "loss": 0.4895, "step": 35260 }, { "epoch": 1.0296054072203324, "grad_norm": 0.4662197099067784, "learning_rate": 3.64882400648824e-05, "loss": 0.4332, "step": 35265 }, { "epoch": 1.0297513904906497, "grad_norm": 0.4442958888790567, "learning_rate": 3.648553663152203e-05, "loss": 0.4208, "step": 35270 }, { "epoch": 1.029897373760967, "grad_norm": 0.6730186308527248, "learning_rate": 3.648283319816166e-05, "loss": 0.4545, "step": 35275 }, { "epoch": 1.0300433570312841, "grad_norm": 0.4473620999880052, "learning_rate": 3.6480129764801304e-05, "loss": 0.4485, "step": 35280 }, { "epoch": 1.0301893403016014, "grad_norm": 0.5291171397915683, "learning_rate": 3.647742633144093e-05, "loss": 0.4517, "step": 35285 }, { "epoch": 1.0303353235719186, "grad_norm": 0.4467162367663976, "learning_rate": 3.6474722898080565e-05, "loss": 0.4386, "step": 35290 }, { "epoch": 1.0304813068422358, "grad_norm": 0.44503723308090903, "learning_rate": 3.64720194647202e-05, "loss": 0.4268, "step": 35295 }, { "epoch": 1.030627290112553, "grad_norm": 0.489217365818923, "learning_rate": 3.6469316031359827e-05, "loss": 0.4418, "step": 35300 }, { "epoch": 1.0307732733828703, "grad_norm": 0.4303158084458488, "learning_rate": 3.646661259799946e-05, "loss": 0.4288, "step": 35305 }, { "epoch": 1.0309192566531875, "grad_norm": 0.4677207309192138, "learning_rate": 3.6463909164639095e-05, "loss": 0.4524, "step": 35310 }, { "epoch": 1.0310652399235047, "grad_norm": 0.51056099429133, "learning_rate": 3.646120573127872e-05, "loss": 0.4563, "step": 35315 }, { "epoch": 1.031211223193822, "grad_norm": 0.4927686738939354, "learning_rate": 3.6458502297918356e-05, "loss": 0.4714, "step": 35320 }, { "epoch": 1.0313572064641392, "grad_norm": 0.5008674748735716, "learning_rate": 3.645579886455799e-05, "loss": 0.4335, "step": 35325 }, { "epoch": 1.0315031897344564, "grad_norm": 0.4561905831607986, "learning_rate": 3.645309543119762e-05, "loss": 0.4319, "step": 35330 }, { "epoch": 1.0316491730047737, "grad_norm": 0.5021372048878673, "learning_rate": 3.645039199783726e-05, "loss": 0.4723, "step": 35335 }, { "epoch": 1.0317951562750909, "grad_norm": 0.5021224914506103, "learning_rate": 3.644768856447689e-05, "loss": 0.4541, "step": 35340 }, { "epoch": 1.0319411395454081, "grad_norm": 0.49045432017666724, "learning_rate": 3.644498513111652e-05, "loss": 0.4249, "step": 35345 }, { "epoch": 1.0320871228157253, "grad_norm": 0.5364999942459481, "learning_rate": 3.644228169775615e-05, "loss": 0.4495, "step": 35350 }, { "epoch": 1.0322331060860426, "grad_norm": 0.5241190488470769, "learning_rate": 3.643957826439579e-05, "loss": 0.4624, "step": 35355 }, { "epoch": 1.0323790893563598, "grad_norm": 0.5116954060508693, "learning_rate": 3.6436874831035414e-05, "loss": 0.4411, "step": 35360 }, { "epoch": 1.032525072626677, "grad_norm": 0.49781827216657315, "learning_rate": 3.643417139767505e-05, "loss": 0.4751, "step": 35365 }, { "epoch": 1.0326710558969943, "grad_norm": 0.4587863750669065, "learning_rate": 3.643146796431468e-05, "loss": 0.4186, "step": 35370 }, { "epoch": 1.0328170391673115, "grad_norm": 0.4429160492209204, "learning_rate": 3.642876453095431e-05, "loss": 0.4403, "step": 35375 }, { "epoch": 1.0329630224376287, "grad_norm": 0.5350510213503963, "learning_rate": 3.6426061097593944e-05, "loss": 0.4654, "step": 35380 }, { "epoch": 1.033109005707946, "grad_norm": 0.4723244309820009, "learning_rate": 3.642335766423358e-05, "loss": 0.4234, "step": 35385 }, { "epoch": 1.0332549889782632, "grad_norm": 0.4640088172060209, "learning_rate": 3.642065423087321e-05, "loss": 0.4484, "step": 35390 }, { "epoch": 1.0334009722485804, "grad_norm": 0.4448066923069756, "learning_rate": 3.6417950797512846e-05, "loss": 0.4299, "step": 35395 }, { "epoch": 1.0335469555188976, "grad_norm": 0.49152228283063387, "learning_rate": 3.641524736415248e-05, "loss": 0.443, "step": 35400 }, { "epoch": 1.0336929387892146, "grad_norm": 0.5376669488799707, "learning_rate": 3.641254393079211e-05, "loss": 0.459, "step": 35405 }, { "epoch": 1.0338389220595319, "grad_norm": 0.45345356490284716, "learning_rate": 3.640984049743174e-05, "loss": 0.4586, "step": 35410 }, { "epoch": 1.033984905329849, "grad_norm": 0.5334078085163575, "learning_rate": 3.6407137064071375e-05, "loss": 0.474, "step": 35415 }, { "epoch": 1.0341308886001663, "grad_norm": 0.46764262890532343, "learning_rate": 3.6404433630711e-05, "loss": 0.4358, "step": 35420 }, { "epoch": 1.0342768718704836, "grad_norm": 0.5035421725706056, "learning_rate": 3.6401730197350636e-05, "loss": 0.4681, "step": 35425 }, { "epoch": 1.0344228551408008, "grad_norm": 0.47438849335989547, "learning_rate": 3.639902676399027e-05, "loss": 0.4518, "step": 35430 }, { "epoch": 1.034568838411118, "grad_norm": 0.4756617789874237, "learning_rate": 3.63963233306299e-05, "loss": 0.4385, "step": 35435 }, { "epoch": 1.0347148216814352, "grad_norm": 0.5104746144993041, "learning_rate": 3.639361989726953e-05, "loss": 0.4601, "step": 35440 }, { "epoch": 1.0348608049517525, "grad_norm": 0.48785377426103355, "learning_rate": 3.6390916463909165e-05, "loss": 0.4981, "step": 35445 }, { "epoch": 1.0350067882220697, "grad_norm": 0.4661516775720678, "learning_rate": 3.63882130305488e-05, "loss": 0.4537, "step": 35450 }, { "epoch": 1.035152771492387, "grad_norm": 0.522339222547212, "learning_rate": 3.6385509597188433e-05, "loss": 0.4328, "step": 35455 }, { "epoch": 1.0352987547627042, "grad_norm": 0.41200240586366543, "learning_rate": 3.638280616382807e-05, "loss": 0.4255, "step": 35460 }, { "epoch": 1.0354447380330214, "grad_norm": 0.5348778868053936, "learning_rate": 3.6380102730467695e-05, "loss": 0.4516, "step": 35465 }, { "epoch": 1.0355907213033386, "grad_norm": 0.4320497004966528, "learning_rate": 3.637739929710733e-05, "loss": 0.4302, "step": 35470 }, { "epoch": 1.0357367045736559, "grad_norm": 0.4324478771431389, "learning_rate": 3.637469586374696e-05, "loss": 0.4125, "step": 35475 }, { "epoch": 1.035882687843973, "grad_norm": 0.48887279474099515, "learning_rate": 3.637199243038659e-05, "loss": 0.4456, "step": 35480 }, { "epoch": 1.0360286711142903, "grad_norm": 0.5107985365242592, "learning_rate": 3.6369288997026224e-05, "loss": 0.4458, "step": 35485 }, { "epoch": 1.0361746543846075, "grad_norm": 0.42443294028973116, "learning_rate": 3.636658556366586e-05, "loss": 0.408, "step": 35490 }, { "epoch": 1.0363206376549248, "grad_norm": 0.45662369473063563, "learning_rate": 3.6363882130305485e-05, "loss": 0.4484, "step": 35495 }, { "epoch": 1.036466620925242, "grad_norm": 0.48148097674152407, "learning_rate": 3.636117869694512e-05, "loss": 0.4283, "step": 35500 }, { "epoch": 1.0366126041955592, "grad_norm": 0.5239681180029466, "learning_rate": 3.635847526358476e-05, "loss": 0.447, "step": 35505 }, { "epoch": 1.0367585874658765, "grad_norm": 0.47108141544880827, "learning_rate": 3.635577183022439e-05, "loss": 0.4264, "step": 35510 }, { "epoch": 1.0369045707361937, "grad_norm": 0.4707343302109889, "learning_rate": 3.635306839686402e-05, "loss": 0.441, "step": 35515 }, { "epoch": 1.037050554006511, "grad_norm": 0.49447053474243846, "learning_rate": 3.6350364963503655e-05, "loss": 0.4233, "step": 35520 }, { "epoch": 1.0371965372768281, "grad_norm": 0.4720538113819336, "learning_rate": 3.634766153014328e-05, "loss": 0.4474, "step": 35525 }, { "epoch": 1.0373425205471454, "grad_norm": 0.4835049549720146, "learning_rate": 3.6344958096782917e-05, "loss": 0.4584, "step": 35530 }, { "epoch": 1.0374885038174626, "grad_norm": 0.468468344108591, "learning_rate": 3.634225466342255e-05, "loss": 0.4853, "step": 35535 }, { "epoch": 1.0376344870877798, "grad_norm": 0.5135764669930831, "learning_rate": 3.633955123006218e-05, "loss": 0.4545, "step": 35540 }, { "epoch": 1.037780470358097, "grad_norm": 0.49861007662247947, "learning_rate": 3.633684779670181e-05, "loss": 0.4308, "step": 35545 }, { "epoch": 1.037926453628414, "grad_norm": 0.48946501212919175, "learning_rate": 3.6334144363341446e-05, "loss": 0.4464, "step": 35550 }, { "epoch": 1.0380724368987313, "grad_norm": 0.47912484282307, "learning_rate": 3.633144092998107e-05, "loss": 0.4664, "step": 35555 }, { "epoch": 1.0382184201690485, "grad_norm": 0.48097376127857816, "learning_rate": 3.6328737496620714e-05, "loss": 0.4637, "step": 35560 }, { "epoch": 1.0383644034393658, "grad_norm": 0.47598227329619835, "learning_rate": 3.632603406326035e-05, "loss": 0.4309, "step": 35565 }, { "epoch": 1.038510386709683, "grad_norm": 0.4629629827599092, "learning_rate": 3.6323330629899975e-05, "loss": 0.4424, "step": 35570 }, { "epoch": 1.0386563699800002, "grad_norm": 0.45058089803067536, "learning_rate": 3.632062719653961e-05, "loss": 0.4101, "step": 35575 }, { "epoch": 1.0388023532503174, "grad_norm": 0.5221760611781298, "learning_rate": 3.631792376317924e-05, "loss": 0.4531, "step": 35580 }, { "epoch": 1.0389483365206347, "grad_norm": 0.468811809033396, "learning_rate": 3.631522032981887e-05, "loss": 0.4399, "step": 35585 }, { "epoch": 1.039094319790952, "grad_norm": 0.49596941146619244, "learning_rate": 3.6312516896458504e-05, "loss": 0.4516, "step": 35590 }, { "epoch": 1.0392403030612691, "grad_norm": 0.5273685709040056, "learning_rate": 3.630981346309814e-05, "loss": 0.4123, "step": 35595 }, { "epoch": 1.0393862863315864, "grad_norm": 0.4605028219217751, "learning_rate": 3.6307110029737766e-05, "loss": 0.4488, "step": 35600 }, { "epoch": 1.0395322696019036, "grad_norm": 0.4639368463674698, "learning_rate": 3.63044065963774e-05, "loss": 0.4136, "step": 35605 }, { "epoch": 1.0396782528722208, "grad_norm": 0.5073307217131723, "learning_rate": 3.6301703163017034e-05, "loss": 0.4648, "step": 35610 }, { "epoch": 1.039824236142538, "grad_norm": 0.46475151897615435, "learning_rate": 3.629899972965666e-05, "loss": 0.4241, "step": 35615 }, { "epoch": 1.0399702194128553, "grad_norm": 0.5147171536964487, "learning_rate": 3.62962962962963e-05, "loss": 0.4464, "step": 35620 }, { "epoch": 1.0401162026831725, "grad_norm": 0.5255424023358196, "learning_rate": 3.6293592862935936e-05, "loss": 0.4583, "step": 35625 }, { "epoch": 1.0402621859534897, "grad_norm": 0.530646715297433, "learning_rate": 3.629088942957556e-05, "loss": 0.4563, "step": 35630 }, { "epoch": 1.040408169223807, "grad_norm": 0.5308890859757747, "learning_rate": 3.62881859962152e-05, "loss": 0.4413, "step": 35635 }, { "epoch": 1.0405541524941242, "grad_norm": 0.4516330694282592, "learning_rate": 3.628548256285483e-05, "loss": 0.43, "step": 35640 }, { "epoch": 1.0407001357644414, "grad_norm": 0.5068389710247097, "learning_rate": 3.628277912949446e-05, "loss": 0.4101, "step": 35645 }, { "epoch": 1.0408461190347587, "grad_norm": 0.47457207388875483, "learning_rate": 3.628007569613409e-05, "loss": 0.4112, "step": 35650 }, { "epoch": 1.0409921023050759, "grad_norm": 0.4590093566596017, "learning_rate": 3.6277372262773726e-05, "loss": 0.3985, "step": 35655 }, { "epoch": 1.0411380855753931, "grad_norm": 0.48654203993289463, "learning_rate": 3.6274668829413353e-05, "loss": 0.4333, "step": 35660 }, { "epoch": 1.0412840688457103, "grad_norm": 0.5149487850115484, "learning_rate": 3.627196539605299e-05, "loss": 0.4539, "step": 35665 }, { "epoch": 1.0414300521160276, "grad_norm": 0.4973217418475557, "learning_rate": 3.626926196269262e-05, "loss": 0.4398, "step": 35670 }, { "epoch": 1.0415760353863448, "grad_norm": 0.4577893031516144, "learning_rate": 3.6266558529332255e-05, "loss": 0.4195, "step": 35675 }, { "epoch": 1.041722018656662, "grad_norm": 0.4616484414731765, "learning_rate": 3.626385509597189e-05, "loss": 0.4632, "step": 35680 }, { "epoch": 1.0418680019269793, "grad_norm": 0.5020558178590007, "learning_rate": 3.626115166261152e-05, "loss": 0.4297, "step": 35685 }, { "epoch": 1.0420139851972965, "grad_norm": 0.5053026103273748, "learning_rate": 3.625844822925115e-05, "loss": 0.4331, "step": 35690 }, { "epoch": 1.0421599684676137, "grad_norm": 0.4599093037771762, "learning_rate": 3.6255744795890785e-05, "loss": 0.4268, "step": 35695 }, { "epoch": 1.042305951737931, "grad_norm": 0.47639934971613046, "learning_rate": 3.625304136253042e-05, "loss": 0.4499, "step": 35700 }, { "epoch": 1.042451935008248, "grad_norm": 0.5154905770608711, "learning_rate": 3.6250337929170046e-05, "loss": 0.4404, "step": 35705 }, { "epoch": 1.0425979182785652, "grad_norm": 0.5229240093048905, "learning_rate": 3.624763449580968e-05, "loss": 0.4486, "step": 35710 }, { "epoch": 1.0427439015488824, "grad_norm": 0.4731998161684238, "learning_rate": 3.6244931062449314e-05, "loss": 0.4438, "step": 35715 }, { "epoch": 1.0428898848191996, "grad_norm": 0.5118443248162603, "learning_rate": 3.624222762908894e-05, "loss": 0.4539, "step": 35720 }, { "epoch": 1.0430358680895169, "grad_norm": 0.4689666830715714, "learning_rate": 3.6239524195728575e-05, "loss": 0.4469, "step": 35725 }, { "epoch": 1.043181851359834, "grad_norm": 0.48858131771557767, "learning_rate": 3.623682076236821e-05, "loss": 0.4649, "step": 35730 }, { "epoch": 1.0433278346301513, "grad_norm": 0.5622481732429649, "learning_rate": 3.623411732900784e-05, "loss": 0.446, "step": 35735 }, { "epoch": 1.0434738179004686, "grad_norm": 0.4857933503297927, "learning_rate": 3.623141389564748e-05, "loss": 0.4584, "step": 35740 }, { "epoch": 1.0436198011707858, "grad_norm": 0.481744708987077, "learning_rate": 3.6228710462287104e-05, "loss": 0.4153, "step": 35745 }, { "epoch": 1.043765784441103, "grad_norm": 0.4745518751736025, "learning_rate": 3.622600702892674e-05, "loss": 0.442, "step": 35750 }, { "epoch": 1.0439117677114202, "grad_norm": 0.459568810758634, "learning_rate": 3.622330359556637e-05, "loss": 0.4637, "step": 35755 }, { "epoch": 1.0440577509817375, "grad_norm": 0.5046163991788138, "learning_rate": 3.6220600162206007e-05, "loss": 0.4568, "step": 35760 }, { "epoch": 1.0442037342520547, "grad_norm": 0.5202885448153252, "learning_rate": 3.6217896728845634e-05, "loss": 0.4453, "step": 35765 }, { "epoch": 1.044349717522372, "grad_norm": 0.5136804989601466, "learning_rate": 3.621519329548527e-05, "loss": 0.4696, "step": 35770 }, { "epoch": 1.0444957007926892, "grad_norm": 0.46224793692515687, "learning_rate": 3.62124898621249e-05, "loss": 0.4511, "step": 35775 }, { "epoch": 1.0446416840630064, "grad_norm": 0.5221114769800189, "learning_rate": 3.620978642876453e-05, "loss": 0.4534, "step": 35780 }, { "epoch": 1.0447876673333236, "grad_norm": 0.4569017556102374, "learning_rate": 3.620708299540416e-05, "loss": 0.4683, "step": 35785 }, { "epoch": 1.0449336506036409, "grad_norm": 0.48023259254314926, "learning_rate": 3.62043795620438e-05, "loss": 0.4446, "step": 35790 }, { "epoch": 1.045079633873958, "grad_norm": 0.448798451946459, "learning_rate": 3.620167612868343e-05, "loss": 0.4325, "step": 35795 }, { "epoch": 1.0452256171442753, "grad_norm": 0.5010115007018804, "learning_rate": 3.6198972695323065e-05, "loss": 0.4581, "step": 35800 }, { "epoch": 1.0453716004145925, "grad_norm": 0.51115108557374, "learning_rate": 3.619626926196269e-05, "loss": 0.4591, "step": 35805 }, { "epoch": 1.0455175836849098, "grad_norm": 0.4911210912071766, "learning_rate": 3.6193565828602326e-05, "loss": 0.4696, "step": 35810 }, { "epoch": 1.045663566955227, "grad_norm": 0.47234618155899494, "learning_rate": 3.619086239524196e-05, "loss": 0.438, "step": 35815 }, { "epoch": 1.0458095502255442, "grad_norm": 0.4902454683991678, "learning_rate": 3.618815896188159e-05, "loss": 0.4376, "step": 35820 }, { "epoch": 1.0459555334958615, "grad_norm": 0.5206243944026882, "learning_rate": 3.618545552852122e-05, "loss": 0.4702, "step": 35825 }, { "epoch": 1.0461015167661787, "grad_norm": 0.4676720575759324, "learning_rate": 3.6182752095160856e-05, "loss": 0.4371, "step": 35830 }, { "epoch": 1.046247500036496, "grad_norm": 0.4567697561737159, "learning_rate": 3.618004866180049e-05, "loss": 0.4443, "step": 35835 }, { "epoch": 1.0463934833068131, "grad_norm": 0.44353078504379306, "learning_rate": 3.617734522844012e-05, "loss": 0.4338, "step": 35840 }, { "epoch": 1.0465394665771304, "grad_norm": 0.4525461768049567, "learning_rate": 3.617464179507976e-05, "loss": 0.4079, "step": 35845 }, { "epoch": 1.0466854498474474, "grad_norm": 0.48796746177842387, "learning_rate": 3.6171938361719385e-05, "loss": 0.429, "step": 35850 }, { "epoch": 1.0468314331177646, "grad_norm": 0.5074152523379918, "learning_rate": 3.616923492835902e-05, "loss": 0.4313, "step": 35855 }, { "epoch": 1.0469774163880818, "grad_norm": 0.5061605189905588, "learning_rate": 3.616653149499865e-05, "loss": 0.4623, "step": 35860 }, { "epoch": 1.047123399658399, "grad_norm": 0.4686992195131351, "learning_rate": 3.616382806163828e-05, "loss": 0.4281, "step": 35865 }, { "epoch": 1.0472693829287163, "grad_norm": 0.507843088103929, "learning_rate": 3.6161124628277914e-05, "loss": 0.4427, "step": 35870 }, { "epoch": 1.0474153661990335, "grad_norm": 0.4836220161302858, "learning_rate": 3.615842119491755e-05, "loss": 0.4561, "step": 35875 }, { "epoch": 1.0475613494693508, "grad_norm": 0.4781253953016932, "learning_rate": 3.6155717761557175e-05, "loss": 0.4288, "step": 35880 }, { "epoch": 1.047707332739668, "grad_norm": 0.5287228503201638, "learning_rate": 3.615301432819681e-05, "loss": 0.4292, "step": 35885 }, { "epoch": 1.0478533160099852, "grad_norm": 0.5070864697924299, "learning_rate": 3.6150310894836443e-05, "loss": 0.4359, "step": 35890 }, { "epoch": 1.0479992992803024, "grad_norm": 0.44721072646010773, "learning_rate": 3.614760746147608e-05, "loss": 0.4192, "step": 35895 }, { "epoch": 1.0481452825506197, "grad_norm": 0.49864488383177713, "learning_rate": 3.614490402811571e-05, "loss": 0.4476, "step": 35900 }, { "epoch": 1.048291265820937, "grad_norm": 0.5069814450953021, "learning_rate": 3.6142200594755345e-05, "loss": 0.4278, "step": 35905 }, { "epoch": 1.0484372490912541, "grad_norm": 0.4120089920862398, "learning_rate": 3.613949716139497e-05, "loss": 0.477, "step": 35910 }, { "epoch": 1.0485832323615714, "grad_norm": 0.4925363612118924, "learning_rate": 3.613679372803461e-05, "loss": 0.4481, "step": 35915 }, { "epoch": 1.0487292156318886, "grad_norm": 0.4372957642089357, "learning_rate": 3.613409029467424e-05, "loss": 0.4691, "step": 35920 }, { "epoch": 1.0488751989022058, "grad_norm": 0.5094173092954207, "learning_rate": 3.613138686131387e-05, "loss": 0.4446, "step": 35925 }, { "epoch": 1.049021182172523, "grad_norm": 0.4775046869279353, "learning_rate": 3.61286834279535e-05, "loss": 0.4432, "step": 35930 }, { "epoch": 1.0491671654428403, "grad_norm": 0.47659662019089294, "learning_rate": 3.6125979994593136e-05, "loss": 0.4472, "step": 35935 }, { "epoch": 1.0493131487131575, "grad_norm": 0.5073857049083745, "learning_rate": 3.612327656123276e-05, "loss": 0.4414, "step": 35940 }, { "epoch": 1.0494591319834747, "grad_norm": 0.5152947795445764, "learning_rate": 3.61205731278724e-05, "loss": 0.4246, "step": 35945 }, { "epoch": 1.049605115253792, "grad_norm": 0.49253668630519526, "learning_rate": 3.611786969451203e-05, "loss": 0.4517, "step": 35950 }, { "epoch": 1.0497510985241092, "grad_norm": 0.49417884527841877, "learning_rate": 3.611516626115166e-05, "loss": 0.4548, "step": 35955 }, { "epoch": 1.0498970817944264, "grad_norm": 0.4993204034356202, "learning_rate": 3.61124628277913e-05, "loss": 0.4504, "step": 35960 }, { "epoch": 1.0500430650647437, "grad_norm": 0.485541166099729, "learning_rate": 3.610975939443093e-05, "loss": 0.4568, "step": 35965 }, { "epoch": 1.0501890483350609, "grad_norm": 0.45942913273599567, "learning_rate": 3.610705596107056e-05, "loss": 0.4633, "step": 35970 }, { "epoch": 1.0503350316053781, "grad_norm": 0.4939097987590261, "learning_rate": 3.6104352527710194e-05, "loss": 0.4618, "step": 35975 }, { "epoch": 1.0504810148756953, "grad_norm": 0.48083215983998556, "learning_rate": 3.610164909434983e-05, "loss": 0.4578, "step": 35980 }, { "epoch": 1.0506269981460126, "grad_norm": 0.48221315540118453, "learning_rate": 3.6098945660989456e-05, "loss": 0.4505, "step": 35985 }, { "epoch": 1.0507729814163298, "grad_norm": 0.5119256220426185, "learning_rate": 3.609624222762909e-05, "loss": 0.4546, "step": 35990 }, { "epoch": 1.0509189646866468, "grad_norm": 0.5337321593411382, "learning_rate": 3.6093538794268724e-05, "loss": 0.4459, "step": 35995 }, { "epoch": 1.051064947956964, "grad_norm": 0.4435100416932959, "learning_rate": 3.609083536090835e-05, "loss": 0.4379, "step": 36000 }, { "epoch": 1.0512109312272813, "grad_norm": 0.4546013358991116, "learning_rate": 3.6088131927547985e-05, "loss": 0.4408, "step": 36005 }, { "epoch": 1.0513569144975985, "grad_norm": 0.4903685770347423, "learning_rate": 3.608542849418762e-05, "loss": 0.4707, "step": 36010 }, { "epoch": 1.0515028977679157, "grad_norm": 0.48123373073111164, "learning_rate": 3.608272506082725e-05, "loss": 0.4613, "step": 36015 }, { "epoch": 1.051648881038233, "grad_norm": 0.4688618258976834, "learning_rate": 3.608002162746689e-05, "loss": 0.4237, "step": 36020 }, { "epoch": 1.0517948643085502, "grad_norm": 0.5103220084889233, "learning_rate": 3.607731819410652e-05, "loss": 0.454, "step": 36025 }, { "epoch": 1.0519408475788674, "grad_norm": 0.4964389308158139, "learning_rate": 3.607461476074615e-05, "loss": 0.4548, "step": 36030 }, { "epoch": 1.0520868308491846, "grad_norm": 0.4824304809111668, "learning_rate": 3.607191132738578e-05, "loss": 0.4264, "step": 36035 }, { "epoch": 1.0522328141195019, "grad_norm": 0.46371901890697054, "learning_rate": 3.6069207894025416e-05, "loss": 0.4122, "step": 36040 }, { "epoch": 1.052378797389819, "grad_norm": 0.5065762685463526, "learning_rate": 3.6066504460665044e-05, "loss": 0.4704, "step": 36045 }, { "epoch": 1.0525247806601363, "grad_norm": 0.500007151276172, "learning_rate": 3.606380102730468e-05, "loss": 0.4305, "step": 36050 }, { "epoch": 1.0526707639304536, "grad_norm": 0.48454169983764406, "learning_rate": 3.606109759394431e-05, "loss": 0.4412, "step": 36055 }, { "epoch": 1.0528167472007708, "grad_norm": 0.4425786593702238, "learning_rate": 3.605839416058394e-05, "loss": 0.4492, "step": 36060 }, { "epoch": 1.052962730471088, "grad_norm": 0.5173704834475185, "learning_rate": 3.605569072722357e-05, "loss": 0.448, "step": 36065 }, { "epoch": 1.0531087137414052, "grad_norm": 0.48469096431078, "learning_rate": 3.6052987293863214e-05, "loss": 0.4223, "step": 36070 }, { "epoch": 1.0532546970117225, "grad_norm": 0.47061734468978494, "learning_rate": 3.605028386050284e-05, "loss": 0.4449, "step": 36075 }, { "epoch": 1.0534006802820397, "grad_norm": 0.4746368546348524, "learning_rate": 3.6047580427142475e-05, "loss": 0.4428, "step": 36080 }, { "epoch": 1.053546663552357, "grad_norm": 0.4864383772911628, "learning_rate": 3.604487699378211e-05, "loss": 0.4588, "step": 36085 }, { "epoch": 1.0536926468226742, "grad_norm": 0.43510993252895364, "learning_rate": 3.6042173560421736e-05, "loss": 0.437, "step": 36090 }, { "epoch": 1.0538386300929914, "grad_norm": 0.5377910855385317, "learning_rate": 3.603947012706137e-05, "loss": 0.46, "step": 36095 }, { "epoch": 1.0539846133633086, "grad_norm": 0.5073215153099936, "learning_rate": 3.6036766693701004e-05, "loss": 0.4484, "step": 36100 }, { "epoch": 1.0541305966336258, "grad_norm": 0.5117875119972756, "learning_rate": 3.603406326034063e-05, "loss": 0.4193, "step": 36105 }, { "epoch": 1.054276579903943, "grad_norm": 0.4981967197644997, "learning_rate": 3.6031359826980265e-05, "loss": 0.462, "step": 36110 }, { "epoch": 1.0544225631742603, "grad_norm": 0.48973489227686234, "learning_rate": 3.60286563936199e-05, "loss": 0.4794, "step": 36115 }, { "epoch": 1.0545685464445775, "grad_norm": 0.4729403564968897, "learning_rate": 3.6025952960259527e-05, "loss": 0.4508, "step": 36120 }, { "epoch": 1.0547145297148948, "grad_norm": 0.49449654324721104, "learning_rate": 3.602324952689916e-05, "loss": 0.4229, "step": 36125 }, { "epoch": 1.054860512985212, "grad_norm": 0.488612577256024, "learning_rate": 3.60205460935388e-05, "loss": 0.4623, "step": 36130 }, { "epoch": 1.0550064962555292, "grad_norm": 0.49428654038239495, "learning_rate": 3.601784266017843e-05, "loss": 0.46, "step": 36135 }, { "epoch": 1.0551524795258462, "grad_norm": 0.45269785474841323, "learning_rate": 3.601513922681806e-05, "loss": 0.4354, "step": 36140 }, { "epoch": 1.0552984627961635, "grad_norm": 0.5131069122767759, "learning_rate": 3.60124357934577e-05, "loss": 0.4568, "step": 36145 }, { "epoch": 1.0554444460664807, "grad_norm": 0.47337065729369393, "learning_rate": 3.6009732360097324e-05, "loss": 0.4725, "step": 36150 }, { "epoch": 1.055590429336798, "grad_norm": 0.47559034761293845, "learning_rate": 3.600702892673696e-05, "loss": 0.4354, "step": 36155 }, { "epoch": 1.0557364126071151, "grad_norm": 0.5294413429069075, "learning_rate": 3.600432549337659e-05, "loss": 0.4396, "step": 36160 }, { "epoch": 1.0558823958774324, "grad_norm": 0.48752941856234866, "learning_rate": 3.600162206001622e-05, "loss": 0.4603, "step": 36165 }, { "epoch": 1.0560283791477496, "grad_norm": 0.42859300264777983, "learning_rate": 3.599891862665585e-05, "loss": 0.4102, "step": 36170 }, { "epoch": 1.0561743624180668, "grad_norm": 0.5165419491199399, "learning_rate": 3.599621519329549e-05, "loss": 0.4453, "step": 36175 }, { "epoch": 1.056320345688384, "grad_norm": 0.5880001568847648, "learning_rate": 3.5993511759935114e-05, "loss": 0.4331, "step": 36180 }, { "epoch": 1.0564663289587013, "grad_norm": 0.5362040906179971, "learning_rate": 3.5990808326574755e-05, "loss": 0.4379, "step": 36185 }, { "epoch": 1.0566123122290185, "grad_norm": 0.503172413570812, "learning_rate": 3.598810489321439e-05, "loss": 0.4332, "step": 36190 }, { "epoch": 1.0567582954993358, "grad_norm": 0.4388423085474948, "learning_rate": 3.5985401459854016e-05, "loss": 0.4024, "step": 36195 }, { "epoch": 1.056904278769653, "grad_norm": 0.4613335072216716, "learning_rate": 3.598269802649365e-05, "loss": 0.4537, "step": 36200 }, { "epoch": 1.0570502620399702, "grad_norm": 0.5008786094199457, "learning_rate": 3.5979994593133284e-05, "loss": 0.4438, "step": 36205 }, { "epoch": 1.0571962453102874, "grad_norm": 0.42696907740435086, "learning_rate": 3.597729115977291e-05, "loss": 0.434, "step": 36210 }, { "epoch": 1.0573422285806047, "grad_norm": 0.4642609800011428, "learning_rate": 3.5974587726412546e-05, "loss": 0.4337, "step": 36215 }, { "epoch": 1.057488211850922, "grad_norm": 0.4955925950030038, "learning_rate": 3.597188429305218e-05, "loss": 0.4728, "step": 36220 }, { "epoch": 1.0576341951212391, "grad_norm": 0.5033144610724799, "learning_rate": 3.596918085969181e-05, "loss": 0.4582, "step": 36225 }, { "epoch": 1.0577801783915564, "grad_norm": 0.5040994762281498, "learning_rate": 3.596647742633144e-05, "loss": 0.4583, "step": 36230 }, { "epoch": 1.0579261616618736, "grad_norm": 0.5294474983494251, "learning_rate": 3.5963773992971075e-05, "loss": 0.4685, "step": 36235 }, { "epoch": 1.0580721449321908, "grad_norm": 0.4919362448824863, "learning_rate": 3.596107055961071e-05, "loss": 0.4623, "step": 36240 }, { "epoch": 1.058218128202508, "grad_norm": 0.4806212579804166, "learning_rate": 3.595836712625034e-05, "loss": 0.4614, "step": 36245 }, { "epoch": 1.0583641114728253, "grad_norm": 0.4398298721508761, "learning_rate": 3.595566369288998e-05, "loss": 0.431, "step": 36250 }, { "epoch": 1.0585100947431425, "grad_norm": 0.4851251259546224, "learning_rate": 3.5952960259529604e-05, "loss": 0.4162, "step": 36255 }, { "epoch": 1.0586560780134597, "grad_norm": 0.48330471908953176, "learning_rate": 3.595025682616924e-05, "loss": 0.4553, "step": 36260 }, { "epoch": 1.058802061283777, "grad_norm": 0.4780324512998416, "learning_rate": 3.594755339280887e-05, "loss": 0.47, "step": 36265 }, { "epoch": 1.0589480445540942, "grad_norm": 0.5327656487767022, "learning_rate": 3.59448499594485e-05, "loss": 0.457, "step": 36270 }, { "epoch": 1.0590940278244114, "grad_norm": 0.4887756263005612, "learning_rate": 3.5942146526088134e-05, "loss": 0.4368, "step": 36275 }, { "epoch": 1.0592400110947287, "grad_norm": 0.4901170384598178, "learning_rate": 3.593944309272777e-05, "loss": 0.462, "step": 36280 }, { "epoch": 1.0593859943650457, "grad_norm": 0.4483858011343321, "learning_rate": 3.5936739659367395e-05, "loss": 0.4414, "step": 36285 }, { "epoch": 1.0595319776353629, "grad_norm": 0.47032990538502056, "learning_rate": 3.593403622600703e-05, "loss": 0.4669, "step": 36290 }, { "epoch": 1.0596779609056801, "grad_norm": 0.475005555985247, "learning_rate": 3.593133279264666e-05, "loss": 0.4479, "step": 36295 }, { "epoch": 1.0598239441759973, "grad_norm": 0.48201421233125374, "learning_rate": 3.59286293592863e-05, "loss": 0.4445, "step": 36300 }, { "epoch": 1.0599699274463146, "grad_norm": 0.5790842855314504, "learning_rate": 3.592592592592593e-05, "loss": 0.4834, "step": 36305 }, { "epoch": 1.0601159107166318, "grad_norm": 0.4968812763545031, "learning_rate": 3.592322249256556e-05, "loss": 0.4375, "step": 36310 }, { "epoch": 1.060261893986949, "grad_norm": 0.5437952745250044, "learning_rate": 3.592051905920519e-05, "loss": 0.4757, "step": 36315 }, { "epoch": 1.0604078772572663, "grad_norm": 0.49149434582942125, "learning_rate": 3.5917815625844826e-05, "loss": 0.4665, "step": 36320 }, { "epoch": 1.0605538605275835, "grad_norm": 0.4747804048524922, "learning_rate": 3.591511219248446e-05, "loss": 0.4371, "step": 36325 }, { "epoch": 1.0606998437979007, "grad_norm": 0.5411364380378371, "learning_rate": 3.591240875912409e-05, "loss": 0.4459, "step": 36330 }, { "epoch": 1.060845827068218, "grad_norm": 0.5012388263938895, "learning_rate": 3.590970532576372e-05, "loss": 0.4515, "step": 36335 }, { "epoch": 1.0609918103385352, "grad_norm": 0.4808398741590045, "learning_rate": 3.5907001892403355e-05, "loss": 0.4351, "step": 36340 }, { "epoch": 1.0611377936088524, "grad_norm": 0.4873074383997691, "learning_rate": 3.590429845904298e-05, "loss": 0.4294, "step": 36345 }, { "epoch": 1.0612837768791696, "grad_norm": 0.49031577367239243, "learning_rate": 3.5901595025682617e-05, "loss": 0.4515, "step": 36350 }, { "epoch": 1.0614297601494869, "grad_norm": 0.4993200210308824, "learning_rate": 3.589889159232225e-05, "loss": 0.4466, "step": 36355 }, { "epoch": 1.061575743419804, "grad_norm": 0.4596846686034964, "learning_rate": 3.5896188158961885e-05, "loss": 0.4419, "step": 36360 }, { "epoch": 1.0617217266901213, "grad_norm": 0.5059904552208603, "learning_rate": 3.589348472560152e-05, "loss": 0.476, "step": 36365 }, { "epoch": 1.0618677099604386, "grad_norm": 0.4709530632849702, "learning_rate": 3.5890781292241146e-05, "loss": 0.4256, "step": 36370 }, { "epoch": 1.0620136932307558, "grad_norm": 0.47189421707493784, "learning_rate": 3.588807785888078e-05, "loss": 0.4605, "step": 36375 }, { "epoch": 1.062159676501073, "grad_norm": 0.49471107622690913, "learning_rate": 3.5885374425520414e-05, "loss": 0.4571, "step": 36380 }, { "epoch": 1.0623056597713902, "grad_norm": 0.43151116670322254, "learning_rate": 3.588267099216005e-05, "loss": 0.4519, "step": 36385 }, { "epoch": 1.0624516430417075, "grad_norm": 0.507198609019797, "learning_rate": 3.5879967558799675e-05, "loss": 0.4533, "step": 36390 }, { "epoch": 1.0625976263120247, "grad_norm": 0.46959964126395237, "learning_rate": 3.587726412543931e-05, "loss": 0.4351, "step": 36395 }, { "epoch": 1.062743609582342, "grad_norm": 0.5052980157908625, "learning_rate": 3.587456069207894e-05, "loss": 0.4381, "step": 36400 }, { "epoch": 1.0628895928526592, "grad_norm": 0.4680350117847551, "learning_rate": 3.587185725871857e-05, "loss": 0.4454, "step": 36405 }, { "epoch": 1.0630355761229764, "grad_norm": 0.4906064040460302, "learning_rate": 3.586915382535821e-05, "loss": 0.4518, "step": 36410 }, { "epoch": 1.0631815593932936, "grad_norm": 0.5026164628959892, "learning_rate": 3.586645039199784e-05, "loss": 0.4535, "step": 36415 }, { "epoch": 1.0633275426636108, "grad_norm": 0.4425783402665667, "learning_rate": 3.586374695863747e-05, "loss": 0.4542, "step": 36420 }, { "epoch": 1.063473525933928, "grad_norm": 0.44207921904785086, "learning_rate": 3.5861043525277106e-05, "loss": 0.4582, "step": 36425 }, { "epoch": 1.063619509204245, "grad_norm": 0.5060734818277431, "learning_rate": 3.5858340091916734e-05, "loss": 0.4686, "step": 36430 }, { "epoch": 1.0637654924745623, "grad_norm": 0.6812077910895101, "learning_rate": 3.585563665855637e-05, "loss": 0.4564, "step": 36435 }, { "epoch": 1.0639114757448795, "grad_norm": 0.48281089441453545, "learning_rate": 3.5852933225196e-05, "loss": 0.4213, "step": 36440 }, { "epoch": 1.0640574590151968, "grad_norm": 0.5391463218132536, "learning_rate": 3.585022979183563e-05, "loss": 0.4465, "step": 36445 }, { "epoch": 1.064203442285514, "grad_norm": 0.5114845008789908, "learning_rate": 3.584752635847526e-05, "loss": 0.4436, "step": 36450 }, { "epoch": 1.0643494255558312, "grad_norm": 0.5075276899200777, "learning_rate": 3.58448229251149e-05, "loss": 0.4387, "step": 36455 }, { "epoch": 1.0644954088261485, "grad_norm": 0.46634609564260276, "learning_rate": 3.584211949175453e-05, "loss": 0.4334, "step": 36460 }, { "epoch": 1.0646413920964657, "grad_norm": 0.5160176911408744, "learning_rate": 3.583941605839416e-05, "loss": 0.444, "step": 36465 }, { "epoch": 1.064787375366783, "grad_norm": 0.49214201682443487, "learning_rate": 3.58367126250338e-05, "loss": 0.4502, "step": 36470 }, { "epoch": 1.0649333586371001, "grad_norm": 0.4973204684277942, "learning_rate": 3.5834009191673426e-05, "loss": 0.444, "step": 36475 }, { "epoch": 1.0650793419074174, "grad_norm": 0.4721748955296523, "learning_rate": 3.583130575831306e-05, "loss": 0.45, "step": 36480 }, { "epoch": 1.0652253251777346, "grad_norm": 0.5145923705940042, "learning_rate": 3.5828602324952694e-05, "loss": 0.4911, "step": 36485 }, { "epoch": 1.0653713084480518, "grad_norm": 0.5031756831112183, "learning_rate": 3.582589889159232e-05, "loss": 0.4722, "step": 36490 }, { "epoch": 1.065517291718369, "grad_norm": 0.5218619523985049, "learning_rate": 3.5823195458231955e-05, "loss": 0.4423, "step": 36495 }, { "epoch": 1.0656632749886863, "grad_norm": 0.5234316981340801, "learning_rate": 3.582049202487159e-05, "loss": 0.4394, "step": 36500 }, { "epoch": 1.0658092582590035, "grad_norm": 0.49423698161521734, "learning_rate": 3.581778859151122e-05, "loss": 0.4456, "step": 36505 }, { "epoch": 1.0659552415293208, "grad_norm": 0.520252185325464, "learning_rate": 3.581508515815085e-05, "loss": 0.4737, "step": 36510 }, { "epoch": 1.066101224799638, "grad_norm": 0.4435996329813464, "learning_rate": 3.5812381724790485e-05, "loss": 0.4606, "step": 36515 }, { "epoch": 1.0662472080699552, "grad_norm": 0.5089522643694345, "learning_rate": 3.580967829143012e-05, "loss": 0.4389, "step": 36520 }, { "epoch": 1.0663931913402724, "grad_norm": 0.4850095457456483, "learning_rate": 3.580697485806975e-05, "loss": 0.4246, "step": 36525 }, { "epoch": 1.0665391746105897, "grad_norm": 0.4834373032671675, "learning_rate": 3.580427142470939e-05, "loss": 0.4284, "step": 36530 }, { "epoch": 1.066685157880907, "grad_norm": 0.546185450874547, "learning_rate": 3.5801567991349014e-05, "loss": 0.4703, "step": 36535 }, { "epoch": 1.0668311411512241, "grad_norm": 0.5065681013320009, "learning_rate": 3.579886455798865e-05, "loss": 0.442, "step": 36540 }, { "epoch": 1.0669771244215414, "grad_norm": 0.5070355078135594, "learning_rate": 3.579616112462828e-05, "loss": 0.4273, "step": 36545 }, { "epoch": 1.0671231076918586, "grad_norm": 0.45406179926166285, "learning_rate": 3.579345769126791e-05, "loss": 0.4242, "step": 36550 }, { "epoch": 1.0672690909621758, "grad_norm": 0.46176546540437924, "learning_rate": 3.579075425790754e-05, "loss": 0.4173, "step": 36555 }, { "epoch": 1.067415074232493, "grad_norm": 0.4700516624423253, "learning_rate": 3.578805082454718e-05, "loss": 0.4673, "step": 36560 }, { "epoch": 1.0675610575028103, "grad_norm": 0.5343134466266982, "learning_rate": 3.5785347391186805e-05, "loss": 0.4649, "step": 36565 }, { "epoch": 1.0677070407731275, "grad_norm": 0.48272531500579546, "learning_rate": 3.578264395782644e-05, "loss": 0.455, "step": 36570 }, { "epoch": 1.0678530240434445, "grad_norm": 0.511186794028661, "learning_rate": 3.577994052446607e-05, "loss": 0.4352, "step": 36575 }, { "epoch": 1.0679990073137617, "grad_norm": 0.5385525823175084, "learning_rate": 3.5777237091105707e-05, "loss": 0.4498, "step": 36580 }, { "epoch": 1.068144990584079, "grad_norm": 0.4566494520060468, "learning_rate": 3.577453365774534e-05, "loss": 0.4463, "step": 36585 }, { "epoch": 1.0682909738543962, "grad_norm": 0.4637163568056678, "learning_rate": 3.5771830224384975e-05, "loss": 0.4419, "step": 36590 }, { "epoch": 1.0684369571247134, "grad_norm": 0.5248031186573798, "learning_rate": 3.57691267910246e-05, "loss": 0.4531, "step": 36595 }, { "epoch": 1.0685829403950307, "grad_norm": 0.4554911511031223, "learning_rate": 3.5766423357664236e-05, "loss": 0.436, "step": 36600 }, { "epoch": 1.0687289236653479, "grad_norm": 0.46505580950292047, "learning_rate": 3.576371992430387e-05, "loss": 0.4323, "step": 36605 }, { "epoch": 1.0688749069356651, "grad_norm": 0.46560830791306784, "learning_rate": 3.57610164909435e-05, "loss": 0.4551, "step": 36610 }, { "epoch": 1.0690208902059823, "grad_norm": 0.5114777386624421, "learning_rate": 3.575831305758313e-05, "loss": 0.4504, "step": 36615 }, { "epoch": 1.0691668734762996, "grad_norm": 0.4842314797180984, "learning_rate": 3.5755609624222765e-05, "loss": 0.4578, "step": 36620 }, { "epoch": 1.0693128567466168, "grad_norm": 0.5502164339941913, "learning_rate": 3.575290619086239e-05, "loss": 0.4493, "step": 36625 }, { "epoch": 1.069458840016934, "grad_norm": 0.4816523617737906, "learning_rate": 3.5750202757502026e-05, "loss": 0.4412, "step": 36630 }, { "epoch": 1.0696048232872513, "grad_norm": 0.435137996125795, "learning_rate": 3.574749932414166e-05, "loss": 0.4211, "step": 36635 }, { "epoch": 1.0697508065575685, "grad_norm": 0.49253073628419597, "learning_rate": 3.5744795890781294e-05, "loss": 0.4368, "step": 36640 }, { "epoch": 1.0698967898278857, "grad_norm": 0.4315880548572327, "learning_rate": 3.574209245742093e-05, "loss": 0.4284, "step": 36645 }, { "epoch": 1.070042773098203, "grad_norm": 0.42696279676258714, "learning_rate": 3.573938902406056e-05, "loss": 0.4347, "step": 36650 }, { "epoch": 1.0701887563685202, "grad_norm": 0.4666073444029581, "learning_rate": 3.573668559070019e-05, "loss": 0.4334, "step": 36655 }, { "epoch": 1.0703347396388374, "grad_norm": 0.5161103407666414, "learning_rate": 3.5733982157339824e-05, "loss": 0.4725, "step": 36660 }, { "epoch": 1.0704807229091546, "grad_norm": 0.48175676190125766, "learning_rate": 3.573127872397946e-05, "loss": 0.4033, "step": 36665 }, { "epoch": 1.0706267061794719, "grad_norm": 0.4632452578209457, "learning_rate": 3.5728575290619085e-05, "loss": 0.4168, "step": 36670 }, { "epoch": 1.070772689449789, "grad_norm": 0.4605966017822432, "learning_rate": 3.572587185725872e-05, "loss": 0.4174, "step": 36675 }, { "epoch": 1.0709186727201063, "grad_norm": 0.5019513571169848, "learning_rate": 3.572316842389835e-05, "loss": 0.4661, "step": 36680 }, { "epoch": 1.0710646559904236, "grad_norm": 0.48180506536087814, "learning_rate": 3.572046499053798e-05, "loss": 0.4312, "step": 36685 }, { "epoch": 1.0712106392607408, "grad_norm": 0.5375413427539484, "learning_rate": 3.5717761557177614e-05, "loss": 0.442, "step": 36690 }, { "epoch": 1.071356622531058, "grad_norm": 0.5281884722149647, "learning_rate": 3.5715058123817255e-05, "loss": 0.4103, "step": 36695 }, { "epoch": 1.0715026058013752, "grad_norm": 0.5091422298724056, "learning_rate": 3.571235469045688e-05, "loss": 0.4692, "step": 36700 }, { "epoch": 1.0716485890716925, "grad_norm": 0.46182394328996007, "learning_rate": 3.5709651257096516e-05, "loss": 0.399, "step": 36705 }, { "epoch": 1.0717945723420097, "grad_norm": 0.4806522257347416, "learning_rate": 3.570694782373615e-05, "loss": 0.465, "step": 36710 }, { "epoch": 1.071940555612327, "grad_norm": 0.5172132596977659, "learning_rate": 3.570424439037578e-05, "loss": 0.4477, "step": 36715 }, { "epoch": 1.072086538882644, "grad_norm": 0.4663834671816309, "learning_rate": 3.570154095701541e-05, "loss": 0.4388, "step": 36720 }, { "epoch": 1.0722325221529614, "grad_norm": 2.9941572745528657, "learning_rate": 3.5698837523655045e-05, "loss": 0.4585, "step": 36725 }, { "epoch": 1.0723785054232784, "grad_norm": 0.4933800560252615, "learning_rate": 3.569613409029467e-05, "loss": 0.4472, "step": 36730 }, { "epoch": 1.0725244886935956, "grad_norm": 0.5143252568622627, "learning_rate": 3.569343065693431e-05, "loss": 0.4323, "step": 36735 }, { "epoch": 1.0726704719639129, "grad_norm": 0.4545240617780906, "learning_rate": 3.569072722357394e-05, "loss": 0.4354, "step": 36740 }, { "epoch": 1.07281645523423, "grad_norm": 0.450448238274298, "learning_rate": 3.568802379021357e-05, "loss": 0.4538, "step": 36745 }, { "epoch": 1.0729624385045473, "grad_norm": 0.4914099686667458, "learning_rate": 3.568532035685321e-05, "loss": 0.4328, "step": 36750 }, { "epoch": 1.0731084217748645, "grad_norm": 0.5136289782647996, "learning_rate": 3.568261692349284e-05, "loss": 0.4523, "step": 36755 }, { "epoch": 1.0732544050451818, "grad_norm": 0.5126667448651596, "learning_rate": 3.567991349013247e-05, "loss": 0.465, "step": 36760 }, { "epoch": 1.073400388315499, "grad_norm": 0.4813322663517675, "learning_rate": 3.5677210056772104e-05, "loss": 0.4625, "step": 36765 }, { "epoch": 1.0735463715858162, "grad_norm": 0.5070259065033442, "learning_rate": 3.567450662341174e-05, "loss": 0.4475, "step": 36770 }, { "epoch": 1.0736923548561335, "grad_norm": 0.49151459085639854, "learning_rate": 3.5671803190051365e-05, "loss": 0.4447, "step": 36775 }, { "epoch": 1.0738383381264507, "grad_norm": 0.4735048920842646, "learning_rate": 3.5669099756691e-05, "loss": 0.4285, "step": 36780 }, { "epoch": 1.073984321396768, "grad_norm": 0.5137315414632263, "learning_rate": 3.566639632333063e-05, "loss": 0.4416, "step": 36785 }, { "epoch": 1.0741303046670851, "grad_norm": 0.46910539157482534, "learning_rate": 3.566369288997026e-05, "loss": 0.4513, "step": 36790 }, { "epoch": 1.0742762879374024, "grad_norm": 0.4978221305819988, "learning_rate": 3.5660989456609895e-05, "loss": 0.4646, "step": 36795 }, { "epoch": 1.0744222712077196, "grad_norm": 0.5158769298110957, "learning_rate": 3.565828602324953e-05, "loss": 0.457, "step": 36800 }, { "epoch": 1.0745682544780368, "grad_norm": 0.4947439177791396, "learning_rate": 3.5655582589889156e-05, "loss": 0.4266, "step": 36805 }, { "epoch": 1.074714237748354, "grad_norm": 0.4713107795176129, "learning_rate": 3.5652879156528797e-05, "loss": 0.4556, "step": 36810 }, { "epoch": 1.0748602210186713, "grad_norm": 0.45952305076385697, "learning_rate": 3.565017572316843e-05, "loss": 0.4051, "step": 36815 }, { "epoch": 1.0750062042889885, "grad_norm": 0.5188893811988678, "learning_rate": 3.564747228980806e-05, "loss": 0.4388, "step": 36820 }, { "epoch": 1.0751521875593057, "grad_norm": 0.45892921456505353, "learning_rate": 3.564476885644769e-05, "loss": 0.4367, "step": 36825 }, { "epoch": 1.075298170829623, "grad_norm": 0.5000291414792473, "learning_rate": 3.5642065423087326e-05, "loss": 0.4582, "step": 36830 }, { "epoch": 1.0754441540999402, "grad_norm": 0.4688131689616081, "learning_rate": 3.563936198972695e-05, "loss": 0.4531, "step": 36835 }, { "epoch": 1.0755901373702574, "grad_norm": 0.4922790365948632, "learning_rate": 3.563665855636659e-05, "loss": 0.5139, "step": 36840 }, { "epoch": 1.0757361206405747, "grad_norm": 0.47332428177932334, "learning_rate": 3.563395512300622e-05, "loss": 0.4245, "step": 36845 }, { "epoch": 1.075882103910892, "grad_norm": 0.4609222852202362, "learning_rate": 3.563125168964585e-05, "loss": 0.4638, "step": 36850 }, { "epoch": 1.0760280871812091, "grad_norm": 0.4829402058239499, "learning_rate": 3.562854825628548e-05, "loss": 0.4342, "step": 36855 }, { "epoch": 1.0761740704515264, "grad_norm": 0.48869509534464206, "learning_rate": 3.5625844822925116e-05, "loss": 0.4624, "step": 36860 }, { "epoch": 1.0763200537218434, "grad_norm": 0.47118095432351503, "learning_rate": 3.562314138956475e-05, "loss": 0.5433, "step": 36865 }, { "epoch": 1.0764660369921608, "grad_norm": 0.4594069691124179, "learning_rate": 3.5620437956204384e-05, "loss": 0.4651, "step": 36870 }, { "epoch": 1.0766120202624778, "grad_norm": 0.4564674203666557, "learning_rate": 3.561773452284402e-05, "loss": 0.4428, "step": 36875 }, { "epoch": 1.076758003532795, "grad_norm": 0.5176507633223613, "learning_rate": 3.5615031089483646e-05, "loss": 0.4399, "step": 36880 }, { "epoch": 1.0769039868031123, "grad_norm": 0.49182581340277765, "learning_rate": 3.561232765612328e-05, "loss": 0.4715, "step": 36885 }, { "epoch": 1.0770499700734295, "grad_norm": 0.46545815409778346, "learning_rate": 3.5609624222762914e-05, "loss": 0.4397, "step": 36890 }, { "epoch": 1.0771959533437467, "grad_norm": 0.5013660549824819, "learning_rate": 3.560692078940254e-05, "loss": 0.4422, "step": 36895 }, { "epoch": 1.077341936614064, "grad_norm": 0.4901627424389813, "learning_rate": 3.5604217356042175e-05, "loss": 0.4393, "step": 36900 }, { "epoch": 1.0774879198843812, "grad_norm": 0.5981729947104049, "learning_rate": 3.560151392268181e-05, "loss": 0.4484, "step": 36905 }, { "epoch": 1.0776339031546984, "grad_norm": 0.4705746499487919, "learning_rate": 3.5598810489321436e-05, "loss": 0.4556, "step": 36910 }, { "epoch": 1.0777798864250157, "grad_norm": 0.4886627834143494, "learning_rate": 3.559610705596107e-05, "loss": 0.4356, "step": 36915 }, { "epoch": 1.0779258696953329, "grad_norm": 0.5055870251651516, "learning_rate": 3.5593403622600704e-05, "loss": 0.4705, "step": 36920 }, { "epoch": 1.0780718529656501, "grad_norm": 0.5363904727163545, "learning_rate": 3.559070018924034e-05, "loss": 0.4686, "step": 36925 }, { "epoch": 1.0782178362359673, "grad_norm": 0.4637081865443676, "learning_rate": 3.558799675587997e-05, "loss": 0.4419, "step": 36930 }, { "epoch": 1.0783638195062846, "grad_norm": 0.46280323354332403, "learning_rate": 3.5585293322519606e-05, "loss": 0.4223, "step": 36935 }, { "epoch": 1.0785098027766018, "grad_norm": 0.5059932686175148, "learning_rate": 3.5582589889159233e-05, "loss": 0.435, "step": 36940 }, { "epoch": 1.078655786046919, "grad_norm": 0.466703119134932, "learning_rate": 3.557988645579887e-05, "loss": 0.4467, "step": 36945 }, { "epoch": 1.0788017693172363, "grad_norm": 0.5071374082809074, "learning_rate": 3.55771830224385e-05, "loss": 0.4391, "step": 36950 }, { "epoch": 1.0789477525875535, "grad_norm": 0.4881315093138636, "learning_rate": 3.557447958907813e-05, "loss": 0.4661, "step": 36955 }, { "epoch": 1.0790937358578707, "grad_norm": 0.5186456358357271, "learning_rate": 3.557177615571776e-05, "loss": 0.4528, "step": 36960 }, { "epoch": 1.079239719128188, "grad_norm": 0.5011830535849247, "learning_rate": 3.55690727223574e-05, "loss": 0.4334, "step": 36965 }, { "epoch": 1.0793857023985052, "grad_norm": 0.4684367281674236, "learning_rate": 3.5566369288997024e-05, "loss": 0.446, "step": 36970 }, { "epoch": 1.0795316856688224, "grad_norm": 0.5289139076706313, "learning_rate": 3.556366585563666e-05, "loss": 0.462, "step": 36975 }, { "epoch": 1.0796776689391396, "grad_norm": 0.47797335584271594, "learning_rate": 3.556096242227629e-05, "loss": 0.4371, "step": 36980 }, { "epoch": 1.0798236522094569, "grad_norm": 0.5416063105632183, "learning_rate": 3.5558258988915926e-05, "loss": 0.4739, "step": 36985 }, { "epoch": 1.079969635479774, "grad_norm": 0.5019231133051253, "learning_rate": 3.555555555555556e-05, "loss": 0.4779, "step": 36990 }, { "epoch": 1.0801156187500913, "grad_norm": 0.46375587429056675, "learning_rate": 3.555285212219519e-05, "loss": 0.4619, "step": 36995 }, { "epoch": 1.0802616020204086, "grad_norm": 0.4586214321401256, "learning_rate": 3.555014868883482e-05, "loss": 0.4485, "step": 37000 }, { "epoch": 1.0804075852907258, "grad_norm": 0.45486451046610554, "learning_rate": 3.5547445255474455e-05, "loss": 0.4351, "step": 37005 }, { "epoch": 1.0805535685610428, "grad_norm": 0.510762829185105, "learning_rate": 3.554474182211409e-05, "loss": 0.4373, "step": 37010 }, { "epoch": 1.0806995518313602, "grad_norm": 0.4816693827320039, "learning_rate": 3.5542038388753716e-05, "loss": 0.4191, "step": 37015 }, { "epoch": 1.0808455351016772, "grad_norm": 0.5288402578954319, "learning_rate": 3.553933495539335e-05, "loss": 0.4552, "step": 37020 }, { "epoch": 1.0809915183719945, "grad_norm": 0.4968066410013802, "learning_rate": 3.5536631522032984e-05, "loss": 0.4542, "step": 37025 }, { "epoch": 1.0811375016423117, "grad_norm": 0.5137057529586396, "learning_rate": 3.553392808867261e-05, "loss": 0.4432, "step": 37030 }, { "epoch": 1.081283484912629, "grad_norm": 0.516319545973264, "learning_rate": 3.553122465531225e-05, "loss": 0.4447, "step": 37035 }, { "epoch": 1.0814294681829462, "grad_norm": 0.4837955606688444, "learning_rate": 3.552852122195188e-05, "loss": 0.4578, "step": 37040 }, { "epoch": 1.0815754514532634, "grad_norm": 0.49731466077675074, "learning_rate": 3.5525817788591514e-05, "loss": 0.4438, "step": 37045 }, { "epoch": 1.0817214347235806, "grad_norm": 0.4836573821386738, "learning_rate": 3.552311435523115e-05, "loss": 0.4635, "step": 37050 }, { "epoch": 1.0818674179938979, "grad_norm": 0.5036193903414661, "learning_rate": 3.5520410921870775e-05, "loss": 0.4173, "step": 37055 }, { "epoch": 1.082013401264215, "grad_norm": 0.4535761618171738, "learning_rate": 3.551770748851041e-05, "loss": 0.4332, "step": 37060 }, { "epoch": 1.0821593845345323, "grad_norm": 0.5045925258951958, "learning_rate": 3.551500405515004e-05, "loss": 0.4459, "step": 37065 }, { "epoch": 1.0823053678048495, "grad_norm": 0.48231737494047167, "learning_rate": 3.551230062178967e-05, "loss": 0.459, "step": 37070 }, { "epoch": 1.0824513510751668, "grad_norm": 0.45107184022596003, "learning_rate": 3.5509597188429304e-05, "loss": 0.4197, "step": 37075 }, { "epoch": 1.082597334345484, "grad_norm": 0.4725949974877658, "learning_rate": 3.550689375506894e-05, "loss": 0.4378, "step": 37080 }, { "epoch": 1.0827433176158012, "grad_norm": 0.4846469773935199, "learning_rate": 3.550419032170857e-05, "loss": 0.4562, "step": 37085 }, { "epoch": 1.0828893008861185, "grad_norm": 0.5042325507008504, "learning_rate": 3.5501486888348206e-05, "loss": 0.4634, "step": 37090 }, { "epoch": 1.0830352841564357, "grad_norm": 0.45347977786973986, "learning_rate": 3.549878345498784e-05, "loss": 0.4397, "step": 37095 }, { "epoch": 1.083181267426753, "grad_norm": 0.4981977336826187, "learning_rate": 3.549608002162747e-05, "loss": 0.4613, "step": 37100 }, { "epoch": 1.0833272506970701, "grad_norm": 0.5130070176176063, "learning_rate": 3.54933765882671e-05, "loss": 0.4392, "step": 37105 }, { "epoch": 1.0834732339673874, "grad_norm": 0.5043746142039965, "learning_rate": 3.5490673154906736e-05, "loss": 0.4806, "step": 37110 }, { "epoch": 1.0836192172377046, "grad_norm": 0.45370963887918114, "learning_rate": 3.548796972154636e-05, "loss": 0.4242, "step": 37115 }, { "epoch": 1.0837652005080218, "grad_norm": 0.48054394274035883, "learning_rate": 3.5485266288186e-05, "loss": 0.4599, "step": 37120 }, { "epoch": 1.083911183778339, "grad_norm": 0.46121113067723857, "learning_rate": 3.548256285482563e-05, "loss": 0.459, "step": 37125 }, { "epoch": 1.0840571670486563, "grad_norm": 0.5417621526996711, "learning_rate": 3.547985942146526e-05, "loss": 0.4571, "step": 37130 }, { "epoch": 1.0842031503189735, "grad_norm": 0.496177718234986, "learning_rate": 3.547715598810489e-05, "loss": 0.4905, "step": 37135 }, { "epoch": 1.0843491335892907, "grad_norm": 0.4491040387739239, "learning_rate": 3.5474452554744526e-05, "loss": 0.4446, "step": 37140 }, { "epoch": 1.084495116859608, "grad_norm": 0.4858774262934631, "learning_rate": 3.547174912138416e-05, "loss": 0.4451, "step": 37145 }, { "epoch": 1.0846411001299252, "grad_norm": 0.5085500961330546, "learning_rate": 3.5469045688023794e-05, "loss": 0.4422, "step": 37150 }, { "epoch": 1.0847870834002424, "grad_norm": 0.5779714317021447, "learning_rate": 3.546634225466343e-05, "loss": 0.4479, "step": 37155 }, { "epoch": 1.0849330666705597, "grad_norm": 0.4456024005797675, "learning_rate": 3.5463638821303055e-05, "loss": 0.4741, "step": 37160 }, { "epoch": 1.0850790499408767, "grad_norm": 0.4993429949831491, "learning_rate": 3.546093538794269e-05, "loss": 0.4718, "step": 37165 }, { "epoch": 1.085225033211194, "grad_norm": 0.5053713631020769, "learning_rate": 3.5458231954582323e-05, "loss": 0.4299, "step": 37170 }, { "epoch": 1.0853710164815111, "grad_norm": 0.44744960183270716, "learning_rate": 3.545552852122195e-05, "loss": 0.4424, "step": 37175 }, { "epoch": 1.0855169997518284, "grad_norm": 0.4459870692624293, "learning_rate": 3.5452825087861585e-05, "loss": 0.4597, "step": 37180 }, { "epoch": 1.0856629830221456, "grad_norm": 0.46908483339775625, "learning_rate": 3.545012165450122e-05, "loss": 0.4341, "step": 37185 }, { "epoch": 1.0858089662924628, "grad_norm": 0.5054025688560265, "learning_rate": 3.5447418221140846e-05, "loss": 0.4418, "step": 37190 }, { "epoch": 1.08595494956278, "grad_norm": 0.5181482986115644, "learning_rate": 3.544471478778048e-05, "loss": 0.4535, "step": 37195 }, { "epoch": 1.0861009328330973, "grad_norm": 0.5414892799484342, "learning_rate": 3.5442011354420114e-05, "loss": 0.4587, "step": 37200 }, { "epoch": 1.0862469161034145, "grad_norm": 0.4595310417231588, "learning_rate": 3.543930792105975e-05, "loss": 0.4189, "step": 37205 }, { "epoch": 1.0863928993737317, "grad_norm": 0.5079307246972332, "learning_rate": 3.543660448769938e-05, "loss": 0.4395, "step": 37210 }, { "epoch": 1.086538882644049, "grad_norm": 0.4674884127652365, "learning_rate": 3.5433901054339016e-05, "loss": 0.445, "step": 37215 }, { "epoch": 1.0866848659143662, "grad_norm": 0.469529558581289, "learning_rate": 3.543119762097864e-05, "loss": 0.4299, "step": 37220 }, { "epoch": 1.0868308491846834, "grad_norm": 12.77431735264364, "learning_rate": 3.542849418761828e-05, "loss": 0.4588, "step": 37225 }, { "epoch": 1.0869768324550007, "grad_norm": 0.4629276316072643, "learning_rate": 3.542579075425791e-05, "loss": 0.4289, "step": 37230 }, { "epoch": 1.0871228157253179, "grad_norm": 0.49169700494207036, "learning_rate": 3.542308732089754e-05, "loss": 0.4322, "step": 37235 }, { "epoch": 1.087268798995635, "grad_norm": 0.4879582063078043, "learning_rate": 3.542038388753717e-05, "loss": 0.4582, "step": 37240 }, { "epoch": 1.0874147822659523, "grad_norm": 0.4315567366796098, "learning_rate": 3.5417680454176806e-05, "loss": 0.4417, "step": 37245 }, { "epoch": 1.0875607655362696, "grad_norm": 0.46282404984103837, "learning_rate": 3.5414977020816434e-05, "loss": 0.4355, "step": 37250 }, { "epoch": 1.0877067488065868, "grad_norm": 0.48210249508950304, "learning_rate": 3.541227358745607e-05, "loss": 0.4577, "step": 37255 }, { "epoch": 1.087852732076904, "grad_norm": 0.48538451559287776, "learning_rate": 3.540957015409571e-05, "loss": 0.436, "step": 37260 }, { "epoch": 1.0879987153472213, "grad_norm": 0.5281448697492138, "learning_rate": 3.5406866720735336e-05, "loss": 0.4703, "step": 37265 }, { "epoch": 1.0881446986175385, "grad_norm": 0.49090678577380237, "learning_rate": 3.540416328737497e-05, "loss": 0.4371, "step": 37270 }, { "epoch": 1.0882906818878557, "grad_norm": 0.49298119910888266, "learning_rate": 3.5401459854014604e-05, "loss": 0.4604, "step": 37275 }, { "epoch": 1.088436665158173, "grad_norm": 0.45052583927829715, "learning_rate": 3.539875642065423e-05, "loss": 0.4193, "step": 37280 }, { "epoch": 1.0885826484284902, "grad_norm": 0.4431646814604032, "learning_rate": 3.5396052987293865e-05, "loss": 0.4453, "step": 37285 }, { "epoch": 1.0887286316988074, "grad_norm": 0.48826935729560383, "learning_rate": 3.53933495539335e-05, "loss": 0.4243, "step": 37290 }, { "epoch": 1.0888746149691246, "grad_norm": 0.4635183782589664, "learning_rate": 3.5390646120573126e-05, "loss": 0.4319, "step": 37295 }, { "epoch": 1.0890205982394419, "grad_norm": 0.5053079068323527, "learning_rate": 3.538794268721276e-05, "loss": 0.4555, "step": 37300 }, { "epoch": 1.089166581509759, "grad_norm": 0.45409274967154156, "learning_rate": 3.5385239253852394e-05, "loss": 0.4385, "step": 37305 }, { "epoch": 1.089312564780076, "grad_norm": 0.4979230213900829, "learning_rate": 3.538253582049202e-05, "loss": 0.4317, "step": 37310 }, { "epoch": 1.0894585480503933, "grad_norm": 0.4560845650441257, "learning_rate": 3.5379832387131656e-05, "loss": 0.4379, "step": 37315 }, { "epoch": 1.0896045313207106, "grad_norm": 0.4599194753584152, "learning_rate": 3.5377128953771296e-05, "loss": 0.4236, "step": 37320 }, { "epoch": 1.0897505145910278, "grad_norm": 0.4986784326048004, "learning_rate": 3.5374425520410924e-05, "loss": 0.4552, "step": 37325 }, { "epoch": 1.089896497861345, "grad_norm": 0.5054334254995615, "learning_rate": 3.537172208705056e-05, "loss": 0.4711, "step": 37330 }, { "epoch": 1.0900424811316622, "grad_norm": 0.49469390187137685, "learning_rate": 3.536901865369019e-05, "loss": 0.4404, "step": 37335 }, { "epoch": 1.0901884644019795, "grad_norm": 0.5124350443295606, "learning_rate": 3.536631522032982e-05, "loss": 0.4533, "step": 37340 }, { "epoch": 1.0903344476722967, "grad_norm": 0.4837052444682092, "learning_rate": 3.536361178696945e-05, "loss": 0.4341, "step": 37345 }, { "epoch": 1.090480430942614, "grad_norm": 0.48265003477082674, "learning_rate": 3.536090835360909e-05, "loss": 0.4296, "step": 37350 }, { "epoch": 1.0906264142129312, "grad_norm": 0.4853341920927925, "learning_rate": 3.5358204920248714e-05, "loss": 0.4652, "step": 37355 }, { "epoch": 1.0907723974832484, "grad_norm": 0.4126294969146983, "learning_rate": 3.535550148688835e-05, "loss": 0.4186, "step": 37360 }, { "epoch": 1.0909183807535656, "grad_norm": 0.45893306414707946, "learning_rate": 3.535279805352798e-05, "loss": 0.4143, "step": 37365 }, { "epoch": 1.0910643640238828, "grad_norm": 0.4981792415258338, "learning_rate": 3.535009462016761e-05, "loss": 0.4394, "step": 37370 }, { "epoch": 1.0912103472942, "grad_norm": 0.4955980344717909, "learning_rate": 3.534739118680725e-05, "loss": 0.4405, "step": 37375 }, { "epoch": 1.0913563305645173, "grad_norm": 0.4969427432400289, "learning_rate": 3.5344687753446884e-05, "loss": 0.4542, "step": 37380 }, { "epoch": 1.0915023138348345, "grad_norm": 0.47718940100472285, "learning_rate": 3.534198432008651e-05, "loss": 0.4745, "step": 37385 }, { "epoch": 1.0916482971051518, "grad_norm": 0.5017722183913684, "learning_rate": 3.5339280886726145e-05, "loss": 0.4409, "step": 37390 }, { "epoch": 1.091794280375469, "grad_norm": 0.4912506981790292, "learning_rate": 3.533657745336578e-05, "loss": 0.4389, "step": 37395 }, { "epoch": 1.0919402636457862, "grad_norm": 0.5029948184241738, "learning_rate": 3.5333874020005407e-05, "loss": 0.4286, "step": 37400 }, { "epoch": 1.0920862469161035, "grad_norm": 0.49090275296709296, "learning_rate": 3.533117058664504e-05, "loss": 0.4479, "step": 37405 }, { "epoch": 1.0922322301864207, "grad_norm": 0.5117310045921043, "learning_rate": 3.5328467153284675e-05, "loss": 0.4595, "step": 37410 }, { "epoch": 1.092378213456738, "grad_norm": 0.4511581639273934, "learning_rate": 3.53257637199243e-05, "loss": 0.4257, "step": 37415 }, { "epoch": 1.0925241967270551, "grad_norm": 0.5178287579496125, "learning_rate": 3.5323060286563936e-05, "loss": 0.4867, "step": 37420 }, { "epoch": 1.0926701799973724, "grad_norm": 0.4772312824650009, "learning_rate": 3.532035685320357e-05, "loss": 0.4404, "step": 37425 }, { "epoch": 1.0928161632676896, "grad_norm": 0.45577133790678104, "learning_rate": 3.5317653419843204e-05, "loss": 0.4464, "step": 37430 }, { "epoch": 1.0929621465380068, "grad_norm": 0.5149320244260119, "learning_rate": 3.531494998648284e-05, "loss": 0.4571, "step": 37435 }, { "epoch": 1.093108129808324, "grad_norm": 0.5157816368530805, "learning_rate": 3.531224655312247e-05, "loss": 0.4615, "step": 37440 }, { "epoch": 1.0932541130786413, "grad_norm": 0.4643748951287963, "learning_rate": 3.53095431197621e-05, "loss": 0.4553, "step": 37445 }, { "epoch": 1.0934000963489585, "grad_norm": 0.5034438004309704, "learning_rate": 3.530683968640173e-05, "loss": 0.4389, "step": 37450 }, { "epoch": 1.0935460796192755, "grad_norm": 0.5077588870159562, "learning_rate": 3.530413625304137e-05, "loss": 0.4611, "step": 37455 }, { "epoch": 1.0936920628895928, "grad_norm": 0.476251294819665, "learning_rate": 3.5301432819680994e-05, "loss": 0.4427, "step": 37460 }, { "epoch": 1.09383804615991, "grad_norm": 0.45681918773878555, "learning_rate": 3.529872938632063e-05, "loss": 0.4246, "step": 37465 }, { "epoch": 1.0939840294302272, "grad_norm": 0.5544778065477023, "learning_rate": 3.529602595296026e-05, "loss": 0.457, "step": 37470 }, { "epoch": 1.0941300127005444, "grad_norm": 0.5680999890915573, "learning_rate": 3.529332251959989e-05, "loss": 0.4937, "step": 37475 }, { "epoch": 1.0942759959708617, "grad_norm": 0.41264788680957626, "learning_rate": 3.5290619086239524e-05, "loss": 0.4313, "step": 37480 }, { "epoch": 1.094421979241179, "grad_norm": 0.4489157420473313, "learning_rate": 3.528791565287916e-05, "loss": 0.4256, "step": 37485 }, { "epoch": 1.0945679625114961, "grad_norm": 0.5038980549506843, "learning_rate": 3.528521221951879e-05, "loss": 0.4747, "step": 37490 }, { "epoch": 1.0947139457818134, "grad_norm": 0.47427021541563164, "learning_rate": 3.5282508786158426e-05, "loss": 0.4128, "step": 37495 }, { "epoch": 1.0948599290521306, "grad_norm": 0.48889069018599624, "learning_rate": 3.527980535279806e-05, "loss": 0.4353, "step": 37500 }, { "epoch": 1.0950059123224478, "grad_norm": 0.45049603382841485, "learning_rate": 3.527710191943769e-05, "loss": 0.4456, "step": 37505 }, { "epoch": 1.095151895592765, "grad_norm": 0.540785490224129, "learning_rate": 3.527439848607732e-05, "loss": 0.4502, "step": 37510 }, { "epoch": 1.0952978788630823, "grad_norm": 0.5240108662875897, "learning_rate": 3.5271695052716955e-05, "loss": 0.42, "step": 37515 }, { "epoch": 1.0954438621333995, "grad_norm": 0.502235760953174, "learning_rate": 3.526899161935658e-05, "loss": 0.4354, "step": 37520 }, { "epoch": 1.0955898454037167, "grad_norm": 0.4930862375372053, "learning_rate": 3.5266288185996216e-05, "loss": 0.4415, "step": 37525 }, { "epoch": 1.095735828674034, "grad_norm": 0.5103579785761343, "learning_rate": 3.526358475263585e-05, "loss": 0.4516, "step": 37530 }, { "epoch": 1.0958818119443512, "grad_norm": 0.49653374770737835, "learning_rate": 3.526088131927548e-05, "loss": 0.4648, "step": 37535 }, { "epoch": 1.0960277952146684, "grad_norm": 0.43214654653738294, "learning_rate": 3.525817788591511e-05, "loss": 0.4347, "step": 37540 }, { "epoch": 1.0961737784849857, "grad_norm": 0.4935475598825564, "learning_rate": 3.5255474452554745e-05, "loss": 0.4438, "step": 37545 }, { "epoch": 1.0963197617553029, "grad_norm": 0.557982208461803, "learning_rate": 3.525277101919438e-05, "loss": 0.4739, "step": 37550 }, { "epoch": 1.09646574502562, "grad_norm": 0.5190992158482751, "learning_rate": 3.5250067585834014e-05, "loss": 0.4549, "step": 37555 }, { "epoch": 1.0966117282959373, "grad_norm": 0.5070530714822313, "learning_rate": 3.524736415247365e-05, "loss": 0.4494, "step": 37560 }, { "epoch": 1.0967577115662546, "grad_norm": 0.4802885541387296, "learning_rate": 3.5244660719113275e-05, "loss": 0.4584, "step": 37565 }, { "epoch": 1.0969036948365718, "grad_norm": 0.5041525353694736, "learning_rate": 3.524195728575291e-05, "loss": 0.4169, "step": 37570 }, { "epoch": 1.097049678106889, "grad_norm": 0.5169421109888707, "learning_rate": 3.523925385239254e-05, "loss": 0.4493, "step": 37575 }, { "epoch": 1.0971956613772063, "grad_norm": 0.6352195296529699, "learning_rate": 3.523655041903217e-05, "loss": 0.4755, "step": 37580 }, { "epoch": 1.0973416446475235, "grad_norm": 0.44460134811515023, "learning_rate": 3.5233846985671804e-05, "loss": 0.466, "step": 37585 }, { "epoch": 1.0974876279178407, "grad_norm": 0.452015069510055, "learning_rate": 3.523114355231144e-05, "loss": 0.3948, "step": 37590 }, { "epoch": 1.097633611188158, "grad_norm": 0.5019426570778768, "learning_rate": 3.5228440118951065e-05, "loss": 0.4886, "step": 37595 }, { "epoch": 1.097779594458475, "grad_norm": 0.46860116713132866, "learning_rate": 3.5225736685590706e-05, "loss": 0.4405, "step": 37600 }, { "epoch": 1.0979255777287922, "grad_norm": 0.49331369518412566, "learning_rate": 3.522303325223033e-05, "loss": 0.4729, "step": 37605 }, { "epoch": 1.0980715609991094, "grad_norm": 0.45657269426138314, "learning_rate": 3.522032981886997e-05, "loss": 0.4342, "step": 37610 }, { "epoch": 1.0982175442694266, "grad_norm": 0.5120403661098099, "learning_rate": 3.52176263855096e-05, "loss": 0.4511, "step": 37615 }, { "epoch": 1.0983635275397439, "grad_norm": 0.46492249157592275, "learning_rate": 3.521492295214923e-05, "loss": 0.4486, "step": 37620 }, { "epoch": 1.098509510810061, "grad_norm": 0.49956546949370056, "learning_rate": 3.521221951878886e-05, "loss": 0.4415, "step": 37625 }, { "epoch": 1.0986554940803783, "grad_norm": 0.4429715071975797, "learning_rate": 3.5209516085428497e-05, "loss": 0.4152, "step": 37630 }, { "epoch": 1.0988014773506956, "grad_norm": 0.46794721941515904, "learning_rate": 3.520681265206813e-05, "loss": 0.4368, "step": 37635 }, { "epoch": 1.0989474606210128, "grad_norm": 0.45714131021209875, "learning_rate": 3.520410921870776e-05, "loss": 0.4184, "step": 37640 }, { "epoch": 1.09909344389133, "grad_norm": 0.4789773243905403, "learning_rate": 3.520140578534739e-05, "loss": 0.4416, "step": 37645 }, { "epoch": 1.0992394271616472, "grad_norm": 0.5321554580139474, "learning_rate": 3.5198702351987026e-05, "loss": 0.4369, "step": 37650 }, { "epoch": 1.0993854104319645, "grad_norm": 0.4965667072144064, "learning_rate": 3.519599891862665e-05, "loss": 0.4564, "step": 37655 }, { "epoch": 1.0995313937022817, "grad_norm": 0.44549279681720017, "learning_rate": 3.5193295485266294e-05, "loss": 0.4128, "step": 37660 }, { "epoch": 1.099677376972599, "grad_norm": 0.49447286603833546, "learning_rate": 3.519059205190592e-05, "loss": 0.4793, "step": 37665 }, { "epoch": 1.0998233602429162, "grad_norm": 0.49925353244700194, "learning_rate": 3.5187888618545555e-05, "loss": 0.4504, "step": 37670 }, { "epoch": 1.0999693435132334, "grad_norm": 0.48317045666101177, "learning_rate": 3.518518518518519e-05, "loss": 0.4464, "step": 37675 }, { "epoch": 1.1001153267835506, "grad_norm": 0.483149025976519, "learning_rate": 3.5182481751824816e-05, "loss": 0.4412, "step": 37680 }, { "epoch": 1.1002613100538678, "grad_norm": 0.5026983312398298, "learning_rate": 3.517977831846445e-05, "loss": 0.4381, "step": 37685 }, { "epoch": 1.100407293324185, "grad_norm": 0.40398189734653916, "learning_rate": 3.5177074885104084e-05, "loss": 0.3974, "step": 37690 }, { "epoch": 1.1005532765945023, "grad_norm": 0.480327703574939, "learning_rate": 3.517437145174372e-05, "loss": 0.4486, "step": 37695 }, { "epoch": 1.1006992598648195, "grad_norm": 0.5385680582822512, "learning_rate": 3.5171668018383346e-05, "loss": 0.4504, "step": 37700 }, { "epoch": 1.1008452431351368, "grad_norm": 0.4976754409481508, "learning_rate": 3.516896458502298e-05, "loss": 0.4341, "step": 37705 }, { "epoch": 1.100991226405454, "grad_norm": 0.5013355056973593, "learning_rate": 3.5166261151662614e-05, "loss": 0.4593, "step": 37710 }, { "epoch": 1.1011372096757712, "grad_norm": 0.5209097450371619, "learning_rate": 3.516355771830225e-05, "loss": 0.4749, "step": 37715 }, { "epoch": 1.1012831929460885, "grad_norm": 0.4680386078078925, "learning_rate": 3.516085428494188e-05, "loss": 0.4059, "step": 37720 }, { "epoch": 1.1014291762164057, "grad_norm": 0.5158734014310473, "learning_rate": 3.515815085158151e-05, "loss": 0.4671, "step": 37725 }, { "epoch": 1.101575159486723, "grad_norm": 0.47250104238700275, "learning_rate": 3.515544741822114e-05, "loss": 0.445, "step": 37730 }, { "epoch": 1.1017211427570401, "grad_norm": 0.5172685877630977, "learning_rate": 3.515274398486078e-05, "loss": 0.4777, "step": 37735 }, { "epoch": 1.1018671260273574, "grad_norm": 0.5109676524277142, "learning_rate": 3.5150040551500404e-05, "loss": 0.4341, "step": 37740 }, { "epoch": 1.1020131092976744, "grad_norm": 0.5206985716696383, "learning_rate": 3.514733711814004e-05, "loss": 0.4421, "step": 37745 }, { "epoch": 1.1021590925679916, "grad_norm": 0.5100276838536043, "learning_rate": 3.514463368477967e-05, "loss": 0.4496, "step": 37750 }, { "epoch": 1.1023050758383088, "grad_norm": 0.4792469534688615, "learning_rate": 3.51419302514193e-05, "loss": 0.444, "step": 37755 }, { "epoch": 1.102451059108626, "grad_norm": 0.5266339321828419, "learning_rate": 3.5139226818058933e-05, "loss": 0.4456, "step": 37760 }, { "epoch": 1.1025970423789433, "grad_norm": 0.5908561139944358, "learning_rate": 3.513652338469857e-05, "loss": 0.4249, "step": 37765 }, { "epoch": 1.1027430256492605, "grad_norm": 0.4961092642248354, "learning_rate": 3.51338199513382e-05, "loss": 0.4395, "step": 37770 }, { "epoch": 1.1028890089195778, "grad_norm": 0.49254251272949995, "learning_rate": 3.5131116517977835e-05, "loss": 0.4218, "step": 37775 }, { "epoch": 1.103034992189895, "grad_norm": 0.5001660386258345, "learning_rate": 3.512841308461747e-05, "loss": 0.4886, "step": 37780 }, { "epoch": 1.1031809754602122, "grad_norm": 0.5126530851945553, "learning_rate": 3.51257096512571e-05, "loss": 0.4359, "step": 37785 }, { "epoch": 1.1033269587305294, "grad_norm": 0.6352606412817912, "learning_rate": 3.512300621789673e-05, "loss": 0.4464, "step": 37790 }, { "epoch": 1.1034729420008467, "grad_norm": 0.48535363215333, "learning_rate": 3.5120302784536365e-05, "loss": 0.4281, "step": 37795 }, { "epoch": 1.103618925271164, "grad_norm": 0.5020722306427657, "learning_rate": 3.511759935117599e-05, "loss": 0.4632, "step": 37800 }, { "epoch": 1.1037649085414811, "grad_norm": 0.5050536952545932, "learning_rate": 3.5114895917815626e-05, "loss": 0.4376, "step": 37805 }, { "epoch": 1.1039108918117984, "grad_norm": 0.5296829681512092, "learning_rate": 3.511219248445526e-05, "loss": 0.4882, "step": 37810 }, { "epoch": 1.1040568750821156, "grad_norm": 0.5121487311783671, "learning_rate": 3.510948905109489e-05, "loss": 0.4639, "step": 37815 }, { "epoch": 1.1042028583524328, "grad_norm": 0.47984858662209595, "learning_rate": 3.510678561773452e-05, "loss": 0.4262, "step": 37820 }, { "epoch": 1.10434884162275, "grad_norm": 0.456318051648646, "learning_rate": 3.510408218437416e-05, "loss": 0.4643, "step": 37825 }, { "epoch": 1.1044948248930673, "grad_norm": 0.4802149760683741, "learning_rate": 3.510137875101379e-05, "loss": 0.4507, "step": 37830 }, { "epoch": 1.1046408081633845, "grad_norm": 0.4797099143266116, "learning_rate": 3.509867531765342e-05, "loss": 0.4704, "step": 37835 }, { "epoch": 1.1047867914337017, "grad_norm": 0.4814082750531259, "learning_rate": 3.509597188429306e-05, "loss": 0.4846, "step": 37840 }, { "epoch": 1.104932774704019, "grad_norm": 0.5160040219316308, "learning_rate": 3.5093268450932685e-05, "loss": 0.4628, "step": 37845 }, { "epoch": 1.1050787579743362, "grad_norm": 0.48896562974182234, "learning_rate": 3.509056501757232e-05, "loss": 0.4596, "step": 37850 }, { "epoch": 1.1052247412446534, "grad_norm": 0.5030229171673057, "learning_rate": 3.508786158421195e-05, "loss": 0.4455, "step": 37855 }, { "epoch": 1.1053707245149706, "grad_norm": 0.5215757286689212, "learning_rate": 3.508515815085158e-05, "loss": 0.4606, "step": 37860 }, { "epoch": 1.1055167077852879, "grad_norm": 0.5290040317632556, "learning_rate": 3.5082454717491214e-05, "loss": 0.476, "step": 37865 }, { "epoch": 1.105662691055605, "grad_norm": 0.48342173569571906, "learning_rate": 3.507975128413085e-05, "loss": 0.4699, "step": 37870 }, { "epoch": 1.1058086743259223, "grad_norm": 0.490103900208311, "learning_rate": 3.5077047850770475e-05, "loss": 0.465, "step": 37875 }, { "epoch": 1.1059546575962396, "grad_norm": 0.489363272022334, "learning_rate": 3.507434441741011e-05, "loss": 0.464, "step": 37880 }, { "epoch": 1.1061006408665568, "grad_norm": 0.464249891979239, "learning_rate": 3.507164098404975e-05, "loss": 0.4098, "step": 37885 }, { "epoch": 1.1062466241368738, "grad_norm": 0.5165935534924505, "learning_rate": 3.506893755068938e-05, "loss": 0.4681, "step": 37890 }, { "epoch": 1.1063926074071913, "grad_norm": 0.5464012531135214, "learning_rate": 3.506623411732901e-05, "loss": 0.4476, "step": 37895 }, { "epoch": 1.1065385906775083, "grad_norm": 0.49087574486564445, "learning_rate": 3.5063530683968645e-05, "loss": 0.4585, "step": 37900 }, { "epoch": 1.1066845739478255, "grad_norm": 0.48877840276711604, "learning_rate": 3.506082725060827e-05, "loss": 0.4573, "step": 37905 }, { "epoch": 1.1068305572181427, "grad_norm": 0.5113345809641432, "learning_rate": 3.5058123817247906e-05, "loss": 0.4691, "step": 37910 }, { "epoch": 1.10697654048846, "grad_norm": 0.5175914154088279, "learning_rate": 3.505542038388754e-05, "loss": 0.4442, "step": 37915 }, { "epoch": 1.1071225237587772, "grad_norm": 0.48173283022689534, "learning_rate": 3.505271695052717e-05, "loss": 0.458, "step": 37920 }, { "epoch": 1.1072685070290944, "grad_norm": 0.467752376619967, "learning_rate": 3.50500135171668e-05, "loss": 0.4343, "step": 37925 }, { "epoch": 1.1074144902994116, "grad_norm": 0.5147637609258746, "learning_rate": 3.5047310083806436e-05, "loss": 0.4381, "step": 37930 }, { "epoch": 1.1075604735697289, "grad_norm": 0.4652552111353881, "learning_rate": 3.504460665044606e-05, "loss": 0.4585, "step": 37935 }, { "epoch": 1.107706456840046, "grad_norm": 0.5295307271901329, "learning_rate": 3.5041903217085704e-05, "loss": 0.4473, "step": 37940 }, { "epoch": 1.1078524401103633, "grad_norm": 0.5090681203274648, "learning_rate": 3.503919978372534e-05, "loss": 0.4464, "step": 37945 }, { "epoch": 1.1079984233806806, "grad_norm": 0.4858685319299979, "learning_rate": 3.5036496350364965e-05, "loss": 0.45, "step": 37950 }, { "epoch": 1.1081444066509978, "grad_norm": 0.48129343850911666, "learning_rate": 3.50337929170046e-05, "loss": 0.4179, "step": 37955 }, { "epoch": 1.108290389921315, "grad_norm": 0.5116937449565178, "learning_rate": 3.503108948364423e-05, "loss": 0.45, "step": 37960 }, { "epoch": 1.1084363731916322, "grad_norm": 0.5264915471431262, "learning_rate": 3.502838605028386e-05, "loss": 0.4798, "step": 37965 }, { "epoch": 1.1085823564619495, "grad_norm": 0.46919794040890495, "learning_rate": 3.5025682616923494e-05, "loss": 0.4457, "step": 37970 }, { "epoch": 1.1087283397322667, "grad_norm": 0.5244981532446576, "learning_rate": 3.502297918356313e-05, "loss": 0.4619, "step": 37975 }, { "epoch": 1.108874323002584, "grad_norm": 0.48818359204354494, "learning_rate": 3.5020275750202755e-05, "loss": 0.4502, "step": 37980 }, { "epoch": 1.1090203062729012, "grad_norm": 0.49286208714998236, "learning_rate": 3.501757231684239e-05, "loss": 0.4781, "step": 37985 }, { "epoch": 1.1091662895432184, "grad_norm": 0.5282324893367365, "learning_rate": 3.5014868883482023e-05, "loss": 0.4582, "step": 37990 }, { "epoch": 1.1093122728135356, "grad_norm": 0.48015669121175825, "learning_rate": 3.501216545012166e-05, "loss": 0.4577, "step": 37995 }, { "epoch": 1.1094582560838528, "grad_norm": 0.4976731168114031, "learning_rate": 3.500946201676129e-05, "loss": 0.4619, "step": 38000 }, { "epoch": 1.10960423935417, "grad_norm": 0.5074242212531087, "learning_rate": 3.5006758583400925e-05, "loss": 0.4499, "step": 38005 }, { "epoch": 1.1097502226244873, "grad_norm": 0.49441667474528844, "learning_rate": 3.500405515004055e-05, "loss": 0.4464, "step": 38010 }, { "epoch": 1.1098962058948045, "grad_norm": 0.4883725783553711, "learning_rate": 3.500135171668019e-05, "loss": 0.4186, "step": 38015 }, { "epoch": 1.1100421891651218, "grad_norm": 0.48330635587598003, "learning_rate": 3.499864828331982e-05, "loss": 0.4391, "step": 38020 }, { "epoch": 1.110188172435439, "grad_norm": 0.4999272824402941, "learning_rate": 3.499594484995945e-05, "loss": 0.4551, "step": 38025 }, { "epoch": 1.1103341557057562, "grad_norm": 0.4808870692314805, "learning_rate": 3.499324141659908e-05, "loss": 0.4251, "step": 38030 }, { "epoch": 1.1104801389760732, "grad_norm": 0.49874023884491697, "learning_rate": 3.4990537983238716e-05, "loss": 0.4377, "step": 38035 }, { "epoch": 1.1106261222463907, "grad_norm": 0.4774603036328655, "learning_rate": 3.498783454987834e-05, "loss": 0.4497, "step": 38040 }, { "epoch": 1.1107721055167077, "grad_norm": 0.4768855822765853, "learning_rate": 3.498513111651798e-05, "loss": 0.4471, "step": 38045 }, { "epoch": 1.110918088787025, "grad_norm": 0.4569046459633886, "learning_rate": 3.498242768315761e-05, "loss": 0.4201, "step": 38050 }, { "epoch": 1.1110640720573421, "grad_norm": 0.4783627829930949, "learning_rate": 3.4979724249797245e-05, "loss": 0.4227, "step": 38055 }, { "epoch": 1.1112100553276594, "grad_norm": 0.5059068537474598, "learning_rate": 3.497702081643688e-05, "loss": 0.5032, "step": 38060 }, { "epoch": 1.1113560385979766, "grad_norm": 0.47828239882055057, "learning_rate": 3.497431738307651e-05, "loss": 0.474, "step": 38065 }, { "epoch": 1.1115020218682938, "grad_norm": 0.49736958041512025, "learning_rate": 3.497161394971614e-05, "loss": 0.4899, "step": 38070 }, { "epoch": 1.111648005138611, "grad_norm": 0.4740545517117727, "learning_rate": 3.4968910516355775e-05, "loss": 0.4556, "step": 38075 }, { "epoch": 1.1117939884089283, "grad_norm": 0.5143698925376923, "learning_rate": 3.496620708299541e-05, "loss": 0.4653, "step": 38080 }, { "epoch": 1.1119399716792455, "grad_norm": 0.4966439550445986, "learning_rate": 3.4963503649635036e-05, "loss": 0.4455, "step": 38085 }, { "epoch": 1.1120859549495627, "grad_norm": 0.49475300454211396, "learning_rate": 3.496080021627467e-05, "loss": 0.4233, "step": 38090 }, { "epoch": 1.11223193821988, "grad_norm": 0.49379798668583896, "learning_rate": 3.4958096782914304e-05, "loss": 0.4331, "step": 38095 }, { "epoch": 1.1123779214901972, "grad_norm": 0.5459479679934605, "learning_rate": 3.495539334955393e-05, "loss": 0.4731, "step": 38100 }, { "epoch": 1.1125239047605144, "grad_norm": 0.4698352119957212, "learning_rate": 3.4952689916193565e-05, "loss": 0.414, "step": 38105 }, { "epoch": 1.1126698880308317, "grad_norm": 0.452496755052012, "learning_rate": 3.49499864828332e-05, "loss": 0.438, "step": 38110 }, { "epoch": 1.112815871301149, "grad_norm": 0.5042113842802378, "learning_rate": 3.494728304947283e-05, "loss": 0.4398, "step": 38115 }, { "epoch": 1.1129618545714661, "grad_norm": 0.5120593794110598, "learning_rate": 3.494457961611247e-05, "loss": 0.4434, "step": 38120 }, { "epoch": 1.1131078378417834, "grad_norm": 0.5352296582909113, "learning_rate": 3.49418761827521e-05, "loss": 0.4928, "step": 38125 }, { "epoch": 1.1132538211121006, "grad_norm": 0.530213018535408, "learning_rate": 3.493917274939173e-05, "loss": 0.4764, "step": 38130 }, { "epoch": 1.1133998043824178, "grad_norm": 0.4682215453319905, "learning_rate": 3.493646931603136e-05, "loss": 0.4453, "step": 38135 }, { "epoch": 1.113545787652735, "grad_norm": 0.4844915305697557, "learning_rate": 3.4933765882670996e-05, "loss": 0.4253, "step": 38140 }, { "epoch": 1.1136917709230523, "grad_norm": 0.4403330888301466, "learning_rate": 3.4931062449310624e-05, "loss": 0.4157, "step": 38145 }, { "epoch": 1.1138377541933695, "grad_norm": 0.4772332028657211, "learning_rate": 3.492835901595026e-05, "loss": 0.4597, "step": 38150 }, { "epoch": 1.1139837374636867, "grad_norm": 0.5825207954539029, "learning_rate": 3.492565558258989e-05, "loss": 0.4658, "step": 38155 }, { "epoch": 1.114129720734004, "grad_norm": 0.4696665672074267, "learning_rate": 3.492295214922952e-05, "loss": 0.4576, "step": 38160 }, { "epoch": 1.1142757040043212, "grad_norm": 0.5018956661859788, "learning_rate": 3.492024871586916e-05, "loss": 0.4575, "step": 38165 }, { "epoch": 1.1144216872746384, "grad_norm": 0.48458098343642303, "learning_rate": 3.491754528250879e-05, "loss": 0.4404, "step": 38170 }, { "epoch": 1.1145676705449556, "grad_norm": 0.48816467503918687, "learning_rate": 3.491484184914842e-05, "loss": 0.437, "step": 38175 }, { "epoch": 1.1147136538152727, "grad_norm": 0.49705774265340014, "learning_rate": 3.4912138415788055e-05, "loss": 0.4273, "step": 38180 }, { "epoch": 1.11485963708559, "grad_norm": 0.4832039506033928, "learning_rate": 3.490943498242769e-05, "loss": 0.474, "step": 38185 }, { "epoch": 1.1150056203559071, "grad_norm": 0.4789725823998034, "learning_rate": 3.4906731549067316e-05, "loss": 0.4387, "step": 38190 }, { "epoch": 1.1151516036262243, "grad_norm": 0.5327341859350336, "learning_rate": 3.490402811570695e-05, "loss": 0.4667, "step": 38195 }, { "epoch": 1.1152975868965416, "grad_norm": 0.49458450976655594, "learning_rate": 3.4901324682346584e-05, "loss": 0.4597, "step": 38200 }, { "epoch": 1.1154435701668588, "grad_norm": 0.47070431123505435, "learning_rate": 3.489862124898621e-05, "loss": 0.4281, "step": 38205 }, { "epoch": 1.115589553437176, "grad_norm": 0.5261548883061228, "learning_rate": 3.4895917815625845e-05, "loss": 0.478, "step": 38210 }, { "epoch": 1.1157355367074933, "grad_norm": 0.4770063321760695, "learning_rate": 3.489321438226548e-05, "loss": 0.45, "step": 38215 }, { "epoch": 1.1158815199778105, "grad_norm": 0.48481417992877324, "learning_rate": 3.489051094890511e-05, "loss": 0.4576, "step": 38220 }, { "epoch": 1.1160275032481277, "grad_norm": 0.4702986067115493, "learning_rate": 3.488780751554475e-05, "loss": 0.4386, "step": 38225 }, { "epoch": 1.116173486518445, "grad_norm": 0.44752735798895854, "learning_rate": 3.4885104082184375e-05, "loss": 0.4183, "step": 38230 }, { "epoch": 1.1163194697887622, "grad_norm": 0.4557330184067742, "learning_rate": 3.488240064882401e-05, "loss": 0.4575, "step": 38235 }, { "epoch": 1.1164654530590794, "grad_norm": 0.48665299606914264, "learning_rate": 3.487969721546364e-05, "loss": 0.4754, "step": 38240 }, { "epoch": 1.1166114363293966, "grad_norm": 0.5914093494251969, "learning_rate": 3.487699378210327e-05, "loss": 0.4363, "step": 38245 }, { "epoch": 1.1167574195997139, "grad_norm": 0.5121649923714121, "learning_rate": 3.4874290348742904e-05, "loss": 0.4391, "step": 38250 }, { "epoch": 1.116903402870031, "grad_norm": 0.4814854363357068, "learning_rate": 3.487158691538254e-05, "loss": 0.4332, "step": 38255 }, { "epoch": 1.1170493861403483, "grad_norm": 0.4832084948278466, "learning_rate": 3.486888348202217e-05, "loss": 0.4442, "step": 38260 }, { "epoch": 1.1171953694106656, "grad_norm": 0.5021380777147835, "learning_rate": 3.48661800486618e-05, "loss": 0.4476, "step": 38265 }, { "epoch": 1.1173413526809828, "grad_norm": 0.4917172908703215, "learning_rate": 3.486347661530143e-05, "loss": 0.4322, "step": 38270 }, { "epoch": 1.1174873359513, "grad_norm": 0.47435570004625366, "learning_rate": 3.486077318194107e-05, "loss": 0.4625, "step": 38275 }, { "epoch": 1.1176333192216172, "grad_norm": 0.4722917456650494, "learning_rate": 3.48580697485807e-05, "loss": 0.4446, "step": 38280 }, { "epoch": 1.1177793024919345, "grad_norm": 0.4415956667781828, "learning_rate": 3.4855366315220335e-05, "loss": 0.4492, "step": 38285 }, { "epoch": 1.1179252857622517, "grad_norm": 0.4751091447058009, "learning_rate": 3.485266288185996e-05, "loss": 0.467, "step": 38290 }, { "epoch": 1.118071269032569, "grad_norm": 0.5072442290406124, "learning_rate": 3.4849959448499596e-05, "loss": 0.4886, "step": 38295 }, { "epoch": 1.1182172523028862, "grad_norm": 0.4579945554993043, "learning_rate": 3.484725601513923e-05, "loss": 0.4363, "step": 38300 }, { "epoch": 1.1183632355732034, "grad_norm": 0.47421639559174605, "learning_rate": 3.484455258177886e-05, "loss": 0.4475, "step": 38305 }, { "epoch": 1.1185092188435206, "grad_norm": 0.4892940213079924, "learning_rate": 3.484184914841849e-05, "loss": 0.46, "step": 38310 }, { "epoch": 1.1186552021138378, "grad_norm": 0.4611083424676873, "learning_rate": 3.4839145715058126e-05, "loss": 0.4291, "step": 38315 }, { "epoch": 1.118801185384155, "grad_norm": 0.48890642236706755, "learning_rate": 3.483644228169776e-05, "loss": 0.4292, "step": 38320 }, { "epoch": 1.118947168654472, "grad_norm": 0.49308631599593666, "learning_rate": 3.483373884833739e-05, "loss": 0.4529, "step": 38325 }, { "epoch": 1.1190931519247895, "grad_norm": 0.46545379644756163, "learning_rate": 3.483103541497702e-05, "loss": 0.4475, "step": 38330 }, { "epoch": 1.1192391351951065, "grad_norm": 0.4624173510420203, "learning_rate": 3.4828331981616655e-05, "loss": 0.4368, "step": 38335 }, { "epoch": 1.1193851184654238, "grad_norm": 0.5173341256588659, "learning_rate": 3.482562854825629e-05, "loss": 0.4599, "step": 38340 }, { "epoch": 1.119531101735741, "grad_norm": 0.5212380286896914, "learning_rate": 3.482292511489592e-05, "loss": 0.456, "step": 38345 }, { "epoch": 1.1196770850060582, "grad_norm": 0.5137881193597471, "learning_rate": 3.482022168153555e-05, "loss": 0.4535, "step": 38350 }, { "epoch": 1.1198230682763755, "grad_norm": 0.4287735345063629, "learning_rate": 3.4817518248175184e-05, "loss": 0.4188, "step": 38355 }, { "epoch": 1.1199690515466927, "grad_norm": 0.4577172296222195, "learning_rate": 3.481481481481482e-05, "loss": 0.4367, "step": 38360 }, { "epoch": 1.12011503481701, "grad_norm": 0.5132447379557642, "learning_rate": 3.4812111381454446e-05, "loss": 0.4524, "step": 38365 }, { "epoch": 1.1202610180873271, "grad_norm": 0.43058272571734696, "learning_rate": 3.480940794809408e-05, "loss": 0.4333, "step": 38370 }, { "epoch": 1.1204070013576444, "grad_norm": 0.48779234397656235, "learning_rate": 3.4806704514733714e-05, "loss": 0.4423, "step": 38375 }, { "epoch": 1.1205529846279616, "grad_norm": 0.49213742220940604, "learning_rate": 3.480400108137334e-05, "loss": 0.4656, "step": 38380 }, { "epoch": 1.1206989678982788, "grad_norm": 0.5367931506603089, "learning_rate": 3.4801297648012975e-05, "loss": 0.4505, "step": 38385 }, { "epoch": 1.120844951168596, "grad_norm": 0.4792222736754918, "learning_rate": 3.479859421465261e-05, "loss": 0.4154, "step": 38390 }, { "epoch": 1.1209909344389133, "grad_norm": 0.4954496339683461, "learning_rate": 3.479589078129224e-05, "loss": 0.4867, "step": 38395 }, { "epoch": 1.1211369177092305, "grad_norm": 0.4539290530253212, "learning_rate": 3.479318734793188e-05, "loss": 0.4442, "step": 38400 }, { "epoch": 1.1212829009795477, "grad_norm": 0.4667031914299342, "learning_rate": 3.479048391457151e-05, "loss": 0.4628, "step": 38405 }, { "epoch": 1.121428884249865, "grad_norm": 0.45390699961939796, "learning_rate": 3.478778048121114e-05, "loss": 0.4142, "step": 38410 }, { "epoch": 1.1215748675201822, "grad_norm": 0.5125337156511178, "learning_rate": 3.478507704785077e-05, "loss": 0.4464, "step": 38415 }, { "epoch": 1.1217208507904994, "grad_norm": 0.4960588493391267, "learning_rate": 3.4782373614490406e-05, "loss": 0.4356, "step": 38420 }, { "epoch": 1.1218668340608167, "grad_norm": 0.4729588150473352, "learning_rate": 3.477967018113003e-05, "loss": 0.4194, "step": 38425 }, { "epoch": 1.122012817331134, "grad_norm": 0.47394711275607, "learning_rate": 3.477696674776967e-05, "loss": 0.4458, "step": 38430 }, { "epoch": 1.1221588006014511, "grad_norm": 0.5117783641204947, "learning_rate": 3.47742633144093e-05, "loss": 0.4622, "step": 38435 }, { "epoch": 1.1223047838717684, "grad_norm": 0.49857460196740483, "learning_rate": 3.477155988104893e-05, "loss": 0.4484, "step": 38440 }, { "epoch": 1.1224507671420856, "grad_norm": 0.4833423928924624, "learning_rate": 3.476885644768856e-05, "loss": 0.4441, "step": 38445 }, { "epoch": 1.1225967504124028, "grad_norm": 0.43932364270344954, "learning_rate": 3.4766153014328203e-05, "loss": 0.3992, "step": 38450 }, { "epoch": 1.12274273368272, "grad_norm": 0.4985183361216655, "learning_rate": 3.476344958096783e-05, "loss": 0.4347, "step": 38455 }, { "epoch": 1.1228887169530373, "grad_norm": 0.5329375750873311, "learning_rate": 3.4760746147607465e-05, "loss": 0.4659, "step": 38460 }, { "epoch": 1.1230347002233545, "grad_norm": 0.49760899438742, "learning_rate": 3.47580427142471e-05, "loss": 0.4575, "step": 38465 }, { "epoch": 1.1231806834936717, "grad_norm": 0.4428250689697357, "learning_rate": 3.4755339280886726e-05, "loss": 0.4287, "step": 38470 }, { "epoch": 1.123326666763989, "grad_norm": 0.48706525402742423, "learning_rate": 3.475263584752636e-05, "loss": 0.4643, "step": 38475 }, { "epoch": 1.123472650034306, "grad_norm": 0.5713526991543398, "learning_rate": 3.4749932414165994e-05, "loss": 0.4685, "step": 38480 }, { "epoch": 1.1236186333046232, "grad_norm": 0.5212471860880254, "learning_rate": 3.474722898080562e-05, "loss": 0.4796, "step": 38485 }, { "epoch": 1.1237646165749404, "grad_norm": 0.4650929765587971, "learning_rate": 3.4744525547445255e-05, "loss": 0.4527, "step": 38490 }, { "epoch": 1.1239105998452577, "grad_norm": 0.48232444624581455, "learning_rate": 3.474182211408489e-05, "loss": 0.4439, "step": 38495 }, { "epoch": 1.1240565831155749, "grad_norm": 0.45522052518453676, "learning_rate": 3.4739118680724516e-05, "loss": 0.4182, "step": 38500 }, { "epoch": 1.124202566385892, "grad_norm": 0.4414326306882229, "learning_rate": 3.473641524736416e-05, "loss": 0.4549, "step": 38505 }, { "epoch": 1.1243485496562093, "grad_norm": 0.46585082577731596, "learning_rate": 3.473371181400379e-05, "loss": 0.4473, "step": 38510 }, { "epoch": 1.1244945329265266, "grad_norm": 0.48892413423589304, "learning_rate": 3.473100838064342e-05, "loss": 0.4456, "step": 38515 }, { "epoch": 1.1246405161968438, "grad_norm": 0.504525024595467, "learning_rate": 3.472830494728305e-05, "loss": 0.4488, "step": 38520 }, { "epoch": 1.124786499467161, "grad_norm": 0.48766069784798594, "learning_rate": 3.4725601513922686e-05, "loss": 0.4342, "step": 38525 }, { "epoch": 1.1249324827374783, "grad_norm": 0.47980850367215266, "learning_rate": 3.4722898080562314e-05, "loss": 0.4519, "step": 38530 }, { "epoch": 1.1250784660077955, "grad_norm": 0.46527359210847474, "learning_rate": 3.472019464720195e-05, "loss": 0.4538, "step": 38535 }, { "epoch": 1.1252244492781127, "grad_norm": 0.47547795923440483, "learning_rate": 3.471749121384158e-05, "loss": 0.4711, "step": 38540 }, { "epoch": 1.12537043254843, "grad_norm": 0.47087734005057064, "learning_rate": 3.471478778048121e-05, "loss": 0.4553, "step": 38545 }, { "epoch": 1.1255164158187472, "grad_norm": 0.48451748732870653, "learning_rate": 3.471208434712084e-05, "loss": 0.4278, "step": 38550 }, { "epoch": 1.1256623990890644, "grad_norm": 0.5005837919779873, "learning_rate": 3.470938091376048e-05, "loss": 0.4252, "step": 38555 }, { "epoch": 1.1258083823593816, "grad_norm": 0.49694428343696256, "learning_rate": 3.4706677480400104e-05, "loss": 0.4305, "step": 38560 }, { "epoch": 1.1259543656296989, "grad_norm": 0.4518874564769177, "learning_rate": 3.4703974047039745e-05, "loss": 0.4372, "step": 38565 }, { "epoch": 1.126100348900016, "grad_norm": 0.48800702897147824, "learning_rate": 3.470127061367938e-05, "loss": 0.4524, "step": 38570 }, { "epoch": 1.1262463321703333, "grad_norm": 0.4980574149954539, "learning_rate": 3.4698567180319006e-05, "loss": 0.4405, "step": 38575 }, { "epoch": 1.1263923154406505, "grad_norm": 0.4903114126217432, "learning_rate": 3.469586374695864e-05, "loss": 0.4681, "step": 38580 }, { "epoch": 1.1265382987109678, "grad_norm": 0.5212040342238148, "learning_rate": 3.4693160313598274e-05, "loss": 0.4751, "step": 38585 }, { "epoch": 1.126684281981285, "grad_norm": 0.4733675578893074, "learning_rate": 3.46904568802379e-05, "loss": 0.4304, "step": 38590 }, { "epoch": 1.1268302652516022, "grad_norm": 0.46804015286694645, "learning_rate": 3.4687753446877536e-05, "loss": 0.4363, "step": 38595 }, { "epoch": 1.1269762485219195, "grad_norm": 0.519089447446813, "learning_rate": 3.468505001351717e-05, "loss": 0.4297, "step": 38600 }, { "epoch": 1.1271222317922367, "grad_norm": 0.5193859073441971, "learning_rate": 3.46823465801568e-05, "loss": 0.4577, "step": 38605 }, { "epoch": 1.127268215062554, "grad_norm": 0.5074260740586497, "learning_rate": 3.467964314679643e-05, "loss": 0.4355, "step": 38610 }, { "epoch": 1.127414198332871, "grad_norm": 0.42277773787033857, "learning_rate": 3.4676939713436065e-05, "loss": 0.4126, "step": 38615 }, { "epoch": 1.1275601816031884, "grad_norm": 0.4416412461865432, "learning_rate": 3.46742362800757e-05, "loss": 0.4436, "step": 38620 }, { "epoch": 1.1277061648735054, "grad_norm": 0.4604113555887875, "learning_rate": 3.467153284671533e-05, "loss": 0.4696, "step": 38625 }, { "epoch": 1.1278521481438228, "grad_norm": 0.44487283533593297, "learning_rate": 3.466882941335497e-05, "loss": 0.4301, "step": 38630 }, { "epoch": 1.1279981314141398, "grad_norm": 0.45962141471944185, "learning_rate": 3.4666125979994594e-05, "loss": 0.475, "step": 38635 }, { "epoch": 1.128144114684457, "grad_norm": 0.46595137562924055, "learning_rate": 3.466342254663423e-05, "loss": 0.431, "step": 38640 }, { "epoch": 1.1282900979547743, "grad_norm": 0.5349342136518573, "learning_rate": 3.466071911327386e-05, "loss": 0.4572, "step": 38645 }, { "epoch": 1.1284360812250915, "grad_norm": 0.46146192750527365, "learning_rate": 3.465801567991349e-05, "loss": 0.4345, "step": 38650 }, { "epoch": 1.1285820644954088, "grad_norm": 0.5034169435673969, "learning_rate": 3.465531224655312e-05, "loss": 0.4566, "step": 38655 }, { "epoch": 1.128728047765726, "grad_norm": 0.49385970354452136, "learning_rate": 3.465260881319276e-05, "loss": 0.4739, "step": 38660 }, { "epoch": 1.1288740310360432, "grad_norm": 0.4896645834217128, "learning_rate": 3.4649905379832385e-05, "loss": 0.452, "step": 38665 }, { "epoch": 1.1290200143063605, "grad_norm": 0.47376095955085007, "learning_rate": 3.464720194647202e-05, "loss": 0.4295, "step": 38670 }, { "epoch": 1.1291659975766777, "grad_norm": 0.5009472378009973, "learning_rate": 3.464449851311166e-05, "loss": 0.4399, "step": 38675 }, { "epoch": 1.129311980846995, "grad_norm": 0.46966786329155913, "learning_rate": 3.4641795079751287e-05, "loss": 0.4666, "step": 38680 }, { "epoch": 1.1294579641173121, "grad_norm": 0.466332848538872, "learning_rate": 3.463909164639092e-05, "loss": 0.4573, "step": 38685 }, { "epoch": 1.1296039473876294, "grad_norm": 0.4185244957622967, "learning_rate": 3.4636388213030555e-05, "loss": 0.4211, "step": 38690 }, { "epoch": 1.1297499306579466, "grad_norm": 0.4980785078198567, "learning_rate": 3.463368477967018e-05, "loss": 0.4503, "step": 38695 }, { "epoch": 1.1298959139282638, "grad_norm": 0.5211537672176162, "learning_rate": 3.4630981346309816e-05, "loss": 0.4793, "step": 38700 }, { "epoch": 1.130041897198581, "grad_norm": 0.44760171535898147, "learning_rate": 3.462827791294945e-05, "loss": 0.4563, "step": 38705 }, { "epoch": 1.1301878804688983, "grad_norm": 0.5027807687936721, "learning_rate": 3.462557447958908e-05, "loss": 0.4389, "step": 38710 }, { "epoch": 1.1303338637392155, "grad_norm": 0.4638992431438287, "learning_rate": 3.462287104622871e-05, "loss": 0.4381, "step": 38715 }, { "epoch": 1.1304798470095327, "grad_norm": 0.5235004577248997, "learning_rate": 3.4620167612868345e-05, "loss": 0.4604, "step": 38720 }, { "epoch": 1.13062583027985, "grad_norm": 0.49605658314283074, "learning_rate": 3.461746417950797e-05, "loss": 0.4511, "step": 38725 }, { "epoch": 1.1307718135501672, "grad_norm": 0.5310374352999895, "learning_rate": 3.4614760746147606e-05, "loss": 0.4828, "step": 38730 }, { "epoch": 1.1309177968204844, "grad_norm": 0.5259746330639916, "learning_rate": 3.461205731278725e-05, "loss": 0.4682, "step": 38735 }, { "epoch": 1.1310637800908017, "grad_norm": 0.5294738034742316, "learning_rate": 3.4609353879426874e-05, "loss": 0.4621, "step": 38740 }, { "epoch": 1.131209763361119, "grad_norm": 0.5245735834473553, "learning_rate": 3.460665044606651e-05, "loss": 0.4412, "step": 38745 }, { "epoch": 1.1313557466314361, "grad_norm": 0.5065539409387768, "learning_rate": 3.460394701270614e-05, "loss": 0.4579, "step": 38750 }, { "epoch": 1.1315017299017534, "grad_norm": 0.46492845223852647, "learning_rate": 3.460124357934577e-05, "loss": 0.4214, "step": 38755 }, { "epoch": 1.1316477131720704, "grad_norm": 0.4996761942341037, "learning_rate": 3.4598540145985404e-05, "loss": 0.4434, "step": 38760 }, { "epoch": 1.1317936964423878, "grad_norm": 0.4926835842778048, "learning_rate": 3.459583671262504e-05, "loss": 0.4325, "step": 38765 }, { "epoch": 1.1319396797127048, "grad_norm": 0.4877500127506136, "learning_rate": 3.4593133279264665e-05, "loss": 0.4481, "step": 38770 }, { "epoch": 1.1320856629830223, "grad_norm": 0.46794687142380315, "learning_rate": 3.45904298459043e-05, "loss": 0.4566, "step": 38775 }, { "epoch": 1.1322316462533393, "grad_norm": 0.45214160647932095, "learning_rate": 3.458772641254393e-05, "loss": 0.4555, "step": 38780 }, { "epoch": 1.1323776295236565, "grad_norm": 0.4785602080178122, "learning_rate": 3.458502297918356e-05, "loss": 0.4657, "step": 38785 }, { "epoch": 1.1325236127939737, "grad_norm": 0.4957847061090922, "learning_rate": 3.45823195458232e-05, "loss": 0.4435, "step": 38790 }, { "epoch": 1.132669596064291, "grad_norm": 0.5141420333959893, "learning_rate": 3.457961611246283e-05, "loss": 0.4606, "step": 38795 }, { "epoch": 1.1328155793346082, "grad_norm": 0.4519146934580386, "learning_rate": 3.457691267910246e-05, "loss": 0.4473, "step": 38800 }, { "epoch": 1.1329615626049254, "grad_norm": 0.47534063918997954, "learning_rate": 3.4574209245742096e-05, "loss": 0.4697, "step": 38805 }, { "epoch": 1.1331075458752426, "grad_norm": 0.45389925536247044, "learning_rate": 3.457150581238173e-05, "loss": 0.4518, "step": 38810 }, { "epoch": 1.1332535291455599, "grad_norm": 0.4595817311828508, "learning_rate": 3.456880237902136e-05, "loss": 0.4498, "step": 38815 }, { "epoch": 1.133399512415877, "grad_norm": 0.44574206010651723, "learning_rate": 3.456609894566099e-05, "loss": 0.4155, "step": 38820 }, { "epoch": 1.1335454956861943, "grad_norm": 0.4243876362805203, "learning_rate": 3.4563395512300625e-05, "loss": 0.4056, "step": 38825 }, { "epoch": 1.1336914789565116, "grad_norm": 0.49587228537347666, "learning_rate": 3.456069207894025e-05, "loss": 0.4672, "step": 38830 }, { "epoch": 1.1338374622268288, "grad_norm": 0.49868715735525065, "learning_rate": 3.455798864557989e-05, "loss": 0.4799, "step": 38835 }, { "epoch": 1.133983445497146, "grad_norm": 0.5014078090866291, "learning_rate": 3.455528521221952e-05, "loss": 0.4238, "step": 38840 }, { "epoch": 1.1341294287674633, "grad_norm": 0.49275072208707554, "learning_rate": 3.4552581778859155e-05, "loss": 0.4578, "step": 38845 }, { "epoch": 1.1342754120377805, "grad_norm": 0.49741620047681534, "learning_rate": 3.454987834549879e-05, "loss": 0.4474, "step": 38850 }, { "epoch": 1.1344213953080977, "grad_norm": 0.5354607471412485, "learning_rate": 3.4547174912138416e-05, "loss": 0.4613, "step": 38855 }, { "epoch": 1.134567378578415, "grad_norm": 0.4818255513601331, "learning_rate": 3.454447147877805e-05, "loss": 0.4693, "step": 38860 }, { "epoch": 1.1347133618487322, "grad_norm": 0.4932987255098399, "learning_rate": 3.4541768045417684e-05, "loss": 0.4376, "step": 38865 }, { "epoch": 1.1348593451190494, "grad_norm": 0.4798299999805462, "learning_rate": 3.453906461205732e-05, "loss": 0.4463, "step": 38870 }, { "epoch": 1.1350053283893666, "grad_norm": 0.4763364491279672, "learning_rate": 3.4536361178696945e-05, "loss": 0.4276, "step": 38875 }, { "epoch": 1.1351513116596839, "grad_norm": 0.4989130391280858, "learning_rate": 3.453365774533658e-05, "loss": 0.4537, "step": 38880 }, { "epoch": 1.135297294930001, "grad_norm": 0.46953935987192397, "learning_rate": 3.453095431197621e-05, "loss": 0.4505, "step": 38885 }, { "epoch": 1.1354432782003183, "grad_norm": 0.4959323711129003, "learning_rate": 3.452825087861584e-05, "loss": 0.4439, "step": 38890 }, { "epoch": 1.1355892614706355, "grad_norm": 0.49445609553232484, "learning_rate": 3.4525547445255475e-05, "loss": 0.478, "step": 38895 }, { "epoch": 1.1357352447409528, "grad_norm": 0.4732195384700816, "learning_rate": 3.452284401189511e-05, "loss": 0.4415, "step": 38900 }, { "epoch": 1.13588122801127, "grad_norm": 0.46016586090573963, "learning_rate": 3.452014057853474e-05, "loss": 0.433, "step": 38905 }, { "epoch": 1.1360272112815872, "grad_norm": 0.4798349530739856, "learning_rate": 3.4517437145174377e-05, "loss": 0.4223, "step": 38910 }, { "epoch": 1.1361731945519042, "grad_norm": 0.53817106482587, "learning_rate": 3.4514733711814004e-05, "loss": 0.4499, "step": 38915 }, { "epoch": 1.1363191778222217, "grad_norm": 0.47323371596094954, "learning_rate": 3.451203027845364e-05, "loss": 0.4629, "step": 38920 }, { "epoch": 1.1364651610925387, "grad_norm": 0.4969924237219572, "learning_rate": 3.450932684509327e-05, "loss": 0.478, "step": 38925 }, { "epoch": 1.136611144362856, "grad_norm": 0.5290556780915809, "learning_rate": 3.45066234117329e-05, "loss": 0.4522, "step": 38930 }, { "epoch": 1.1367571276331732, "grad_norm": 0.4763149116832702, "learning_rate": 3.450391997837253e-05, "loss": 0.4097, "step": 38935 }, { "epoch": 1.1369031109034904, "grad_norm": 0.42915481967684876, "learning_rate": 3.450121654501217e-05, "loss": 0.4374, "step": 38940 }, { "epoch": 1.1370490941738076, "grad_norm": 0.4475648739385291, "learning_rate": 3.44985131116518e-05, "loss": 0.4407, "step": 38945 }, { "epoch": 1.1371950774441248, "grad_norm": 0.5319433310680076, "learning_rate": 3.449580967829143e-05, "loss": 0.4552, "step": 38950 }, { "epoch": 1.137341060714442, "grad_norm": 0.48859194622428903, "learning_rate": 3.449310624493106e-05, "loss": 0.4247, "step": 38955 }, { "epoch": 1.1374870439847593, "grad_norm": 0.4976909892271954, "learning_rate": 3.4490402811570696e-05, "loss": 0.4463, "step": 38960 }, { "epoch": 1.1376330272550765, "grad_norm": 0.4999269628929952, "learning_rate": 3.448769937821033e-05, "loss": 0.4386, "step": 38965 }, { "epoch": 1.1377790105253938, "grad_norm": 0.49440659339137233, "learning_rate": 3.4484995944849964e-05, "loss": 0.4721, "step": 38970 }, { "epoch": 1.137924993795711, "grad_norm": 0.4851782786884608, "learning_rate": 3.448229251148959e-05, "loss": 0.4458, "step": 38975 }, { "epoch": 1.1380709770660282, "grad_norm": 0.5052991251342316, "learning_rate": 3.4479589078129226e-05, "loss": 0.4507, "step": 38980 }, { "epoch": 1.1382169603363455, "grad_norm": 0.5774242261526283, "learning_rate": 3.447688564476886e-05, "loss": 0.4286, "step": 38985 }, { "epoch": 1.1383629436066627, "grad_norm": 0.554793460720371, "learning_rate": 3.447418221140849e-05, "loss": 0.4646, "step": 38990 }, { "epoch": 1.13850892687698, "grad_norm": 0.47851150813114063, "learning_rate": 3.447147877804812e-05, "loss": 0.4315, "step": 38995 }, { "epoch": 1.1386549101472971, "grad_norm": 0.4970534026009285, "learning_rate": 3.4468775344687755e-05, "loss": 0.4511, "step": 39000 }, { "epoch": 1.1388008934176144, "grad_norm": 0.52726804669426, "learning_rate": 3.446607191132739e-05, "loss": 0.46, "step": 39005 }, { "epoch": 1.1389468766879316, "grad_norm": 0.46015473127196865, "learning_rate": 3.4463368477967016e-05, "loss": 0.4371, "step": 39010 }, { "epoch": 1.1390928599582488, "grad_norm": 0.47549025082251517, "learning_rate": 3.446066504460666e-05, "loss": 0.451, "step": 39015 }, { "epoch": 1.139238843228566, "grad_norm": 0.5121501039226283, "learning_rate": 3.4457961611246284e-05, "loss": 0.4565, "step": 39020 }, { "epoch": 1.1393848264988833, "grad_norm": 0.504606773105028, "learning_rate": 3.445525817788592e-05, "loss": 0.473, "step": 39025 }, { "epoch": 1.1395308097692005, "grad_norm": 0.44177038359514703, "learning_rate": 3.445255474452555e-05, "loss": 0.4337, "step": 39030 }, { "epoch": 1.1396767930395177, "grad_norm": 0.4885253140665842, "learning_rate": 3.444985131116518e-05, "loss": 0.4786, "step": 39035 }, { "epoch": 1.139822776309835, "grad_norm": 0.47604910190397964, "learning_rate": 3.4447147877804813e-05, "loss": 0.4317, "step": 39040 }, { "epoch": 1.1399687595801522, "grad_norm": 0.4890714715114406, "learning_rate": 3.444444444444445e-05, "loss": 0.4299, "step": 39045 }, { "epoch": 1.1401147428504694, "grad_norm": 0.47963149491418233, "learning_rate": 3.4441741011084075e-05, "loss": 0.4217, "step": 39050 }, { "epoch": 1.1402607261207867, "grad_norm": 0.5006621743854915, "learning_rate": 3.443903757772371e-05, "loss": 0.4311, "step": 39055 }, { "epoch": 1.1404067093911037, "grad_norm": 0.5032185617937075, "learning_rate": 3.443633414436334e-05, "loss": 0.446, "step": 39060 }, { "epoch": 1.1405526926614211, "grad_norm": 0.5324354723895205, "learning_rate": 3.443363071100297e-05, "loss": 0.4746, "step": 39065 }, { "epoch": 1.1406986759317381, "grad_norm": 0.49237164635328756, "learning_rate": 3.4430927277642604e-05, "loss": 0.4746, "step": 39070 }, { "epoch": 1.1408446592020554, "grad_norm": 0.49112569174670134, "learning_rate": 3.4428223844282245e-05, "loss": 0.4213, "step": 39075 }, { "epoch": 1.1409906424723726, "grad_norm": 0.49848364206445206, "learning_rate": 3.442552041092187e-05, "loss": 0.4509, "step": 39080 }, { "epoch": 1.1411366257426898, "grad_norm": 0.48026793046730404, "learning_rate": 3.4422816977561506e-05, "loss": 0.4514, "step": 39085 }, { "epoch": 1.141282609013007, "grad_norm": 0.4675312816270822, "learning_rate": 3.442011354420114e-05, "loss": 0.4467, "step": 39090 }, { "epoch": 1.1414285922833243, "grad_norm": 0.470618692931958, "learning_rate": 3.441741011084077e-05, "loss": 0.4224, "step": 39095 }, { "epoch": 1.1415745755536415, "grad_norm": 0.5123012528370727, "learning_rate": 3.44147066774804e-05, "loss": 0.4419, "step": 39100 }, { "epoch": 1.1417205588239587, "grad_norm": 0.4861566794290503, "learning_rate": 3.4412003244120035e-05, "loss": 0.4572, "step": 39105 }, { "epoch": 1.141866542094276, "grad_norm": 0.5199984064735914, "learning_rate": 3.440929981075966e-05, "loss": 0.4667, "step": 39110 }, { "epoch": 1.1420125253645932, "grad_norm": 0.5117017145934886, "learning_rate": 3.4406596377399297e-05, "loss": 0.4543, "step": 39115 }, { "epoch": 1.1421585086349104, "grad_norm": 0.5247864631594004, "learning_rate": 3.440389294403893e-05, "loss": 0.4487, "step": 39120 }, { "epoch": 1.1423044919052276, "grad_norm": 0.4884565896518215, "learning_rate": 3.440118951067856e-05, "loss": 0.4172, "step": 39125 }, { "epoch": 1.1424504751755449, "grad_norm": 0.4601470273326058, "learning_rate": 3.43984860773182e-05, "loss": 0.4377, "step": 39130 }, { "epoch": 1.142596458445862, "grad_norm": 0.45253811722306614, "learning_rate": 3.439578264395783e-05, "loss": 0.4227, "step": 39135 }, { "epoch": 1.1427424417161793, "grad_norm": 0.4825752389671073, "learning_rate": 3.439307921059746e-05, "loss": 0.4492, "step": 39140 }, { "epoch": 1.1428884249864966, "grad_norm": 0.47429558765544205, "learning_rate": 3.4390375777237094e-05, "loss": 0.4209, "step": 39145 }, { "epoch": 1.1430344082568138, "grad_norm": 0.4961704722217023, "learning_rate": 3.438767234387673e-05, "loss": 0.5001, "step": 39150 }, { "epoch": 1.143180391527131, "grad_norm": 0.5544703993790513, "learning_rate": 3.4384968910516355e-05, "loss": 0.4839, "step": 39155 }, { "epoch": 1.1433263747974483, "grad_norm": 0.46962208505682573, "learning_rate": 3.438226547715599e-05, "loss": 0.4432, "step": 39160 }, { "epoch": 1.1434723580677655, "grad_norm": 0.497057775705692, "learning_rate": 3.437956204379562e-05, "loss": 0.4121, "step": 39165 }, { "epoch": 1.1436183413380827, "grad_norm": 0.4855264204518673, "learning_rate": 3.437685861043525e-05, "loss": 0.4442, "step": 39170 }, { "epoch": 1.1437643246084, "grad_norm": 0.4690011668284652, "learning_rate": 3.4374155177074884e-05, "loss": 0.4561, "step": 39175 }, { "epoch": 1.1439103078787172, "grad_norm": 0.4833970278434339, "learning_rate": 3.437145174371452e-05, "loss": 0.4217, "step": 39180 }, { "epoch": 1.1440562911490344, "grad_norm": 0.47940960331983234, "learning_rate": 3.436874831035415e-05, "loss": 0.4592, "step": 39185 }, { "epoch": 1.1442022744193516, "grad_norm": 0.5168376045981584, "learning_rate": 3.4366044876993786e-05, "loss": 0.4489, "step": 39190 }, { "epoch": 1.1443482576896689, "grad_norm": 0.4834552267380426, "learning_rate": 3.436334144363342e-05, "loss": 0.4273, "step": 39195 }, { "epoch": 1.144494240959986, "grad_norm": 0.4608662858854394, "learning_rate": 3.436063801027305e-05, "loss": 0.4371, "step": 39200 }, { "epoch": 1.144640224230303, "grad_norm": 0.4622510503087259, "learning_rate": 3.435793457691268e-05, "loss": 0.4446, "step": 39205 }, { "epoch": 1.1447862075006205, "grad_norm": 0.49507439057448493, "learning_rate": 3.4355231143552316e-05, "loss": 0.4223, "step": 39210 }, { "epoch": 1.1449321907709376, "grad_norm": 0.5425851902763663, "learning_rate": 3.435252771019194e-05, "loss": 0.4525, "step": 39215 }, { "epoch": 1.1450781740412548, "grad_norm": 0.4794810576547758, "learning_rate": 3.434982427683158e-05, "loss": 0.4565, "step": 39220 }, { "epoch": 1.145224157311572, "grad_norm": 0.4922145442033923, "learning_rate": 3.434712084347121e-05, "loss": 0.4541, "step": 39225 }, { "epoch": 1.1453701405818892, "grad_norm": 0.5023857921404822, "learning_rate": 3.434441741011084e-05, "loss": 0.4491, "step": 39230 }, { "epoch": 1.1455161238522065, "grad_norm": 0.48603495407977704, "learning_rate": 3.434171397675047e-05, "loss": 0.4485, "step": 39235 }, { "epoch": 1.1456621071225237, "grad_norm": 0.4408552033473463, "learning_rate": 3.4339010543390106e-05, "loss": 0.4371, "step": 39240 }, { "epoch": 1.145808090392841, "grad_norm": 0.4492443860536996, "learning_rate": 3.433630711002974e-05, "loss": 0.4412, "step": 39245 }, { "epoch": 1.1459540736631582, "grad_norm": 0.4404914378063166, "learning_rate": 3.4333603676669374e-05, "loss": 0.4326, "step": 39250 }, { "epoch": 1.1461000569334754, "grad_norm": 0.5200372801332502, "learning_rate": 3.433090024330901e-05, "loss": 0.4472, "step": 39255 }, { "epoch": 1.1462460402037926, "grad_norm": 0.5049101379942573, "learning_rate": 3.4328196809948635e-05, "loss": 0.4557, "step": 39260 }, { "epoch": 1.1463920234741098, "grad_norm": 0.4754392270766317, "learning_rate": 3.432549337658827e-05, "loss": 0.4429, "step": 39265 }, { "epoch": 1.146538006744427, "grad_norm": 0.45370785393781055, "learning_rate": 3.4322789943227903e-05, "loss": 0.4297, "step": 39270 }, { "epoch": 1.1466839900147443, "grad_norm": 0.5077455610903688, "learning_rate": 3.432008650986753e-05, "loss": 0.4188, "step": 39275 }, { "epoch": 1.1468299732850615, "grad_norm": 0.5005075437247867, "learning_rate": 3.4317383076507165e-05, "loss": 0.4495, "step": 39280 }, { "epoch": 1.1469759565553788, "grad_norm": 0.419039998537127, "learning_rate": 3.43146796431468e-05, "loss": 0.3986, "step": 39285 }, { "epoch": 1.147121939825696, "grad_norm": 0.48376120729652816, "learning_rate": 3.4311976209786426e-05, "loss": 0.4146, "step": 39290 }, { "epoch": 1.1472679230960132, "grad_norm": 0.4818427266129403, "learning_rate": 3.430927277642606e-05, "loss": 0.4249, "step": 39295 }, { "epoch": 1.1474139063663304, "grad_norm": 0.5149994557738651, "learning_rate": 3.43065693430657e-05, "loss": 0.4505, "step": 39300 }, { "epoch": 1.1475598896366477, "grad_norm": 0.4820052889930598, "learning_rate": 3.430386590970533e-05, "loss": 0.4659, "step": 39305 }, { "epoch": 1.147705872906965, "grad_norm": 0.4941882685482348, "learning_rate": 3.430116247634496e-05, "loss": 0.463, "step": 39310 }, { "epoch": 1.1478518561772821, "grad_norm": 0.4621734171164182, "learning_rate": 3.4298459042984596e-05, "loss": 0.4559, "step": 39315 }, { "epoch": 1.1479978394475994, "grad_norm": 0.5241985488024367, "learning_rate": 3.429575560962422e-05, "loss": 0.4267, "step": 39320 }, { "epoch": 1.1481438227179166, "grad_norm": 0.4727285435806766, "learning_rate": 3.429305217626386e-05, "loss": 0.4424, "step": 39325 }, { "epoch": 1.1482898059882338, "grad_norm": 0.5220961789475171, "learning_rate": 3.429034874290349e-05, "loss": 0.4511, "step": 39330 }, { "epoch": 1.148435789258551, "grad_norm": 0.4881740027311144, "learning_rate": 3.428764530954312e-05, "loss": 0.43, "step": 39335 }, { "epoch": 1.1485817725288683, "grad_norm": 0.4883365443329996, "learning_rate": 3.428494187618275e-05, "loss": 0.4445, "step": 39340 }, { "epoch": 1.1487277557991855, "grad_norm": 0.48687639764636553, "learning_rate": 3.4282238442822386e-05, "loss": 0.4413, "step": 39345 }, { "epoch": 1.1488737390695025, "grad_norm": 0.5141533389574153, "learning_rate": 3.4279535009462014e-05, "loss": 0.4723, "step": 39350 }, { "epoch": 1.14901972233982, "grad_norm": 0.44284453187799777, "learning_rate": 3.4276831576101655e-05, "loss": 0.4394, "step": 39355 }, { "epoch": 1.149165705610137, "grad_norm": 0.4888489703565556, "learning_rate": 3.427412814274129e-05, "loss": 0.4483, "step": 39360 }, { "epoch": 1.1493116888804542, "grad_norm": 0.6283036175935834, "learning_rate": 3.4271424709380916e-05, "loss": 0.4442, "step": 39365 }, { "epoch": 1.1494576721507714, "grad_norm": 0.4910016039776583, "learning_rate": 3.426872127602055e-05, "loss": 0.4474, "step": 39370 }, { "epoch": 1.1496036554210887, "grad_norm": 0.462511287761287, "learning_rate": 3.4266017842660184e-05, "loss": 0.4394, "step": 39375 }, { "epoch": 1.149749638691406, "grad_norm": 0.47107612884970734, "learning_rate": 3.426331440929981e-05, "loss": 0.4239, "step": 39380 }, { "epoch": 1.1498956219617231, "grad_norm": 0.48468049268176, "learning_rate": 3.4260610975939445e-05, "loss": 0.4537, "step": 39385 }, { "epoch": 1.1500416052320404, "grad_norm": 0.5371008176399048, "learning_rate": 3.425790754257908e-05, "loss": 0.459, "step": 39390 }, { "epoch": 1.1501875885023576, "grad_norm": 0.5093826365326789, "learning_rate": 3.4255204109218706e-05, "loss": 0.4765, "step": 39395 }, { "epoch": 1.1503335717726748, "grad_norm": 0.5105786842543086, "learning_rate": 3.425250067585834e-05, "loss": 0.4431, "step": 39400 }, { "epoch": 1.150479555042992, "grad_norm": 0.5556868841649165, "learning_rate": 3.4249797242497974e-05, "loss": 0.4573, "step": 39405 }, { "epoch": 1.1506255383133093, "grad_norm": 0.5050013253322417, "learning_rate": 3.42470938091376e-05, "loss": 0.4558, "step": 39410 }, { "epoch": 1.1507715215836265, "grad_norm": 0.4984502549549151, "learning_rate": 3.424439037577724e-05, "loss": 0.4519, "step": 39415 }, { "epoch": 1.1509175048539437, "grad_norm": 0.49541216503757735, "learning_rate": 3.424168694241687e-05, "loss": 0.4223, "step": 39420 }, { "epoch": 1.151063488124261, "grad_norm": 0.4910638821181358, "learning_rate": 3.4238983509056504e-05, "loss": 0.4707, "step": 39425 }, { "epoch": 1.1512094713945782, "grad_norm": 0.5242478906268399, "learning_rate": 3.423628007569614e-05, "loss": 0.4519, "step": 39430 }, { "epoch": 1.1513554546648954, "grad_norm": 0.5181243885738935, "learning_rate": 3.423357664233577e-05, "loss": 0.4383, "step": 39435 }, { "epoch": 1.1515014379352126, "grad_norm": 0.5488052207888304, "learning_rate": 3.42308732089754e-05, "loss": 0.4505, "step": 39440 }, { "epoch": 1.1516474212055299, "grad_norm": 0.4531233313320184, "learning_rate": 3.422816977561503e-05, "loss": 0.4294, "step": 39445 }, { "epoch": 1.151793404475847, "grad_norm": 0.44474637071170453, "learning_rate": 3.422546634225467e-05, "loss": 0.4449, "step": 39450 }, { "epoch": 1.1519393877461643, "grad_norm": 0.512149164367829, "learning_rate": 3.4222762908894294e-05, "loss": 0.4403, "step": 39455 }, { "epoch": 1.1520853710164816, "grad_norm": 0.4902586458603293, "learning_rate": 3.422005947553393e-05, "loss": 0.4521, "step": 39460 }, { "epoch": 1.1522313542867988, "grad_norm": 0.4940818534268301, "learning_rate": 3.421735604217356e-05, "loss": 0.4501, "step": 39465 }, { "epoch": 1.152377337557116, "grad_norm": 0.5211695615621693, "learning_rate": 3.4214652608813196e-05, "loss": 0.4645, "step": 39470 }, { "epoch": 1.1525233208274333, "grad_norm": 0.49447825108521787, "learning_rate": 3.421194917545283e-05, "loss": 0.4448, "step": 39475 }, { "epoch": 1.1526693040977505, "grad_norm": 0.4778320259769925, "learning_rate": 3.420924574209246e-05, "loss": 0.4338, "step": 39480 }, { "epoch": 1.1528152873680677, "grad_norm": 0.46988074745241515, "learning_rate": 3.420654230873209e-05, "loss": 0.4515, "step": 39485 }, { "epoch": 1.152961270638385, "grad_norm": 0.47931485856850065, "learning_rate": 3.4203838875371725e-05, "loss": 0.4733, "step": 39490 }, { "epoch": 1.153107253908702, "grad_norm": 0.4676459827580996, "learning_rate": 3.420113544201136e-05, "loss": 0.4332, "step": 39495 }, { "epoch": 1.1532532371790194, "grad_norm": 0.47256970983592567, "learning_rate": 3.419843200865099e-05, "loss": 0.4378, "step": 39500 }, { "epoch": 1.1533992204493364, "grad_norm": 0.47765225545978807, "learning_rate": 3.419572857529062e-05, "loss": 0.434, "step": 39505 }, { "epoch": 1.1535452037196536, "grad_norm": 0.48758022217024327, "learning_rate": 3.4193025141930255e-05, "loss": 0.4286, "step": 39510 }, { "epoch": 1.1536911869899709, "grad_norm": 0.4837164798218771, "learning_rate": 3.419032170856988e-05, "loss": 0.4248, "step": 39515 }, { "epoch": 1.153837170260288, "grad_norm": 0.489591985439747, "learning_rate": 3.4187618275209516e-05, "loss": 0.4324, "step": 39520 }, { "epoch": 1.1539831535306053, "grad_norm": 0.530521621193364, "learning_rate": 3.418491484184915e-05, "loss": 0.4442, "step": 39525 }, { "epoch": 1.1541291368009226, "grad_norm": 0.4533876322951908, "learning_rate": 3.4182211408488784e-05, "loss": 0.4629, "step": 39530 }, { "epoch": 1.1542751200712398, "grad_norm": 0.4664372598526108, "learning_rate": 3.417950797512842e-05, "loss": 0.4325, "step": 39535 }, { "epoch": 1.154421103341557, "grad_norm": 0.4567344519437662, "learning_rate": 3.4176804541768045e-05, "loss": 0.4521, "step": 39540 }, { "epoch": 1.1545670866118742, "grad_norm": 0.449909984794273, "learning_rate": 3.417410110840768e-05, "loss": 0.4507, "step": 39545 }, { "epoch": 1.1547130698821915, "grad_norm": 0.49600059172586686, "learning_rate": 3.417139767504731e-05, "loss": 0.4209, "step": 39550 }, { "epoch": 1.1548590531525087, "grad_norm": 0.46312692079077206, "learning_rate": 3.416869424168694e-05, "loss": 0.4397, "step": 39555 }, { "epoch": 1.155005036422826, "grad_norm": 0.4876547243609625, "learning_rate": 3.4165990808326574e-05, "loss": 0.4467, "step": 39560 }, { "epoch": 1.1551510196931432, "grad_norm": 0.5084583444238865, "learning_rate": 3.416328737496621e-05, "loss": 0.4715, "step": 39565 }, { "epoch": 1.1552970029634604, "grad_norm": 0.5606780263016069, "learning_rate": 3.416058394160584e-05, "loss": 0.4878, "step": 39570 }, { "epoch": 1.1554429862337776, "grad_norm": 0.5344021460443923, "learning_rate": 3.415788050824547e-05, "loss": 0.4358, "step": 39575 }, { "epoch": 1.1555889695040948, "grad_norm": 0.5054646549816899, "learning_rate": 3.4155177074885104e-05, "loss": 0.4304, "step": 39580 }, { "epoch": 1.155734952774412, "grad_norm": 0.4935923963973833, "learning_rate": 3.415247364152474e-05, "loss": 0.4666, "step": 39585 }, { "epoch": 1.1558809360447293, "grad_norm": 0.4915206364341364, "learning_rate": 3.414977020816437e-05, "loss": 0.4575, "step": 39590 }, { "epoch": 1.1560269193150465, "grad_norm": 0.45283341899819707, "learning_rate": 3.4147066774804006e-05, "loss": 0.4667, "step": 39595 }, { "epoch": 1.1561729025853638, "grad_norm": 0.4579768452858763, "learning_rate": 3.414436334144363e-05, "loss": 0.4038, "step": 39600 }, { "epoch": 1.156318885855681, "grad_norm": 0.4754992235533084, "learning_rate": 3.414165990808327e-05, "loss": 0.4555, "step": 39605 }, { "epoch": 1.1564648691259982, "grad_norm": 0.47408198064270823, "learning_rate": 3.41389564747229e-05, "loss": 0.438, "step": 39610 }, { "epoch": 1.1566108523963154, "grad_norm": 0.42221693529457605, "learning_rate": 3.413625304136253e-05, "loss": 0.4321, "step": 39615 }, { "epoch": 1.1567568356666327, "grad_norm": 0.4862989744158984, "learning_rate": 3.413354960800216e-05, "loss": 0.4294, "step": 39620 }, { "epoch": 1.15690281893695, "grad_norm": 0.5432629511472953, "learning_rate": 3.4130846174641796e-05, "loss": 0.4612, "step": 39625 }, { "epoch": 1.1570488022072671, "grad_norm": 0.547592726086427, "learning_rate": 3.412814274128143e-05, "loss": 0.4639, "step": 39630 }, { "epoch": 1.1571947854775844, "grad_norm": 0.48194910740256647, "learning_rate": 3.412543930792106e-05, "loss": 0.4545, "step": 39635 }, { "epoch": 1.1573407687479014, "grad_norm": 0.4533492259274199, "learning_rate": 3.41227358745607e-05, "loss": 0.4212, "step": 39640 }, { "epoch": 1.1574867520182188, "grad_norm": 0.46744591202834446, "learning_rate": 3.4120032441200326e-05, "loss": 0.4286, "step": 39645 }, { "epoch": 1.1576327352885358, "grad_norm": 0.4583766050135631, "learning_rate": 3.411732900783996e-05, "loss": 0.4468, "step": 39650 }, { "epoch": 1.157778718558853, "grad_norm": 0.4479017058843514, "learning_rate": 3.4114625574479594e-05, "loss": 0.44, "step": 39655 }, { "epoch": 1.1579247018291703, "grad_norm": 0.46935083088932783, "learning_rate": 3.411192214111922e-05, "loss": 0.4548, "step": 39660 }, { "epoch": 1.1580706850994875, "grad_norm": 0.4753425346066539, "learning_rate": 3.4109218707758855e-05, "loss": 0.455, "step": 39665 }, { "epoch": 1.1582166683698047, "grad_norm": 0.46988171761138653, "learning_rate": 3.410651527439849e-05, "loss": 0.4418, "step": 39670 }, { "epoch": 1.158362651640122, "grad_norm": 0.5027060981809286, "learning_rate": 3.4103811841038116e-05, "loss": 0.4308, "step": 39675 }, { "epoch": 1.1585086349104392, "grad_norm": 0.48252554372202383, "learning_rate": 3.410110840767775e-05, "loss": 0.4264, "step": 39680 }, { "epoch": 1.1586546181807564, "grad_norm": 0.5124895381418868, "learning_rate": 3.4098404974317384e-05, "loss": 0.465, "step": 39685 }, { "epoch": 1.1588006014510737, "grad_norm": 0.5139012927402147, "learning_rate": 3.409570154095701e-05, "loss": 0.4499, "step": 39690 }, { "epoch": 1.158946584721391, "grad_norm": 0.4630949515204297, "learning_rate": 3.409299810759665e-05, "loss": 0.4349, "step": 39695 }, { "epoch": 1.1590925679917081, "grad_norm": 0.4577414013424107, "learning_rate": 3.4090294674236286e-05, "loss": 0.4214, "step": 39700 }, { "epoch": 1.1592385512620254, "grad_norm": 0.4923052211938711, "learning_rate": 3.408759124087591e-05, "loss": 0.4463, "step": 39705 }, { "epoch": 1.1593845345323426, "grad_norm": 0.4966414610657298, "learning_rate": 3.408488780751555e-05, "loss": 0.4693, "step": 39710 }, { "epoch": 1.1595305178026598, "grad_norm": 0.4513011956568562, "learning_rate": 3.408218437415518e-05, "loss": 0.449, "step": 39715 }, { "epoch": 1.159676501072977, "grad_norm": 0.5221364367376438, "learning_rate": 3.407948094079481e-05, "loss": 0.4645, "step": 39720 }, { "epoch": 1.1598224843432943, "grad_norm": 0.473509827380018, "learning_rate": 3.407677750743444e-05, "loss": 0.4517, "step": 39725 }, { "epoch": 1.1599684676136115, "grad_norm": 0.4867751989448627, "learning_rate": 3.4074074074074077e-05, "loss": 0.4281, "step": 39730 }, { "epoch": 1.1601144508839287, "grad_norm": 0.47231524534681324, "learning_rate": 3.4071370640713704e-05, "loss": 0.4639, "step": 39735 }, { "epoch": 1.160260434154246, "grad_norm": 0.4870696545813601, "learning_rate": 3.406866720735334e-05, "loss": 0.4438, "step": 39740 }, { "epoch": 1.1604064174245632, "grad_norm": 0.4782262402776441, "learning_rate": 3.406596377399297e-05, "loss": 0.4657, "step": 39745 }, { "epoch": 1.1605524006948804, "grad_norm": 0.5312984648905207, "learning_rate": 3.40632603406326e-05, "loss": 0.4338, "step": 39750 }, { "epoch": 1.1606983839651976, "grad_norm": 0.5118531481673956, "learning_rate": 3.406055690727224e-05, "loss": 0.4308, "step": 39755 }, { "epoch": 1.1608443672355149, "grad_norm": 0.4987706497492304, "learning_rate": 3.4057853473911874e-05, "loss": 0.4503, "step": 39760 }, { "epoch": 1.160990350505832, "grad_norm": 0.4736728365652066, "learning_rate": 3.40551500405515e-05, "loss": 0.4325, "step": 39765 }, { "epoch": 1.1611363337761493, "grad_norm": 0.4831073888971117, "learning_rate": 3.4052446607191135e-05, "loss": 0.4435, "step": 39770 }, { "epoch": 1.1612823170464666, "grad_norm": 0.4687393246196871, "learning_rate": 3.404974317383077e-05, "loss": 0.4358, "step": 39775 }, { "epoch": 1.1614283003167838, "grad_norm": 0.507099883092318, "learning_rate": 3.4047039740470396e-05, "loss": 0.4348, "step": 39780 }, { "epoch": 1.1615742835871008, "grad_norm": 0.48635789612558966, "learning_rate": 3.404433630711003e-05, "loss": 0.4188, "step": 39785 }, { "epoch": 1.1617202668574182, "grad_norm": 0.49710128607088166, "learning_rate": 3.4041632873749664e-05, "loss": 0.4312, "step": 39790 }, { "epoch": 1.1618662501277353, "grad_norm": 0.4805107295654449, "learning_rate": 3.403892944038929e-05, "loss": 0.4459, "step": 39795 }, { "epoch": 1.1620122333980527, "grad_norm": 0.49751305422089676, "learning_rate": 3.4036226007028926e-05, "loss": 0.4562, "step": 39800 }, { "epoch": 1.1621582166683697, "grad_norm": 0.5152491109447155, "learning_rate": 3.403352257366856e-05, "loss": 0.4473, "step": 39805 }, { "epoch": 1.162304199938687, "grad_norm": 0.4659389125453737, "learning_rate": 3.4030819140308194e-05, "loss": 0.4731, "step": 39810 }, { "epoch": 1.1624501832090042, "grad_norm": 0.4758734825236468, "learning_rate": 3.402811570694783e-05, "loss": 0.4467, "step": 39815 }, { "epoch": 1.1625961664793214, "grad_norm": 0.49737894792613807, "learning_rate": 3.402541227358746e-05, "loss": 0.4618, "step": 39820 }, { "epoch": 1.1627421497496386, "grad_norm": 0.4671243946681425, "learning_rate": 3.402270884022709e-05, "loss": 0.4418, "step": 39825 }, { "epoch": 1.1628881330199559, "grad_norm": 0.43636143488092355, "learning_rate": 3.402000540686672e-05, "loss": 0.4527, "step": 39830 }, { "epoch": 1.163034116290273, "grad_norm": 0.5136354701896034, "learning_rate": 3.401730197350636e-05, "loss": 0.4552, "step": 39835 }, { "epoch": 1.1631800995605903, "grad_norm": 0.4902031071934303, "learning_rate": 3.4014598540145984e-05, "loss": 0.4669, "step": 39840 }, { "epoch": 1.1633260828309075, "grad_norm": 0.43134136884181357, "learning_rate": 3.401189510678562e-05, "loss": 0.4408, "step": 39845 }, { "epoch": 1.1634720661012248, "grad_norm": 0.5036219205102819, "learning_rate": 3.400919167342525e-05, "loss": 0.4292, "step": 39850 }, { "epoch": 1.163618049371542, "grad_norm": 0.5101453932309429, "learning_rate": 3.400648824006488e-05, "loss": 0.4496, "step": 39855 }, { "epoch": 1.1637640326418592, "grad_norm": 0.5028160171088653, "learning_rate": 3.4003784806704513e-05, "loss": 0.4733, "step": 39860 }, { "epoch": 1.1639100159121765, "grad_norm": 0.4370662872642784, "learning_rate": 3.4001081373344154e-05, "loss": 0.4358, "step": 39865 }, { "epoch": 1.1640559991824937, "grad_norm": 0.47149895475644654, "learning_rate": 3.399837793998378e-05, "loss": 0.4206, "step": 39870 }, { "epoch": 1.164201982452811, "grad_norm": 0.4990595587320204, "learning_rate": 3.3995674506623416e-05, "loss": 0.4783, "step": 39875 }, { "epoch": 1.1643479657231282, "grad_norm": 0.4678901422925371, "learning_rate": 3.399297107326305e-05, "loss": 0.4686, "step": 39880 }, { "epoch": 1.1644939489934454, "grad_norm": 0.5142816480327222, "learning_rate": 3.399026763990268e-05, "loss": 0.4613, "step": 39885 }, { "epoch": 1.1646399322637626, "grad_norm": 0.5052350404834243, "learning_rate": 3.398756420654231e-05, "loss": 0.4728, "step": 39890 }, { "epoch": 1.1647859155340798, "grad_norm": 0.48602693676070907, "learning_rate": 3.3984860773181945e-05, "loss": 0.4557, "step": 39895 }, { "epoch": 1.164931898804397, "grad_norm": 0.49295456350158867, "learning_rate": 3.398215733982157e-05, "loss": 0.4428, "step": 39900 }, { "epoch": 1.1650778820747143, "grad_norm": 0.5058665717157365, "learning_rate": 3.3979453906461206e-05, "loss": 0.4541, "step": 39905 }, { "epoch": 1.1652238653450315, "grad_norm": 0.510424323834722, "learning_rate": 3.397675047310084e-05, "loss": 0.4705, "step": 39910 }, { "epoch": 1.1653698486153488, "grad_norm": 0.47400400243008084, "learning_rate": 3.397404703974047e-05, "loss": 0.4382, "step": 39915 }, { "epoch": 1.165515831885666, "grad_norm": 0.47115122215800337, "learning_rate": 3.39713436063801e-05, "loss": 0.4369, "step": 39920 }, { "epoch": 1.1656618151559832, "grad_norm": 0.4623655768290321, "learning_rate": 3.396864017301974e-05, "loss": 0.4615, "step": 39925 }, { "epoch": 1.1658077984263002, "grad_norm": 0.4501801785110059, "learning_rate": 3.396593673965937e-05, "loss": 0.4578, "step": 39930 }, { "epoch": 1.1659537816966177, "grad_norm": 0.4882569222136237, "learning_rate": 3.3963233306299e-05, "loss": 0.4531, "step": 39935 }, { "epoch": 1.1660997649669347, "grad_norm": 0.4557537218024334, "learning_rate": 3.396052987293864e-05, "loss": 0.4001, "step": 39940 }, { "epoch": 1.1662457482372521, "grad_norm": 0.48270995971742314, "learning_rate": 3.3957826439578265e-05, "loss": 0.4461, "step": 39945 }, { "epoch": 1.1663917315075691, "grad_norm": 0.5237989599384258, "learning_rate": 3.39551230062179e-05, "loss": 0.4514, "step": 39950 }, { "epoch": 1.1665377147778864, "grad_norm": 0.5257290068213875, "learning_rate": 3.395241957285753e-05, "loss": 0.4781, "step": 39955 }, { "epoch": 1.1666836980482036, "grad_norm": 0.5028927312344361, "learning_rate": 3.394971613949716e-05, "loss": 0.4551, "step": 39960 }, { "epoch": 1.1668296813185208, "grad_norm": 0.4716570908413221, "learning_rate": 3.3947012706136794e-05, "loss": 0.4529, "step": 39965 }, { "epoch": 1.166975664588838, "grad_norm": 0.5155896245836187, "learning_rate": 3.394430927277643e-05, "loss": 0.4458, "step": 39970 }, { "epoch": 1.1671216478591553, "grad_norm": 0.46460967407587717, "learning_rate": 3.3941605839416055e-05, "loss": 0.4235, "step": 39975 }, { "epoch": 1.1672676311294725, "grad_norm": 0.4386149366886866, "learning_rate": 3.3938902406055696e-05, "loss": 0.4573, "step": 39980 }, { "epoch": 1.1674136143997897, "grad_norm": 0.4925427442151363, "learning_rate": 3.393619897269533e-05, "loss": 0.4626, "step": 39985 }, { "epoch": 1.167559597670107, "grad_norm": 0.5157139509094436, "learning_rate": 3.393349553933496e-05, "loss": 0.4875, "step": 39990 }, { "epoch": 1.1677055809404242, "grad_norm": 0.47118264371763935, "learning_rate": 3.393079210597459e-05, "loss": 0.4098, "step": 39995 }, { "epoch": 1.1678515642107414, "grad_norm": 0.4761315081946747, "learning_rate": 3.3928088672614225e-05, "loss": 0.4377, "step": 40000 }, { "epoch": 1.1679975474810587, "grad_norm": 0.46693362111564857, "learning_rate": 3.392538523925385e-05, "loss": 0.4385, "step": 40005 }, { "epoch": 1.168143530751376, "grad_norm": 0.49360958730591603, "learning_rate": 3.3922681805893486e-05, "loss": 0.4419, "step": 40010 }, { "epoch": 1.1682895140216931, "grad_norm": 0.4994310404227237, "learning_rate": 3.391997837253312e-05, "loss": 0.4582, "step": 40015 }, { "epoch": 1.1684354972920103, "grad_norm": 0.5275525975177444, "learning_rate": 3.391727493917275e-05, "loss": 0.4095, "step": 40020 }, { "epoch": 1.1685814805623276, "grad_norm": 0.4890250073066056, "learning_rate": 3.391457150581238e-05, "loss": 0.4565, "step": 40025 }, { "epoch": 1.1687274638326448, "grad_norm": 0.4640652866882105, "learning_rate": 3.3911868072452016e-05, "loss": 0.4436, "step": 40030 }, { "epoch": 1.168873447102962, "grad_norm": 0.47818307113119346, "learning_rate": 3.390916463909165e-05, "loss": 0.4631, "step": 40035 }, { "epoch": 1.1690194303732793, "grad_norm": 0.5325853567517155, "learning_rate": 3.3906461205731284e-05, "loss": 0.4731, "step": 40040 }, { "epoch": 1.1691654136435965, "grad_norm": 0.4913139631936013, "learning_rate": 3.390375777237092e-05, "loss": 0.53, "step": 40045 }, { "epoch": 1.1693113969139137, "grad_norm": 0.5353420850920403, "learning_rate": 3.3901054339010545e-05, "loss": 0.4479, "step": 40050 }, { "epoch": 1.169457380184231, "grad_norm": 0.46898012713681014, "learning_rate": 3.389835090565018e-05, "loss": 0.4533, "step": 40055 }, { "epoch": 1.1696033634545482, "grad_norm": 0.48808630503480943, "learning_rate": 3.389564747228981e-05, "loss": 0.4519, "step": 40060 }, { "epoch": 1.1697493467248654, "grad_norm": 0.4876526719463031, "learning_rate": 3.389294403892944e-05, "loss": 0.4425, "step": 40065 }, { "epoch": 1.1698953299951826, "grad_norm": 0.5107152272098577, "learning_rate": 3.3890240605569074e-05, "loss": 0.4756, "step": 40070 }, { "epoch": 1.1700413132654996, "grad_norm": 0.5110600614527017, "learning_rate": 3.388753717220871e-05, "loss": 0.4457, "step": 40075 }, { "epoch": 1.170187296535817, "grad_norm": 0.4800163037545323, "learning_rate": 3.3884833738848335e-05, "loss": 0.4539, "step": 40080 }, { "epoch": 1.170333279806134, "grad_norm": 0.5354966342621909, "learning_rate": 3.388213030548797e-05, "loss": 0.4645, "step": 40085 }, { "epoch": 1.1704792630764516, "grad_norm": 0.5184518015337582, "learning_rate": 3.3879426872127603e-05, "loss": 0.4473, "step": 40090 }, { "epoch": 1.1706252463467686, "grad_norm": 0.45826927137600765, "learning_rate": 3.387672343876724e-05, "loss": 0.4574, "step": 40095 }, { "epoch": 1.1707712296170858, "grad_norm": 0.48368611312795984, "learning_rate": 3.387402000540687e-05, "loss": 0.4644, "step": 40100 }, { "epoch": 1.170917212887403, "grad_norm": 0.4605391330656475, "learning_rate": 3.38713165720465e-05, "loss": 0.4473, "step": 40105 }, { "epoch": 1.1710631961577203, "grad_norm": 0.4615011582026954, "learning_rate": 3.386861313868613e-05, "loss": 0.4516, "step": 40110 }, { "epoch": 1.1712091794280375, "grad_norm": 0.4386400188234843, "learning_rate": 3.386590970532577e-05, "loss": 0.4453, "step": 40115 }, { "epoch": 1.1713551626983547, "grad_norm": 0.5047421483549116, "learning_rate": 3.38632062719654e-05, "loss": 0.4763, "step": 40120 }, { "epoch": 1.171501145968672, "grad_norm": 0.49651553459621883, "learning_rate": 3.386050283860503e-05, "loss": 0.4309, "step": 40125 }, { "epoch": 1.1716471292389892, "grad_norm": 0.5379536190303983, "learning_rate": 3.385779940524466e-05, "loss": 0.4597, "step": 40130 }, { "epoch": 1.1717931125093064, "grad_norm": 0.5509882190869625, "learning_rate": 3.3855095971884296e-05, "loss": 0.4411, "step": 40135 }, { "epoch": 1.1719390957796236, "grad_norm": 0.4541714180546545, "learning_rate": 3.385239253852392e-05, "loss": 0.4115, "step": 40140 }, { "epoch": 1.1720850790499409, "grad_norm": 0.4742009342275473, "learning_rate": 3.384968910516356e-05, "loss": 0.4371, "step": 40145 }, { "epoch": 1.172231062320258, "grad_norm": 0.4738187896151064, "learning_rate": 3.384698567180319e-05, "loss": 0.4266, "step": 40150 }, { "epoch": 1.1723770455905753, "grad_norm": 0.47230995382765334, "learning_rate": 3.3844282238442825e-05, "loss": 0.44, "step": 40155 }, { "epoch": 1.1725230288608925, "grad_norm": 0.45685782653030416, "learning_rate": 3.384157880508246e-05, "loss": 0.4578, "step": 40160 }, { "epoch": 1.1726690121312098, "grad_norm": 0.5021109567563975, "learning_rate": 3.3838875371722087e-05, "loss": 0.4865, "step": 40165 }, { "epoch": 1.172814995401527, "grad_norm": 0.4848145943549633, "learning_rate": 3.383617193836172e-05, "loss": 0.434, "step": 40170 }, { "epoch": 1.1729609786718442, "grad_norm": 0.4826317767215168, "learning_rate": 3.3833468505001355e-05, "loss": 0.4407, "step": 40175 }, { "epoch": 1.1731069619421615, "grad_norm": 0.4972058748811621, "learning_rate": 3.383076507164099e-05, "loss": 0.4611, "step": 40180 }, { "epoch": 1.1732529452124787, "grad_norm": 0.4829266162777172, "learning_rate": 3.3828061638280616e-05, "loss": 0.4386, "step": 40185 }, { "epoch": 1.173398928482796, "grad_norm": 0.5229478370078293, "learning_rate": 3.382535820492025e-05, "loss": 0.4476, "step": 40190 }, { "epoch": 1.1735449117531132, "grad_norm": 0.5225159007743972, "learning_rate": 3.3822654771559884e-05, "loss": 0.4575, "step": 40195 }, { "epoch": 1.1736908950234304, "grad_norm": 0.46315382148033757, "learning_rate": 3.381995133819951e-05, "loss": 0.4225, "step": 40200 }, { "epoch": 1.1738368782937476, "grad_norm": 0.513965674080365, "learning_rate": 3.381724790483915e-05, "loss": 0.4654, "step": 40205 }, { "epoch": 1.1739828615640648, "grad_norm": 0.46192477245012487, "learning_rate": 3.381454447147878e-05, "loss": 0.4497, "step": 40210 }, { "epoch": 1.174128844834382, "grad_norm": 0.4831447949863435, "learning_rate": 3.381184103811841e-05, "loss": 0.438, "step": 40215 }, { "epoch": 1.1742748281046993, "grad_norm": 0.5155747205133028, "learning_rate": 3.380913760475805e-05, "loss": 0.4389, "step": 40220 }, { "epoch": 1.1744208113750165, "grad_norm": 0.5175570830743766, "learning_rate": 3.3806434171397674e-05, "loss": 0.4542, "step": 40225 }, { "epoch": 1.1745667946453335, "grad_norm": 0.4828964682308049, "learning_rate": 3.380373073803731e-05, "loss": 0.432, "step": 40230 }, { "epoch": 1.174712777915651, "grad_norm": 0.4756805299196469, "learning_rate": 3.380102730467694e-05, "loss": 0.4275, "step": 40235 }, { "epoch": 1.174858761185968, "grad_norm": 0.5043036180799134, "learning_rate": 3.379832387131657e-05, "loss": 0.4346, "step": 40240 }, { "epoch": 1.1750047444562852, "grad_norm": 0.4902871773020178, "learning_rate": 3.3795620437956204e-05, "loss": 0.4535, "step": 40245 }, { "epoch": 1.1751507277266025, "grad_norm": 0.47891725728530915, "learning_rate": 3.379291700459584e-05, "loss": 0.4512, "step": 40250 }, { "epoch": 1.1752967109969197, "grad_norm": 0.4893913129455175, "learning_rate": 3.379021357123547e-05, "loss": 0.439, "step": 40255 }, { "epoch": 1.175442694267237, "grad_norm": 0.4974054836172575, "learning_rate": 3.37875101378751e-05, "loss": 0.4494, "step": 40260 }, { "epoch": 1.1755886775375541, "grad_norm": 0.4828714883011629, "learning_rate": 3.378480670451474e-05, "loss": 0.4366, "step": 40265 }, { "epoch": 1.1757346608078714, "grad_norm": 0.4766924976302322, "learning_rate": 3.378210327115437e-05, "loss": 0.4385, "step": 40270 }, { "epoch": 1.1758806440781886, "grad_norm": 0.47868588643546056, "learning_rate": 3.3779399837794e-05, "loss": 0.4585, "step": 40275 }, { "epoch": 1.1760266273485058, "grad_norm": 0.514456044910208, "learning_rate": 3.3776696404433635e-05, "loss": 0.45, "step": 40280 }, { "epoch": 1.176172610618823, "grad_norm": 0.4773076326194459, "learning_rate": 3.377399297107326e-05, "loss": 0.4448, "step": 40285 }, { "epoch": 1.1763185938891403, "grad_norm": 0.4923561447002844, "learning_rate": 3.3771289537712896e-05, "loss": 0.4638, "step": 40290 }, { "epoch": 1.1764645771594575, "grad_norm": 0.4996277036421547, "learning_rate": 3.376858610435253e-05, "loss": 0.4558, "step": 40295 }, { "epoch": 1.1766105604297747, "grad_norm": 0.504801594504238, "learning_rate": 3.376588267099216e-05, "loss": 0.453, "step": 40300 }, { "epoch": 1.176756543700092, "grad_norm": 0.4875785507502831, "learning_rate": 3.376317923763179e-05, "loss": 0.4209, "step": 40305 }, { "epoch": 1.1769025269704092, "grad_norm": 0.47955927289959105, "learning_rate": 3.3760475804271425e-05, "loss": 0.4309, "step": 40310 }, { "epoch": 1.1770485102407264, "grad_norm": 0.4760563574366559, "learning_rate": 3.375777237091106e-05, "loss": 0.4486, "step": 40315 }, { "epoch": 1.1771944935110437, "grad_norm": 0.5165430431438952, "learning_rate": 3.3755068937550693e-05, "loss": 0.4253, "step": 40320 }, { "epoch": 1.177340476781361, "grad_norm": 0.50232417536883, "learning_rate": 3.375236550419033e-05, "loss": 0.4234, "step": 40325 }, { "epoch": 1.1774864600516781, "grad_norm": 0.4935761208859985, "learning_rate": 3.3749662070829955e-05, "loss": 0.4629, "step": 40330 }, { "epoch": 1.1776324433219953, "grad_norm": 0.4996406341238638, "learning_rate": 3.374695863746959e-05, "loss": 0.4324, "step": 40335 }, { "epoch": 1.1777784265923126, "grad_norm": 0.5047443343178066, "learning_rate": 3.374425520410922e-05, "loss": 0.4472, "step": 40340 }, { "epoch": 1.1779244098626298, "grad_norm": 0.5250545523298793, "learning_rate": 3.374155177074885e-05, "loss": 0.4577, "step": 40345 }, { "epoch": 1.178070393132947, "grad_norm": 0.4806605922465151, "learning_rate": 3.3738848337388484e-05, "loss": 0.4494, "step": 40350 }, { "epoch": 1.1782163764032643, "grad_norm": 0.5086050533648908, "learning_rate": 3.373614490402812e-05, "loss": 0.4572, "step": 40355 }, { "epoch": 1.1783623596735815, "grad_norm": 0.4667164404981502, "learning_rate": 3.3733441470667745e-05, "loss": 0.4228, "step": 40360 }, { "epoch": 1.1785083429438987, "grad_norm": 0.48779339943725675, "learning_rate": 3.373073803730738e-05, "loss": 0.442, "step": 40365 }, { "epoch": 1.178654326214216, "grad_norm": 0.4822939817109956, "learning_rate": 3.372803460394701e-05, "loss": 0.4215, "step": 40370 }, { "epoch": 1.178800309484533, "grad_norm": 0.4987833657836873, "learning_rate": 3.372533117058665e-05, "loss": 0.4589, "step": 40375 }, { "epoch": 1.1789462927548504, "grad_norm": 0.468086201348507, "learning_rate": 3.372262773722628e-05, "loss": 0.4333, "step": 40380 }, { "epoch": 1.1790922760251674, "grad_norm": 0.48820898957020825, "learning_rate": 3.3719924303865915e-05, "loss": 0.4523, "step": 40385 }, { "epoch": 1.1792382592954846, "grad_norm": 0.489137394612644, "learning_rate": 3.371722087050554e-05, "loss": 0.4429, "step": 40390 }, { "epoch": 1.1793842425658019, "grad_norm": 0.48327604003355956, "learning_rate": 3.3714517437145177e-05, "loss": 0.4357, "step": 40395 }, { "epoch": 1.179530225836119, "grad_norm": 0.4433936998724374, "learning_rate": 3.371181400378481e-05, "loss": 0.4151, "step": 40400 }, { "epoch": 1.1796762091064363, "grad_norm": 0.4638840480968748, "learning_rate": 3.370911057042444e-05, "loss": 0.443, "step": 40405 }, { "epoch": 1.1798221923767536, "grad_norm": 0.47881685972162924, "learning_rate": 3.370640713706407e-05, "loss": 0.471, "step": 40410 }, { "epoch": 1.1799681756470708, "grad_norm": 0.5235228609585033, "learning_rate": 3.3703703703703706e-05, "loss": 0.4746, "step": 40415 }, { "epoch": 1.180114158917388, "grad_norm": 0.4968946633941501, "learning_rate": 3.370100027034333e-05, "loss": 0.4191, "step": 40420 }, { "epoch": 1.1802601421877053, "grad_norm": 0.5005021636483094, "learning_rate": 3.369829683698297e-05, "loss": 0.452, "step": 40425 }, { "epoch": 1.1804061254580225, "grad_norm": 0.516188901273371, "learning_rate": 3.369559340362261e-05, "loss": 0.435, "step": 40430 }, { "epoch": 1.1805521087283397, "grad_norm": 0.4848609684197025, "learning_rate": 3.3692889970262235e-05, "loss": 0.4226, "step": 40435 }, { "epoch": 1.180698091998657, "grad_norm": 0.5110645800789151, "learning_rate": 3.369018653690187e-05, "loss": 0.4367, "step": 40440 }, { "epoch": 1.1808440752689742, "grad_norm": 0.4447730445943394, "learning_rate": 3.36874831035415e-05, "loss": 0.4188, "step": 40445 }, { "epoch": 1.1809900585392914, "grad_norm": 0.5145742863851075, "learning_rate": 3.368477967018113e-05, "loss": 0.4918, "step": 40450 }, { "epoch": 1.1811360418096086, "grad_norm": 0.489362190914775, "learning_rate": 3.3682076236820764e-05, "loss": 0.4451, "step": 40455 }, { "epoch": 1.1812820250799259, "grad_norm": 0.508591602954986, "learning_rate": 3.36793728034604e-05, "loss": 0.4343, "step": 40460 }, { "epoch": 1.181428008350243, "grad_norm": 0.4463287552479713, "learning_rate": 3.3676669370100026e-05, "loss": 0.4377, "step": 40465 }, { "epoch": 1.1815739916205603, "grad_norm": 0.48007804871982457, "learning_rate": 3.367396593673966e-05, "loss": 0.4367, "step": 40470 }, { "epoch": 1.1817199748908775, "grad_norm": 0.5077570796142955, "learning_rate": 3.3671262503379294e-05, "loss": 0.4263, "step": 40475 }, { "epoch": 1.1818659581611948, "grad_norm": 0.5086746779849955, "learning_rate": 3.366855907001892e-05, "loss": 0.4511, "step": 40480 }, { "epoch": 1.182011941431512, "grad_norm": 0.48752830860543667, "learning_rate": 3.3665855636658555e-05, "loss": 0.4456, "step": 40485 }, { "epoch": 1.1821579247018292, "grad_norm": 0.46442696166204983, "learning_rate": 3.3663152203298196e-05, "loss": 0.4404, "step": 40490 }, { "epoch": 1.1823039079721465, "grad_norm": 0.5393487789554016, "learning_rate": 3.366044876993782e-05, "loss": 0.4439, "step": 40495 }, { "epoch": 1.1824498912424637, "grad_norm": 0.44729191948217023, "learning_rate": 3.365774533657746e-05, "loss": 0.4641, "step": 40500 }, { "epoch": 1.182595874512781, "grad_norm": 0.5154493856814186, "learning_rate": 3.365504190321709e-05, "loss": 0.4451, "step": 40505 }, { "epoch": 1.1827418577830981, "grad_norm": 0.46901812157389644, "learning_rate": 3.365233846985672e-05, "loss": 0.418, "step": 40510 }, { "epoch": 1.1828878410534154, "grad_norm": 0.519176850415783, "learning_rate": 3.364963503649635e-05, "loss": 0.471, "step": 40515 }, { "epoch": 1.1830338243237324, "grad_norm": 0.5080069962960271, "learning_rate": 3.3646931603135986e-05, "loss": 0.4397, "step": 40520 }, { "epoch": 1.1831798075940498, "grad_norm": 0.42552327489740516, "learning_rate": 3.364422816977561e-05, "loss": 0.4057, "step": 40525 }, { "epoch": 1.1833257908643668, "grad_norm": 0.5122174521513314, "learning_rate": 3.364152473641525e-05, "loss": 0.4586, "step": 40530 }, { "epoch": 1.183471774134684, "grad_norm": 0.4818540813535331, "learning_rate": 3.363882130305488e-05, "loss": 0.4548, "step": 40535 }, { "epoch": 1.1836177574050013, "grad_norm": 0.4838521547489823, "learning_rate": 3.363611786969451e-05, "loss": 0.4599, "step": 40540 }, { "epoch": 1.1837637406753185, "grad_norm": 0.4866551259851189, "learning_rate": 3.363341443633415e-05, "loss": 0.4401, "step": 40545 }, { "epoch": 1.1839097239456358, "grad_norm": 0.48163359077304907, "learning_rate": 3.3630711002973783e-05, "loss": 0.4608, "step": 40550 }, { "epoch": 1.184055707215953, "grad_norm": 0.46383355292172834, "learning_rate": 3.362800756961341e-05, "loss": 0.4559, "step": 40555 }, { "epoch": 1.1842016904862702, "grad_norm": 0.4835677510714611, "learning_rate": 3.3625304136253045e-05, "loss": 0.4606, "step": 40560 }, { "epoch": 1.1843476737565874, "grad_norm": 0.5285010747930412, "learning_rate": 3.362260070289268e-05, "loss": 0.4337, "step": 40565 }, { "epoch": 1.1844936570269047, "grad_norm": 0.4890419679477597, "learning_rate": 3.3619897269532306e-05, "loss": 0.4626, "step": 40570 }, { "epoch": 1.184639640297222, "grad_norm": 0.4930191468254127, "learning_rate": 3.361719383617194e-05, "loss": 0.4808, "step": 40575 }, { "epoch": 1.1847856235675391, "grad_norm": 0.49468542323823717, "learning_rate": 3.3614490402811574e-05, "loss": 0.4591, "step": 40580 }, { "epoch": 1.1849316068378564, "grad_norm": 0.49817808406184283, "learning_rate": 3.36117869694512e-05, "loss": 0.4506, "step": 40585 }, { "epoch": 1.1850775901081736, "grad_norm": 0.491933200888528, "learning_rate": 3.3609083536090835e-05, "loss": 0.4748, "step": 40590 }, { "epoch": 1.1852235733784908, "grad_norm": 0.4645129200551889, "learning_rate": 3.360638010273047e-05, "loss": 0.4363, "step": 40595 }, { "epoch": 1.185369556648808, "grad_norm": 0.4559381610627248, "learning_rate": 3.36036766693701e-05, "loss": 0.4453, "step": 40600 }, { "epoch": 1.1855155399191253, "grad_norm": 0.4912600714598949, "learning_rate": 3.360097323600974e-05, "loss": 0.4529, "step": 40605 }, { "epoch": 1.1856615231894425, "grad_norm": 0.4969385630606886, "learning_rate": 3.359826980264937e-05, "loss": 0.4566, "step": 40610 }, { "epoch": 1.1858075064597597, "grad_norm": 0.48426949296785254, "learning_rate": 3.3595566369289e-05, "loss": 0.459, "step": 40615 }, { "epoch": 1.185953489730077, "grad_norm": 0.5142370507831677, "learning_rate": 3.359286293592863e-05, "loss": 0.4459, "step": 40620 }, { "epoch": 1.1860994730003942, "grad_norm": 0.5108371593277813, "learning_rate": 3.3590159502568266e-05, "loss": 0.4404, "step": 40625 }, { "epoch": 1.1862454562707114, "grad_norm": 0.47844566086107715, "learning_rate": 3.3587456069207894e-05, "loss": 0.4385, "step": 40630 }, { "epoch": 1.1863914395410287, "grad_norm": 0.4630328651155562, "learning_rate": 3.358475263584753e-05, "loss": 0.4528, "step": 40635 }, { "epoch": 1.1865374228113459, "grad_norm": 0.5282628290967659, "learning_rate": 3.358204920248716e-05, "loss": 0.4475, "step": 40640 }, { "epoch": 1.1866834060816631, "grad_norm": 0.46361814997187023, "learning_rate": 3.357934576912679e-05, "loss": 0.4255, "step": 40645 }, { "epoch": 1.1868293893519803, "grad_norm": 0.536195235046482, "learning_rate": 3.357664233576642e-05, "loss": 0.4954, "step": 40650 }, { "epoch": 1.1869753726222976, "grad_norm": 0.47815956137293325, "learning_rate": 3.357393890240606e-05, "loss": 0.4367, "step": 40655 }, { "epoch": 1.1871213558926148, "grad_norm": 0.524731469875013, "learning_rate": 3.357123546904569e-05, "loss": 0.4652, "step": 40660 }, { "epoch": 1.1872673391629318, "grad_norm": 0.4919538969884295, "learning_rate": 3.3568532035685325e-05, "loss": 0.4478, "step": 40665 }, { "epoch": 1.1874133224332493, "grad_norm": 0.4310697187219604, "learning_rate": 3.356582860232496e-05, "loss": 0.4308, "step": 40670 }, { "epoch": 1.1875593057035663, "grad_norm": 0.502064652226165, "learning_rate": 3.3563125168964586e-05, "loss": 0.4468, "step": 40675 }, { "epoch": 1.1877052889738835, "grad_norm": 0.47086155947809355, "learning_rate": 3.356042173560422e-05, "loss": 0.4453, "step": 40680 }, { "epoch": 1.1878512722442007, "grad_norm": 0.4903049298318245, "learning_rate": 3.3557718302243854e-05, "loss": 0.4667, "step": 40685 }, { "epoch": 1.187997255514518, "grad_norm": 0.48628360089339, "learning_rate": 3.355501486888348e-05, "loss": 0.4469, "step": 40690 }, { "epoch": 1.1881432387848352, "grad_norm": 0.4459699631924848, "learning_rate": 3.3552311435523116e-05, "loss": 0.455, "step": 40695 }, { "epoch": 1.1882892220551524, "grad_norm": 0.4737280811471524, "learning_rate": 3.354960800216275e-05, "loss": 0.4581, "step": 40700 }, { "epoch": 1.1884352053254696, "grad_norm": 0.45270053498001045, "learning_rate": 3.354690456880238e-05, "loss": 0.411, "step": 40705 }, { "epoch": 1.1885811885957869, "grad_norm": 0.48316257797602374, "learning_rate": 3.354420113544201e-05, "loss": 0.4224, "step": 40710 }, { "epoch": 1.188727171866104, "grad_norm": 0.529438565114139, "learning_rate": 3.3541497702081645e-05, "loss": 0.4694, "step": 40715 }, { "epoch": 1.1888731551364213, "grad_norm": 0.4753509383147598, "learning_rate": 3.353879426872128e-05, "loss": 0.4596, "step": 40720 }, { "epoch": 1.1890191384067386, "grad_norm": 0.5093150056668871, "learning_rate": 3.353609083536091e-05, "loss": 0.4611, "step": 40725 }, { "epoch": 1.1891651216770558, "grad_norm": 0.45329725181335223, "learning_rate": 3.353338740200054e-05, "loss": 0.424, "step": 40730 }, { "epoch": 1.189311104947373, "grad_norm": 0.4856644279125501, "learning_rate": 3.3530683968640174e-05, "loss": 0.4212, "step": 40735 }, { "epoch": 1.1894570882176903, "grad_norm": 0.5386173576506489, "learning_rate": 3.352798053527981e-05, "loss": 0.4334, "step": 40740 }, { "epoch": 1.1896030714880075, "grad_norm": 0.4737514678865527, "learning_rate": 3.352527710191944e-05, "loss": 0.4453, "step": 40745 }, { "epoch": 1.1897490547583247, "grad_norm": 0.45503115631658264, "learning_rate": 3.352257366855907e-05, "loss": 0.43, "step": 40750 }, { "epoch": 1.189895038028642, "grad_norm": 0.4688116052710788, "learning_rate": 3.35198702351987e-05, "loss": 0.4694, "step": 40755 }, { "epoch": 1.1900410212989592, "grad_norm": 0.48009191982137017, "learning_rate": 3.351716680183834e-05, "loss": 0.4459, "step": 40760 }, { "epoch": 1.1901870045692764, "grad_norm": 0.46150523343706273, "learning_rate": 3.3514463368477965e-05, "loss": 0.4342, "step": 40765 }, { "epoch": 1.1903329878395936, "grad_norm": 0.4684249103530241, "learning_rate": 3.3511759935117605e-05, "loss": 0.4233, "step": 40770 }, { "epoch": 1.1904789711099109, "grad_norm": 0.4977463239783489, "learning_rate": 3.350905650175723e-05, "loss": 0.4459, "step": 40775 }, { "epoch": 1.190624954380228, "grad_norm": 0.5077821779716633, "learning_rate": 3.350635306839687e-05, "loss": 0.4718, "step": 40780 }, { "epoch": 1.1907709376505453, "grad_norm": 0.4726213422241918, "learning_rate": 3.35036496350365e-05, "loss": 0.4555, "step": 40785 }, { "epoch": 1.1909169209208625, "grad_norm": 0.47873420147446105, "learning_rate": 3.350094620167613e-05, "loss": 0.4231, "step": 40790 }, { "epoch": 1.1910629041911798, "grad_norm": 0.4747786114699692, "learning_rate": 3.349824276831576e-05, "loss": 0.4195, "step": 40795 }, { "epoch": 1.191208887461497, "grad_norm": 0.5218839204886145, "learning_rate": 3.3495539334955396e-05, "loss": 0.4963, "step": 40800 }, { "epoch": 1.1913548707318142, "grad_norm": 0.46587922535324605, "learning_rate": 3.349283590159503e-05, "loss": 0.4569, "step": 40805 }, { "epoch": 1.1915008540021312, "grad_norm": 0.4895603184640587, "learning_rate": 3.349013246823466e-05, "loss": 0.4281, "step": 40810 }, { "epoch": 1.1916468372724487, "grad_norm": 0.5226030924617371, "learning_rate": 3.348742903487429e-05, "loss": 0.4562, "step": 40815 }, { "epoch": 1.1917928205427657, "grad_norm": 0.45657646620824166, "learning_rate": 3.3484725601513925e-05, "loss": 0.4352, "step": 40820 }, { "epoch": 1.191938803813083, "grad_norm": 0.48939016455103357, "learning_rate": 3.348202216815355e-05, "loss": 0.4394, "step": 40825 }, { "epoch": 1.1920847870834002, "grad_norm": 0.4593679165554268, "learning_rate": 3.347931873479319e-05, "loss": 0.4399, "step": 40830 }, { "epoch": 1.1922307703537174, "grad_norm": 0.45880383657057977, "learning_rate": 3.347661530143282e-05, "loss": 0.4292, "step": 40835 }, { "epoch": 1.1923767536240346, "grad_norm": 0.4814943789425334, "learning_rate": 3.3473911868072454e-05, "loss": 0.4233, "step": 40840 }, { "epoch": 1.1925227368943518, "grad_norm": 0.5005889055838427, "learning_rate": 3.347120843471209e-05, "loss": 0.4393, "step": 40845 }, { "epoch": 1.192668720164669, "grad_norm": 0.4866376181872701, "learning_rate": 3.3468505001351716e-05, "loss": 0.4399, "step": 40850 }, { "epoch": 1.1928147034349863, "grad_norm": 0.4979877203233723, "learning_rate": 3.346580156799135e-05, "loss": 0.4684, "step": 40855 }, { "epoch": 1.1929606867053035, "grad_norm": 0.530523217098147, "learning_rate": 3.3463098134630984e-05, "loss": 0.4902, "step": 40860 }, { "epoch": 1.1931066699756208, "grad_norm": 0.42843063621048116, "learning_rate": 3.346039470127061e-05, "loss": 0.4255, "step": 40865 }, { "epoch": 1.193252653245938, "grad_norm": 0.5083612283867183, "learning_rate": 3.3457691267910245e-05, "loss": 0.4628, "step": 40870 }, { "epoch": 1.1933986365162552, "grad_norm": 0.4817987388489914, "learning_rate": 3.345498783454988e-05, "loss": 0.4512, "step": 40875 }, { "epoch": 1.1935446197865724, "grad_norm": 0.5405860761475255, "learning_rate": 3.345228440118951e-05, "loss": 0.4529, "step": 40880 }, { "epoch": 1.1936906030568897, "grad_norm": 0.49098594500567855, "learning_rate": 3.344958096782915e-05, "loss": 0.4357, "step": 40885 }, { "epoch": 1.193836586327207, "grad_norm": 0.4788302711051346, "learning_rate": 3.344687753446878e-05, "loss": 0.4483, "step": 40890 }, { "epoch": 1.1939825695975241, "grad_norm": 0.4631031272739592, "learning_rate": 3.344417410110841e-05, "loss": 0.4336, "step": 40895 }, { "epoch": 1.1941285528678414, "grad_norm": 0.4626892546199405, "learning_rate": 3.344147066774804e-05, "loss": 0.4528, "step": 40900 }, { "epoch": 1.1942745361381586, "grad_norm": 0.4707022420237595, "learning_rate": 3.3438767234387676e-05, "loss": 0.4609, "step": 40905 }, { "epoch": 1.1944205194084758, "grad_norm": 0.48587107119813533, "learning_rate": 3.3436063801027303e-05, "loss": 0.4464, "step": 40910 }, { "epoch": 1.194566502678793, "grad_norm": 0.47417427805688395, "learning_rate": 3.343336036766694e-05, "loss": 0.4618, "step": 40915 }, { "epoch": 1.1947124859491103, "grad_norm": 0.48636944900619244, "learning_rate": 3.343065693430657e-05, "loss": 0.4157, "step": 40920 }, { "epoch": 1.1948584692194275, "grad_norm": 0.47167161343170744, "learning_rate": 3.34279535009462e-05, "loss": 0.4269, "step": 40925 }, { "epoch": 1.1950044524897447, "grad_norm": 0.4743841771789526, "learning_rate": 3.342525006758583e-05, "loss": 0.4464, "step": 40930 }, { "epoch": 1.195150435760062, "grad_norm": 0.46027704188209556, "learning_rate": 3.342254663422547e-05, "loss": 0.446, "step": 40935 }, { "epoch": 1.1952964190303792, "grad_norm": 0.5402184107763413, "learning_rate": 3.34198432008651e-05, "loss": 0.4692, "step": 40940 }, { "epoch": 1.1954424023006964, "grad_norm": 0.46942498769410074, "learning_rate": 3.3417139767504735e-05, "loss": 0.4177, "step": 40945 }, { "epoch": 1.1955883855710137, "grad_norm": 0.5105483631182193, "learning_rate": 3.341443633414437e-05, "loss": 0.4512, "step": 40950 }, { "epoch": 1.1957343688413307, "grad_norm": 0.5149436799355499, "learning_rate": 3.3411732900783996e-05, "loss": 0.4463, "step": 40955 }, { "epoch": 1.1958803521116481, "grad_norm": 0.4676138038813065, "learning_rate": 3.340902946742363e-05, "loss": 0.4205, "step": 40960 }, { "epoch": 1.1960263353819651, "grad_norm": 0.47991094636178555, "learning_rate": 3.3406326034063264e-05, "loss": 0.4557, "step": 40965 }, { "epoch": 1.1961723186522824, "grad_norm": 0.510544137375689, "learning_rate": 3.340362260070289e-05, "loss": 0.4764, "step": 40970 }, { "epoch": 1.1963183019225996, "grad_norm": 0.457213893292473, "learning_rate": 3.3400919167342525e-05, "loss": 0.4413, "step": 40975 }, { "epoch": 1.1964642851929168, "grad_norm": 0.4770722410818207, "learning_rate": 3.339821573398216e-05, "loss": 0.4386, "step": 40980 }, { "epoch": 1.196610268463234, "grad_norm": 0.489208563147002, "learning_rate": 3.3395512300621787e-05, "loss": 0.4589, "step": 40985 }, { "epoch": 1.1967562517335513, "grad_norm": 0.46384591589217833, "learning_rate": 3.339280886726142e-05, "loss": 0.4338, "step": 40990 }, { "epoch": 1.1969022350038685, "grad_norm": 0.5265225283686029, "learning_rate": 3.3390105433901055e-05, "loss": 0.4581, "step": 40995 }, { "epoch": 1.1970482182741857, "grad_norm": 0.48927049920746224, "learning_rate": 3.338740200054069e-05, "loss": 0.4613, "step": 41000 }, { "epoch": 1.197194201544503, "grad_norm": 0.4855197971862878, "learning_rate": 3.338469856718032e-05, "loss": 0.4584, "step": 41005 }, { "epoch": 1.1973401848148202, "grad_norm": 0.45179209883119037, "learning_rate": 3.3381995133819957e-05, "loss": 0.4051, "step": 41010 }, { "epoch": 1.1974861680851374, "grad_norm": 0.4946332713927851, "learning_rate": 3.3379291700459584e-05, "loss": 0.457, "step": 41015 }, { "epoch": 1.1976321513554546, "grad_norm": 0.48999987035717935, "learning_rate": 3.337658826709922e-05, "loss": 0.4263, "step": 41020 }, { "epoch": 1.1977781346257719, "grad_norm": 0.497761131123293, "learning_rate": 3.337388483373885e-05, "loss": 0.4339, "step": 41025 }, { "epoch": 1.197924117896089, "grad_norm": 0.4850239118492498, "learning_rate": 3.337118140037848e-05, "loss": 0.4362, "step": 41030 }, { "epoch": 1.1980701011664063, "grad_norm": 0.4992141716788941, "learning_rate": 3.336847796701811e-05, "loss": 0.4587, "step": 41035 }, { "epoch": 1.1982160844367236, "grad_norm": 0.4823660113955174, "learning_rate": 3.336577453365775e-05, "loss": 0.447, "step": 41040 }, { "epoch": 1.1983620677070408, "grad_norm": 0.48555359882629445, "learning_rate": 3.3363071100297374e-05, "loss": 0.4723, "step": 41045 }, { "epoch": 1.198508050977358, "grad_norm": 0.4911262626904479, "learning_rate": 3.336036766693701e-05, "loss": 0.449, "step": 41050 }, { "epoch": 1.1986540342476752, "grad_norm": 0.5079638891029293, "learning_rate": 3.335766423357665e-05, "loss": 0.4609, "step": 41055 }, { "epoch": 1.1988000175179925, "grad_norm": 0.44898501713916383, "learning_rate": 3.3354960800216276e-05, "loss": 0.4512, "step": 41060 }, { "epoch": 1.1989460007883097, "grad_norm": 0.5188431790662444, "learning_rate": 3.335225736685591e-05, "loss": 0.4314, "step": 41065 }, { "epoch": 1.199091984058627, "grad_norm": 0.43128861717962486, "learning_rate": 3.3349553933495544e-05, "loss": 0.4398, "step": 41070 }, { "epoch": 1.1992379673289442, "grad_norm": 0.49887962393700985, "learning_rate": 3.334685050013517e-05, "loss": 0.4194, "step": 41075 }, { "epoch": 1.1993839505992614, "grad_norm": 0.5268649877219064, "learning_rate": 3.3344147066774806e-05, "loss": 0.4795, "step": 41080 }, { "epoch": 1.1995299338695786, "grad_norm": 0.5330762262208741, "learning_rate": 3.334144363341444e-05, "loss": 0.4366, "step": 41085 }, { "epoch": 1.1996759171398959, "grad_norm": 0.49672336785005033, "learning_rate": 3.333874020005407e-05, "loss": 0.455, "step": 41090 }, { "epoch": 1.199821900410213, "grad_norm": 0.4942565187112725, "learning_rate": 3.33360367666937e-05, "loss": 0.4666, "step": 41095 }, { "epoch": 1.19996788368053, "grad_norm": 0.45762439799014915, "learning_rate": 3.3333333333333335e-05, "loss": 0.4502, "step": 41100 }, { "epoch": 1.2001138669508475, "grad_norm": 0.5145663381780627, "learning_rate": 3.333062989997296e-05, "loss": 0.4543, "step": 41105 }, { "epoch": 1.2002598502211645, "grad_norm": 0.46412412430320804, "learning_rate": 3.33279264666126e-05, "loss": 0.4375, "step": 41110 }, { "epoch": 1.200405833491482, "grad_norm": 0.4994419786218878, "learning_rate": 3.332522303325224e-05, "loss": 0.4297, "step": 41115 }, { "epoch": 1.200551816761799, "grad_norm": 0.5065569532466984, "learning_rate": 3.3322519599891864e-05, "loss": 0.4643, "step": 41120 }, { "epoch": 1.2006978000321162, "grad_norm": 0.5280072963956028, "learning_rate": 3.33198161665315e-05, "loss": 0.447, "step": 41125 }, { "epoch": 1.2008437833024335, "grad_norm": 0.4995407442474923, "learning_rate": 3.331711273317113e-05, "loss": 0.4504, "step": 41130 }, { "epoch": 1.2009897665727507, "grad_norm": 0.5411463314597674, "learning_rate": 3.331440929981076e-05, "loss": 0.4489, "step": 41135 }, { "epoch": 1.201135749843068, "grad_norm": 0.4869289262207197, "learning_rate": 3.3311705866450393e-05, "loss": 0.4567, "step": 41140 }, { "epoch": 1.2012817331133852, "grad_norm": 0.5140610455375537, "learning_rate": 3.330900243309003e-05, "loss": 0.4476, "step": 41145 }, { "epoch": 1.2014277163837024, "grad_norm": 0.4909058098273205, "learning_rate": 3.3306298999729655e-05, "loss": 0.4299, "step": 41150 }, { "epoch": 1.2015736996540196, "grad_norm": 0.49221479524584827, "learning_rate": 3.330359556636929e-05, "loss": 0.46, "step": 41155 }, { "epoch": 1.2017196829243368, "grad_norm": 0.4643122011083057, "learning_rate": 3.330089213300892e-05, "loss": 0.4389, "step": 41160 }, { "epoch": 1.201865666194654, "grad_norm": 0.4932851089938185, "learning_rate": 3.329818869964855e-05, "loss": 0.4171, "step": 41165 }, { "epoch": 1.2020116494649713, "grad_norm": 0.5123055446413096, "learning_rate": 3.329548526628819e-05, "loss": 0.4455, "step": 41170 }, { "epoch": 1.2021576327352885, "grad_norm": 0.49301737588268324, "learning_rate": 3.3292781832927825e-05, "loss": 0.4243, "step": 41175 }, { "epoch": 1.2023036160056058, "grad_norm": 0.4802922484173736, "learning_rate": 3.329007839956745e-05, "loss": 0.4583, "step": 41180 }, { "epoch": 1.202449599275923, "grad_norm": 0.5093734404991447, "learning_rate": 3.3287374966207086e-05, "loss": 0.4156, "step": 41185 }, { "epoch": 1.2025955825462402, "grad_norm": 0.5059217039994651, "learning_rate": 3.328467153284672e-05, "loss": 0.4554, "step": 41190 }, { "epoch": 1.2027415658165574, "grad_norm": 0.46674160560678785, "learning_rate": 3.328196809948635e-05, "loss": 0.4451, "step": 41195 }, { "epoch": 1.2028875490868747, "grad_norm": 0.48953946169399803, "learning_rate": 3.327926466612598e-05, "loss": 0.4228, "step": 41200 }, { "epoch": 1.203033532357192, "grad_norm": 0.47552532990188, "learning_rate": 3.3276561232765615e-05, "loss": 0.4466, "step": 41205 }, { "epoch": 1.2031795156275091, "grad_norm": 0.5191314270032634, "learning_rate": 3.327385779940524e-05, "loss": 0.4588, "step": 41210 }, { "epoch": 1.2033254988978264, "grad_norm": 0.5184840583159687, "learning_rate": 3.3271154366044877e-05, "loss": 0.4533, "step": 41215 }, { "epoch": 1.2034714821681436, "grad_norm": 0.4863624604660927, "learning_rate": 3.326845093268451e-05, "loss": 0.4776, "step": 41220 }, { "epoch": 1.2036174654384608, "grad_norm": 0.5187474020348002, "learning_rate": 3.3265747499324145e-05, "loss": 0.4701, "step": 41225 }, { "epoch": 1.203763448708778, "grad_norm": 0.491741504393596, "learning_rate": 3.326304406596378e-05, "loss": 0.4443, "step": 41230 }, { "epoch": 1.2039094319790953, "grad_norm": 0.47819643688399505, "learning_rate": 3.326034063260341e-05, "loss": 0.4695, "step": 41235 }, { "epoch": 1.2040554152494125, "grad_norm": 0.49669560886390096, "learning_rate": 3.325763719924304e-05, "loss": 0.4771, "step": 41240 }, { "epoch": 1.2042013985197295, "grad_norm": 0.47661517295071915, "learning_rate": 3.3254933765882674e-05, "loss": 0.4413, "step": 41245 }, { "epoch": 1.204347381790047, "grad_norm": 0.4567336142229945, "learning_rate": 3.325223033252231e-05, "loss": 0.4237, "step": 41250 }, { "epoch": 1.204493365060364, "grad_norm": 0.47263793739942156, "learning_rate": 3.3249526899161935e-05, "loss": 0.4152, "step": 41255 }, { "epoch": 1.2046393483306814, "grad_norm": 0.49728911163561895, "learning_rate": 3.324682346580157e-05, "loss": 0.433, "step": 41260 }, { "epoch": 1.2047853316009984, "grad_norm": 0.4637141901276336, "learning_rate": 3.32441200324412e-05, "loss": 0.4124, "step": 41265 }, { "epoch": 1.2049313148713157, "grad_norm": 0.44780793035536115, "learning_rate": 3.324141659908083e-05, "loss": 0.429, "step": 41270 }, { "epoch": 1.205077298141633, "grad_norm": 0.4941164749459985, "learning_rate": 3.3238713165720464e-05, "loss": 0.4223, "step": 41275 }, { "epoch": 1.2052232814119501, "grad_norm": 0.4806839313614333, "learning_rate": 3.32360097323601e-05, "loss": 0.4473, "step": 41280 }, { "epoch": 1.2053692646822673, "grad_norm": 0.4844732022164088, "learning_rate": 3.323330629899973e-05, "loss": 0.4501, "step": 41285 }, { "epoch": 1.2055152479525846, "grad_norm": 0.4656240342061287, "learning_rate": 3.3230602865639366e-05, "loss": 0.4386, "step": 41290 }, { "epoch": 1.2056612312229018, "grad_norm": 0.5017146866401182, "learning_rate": 3.3227899432279e-05, "loss": 0.42, "step": 41295 }, { "epoch": 1.205807214493219, "grad_norm": 0.49596596588137526, "learning_rate": 3.322519599891863e-05, "loss": 0.4361, "step": 41300 }, { "epoch": 1.2059531977635363, "grad_norm": 0.5266489158432945, "learning_rate": 3.322249256555826e-05, "loss": 0.4478, "step": 41305 }, { "epoch": 1.2060991810338535, "grad_norm": 0.4820229142672731, "learning_rate": 3.3219789132197896e-05, "loss": 0.4387, "step": 41310 }, { "epoch": 1.2062451643041707, "grad_norm": 0.496670531850576, "learning_rate": 3.321708569883752e-05, "loss": 0.452, "step": 41315 }, { "epoch": 1.206391147574488, "grad_norm": 0.48116265205666314, "learning_rate": 3.321438226547716e-05, "loss": 0.463, "step": 41320 }, { "epoch": 1.2065371308448052, "grad_norm": 1.2238135970283612, "learning_rate": 3.321167883211679e-05, "loss": 0.4607, "step": 41325 }, { "epoch": 1.2066831141151224, "grad_norm": 0.5071100057870803, "learning_rate": 3.320897539875642e-05, "loss": 0.4553, "step": 41330 }, { "epoch": 1.2068290973854396, "grad_norm": 0.5061067742989747, "learning_rate": 3.320627196539605e-05, "loss": 0.4398, "step": 41335 }, { "epoch": 1.2069750806557569, "grad_norm": 0.4857088217148799, "learning_rate": 3.3203568532035686e-05, "loss": 0.4413, "step": 41340 }, { "epoch": 1.207121063926074, "grad_norm": 0.5287490593513374, "learning_rate": 3.320086509867532e-05, "loss": 0.4397, "step": 41345 }, { "epoch": 1.2072670471963913, "grad_norm": 0.556884346575485, "learning_rate": 3.3198161665314954e-05, "loss": 0.4325, "step": 41350 }, { "epoch": 1.2074130304667086, "grad_norm": 0.4962349738038809, "learning_rate": 3.319545823195459e-05, "loss": 0.4138, "step": 41355 }, { "epoch": 1.2075590137370258, "grad_norm": 0.4987068909881322, "learning_rate": 3.3192754798594215e-05, "loss": 0.452, "step": 41360 }, { "epoch": 1.207704997007343, "grad_norm": 0.527851434136859, "learning_rate": 3.319005136523385e-05, "loss": 0.4402, "step": 41365 }, { "epoch": 1.2078509802776602, "grad_norm": 0.5130306051232947, "learning_rate": 3.3187347931873483e-05, "loss": 0.463, "step": 41370 }, { "epoch": 1.2079969635479775, "grad_norm": 0.45530382642113737, "learning_rate": 3.318464449851311e-05, "loss": 0.4329, "step": 41375 }, { "epoch": 1.2081429468182947, "grad_norm": 0.5132511433131253, "learning_rate": 3.3181941065152745e-05, "loss": 0.439, "step": 41380 }, { "epoch": 1.208288930088612, "grad_norm": 0.4748474606695394, "learning_rate": 3.317923763179238e-05, "loss": 0.4767, "step": 41385 }, { "epoch": 1.2084349133589292, "grad_norm": 0.5654611604129367, "learning_rate": 3.3176534198432006e-05, "loss": 0.4661, "step": 41390 }, { "epoch": 1.2085808966292464, "grad_norm": 0.5214757817747763, "learning_rate": 3.317383076507165e-05, "loss": 0.4721, "step": 41395 }, { "epoch": 1.2087268798995634, "grad_norm": 0.5280025918202202, "learning_rate": 3.3171127331711274e-05, "loss": 0.4745, "step": 41400 }, { "epoch": 1.2088728631698809, "grad_norm": 0.46473956961088786, "learning_rate": 3.316842389835091e-05, "loss": 0.449, "step": 41405 }, { "epoch": 1.2090188464401979, "grad_norm": 0.5205198149790258, "learning_rate": 3.316572046499054e-05, "loss": 0.4547, "step": 41410 }, { "epoch": 1.209164829710515, "grad_norm": 0.5293155452680746, "learning_rate": 3.316301703163017e-05, "loss": 0.4488, "step": 41415 }, { "epoch": 1.2093108129808323, "grad_norm": 0.4522971171855706, "learning_rate": 3.31603135982698e-05, "loss": 0.4519, "step": 41420 }, { "epoch": 1.2094567962511495, "grad_norm": 0.4886704440866671, "learning_rate": 3.315761016490944e-05, "loss": 0.4807, "step": 41425 }, { "epoch": 1.2096027795214668, "grad_norm": 0.47987623658146183, "learning_rate": 3.315490673154907e-05, "loss": 0.4719, "step": 41430 }, { "epoch": 1.209748762791784, "grad_norm": 0.536041311380934, "learning_rate": 3.31522032981887e-05, "loss": 0.4687, "step": 41435 }, { "epoch": 1.2098947460621012, "grad_norm": 0.5184273788224951, "learning_rate": 3.314949986482833e-05, "loss": 0.4684, "step": 41440 }, { "epoch": 1.2100407293324185, "grad_norm": 0.5077549139570728, "learning_rate": 3.3146796431467967e-05, "loss": 0.4501, "step": 41445 }, { "epoch": 1.2101867126027357, "grad_norm": 0.5169424064311942, "learning_rate": 3.31440929981076e-05, "loss": 0.4618, "step": 41450 }, { "epoch": 1.210332695873053, "grad_norm": 0.46607410887383577, "learning_rate": 3.3141389564747235e-05, "loss": 0.4435, "step": 41455 }, { "epoch": 1.2104786791433702, "grad_norm": 0.47558975472091286, "learning_rate": 3.313868613138686e-05, "loss": 0.4271, "step": 41460 }, { "epoch": 1.2106246624136874, "grad_norm": 0.5057783728055907, "learning_rate": 3.3135982698026496e-05, "loss": 0.4384, "step": 41465 }, { "epoch": 1.2107706456840046, "grad_norm": 0.46935188358334523, "learning_rate": 3.313327926466613e-05, "loss": 0.4367, "step": 41470 }, { "epoch": 1.2109166289543218, "grad_norm": 0.49823359651836074, "learning_rate": 3.313057583130576e-05, "loss": 0.4594, "step": 41475 }, { "epoch": 1.211062612224639, "grad_norm": 0.48881872823562456, "learning_rate": 3.312787239794539e-05, "loss": 0.4154, "step": 41480 }, { "epoch": 1.2112085954949563, "grad_norm": 0.45261018288341276, "learning_rate": 3.3125168964585025e-05, "loss": 0.44, "step": 41485 }, { "epoch": 1.2113545787652735, "grad_norm": 0.49395650513446165, "learning_rate": 3.312246553122465e-05, "loss": 0.453, "step": 41490 }, { "epoch": 1.2115005620355908, "grad_norm": 0.501328368177862, "learning_rate": 3.3119762097864286e-05, "loss": 0.4413, "step": 41495 }, { "epoch": 1.211646545305908, "grad_norm": 0.48639004999180924, "learning_rate": 3.311705866450392e-05, "loss": 0.4354, "step": 41500 }, { "epoch": 1.2117925285762252, "grad_norm": 0.5122183447637992, "learning_rate": 3.3114355231143554e-05, "loss": 0.4734, "step": 41505 }, { "epoch": 1.2119385118465424, "grad_norm": 0.4852993569305618, "learning_rate": 3.311165179778319e-05, "loss": 0.4505, "step": 41510 }, { "epoch": 1.2120844951168597, "grad_norm": 0.4737765164683148, "learning_rate": 3.310894836442282e-05, "loss": 0.4692, "step": 41515 }, { "epoch": 1.212230478387177, "grad_norm": 0.4671592658867465, "learning_rate": 3.310624493106245e-05, "loss": 0.4155, "step": 41520 }, { "epoch": 1.2123764616574941, "grad_norm": 0.5012376478717092, "learning_rate": 3.3103541497702084e-05, "loss": 0.4837, "step": 41525 }, { "epoch": 1.2125224449278114, "grad_norm": 0.5375000344287522, "learning_rate": 3.310083806434172e-05, "loss": 0.4839, "step": 41530 }, { "epoch": 1.2126684281981286, "grad_norm": 0.6113765069425603, "learning_rate": 3.3098134630981345e-05, "loss": 0.4392, "step": 41535 }, { "epoch": 1.2128144114684458, "grad_norm": 0.5111081975746545, "learning_rate": 3.309543119762098e-05, "loss": 0.4795, "step": 41540 }, { "epoch": 1.2129603947387628, "grad_norm": 0.4926019511966472, "learning_rate": 3.309272776426061e-05, "loss": 0.4629, "step": 41545 }, { "epoch": 1.2131063780090803, "grad_norm": 0.49586372729706424, "learning_rate": 3.309002433090024e-05, "loss": 0.4595, "step": 41550 }, { "epoch": 1.2132523612793973, "grad_norm": 0.4807893903258242, "learning_rate": 3.3087320897539874e-05, "loss": 0.4528, "step": 41555 }, { "epoch": 1.2133983445497145, "grad_norm": 0.4658822321378485, "learning_rate": 3.308461746417951e-05, "loss": 0.4286, "step": 41560 }, { "epoch": 1.2135443278200317, "grad_norm": 0.5190704730434661, "learning_rate": 3.308191403081914e-05, "loss": 0.4588, "step": 41565 }, { "epoch": 1.213690311090349, "grad_norm": 0.49009869657890953, "learning_rate": 3.3079210597458776e-05, "loss": 0.4373, "step": 41570 }, { "epoch": 1.2138362943606662, "grad_norm": 0.5181061928802145, "learning_rate": 3.307650716409841e-05, "loss": 0.441, "step": 41575 }, { "epoch": 1.2139822776309834, "grad_norm": 0.4621408529541985, "learning_rate": 3.307380373073804e-05, "loss": 0.4203, "step": 41580 }, { "epoch": 1.2141282609013007, "grad_norm": 0.49140843961786745, "learning_rate": 3.307110029737767e-05, "loss": 0.4483, "step": 41585 }, { "epoch": 1.214274244171618, "grad_norm": 0.5485916983382862, "learning_rate": 3.3068396864017305e-05, "loss": 0.4259, "step": 41590 }, { "epoch": 1.2144202274419351, "grad_norm": 0.460201743166983, "learning_rate": 3.306569343065693e-05, "loss": 0.4151, "step": 41595 }, { "epoch": 1.2145662107122523, "grad_norm": 0.49477988484662616, "learning_rate": 3.306298999729657e-05, "loss": 0.4225, "step": 41600 }, { "epoch": 1.2147121939825696, "grad_norm": 0.46866604030197745, "learning_rate": 3.30602865639362e-05, "loss": 0.4419, "step": 41605 }, { "epoch": 1.2148581772528868, "grad_norm": 0.508019142219108, "learning_rate": 3.305758313057583e-05, "loss": 0.4621, "step": 41610 }, { "epoch": 1.215004160523204, "grad_norm": 0.4777069239197384, "learning_rate": 3.305487969721546e-05, "loss": 0.4493, "step": 41615 }, { "epoch": 1.2151501437935213, "grad_norm": 0.45948841342624464, "learning_rate": 3.30521762638551e-05, "loss": 0.4378, "step": 41620 }, { "epoch": 1.2152961270638385, "grad_norm": 0.4842213869072109, "learning_rate": 3.304947283049473e-05, "loss": 0.4883, "step": 41625 }, { "epoch": 1.2154421103341557, "grad_norm": 0.5080927621911472, "learning_rate": 3.3046769397134364e-05, "loss": 0.4723, "step": 41630 }, { "epoch": 1.215588093604473, "grad_norm": 0.4621660818969047, "learning_rate": 3.3044065963774e-05, "loss": 0.4316, "step": 41635 }, { "epoch": 1.2157340768747902, "grad_norm": 0.4654517424704835, "learning_rate": 3.3041362530413625e-05, "loss": 0.441, "step": 41640 }, { "epoch": 1.2158800601451074, "grad_norm": 0.458600864306645, "learning_rate": 3.303865909705326e-05, "loss": 0.463, "step": 41645 }, { "epoch": 1.2160260434154246, "grad_norm": 0.5071201137507959, "learning_rate": 3.303595566369289e-05, "loss": 0.4658, "step": 41650 }, { "epoch": 1.2161720266857419, "grad_norm": 0.47127412510594857, "learning_rate": 3.303325223033252e-05, "loss": 0.4485, "step": 41655 }, { "epoch": 1.216318009956059, "grad_norm": 0.47785558421059837, "learning_rate": 3.3030548796972154e-05, "loss": 0.4436, "step": 41660 }, { "epoch": 1.2164639932263763, "grad_norm": 0.5087307813991369, "learning_rate": 3.302784536361179e-05, "loss": 0.4755, "step": 41665 }, { "epoch": 1.2166099764966936, "grad_norm": 0.48149455234541716, "learning_rate": 3.3025141930251416e-05, "loss": 0.4629, "step": 41670 }, { "epoch": 1.2167559597670108, "grad_norm": 0.482995287517612, "learning_rate": 3.302243849689105e-05, "loss": 0.4312, "step": 41675 }, { "epoch": 1.216901943037328, "grad_norm": 0.5163226435255245, "learning_rate": 3.301973506353069e-05, "loss": 0.4588, "step": 41680 }, { "epoch": 1.2170479263076452, "grad_norm": 0.49525807099867936, "learning_rate": 3.301703163017032e-05, "loss": 0.4438, "step": 41685 }, { "epoch": 1.2171939095779623, "grad_norm": 0.4978430770337896, "learning_rate": 3.301432819680995e-05, "loss": 0.459, "step": 41690 }, { "epoch": 1.2173398928482797, "grad_norm": 0.4638667599853233, "learning_rate": 3.3011624763449586e-05, "loss": 0.4287, "step": 41695 }, { "epoch": 1.2174858761185967, "grad_norm": 0.5000594811072792, "learning_rate": 3.300892133008921e-05, "loss": 0.4656, "step": 41700 }, { "epoch": 1.217631859388914, "grad_norm": 0.5019480717933104, "learning_rate": 3.300621789672885e-05, "loss": 0.4279, "step": 41705 }, { "epoch": 1.2177778426592312, "grad_norm": 0.4798510491968485, "learning_rate": 3.300351446336848e-05, "loss": 0.4553, "step": 41710 }, { "epoch": 1.2179238259295484, "grad_norm": 0.5073421393798916, "learning_rate": 3.300081103000811e-05, "loss": 0.4311, "step": 41715 }, { "epoch": 1.2180698091998656, "grad_norm": 0.4786325753417793, "learning_rate": 3.299810759664774e-05, "loss": 0.4598, "step": 41720 }, { "epoch": 1.2182157924701829, "grad_norm": 0.4848873096697743, "learning_rate": 3.2995404163287376e-05, "loss": 0.464, "step": 41725 }, { "epoch": 1.2183617757405, "grad_norm": 0.48530794754406315, "learning_rate": 3.2992700729927004e-05, "loss": 0.4573, "step": 41730 }, { "epoch": 1.2185077590108173, "grad_norm": 0.4290935324467871, "learning_rate": 3.2989997296566644e-05, "loss": 0.414, "step": 41735 }, { "epoch": 1.2186537422811345, "grad_norm": 0.4647030729362264, "learning_rate": 3.298729386320628e-05, "loss": 0.4493, "step": 41740 }, { "epoch": 1.2187997255514518, "grad_norm": 0.4800297816551023, "learning_rate": 3.2984590429845906e-05, "loss": 0.4678, "step": 41745 }, { "epoch": 1.218945708821769, "grad_norm": 0.5140755233678815, "learning_rate": 3.298188699648554e-05, "loss": 0.4617, "step": 41750 }, { "epoch": 1.2190916920920862, "grad_norm": 0.4400307753121593, "learning_rate": 3.2979183563125174e-05, "loss": 0.4404, "step": 41755 }, { "epoch": 1.2192376753624035, "grad_norm": 0.44151050890909127, "learning_rate": 3.29764801297648e-05, "loss": 0.4369, "step": 41760 }, { "epoch": 1.2193836586327207, "grad_norm": 0.4955930035056075, "learning_rate": 3.2973776696404435e-05, "loss": 0.4583, "step": 41765 }, { "epoch": 1.219529641903038, "grad_norm": 0.44299733876741126, "learning_rate": 3.297107326304407e-05, "loss": 0.4426, "step": 41770 }, { "epoch": 1.2196756251733551, "grad_norm": 0.5315545809187696, "learning_rate": 3.2968369829683696e-05, "loss": 0.4694, "step": 41775 }, { "epoch": 1.2198216084436724, "grad_norm": 0.48635496910541604, "learning_rate": 3.296566639632333e-05, "loss": 0.431, "step": 41780 }, { "epoch": 1.2199675917139896, "grad_norm": 0.47088462941099896, "learning_rate": 3.2962962962962964e-05, "loss": 0.4199, "step": 41785 }, { "epoch": 1.2201135749843068, "grad_norm": 0.5122354091430972, "learning_rate": 3.29602595296026e-05, "loss": 0.4573, "step": 41790 }, { "epoch": 1.220259558254624, "grad_norm": 0.4946195481067343, "learning_rate": 3.295755609624223e-05, "loss": 0.4549, "step": 41795 }, { "epoch": 1.2204055415249413, "grad_norm": 0.4641736674752909, "learning_rate": 3.2954852662881866e-05, "loss": 0.4389, "step": 41800 }, { "epoch": 1.2205515247952585, "grad_norm": 0.4777338223316846, "learning_rate": 3.295214922952149e-05, "loss": 0.4387, "step": 41805 }, { "epoch": 1.2206975080655758, "grad_norm": 0.49280369708473853, "learning_rate": 3.294944579616113e-05, "loss": 0.4657, "step": 41810 }, { "epoch": 1.220843491335893, "grad_norm": 0.5137672864468726, "learning_rate": 3.294674236280076e-05, "loss": 0.4466, "step": 41815 }, { "epoch": 1.2209894746062102, "grad_norm": 0.5084486845992475, "learning_rate": 3.294403892944039e-05, "loss": 0.4919, "step": 41820 }, { "epoch": 1.2211354578765274, "grad_norm": 0.47164945545719933, "learning_rate": 3.294133549608002e-05, "loss": 0.4821, "step": 41825 }, { "epoch": 1.2212814411468447, "grad_norm": 0.4841449554049637, "learning_rate": 3.293863206271966e-05, "loss": 0.4731, "step": 41830 }, { "epoch": 1.2214274244171617, "grad_norm": 0.4632588751514265, "learning_rate": 3.2935928629359284e-05, "loss": 0.4385, "step": 41835 }, { "epoch": 1.2215734076874791, "grad_norm": 0.5034047404469588, "learning_rate": 3.293322519599892e-05, "loss": 0.4391, "step": 41840 }, { "epoch": 1.2217193909577961, "grad_norm": 0.5054609507450404, "learning_rate": 3.293052176263855e-05, "loss": 0.4744, "step": 41845 }, { "epoch": 1.2218653742281134, "grad_norm": 0.48129180357080403, "learning_rate": 3.2927818329278186e-05, "loss": 0.4449, "step": 41850 }, { "epoch": 1.2220113574984306, "grad_norm": 0.4759559181103196, "learning_rate": 3.292511489591782e-05, "loss": 0.4179, "step": 41855 }, { "epoch": 1.2221573407687478, "grad_norm": 0.5206108796390478, "learning_rate": 3.2922411462557454e-05, "loss": 0.4388, "step": 41860 }, { "epoch": 1.222303324039065, "grad_norm": 0.49059594649132887, "learning_rate": 3.291970802919708e-05, "loss": 0.4339, "step": 41865 }, { "epoch": 1.2224493073093823, "grad_norm": 0.4683613173640377, "learning_rate": 3.2917004595836715e-05, "loss": 0.415, "step": 41870 }, { "epoch": 1.2225952905796995, "grad_norm": 0.4772348797009657, "learning_rate": 3.291430116247635e-05, "loss": 0.4553, "step": 41875 }, { "epoch": 1.2227412738500167, "grad_norm": 0.5327117145146979, "learning_rate": 3.2911597729115976e-05, "loss": 0.4531, "step": 41880 }, { "epoch": 1.222887257120334, "grad_norm": 0.4474755884485056, "learning_rate": 3.290889429575561e-05, "loss": 0.45, "step": 41885 }, { "epoch": 1.2230332403906512, "grad_norm": 0.4814053806463529, "learning_rate": 3.2906190862395244e-05, "loss": 0.4325, "step": 41890 }, { "epoch": 1.2231792236609684, "grad_norm": 0.500774108522665, "learning_rate": 3.290348742903487e-05, "loss": 0.4449, "step": 41895 }, { "epoch": 1.2233252069312857, "grad_norm": 0.4776478802278825, "learning_rate": 3.2900783995674506e-05, "loss": 0.4358, "step": 41900 }, { "epoch": 1.2234711902016029, "grad_norm": 0.4869089866182072, "learning_rate": 3.289808056231414e-05, "loss": 0.4438, "step": 41905 }, { "epoch": 1.2236171734719201, "grad_norm": 0.47236439939314273, "learning_rate": 3.2895377128953774e-05, "loss": 0.4207, "step": 41910 }, { "epoch": 1.2237631567422373, "grad_norm": 0.511113060557435, "learning_rate": 3.289267369559341e-05, "loss": 0.4569, "step": 41915 }, { "epoch": 1.2239091400125546, "grad_norm": 0.4876697665718616, "learning_rate": 3.288997026223304e-05, "loss": 0.4581, "step": 41920 }, { "epoch": 1.2240551232828718, "grad_norm": 0.4683520320358467, "learning_rate": 3.288726682887267e-05, "loss": 0.4324, "step": 41925 }, { "epoch": 1.224201106553189, "grad_norm": 0.449230253254273, "learning_rate": 3.28845633955123e-05, "loss": 0.4244, "step": 41930 }, { "epoch": 1.2243470898235063, "grad_norm": 0.5001820348551578, "learning_rate": 3.288185996215194e-05, "loss": 0.435, "step": 41935 }, { "epoch": 1.2244930730938235, "grad_norm": 0.5057919853810097, "learning_rate": 3.2879156528791564e-05, "loss": 0.473, "step": 41940 }, { "epoch": 1.2246390563641407, "grad_norm": 0.4748164433275308, "learning_rate": 3.28764530954312e-05, "loss": 0.4537, "step": 41945 }, { "epoch": 1.224785039634458, "grad_norm": 0.47804933862815685, "learning_rate": 3.287374966207083e-05, "loss": 0.4496, "step": 41950 }, { "epoch": 1.2249310229047752, "grad_norm": 0.5074408191987634, "learning_rate": 3.287104622871046e-05, "loss": 0.4297, "step": 41955 }, { "epoch": 1.2250770061750924, "grad_norm": 0.47335066909335594, "learning_rate": 3.28683427953501e-05, "loss": 0.4713, "step": 41960 }, { "epoch": 1.2252229894454096, "grad_norm": 0.4849263026736005, "learning_rate": 3.286563936198973e-05, "loss": 0.4397, "step": 41965 }, { "epoch": 1.2253689727157269, "grad_norm": 0.5427903454144518, "learning_rate": 3.286293592862936e-05, "loss": 0.4553, "step": 41970 }, { "epoch": 1.225514955986044, "grad_norm": 0.5282243042034176, "learning_rate": 3.2860232495268996e-05, "loss": 0.4637, "step": 41975 }, { "epoch": 1.225660939256361, "grad_norm": 0.4493892171330449, "learning_rate": 3.285752906190863e-05, "loss": 0.46, "step": 41980 }, { "epoch": 1.2258069225266786, "grad_norm": 0.44767529889285307, "learning_rate": 3.285482562854826e-05, "loss": 0.4601, "step": 41985 }, { "epoch": 1.2259529057969956, "grad_norm": 0.5275539276446186, "learning_rate": 3.285212219518789e-05, "loss": 0.4564, "step": 41990 }, { "epoch": 1.2260988890673128, "grad_norm": 0.45994772090866154, "learning_rate": 3.2849418761827525e-05, "loss": 0.4493, "step": 41995 }, { "epoch": 1.22624487233763, "grad_norm": 0.46608048105336014, "learning_rate": 3.284671532846715e-05, "loss": 0.4444, "step": 42000 }, { "epoch": 1.2263908556079473, "grad_norm": 0.47773966173827737, "learning_rate": 3.2844011895106786e-05, "loss": 0.4371, "step": 42005 }, { "epoch": 1.2265368388782645, "grad_norm": 0.4824783353949465, "learning_rate": 3.284130846174642e-05, "loss": 0.4232, "step": 42010 }, { "epoch": 1.2266828221485817, "grad_norm": 0.5129457867490069, "learning_rate": 3.283860502838605e-05, "loss": 0.4272, "step": 42015 }, { "epoch": 1.226828805418899, "grad_norm": 0.48487001259917095, "learning_rate": 3.283590159502569e-05, "loss": 0.433, "step": 42020 }, { "epoch": 1.2269747886892162, "grad_norm": 0.4710969797304497, "learning_rate": 3.2833198161665315e-05, "loss": 0.4241, "step": 42025 }, { "epoch": 1.2271207719595334, "grad_norm": 0.48360039940670346, "learning_rate": 3.283049472830495e-05, "loss": 0.436, "step": 42030 }, { "epoch": 1.2272667552298506, "grad_norm": 0.44998735539905677, "learning_rate": 3.282779129494458e-05, "loss": 0.4274, "step": 42035 }, { "epoch": 1.2274127385001679, "grad_norm": 0.5121026838553109, "learning_rate": 3.282508786158421e-05, "loss": 0.4702, "step": 42040 }, { "epoch": 1.227558721770485, "grad_norm": 0.4945140807021223, "learning_rate": 3.2822384428223845e-05, "loss": 0.4429, "step": 42045 }, { "epoch": 1.2277047050408023, "grad_norm": 0.5003563632001169, "learning_rate": 3.281968099486348e-05, "loss": 0.4515, "step": 42050 }, { "epoch": 1.2278506883111195, "grad_norm": 0.45435889768610804, "learning_rate": 3.281697756150311e-05, "loss": 0.4355, "step": 42055 }, { "epoch": 1.2279966715814368, "grad_norm": 0.4964013567455759, "learning_rate": 3.281427412814274e-05, "loss": 0.4591, "step": 42060 }, { "epoch": 1.228142654851754, "grad_norm": 0.531558596072032, "learning_rate": 3.2811570694782374e-05, "loss": 0.4657, "step": 42065 }, { "epoch": 1.2282886381220712, "grad_norm": 0.4872144040641121, "learning_rate": 3.280886726142201e-05, "loss": 0.4387, "step": 42070 }, { "epoch": 1.2284346213923885, "grad_norm": 0.4675329952083443, "learning_rate": 3.280616382806164e-05, "loss": 0.466, "step": 42075 }, { "epoch": 1.2285806046627057, "grad_norm": 0.5763328902441271, "learning_rate": 3.2803460394701276e-05, "loss": 0.4642, "step": 42080 }, { "epoch": 1.228726587933023, "grad_norm": 0.4969347211983337, "learning_rate": 3.28007569613409e-05, "loss": 0.4495, "step": 42085 }, { "epoch": 1.2288725712033401, "grad_norm": 0.4967040562508055, "learning_rate": 3.279805352798054e-05, "loss": 0.4664, "step": 42090 }, { "epoch": 1.2290185544736574, "grad_norm": 0.5175058198609498, "learning_rate": 3.279535009462017e-05, "loss": 0.4699, "step": 42095 }, { "epoch": 1.2291645377439746, "grad_norm": 0.5032839036886836, "learning_rate": 3.27926466612598e-05, "loss": 0.4447, "step": 42100 }, { "epoch": 1.2293105210142918, "grad_norm": 0.46583572127278683, "learning_rate": 3.278994322789943e-05, "loss": 0.4344, "step": 42105 }, { "epoch": 1.229456504284609, "grad_norm": 0.4669601583420529, "learning_rate": 3.2787239794539066e-05, "loss": 0.4482, "step": 42110 }, { "epoch": 1.2296024875549263, "grad_norm": 0.4980846422048899, "learning_rate": 3.27845363611787e-05, "loss": 0.4578, "step": 42115 }, { "epoch": 1.2297484708252435, "grad_norm": 0.4999846633099792, "learning_rate": 3.278183292781833e-05, "loss": 0.4507, "step": 42120 }, { "epoch": 1.2298944540955605, "grad_norm": 0.46551489454416833, "learning_rate": 3.277912949445796e-05, "loss": 0.456, "step": 42125 }, { "epoch": 1.230040437365878, "grad_norm": 0.4843394785333364, "learning_rate": 3.2776426061097596e-05, "loss": 0.4546, "step": 42130 }, { "epoch": 1.230186420636195, "grad_norm": 0.4803469131841075, "learning_rate": 3.277372262773723e-05, "loss": 0.4675, "step": 42135 }, { "epoch": 1.2303324039065122, "grad_norm": 0.5343232454003148, "learning_rate": 3.2771019194376864e-05, "loss": 0.4359, "step": 42140 }, { "epoch": 1.2304783871768294, "grad_norm": 0.4636139349502683, "learning_rate": 3.276831576101649e-05, "loss": 0.4393, "step": 42145 }, { "epoch": 1.2306243704471467, "grad_norm": 0.4371771962948214, "learning_rate": 3.2765612327656125e-05, "loss": 0.4463, "step": 42150 }, { "epoch": 1.230770353717464, "grad_norm": 0.4603680067413695, "learning_rate": 3.276290889429576e-05, "loss": 0.457, "step": 42155 }, { "epoch": 1.2309163369877811, "grad_norm": 0.4916809056458096, "learning_rate": 3.2760205460935386e-05, "loss": 0.4628, "step": 42160 }, { "epoch": 1.2310623202580984, "grad_norm": 0.4952385257750453, "learning_rate": 3.275750202757502e-05, "loss": 0.4405, "step": 42165 }, { "epoch": 1.2312083035284156, "grad_norm": 0.4779784644032412, "learning_rate": 3.2754798594214654e-05, "loss": 0.4384, "step": 42170 }, { "epoch": 1.2313542867987328, "grad_norm": 0.4888284151017021, "learning_rate": 3.275209516085428e-05, "loss": 0.4301, "step": 42175 }, { "epoch": 1.23150027006905, "grad_norm": 0.5189729708389698, "learning_rate": 3.2749391727493915e-05, "loss": 0.4189, "step": 42180 }, { "epoch": 1.2316462533393673, "grad_norm": 0.4649337647343974, "learning_rate": 3.274668829413355e-05, "loss": 0.4287, "step": 42185 }, { "epoch": 1.2317922366096845, "grad_norm": 0.4763578117062219, "learning_rate": 3.2743984860773183e-05, "loss": 0.4524, "step": 42190 }, { "epoch": 1.2319382198800017, "grad_norm": 0.505978244043173, "learning_rate": 3.274128142741282e-05, "loss": 0.4644, "step": 42195 }, { "epoch": 1.232084203150319, "grad_norm": 0.45185097726750173, "learning_rate": 3.273857799405245e-05, "loss": 0.4547, "step": 42200 }, { "epoch": 1.2322301864206362, "grad_norm": 0.48458013127204774, "learning_rate": 3.273587456069208e-05, "loss": 0.439, "step": 42205 }, { "epoch": 1.2323761696909534, "grad_norm": 0.5113255418831402, "learning_rate": 3.273317112733171e-05, "loss": 0.4234, "step": 42210 }, { "epoch": 1.2325221529612707, "grad_norm": 0.46901836935798563, "learning_rate": 3.273046769397135e-05, "loss": 0.458, "step": 42215 }, { "epoch": 1.2326681362315879, "grad_norm": 0.48125512456879377, "learning_rate": 3.2727764260610974e-05, "loss": 0.4506, "step": 42220 }, { "epoch": 1.2328141195019051, "grad_norm": 0.5105869672138033, "learning_rate": 3.272506082725061e-05, "loss": 0.4636, "step": 42225 }, { "epoch": 1.2329601027722223, "grad_norm": 0.4936146262595072, "learning_rate": 3.272235739389024e-05, "loss": 0.4522, "step": 42230 }, { "epoch": 1.2331060860425396, "grad_norm": 0.4989342086313793, "learning_rate": 3.271965396052987e-05, "loss": 0.458, "step": 42235 }, { "epoch": 1.2332520693128568, "grad_norm": 0.4805972271603907, "learning_rate": 3.27169505271695e-05, "loss": 0.4395, "step": 42240 }, { "epoch": 1.233398052583174, "grad_norm": 0.5041194236265406, "learning_rate": 3.2714247093809144e-05, "loss": 0.4585, "step": 42245 }, { "epoch": 1.2335440358534913, "grad_norm": 0.4928287447278477, "learning_rate": 3.271154366044877e-05, "loss": 0.4449, "step": 42250 }, { "epoch": 1.2336900191238085, "grad_norm": 0.5066729797227224, "learning_rate": 3.2708840227088405e-05, "loss": 0.4534, "step": 42255 }, { "epoch": 1.2338360023941257, "grad_norm": 0.44367166334888364, "learning_rate": 3.270613679372804e-05, "loss": 0.4162, "step": 42260 }, { "epoch": 1.233981985664443, "grad_norm": 0.48390706664402877, "learning_rate": 3.2703433360367667e-05, "loss": 0.4605, "step": 42265 }, { "epoch": 1.23412796893476, "grad_norm": 0.4983613534589689, "learning_rate": 3.27007299270073e-05, "loss": 0.4435, "step": 42270 }, { "epoch": 1.2342739522050774, "grad_norm": 0.46372444492254244, "learning_rate": 3.2698026493646935e-05, "loss": 0.429, "step": 42275 }, { "epoch": 1.2344199354753944, "grad_norm": 0.4563521053823791, "learning_rate": 3.269532306028656e-05, "loss": 0.4569, "step": 42280 }, { "epoch": 1.2345659187457119, "grad_norm": 0.4662982028619074, "learning_rate": 3.2692619626926196e-05, "loss": 0.4141, "step": 42285 }, { "epoch": 1.2347119020160289, "grad_norm": 0.4919217897542998, "learning_rate": 3.268991619356583e-05, "loss": 0.4179, "step": 42290 }, { "epoch": 1.234857885286346, "grad_norm": 0.45589684569777, "learning_rate": 3.268721276020546e-05, "loss": 0.4511, "step": 42295 }, { "epoch": 1.2350038685566633, "grad_norm": 0.5143180466290408, "learning_rate": 3.26845093268451e-05, "loss": 0.4227, "step": 42300 }, { "epoch": 1.2351498518269806, "grad_norm": 0.4469219884044672, "learning_rate": 3.268180589348473e-05, "loss": 0.4002, "step": 42305 }, { "epoch": 1.2352958350972978, "grad_norm": 0.5738131645899968, "learning_rate": 3.267910246012436e-05, "loss": 0.4953, "step": 42310 }, { "epoch": 1.235441818367615, "grad_norm": 0.4444880592594426, "learning_rate": 3.267639902676399e-05, "loss": 0.4603, "step": 42315 }, { "epoch": 1.2355878016379322, "grad_norm": 0.44149527599372557, "learning_rate": 3.267369559340363e-05, "loss": 0.4466, "step": 42320 }, { "epoch": 1.2357337849082495, "grad_norm": 0.4944890206232222, "learning_rate": 3.2670992160043254e-05, "loss": 0.4466, "step": 42325 }, { "epoch": 1.2358797681785667, "grad_norm": 0.47219710035414525, "learning_rate": 3.266828872668289e-05, "loss": 0.4206, "step": 42330 }, { "epoch": 1.236025751448884, "grad_norm": 0.45810869277207944, "learning_rate": 3.266558529332252e-05, "loss": 0.4148, "step": 42335 }, { "epoch": 1.2361717347192012, "grad_norm": 0.5126349077132931, "learning_rate": 3.266288185996215e-05, "loss": 0.4767, "step": 42340 }, { "epoch": 1.2363177179895184, "grad_norm": 0.5116768345646265, "learning_rate": 3.2660178426601784e-05, "loss": 0.4513, "step": 42345 }, { "epoch": 1.2364637012598356, "grad_norm": 0.49360278799296464, "learning_rate": 3.265747499324142e-05, "loss": 0.4752, "step": 42350 }, { "epoch": 1.2366096845301529, "grad_norm": 0.4699193759710326, "learning_rate": 3.2654771559881045e-05, "loss": 0.4567, "step": 42355 }, { "epoch": 1.23675566780047, "grad_norm": 0.5362012673157617, "learning_rate": 3.2652068126520686e-05, "loss": 0.4523, "step": 42360 }, { "epoch": 1.2369016510707873, "grad_norm": 0.5020604040270076, "learning_rate": 3.264936469316032e-05, "loss": 0.4444, "step": 42365 }, { "epoch": 1.2370476343411045, "grad_norm": 0.5209743642302277, "learning_rate": 3.264666125979995e-05, "loss": 0.4746, "step": 42370 }, { "epoch": 1.2371936176114218, "grad_norm": 0.4841521641500294, "learning_rate": 3.264395782643958e-05, "loss": 0.4555, "step": 42375 }, { "epoch": 1.237339600881739, "grad_norm": 0.4986409853071038, "learning_rate": 3.2641254393079215e-05, "loss": 0.4764, "step": 42380 }, { "epoch": 1.2374855841520562, "grad_norm": 0.5195559428335644, "learning_rate": 3.263855095971884e-05, "loss": 0.4541, "step": 42385 }, { "epoch": 1.2376315674223735, "grad_norm": 0.4772786765224233, "learning_rate": 3.2635847526358476e-05, "loss": 0.4704, "step": 42390 }, { "epoch": 1.2377775506926907, "grad_norm": 0.42759540678147795, "learning_rate": 3.263314409299811e-05, "loss": 0.4212, "step": 42395 }, { "epoch": 1.237923533963008, "grad_norm": 0.4721875117852034, "learning_rate": 3.263044065963774e-05, "loss": 0.4378, "step": 42400 }, { "epoch": 1.2380695172333251, "grad_norm": 0.48471604737768675, "learning_rate": 3.262773722627737e-05, "loss": 0.4428, "step": 42405 }, { "epoch": 1.2382155005036424, "grad_norm": 0.4861611772832345, "learning_rate": 3.2625033792917005e-05, "loss": 0.4399, "step": 42410 }, { "epoch": 1.2383614837739594, "grad_norm": 0.4668294131109508, "learning_rate": 3.262233035955664e-05, "loss": 0.4171, "step": 42415 }, { "epoch": 1.2385074670442768, "grad_norm": 0.5884283574043232, "learning_rate": 3.2619626926196273e-05, "loss": 0.4688, "step": 42420 }, { "epoch": 1.2386534503145938, "grad_norm": 0.4680584020041899, "learning_rate": 3.261692349283591e-05, "loss": 0.4278, "step": 42425 }, { "epoch": 1.2387994335849113, "grad_norm": 0.45696864885362115, "learning_rate": 3.2614220059475535e-05, "loss": 0.441, "step": 42430 }, { "epoch": 1.2389454168552283, "grad_norm": 0.4659347514476637, "learning_rate": 3.261151662611517e-05, "loss": 0.4314, "step": 42435 }, { "epoch": 1.2390914001255455, "grad_norm": 0.46569840389768424, "learning_rate": 3.26088131927548e-05, "loss": 0.4011, "step": 42440 }, { "epoch": 1.2392373833958628, "grad_norm": 0.48383259896103037, "learning_rate": 3.260610975939443e-05, "loss": 0.4434, "step": 42445 }, { "epoch": 1.23938336666618, "grad_norm": 0.4836290991382518, "learning_rate": 3.2603406326034064e-05, "loss": 0.4619, "step": 42450 }, { "epoch": 1.2395293499364972, "grad_norm": 0.45969214462688507, "learning_rate": 3.26007028926737e-05, "loss": 0.4331, "step": 42455 }, { "epoch": 1.2396753332068144, "grad_norm": 0.47394839744648526, "learning_rate": 3.2597999459313325e-05, "loss": 0.4419, "step": 42460 }, { "epoch": 1.2398213164771317, "grad_norm": 0.48665604774218746, "learning_rate": 3.259529602595296e-05, "loss": 0.4852, "step": 42465 }, { "epoch": 1.239967299747449, "grad_norm": 0.4507966573115752, "learning_rate": 3.25925925925926e-05, "loss": 0.4413, "step": 42470 }, { "epoch": 1.2401132830177661, "grad_norm": 0.4963510723089822, "learning_rate": 3.258988915923223e-05, "loss": 0.4666, "step": 42475 }, { "epoch": 1.2402592662880834, "grad_norm": 0.5008222069381013, "learning_rate": 3.258718572587186e-05, "loss": 0.4483, "step": 42480 }, { "epoch": 1.2404052495584006, "grad_norm": 0.47216753776725956, "learning_rate": 3.2584482292511495e-05, "loss": 0.439, "step": 42485 }, { "epoch": 1.2405512328287178, "grad_norm": 0.5059986440678259, "learning_rate": 3.258177885915112e-05, "loss": 0.4775, "step": 42490 }, { "epoch": 1.240697216099035, "grad_norm": 0.43817162092872686, "learning_rate": 3.2579075425790757e-05, "loss": 0.4634, "step": 42495 }, { "epoch": 1.2408431993693523, "grad_norm": 0.48186914008519066, "learning_rate": 3.257637199243039e-05, "loss": 0.46, "step": 42500 }, { "epoch": 1.2409891826396695, "grad_norm": 0.46201787520211063, "learning_rate": 3.257366855907002e-05, "loss": 0.415, "step": 42505 }, { "epoch": 1.2411351659099867, "grad_norm": 0.45945094903336353, "learning_rate": 3.257096512570965e-05, "loss": 0.4523, "step": 42510 }, { "epoch": 1.241281149180304, "grad_norm": 0.4438188801787332, "learning_rate": 3.2568261692349286e-05, "loss": 0.4084, "step": 42515 }, { "epoch": 1.2414271324506212, "grad_norm": 0.49561646931332537, "learning_rate": 3.256555825898891e-05, "loss": 0.4472, "step": 42520 }, { "epoch": 1.2415731157209384, "grad_norm": 0.4608512575931024, "learning_rate": 3.256285482562855e-05, "loss": 0.4529, "step": 42525 }, { "epoch": 1.2417190989912557, "grad_norm": 0.4881371211079221, "learning_rate": 3.256015139226819e-05, "loss": 0.4526, "step": 42530 }, { "epoch": 1.2418650822615729, "grad_norm": 0.47266553931916655, "learning_rate": 3.2557447958907815e-05, "loss": 0.4496, "step": 42535 }, { "epoch": 1.2420110655318901, "grad_norm": 0.5409727606969955, "learning_rate": 3.255474452554745e-05, "loss": 0.4489, "step": 42540 }, { "epoch": 1.2421570488022073, "grad_norm": 0.4759076515636961, "learning_rate": 3.255204109218708e-05, "loss": 0.4604, "step": 42545 }, { "epoch": 1.2423030320725246, "grad_norm": 0.5126963967874106, "learning_rate": 3.254933765882671e-05, "loss": 0.4753, "step": 42550 }, { "epoch": 1.2424490153428418, "grad_norm": 0.46962082891116996, "learning_rate": 3.2546634225466344e-05, "loss": 0.4572, "step": 42555 }, { "epoch": 1.2425949986131588, "grad_norm": 0.4567869388843893, "learning_rate": 3.254393079210598e-05, "loss": 0.4285, "step": 42560 }, { "epoch": 1.2427409818834763, "grad_norm": 0.5027553019579395, "learning_rate": 3.2541227358745606e-05, "loss": 0.4434, "step": 42565 }, { "epoch": 1.2428869651537933, "grad_norm": 0.5253241928906871, "learning_rate": 3.253852392538524e-05, "loss": 0.4649, "step": 42570 }, { "epoch": 1.2430329484241107, "grad_norm": 0.4661088431175444, "learning_rate": 3.2535820492024874e-05, "loss": 0.4324, "step": 42575 }, { "epoch": 1.2431789316944277, "grad_norm": 0.4851707972390036, "learning_rate": 3.25331170586645e-05, "loss": 0.4073, "step": 42580 }, { "epoch": 1.243324914964745, "grad_norm": 0.5456357779851059, "learning_rate": 3.253041362530414e-05, "loss": 0.4493, "step": 42585 }, { "epoch": 1.2434708982350622, "grad_norm": 0.4830044140290673, "learning_rate": 3.252771019194377e-05, "loss": 0.4504, "step": 42590 }, { "epoch": 1.2436168815053794, "grad_norm": 0.48689947079635576, "learning_rate": 3.25250067585834e-05, "loss": 0.4277, "step": 42595 }, { "epoch": 1.2437628647756966, "grad_norm": 0.5047332759187914, "learning_rate": 3.252230332522304e-05, "loss": 0.4329, "step": 42600 }, { "epoch": 1.2439088480460139, "grad_norm": 0.5029049696560903, "learning_rate": 3.251959989186267e-05, "loss": 0.4362, "step": 42605 }, { "epoch": 1.244054831316331, "grad_norm": 0.486191422765315, "learning_rate": 3.25168964585023e-05, "loss": 0.4427, "step": 42610 }, { "epoch": 1.2442008145866483, "grad_norm": 0.45866698823774116, "learning_rate": 3.251419302514193e-05, "loss": 0.4547, "step": 42615 }, { "epoch": 1.2443467978569656, "grad_norm": 0.5061258881621346, "learning_rate": 3.2511489591781566e-05, "loss": 0.4608, "step": 42620 }, { "epoch": 1.2444927811272828, "grad_norm": 0.4770865712517517, "learning_rate": 3.2508786158421193e-05, "loss": 0.4273, "step": 42625 }, { "epoch": 1.2446387643976, "grad_norm": 0.49514654153923743, "learning_rate": 3.250608272506083e-05, "loss": 0.4283, "step": 42630 }, { "epoch": 1.2447847476679172, "grad_norm": 0.491704331745691, "learning_rate": 3.250337929170046e-05, "loss": 0.4549, "step": 42635 }, { "epoch": 1.2449307309382345, "grad_norm": 0.4902320230377316, "learning_rate": 3.2500675858340095e-05, "loss": 0.4412, "step": 42640 }, { "epoch": 1.2450767142085517, "grad_norm": 0.4676144510828083, "learning_rate": 3.249797242497973e-05, "loss": 0.4542, "step": 42645 }, { "epoch": 1.245222697478869, "grad_norm": 0.4891798436495601, "learning_rate": 3.249526899161936e-05, "loss": 0.4644, "step": 42650 }, { "epoch": 1.2453686807491862, "grad_norm": 0.45960991626897596, "learning_rate": 3.249256555825899e-05, "loss": 0.4346, "step": 42655 }, { "epoch": 1.2455146640195034, "grad_norm": 0.48819665775893245, "learning_rate": 3.2489862124898625e-05, "loss": 0.4614, "step": 42660 }, { "epoch": 1.2456606472898206, "grad_norm": 0.5351795401073873, "learning_rate": 3.248715869153825e-05, "loss": 0.463, "step": 42665 }, { "epoch": 1.2458066305601379, "grad_norm": 0.44934016664371756, "learning_rate": 3.2484455258177886e-05, "loss": 0.4406, "step": 42670 }, { "epoch": 1.245952613830455, "grad_norm": 0.508998328550108, "learning_rate": 3.248175182481752e-05, "loss": 0.4532, "step": 42675 }, { "epoch": 1.2460985971007723, "grad_norm": 0.5215129868319598, "learning_rate": 3.2479048391457154e-05, "loss": 0.4403, "step": 42680 }, { "epoch": 1.2462445803710895, "grad_norm": 0.4967956953294297, "learning_rate": 3.247634495809678e-05, "loss": 0.445, "step": 42685 }, { "epoch": 1.2463905636414068, "grad_norm": 0.503376769766951, "learning_rate": 3.2473641524736415e-05, "loss": 0.4497, "step": 42690 }, { "epoch": 1.246536546911724, "grad_norm": 0.4491547135987566, "learning_rate": 3.247093809137605e-05, "loss": 0.4663, "step": 42695 }, { "epoch": 1.2466825301820412, "grad_norm": 0.45777654128568335, "learning_rate": 3.246823465801568e-05, "loss": 0.4503, "step": 42700 }, { "epoch": 1.2468285134523585, "grad_norm": 0.46098558195480643, "learning_rate": 3.246553122465532e-05, "loss": 0.4467, "step": 42705 }, { "epoch": 1.2469744967226757, "grad_norm": 0.4747568441927361, "learning_rate": 3.2462827791294944e-05, "loss": 0.4667, "step": 42710 }, { "epoch": 1.2471204799929927, "grad_norm": 0.48269541909887786, "learning_rate": 3.246012435793458e-05, "loss": 0.4474, "step": 42715 }, { "epoch": 1.2472664632633101, "grad_norm": 0.4788067966991266, "learning_rate": 3.245742092457421e-05, "loss": 0.4409, "step": 42720 }, { "epoch": 1.2474124465336272, "grad_norm": 0.49923848719888647, "learning_rate": 3.245471749121384e-05, "loss": 0.4348, "step": 42725 }, { "epoch": 1.2475584298039444, "grad_norm": 0.462436650397835, "learning_rate": 3.2452014057853474e-05, "loss": 0.4602, "step": 42730 }, { "epoch": 1.2477044130742616, "grad_norm": 0.48554121678536166, "learning_rate": 3.244931062449311e-05, "loss": 0.4408, "step": 42735 }, { "epoch": 1.2478503963445788, "grad_norm": 0.45397503078874796, "learning_rate": 3.244660719113274e-05, "loss": 0.4292, "step": 42740 }, { "epoch": 1.247996379614896, "grad_norm": 0.5043057517392998, "learning_rate": 3.244390375777237e-05, "loss": 0.4606, "step": 42745 }, { "epoch": 1.2481423628852133, "grad_norm": 0.468481310570617, "learning_rate": 3.2441200324412e-05, "loss": 0.4597, "step": 42750 }, { "epoch": 1.2482883461555305, "grad_norm": 0.47255186114305753, "learning_rate": 3.243849689105164e-05, "loss": 0.4298, "step": 42755 }, { "epoch": 1.2484343294258478, "grad_norm": 0.4673826441085406, "learning_rate": 3.243579345769127e-05, "loss": 0.4187, "step": 42760 }, { "epoch": 1.248580312696165, "grad_norm": 0.5006882716970001, "learning_rate": 3.2433090024330905e-05, "loss": 0.4766, "step": 42765 }, { "epoch": 1.2487262959664822, "grad_norm": 0.5097259436160212, "learning_rate": 3.243038659097053e-05, "loss": 0.4503, "step": 42770 }, { "epoch": 1.2488722792367994, "grad_norm": 0.46814932028602885, "learning_rate": 3.2427683157610166e-05, "loss": 0.4496, "step": 42775 }, { "epoch": 1.2490182625071167, "grad_norm": 0.4948475831791287, "learning_rate": 3.24249797242498e-05, "loss": 0.4444, "step": 42780 }, { "epoch": 1.249164245777434, "grad_norm": 0.5014474559815372, "learning_rate": 3.242227629088943e-05, "loss": 0.452, "step": 42785 }, { "epoch": 1.2493102290477511, "grad_norm": 0.5157926589550165, "learning_rate": 3.241957285752906e-05, "loss": 0.4455, "step": 42790 }, { "epoch": 1.2494562123180684, "grad_norm": 0.4634745992535068, "learning_rate": 3.2416869424168696e-05, "loss": 0.4639, "step": 42795 }, { "epoch": 1.2496021955883856, "grad_norm": 0.48194307253355484, "learning_rate": 3.241416599080832e-05, "loss": 0.4313, "step": 42800 }, { "epoch": 1.2497481788587028, "grad_norm": 0.46723213715762946, "learning_rate": 3.241146255744796e-05, "loss": 0.4506, "step": 42805 }, { "epoch": 1.24989416212902, "grad_norm": 0.47960994980486676, "learning_rate": 3.24087591240876e-05, "loss": 0.4502, "step": 42810 }, { "epoch": 1.2500401453993373, "grad_norm": 0.5034768911437699, "learning_rate": 3.2406055690727225e-05, "loss": 0.4534, "step": 42815 }, { "epoch": 1.2501861286696545, "grad_norm": 0.42965304011490185, "learning_rate": 3.240335225736686e-05, "loss": 0.4371, "step": 42820 }, { "epoch": 1.2503321119399717, "grad_norm": 0.5005361275668873, "learning_rate": 3.240064882400649e-05, "loss": 0.4251, "step": 42825 }, { "epoch": 1.250478095210289, "grad_norm": 0.46731524368827, "learning_rate": 3.239794539064612e-05, "loss": 0.4323, "step": 42830 }, { "epoch": 1.2506240784806062, "grad_norm": 0.5098818532708804, "learning_rate": 3.2395241957285754e-05, "loss": 0.459, "step": 42835 }, { "epoch": 1.2507700617509234, "grad_norm": 0.5418271055058198, "learning_rate": 3.239253852392539e-05, "loss": 0.4856, "step": 42840 }, { "epoch": 1.2509160450212407, "grad_norm": 0.5098890699290171, "learning_rate": 3.2389835090565015e-05, "loss": 0.4824, "step": 42845 }, { "epoch": 1.2510620282915577, "grad_norm": 0.5098708463970915, "learning_rate": 3.238713165720465e-05, "loss": 0.4397, "step": 42850 }, { "epoch": 1.251208011561875, "grad_norm": 0.4849139868376767, "learning_rate": 3.238442822384428e-05, "loss": 0.4491, "step": 42855 }, { "epoch": 1.2513539948321921, "grad_norm": 0.46293484627777637, "learning_rate": 3.238172479048391e-05, "loss": 0.4242, "step": 42860 }, { "epoch": 1.2514999781025096, "grad_norm": 0.5120153917829015, "learning_rate": 3.237902135712355e-05, "loss": 0.4389, "step": 42865 }, { "epoch": 1.2516459613728266, "grad_norm": 0.5162464285695203, "learning_rate": 3.2376317923763185e-05, "loss": 0.4654, "step": 42870 }, { "epoch": 1.251791944643144, "grad_norm": 0.49220465375351596, "learning_rate": 3.237361449040281e-05, "loss": 0.4493, "step": 42875 }, { "epoch": 1.251937927913461, "grad_norm": 0.4892348795814999, "learning_rate": 3.237091105704245e-05, "loss": 0.4363, "step": 42880 }, { "epoch": 1.2520839111837783, "grad_norm": 0.49515181994614144, "learning_rate": 3.236820762368208e-05, "loss": 0.4468, "step": 42885 }, { "epoch": 1.2522298944540955, "grad_norm": 0.7893268650163628, "learning_rate": 3.236550419032171e-05, "loss": 0.4499, "step": 42890 }, { "epoch": 1.2523758777244127, "grad_norm": 0.4776662620943974, "learning_rate": 3.236280075696134e-05, "loss": 0.435, "step": 42895 }, { "epoch": 1.25252186099473, "grad_norm": 0.4810614660339649, "learning_rate": 3.2360097323600976e-05, "loss": 0.4468, "step": 42900 }, { "epoch": 1.2526678442650472, "grad_norm": 0.5055626376108652, "learning_rate": 3.23573938902406e-05, "loss": 0.471, "step": 42905 }, { "epoch": 1.2528138275353644, "grad_norm": 0.46967758071087, "learning_rate": 3.235469045688024e-05, "loss": 0.4532, "step": 42910 }, { "epoch": 1.2529598108056816, "grad_norm": 0.48322668090123827, "learning_rate": 3.235198702351987e-05, "loss": 0.4291, "step": 42915 }, { "epoch": 1.2531057940759989, "grad_norm": 0.5182972621449712, "learning_rate": 3.23492835901595e-05, "loss": 0.4686, "step": 42920 }, { "epoch": 1.253251777346316, "grad_norm": 0.4659774338142106, "learning_rate": 3.234658015679914e-05, "loss": 0.4532, "step": 42925 }, { "epoch": 1.2533977606166333, "grad_norm": 0.47809391227921316, "learning_rate": 3.234387672343877e-05, "loss": 0.4533, "step": 42930 }, { "epoch": 1.2535437438869506, "grad_norm": 0.4818959781752085, "learning_rate": 3.23411732900784e-05, "loss": 0.4555, "step": 42935 }, { "epoch": 1.2536897271572678, "grad_norm": 0.4871416520801551, "learning_rate": 3.2338469856718034e-05, "loss": 0.4295, "step": 42940 }, { "epoch": 1.253835710427585, "grad_norm": 0.5379115075074478, "learning_rate": 3.233576642335767e-05, "loss": 0.4389, "step": 42945 }, { "epoch": 1.2539816936979022, "grad_norm": 0.46843684591453694, "learning_rate": 3.2333062989997296e-05, "loss": 0.4553, "step": 42950 }, { "epoch": 1.2541276769682195, "grad_norm": 0.481133540723779, "learning_rate": 3.233035955663693e-05, "loss": 0.4269, "step": 42955 }, { "epoch": 1.2542736602385367, "grad_norm": 0.48699909378431755, "learning_rate": 3.2327656123276564e-05, "loss": 0.4381, "step": 42960 }, { "epoch": 1.254419643508854, "grad_norm": 0.5268988848463004, "learning_rate": 3.232495268991619e-05, "loss": 0.4749, "step": 42965 }, { "epoch": 1.2545656267791712, "grad_norm": 0.484462527507679, "learning_rate": 3.2322249256555825e-05, "loss": 0.4402, "step": 42970 }, { "epoch": 1.2547116100494884, "grad_norm": 0.49423265004069367, "learning_rate": 3.231954582319546e-05, "loss": 0.4454, "step": 42975 }, { "epoch": 1.2548575933198056, "grad_norm": 0.467086887052197, "learning_rate": 3.231684238983509e-05, "loss": 0.4281, "step": 42980 }, { "epoch": 1.2550035765901228, "grad_norm": 0.47969914795151397, "learning_rate": 3.231413895647473e-05, "loss": 0.4172, "step": 42985 }, { "epoch": 1.25514955986044, "grad_norm": 0.4975079338662516, "learning_rate": 3.231143552311436e-05, "loss": 0.4365, "step": 42990 }, { "epoch": 1.255295543130757, "grad_norm": 0.5102725985782796, "learning_rate": 3.230873208975399e-05, "loss": 0.4409, "step": 42995 }, { "epoch": 1.2554415264010745, "grad_norm": 0.4979210051096698, "learning_rate": 3.230602865639362e-05, "loss": 0.4469, "step": 43000 }, { "epoch": 1.2555875096713915, "grad_norm": 0.5016932936594206, "learning_rate": 3.2303325223033256e-05, "loss": 0.4126, "step": 43005 }, { "epoch": 1.255733492941709, "grad_norm": 0.4652503657253584, "learning_rate": 3.2300621789672884e-05, "loss": 0.4718, "step": 43010 }, { "epoch": 1.255879476212026, "grad_norm": 0.5138180910993138, "learning_rate": 3.229791835631252e-05, "loss": 0.4722, "step": 43015 }, { "epoch": 1.2560254594823435, "grad_norm": 0.5096154826137497, "learning_rate": 3.229521492295215e-05, "loss": 0.4608, "step": 43020 }, { "epoch": 1.2561714427526605, "grad_norm": 0.5335213438478466, "learning_rate": 3.229251148959178e-05, "loss": 0.4676, "step": 43025 }, { "epoch": 1.2563174260229777, "grad_norm": 0.4909273189003509, "learning_rate": 3.228980805623141e-05, "loss": 0.4272, "step": 43030 }, { "epoch": 1.256463409293295, "grad_norm": 0.45996924053402466, "learning_rate": 3.2287104622871054e-05, "loss": 0.4578, "step": 43035 }, { "epoch": 1.2566093925636121, "grad_norm": 0.5005860462910463, "learning_rate": 3.228440118951068e-05, "loss": 0.4385, "step": 43040 }, { "epoch": 1.2567553758339294, "grad_norm": 0.5031690597865401, "learning_rate": 3.2281697756150315e-05, "loss": 0.4515, "step": 43045 }, { "epoch": 1.2569013591042466, "grad_norm": 0.49260386518879834, "learning_rate": 3.227899432278995e-05, "loss": 0.4673, "step": 43050 }, { "epoch": 1.2570473423745638, "grad_norm": 0.472263188587567, "learning_rate": 3.2276290889429576e-05, "loss": 0.4544, "step": 43055 }, { "epoch": 1.257193325644881, "grad_norm": 0.49526010658431996, "learning_rate": 3.227358745606921e-05, "loss": 0.4439, "step": 43060 }, { "epoch": 1.2573393089151983, "grad_norm": 0.46906304425995715, "learning_rate": 3.2270884022708844e-05, "loss": 0.4516, "step": 43065 }, { "epoch": 1.2574852921855155, "grad_norm": 0.5208258666046237, "learning_rate": 3.226818058934847e-05, "loss": 0.4446, "step": 43070 }, { "epoch": 1.2576312754558328, "grad_norm": 0.49990310661131176, "learning_rate": 3.2265477155988105e-05, "loss": 0.4311, "step": 43075 }, { "epoch": 1.25777725872615, "grad_norm": 0.47732480224558294, "learning_rate": 3.226277372262774e-05, "loss": 0.4234, "step": 43080 }, { "epoch": 1.2579232419964672, "grad_norm": 0.43963553454253534, "learning_rate": 3.2260070289267367e-05, "loss": 0.4177, "step": 43085 }, { "epoch": 1.2580692252667844, "grad_norm": 0.47909379995195434, "learning_rate": 3.2257366855907e-05, "loss": 0.444, "step": 43090 }, { "epoch": 1.2582152085371017, "grad_norm": 0.43793014430402466, "learning_rate": 3.225466342254664e-05, "loss": 0.4133, "step": 43095 }, { "epoch": 1.258361191807419, "grad_norm": 0.46665374703719187, "learning_rate": 3.225195998918627e-05, "loss": 0.4336, "step": 43100 }, { "epoch": 1.2585071750777361, "grad_norm": 0.4932929485526988, "learning_rate": 3.22492565558259e-05, "loss": 0.4402, "step": 43105 }, { "epoch": 1.2586531583480534, "grad_norm": 0.5149162138283491, "learning_rate": 3.224655312246554e-05, "loss": 0.4771, "step": 43110 }, { "epoch": 1.2587991416183706, "grad_norm": 0.4510821256298131, "learning_rate": 3.2243849689105164e-05, "loss": 0.4203, "step": 43115 }, { "epoch": 1.2589451248886878, "grad_norm": 0.46280968283894375, "learning_rate": 3.22411462557448e-05, "loss": 0.4549, "step": 43120 }, { "epoch": 1.259091108159005, "grad_norm": 0.48760326986761754, "learning_rate": 3.223844282238443e-05, "loss": 0.4415, "step": 43125 }, { "epoch": 1.2592370914293223, "grad_norm": 0.4977584416154205, "learning_rate": 3.223573938902406e-05, "loss": 0.458, "step": 43130 }, { "epoch": 1.2593830746996395, "grad_norm": 0.46222883251587366, "learning_rate": 3.223303595566369e-05, "loss": 0.4589, "step": 43135 }, { "epoch": 1.2595290579699565, "grad_norm": 0.4591650187148725, "learning_rate": 3.223033252230333e-05, "loss": 0.457, "step": 43140 }, { "epoch": 1.259675041240274, "grad_norm": 0.486619830984857, "learning_rate": 3.2227629088942954e-05, "loss": 0.4334, "step": 43145 }, { "epoch": 1.259821024510591, "grad_norm": 0.46860563700213437, "learning_rate": 3.2224925655582595e-05, "loss": 0.4246, "step": 43150 }, { "epoch": 1.2599670077809084, "grad_norm": 0.45522544876870913, "learning_rate": 3.222222222222223e-05, "loss": 0.4247, "step": 43155 }, { "epoch": 1.2601129910512254, "grad_norm": 0.4941932305976761, "learning_rate": 3.2219518788861856e-05, "loss": 0.458, "step": 43160 }, { "epoch": 1.2602589743215429, "grad_norm": 0.5184317467255762, "learning_rate": 3.221681535550149e-05, "loss": 0.4367, "step": 43165 }, { "epoch": 1.2604049575918599, "grad_norm": 0.5252622809275912, "learning_rate": 3.2214111922141124e-05, "loss": 0.4735, "step": 43170 }, { "epoch": 1.2605509408621771, "grad_norm": 0.47585962930516573, "learning_rate": 3.221140848878075e-05, "loss": 0.4488, "step": 43175 }, { "epoch": 1.2606969241324943, "grad_norm": 0.5211032594782857, "learning_rate": 3.2208705055420386e-05, "loss": 0.4699, "step": 43180 }, { "epoch": 1.2608429074028116, "grad_norm": 0.46939015565395614, "learning_rate": 3.220600162206002e-05, "loss": 0.4444, "step": 43185 }, { "epoch": 1.2609888906731288, "grad_norm": 0.4663222672846695, "learning_rate": 3.220329818869965e-05, "loss": 0.443, "step": 43190 }, { "epoch": 1.261134873943446, "grad_norm": 0.4893937513382742, "learning_rate": 3.220059475533928e-05, "loss": 0.4688, "step": 43195 }, { "epoch": 1.2612808572137633, "grad_norm": 0.4979343081767585, "learning_rate": 3.2197891321978915e-05, "loss": 0.4287, "step": 43200 }, { "epoch": 1.2614268404840805, "grad_norm": 0.5047455369565339, "learning_rate": 3.219518788861855e-05, "loss": 0.4462, "step": 43205 }, { "epoch": 1.2615728237543977, "grad_norm": 0.5676791001829095, "learning_rate": 3.219248445525818e-05, "loss": 0.4462, "step": 43210 }, { "epoch": 1.261718807024715, "grad_norm": 0.4832358494281977, "learning_rate": 3.218978102189781e-05, "loss": 0.4127, "step": 43215 }, { "epoch": 1.2618647902950322, "grad_norm": 0.44547993358139487, "learning_rate": 3.2187077588537444e-05, "loss": 0.4432, "step": 43220 }, { "epoch": 1.2620107735653494, "grad_norm": 0.47590238205648433, "learning_rate": 3.218437415517708e-05, "loss": 0.4599, "step": 43225 }, { "epoch": 1.2621567568356666, "grad_norm": 0.45020884745698897, "learning_rate": 3.218167072181671e-05, "loss": 0.4279, "step": 43230 }, { "epoch": 1.2623027401059839, "grad_norm": 0.4890980944570033, "learning_rate": 3.217896728845634e-05, "loss": 0.4312, "step": 43235 }, { "epoch": 1.262448723376301, "grad_norm": 0.4910194010643294, "learning_rate": 3.2176263855095973e-05, "loss": 0.445, "step": 43240 }, { "epoch": 1.2625947066466183, "grad_norm": 0.4844844845605531, "learning_rate": 3.217356042173561e-05, "loss": 0.4641, "step": 43245 }, { "epoch": 1.2627406899169356, "grad_norm": 0.5125897339552397, "learning_rate": 3.2170856988375235e-05, "loss": 0.4499, "step": 43250 }, { "epoch": 1.2628866731872528, "grad_norm": 0.534455547916621, "learning_rate": 3.216815355501487e-05, "loss": 0.4314, "step": 43255 }, { "epoch": 1.26303265645757, "grad_norm": 0.48189676455254504, "learning_rate": 3.21654501216545e-05, "loss": 0.4392, "step": 43260 }, { "epoch": 1.2631786397278872, "grad_norm": 0.5282976079906992, "learning_rate": 3.216274668829414e-05, "loss": 0.4857, "step": 43265 }, { "epoch": 1.2633246229982045, "grad_norm": 0.4825526886015369, "learning_rate": 3.216004325493377e-05, "loss": 0.4519, "step": 43270 }, { "epoch": 1.2634706062685217, "grad_norm": 0.4784667829563615, "learning_rate": 3.21573398215734e-05, "loss": 0.4428, "step": 43275 }, { "epoch": 1.263616589538839, "grad_norm": 0.46734169386811486, "learning_rate": 3.215463638821303e-05, "loss": 0.4494, "step": 43280 }, { "epoch": 1.263762572809156, "grad_norm": 0.4859983221381505, "learning_rate": 3.2151932954852666e-05, "loss": 0.4674, "step": 43285 }, { "epoch": 1.2639085560794734, "grad_norm": 0.4801273421336324, "learning_rate": 3.21492295214923e-05, "loss": 0.4307, "step": 43290 }, { "epoch": 1.2640545393497904, "grad_norm": 0.4572104395110013, "learning_rate": 3.214652608813193e-05, "loss": 0.4394, "step": 43295 }, { "epoch": 1.2642005226201078, "grad_norm": 0.49974194191630705, "learning_rate": 3.214382265477156e-05, "loss": 0.4329, "step": 43300 }, { "epoch": 1.2643465058904249, "grad_norm": 0.46031993273541555, "learning_rate": 3.2141119221411195e-05, "loss": 0.4468, "step": 43305 }, { "epoch": 1.2644924891607423, "grad_norm": 0.4891354552018521, "learning_rate": 3.213841578805082e-05, "loss": 0.4725, "step": 43310 }, { "epoch": 1.2646384724310593, "grad_norm": 0.4523505263844226, "learning_rate": 3.2135712354690457e-05, "loss": 0.4442, "step": 43315 }, { "epoch": 1.2647844557013765, "grad_norm": 0.5544398661180354, "learning_rate": 3.213300892133009e-05, "loss": 0.4787, "step": 43320 }, { "epoch": 1.2649304389716938, "grad_norm": 0.45239316048876105, "learning_rate": 3.2130305487969725e-05, "loss": 0.4522, "step": 43325 }, { "epoch": 1.265076422242011, "grad_norm": 0.46411769024940874, "learning_rate": 3.212760205460936e-05, "loss": 0.4322, "step": 43330 }, { "epoch": 1.2652224055123282, "grad_norm": 0.5007868192734998, "learning_rate": 3.2124898621248986e-05, "loss": 0.4633, "step": 43335 }, { "epoch": 1.2653683887826455, "grad_norm": 0.48174911290544997, "learning_rate": 3.212219518788862e-05, "loss": 0.455, "step": 43340 }, { "epoch": 1.2655143720529627, "grad_norm": 0.4909817905354942, "learning_rate": 3.2119491754528254e-05, "loss": 0.4556, "step": 43345 }, { "epoch": 1.26566035532328, "grad_norm": 0.4789303602449986, "learning_rate": 3.211678832116788e-05, "loss": 0.4542, "step": 43350 }, { "epoch": 1.2658063385935971, "grad_norm": 0.47668220243680515, "learning_rate": 3.2114084887807515e-05, "loss": 0.4699, "step": 43355 }, { "epoch": 1.2659523218639144, "grad_norm": 0.5264753955269624, "learning_rate": 3.211138145444715e-05, "loss": 0.4649, "step": 43360 }, { "epoch": 1.2660983051342316, "grad_norm": 0.46535490289610026, "learning_rate": 3.210867802108678e-05, "loss": 0.4498, "step": 43365 }, { "epoch": 1.2662442884045488, "grad_norm": 0.5001532102809818, "learning_rate": 3.210597458772641e-05, "loss": 0.4576, "step": 43370 }, { "epoch": 1.266390271674866, "grad_norm": 0.4847042157097199, "learning_rate": 3.210327115436605e-05, "loss": 0.4507, "step": 43375 }, { "epoch": 1.2665362549451833, "grad_norm": 0.48322914637475933, "learning_rate": 3.210056772100568e-05, "loss": 0.4538, "step": 43380 }, { "epoch": 1.2666822382155005, "grad_norm": 0.46835045999235586, "learning_rate": 3.209786428764531e-05, "loss": 0.4357, "step": 43385 }, { "epoch": 1.2668282214858178, "grad_norm": 0.4768793173188467, "learning_rate": 3.2095160854284946e-05, "loss": 0.4529, "step": 43390 }, { "epoch": 1.266974204756135, "grad_norm": 0.5210994261223375, "learning_rate": 3.2092457420924574e-05, "loss": 0.4588, "step": 43395 }, { "epoch": 1.2671201880264522, "grad_norm": 0.46926883950569953, "learning_rate": 3.208975398756421e-05, "loss": 0.4047, "step": 43400 }, { "epoch": 1.2672661712967694, "grad_norm": 0.48157653297238684, "learning_rate": 3.208705055420384e-05, "loss": 0.4866, "step": 43405 }, { "epoch": 1.2674121545670867, "grad_norm": 0.4894832221874875, "learning_rate": 3.208434712084347e-05, "loss": 0.4493, "step": 43410 }, { "epoch": 1.267558137837404, "grad_norm": 0.4264465594656403, "learning_rate": 3.20816436874831e-05, "loss": 0.4472, "step": 43415 }, { "epoch": 1.2677041211077211, "grad_norm": 0.47718949265660254, "learning_rate": 3.207894025412274e-05, "loss": 0.4417, "step": 43420 }, { "epoch": 1.2678501043780384, "grad_norm": 0.4413917992691226, "learning_rate": 3.207623682076237e-05, "loss": 0.4292, "step": 43425 }, { "epoch": 1.2679960876483554, "grad_norm": 0.4382872567384488, "learning_rate": 3.2073533387402e-05, "loss": 0.4677, "step": 43430 }, { "epoch": 1.2681420709186728, "grad_norm": 0.45556229836541734, "learning_rate": 3.207082995404164e-05, "loss": 0.4538, "step": 43435 }, { "epoch": 1.2682880541889898, "grad_norm": 0.45488176665370655, "learning_rate": 3.2068126520681266e-05, "loss": 0.434, "step": 43440 }, { "epoch": 1.2684340374593073, "grad_norm": 0.49911760103445557, "learning_rate": 3.20654230873209e-05, "loss": 0.4464, "step": 43445 }, { "epoch": 1.2685800207296243, "grad_norm": 0.5178512978692915, "learning_rate": 3.2062719653960534e-05, "loss": 0.4495, "step": 43450 }, { "epoch": 1.2687260039999417, "grad_norm": 0.4698899089439383, "learning_rate": 3.206001622060016e-05, "loss": 0.4628, "step": 43455 }, { "epoch": 1.2688719872702587, "grad_norm": 0.48231729042922167, "learning_rate": 3.2057312787239795e-05, "loss": 0.4809, "step": 43460 }, { "epoch": 1.269017970540576, "grad_norm": 0.5098866260138267, "learning_rate": 3.205460935387943e-05, "loss": 0.4518, "step": 43465 }, { "epoch": 1.2691639538108932, "grad_norm": 0.4851348883504792, "learning_rate": 3.205190592051906e-05, "loss": 0.4095, "step": 43470 }, { "epoch": 1.2693099370812104, "grad_norm": 0.5153413138649451, "learning_rate": 3.204920248715869e-05, "loss": 0.464, "step": 43475 }, { "epoch": 1.2694559203515277, "grad_norm": 0.5243407143719405, "learning_rate": 3.2046499053798325e-05, "loss": 0.4359, "step": 43480 }, { "epoch": 1.2696019036218449, "grad_norm": 0.5056881135590482, "learning_rate": 3.204379562043795e-05, "loss": 0.4679, "step": 43485 }, { "epoch": 1.2697478868921621, "grad_norm": 0.47878668357807314, "learning_rate": 3.204109218707759e-05, "loss": 0.4672, "step": 43490 }, { "epoch": 1.2698938701624793, "grad_norm": 0.44129398032580036, "learning_rate": 3.203838875371723e-05, "loss": 0.4263, "step": 43495 }, { "epoch": 1.2700398534327966, "grad_norm": 0.4488429294441205, "learning_rate": 3.2035685320356854e-05, "loss": 0.4389, "step": 43500 }, { "epoch": 1.2701858367031138, "grad_norm": 0.4486328835093764, "learning_rate": 3.203298188699649e-05, "loss": 0.4301, "step": 43505 }, { "epoch": 1.270331819973431, "grad_norm": 0.5627410681187424, "learning_rate": 3.203027845363612e-05, "loss": 0.4831, "step": 43510 }, { "epoch": 1.2704778032437483, "grad_norm": 0.46862645459418734, "learning_rate": 3.202757502027575e-05, "loss": 0.4356, "step": 43515 }, { "epoch": 1.2706237865140655, "grad_norm": 0.44774391665755503, "learning_rate": 3.202487158691538e-05, "loss": 0.4263, "step": 43520 }, { "epoch": 1.2707697697843827, "grad_norm": 0.5213179132536221, "learning_rate": 3.202216815355502e-05, "loss": 0.453, "step": 43525 }, { "epoch": 1.2709157530547, "grad_norm": 0.5028977780069837, "learning_rate": 3.2019464720194645e-05, "loss": 0.4649, "step": 43530 }, { "epoch": 1.2710617363250172, "grad_norm": 0.5059543460419844, "learning_rate": 3.201676128683428e-05, "loss": 0.4533, "step": 43535 }, { "epoch": 1.2712077195953344, "grad_norm": 0.42829322957917304, "learning_rate": 3.201405785347391e-05, "loss": 0.4376, "step": 43540 }, { "epoch": 1.2713537028656516, "grad_norm": 0.4747426941087026, "learning_rate": 3.2011354420113547e-05, "loss": 0.4325, "step": 43545 }, { "epoch": 1.2714996861359689, "grad_norm": 0.47268407282003827, "learning_rate": 3.200865098675318e-05, "loss": 0.4533, "step": 43550 }, { "epoch": 1.271645669406286, "grad_norm": 0.4850208478598876, "learning_rate": 3.2005947553392815e-05, "loss": 0.4587, "step": 43555 }, { "epoch": 1.2717916526766033, "grad_norm": 0.46260619985464313, "learning_rate": 3.200324412003244e-05, "loss": 0.4562, "step": 43560 }, { "epoch": 1.2719376359469206, "grad_norm": 0.45172184936397036, "learning_rate": 3.2000540686672076e-05, "loss": 0.4432, "step": 43565 }, { "epoch": 1.2720836192172378, "grad_norm": 0.5247478621066606, "learning_rate": 3.199783725331171e-05, "loss": 0.4462, "step": 43570 }, { "epoch": 1.2722296024875548, "grad_norm": 0.42905550525429753, "learning_rate": 3.199513381995134e-05, "loss": 0.4346, "step": 43575 }, { "epoch": 1.2723755857578722, "grad_norm": 0.46584854341145815, "learning_rate": 3.199243038659097e-05, "loss": 0.4442, "step": 43580 }, { "epoch": 1.2725215690281892, "grad_norm": 0.4972699421887991, "learning_rate": 3.1989726953230605e-05, "loss": 0.4478, "step": 43585 }, { "epoch": 1.2726675522985067, "grad_norm": 0.4877653533951302, "learning_rate": 3.198702351987023e-05, "loss": 0.4575, "step": 43590 }, { "epoch": 1.2728135355688237, "grad_norm": 0.5026800453360831, "learning_rate": 3.1984320086509866e-05, "loss": 0.4361, "step": 43595 }, { "epoch": 1.2729595188391412, "grad_norm": 0.5101600878940676, "learning_rate": 3.19816166531495e-05, "loss": 0.4485, "step": 43600 }, { "epoch": 1.2731055021094582, "grad_norm": 0.5113019444402517, "learning_rate": 3.1978913219789134e-05, "loss": 0.4568, "step": 43605 }, { "epoch": 1.2732514853797756, "grad_norm": 0.457729249214222, "learning_rate": 3.197620978642877e-05, "loss": 0.4487, "step": 43610 }, { "epoch": 1.2733974686500926, "grad_norm": 0.5051864665323021, "learning_rate": 3.19735063530684e-05, "loss": 0.4596, "step": 43615 }, { "epoch": 1.2735434519204099, "grad_norm": 0.47915290139922445, "learning_rate": 3.197080291970803e-05, "loss": 0.4659, "step": 43620 }, { "epoch": 1.273689435190727, "grad_norm": 0.49596877368528874, "learning_rate": 3.1968099486347664e-05, "loss": 0.4548, "step": 43625 }, { "epoch": 1.2738354184610443, "grad_norm": 0.5013849566482745, "learning_rate": 3.19653960529873e-05, "loss": 0.4581, "step": 43630 }, { "epoch": 1.2739814017313615, "grad_norm": 0.4766456527333626, "learning_rate": 3.1962692619626925e-05, "loss": 0.4413, "step": 43635 }, { "epoch": 1.2741273850016788, "grad_norm": 0.46350969455307195, "learning_rate": 3.195998918626656e-05, "loss": 0.4107, "step": 43640 }, { "epoch": 1.274273368271996, "grad_norm": 0.4644886493220623, "learning_rate": 3.195728575290619e-05, "loss": 0.4229, "step": 43645 }, { "epoch": 1.2744193515423132, "grad_norm": 0.4635636707925036, "learning_rate": 3.195458231954582e-05, "loss": 0.4569, "step": 43650 }, { "epoch": 1.2745653348126305, "grad_norm": 0.5840200457645173, "learning_rate": 3.1951878886185454e-05, "loss": 0.4461, "step": 43655 }, { "epoch": 1.2747113180829477, "grad_norm": 0.46480972823878103, "learning_rate": 3.1949175452825095e-05, "loss": 0.4391, "step": 43660 }, { "epoch": 1.274857301353265, "grad_norm": 0.503484444341224, "learning_rate": 3.194647201946472e-05, "loss": 0.4706, "step": 43665 }, { "epoch": 1.2750032846235821, "grad_norm": 0.5254338282126394, "learning_rate": 3.1943768586104356e-05, "loss": 0.4385, "step": 43670 }, { "epoch": 1.2751492678938994, "grad_norm": 0.45791942288216947, "learning_rate": 3.194106515274399e-05, "loss": 0.468, "step": 43675 }, { "epoch": 1.2752952511642166, "grad_norm": 0.4739158582107301, "learning_rate": 3.193836171938362e-05, "loss": 0.4407, "step": 43680 }, { "epoch": 1.2754412344345338, "grad_norm": 0.48012214085640303, "learning_rate": 3.193565828602325e-05, "loss": 0.4323, "step": 43685 }, { "epoch": 1.275587217704851, "grad_norm": 0.4925101874573057, "learning_rate": 3.1932954852662885e-05, "loss": 0.4403, "step": 43690 }, { "epoch": 1.2757332009751683, "grad_norm": 0.4809172993250443, "learning_rate": 3.193025141930251e-05, "loss": 0.4658, "step": 43695 }, { "epoch": 1.2758791842454855, "grad_norm": 0.4762790070338586, "learning_rate": 3.192754798594215e-05, "loss": 0.4275, "step": 43700 }, { "epoch": 1.2760251675158027, "grad_norm": 0.46609701271396076, "learning_rate": 3.192484455258178e-05, "loss": 0.4317, "step": 43705 }, { "epoch": 1.27617115078612, "grad_norm": 0.502542245007333, "learning_rate": 3.192214111922141e-05, "loss": 0.4293, "step": 43710 }, { "epoch": 1.2763171340564372, "grad_norm": 0.48993562446134936, "learning_rate": 3.191943768586105e-05, "loss": 0.4309, "step": 43715 }, { "epoch": 1.2764631173267542, "grad_norm": 0.47406962857872864, "learning_rate": 3.191673425250068e-05, "loss": 0.4352, "step": 43720 }, { "epoch": 1.2766091005970717, "grad_norm": 0.47222414475909663, "learning_rate": 3.191403081914031e-05, "loss": 0.4608, "step": 43725 }, { "epoch": 1.2767550838673887, "grad_norm": 0.5420975601877202, "learning_rate": 3.1911327385779944e-05, "loss": 0.4602, "step": 43730 }, { "epoch": 1.2769010671377061, "grad_norm": 0.48065764095311647, "learning_rate": 3.190862395241958e-05, "loss": 0.4576, "step": 43735 }, { "epoch": 1.2770470504080231, "grad_norm": 0.49582798664277866, "learning_rate": 3.1905920519059205e-05, "loss": 0.4391, "step": 43740 }, { "epoch": 1.2771930336783406, "grad_norm": 0.50466147826393, "learning_rate": 3.190321708569884e-05, "loss": 0.4699, "step": 43745 }, { "epoch": 1.2773390169486576, "grad_norm": 0.5117727623952683, "learning_rate": 3.190051365233847e-05, "loss": 0.4508, "step": 43750 }, { "epoch": 1.277485000218975, "grad_norm": 0.5293095739072028, "learning_rate": 3.18978102189781e-05, "loss": 0.4397, "step": 43755 }, { "epoch": 1.277630983489292, "grad_norm": 0.47499627216046164, "learning_rate": 3.1895106785617734e-05, "loss": 0.45, "step": 43760 }, { "epoch": 1.2777769667596093, "grad_norm": 0.5191626493405561, "learning_rate": 3.189240335225737e-05, "loss": 0.4466, "step": 43765 }, { "epoch": 1.2779229500299265, "grad_norm": 0.5488228371787227, "learning_rate": 3.1889699918896996e-05, "loss": 0.4625, "step": 43770 }, { "epoch": 1.2780689333002437, "grad_norm": 0.4764202421578167, "learning_rate": 3.1886996485536637e-05, "loss": 0.4729, "step": 43775 }, { "epoch": 1.278214916570561, "grad_norm": 0.5256018125084394, "learning_rate": 3.188429305217627e-05, "loss": 0.437, "step": 43780 }, { "epoch": 1.2783608998408782, "grad_norm": 0.48795797973080146, "learning_rate": 3.18815896188159e-05, "loss": 0.4438, "step": 43785 }, { "epoch": 1.2785068831111954, "grad_norm": 0.518504485281638, "learning_rate": 3.187888618545553e-05, "loss": 0.4797, "step": 43790 }, { "epoch": 1.2786528663815127, "grad_norm": 0.45868749385130453, "learning_rate": 3.1876182752095166e-05, "loss": 0.4485, "step": 43795 }, { "epoch": 1.2787988496518299, "grad_norm": 0.5220442279652512, "learning_rate": 3.187347931873479e-05, "loss": 0.4407, "step": 43800 }, { "epoch": 1.2789448329221471, "grad_norm": 0.47876473147008813, "learning_rate": 3.187077588537443e-05, "loss": 0.4248, "step": 43805 }, { "epoch": 1.2790908161924643, "grad_norm": 0.5091626399044579, "learning_rate": 3.186807245201406e-05, "loss": 0.4355, "step": 43810 }, { "epoch": 1.2792367994627816, "grad_norm": 0.4709835806180515, "learning_rate": 3.186536901865369e-05, "loss": 0.4551, "step": 43815 }, { "epoch": 1.2793827827330988, "grad_norm": 0.45278666377921206, "learning_rate": 3.186266558529332e-05, "loss": 0.4521, "step": 43820 }, { "epoch": 1.279528766003416, "grad_norm": 0.5101735015996016, "learning_rate": 3.1859962151932956e-05, "loss": 0.4434, "step": 43825 }, { "epoch": 1.2796747492737333, "grad_norm": 0.517373698325183, "learning_rate": 3.185725871857259e-05, "loss": 0.4797, "step": 43830 }, { "epoch": 1.2798207325440505, "grad_norm": 0.48589955283170966, "learning_rate": 3.1854555285212224e-05, "loss": 0.4272, "step": 43835 }, { "epoch": 1.2799667158143677, "grad_norm": 0.5044618003935746, "learning_rate": 3.185185185185185e-05, "loss": 0.4475, "step": 43840 }, { "epoch": 1.280112699084685, "grad_norm": 0.4416018105322023, "learning_rate": 3.1849148418491486e-05, "loss": 0.4378, "step": 43845 }, { "epoch": 1.2802586823550022, "grad_norm": 0.46350955729840637, "learning_rate": 3.184644498513112e-05, "loss": 0.4277, "step": 43850 }, { "epoch": 1.2804046656253194, "grad_norm": 0.5091977188989025, "learning_rate": 3.1843741551770754e-05, "loss": 0.4741, "step": 43855 }, { "epoch": 1.2805506488956366, "grad_norm": 0.4860334304641719, "learning_rate": 3.184103811841038e-05, "loss": 0.4334, "step": 43860 }, { "epoch": 1.2806966321659539, "grad_norm": 0.46972784356871783, "learning_rate": 3.1838334685050015e-05, "loss": 0.4504, "step": 43865 }, { "epoch": 1.280842615436271, "grad_norm": 0.46313070728557637, "learning_rate": 3.183563125168965e-05, "loss": 0.4548, "step": 43870 }, { "epoch": 1.280988598706588, "grad_norm": 0.4698812689932843, "learning_rate": 3.1832927818329276e-05, "loss": 0.4243, "step": 43875 }, { "epoch": 1.2811345819769056, "grad_norm": 0.43635335283614496, "learning_rate": 3.183022438496891e-05, "loss": 0.4587, "step": 43880 }, { "epoch": 1.2812805652472226, "grad_norm": 0.4674897246500941, "learning_rate": 3.1827520951608544e-05, "loss": 0.4285, "step": 43885 }, { "epoch": 1.28142654851754, "grad_norm": 0.47624125863088806, "learning_rate": 3.182481751824818e-05, "loss": 0.461, "step": 43890 }, { "epoch": 1.281572531787857, "grad_norm": 0.4722061597994719, "learning_rate": 3.182211408488781e-05, "loss": 0.4463, "step": 43895 }, { "epoch": 1.2817185150581745, "grad_norm": 0.5323762872197539, "learning_rate": 3.181941065152744e-05, "loss": 0.4401, "step": 43900 }, { "epoch": 1.2818644983284915, "grad_norm": 0.5130374353257693, "learning_rate": 3.1816707218167073e-05, "loss": 0.4656, "step": 43905 }, { "epoch": 1.2820104815988087, "grad_norm": 0.542749936160964, "learning_rate": 3.181400378480671e-05, "loss": 0.4547, "step": 43910 }, { "epoch": 1.282156464869126, "grad_norm": 0.5449963258485233, "learning_rate": 3.181130035144634e-05, "loss": 0.4519, "step": 43915 }, { "epoch": 1.2823024481394432, "grad_norm": 0.4672454603733616, "learning_rate": 3.180859691808597e-05, "loss": 0.4301, "step": 43920 }, { "epoch": 1.2824484314097604, "grad_norm": 0.482764404786164, "learning_rate": 3.18058934847256e-05, "loss": 0.4231, "step": 43925 }, { "epoch": 1.2825944146800776, "grad_norm": 0.47799735703200863, "learning_rate": 3.180319005136524e-05, "loss": 0.4701, "step": 43930 }, { "epoch": 1.2827403979503949, "grad_norm": 0.5150253626987388, "learning_rate": 3.1800486618004864e-05, "loss": 0.4643, "step": 43935 }, { "epoch": 1.282886381220712, "grad_norm": 0.5006753097129785, "learning_rate": 3.17977831846445e-05, "loss": 0.48, "step": 43940 }, { "epoch": 1.2830323644910293, "grad_norm": 0.5023439114378786, "learning_rate": 3.179507975128413e-05, "loss": 0.4524, "step": 43945 }, { "epoch": 1.2831783477613465, "grad_norm": 0.4578512050419077, "learning_rate": 3.1792376317923766e-05, "loss": 0.4274, "step": 43950 }, { "epoch": 1.2833243310316638, "grad_norm": 0.4325268082785845, "learning_rate": 3.17896728845634e-05, "loss": 0.4447, "step": 43955 }, { "epoch": 1.283470314301981, "grad_norm": 0.44577187019805103, "learning_rate": 3.178696945120303e-05, "loss": 0.434, "step": 43960 }, { "epoch": 1.2836162975722982, "grad_norm": 0.5007586195524085, "learning_rate": 3.178426601784266e-05, "loss": 0.4624, "step": 43965 }, { "epoch": 1.2837622808426155, "grad_norm": 0.5122052252149049, "learning_rate": 3.1781562584482295e-05, "loss": 0.4365, "step": 43970 }, { "epoch": 1.2839082641129327, "grad_norm": 0.4363475656665625, "learning_rate": 3.177885915112192e-05, "loss": 0.4212, "step": 43975 }, { "epoch": 1.28405424738325, "grad_norm": 0.51083402848698, "learning_rate": 3.1776155717761556e-05, "loss": 0.4605, "step": 43980 }, { "epoch": 1.2842002306535671, "grad_norm": 0.4818763059599703, "learning_rate": 3.177345228440119e-05, "loss": 0.4482, "step": 43985 }, { "epoch": 1.2843462139238844, "grad_norm": 0.4532045926780026, "learning_rate": 3.1770748851040824e-05, "loss": 0.4365, "step": 43990 }, { "epoch": 1.2844921971942016, "grad_norm": 0.48240602827987417, "learning_rate": 3.176804541768045e-05, "loss": 0.4467, "step": 43995 }, { "epoch": 1.2846381804645188, "grad_norm": 0.4667018111456145, "learning_rate": 3.176534198432009e-05, "loss": 0.4281, "step": 44000 }, { "epoch": 1.284784163734836, "grad_norm": 0.48821371703050587, "learning_rate": 3.176263855095972e-05, "loss": 0.4496, "step": 44005 }, { "epoch": 1.2849301470051533, "grad_norm": 0.5260843480572429, "learning_rate": 3.1759935117599354e-05, "loss": 0.4641, "step": 44010 }, { "epoch": 1.2850761302754705, "grad_norm": 0.48552427659843655, "learning_rate": 3.175723168423899e-05, "loss": 0.4394, "step": 44015 }, { "epoch": 1.2852221135457875, "grad_norm": 0.5004803111907035, "learning_rate": 3.1754528250878615e-05, "loss": 0.4734, "step": 44020 }, { "epoch": 1.285368096816105, "grad_norm": 0.5010364929862945, "learning_rate": 3.175182481751825e-05, "loss": 0.4852, "step": 44025 }, { "epoch": 1.285514080086422, "grad_norm": 0.5011829904318562, "learning_rate": 3.174912138415788e-05, "loss": 0.4395, "step": 44030 }, { "epoch": 1.2856600633567394, "grad_norm": 0.47498494318798185, "learning_rate": 3.174641795079751e-05, "loss": 0.462, "step": 44035 }, { "epoch": 1.2858060466270564, "grad_norm": 0.4783184696471356, "learning_rate": 3.1743714517437144e-05, "loss": 0.4621, "step": 44040 }, { "epoch": 1.285952029897374, "grad_norm": 0.5014632167496463, "learning_rate": 3.174101108407678e-05, "loss": 0.4428, "step": 44045 }, { "epoch": 1.286098013167691, "grad_norm": 0.4783019039626395, "learning_rate": 3.173830765071641e-05, "loss": 0.4503, "step": 44050 }, { "epoch": 1.2862439964380081, "grad_norm": 0.4984887886319035, "learning_rate": 3.1735604217356046e-05, "loss": 0.4679, "step": 44055 }, { "epoch": 1.2863899797083254, "grad_norm": 0.47541569907633435, "learning_rate": 3.173290078399568e-05, "loss": 0.4395, "step": 44060 }, { "epoch": 1.2865359629786426, "grad_norm": 0.5011000904284789, "learning_rate": 3.173019735063531e-05, "loss": 0.4395, "step": 44065 }, { "epoch": 1.2866819462489598, "grad_norm": 0.4605194707997318, "learning_rate": 3.172749391727494e-05, "loss": 0.4435, "step": 44070 }, { "epoch": 1.286827929519277, "grad_norm": 0.5084676954680807, "learning_rate": 3.1724790483914576e-05, "loss": 0.4161, "step": 44075 }, { "epoch": 1.2869739127895943, "grad_norm": 0.4664882111735341, "learning_rate": 3.17220870505542e-05, "loss": 0.4314, "step": 44080 }, { "epoch": 1.2871198960599115, "grad_norm": 0.47778684606994404, "learning_rate": 3.171938361719384e-05, "loss": 0.4447, "step": 44085 }, { "epoch": 1.2872658793302287, "grad_norm": 0.5080070431444895, "learning_rate": 3.171668018383347e-05, "loss": 0.482, "step": 44090 }, { "epoch": 1.287411862600546, "grad_norm": 0.519103837918361, "learning_rate": 3.17139767504731e-05, "loss": 0.4426, "step": 44095 }, { "epoch": 1.2875578458708632, "grad_norm": 0.5120893384981393, "learning_rate": 3.171127331711273e-05, "loss": 0.4407, "step": 44100 }, { "epoch": 1.2877038291411804, "grad_norm": 0.4712787819661665, "learning_rate": 3.1708569883752366e-05, "loss": 0.4577, "step": 44105 }, { "epoch": 1.2878498124114977, "grad_norm": 0.44984005880485356, "learning_rate": 3.170586645039199e-05, "loss": 0.4435, "step": 44110 }, { "epoch": 1.2879957956818149, "grad_norm": 0.45987550554098316, "learning_rate": 3.1703163017031634e-05, "loss": 0.4411, "step": 44115 }, { "epoch": 1.288141778952132, "grad_norm": 0.49133082531214306, "learning_rate": 3.170045958367127e-05, "loss": 0.4385, "step": 44120 }, { "epoch": 1.2882877622224493, "grad_norm": 0.5203193551635089, "learning_rate": 3.1697756150310895e-05, "loss": 0.4579, "step": 44125 }, { "epoch": 1.2884337454927666, "grad_norm": 0.4815524632091801, "learning_rate": 3.169505271695053e-05, "loss": 0.4323, "step": 44130 }, { "epoch": 1.2885797287630838, "grad_norm": 0.5005190106689874, "learning_rate": 3.169234928359016e-05, "loss": 0.4477, "step": 44135 }, { "epoch": 1.288725712033401, "grad_norm": 0.4634448066425389, "learning_rate": 3.168964585022979e-05, "loss": 0.421, "step": 44140 }, { "epoch": 1.2888716953037183, "grad_norm": 0.5014140107452825, "learning_rate": 3.1686942416869425e-05, "loss": 0.4491, "step": 44145 }, { "epoch": 1.2890176785740355, "grad_norm": 0.45532440355108333, "learning_rate": 3.168423898350906e-05, "loss": 0.4241, "step": 44150 }, { "epoch": 1.2891636618443527, "grad_norm": 0.49453853450745805, "learning_rate": 3.1681535550148686e-05, "loss": 0.4654, "step": 44155 }, { "epoch": 1.28930964511467, "grad_norm": 0.4548336281944743, "learning_rate": 3.167883211678832e-05, "loss": 0.4508, "step": 44160 }, { "epoch": 1.289455628384987, "grad_norm": 0.4728282208457076, "learning_rate": 3.1676128683427954e-05, "loss": 0.4673, "step": 44165 }, { "epoch": 1.2896016116553044, "grad_norm": 0.4789968487163572, "learning_rate": 3.167342525006759e-05, "loss": 0.4641, "step": 44170 }, { "epoch": 1.2897475949256214, "grad_norm": 0.45615122380263323, "learning_rate": 3.167072181670722e-05, "loss": 0.444, "step": 44175 }, { "epoch": 1.2898935781959389, "grad_norm": 0.48880526191568324, "learning_rate": 3.1668018383346856e-05, "loss": 0.4479, "step": 44180 }, { "epoch": 1.2900395614662559, "grad_norm": 0.45589232927624507, "learning_rate": 3.166531494998648e-05, "loss": 0.4437, "step": 44185 }, { "epoch": 1.2901855447365733, "grad_norm": 0.47498778788114593, "learning_rate": 3.166261151662612e-05, "loss": 0.4407, "step": 44190 }, { "epoch": 1.2903315280068903, "grad_norm": 0.5047288990543718, "learning_rate": 3.165990808326575e-05, "loss": 0.4557, "step": 44195 }, { "epoch": 1.2904775112772076, "grad_norm": 0.4656715438518052, "learning_rate": 3.165720464990538e-05, "loss": 0.4427, "step": 44200 }, { "epoch": 1.2906234945475248, "grad_norm": 0.519125183008892, "learning_rate": 3.165450121654501e-05, "loss": 0.4232, "step": 44205 }, { "epoch": 1.290769477817842, "grad_norm": 0.4840289871862477, "learning_rate": 3.1651797783184646e-05, "loss": 0.426, "step": 44210 }, { "epoch": 1.2909154610881592, "grad_norm": 0.45277406821456156, "learning_rate": 3.1649094349824274e-05, "loss": 0.4591, "step": 44215 }, { "epoch": 1.2910614443584765, "grad_norm": 0.46453263568852415, "learning_rate": 3.164639091646391e-05, "loss": 0.4284, "step": 44220 }, { "epoch": 1.2912074276287937, "grad_norm": 0.4990470839286254, "learning_rate": 3.164368748310355e-05, "loss": 0.4381, "step": 44225 }, { "epoch": 1.291353410899111, "grad_norm": 0.43052384882058864, "learning_rate": 3.1640984049743176e-05, "loss": 0.4376, "step": 44230 }, { "epoch": 1.2914993941694282, "grad_norm": 0.4787841247653216, "learning_rate": 3.163828061638281e-05, "loss": 0.4851, "step": 44235 }, { "epoch": 1.2916453774397454, "grad_norm": 0.4792678992218631, "learning_rate": 3.1635577183022444e-05, "loss": 0.4317, "step": 44240 }, { "epoch": 1.2917913607100626, "grad_norm": 0.42774900016144674, "learning_rate": 3.163287374966207e-05, "loss": 0.4419, "step": 44245 }, { "epoch": 1.2919373439803798, "grad_norm": 0.4377443489954205, "learning_rate": 3.1630170316301705e-05, "loss": 0.4244, "step": 44250 }, { "epoch": 1.292083327250697, "grad_norm": 0.43624724336691423, "learning_rate": 3.162746688294134e-05, "loss": 0.4547, "step": 44255 }, { "epoch": 1.2922293105210143, "grad_norm": 0.43721524912352033, "learning_rate": 3.1624763449580966e-05, "loss": 0.4372, "step": 44260 }, { "epoch": 1.2923752937913315, "grad_norm": 0.5189155550784045, "learning_rate": 3.16220600162206e-05, "loss": 0.5046, "step": 44265 }, { "epoch": 1.2925212770616488, "grad_norm": 0.5000739527718235, "learning_rate": 3.1619356582860234e-05, "loss": 0.4474, "step": 44270 }, { "epoch": 1.292667260331966, "grad_norm": 0.5004482311664274, "learning_rate": 3.161665314949986e-05, "loss": 0.4439, "step": 44275 }, { "epoch": 1.2928132436022832, "grad_norm": 0.4838588953875074, "learning_rate": 3.1613949716139495e-05, "loss": 0.4351, "step": 44280 }, { "epoch": 1.2929592268726005, "grad_norm": 0.5002883581522894, "learning_rate": 3.1611246282779136e-05, "loss": 0.4286, "step": 44285 }, { "epoch": 1.2931052101429177, "grad_norm": 0.5071583311157416, "learning_rate": 3.1608542849418764e-05, "loss": 0.4384, "step": 44290 }, { "epoch": 1.293251193413235, "grad_norm": 0.511462720209147, "learning_rate": 3.16058394160584e-05, "loss": 0.4695, "step": 44295 }, { "epoch": 1.2933971766835521, "grad_norm": 0.43336822522397883, "learning_rate": 3.160313598269803e-05, "loss": 0.4446, "step": 44300 }, { "epoch": 1.2935431599538694, "grad_norm": 0.47347917570001513, "learning_rate": 3.160043254933766e-05, "loss": 0.4333, "step": 44305 }, { "epoch": 1.2936891432241864, "grad_norm": 0.49075047626498874, "learning_rate": 3.159772911597729e-05, "loss": 0.4311, "step": 44310 }, { "epoch": 1.2938351264945038, "grad_norm": 0.4589324288672044, "learning_rate": 3.159502568261693e-05, "loss": 0.4322, "step": 44315 }, { "epoch": 1.2939811097648208, "grad_norm": 0.5038902966077939, "learning_rate": 3.1592322249256554e-05, "loss": 0.4561, "step": 44320 }, { "epoch": 1.2941270930351383, "grad_norm": 0.47908420061101437, "learning_rate": 3.158961881589619e-05, "loss": 0.4374, "step": 44325 }, { "epoch": 1.2942730763054553, "grad_norm": 0.4681950779323006, "learning_rate": 3.158691538253582e-05, "loss": 0.4317, "step": 44330 }, { "epoch": 1.2944190595757727, "grad_norm": 0.44599846022675277, "learning_rate": 3.158421194917545e-05, "loss": 0.4385, "step": 44335 }, { "epoch": 1.2945650428460898, "grad_norm": 0.529464281116401, "learning_rate": 3.158150851581509e-05, "loss": 0.477, "step": 44340 }, { "epoch": 1.294711026116407, "grad_norm": 0.5032890142388868, "learning_rate": 3.1578805082454724e-05, "loss": 0.4708, "step": 44345 }, { "epoch": 1.2948570093867242, "grad_norm": 0.48952163256234427, "learning_rate": 3.157610164909435e-05, "loss": 0.4803, "step": 44350 }, { "epoch": 1.2950029926570414, "grad_norm": 0.4709792523773411, "learning_rate": 3.1573398215733985e-05, "loss": 0.4623, "step": 44355 }, { "epoch": 1.2951489759273587, "grad_norm": 0.49798775251760086, "learning_rate": 3.157069478237362e-05, "loss": 0.4599, "step": 44360 }, { "epoch": 1.295294959197676, "grad_norm": 0.5321270571090086, "learning_rate": 3.1567991349013247e-05, "loss": 0.4487, "step": 44365 }, { "epoch": 1.2954409424679931, "grad_norm": 0.47004017493131584, "learning_rate": 3.156528791565288e-05, "loss": 0.4469, "step": 44370 }, { "epoch": 1.2955869257383104, "grad_norm": 0.45272418721525515, "learning_rate": 3.1562584482292515e-05, "loss": 0.4628, "step": 44375 }, { "epoch": 1.2957329090086276, "grad_norm": 0.4417021235038249, "learning_rate": 3.155988104893214e-05, "loss": 0.4115, "step": 44380 }, { "epoch": 1.2958788922789448, "grad_norm": 0.5039276735249149, "learning_rate": 3.1557177615571776e-05, "loss": 0.4577, "step": 44385 }, { "epoch": 1.296024875549262, "grad_norm": 0.4744058407570247, "learning_rate": 3.155447418221141e-05, "loss": 0.427, "step": 44390 }, { "epoch": 1.2961708588195793, "grad_norm": 0.4624771869648221, "learning_rate": 3.1551770748851044e-05, "loss": 0.4094, "step": 44395 }, { "epoch": 1.2963168420898965, "grad_norm": 0.4675462693670204, "learning_rate": 3.154906731549068e-05, "loss": 0.443, "step": 44400 }, { "epoch": 1.2964628253602137, "grad_norm": 0.49113930284763596, "learning_rate": 3.154636388213031e-05, "loss": 0.4411, "step": 44405 }, { "epoch": 1.296608808630531, "grad_norm": 0.485475886369973, "learning_rate": 3.154366044876994e-05, "loss": 0.4348, "step": 44410 }, { "epoch": 1.2967547919008482, "grad_norm": 0.4757507495713912, "learning_rate": 3.154095701540957e-05, "loss": 0.4503, "step": 44415 }, { "epoch": 1.2969007751711654, "grad_norm": 0.49152200835340903, "learning_rate": 3.153825358204921e-05, "loss": 0.4396, "step": 44420 }, { "epoch": 1.2970467584414827, "grad_norm": 0.49587070618682155, "learning_rate": 3.1535550148688834e-05, "loss": 0.447, "step": 44425 }, { "epoch": 1.2971927417117999, "grad_norm": 0.49021014978057753, "learning_rate": 3.153284671532847e-05, "loss": 0.4712, "step": 44430 }, { "epoch": 1.297338724982117, "grad_norm": 0.4713324364714976, "learning_rate": 3.15301432819681e-05, "loss": 0.4244, "step": 44435 }, { "epoch": 1.2974847082524343, "grad_norm": 0.5205749954774849, "learning_rate": 3.152743984860773e-05, "loss": 0.47, "step": 44440 }, { "epoch": 1.2976306915227516, "grad_norm": 0.45526302473503644, "learning_rate": 3.1524736415247364e-05, "loss": 0.4282, "step": 44445 }, { "epoch": 1.2977766747930688, "grad_norm": 0.5111975995621354, "learning_rate": 3.1522032981887e-05, "loss": 0.4629, "step": 44450 }, { "epoch": 1.2979226580633858, "grad_norm": 0.49183397062624595, "learning_rate": 3.151932954852663e-05, "loss": 0.4538, "step": 44455 }, { "epoch": 1.2980686413337033, "grad_norm": 0.48305042835649864, "learning_rate": 3.1516626115166266e-05, "loss": 0.4587, "step": 44460 }, { "epoch": 1.2982146246040203, "grad_norm": 0.5283645377184679, "learning_rate": 3.15139226818059e-05, "loss": 0.455, "step": 44465 }, { "epoch": 1.2983606078743377, "grad_norm": 0.504719855175249, "learning_rate": 3.151121924844553e-05, "loss": 0.4655, "step": 44470 }, { "epoch": 1.2985065911446547, "grad_norm": 0.44407383770299586, "learning_rate": 3.150851581508516e-05, "loss": 0.4108, "step": 44475 }, { "epoch": 1.2986525744149722, "grad_norm": 0.49621939298440143, "learning_rate": 3.1505812381724795e-05, "loss": 0.4513, "step": 44480 }, { "epoch": 1.2987985576852892, "grad_norm": 0.48293509266003704, "learning_rate": 3.150310894836442e-05, "loss": 0.4376, "step": 44485 }, { "epoch": 1.2989445409556064, "grad_norm": 0.46769705279475204, "learning_rate": 3.1500405515004056e-05, "loss": 0.4368, "step": 44490 }, { "epoch": 1.2990905242259236, "grad_norm": 0.47253143922108953, "learning_rate": 3.149770208164369e-05, "loss": 0.4636, "step": 44495 }, { "epoch": 1.2992365074962409, "grad_norm": 0.4811271984270364, "learning_rate": 3.149499864828332e-05, "loss": 0.4665, "step": 44500 }, { "epoch": 1.299382490766558, "grad_norm": 0.44666942891986555, "learning_rate": 3.149229521492295e-05, "loss": 0.4501, "step": 44505 }, { "epoch": 1.2995284740368753, "grad_norm": 0.4652666954381376, "learning_rate": 3.1489591781562585e-05, "loss": 0.4373, "step": 44510 }, { "epoch": 1.2996744573071926, "grad_norm": 0.47702019751916946, "learning_rate": 3.148688834820222e-05, "loss": 0.4372, "step": 44515 }, { "epoch": 1.2998204405775098, "grad_norm": 0.5134341507590099, "learning_rate": 3.1484184914841853e-05, "loss": 0.4636, "step": 44520 }, { "epoch": 1.299966423847827, "grad_norm": 0.4900597262312493, "learning_rate": 3.148148148148148e-05, "loss": 0.4601, "step": 44525 }, { "epoch": 1.3001124071181442, "grad_norm": 0.5223623449649288, "learning_rate": 3.1478778048121115e-05, "loss": 0.4512, "step": 44530 }, { "epoch": 1.3002583903884615, "grad_norm": 0.482215818876585, "learning_rate": 3.147607461476075e-05, "loss": 0.4683, "step": 44535 }, { "epoch": 1.3004043736587787, "grad_norm": 0.5172726302867199, "learning_rate": 3.147337118140038e-05, "loss": 0.4804, "step": 44540 }, { "epoch": 1.300550356929096, "grad_norm": 0.4754734453714786, "learning_rate": 3.147066774804001e-05, "loss": 0.4619, "step": 44545 }, { "epoch": 1.3006963401994132, "grad_norm": 0.4555887928501846, "learning_rate": 3.1467964314679644e-05, "loss": 0.4342, "step": 44550 }, { "epoch": 1.3008423234697304, "grad_norm": 0.5048692050014603, "learning_rate": 3.146526088131928e-05, "loss": 0.4488, "step": 44555 }, { "epoch": 1.3009883067400476, "grad_norm": 0.43310314938406097, "learning_rate": 3.1462557447958905e-05, "loss": 0.438, "step": 44560 }, { "epoch": 1.3011342900103648, "grad_norm": 0.48523828866026913, "learning_rate": 3.1459854014598546e-05, "loss": 0.4397, "step": 44565 }, { "epoch": 1.301280273280682, "grad_norm": 0.4587760645345772, "learning_rate": 3.145715058123817e-05, "loss": 0.4406, "step": 44570 }, { "epoch": 1.3014262565509993, "grad_norm": 0.4891901140948174, "learning_rate": 3.145444714787781e-05, "loss": 0.4358, "step": 44575 }, { "epoch": 1.3015722398213165, "grad_norm": 0.47044682203395277, "learning_rate": 3.145174371451744e-05, "loss": 0.4366, "step": 44580 }, { "epoch": 1.3017182230916338, "grad_norm": 0.5020360555749488, "learning_rate": 3.144904028115707e-05, "loss": 0.4666, "step": 44585 }, { "epoch": 1.301864206361951, "grad_norm": 0.4895867145558656, "learning_rate": 3.14463368477967e-05, "loss": 0.4229, "step": 44590 }, { "epoch": 1.3020101896322682, "grad_norm": 0.48118691231983335, "learning_rate": 3.1443633414436337e-05, "loss": 0.4405, "step": 44595 }, { "epoch": 1.3021561729025852, "grad_norm": 0.5217612333318284, "learning_rate": 3.144092998107597e-05, "loss": 0.4706, "step": 44600 }, { "epoch": 1.3023021561729027, "grad_norm": 0.4877988116192868, "learning_rate": 3.14382265477156e-05, "loss": 0.4635, "step": 44605 }, { "epoch": 1.3024481394432197, "grad_norm": 0.48719639034843637, "learning_rate": 3.143552311435523e-05, "loss": 0.4333, "step": 44610 }, { "epoch": 1.3025941227135371, "grad_norm": 0.47563665501166913, "learning_rate": 3.1432819680994866e-05, "loss": 0.4372, "step": 44615 }, { "epoch": 1.3027401059838541, "grad_norm": 0.483467816880811, "learning_rate": 3.143011624763449e-05, "loss": 0.4332, "step": 44620 }, { "epoch": 1.3028860892541716, "grad_norm": 0.4519087986002639, "learning_rate": 3.1427412814274134e-05, "loss": 0.4384, "step": 44625 }, { "epoch": 1.3030320725244886, "grad_norm": 0.49366209037242886, "learning_rate": 3.142470938091376e-05, "loss": 0.4698, "step": 44630 }, { "epoch": 1.3031780557948058, "grad_norm": 0.45137057284453125, "learning_rate": 3.1422005947553395e-05, "loss": 0.4531, "step": 44635 }, { "epoch": 1.303324039065123, "grad_norm": 0.47424122808100055, "learning_rate": 3.141930251419303e-05, "loss": 0.4545, "step": 44640 }, { "epoch": 1.3034700223354403, "grad_norm": 0.513234864247395, "learning_rate": 3.1416599080832656e-05, "loss": 0.444, "step": 44645 }, { "epoch": 1.3036160056057575, "grad_norm": 0.45804016326596636, "learning_rate": 3.141389564747229e-05, "loss": 0.4425, "step": 44650 }, { "epoch": 1.3037619888760748, "grad_norm": 0.5103490900114038, "learning_rate": 3.1411192214111924e-05, "loss": 0.4646, "step": 44655 }, { "epoch": 1.303907972146392, "grad_norm": 0.4882892537451809, "learning_rate": 3.140848878075155e-05, "loss": 0.4616, "step": 44660 }, { "epoch": 1.3040539554167092, "grad_norm": 0.49494613722312286, "learning_rate": 3.1405785347391186e-05, "loss": 0.4379, "step": 44665 }, { "epoch": 1.3041999386870264, "grad_norm": 0.4776259613306586, "learning_rate": 3.140308191403082e-05, "loss": 0.4585, "step": 44670 }, { "epoch": 1.3043459219573437, "grad_norm": 0.5084314537173417, "learning_rate": 3.1400378480670454e-05, "loss": 0.4698, "step": 44675 }, { "epoch": 1.304491905227661, "grad_norm": 0.4602980422648204, "learning_rate": 3.139767504731009e-05, "loss": 0.4324, "step": 44680 }, { "epoch": 1.3046378884979781, "grad_norm": 0.4697943145206598, "learning_rate": 3.139497161394972e-05, "loss": 0.456, "step": 44685 }, { "epoch": 1.3047838717682954, "grad_norm": 0.4403346591321723, "learning_rate": 3.139226818058935e-05, "loss": 0.4358, "step": 44690 }, { "epoch": 1.3049298550386126, "grad_norm": 0.47812637802303926, "learning_rate": 3.138956474722898e-05, "loss": 0.4623, "step": 44695 }, { "epoch": 1.3050758383089298, "grad_norm": 0.47532903376298996, "learning_rate": 3.138686131386862e-05, "loss": 0.4643, "step": 44700 }, { "epoch": 1.305221821579247, "grad_norm": 0.49617882806751634, "learning_rate": 3.1384157880508244e-05, "loss": 0.4427, "step": 44705 }, { "epoch": 1.3053678048495643, "grad_norm": 0.48079519602665727, "learning_rate": 3.138145444714788e-05, "loss": 0.4577, "step": 44710 }, { "epoch": 1.3055137881198815, "grad_norm": 0.5184100788281475, "learning_rate": 3.137875101378751e-05, "loss": 0.4542, "step": 44715 }, { "epoch": 1.3056597713901987, "grad_norm": 0.4700281391689099, "learning_rate": 3.137604758042714e-05, "loss": 0.429, "step": 44720 }, { "epoch": 1.305805754660516, "grad_norm": 0.4931929718051579, "learning_rate": 3.1373344147066773e-05, "loss": 0.4427, "step": 44725 }, { "epoch": 1.3059517379308332, "grad_norm": 0.48704890443066534, "learning_rate": 3.137064071370641e-05, "loss": 0.4366, "step": 44730 }, { "epoch": 1.3060977212011504, "grad_norm": 0.5178380897220695, "learning_rate": 3.136793728034604e-05, "loss": 0.469, "step": 44735 }, { "epoch": 1.3062437044714676, "grad_norm": 0.5060676912109581, "learning_rate": 3.1365233846985675e-05, "loss": 0.4467, "step": 44740 }, { "epoch": 1.3063896877417847, "grad_norm": 0.49467476690672746, "learning_rate": 3.136253041362531e-05, "loss": 0.4568, "step": 44745 }, { "epoch": 1.306535671012102, "grad_norm": 0.4756555179343737, "learning_rate": 3.135982698026494e-05, "loss": 0.4294, "step": 44750 }, { "epoch": 1.3066816542824191, "grad_norm": 0.478001418563363, "learning_rate": 3.135712354690457e-05, "loss": 0.4514, "step": 44755 }, { "epoch": 1.3068276375527366, "grad_norm": 0.4778189708599005, "learning_rate": 3.1354420113544205e-05, "loss": 0.4828, "step": 44760 }, { "epoch": 1.3069736208230536, "grad_norm": 0.43410339541091114, "learning_rate": 3.135171668018383e-05, "loss": 0.4087, "step": 44765 }, { "epoch": 1.307119604093371, "grad_norm": 0.48551648905450295, "learning_rate": 3.1349013246823466e-05, "loss": 0.4419, "step": 44770 }, { "epoch": 1.307265587363688, "grad_norm": 0.5147534266134864, "learning_rate": 3.13463098134631e-05, "loss": 0.4594, "step": 44775 }, { "epoch": 1.3074115706340053, "grad_norm": 0.4577432349426537, "learning_rate": 3.134360638010273e-05, "loss": 0.4708, "step": 44780 }, { "epoch": 1.3075575539043225, "grad_norm": 0.4859662156891806, "learning_rate": 3.134090294674236e-05, "loss": 0.4576, "step": 44785 }, { "epoch": 1.3077035371746397, "grad_norm": 0.48530780752391367, "learning_rate": 3.1338199513381995e-05, "loss": 0.4271, "step": 44790 }, { "epoch": 1.307849520444957, "grad_norm": 0.457211605147514, "learning_rate": 3.133549608002163e-05, "loss": 0.4322, "step": 44795 }, { "epoch": 1.3079955037152742, "grad_norm": 0.4806499972240597, "learning_rate": 3.133279264666126e-05, "loss": 0.4707, "step": 44800 }, { "epoch": 1.3081414869855914, "grad_norm": 0.4891611433542825, "learning_rate": 3.13300892133009e-05, "loss": 0.4443, "step": 44805 }, { "epoch": 1.3082874702559086, "grad_norm": 0.46195903783195974, "learning_rate": 3.1327385779940525e-05, "loss": 0.4238, "step": 44810 }, { "epoch": 1.3084334535262259, "grad_norm": 0.4783046478127144, "learning_rate": 3.132468234658016e-05, "loss": 0.4424, "step": 44815 }, { "epoch": 1.308579436796543, "grad_norm": 0.45289327767996457, "learning_rate": 3.132197891321979e-05, "loss": 0.4392, "step": 44820 }, { "epoch": 1.3087254200668603, "grad_norm": 0.5063483611611503, "learning_rate": 3.131927547985942e-05, "loss": 0.455, "step": 44825 }, { "epoch": 1.3088714033371776, "grad_norm": 0.4221999061409351, "learning_rate": 3.1316572046499054e-05, "loss": 0.4613, "step": 44830 }, { "epoch": 1.3090173866074948, "grad_norm": 0.47451635438810147, "learning_rate": 3.131386861313869e-05, "loss": 0.4418, "step": 44835 }, { "epoch": 1.309163369877812, "grad_norm": 0.45946361245267203, "learning_rate": 3.1311165179778315e-05, "loss": 0.4451, "step": 44840 }, { "epoch": 1.3093093531481292, "grad_norm": 0.6713093907562179, "learning_rate": 3.130846174641795e-05, "loss": 0.4609, "step": 44845 }, { "epoch": 1.3094553364184465, "grad_norm": 0.4848226393794461, "learning_rate": 3.130575831305759e-05, "loss": 0.4316, "step": 44850 }, { "epoch": 1.3096013196887637, "grad_norm": 0.4521622638633633, "learning_rate": 3.130305487969722e-05, "loss": 0.4358, "step": 44855 }, { "epoch": 1.309747302959081, "grad_norm": 0.5067823343255781, "learning_rate": 3.130035144633685e-05, "loss": 0.4577, "step": 44860 }, { "epoch": 1.3098932862293982, "grad_norm": 0.4284852539435671, "learning_rate": 3.1297648012976485e-05, "loss": 0.4276, "step": 44865 }, { "epoch": 1.3100392694997154, "grad_norm": 0.44976850047387834, "learning_rate": 3.129494457961611e-05, "loss": 0.4167, "step": 44870 }, { "epoch": 1.3101852527700326, "grad_norm": 0.4747092914385204, "learning_rate": 3.1292241146255746e-05, "loss": 0.4341, "step": 44875 }, { "epoch": 1.3103312360403498, "grad_norm": 0.5196282958677803, "learning_rate": 3.128953771289538e-05, "loss": 0.4622, "step": 44880 }, { "epoch": 1.310477219310667, "grad_norm": 0.48419548620755776, "learning_rate": 3.128683427953501e-05, "loss": 0.4409, "step": 44885 }, { "epoch": 1.310623202580984, "grad_norm": 0.456780887439703, "learning_rate": 3.128413084617464e-05, "loss": 0.4724, "step": 44890 }, { "epoch": 1.3107691858513015, "grad_norm": 0.4431032145245752, "learning_rate": 3.1281427412814276e-05, "loss": 0.4058, "step": 44895 }, { "epoch": 1.3109151691216185, "grad_norm": 0.48734089660705054, "learning_rate": 3.12787239794539e-05, "loss": 0.4455, "step": 44900 }, { "epoch": 1.311061152391936, "grad_norm": 0.5419219930644997, "learning_rate": 3.1276020546093544e-05, "loss": 0.4383, "step": 44905 }, { "epoch": 1.311207135662253, "grad_norm": 0.47601467271814824, "learning_rate": 3.127331711273318e-05, "loss": 0.4385, "step": 44910 }, { "epoch": 1.3113531189325704, "grad_norm": 0.5204858971919961, "learning_rate": 3.1270613679372805e-05, "loss": 0.4381, "step": 44915 }, { "epoch": 1.3114991022028875, "grad_norm": 0.46612765563804853, "learning_rate": 3.126791024601244e-05, "loss": 0.4595, "step": 44920 }, { "epoch": 1.311645085473205, "grad_norm": 0.4557440741636199, "learning_rate": 3.126520681265207e-05, "loss": 0.4627, "step": 44925 }, { "epoch": 1.311791068743522, "grad_norm": 0.4645848451536863, "learning_rate": 3.12625033792917e-05, "loss": 0.4659, "step": 44930 }, { "epoch": 1.3119370520138391, "grad_norm": 0.49338886639974405, "learning_rate": 3.1259799945931334e-05, "loss": 0.4389, "step": 44935 }, { "epoch": 1.3120830352841564, "grad_norm": 0.469724377500152, "learning_rate": 3.125709651257097e-05, "loss": 0.4147, "step": 44940 }, { "epoch": 1.3122290185544736, "grad_norm": 0.46840264306995283, "learning_rate": 3.1254393079210595e-05, "loss": 0.4652, "step": 44945 }, { "epoch": 1.3123750018247908, "grad_norm": 0.44329611680196834, "learning_rate": 3.125168964585023e-05, "loss": 0.4556, "step": 44950 }, { "epoch": 1.312520985095108, "grad_norm": 0.5011791770091923, "learning_rate": 3.1248986212489863e-05, "loss": 0.4363, "step": 44955 }, { "epoch": 1.3126669683654253, "grad_norm": 0.43071904522730825, "learning_rate": 3.124628277912949e-05, "loss": 0.4077, "step": 44960 }, { "epoch": 1.3128129516357425, "grad_norm": 0.5233008500369886, "learning_rate": 3.124357934576913e-05, "loss": 0.4283, "step": 44965 }, { "epoch": 1.3129589349060597, "grad_norm": 0.4938818872345426, "learning_rate": 3.1240875912408765e-05, "loss": 0.4476, "step": 44970 }, { "epoch": 1.313104918176377, "grad_norm": 0.44744329945307854, "learning_rate": 3.123817247904839e-05, "loss": 0.448, "step": 44975 }, { "epoch": 1.3132509014466942, "grad_norm": 0.43850913456438045, "learning_rate": 3.123546904568803e-05, "loss": 0.4379, "step": 44980 }, { "epoch": 1.3133968847170114, "grad_norm": 0.45883173662571447, "learning_rate": 3.123276561232766e-05, "loss": 0.4368, "step": 44985 }, { "epoch": 1.3135428679873287, "grad_norm": 0.5110279179415151, "learning_rate": 3.123006217896729e-05, "loss": 0.4439, "step": 44990 }, { "epoch": 1.313688851257646, "grad_norm": 0.40291503775334536, "learning_rate": 3.122735874560692e-05, "loss": 0.4488, "step": 44995 }, { "epoch": 1.3138348345279631, "grad_norm": 0.47243157478852077, "learning_rate": 3.1224655312246556e-05, "loss": 0.4366, "step": 45000 }, { "epoch": 1.3139808177982804, "grad_norm": 0.5014086963642969, "learning_rate": 3.122195187888618e-05, "loss": 0.4345, "step": 45005 }, { "epoch": 1.3141268010685976, "grad_norm": 0.49888525294649877, "learning_rate": 3.121924844552582e-05, "loss": 0.4668, "step": 45010 }, { "epoch": 1.3142727843389148, "grad_norm": 0.49536618321631165, "learning_rate": 3.121654501216545e-05, "loss": 0.4721, "step": 45015 }, { "epoch": 1.314418767609232, "grad_norm": 0.4680499934817816, "learning_rate": 3.1213841578805085e-05, "loss": 0.4375, "step": 45020 }, { "epoch": 1.3145647508795493, "grad_norm": 0.47993039185469544, "learning_rate": 3.121113814544472e-05, "loss": 0.4453, "step": 45025 }, { "epoch": 1.3147107341498665, "grad_norm": 0.4839082672638357, "learning_rate": 3.120843471208435e-05, "loss": 0.4481, "step": 45030 }, { "epoch": 1.3148567174201837, "grad_norm": 0.4645957405490973, "learning_rate": 3.120573127872398e-05, "loss": 0.4587, "step": 45035 }, { "epoch": 1.315002700690501, "grad_norm": 0.4954183704153879, "learning_rate": 3.1203027845363614e-05, "loss": 0.4529, "step": 45040 }, { "epoch": 1.315148683960818, "grad_norm": 0.4660196168564685, "learning_rate": 3.120032441200325e-05, "loss": 0.4448, "step": 45045 }, { "epoch": 1.3152946672311354, "grad_norm": 0.463356151377524, "learning_rate": 3.1197620978642876e-05, "loss": 0.408, "step": 45050 }, { "epoch": 1.3154406505014524, "grad_norm": 0.5010796684713683, "learning_rate": 3.119491754528251e-05, "loss": 0.4621, "step": 45055 }, { "epoch": 1.3155866337717699, "grad_norm": 0.49049114210673295, "learning_rate": 3.1192214111922144e-05, "loss": 0.4425, "step": 45060 }, { "epoch": 1.3157326170420869, "grad_norm": 0.4849075220299759, "learning_rate": 3.118951067856177e-05, "loss": 0.4736, "step": 45065 }, { "epoch": 1.3158786003124043, "grad_norm": 0.4655643589635663, "learning_rate": 3.1186807245201405e-05, "loss": 0.4282, "step": 45070 }, { "epoch": 1.3160245835827213, "grad_norm": 0.5319191494987969, "learning_rate": 3.118410381184104e-05, "loss": 0.448, "step": 45075 }, { "epoch": 1.3161705668530386, "grad_norm": 0.4673228946521013, "learning_rate": 3.118140037848067e-05, "loss": 0.4293, "step": 45080 }, { "epoch": 1.3163165501233558, "grad_norm": 0.5099975031117641, "learning_rate": 3.117869694512031e-05, "loss": 0.4574, "step": 45085 }, { "epoch": 1.316462533393673, "grad_norm": 0.511692246096891, "learning_rate": 3.117599351175994e-05, "loss": 0.4316, "step": 45090 }, { "epoch": 1.3166085166639903, "grad_norm": 0.4555177591067347, "learning_rate": 3.117329007839957e-05, "loss": 0.4299, "step": 45095 }, { "epoch": 1.3167544999343075, "grad_norm": 0.4691859201571697, "learning_rate": 3.11705866450392e-05, "loss": 0.4504, "step": 45100 }, { "epoch": 1.3169004832046247, "grad_norm": 0.49291172421091894, "learning_rate": 3.1167883211678836e-05, "loss": 0.4453, "step": 45105 }, { "epoch": 1.317046466474942, "grad_norm": 0.4848661303008435, "learning_rate": 3.1165179778318464e-05, "loss": 0.4391, "step": 45110 }, { "epoch": 1.3171924497452592, "grad_norm": 0.4705239806953828, "learning_rate": 3.11624763449581e-05, "loss": 0.4484, "step": 45115 }, { "epoch": 1.3173384330155764, "grad_norm": 0.4662420642841495, "learning_rate": 3.115977291159773e-05, "loss": 0.4375, "step": 45120 }, { "epoch": 1.3174844162858936, "grad_norm": 0.48679565888228926, "learning_rate": 3.115706947823736e-05, "loss": 0.462, "step": 45125 }, { "epoch": 1.3176303995562109, "grad_norm": 0.5013469965770193, "learning_rate": 3.115436604487699e-05, "loss": 0.4455, "step": 45130 }, { "epoch": 1.317776382826528, "grad_norm": 0.4713360330634095, "learning_rate": 3.115166261151663e-05, "loss": 0.4401, "step": 45135 }, { "epoch": 1.3179223660968453, "grad_norm": 0.5207867304473488, "learning_rate": 3.114895917815626e-05, "loss": 0.4589, "step": 45140 }, { "epoch": 1.3180683493671626, "grad_norm": 0.5116665590283609, "learning_rate": 3.1146255744795895e-05, "loss": 0.4579, "step": 45145 }, { "epoch": 1.3182143326374798, "grad_norm": 0.44620676043390134, "learning_rate": 3.114355231143552e-05, "loss": 0.4299, "step": 45150 }, { "epoch": 1.318360315907797, "grad_norm": 0.47942959399147334, "learning_rate": 3.1140848878075156e-05, "loss": 0.4378, "step": 45155 }, { "epoch": 1.3185062991781142, "grad_norm": 0.49435166533148694, "learning_rate": 3.113814544471479e-05, "loss": 0.4647, "step": 45160 }, { "epoch": 1.3186522824484315, "grad_norm": 0.4912450986479086, "learning_rate": 3.1135442011354424e-05, "loss": 0.4589, "step": 45165 }, { "epoch": 1.3187982657187487, "grad_norm": 0.4905234768996347, "learning_rate": 3.113273857799405e-05, "loss": 0.43, "step": 45170 }, { "epoch": 1.318944248989066, "grad_norm": 0.43782857587608665, "learning_rate": 3.1130035144633685e-05, "loss": 0.447, "step": 45175 }, { "epoch": 1.3190902322593832, "grad_norm": 0.4685123205593606, "learning_rate": 3.112733171127332e-05, "loss": 0.4545, "step": 45180 }, { "epoch": 1.3192362155297004, "grad_norm": 0.544774257230189, "learning_rate": 3.1124628277912947e-05, "loss": 0.4546, "step": 45185 }, { "epoch": 1.3193821988000174, "grad_norm": 0.45459141912047, "learning_rate": 3.112192484455259e-05, "loss": 0.427, "step": 45190 }, { "epoch": 1.3195281820703348, "grad_norm": 0.4602912028120633, "learning_rate": 3.1119221411192215e-05, "loss": 0.4497, "step": 45195 }, { "epoch": 1.3196741653406519, "grad_norm": 0.48903782738583407, "learning_rate": 3.111651797783185e-05, "loss": 0.4404, "step": 45200 }, { "epoch": 1.3198201486109693, "grad_norm": 0.46146138796996544, "learning_rate": 3.111381454447148e-05, "loss": 0.4568, "step": 45205 }, { "epoch": 1.3199661318812863, "grad_norm": 0.46842385430636263, "learning_rate": 3.111111111111111e-05, "loss": 0.4417, "step": 45210 }, { "epoch": 1.3201121151516038, "grad_norm": 0.49598186807221917, "learning_rate": 3.1108407677750744e-05, "loss": 0.4638, "step": 45215 }, { "epoch": 1.3202580984219208, "grad_norm": 0.505087193908507, "learning_rate": 3.110570424439038e-05, "loss": 0.4675, "step": 45220 }, { "epoch": 1.320404081692238, "grad_norm": 0.4706998695502962, "learning_rate": 3.110300081103001e-05, "loss": 0.4474, "step": 45225 }, { "epoch": 1.3205500649625552, "grad_norm": 0.5012013015882357, "learning_rate": 3.110029737766964e-05, "loss": 0.4626, "step": 45230 }, { "epoch": 1.3206960482328725, "grad_norm": 0.46116507931920825, "learning_rate": 3.109759394430927e-05, "loss": 0.4272, "step": 45235 }, { "epoch": 1.3208420315031897, "grad_norm": 0.47286612287546226, "learning_rate": 3.109489051094891e-05, "loss": 0.4302, "step": 45240 }, { "epoch": 1.320988014773507, "grad_norm": 0.5154933672902864, "learning_rate": 3.109218707758854e-05, "loss": 0.4302, "step": 45245 }, { "epoch": 1.3211339980438241, "grad_norm": 0.5003016247636125, "learning_rate": 3.1089483644228175e-05, "loss": 0.463, "step": 45250 }, { "epoch": 1.3212799813141414, "grad_norm": 0.4626194722632619, "learning_rate": 3.10867802108678e-05, "loss": 0.4425, "step": 45255 }, { "epoch": 1.3214259645844586, "grad_norm": 0.5117543587509523, "learning_rate": 3.1084076777507436e-05, "loss": 0.4666, "step": 45260 }, { "epoch": 1.3215719478547758, "grad_norm": 0.43585386410434934, "learning_rate": 3.108137334414707e-05, "loss": 0.422, "step": 45265 }, { "epoch": 1.321717931125093, "grad_norm": 0.4850129606693782, "learning_rate": 3.10786699107867e-05, "loss": 0.4361, "step": 45270 }, { "epoch": 1.3218639143954103, "grad_norm": 0.5067274396946618, "learning_rate": 3.107596647742633e-05, "loss": 0.4588, "step": 45275 }, { "epoch": 1.3220098976657275, "grad_norm": 0.5365555147302283, "learning_rate": 3.1073263044065966e-05, "loss": 0.4458, "step": 45280 }, { "epoch": 1.3221558809360447, "grad_norm": 0.49889907837476205, "learning_rate": 3.107055961070559e-05, "loss": 0.4238, "step": 45285 }, { "epoch": 1.322301864206362, "grad_norm": 0.4479114394743461, "learning_rate": 3.106785617734523e-05, "loss": 0.4242, "step": 45290 }, { "epoch": 1.3224478474766792, "grad_norm": 0.4982180958017346, "learning_rate": 3.106515274398486e-05, "loss": 0.4497, "step": 45295 }, { "epoch": 1.3225938307469964, "grad_norm": 0.4993578490394279, "learning_rate": 3.1062449310624495e-05, "loss": 0.4478, "step": 45300 }, { "epoch": 1.3227398140173137, "grad_norm": 0.48384001080276445, "learning_rate": 3.105974587726413e-05, "loss": 0.4724, "step": 45305 }, { "epoch": 1.322885797287631, "grad_norm": 0.49358768755984456, "learning_rate": 3.105704244390376e-05, "loss": 0.4422, "step": 45310 }, { "epoch": 1.3230317805579481, "grad_norm": 0.5171316217765519, "learning_rate": 3.105433901054339e-05, "loss": 0.4393, "step": 45315 }, { "epoch": 1.3231777638282654, "grad_norm": 0.4921451633343757, "learning_rate": 3.1051635577183024e-05, "loss": 0.4547, "step": 45320 }, { "epoch": 1.3233237470985826, "grad_norm": 0.48371104166447365, "learning_rate": 3.104893214382266e-05, "loss": 0.4372, "step": 45325 }, { "epoch": 1.3234697303688998, "grad_norm": 0.5146884789361889, "learning_rate": 3.1046228710462286e-05, "loss": 0.4479, "step": 45330 }, { "epoch": 1.3236157136392168, "grad_norm": 0.4553879115798871, "learning_rate": 3.104352527710192e-05, "loss": 0.4422, "step": 45335 }, { "epoch": 1.3237616969095343, "grad_norm": 0.4627831018354618, "learning_rate": 3.1040821843741554e-05, "loss": 0.4388, "step": 45340 }, { "epoch": 1.3239076801798513, "grad_norm": 0.4783996272460682, "learning_rate": 3.103811841038118e-05, "loss": 0.4563, "step": 45345 }, { "epoch": 1.3240536634501687, "grad_norm": 0.5360531377683329, "learning_rate": 3.1035414977020815e-05, "loss": 0.4714, "step": 45350 }, { "epoch": 1.3241996467204857, "grad_norm": 0.48400605205213454, "learning_rate": 3.103271154366045e-05, "loss": 0.4432, "step": 45355 }, { "epoch": 1.3243456299908032, "grad_norm": 0.46967330398949453, "learning_rate": 3.103000811030008e-05, "loss": 0.4273, "step": 45360 }, { "epoch": 1.3244916132611202, "grad_norm": 0.48404804974322985, "learning_rate": 3.102730467693972e-05, "loss": 0.4294, "step": 45365 }, { "epoch": 1.3246375965314374, "grad_norm": 0.4862367316424203, "learning_rate": 3.102460124357935e-05, "loss": 0.4478, "step": 45370 }, { "epoch": 1.3247835798017547, "grad_norm": 0.44685020767918227, "learning_rate": 3.102189781021898e-05, "loss": 0.421, "step": 45375 }, { "epoch": 1.3249295630720719, "grad_norm": 0.4430870392887953, "learning_rate": 3.101919437685861e-05, "loss": 0.4331, "step": 45380 }, { "epoch": 1.325075546342389, "grad_norm": 0.5082456110632559, "learning_rate": 3.1016490943498246e-05, "loss": 0.4337, "step": 45385 }, { "epoch": 1.3252215296127063, "grad_norm": 0.4934024898108206, "learning_rate": 3.101378751013787e-05, "loss": 0.4711, "step": 45390 }, { "epoch": 1.3253675128830236, "grad_norm": 0.5359721171030535, "learning_rate": 3.101108407677751e-05, "loss": 0.4446, "step": 45395 }, { "epoch": 1.3255134961533408, "grad_norm": 0.48642577855365254, "learning_rate": 3.100838064341714e-05, "loss": 0.4367, "step": 45400 }, { "epoch": 1.325659479423658, "grad_norm": 0.47970051881891285, "learning_rate": 3.100567721005677e-05, "loss": 0.4214, "step": 45405 }, { "epoch": 1.3258054626939753, "grad_norm": 0.4758103918591942, "learning_rate": 3.10029737766964e-05, "loss": 0.4337, "step": 45410 }, { "epoch": 1.3259514459642925, "grad_norm": 0.5012875583149596, "learning_rate": 3.100027034333604e-05, "loss": 0.4605, "step": 45415 }, { "epoch": 1.3260974292346097, "grad_norm": 0.4915419076064004, "learning_rate": 3.099756690997567e-05, "loss": 0.4429, "step": 45420 }, { "epoch": 1.326243412504927, "grad_norm": 0.46940192866584957, "learning_rate": 3.0994863476615305e-05, "loss": 0.4357, "step": 45425 }, { "epoch": 1.3263893957752442, "grad_norm": 0.5005406932553341, "learning_rate": 3.099216004325494e-05, "loss": 0.4345, "step": 45430 }, { "epoch": 1.3265353790455614, "grad_norm": 0.49164449884304484, "learning_rate": 3.0989456609894566e-05, "loss": 0.4589, "step": 45435 }, { "epoch": 1.3266813623158786, "grad_norm": 0.467157410778429, "learning_rate": 3.09867531765342e-05, "loss": 0.4527, "step": 45440 }, { "epoch": 1.3268273455861959, "grad_norm": 0.49157340442172237, "learning_rate": 3.0984049743173834e-05, "loss": 0.4654, "step": 45445 }, { "epoch": 1.326973328856513, "grad_norm": 0.46532459913451524, "learning_rate": 3.098134630981346e-05, "loss": 0.4453, "step": 45450 }, { "epoch": 1.3271193121268303, "grad_norm": 0.5203543950763013, "learning_rate": 3.0978642876453095e-05, "loss": 0.4633, "step": 45455 }, { "epoch": 1.3272652953971475, "grad_norm": 0.44673277850113835, "learning_rate": 3.097593944309273e-05, "loss": 0.4374, "step": 45460 }, { "epoch": 1.3274112786674648, "grad_norm": 0.4825923812855396, "learning_rate": 3.0973236009732356e-05, "loss": 0.4218, "step": 45465 }, { "epoch": 1.327557261937782, "grad_norm": 0.43244028241041815, "learning_rate": 3.0970532576372e-05, "loss": 0.4276, "step": 45470 }, { "epoch": 1.3277032452080992, "grad_norm": 0.5156687787642871, "learning_rate": 3.096782914301163e-05, "loss": 0.4485, "step": 45475 }, { "epoch": 1.3278492284784162, "grad_norm": 0.4823767001629449, "learning_rate": 3.096512570965126e-05, "loss": 0.4285, "step": 45480 }, { "epoch": 1.3279952117487337, "grad_norm": 0.42363178793044237, "learning_rate": 3.096242227629089e-05, "loss": 0.4051, "step": 45485 }, { "epoch": 1.3281411950190507, "grad_norm": 0.48164690750789035, "learning_rate": 3.0959718842930526e-05, "loss": 0.4488, "step": 45490 }, { "epoch": 1.3282871782893682, "grad_norm": 0.5094514569104509, "learning_rate": 3.0957015409570154e-05, "loss": 0.4518, "step": 45495 }, { "epoch": 1.3284331615596852, "grad_norm": 0.4701276610774086, "learning_rate": 3.095431197620979e-05, "loss": 0.4568, "step": 45500 }, { "epoch": 1.3285791448300026, "grad_norm": 0.47080972031720064, "learning_rate": 3.095160854284942e-05, "loss": 0.4546, "step": 45505 }, { "epoch": 1.3287251281003196, "grad_norm": 0.46283095872028257, "learning_rate": 3.094890510948905e-05, "loss": 0.4416, "step": 45510 }, { "epoch": 1.3288711113706368, "grad_norm": 0.5436748756128226, "learning_rate": 3.094620167612868e-05, "loss": 0.4535, "step": 45515 }, { "epoch": 1.329017094640954, "grad_norm": 0.46579583652951656, "learning_rate": 3.094349824276832e-05, "loss": 0.4615, "step": 45520 }, { "epoch": 1.3291630779112713, "grad_norm": 0.4572358678138146, "learning_rate": 3.0940794809407944e-05, "loss": 0.4286, "step": 45525 }, { "epoch": 1.3293090611815885, "grad_norm": 0.4764823129843465, "learning_rate": 3.0938091376047585e-05, "loss": 0.4489, "step": 45530 }, { "epoch": 1.3294550444519058, "grad_norm": 0.4615507605493475, "learning_rate": 3.093538794268722e-05, "loss": 0.4177, "step": 45535 }, { "epoch": 1.329601027722223, "grad_norm": 0.5081339949634655, "learning_rate": 3.0932684509326846e-05, "loss": 0.4271, "step": 45540 }, { "epoch": 1.3297470109925402, "grad_norm": 0.5019664164512663, "learning_rate": 3.092998107596648e-05, "loss": 0.4658, "step": 45545 }, { "epoch": 1.3298929942628575, "grad_norm": 0.5287913647831172, "learning_rate": 3.0927277642606114e-05, "loss": 0.4174, "step": 45550 }, { "epoch": 1.3300389775331747, "grad_norm": 0.4469568399555266, "learning_rate": 3.092457420924574e-05, "loss": 0.4471, "step": 45555 }, { "epoch": 1.330184960803492, "grad_norm": 0.49072441534275346, "learning_rate": 3.0921870775885375e-05, "loss": 0.464, "step": 45560 }, { "epoch": 1.3303309440738091, "grad_norm": 0.48750089823766957, "learning_rate": 3.091916734252501e-05, "loss": 0.4587, "step": 45565 }, { "epoch": 1.3304769273441264, "grad_norm": 0.4696530030724935, "learning_rate": 3.091646390916464e-05, "loss": 0.4375, "step": 45570 }, { "epoch": 1.3306229106144436, "grad_norm": 0.5034095942509554, "learning_rate": 3.091376047580427e-05, "loss": 0.4406, "step": 45575 }, { "epoch": 1.3307688938847608, "grad_norm": 0.504230376196341, "learning_rate": 3.0911057042443905e-05, "loss": 0.4444, "step": 45580 }, { "epoch": 1.330914877155078, "grad_norm": 0.4781671904328088, "learning_rate": 3.090835360908354e-05, "loss": 0.4454, "step": 45585 }, { "epoch": 1.3310608604253953, "grad_norm": 0.5260954596506785, "learning_rate": 3.090565017572317e-05, "loss": 0.4686, "step": 45590 }, { "epoch": 1.3312068436957125, "grad_norm": 0.5139832708828377, "learning_rate": 3.090294674236281e-05, "loss": 0.4249, "step": 45595 }, { "epoch": 1.3313528269660297, "grad_norm": 0.4798982106822809, "learning_rate": 3.0900243309002434e-05, "loss": 0.4453, "step": 45600 }, { "epoch": 1.331498810236347, "grad_norm": 0.4888773009999919, "learning_rate": 3.089753987564207e-05, "loss": 0.4589, "step": 45605 }, { "epoch": 1.3316447935066642, "grad_norm": 0.4481126820522332, "learning_rate": 3.08948364422817e-05, "loss": 0.452, "step": 45610 }, { "epoch": 1.3317907767769814, "grad_norm": 0.500941219542411, "learning_rate": 3.089213300892133e-05, "loss": 0.4513, "step": 45615 }, { "epoch": 1.3319367600472987, "grad_norm": 0.46518791567621953, "learning_rate": 3.088942957556096e-05, "loss": 0.4343, "step": 45620 }, { "epoch": 1.3320827433176157, "grad_norm": 0.47532620773006284, "learning_rate": 3.08867261422006e-05, "loss": 0.4378, "step": 45625 }, { "epoch": 1.3322287265879331, "grad_norm": 0.49086532217728246, "learning_rate": 3.0884022708840225e-05, "loss": 0.4326, "step": 45630 }, { "epoch": 1.3323747098582501, "grad_norm": 0.49486295419809556, "learning_rate": 3.088131927547986e-05, "loss": 0.4477, "step": 45635 }, { "epoch": 1.3325206931285676, "grad_norm": 0.47716299758104386, "learning_rate": 3.08786158421195e-05, "loss": 0.4489, "step": 45640 }, { "epoch": 1.3326666763988846, "grad_norm": 0.47837899530939154, "learning_rate": 3.0875912408759127e-05, "loss": 0.4305, "step": 45645 }, { "epoch": 1.332812659669202, "grad_norm": 0.5121993624317206, "learning_rate": 3.087320897539876e-05, "loss": 0.4526, "step": 45650 }, { "epoch": 1.332958642939519, "grad_norm": 0.4730280961026548, "learning_rate": 3.0870505542038395e-05, "loss": 0.4637, "step": 45655 }, { "epoch": 1.3331046262098363, "grad_norm": 0.4868978786970334, "learning_rate": 3.086780210867802e-05, "loss": 0.4516, "step": 45660 }, { "epoch": 1.3332506094801535, "grad_norm": 0.46349159997178985, "learning_rate": 3.0865098675317656e-05, "loss": 0.461, "step": 45665 }, { "epoch": 1.3333965927504707, "grad_norm": 0.4862435951665846, "learning_rate": 3.086239524195729e-05, "loss": 0.452, "step": 45670 }, { "epoch": 1.333542576020788, "grad_norm": 0.44677377246510974, "learning_rate": 3.085969180859692e-05, "loss": 0.4511, "step": 45675 }, { "epoch": 1.3336885592911052, "grad_norm": 0.4691324070654488, "learning_rate": 3.085698837523655e-05, "loss": 0.4615, "step": 45680 }, { "epoch": 1.3338345425614224, "grad_norm": 0.47882958901014155, "learning_rate": 3.0854284941876185e-05, "loss": 0.4636, "step": 45685 }, { "epoch": 1.3339805258317396, "grad_norm": 0.4920228371536913, "learning_rate": 3.085158150851581e-05, "loss": 0.4347, "step": 45690 }, { "epoch": 1.3341265091020569, "grad_norm": 0.44327160543950567, "learning_rate": 3.0848878075155446e-05, "loss": 0.425, "step": 45695 }, { "epoch": 1.334272492372374, "grad_norm": 0.4807979327140929, "learning_rate": 3.084617464179508e-05, "loss": 0.4457, "step": 45700 }, { "epoch": 1.3344184756426913, "grad_norm": 0.4541364784711594, "learning_rate": 3.0843471208434714e-05, "loss": 0.445, "step": 45705 }, { "epoch": 1.3345644589130086, "grad_norm": 0.433810400159115, "learning_rate": 3.084076777507435e-05, "loss": 0.4518, "step": 45710 }, { "epoch": 1.3347104421833258, "grad_norm": 0.5031458188354473, "learning_rate": 3.083806434171398e-05, "loss": 0.4439, "step": 45715 }, { "epoch": 1.334856425453643, "grad_norm": 0.47408665209840717, "learning_rate": 3.083536090835361e-05, "loss": 0.4416, "step": 45720 }, { "epoch": 1.3350024087239603, "grad_norm": 0.49725055586037187, "learning_rate": 3.0832657474993244e-05, "loss": 0.4585, "step": 45725 }, { "epoch": 1.3351483919942775, "grad_norm": 0.5074690206703297, "learning_rate": 3.082995404163288e-05, "loss": 0.4635, "step": 45730 }, { "epoch": 1.3352943752645947, "grad_norm": 0.48963889767604674, "learning_rate": 3.0827250608272505e-05, "loss": 0.4565, "step": 45735 }, { "epoch": 1.335440358534912, "grad_norm": 0.44690184563695196, "learning_rate": 3.082454717491214e-05, "loss": 0.4329, "step": 45740 }, { "epoch": 1.3355863418052292, "grad_norm": 0.5044551828760107, "learning_rate": 3.082184374155177e-05, "loss": 0.4478, "step": 45745 }, { "epoch": 1.3357323250755464, "grad_norm": 0.49757301188207037, "learning_rate": 3.08191403081914e-05, "loss": 0.4622, "step": 45750 }, { "epoch": 1.3358783083458636, "grad_norm": 0.5543130422411083, "learning_rate": 3.081643687483104e-05, "loss": 0.4717, "step": 45755 }, { "epoch": 1.3360242916161809, "grad_norm": 0.44686316320419084, "learning_rate": 3.081373344147067e-05, "loss": 0.4288, "step": 45760 }, { "epoch": 1.336170274886498, "grad_norm": 0.4746163757323186, "learning_rate": 3.08110300081103e-05, "loss": 0.426, "step": 45765 }, { "epoch": 1.336316258156815, "grad_norm": 0.4614080579921663, "learning_rate": 3.0808326574749936e-05, "loss": 0.4255, "step": 45770 }, { "epoch": 1.3364622414271325, "grad_norm": 0.5106234557774764, "learning_rate": 3.080562314138957e-05, "loss": 0.4579, "step": 45775 }, { "epoch": 1.3366082246974496, "grad_norm": 0.49845780845283605, "learning_rate": 3.08029197080292e-05, "loss": 0.4329, "step": 45780 }, { "epoch": 1.336754207967767, "grad_norm": 0.5030855807473786, "learning_rate": 3.080021627466883e-05, "loss": 0.4425, "step": 45785 }, { "epoch": 1.336900191238084, "grad_norm": 0.459358803995872, "learning_rate": 3.0797512841308465e-05, "loss": 0.439, "step": 45790 }, { "epoch": 1.3370461745084015, "grad_norm": 0.5064579782112786, "learning_rate": 3.079480940794809e-05, "loss": 0.4216, "step": 45795 }, { "epoch": 1.3371921577787185, "grad_norm": 0.49495916885456603, "learning_rate": 3.079210597458773e-05, "loss": 0.4497, "step": 45800 }, { "epoch": 1.3373381410490357, "grad_norm": 0.4987438762795189, "learning_rate": 3.078940254122736e-05, "loss": 0.4496, "step": 45805 }, { "epoch": 1.337484124319353, "grad_norm": 0.47054600330956886, "learning_rate": 3.0786699107866995e-05, "loss": 0.4386, "step": 45810 }, { "epoch": 1.3376301075896702, "grad_norm": 0.49026051016959277, "learning_rate": 3.078399567450663e-05, "loss": 0.4437, "step": 45815 }, { "epoch": 1.3377760908599874, "grad_norm": 0.49097470025869905, "learning_rate": 3.0781292241146256e-05, "loss": 0.4654, "step": 45820 }, { "epoch": 1.3379220741303046, "grad_norm": 0.5080279165363021, "learning_rate": 3.077858880778589e-05, "loss": 0.4465, "step": 45825 }, { "epoch": 1.3380680574006218, "grad_norm": 0.46300547805810643, "learning_rate": 3.0775885374425524e-05, "loss": 0.4214, "step": 45830 }, { "epoch": 1.338214040670939, "grad_norm": 0.4668075460584393, "learning_rate": 3.077318194106515e-05, "loss": 0.4458, "step": 45835 }, { "epoch": 1.3383600239412563, "grad_norm": 0.4733194312018266, "learning_rate": 3.0770478507704785e-05, "loss": 0.4434, "step": 45840 }, { "epoch": 1.3385060072115735, "grad_norm": 0.5104305650563916, "learning_rate": 3.076777507434442e-05, "loss": 0.4483, "step": 45845 }, { "epoch": 1.3386519904818908, "grad_norm": 0.47019582658183473, "learning_rate": 3.076507164098405e-05, "loss": 0.4366, "step": 45850 }, { "epoch": 1.338797973752208, "grad_norm": 0.5141702332921718, "learning_rate": 3.076236820762368e-05, "loss": 0.4692, "step": 45855 }, { "epoch": 1.3389439570225252, "grad_norm": 0.517018950329778, "learning_rate": 3.0759664774263315e-05, "loss": 0.4149, "step": 45860 }, { "epoch": 1.3390899402928425, "grad_norm": 0.5101987787915873, "learning_rate": 3.075696134090295e-05, "loss": 0.4477, "step": 45865 }, { "epoch": 1.3392359235631597, "grad_norm": 0.47838183525894556, "learning_rate": 3.075425790754258e-05, "loss": 0.4357, "step": 45870 }, { "epoch": 1.339381906833477, "grad_norm": 0.4844947620892582, "learning_rate": 3.0751554474182217e-05, "loss": 0.4537, "step": 45875 }, { "epoch": 1.3395278901037941, "grad_norm": 0.5093662550305528, "learning_rate": 3.0748851040821844e-05, "loss": 0.4576, "step": 45880 }, { "epoch": 1.3396738733741114, "grad_norm": 0.4949374335056613, "learning_rate": 3.074614760746148e-05, "loss": 0.4396, "step": 45885 }, { "epoch": 1.3398198566444286, "grad_norm": 0.47448072882641296, "learning_rate": 3.074344417410111e-05, "loss": 0.45, "step": 45890 }, { "epoch": 1.3399658399147458, "grad_norm": 0.5030954238025974, "learning_rate": 3.074074074074074e-05, "loss": 0.4302, "step": 45895 }, { "epoch": 1.340111823185063, "grad_norm": 0.4642213741965342, "learning_rate": 3.073803730738037e-05, "loss": 0.4473, "step": 45900 }, { "epoch": 1.3402578064553803, "grad_norm": 0.4412139343616287, "learning_rate": 3.073533387402001e-05, "loss": 0.4363, "step": 45905 }, { "epoch": 1.3404037897256975, "grad_norm": 0.4332483684352691, "learning_rate": 3.073263044065964e-05, "loss": 0.4298, "step": 45910 }, { "epoch": 1.3405497729960145, "grad_norm": 0.4795853611358765, "learning_rate": 3.072992700729927e-05, "loss": 0.4617, "step": 45915 }, { "epoch": 1.340695756266332, "grad_norm": 0.5031562764018652, "learning_rate": 3.07272235739389e-05, "loss": 0.445, "step": 45920 }, { "epoch": 1.340841739536649, "grad_norm": 0.5304656379361723, "learning_rate": 3.0724520140578536e-05, "loss": 0.4379, "step": 45925 }, { "epoch": 1.3409877228069664, "grad_norm": 0.4865797768950822, "learning_rate": 3.072181670721817e-05, "loss": 0.4545, "step": 45930 }, { "epoch": 1.3411337060772834, "grad_norm": 0.49792288547990515, "learning_rate": 3.0719113273857804e-05, "loss": 0.4346, "step": 45935 }, { "epoch": 1.341279689347601, "grad_norm": 0.46882856797995703, "learning_rate": 3.071640984049743e-05, "loss": 0.4776, "step": 45940 }, { "epoch": 1.341425672617918, "grad_norm": 0.5320399710911619, "learning_rate": 3.0713706407137066e-05, "loss": 0.4689, "step": 45945 }, { "epoch": 1.3415716558882351, "grad_norm": 0.4918809564527737, "learning_rate": 3.07110029737767e-05, "loss": 0.4671, "step": 45950 }, { "epoch": 1.3417176391585524, "grad_norm": 0.49630061074432424, "learning_rate": 3.070829954041633e-05, "loss": 0.4482, "step": 45955 }, { "epoch": 1.3418636224288696, "grad_norm": 0.4887273436856903, "learning_rate": 3.070559610705596e-05, "loss": 0.4543, "step": 45960 }, { "epoch": 1.3420096056991868, "grad_norm": 0.4879055350199402, "learning_rate": 3.0702892673695595e-05, "loss": 0.4509, "step": 45965 }, { "epoch": 1.342155588969504, "grad_norm": 0.5111442733199019, "learning_rate": 3.070018924033522e-05, "loss": 0.4426, "step": 45970 }, { "epoch": 1.3423015722398213, "grad_norm": 0.5183091232111433, "learning_rate": 3.0697485806974856e-05, "loss": 0.433, "step": 45975 }, { "epoch": 1.3424475555101385, "grad_norm": 0.462453781317057, "learning_rate": 3.06947823736145e-05, "loss": 0.4777, "step": 45980 }, { "epoch": 1.3425935387804557, "grad_norm": 0.4757125625213574, "learning_rate": 3.0692078940254124e-05, "loss": 0.4178, "step": 45985 }, { "epoch": 1.342739522050773, "grad_norm": 0.5220688985795158, "learning_rate": 3.068937550689376e-05, "loss": 0.4649, "step": 45990 }, { "epoch": 1.3428855053210902, "grad_norm": 0.4926617702667252, "learning_rate": 3.068667207353339e-05, "loss": 0.4878, "step": 45995 }, { "epoch": 1.3430314885914074, "grad_norm": 0.4634654252408423, "learning_rate": 3.068396864017302e-05, "loss": 0.4508, "step": 46000 }, { "epoch": 1.3431774718617246, "grad_norm": 0.46504971822367563, "learning_rate": 3.0681265206812653e-05, "loss": 0.4539, "step": 46005 }, { "epoch": 1.3433234551320419, "grad_norm": 0.47573654060869375, "learning_rate": 3.067856177345229e-05, "loss": 0.438, "step": 46010 }, { "epoch": 1.343469438402359, "grad_norm": 0.5032187669277063, "learning_rate": 3.0675858340091915e-05, "loss": 0.4475, "step": 46015 }, { "epoch": 1.3436154216726763, "grad_norm": 0.44511398320104534, "learning_rate": 3.067315490673155e-05, "loss": 0.4498, "step": 46020 }, { "epoch": 1.3437614049429936, "grad_norm": 0.48496997083867205, "learning_rate": 3.067045147337118e-05, "loss": 0.4435, "step": 46025 }, { "epoch": 1.3439073882133108, "grad_norm": 0.4261688248472361, "learning_rate": 3.066774804001081e-05, "loss": 0.4734, "step": 46030 }, { "epoch": 1.344053371483628, "grad_norm": 0.48279747052106226, "learning_rate": 3.0665044606650444e-05, "loss": 0.4449, "step": 46035 }, { "epoch": 1.3441993547539453, "grad_norm": 0.4614116533337577, "learning_rate": 3.0662341173290085e-05, "loss": 0.4395, "step": 46040 }, { "epoch": 1.3443453380242625, "grad_norm": 0.5014910550238034, "learning_rate": 3.065963773992971e-05, "loss": 0.4391, "step": 46045 }, { "epoch": 1.3444913212945797, "grad_norm": 0.47828296925949615, "learning_rate": 3.0656934306569346e-05, "loss": 0.4277, "step": 46050 }, { "epoch": 1.344637304564897, "grad_norm": 0.45450357930348984, "learning_rate": 3.065423087320898e-05, "loss": 0.4418, "step": 46055 }, { "epoch": 1.344783287835214, "grad_norm": 0.46581202985094333, "learning_rate": 3.065152743984861e-05, "loss": 0.4461, "step": 46060 }, { "epoch": 1.3449292711055314, "grad_norm": 0.5048026809204846, "learning_rate": 3.064882400648824e-05, "loss": 0.4924, "step": 46065 }, { "epoch": 1.3450752543758484, "grad_norm": 0.5252371895422381, "learning_rate": 3.0646120573127875e-05, "loss": 0.4533, "step": 46070 }, { "epoch": 1.3452212376461659, "grad_norm": 0.487755672398289, "learning_rate": 3.06434171397675e-05, "loss": 0.4821, "step": 46075 }, { "epoch": 1.3453672209164829, "grad_norm": 0.486092267484878, "learning_rate": 3.0640713706407136e-05, "loss": 0.4367, "step": 46080 }, { "epoch": 1.3455132041868003, "grad_norm": 0.5574103524448574, "learning_rate": 3.063801027304677e-05, "loss": 0.4609, "step": 46085 }, { "epoch": 1.3456591874571173, "grad_norm": 0.44224980343552805, "learning_rate": 3.06353068396864e-05, "loss": 0.4485, "step": 46090 }, { "epoch": 1.3458051707274348, "grad_norm": 0.5034759236724089, "learning_rate": 3.063260340632604e-05, "loss": 0.4416, "step": 46095 }, { "epoch": 1.3459511539977518, "grad_norm": 0.5326860904613957, "learning_rate": 3.062989997296567e-05, "loss": 0.4899, "step": 46100 }, { "epoch": 1.346097137268069, "grad_norm": 0.4724512079485559, "learning_rate": 3.06271965396053e-05, "loss": 0.4539, "step": 46105 }, { "epoch": 1.3462431205383862, "grad_norm": 0.4798256416100505, "learning_rate": 3.0624493106244934e-05, "loss": 0.4636, "step": 46110 }, { "epoch": 1.3463891038087035, "grad_norm": 0.5116636213000575, "learning_rate": 3.062178967288457e-05, "loss": 0.4553, "step": 46115 }, { "epoch": 1.3465350870790207, "grad_norm": 0.47652308474783484, "learning_rate": 3.0619086239524195e-05, "loss": 0.4508, "step": 46120 }, { "epoch": 1.346681070349338, "grad_norm": 0.5025239709036109, "learning_rate": 3.061638280616383e-05, "loss": 0.4344, "step": 46125 }, { "epoch": 1.3468270536196552, "grad_norm": 0.48537511938065825, "learning_rate": 3.061367937280346e-05, "loss": 0.4556, "step": 46130 }, { "epoch": 1.3469730368899724, "grad_norm": 0.4705060961285067, "learning_rate": 3.061097593944309e-05, "loss": 0.4309, "step": 46135 }, { "epoch": 1.3471190201602896, "grad_norm": 0.4818601194417278, "learning_rate": 3.0608272506082724e-05, "loss": 0.4776, "step": 46140 }, { "epoch": 1.3472650034306068, "grad_norm": 0.5059850429007149, "learning_rate": 3.060556907272236e-05, "loss": 0.4665, "step": 46145 }, { "epoch": 1.347410986700924, "grad_norm": 0.4484486458655316, "learning_rate": 3.060286563936199e-05, "loss": 0.4531, "step": 46150 }, { "epoch": 1.3475569699712413, "grad_norm": 0.44946713017328077, "learning_rate": 3.0600162206001626e-05, "loss": 0.4603, "step": 46155 }, { "epoch": 1.3477029532415585, "grad_norm": 0.4921983542923777, "learning_rate": 3.059745877264126e-05, "loss": 0.4416, "step": 46160 }, { "epoch": 1.3478489365118758, "grad_norm": 0.5007492377678213, "learning_rate": 3.059475533928089e-05, "loss": 0.4584, "step": 46165 }, { "epoch": 1.347994919782193, "grad_norm": 0.4768084314590956, "learning_rate": 3.059205190592052e-05, "loss": 0.452, "step": 46170 }, { "epoch": 1.3481409030525102, "grad_norm": 0.4711566350744171, "learning_rate": 3.0589348472560156e-05, "loss": 0.4623, "step": 46175 }, { "epoch": 1.3482868863228274, "grad_norm": 0.5058474175187085, "learning_rate": 3.058664503919978e-05, "loss": 0.4488, "step": 46180 }, { "epoch": 1.3484328695931447, "grad_norm": 0.4605124080774583, "learning_rate": 3.058394160583942e-05, "loss": 0.4459, "step": 46185 }, { "epoch": 1.348578852863462, "grad_norm": 0.4807445080771213, "learning_rate": 3.058123817247905e-05, "loss": 0.448, "step": 46190 }, { "epoch": 1.3487248361337791, "grad_norm": 0.4654363300454508, "learning_rate": 3.057853473911868e-05, "loss": 0.4496, "step": 46195 }, { "epoch": 1.3488708194040964, "grad_norm": 0.48701756157461007, "learning_rate": 3.057583130575831e-05, "loss": 0.4693, "step": 46200 }, { "epoch": 1.3490168026744136, "grad_norm": 0.4410376948529264, "learning_rate": 3.0573127872397946e-05, "loss": 0.4352, "step": 46205 }, { "epoch": 1.3491627859447308, "grad_norm": 0.5170332210446498, "learning_rate": 3.057042443903758e-05, "loss": 0.4552, "step": 46210 }, { "epoch": 1.3493087692150478, "grad_norm": 0.4759972259076526, "learning_rate": 3.0567721005677214e-05, "loss": 0.4546, "step": 46215 }, { "epoch": 1.3494547524853653, "grad_norm": 0.4560885671292489, "learning_rate": 3.056501757231685e-05, "loss": 0.4584, "step": 46220 }, { "epoch": 1.3496007357556823, "grad_norm": 0.4573728394336386, "learning_rate": 3.0562314138956475e-05, "loss": 0.4379, "step": 46225 }, { "epoch": 1.3497467190259997, "grad_norm": 0.48944390983677155, "learning_rate": 3.055961070559611e-05, "loss": 0.4682, "step": 46230 }, { "epoch": 1.3498927022963167, "grad_norm": 0.4625397039414639, "learning_rate": 3.0556907272235743e-05, "loss": 0.4597, "step": 46235 }, { "epoch": 1.3500386855666342, "grad_norm": 0.5059173997315998, "learning_rate": 3.055420383887537e-05, "loss": 0.4536, "step": 46240 }, { "epoch": 1.3501846688369512, "grad_norm": 0.4543272939382549, "learning_rate": 3.0551500405515005e-05, "loss": 0.4354, "step": 46245 }, { "epoch": 1.3503306521072684, "grad_norm": 0.4116644512076724, "learning_rate": 3.054879697215464e-05, "loss": 0.439, "step": 46250 }, { "epoch": 1.3504766353775857, "grad_norm": 0.4478597484047729, "learning_rate": 3.0546093538794266e-05, "loss": 0.4236, "step": 46255 }, { "epoch": 1.350622618647903, "grad_norm": 0.4594193105381971, "learning_rate": 3.05433901054339e-05, "loss": 0.4473, "step": 46260 }, { "epoch": 1.3507686019182201, "grad_norm": 0.49618598428491906, "learning_rate": 3.054068667207354e-05, "loss": 0.4421, "step": 46265 }, { "epoch": 1.3509145851885374, "grad_norm": 0.5079551466782828, "learning_rate": 3.053798323871317e-05, "loss": 0.4602, "step": 46270 }, { "epoch": 1.3510605684588546, "grad_norm": 0.517123561726664, "learning_rate": 3.05352798053528e-05, "loss": 0.4734, "step": 46275 }, { "epoch": 1.3512065517291718, "grad_norm": 0.507707073403144, "learning_rate": 3.0532576371992436e-05, "loss": 0.4396, "step": 46280 }, { "epoch": 1.351352534999489, "grad_norm": 0.5056103472133544, "learning_rate": 3.052987293863206e-05, "loss": 0.4664, "step": 46285 }, { "epoch": 1.3514985182698063, "grad_norm": 0.44976832563378616, "learning_rate": 3.05271695052717e-05, "loss": 0.4144, "step": 46290 }, { "epoch": 1.3516445015401235, "grad_norm": 0.47350843865091813, "learning_rate": 3.052446607191133e-05, "loss": 0.4375, "step": 46295 }, { "epoch": 1.3517904848104407, "grad_norm": 0.5289328960069231, "learning_rate": 3.052176263855096e-05, "loss": 0.4738, "step": 46300 }, { "epoch": 1.351936468080758, "grad_norm": 0.44020954876555224, "learning_rate": 3.051905920519059e-05, "loss": 0.4321, "step": 46305 }, { "epoch": 1.3520824513510752, "grad_norm": 0.4917614865226527, "learning_rate": 3.0516355771830223e-05, "loss": 0.4495, "step": 46310 }, { "epoch": 1.3522284346213924, "grad_norm": 0.4827309095377377, "learning_rate": 3.0513652338469857e-05, "loss": 0.4664, "step": 46315 }, { "epoch": 1.3523744178917096, "grad_norm": 0.4880413422735937, "learning_rate": 3.0510948905109494e-05, "loss": 0.4404, "step": 46320 }, { "epoch": 1.3525204011620269, "grad_norm": 0.5010588994773725, "learning_rate": 3.0508245471749125e-05, "loss": 0.4524, "step": 46325 }, { "epoch": 1.352666384432344, "grad_norm": 0.48040686469998084, "learning_rate": 3.0505542038388756e-05, "loss": 0.4714, "step": 46330 }, { "epoch": 1.3528123677026613, "grad_norm": 0.48035281743599356, "learning_rate": 3.050283860502839e-05, "loss": 0.4439, "step": 46335 }, { "epoch": 1.3529583509729786, "grad_norm": 0.4924944034591122, "learning_rate": 3.050013517166802e-05, "loss": 0.4383, "step": 46340 }, { "epoch": 1.3531043342432958, "grad_norm": 0.49441065671890577, "learning_rate": 3.049743173830765e-05, "loss": 0.4555, "step": 46345 }, { "epoch": 1.353250317513613, "grad_norm": 0.4479158838337815, "learning_rate": 3.0494728304947285e-05, "loss": 0.453, "step": 46350 }, { "epoch": 1.3533963007839303, "grad_norm": 0.4989515858561775, "learning_rate": 3.0492024871586916e-05, "loss": 0.4458, "step": 46355 }, { "epoch": 1.3535422840542473, "grad_norm": 0.45011531180555747, "learning_rate": 3.048932143822655e-05, "loss": 0.4247, "step": 46360 }, { "epoch": 1.3536882673245647, "grad_norm": 0.5337667862540313, "learning_rate": 3.048661800486618e-05, "loss": 0.43, "step": 46365 }, { "epoch": 1.3538342505948817, "grad_norm": 0.48653404107182485, "learning_rate": 3.048391457150581e-05, "loss": 0.4497, "step": 46370 }, { "epoch": 1.3539802338651992, "grad_norm": 0.4861358728307972, "learning_rate": 3.0481211138145445e-05, "loss": 0.4328, "step": 46375 }, { "epoch": 1.3541262171355162, "grad_norm": 0.48494780383649366, "learning_rate": 3.0478507704785082e-05, "loss": 0.4633, "step": 46380 }, { "epoch": 1.3542722004058336, "grad_norm": 0.517890958018141, "learning_rate": 3.0475804271424713e-05, "loss": 0.4598, "step": 46385 }, { "epoch": 1.3544181836761506, "grad_norm": 0.5119734001855892, "learning_rate": 3.0473100838064344e-05, "loss": 0.4228, "step": 46390 }, { "epoch": 1.3545641669464679, "grad_norm": 0.5122606606930075, "learning_rate": 3.0470397404703978e-05, "loss": 0.4215, "step": 46395 }, { "epoch": 1.354710150216785, "grad_norm": 0.49641839098596857, "learning_rate": 3.0467693971343608e-05, "loss": 0.4593, "step": 46400 }, { "epoch": 1.3548561334871023, "grad_norm": 0.4847195679036458, "learning_rate": 3.046499053798324e-05, "loss": 0.4582, "step": 46405 }, { "epoch": 1.3550021167574196, "grad_norm": 0.4664121565451265, "learning_rate": 3.0462287104622873e-05, "loss": 0.4287, "step": 46410 }, { "epoch": 1.3551481000277368, "grad_norm": 0.5000550466635361, "learning_rate": 3.0459583671262503e-05, "loss": 0.4483, "step": 46415 }, { "epoch": 1.355294083298054, "grad_norm": 0.49762263432340714, "learning_rate": 3.0456880237902134e-05, "loss": 0.4638, "step": 46420 }, { "epoch": 1.3554400665683712, "grad_norm": 0.5079415593080517, "learning_rate": 3.0454176804541768e-05, "loss": 0.4394, "step": 46425 }, { "epoch": 1.3555860498386885, "grad_norm": 0.48686550859210437, "learning_rate": 3.04514733711814e-05, "loss": 0.4727, "step": 46430 }, { "epoch": 1.3557320331090057, "grad_norm": 0.46486524367870224, "learning_rate": 3.0448769937821036e-05, "loss": 0.4429, "step": 46435 }, { "epoch": 1.355878016379323, "grad_norm": 0.48392135267358066, "learning_rate": 3.044606650446067e-05, "loss": 0.436, "step": 46440 }, { "epoch": 1.3560239996496402, "grad_norm": 0.5059235935461087, "learning_rate": 3.04433630711003e-05, "loss": 0.4437, "step": 46445 }, { "epoch": 1.3561699829199574, "grad_norm": 0.5251809607726703, "learning_rate": 3.044065963773993e-05, "loss": 0.441, "step": 46450 }, { "epoch": 1.3563159661902746, "grad_norm": 0.4374146043590241, "learning_rate": 3.0437956204379565e-05, "loss": 0.4339, "step": 46455 }, { "epoch": 1.3564619494605918, "grad_norm": 0.5230970013674932, "learning_rate": 3.0435252771019196e-05, "loss": 0.4448, "step": 46460 }, { "epoch": 1.356607932730909, "grad_norm": 0.5065043172757773, "learning_rate": 3.0432549337658827e-05, "loss": 0.4486, "step": 46465 }, { "epoch": 1.3567539160012263, "grad_norm": 0.5112339377851784, "learning_rate": 3.042984590429846e-05, "loss": 0.4578, "step": 46470 }, { "epoch": 1.3568998992715435, "grad_norm": 0.4490252149698442, "learning_rate": 3.042714247093809e-05, "loss": 0.4655, "step": 46475 }, { "epoch": 1.3570458825418608, "grad_norm": 0.5180322651406946, "learning_rate": 3.0424439037577722e-05, "loss": 0.4687, "step": 46480 }, { "epoch": 1.357191865812178, "grad_norm": 0.5110703326867774, "learning_rate": 3.0421735604217356e-05, "loss": 0.463, "step": 46485 }, { "epoch": 1.3573378490824952, "grad_norm": 0.49450593788466424, "learning_rate": 3.0419032170856993e-05, "loss": 0.4673, "step": 46490 }, { "epoch": 1.3574838323528124, "grad_norm": 0.4718457584705203, "learning_rate": 3.0416328737496624e-05, "loss": 0.4311, "step": 46495 }, { "epoch": 1.3576298156231297, "grad_norm": 0.4444522990656292, "learning_rate": 3.0413625304136255e-05, "loss": 0.4223, "step": 46500 }, { "epoch": 1.3577757988934467, "grad_norm": 0.5158298662445037, "learning_rate": 3.041092187077589e-05, "loss": 0.4601, "step": 46505 }, { "epoch": 1.3579217821637641, "grad_norm": 0.44439072359718196, "learning_rate": 3.040821843741552e-05, "loss": 0.4334, "step": 46510 }, { "epoch": 1.3580677654340811, "grad_norm": 0.4912241769442155, "learning_rate": 3.0405515004055153e-05, "loss": 0.4438, "step": 46515 }, { "epoch": 1.3582137487043986, "grad_norm": 0.46378234099687565, "learning_rate": 3.0402811570694784e-05, "loss": 0.4736, "step": 46520 }, { "epoch": 1.3583597319747156, "grad_norm": 0.4931723031782246, "learning_rate": 3.0400108137334414e-05, "loss": 0.4392, "step": 46525 }, { "epoch": 1.358505715245033, "grad_norm": 0.4530234773403577, "learning_rate": 3.039740470397405e-05, "loss": 0.4128, "step": 46530 }, { "epoch": 1.35865169851535, "grad_norm": 0.4844743082508937, "learning_rate": 3.039470127061368e-05, "loss": 0.4351, "step": 46535 }, { "epoch": 1.3587976817856673, "grad_norm": 0.4681682777828249, "learning_rate": 3.039199783725331e-05, "loss": 0.4424, "step": 46540 }, { "epoch": 1.3589436650559845, "grad_norm": 0.5126155091637281, "learning_rate": 3.0389294403892944e-05, "loss": 0.4235, "step": 46545 }, { "epoch": 1.3590896483263017, "grad_norm": 0.47475015568084705, "learning_rate": 3.038659097053258e-05, "loss": 0.4556, "step": 46550 }, { "epoch": 1.359235631596619, "grad_norm": 0.46697133402146146, "learning_rate": 3.0383887537172212e-05, "loss": 0.422, "step": 46555 }, { "epoch": 1.3593816148669362, "grad_norm": 0.5296518498580107, "learning_rate": 3.0381184103811842e-05, "loss": 0.4658, "step": 46560 }, { "epoch": 1.3595275981372534, "grad_norm": 0.4863829923564539, "learning_rate": 3.0378480670451476e-05, "loss": 0.4572, "step": 46565 }, { "epoch": 1.3596735814075707, "grad_norm": 0.49042216469486805, "learning_rate": 3.0375777237091107e-05, "loss": 0.4455, "step": 46570 }, { "epoch": 1.359819564677888, "grad_norm": 0.4666875729880713, "learning_rate": 3.037307380373074e-05, "loss": 0.4307, "step": 46575 }, { "epoch": 1.3599655479482051, "grad_norm": 0.47553439335424474, "learning_rate": 3.037037037037037e-05, "loss": 0.4562, "step": 46580 }, { "epoch": 1.3601115312185224, "grad_norm": 0.4740696008842939, "learning_rate": 3.0367666937010002e-05, "loss": 0.462, "step": 46585 }, { "epoch": 1.3602575144888396, "grad_norm": 0.4354055858594974, "learning_rate": 3.0364963503649636e-05, "loss": 0.4566, "step": 46590 }, { "epoch": 1.3604034977591568, "grad_norm": 0.46678018769835483, "learning_rate": 3.0362260070289267e-05, "loss": 0.4647, "step": 46595 }, { "epoch": 1.360549481029474, "grad_norm": 0.5044601061951773, "learning_rate": 3.0359556636928897e-05, "loss": 0.4444, "step": 46600 }, { "epoch": 1.3606954642997913, "grad_norm": 0.47081494051648193, "learning_rate": 3.0356853203568535e-05, "loss": 0.4452, "step": 46605 }, { "epoch": 1.3608414475701085, "grad_norm": 0.4883416523815902, "learning_rate": 3.035414977020817e-05, "loss": 0.4846, "step": 46610 }, { "epoch": 1.3609874308404257, "grad_norm": 0.4414690266109428, "learning_rate": 3.03514463368478e-05, "loss": 0.4415, "step": 46615 }, { "epoch": 1.361133414110743, "grad_norm": 0.4706953128833771, "learning_rate": 3.034874290348743e-05, "loss": 0.4359, "step": 46620 }, { "epoch": 1.3612793973810602, "grad_norm": 0.4735634416979416, "learning_rate": 3.0346039470127064e-05, "loss": 0.4648, "step": 46625 }, { "epoch": 1.3614253806513774, "grad_norm": 0.526143975977996, "learning_rate": 3.0343336036766695e-05, "loss": 0.4508, "step": 46630 }, { "epoch": 1.3615713639216946, "grad_norm": 0.4735601883609949, "learning_rate": 3.0340632603406325e-05, "loss": 0.4295, "step": 46635 }, { "epoch": 1.3617173471920119, "grad_norm": 0.49838784240169176, "learning_rate": 3.033792917004596e-05, "loss": 0.4552, "step": 46640 }, { "epoch": 1.361863330462329, "grad_norm": 0.47054165136935994, "learning_rate": 3.033522573668559e-05, "loss": 0.4325, "step": 46645 }, { "epoch": 1.362009313732646, "grad_norm": 0.46380500470513325, "learning_rate": 3.0332522303325224e-05, "loss": 0.4442, "step": 46650 }, { "epoch": 1.3621552970029636, "grad_norm": 0.5087848721674394, "learning_rate": 3.0329818869964855e-05, "loss": 0.4519, "step": 46655 }, { "epoch": 1.3623012802732806, "grad_norm": 0.49414956570260615, "learning_rate": 3.0327115436604492e-05, "loss": 0.4759, "step": 46660 }, { "epoch": 1.362447263543598, "grad_norm": 0.4675585163227941, "learning_rate": 3.0324412003244123e-05, "loss": 0.4557, "step": 46665 }, { "epoch": 1.362593246813915, "grad_norm": 0.483384890224842, "learning_rate": 3.0321708569883757e-05, "loss": 0.4379, "step": 46670 }, { "epoch": 1.3627392300842325, "grad_norm": 0.4647783668950345, "learning_rate": 3.0319005136523387e-05, "loss": 0.4641, "step": 46675 }, { "epoch": 1.3628852133545495, "grad_norm": 0.5196354191394575, "learning_rate": 3.0316301703163018e-05, "loss": 0.4532, "step": 46680 }, { "epoch": 1.3630311966248667, "grad_norm": 0.5010117874430263, "learning_rate": 3.0313598269802652e-05, "loss": 0.4502, "step": 46685 }, { "epoch": 1.363177179895184, "grad_norm": 0.49843412996025815, "learning_rate": 3.0310894836442283e-05, "loss": 0.433, "step": 46690 }, { "epoch": 1.3633231631655012, "grad_norm": 0.4887982719839325, "learning_rate": 3.0308191403081913e-05, "loss": 0.4443, "step": 46695 }, { "epoch": 1.3634691464358184, "grad_norm": 0.5019789244690653, "learning_rate": 3.0305487969721547e-05, "loss": 0.4418, "step": 46700 }, { "epoch": 1.3636151297061356, "grad_norm": 0.47047357231600523, "learning_rate": 3.0302784536361178e-05, "loss": 0.4451, "step": 46705 }, { "epoch": 1.3637611129764529, "grad_norm": 0.4777217702960664, "learning_rate": 3.0300081103000812e-05, "loss": 0.46, "step": 46710 }, { "epoch": 1.36390709624677, "grad_norm": 0.4734652627097458, "learning_rate": 3.0297377669640442e-05, "loss": 0.4379, "step": 46715 }, { "epoch": 1.3640530795170873, "grad_norm": 0.4765924198707437, "learning_rate": 3.029467423628008e-05, "loss": 0.4548, "step": 46720 }, { "epoch": 1.3641990627874045, "grad_norm": 0.5269750084691426, "learning_rate": 3.029197080291971e-05, "loss": 0.4401, "step": 46725 }, { "epoch": 1.3643450460577218, "grad_norm": 0.5129242938325049, "learning_rate": 3.0289267369559345e-05, "loss": 0.4458, "step": 46730 }, { "epoch": 1.364491029328039, "grad_norm": 0.4462747650235137, "learning_rate": 3.0286563936198975e-05, "loss": 0.4462, "step": 46735 }, { "epoch": 1.3646370125983562, "grad_norm": 0.48579799461815143, "learning_rate": 3.0283860502838606e-05, "loss": 0.4359, "step": 46740 }, { "epoch": 1.3647829958686735, "grad_norm": 0.48693714476141553, "learning_rate": 3.028115706947824e-05, "loss": 0.4513, "step": 46745 }, { "epoch": 1.3649289791389907, "grad_norm": 0.4929574136867285, "learning_rate": 3.027845363611787e-05, "loss": 0.4744, "step": 46750 }, { "epoch": 1.365074962409308, "grad_norm": 0.46161585744721445, "learning_rate": 3.02757502027575e-05, "loss": 0.4409, "step": 46755 }, { "epoch": 1.3652209456796252, "grad_norm": 0.46296873380173, "learning_rate": 3.0273046769397135e-05, "loss": 0.4549, "step": 46760 }, { "epoch": 1.3653669289499424, "grad_norm": 0.4929624463096633, "learning_rate": 3.0270343336036766e-05, "loss": 0.4214, "step": 46765 }, { "epoch": 1.3655129122202596, "grad_norm": 0.4380807700912574, "learning_rate": 3.0267639902676396e-05, "loss": 0.4201, "step": 46770 }, { "epoch": 1.3656588954905768, "grad_norm": 0.47706607445886406, "learning_rate": 3.0264936469316034e-05, "loss": 0.4426, "step": 46775 }, { "epoch": 1.365804878760894, "grad_norm": 0.4601630510597422, "learning_rate": 3.0262233035955668e-05, "loss": 0.4492, "step": 46780 }, { "epoch": 1.3659508620312113, "grad_norm": 0.449231128711559, "learning_rate": 3.0259529602595298e-05, "loss": 0.4408, "step": 46785 }, { "epoch": 1.3660968453015285, "grad_norm": 0.5019490263260179, "learning_rate": 3.0256826169234932e-05, "loss": 0.425, "step": 46790 }, { "epoch": 1.3662428285718455, "grad_norm": 0.46832298156444535, "learning_rate": 3.0254122735874563e-05, "loss": 0.4264, "step": 46795 }, { "epoch": 1.366388811842163, "grad_norm": 0.46636151267714687, "learning_rate": 3.0251419302514194e-05, "loss": 0.4752, "step": 46800 }, { "epoch": 1.36653479511248, "grad_norm": 0.43946136985110823, "learning_rate": 3.0248715869153828e-05, "loss": 0.4265, "step": 46805 }, { "epoch": 1.3666807783827974, "grad_norm": 0.4805365262983196, "learning_rate": 3.0246012435793458e-05, "loss": 0.4316, "step": 46810 }, { "epoch": 1.3668267616531145, "grad_norm": 0.48047864607241075, "learning_rate": 3.024330900243309e-05, "loss": 0.459, "step": 46815 }, { "epoch": 1.366972744923432, "grad_norm": 0.4893276955301743, "learning_rate": 3.0240605569072723e-05, "loss": 0.4483, "step": 46820 }, { "epoch": 1.367118728193749, "grad_norm": 0.48397892094616524, "learning_rate": 3.0237902135712353e-05, "loss": 0.4588, "step": 46825 }, { "epoch": 1.3672647114640661, "grad_norm": 0.4818130428798242, "learning_rate": 3.023519870235199e-05, "loss": 0.4506, "step": 46830 }, { "epoch": 1.3674106947343834, "grad_norm": 0.5127246649388377, "learning_rate": 3.023249526899162e-05, "loss": 0.4257, "step": 46835 }, { "epoch": 1.3675566780047006, "grad_norm": 0.49122082361554575, "learning_rate": 3.0229791835631255e-05, "loss": 0.4553, "step": 46840 }, { "epoch": 1.3677026612750178, "grad_norm": 0.4814185679977887, "learning_rate": 3.0227088402270886e-05, "loss": 0.4392, "step": 46845 }, { "epoch": 1.367848644545335, "grad_norm": 0.4725231510721584, "learning_rate": 3.022438496891052e-05, "loss": 0.4549, "step": 46850 }, { "epoch": 1.3679946278156523, "grad_norm": 0.5073156891152955, "learning_rate": 3.022168153555015e-05, "loss": 0.4727, "step": 46855 }, { "epoch": 1.3681406110859695, "grad_norm": 0.44403122327010797, "learning_rate": 3.021897810218978e-05, "loss": 0.4386, "step": 46860 }, { "epoch": 1.3682865943562867, "grad_norm": 0.4713246024873389, "learning_rate": 3.0216274668829415e-05, "loss": 0.4513, "step": 46865 }, { "epoch": 1.368432577626604, "grad_norm": 0.4898090944818005, "learning_rate": 3.0213571235469046e-05, "loss": 0.4326, "step": 46870 }, { "epoch": 1.3685785608969212, "grad_norm": 0.5089986633249928, "learning_rate": 3.0210867802108677e-05, "loss": 0.4503, "step": 46875 }, { "epoch": 1.3687245441672384, "grad_norm": 0.5898896123014655, "learning_rate": 3.020816436874831e-05, "loss": 0.4457, "step": 46880 }, { "epoch": 1.3688705274375557, "grad_norm": 0.4913785578907199, "learning_rate": 3.020546093538794e-05, "loss": 0.4324, "step": 46885 }, { "epoch": 1.369016510707873, "grad_norm": 0.47603713616590093, "learning_rate": 3.020275750202758e-05, "loss": 0.4148, "step": 46890 }, { "epoch": 1.3691624939781901, "grad_norm": 0.49148669249556093, "learning_rate": 3.020005406866721e-05, "loss": 0.4545, "step": 46895 }, { "epoch": 1.3693084772485073, "grad_norm": 0.5199077596722822, "learning_rate": 3.0197350635306843e-05, "loss": 0.4413, "step": 46900 }, { "epoch": 1.3694544605188246, "grad_norm": 0.5203550834056697, "learning_rate": 3.0194647201946474e-05, "loss": 0.448, "step": 46905 }, { "epoch": 1.3696004437891418, "grad_norm": 0.49119203356746743, "learning_rate": 3.0191943768586105e-05, "loss": 0.4486, "step": 46910 }, { "epoch": 1.369746427059459, "grad_norm": 0.48102963085624273, "learning_rate": 3.018924033522574e-05, "loss": 0.4413, "step": 46915 }, { "epoch": 1.3698924103297763, "grad_norm": 0.4798110360610632, "learning_rate": 3.018653690186537e-05, "loss": 0.4262, "step": 46920 }, { "epoch": 1.3700383936000935, "grad_norm": 0.48160672689293627, "learning_rate": 3.0183833468505003e-05, "loss": 0.4391, "step": 46925 }, { "epoch": 1.3701843768704107, "grad_norm": 0.4954695505517151, "learning_rate": 3.0181130035144634e-05, "loss": 0.4246, "step": 46930 }, { "epoch": 1.370330360140728, "grad_norm": 0.48458528610574214, "learning_rate": 3.0178426601784264e-05, "loss": 0.4608, "step": 46935 }, { "epoch": 1.370476343411045, "grad_norm": 0.4603153071493101, "learning_rate": 3.01757231684239e-05, "loss": 0.4944, "step": 46940 }, { "epoch": 1.3706223266813624, "grad_norm": 0.49540386270249603, "learning_rate": 3.0173019735063536e-05, "loss": 0.4659, "step": 46945 }, { "epoch": 1.3707683099516794, "grad_norm": 0.49663537790268875, "learning_rate": 3.0170316301703166e-05, "loss": 0.4472, "step": 46950 }, { "epoch": 1.3709142932219969, "grad_norm": 0.4978455992551323, "learning_rate": 3.0167612868342797e-05, "loss": 0.4498, "step": 46955 }, { "epoch": 1.3710602764923139, "grad_norm": 0.4699132411935459, "learning_rate": 3.016490943498243e-05, "loss": 0.4189, "step": 46960 }, { "epoch": 1.3712062597626313, "grad_norm": 0.47475849593936303, "learning_rate": 3.0162206001622062e-05, "loss": 0.4489, "step": 46965 }, { "epoch": 1.3713522430329483, "grad_norm": 0.44528478401266786, "learning_rate": 3.0159502568261692e-05, "loss": 0.4574, "step": 46970 }, { "epoch": 1.3714982263032656, "grad_norm": 0.4553088586284895, "learning_rate": 3.0156799134901326e-05, "loss": 0.4269, "step": 46975 }, { "epoch": 1.3716442095735828, "grad_norm": 0.48056281228613207, "learning_rate": 3.0154095701540957e-05, "loss": 0.4575, "step": 46980 }, { "epoch": 1.3717901928439, "grad_norm": 0.5019411213443672, "learning_rate": 3.015139226818059e-05, "loss": 0.4552, "step": 46985 }, { "epoch": 1.3719361761142173, "grad_norm": 0.5273200272501031, "learning_rate": 3.014868883482022e-05, "loss": 0.4548, "step": 46990 }, { "epoch": 1.3720821593845345, "grad_norm": 0.4436601161838401, "learning_rate": 3.0145985401459852e-05, "loss": 0.4632, "step": 46995 }, { "epoch": 1.3722281426548517, "grad_norm": 0.4809884223136455, "learning_rate": 3.014328196809949e-05, "loss": 0.4489, "step": 47000 }, { "epoch": 1.372374125925169, "grad_norm": 0.4757219998044813, "learning_rate": 3.0140578534739124e-05, "loss": 0.4522, "step": 47005 }, { "epoch": 1.3725201091954862, "grad_norm": 0.4922335217192621, "learning_rate": 3.0137875101378754e-05, "loss": 0.4324, "step": 47010 }, { "epoch": 1.3726660924658034, "grad_norm": 0.5225270448543154, "learning_rate": 3.0135171668018385e-05, "loss": 0.4569, "step": 47015 }, { "epoch": 1.3728120757361206, "grad_norm": 0.5523059291579965, "learning_rate": 3.013246823465802e-05, "loss": 0.4552, "step": 47020 }, { "epoch": 1.3729580590064379, "grad_norm": 0.4753394690436407, "learning_rate": 3.012976480129765e-05, "loss": 0.446, "step": 47025 }, { "epoch": 1.373104042276755, "grad_norm": 0.4551827247557613, "learning_rate": 3.012706136793728e-05, "loss": 0.4482, "step": 47030 }, { "epoch": 1.3732500255470723, "grad_norm": 0.5010770504911845, "learning_rate": 3.0124357934576914e-05, "loss": 0.4449, "step": 47035 }, { "epoch": 1.3733960088173895, "grad_norm": 0.45630009274525973, "learning_rate": 3.0121654501216545e-05, "loss": 0.4576, "step": 47040 }, { "epoch": 1.3735419920877068, "grad_norm": 0.5002949394642096, "learning_rate": 3.0118951067856175e-05, "loss": 0.4613, "step": 47045 }, { "epoch": 1.373687975358024, "grad_norm": 0.46410805839923286, "learning_rate": 3.011624763449581e-05, "loss": 0.4145, "step": 47050 }, { "epoch": 1.3738339586283412, "grad_norm": 0.5130199015551322, "learning_rate": 3.011354420113544e-05, "loss": 0.4502, "step": 47055 }, { "epoch": 1.3739799418986585, "grad_norm": 0.4432738536818739, "learning_rate": 3.0110840767775077e-05, "loss": 0.4407, "step": 47060 }, { "epoch": 1.3741259251689757, "grad_norm": 0.4717443390999813, "learning_rate": 3.010813733441471e-05, "loss": 0.4323, "step": 47065 }, { "epoch": 1.374271908439293, "grad_norm": 0.49841526441446504, "learning_rate": 3.0105433901054342e-05, "loss": 0.4417, "step": 47070 }, { "epoch": 1.3744178917096102, "grad_norm": 0.4751215210949176, "learning_rate": 3.0102730467693973e-05, "loss": 0.4578, "step": 47075 }, { "epoch": 1.3745638749799274, "grad_norm": 0.5223993362401409, "learning_rate": 3.0100027034333607e-05, "loss": 0.4485, "step": 47080 }, { "epoch": 1.3747098582502444, "grad_norm": 0.5076319408689871, "learning_rate": 3.0097323600973237e-05, "loss": 0.4548, "step": 47085 }, { "epoch": 1.3748558415205618, "grad_norm": 0.4702148278141048, "learning_rate": 3.0094620167612868e-05, "loss": 0.4368, "step": 47090 }, { "epoch": 1.3750018247908788, "grad_norm": 0.4907449568308281, "learning_rate": 3.0091916734252502e-05, "loss": 0.4255, "step": 47095 }, { "epoch": 1.3751478080611963, "grad_norm": 0.4857164498978068, "learning_rate": 3.0089213300892133e-05, "loss": 0.4467, "step": 47100 }, { "epoch": 1.3752937913315133, "grad_norm": 0.45826408682291375, "learning_rate": 3.0086509867531763e-05, "loss": 0.4593, "step": 47105 }, { "epoch": 1.3754397746018308, "grad_norm": 0.5219270145540651, "learning_rate": 3.0083806434171397e-05, "loss": 0.4525, "step": 47110 }, { "epoch": 1.3755857578721478, "grad_norm": 0.5003370758412764, "learning_rate": 3.0081103000811035e-05, "loss": 0.4598, "step": 47115 }, { "epoch": 1.375731741142465, "grad_norm": 0.4936554774335869, "learning_rate": 3.0078399567450665e-05, "loss": 0.433, "step": 47120 }, { "epoch": 1.3758777244127822, "grad_norm": 0.47666507928141155, "learning_rate": 3.00756961340903e-05, "loss": 0.4422, "step": 47125 }, { "epoch": 1.3760237076830995, "grad_norm": 0.4940106063802831, "learning_rate": 3.007299270072993e-05, "loss": 0.4288, "step": 47130 }, { "epoch": 1.3761696909534167, "grad_norm": 0.44351045672069217, "learning_rate": 3.007028926736956e-05, "loss": 0.4258, "step": 47135 }, { "epoch": 1.376315674223734, "grad_norm": 0.4577862908074168, "learning_rate": 3.0067585834009195e-05, "loss": 0.4396, "step": 47140 }, { "epoch": 1.3764616574940511, "grad_norm": 0.4622440505060388, "learning_rate": 3.0064882400648825e-05, "loss": 0.4322, "step": 47145 }, { "epoch": 1.3766076407643684, "grad_norm": 0.4693343215023837, "learning_rate": 3.0062178967288456e-05, "loss": 0.4339, "step": 47150 }, { "epoch": 1.3767536240346856, "grad_norm": 0.47998733541593785, "learning_rate": 3.005947553392809e-05, "loss": 0.4373, "step": 47155 }, { "epoch": 1.3768996073050028, "grad_norm": 0.48150865264947207, "learning_rate": 3.005677210056772e-05, "loss": 0.4474, "step": 47160 }, { "epoch": 1.37704559057532, "grad_norm": 0.4863029394482241, "learning_rate": 3.005406866720735e-05, "loss": 0.4799, "step": 47165 }, { "epoch": 1.3771915738456373, "grad_norm": 0.47174631308485027, "learning_rate": 3.005136523384699e-05, "loss": 0.4411, "step": 47170 }, { "epoch": 1.3773375571159545, "grad_norm": 0.5060798050197926, "learning_rate": 3.0048661800486622e-05, "loss": 0.4462, "step": 47175 }, { "epoch": 1.3774835403862717, "grad_norm": 0.48487798066075677, "learning_rate": 3.0045958367126253e-05, "loss": 0.4536, "step": 47180 }, { "epoch": 1.377629523656589, "grad_norm": 0.46975660781178663, "learning_rate": 3.0043254933765884e-05, "loss": 0.4598, "step": 47185 }, { "epoch": 1.3777755069269062, "grad_norm": 0.46601367838589797, "learning_rate": 3.0040551500405518e-05, "loss": 0.4447, "step": 47190 }, { "epoch": 1.3779214901972234, "grad_norm": 0.4932930967206557, "learning_rate": 3.003784806704515e-05, "loss": 0.4574, "step": 47195 }, { "epoch": 1.3780674734675407, "grad_norm": 0.4996150518048542, "learning_rate": 3.0035144633684782e-05, "loss": 0.4426, "step": 47200 }, { "epoch": 1.378213456737858, "grad_norm": 0.5219377489265694, "learning_rate": 3.0032441200324413e-05, "loss": 0.4742, "step": 47205 }, { "epoch": 1.3783594400081751, "grad_norm": 0.4711925251924634, "learning_rate": 3.0029737766964044e-05, "loss": 0.4383, "step": 47210 }, { "epoch": 1.3785054232784923, "grad_norm": 0.4746816320491615, "learning_rate": 3.0027034333603678e-05, "loss": 0.4249, "step": 47215 }, { "epoch": 1.3786514065488096, "grad_norm": 0.45676134940427704, "learning_rate": 3.0024330900243308e-05, "loss": 0.4353, "step": 47220 }, { "epoch": 1.3787973898191268, "grad_norm": 0.4900919987101394, "learning_rate": 3.002162746688294e-05, "loss": 0.4474, "step": 47225 }, { "epoch": 1.3789433730894438, "grad_norm": 0.5025149938522632, "learning_rate": 3.0018924033522576e-05, "loss": 0.4411, "step": 47230 }, { "epoch": 1.3790893563597613, "grad_norm": 0.4708780632857249, "learning_rate": 3.001622060016221e-05, "loss": 0.4487, "step": 47235 }, { "epoch": 1.3792353396300783, "grad_norm": 0.4556523745507136, "learning_rate": 3.001351716680184e-05, "loss": 0.4408, "step": 47240 }, { "epoch": 1.3793813229003957, "grad_norm": 0.5069506772257185, "learning_rate": 3.001081373344147e-05, "loss": 0.4371, "step": 47245 }, { "epoch": 1.3795273061707127, "grad_norm": 0.5348961986056286, "learning_rate": 3.0008110300081106e-05, "loss": 0.4928, "step": 47250 }, { "epoch": 1.3796732894410302, "grad_norm": 0.4269551076909819, "learning_rate": 3.0005406866720736e-05, "loss": 0.4416, "step": 47255 }, { "epoch": 1.3798192727113472, "grad_norm": 0.5401749456650297, "learning_rate": 3.000270343336037e-05, "loss": 0.4743, "step": 47260 }, { "epoch": 1.3799652559816644, "grad_norm": 0.49201417843462836, "learning_rate": 3e-05, "loss": 0.4327, "step": 47265 }, { "epoch": 1.3801112392519816, "grad_norm": 0.4586996530062788, "learning_rate": 2.999729656663963e-05, "loss": 0.4321, "step": 47270 }, { "epoch": 1.3802572225222989, "grad_norm": 0.48641344047473284, "learning_rate": 2.9994593133279265e-05, "loss": 0.4671, "step": 47275 }, { "epoch": 1.380403205792616, "grad_norm": 0.435086604015949, "learning_rate": 2.9991889699918896e-05, "loss": 0.4248, "step": 47280 }, { "epoch": 1.3805491890629333, "grad_norm": 0.4630811866541619, "learning_rate": 2.9989186266558533e-05, "loss": 0.4544, "step": 47285 }, { "epoch": 1.3806951723332506, "grad_norm": 0.5152634156416501, "learning_rate": 2.9986482833198164e-05, "loss": 0.4334, "step": 47290 }, { "epoch": 1.3808411556035678, "grad_norm": 0.45846384864572026, "learning_rate": 2.9983779399837798e-05, "loss": 0.4565, "step": 47295 }, { "epoch": 1.380987138873885, "grad_norm": 0.4956900141366908, "learning_rate": 2.998107596647743e-05, "loss": 0.4471, "step": 47300 }, { "epoch": 1.3811331221442023, "grad_norm": 0.5126817245699656, "learning_rate": 2.997837253311706e-05, "loss": 0.468, "step": 47305 }, { "epoch": 1.3812791054145195, "grad_norm": 0.45257201471167247, "learning_rate": 2.9975669099756693e-05, "loss": 0.4324, "step": 47310 }, { "epoch": 1.3814250886848367, "grad_norm": 0.47180601573487924, "learning_rate": 2.9972965666396324e-05, "loss": 0.4535, "step": 47315 }, { "epoch": 1.381571071955154, "grad_norm": 0.4781169325056213, "learning_rate": 2.9970262233035955e-05, "loss": 0.4298, "step": 47320 }, { "epoch": 1.3817170552254712, "grad_norm": 0.48761619331855655, "learning_rate": 2.996755879967559e-05, "loss": 0.4418, "step": 47325 }, { "epoch": 1.3818630384957884, "grad_norm": 0.45463868005923785, "learning_rate": 2.996485536631522e-05, "loss": 0.4078, "step": 47330 }, { "epoch": 1.3820090217661056, "grad_norm": 0.5439169887989505, "learning_rate": 2.9962151932954853e-05, "loss": 0.4626, "step": 47335 }, { "epoch": 1.3821550050364229, "grad_norm": 0.48496800704651954, "learning_rate": 2.995944849959449e-05, "loss": 0.4463, "step": 47340 }, { "epoch": 1.38230098830674, "grad_norm": 0.4938932979819834, "learning_rate": 2.995674506623412e-05, "loss": 0.4831, "step": 47345 }, { "epoch": 1.3824469715770573, "grad_norm": 0.49039110064668623, "learning_rate": 2.9954041632873752e-05, "loss": 0.4942, "step": 47350 }, { "epoch": 1.3825929548473745, "grad_norm": 0.5009288573517604, "learning_rate": 2.9951338199513386e-05, "loss": 0.4346, "step": 47355 }, { "epoch": 1.3827389381176918, "grad_norm": 0.4388137159543146, "learning_rate": 2.9948634766153016e-05, "loss": 0.4366, "step": 47360 }, { "epoch": 1.382884921388009, "grad_norm": 0.4632601472156821, "learning_rate": 2.9945931332792647e-05, "loss": 0.4541, "step": 47365 }, { "epoch": 1.3830309046583262, "grad_norm": 0.5169608524230203, "learning_rate": 2.994322789943228e-05, "loss": 0.4445, "step": 47370 }, { "epoch": 1.3831768879286432, "grad_norm": 0.512812313693629, "learning_rate": 2.9940524466071912e-05, "loss": 0.4647, "step": 47375 }, { "epoch": 1.3833228711989607, "grad_norm": 0.48255202308533257, "learning_rate": 2.9937821032711542e-05, "loss": 0.4667, "step": 47380 }, { "epoch": 1.3834688544692777, "grad_norm": 0.43788713936736773, "learning_rate": 2.9935117599351176e-05, "loss": 0.4268, "step": 47385 }, { "epoch": 1.3836148377395951, "grad_norm": 0.4696769104098106, "learning_rate": 2.9932414165990807e-05, "loss": 0.4443, "step": 47390 }, { "epoch": 1.3837608210099122, "grad_norm": 0.4587548898343538, "learning_rate": 2.992971073263044e-05, "loss": 0.4378, "step": 47395 }, { "epoch": 1.3839068042802296, "grad_norm": 0.4813113167643012, "learning_rate": 2.992700729927008e-05, "loss": 0.415, "step": 47400 }, { "epoch": 1.3840527875505466, "grad_norm": 0.47637791903018456, "learning_rate": 2.992430386590971e-05, "loss": 0.4587, "step": 47405 }, { "epoch": 1.384198770820864, "grad_norm": 0.49581119005031205, "learning_rate": 2.992160043254934e-05, "loss": 0.45, "step": 47410 }, { "epoch": 1.384344754091181, "grad_norm": 0.4935053752759672, "learning_rate": 2.9918896999188974e-05, "loss": 0.4585, "step": 47415 }, { "epoch": 1.3844907373614983, "grad_norm": 0.4557726402790533, "learning_rate": 2.9916193565828604e-05, "loss": 0.4412, "step": 47420 }, { "epoch": 1.3846367206318155, "grad_norm": 0.509472149962745, "learning_rate": 2.9913490132468235e-05, "loss": 0.4767, "step": 47425 }, { "epoch": 1.3847827039021328, "grad_norm": 0.48661208494239105, "learning_rate": 2.991078669910787e-05, "loss": 0.4512, "step": 47430 }, { "epoch": 1.38492868717245, "grad_norm": 0.46523457372212246, "learning_rate": 2.99080832657475e-05, "loss": 0.4523, "step": 47435 }, { "epoch": 1.3850746704427672, "grad_norm": 0.5015412159330491, "learning_rate": 2.990537983238713e-05, "loss": 0.4493, "step": 47440 }, { "epoch": 1.3852206537130844, "grad_norm": 0.4698917841736275, "learning_rate": 2.9902676399026764e-05, "loss": 0.437, "step": 47445 }, { "epoch": 1.3853666369834017, "grad_norm": 0.48869660770712103, "learning_rate": 2.9899972965666395e-05, "loss": 0.4646, "step": 47450 }, { "epoch": 1.385512620253719, "grad_norm": 0.4819056634310864, "learning_rate": 2.9897269532306032e-05, "loss": 0.4384, "step": 47455 }, { "epoch": 1.3856586035240361, "grad_norm": 0.4801539496030698, "learning_rate": 2.9894566098945663e-05, "loss": 0.4337, "step": 47460 }, { "epoch": 1.3858045867943534, "grad_norm": 0.4308100395871715, "learning_rate": 2.9891862665585297e-05, "loss": 0.4156, "step": 47465 }, { "epoch": 1.3859505700646706, "grad_norm": 0.4782259492751594, "learning_rate": 2.9889159232224927e-05, "loss": 0.4337, "step": 47470 }, { "epoch": 1.3860965533349878, "grad_norm": 0.47769900608439153, "learning_rate": 2.988645579886456e-05, "loss": 0.4503, "step": 47475 }, { "epoch": 1.386242536605305, "grad_norm": 0.4607991183042921, "learning_rate": 2.9883752365504192e-05, "loss": 0.4699, "step": 47480 }, { "epoch": 1.3863885198756223, "grad_norm": 0.46776524438964984, "learning_rate": 2.9881048932143823e-05, "loss": 0.4531, "step": 47485 }, { "epoch": 1.3865345031459395, "grad_norm": 0.4496798563831141, "learning_rate": 2.9878345498783457e-05, "loss": 0.4231, "step": 47490 }, { "epoch": 1.3866804864162567, "grad_norm": 0.45344287599642885, "learning_rate": 2.9875642065423087e-05, "loss": 0.452, "step": 47495 }, { "epoch": 1.386826469686574, "grad_norm": 0.5082456447774266, "learning_rate": 2.9872938632062718e-05, "loss": 0.4487, "step": 47500 }, { "epoch": 1.3869724529568912, "grad_norm": 0.5000844530968068, "learning_rate": 2.9870235198702352e-05, "loss": 0.444, "step": 47505 }, { "epoch": 1.3871184362272084, "grad_norm": 0.46617929832114774, "learning_rate": 2.986753176534199e-05, "loss": 0.4368, "step": 47510 }, { "epoch": 1.3872644194975257, "grad_norm": 0.47479748556144974, "learning_rate": 2.986482833198162e-05, "loss": 0.4391, "step": 47515 }, { "epoch": 1.3874104027678429, "grad_norm": 0.5465853604592651, "learning_rate": 2.986212489862125e-05, "loss": 0.4677, "step": 47520 }, { "epoch": 1.3875563860381601, "grad_norm": 0.44859542042471146, "learning_rate": 2.9859421465260885e-05, "loss": 0.4387, "step": 47525 }, { "epoch": 1.3877023693084771, "grad_norm": 0.45566263007804414, "learning_rate": 2.9856718031900515e-05, "loss": 0.4707, "step": 47530 }, { "epoch": 1.3878483525787946, "grad_norm": 0.5108049089315431, "learning_rate": 2.985401459854015e-05, "loss": 0.449, "step": 47535 }, { "epoch": 1.3879943358491116, "grad_norm": 0.4961129176861813, "learning_rate": 2.985131116517978e-05, "loss": 0.4897, "step": 47540 }, { "epoch": 1.388140319119429, "grad_norm": 0.5146465386809563, "learning_rate": 2.984860773181941e-05, "loss": 0.457, "step": 47545 }, { "epoch": 1.388286302389746, "grad_norm": 0.4654402260091184, "learning_rate": 2.9845904298459045e-05, "loss": 0.439, "step": 47550 }, { "epoch": 1.3884322856600635, "grad_norm": 0.5232227007409792, "learning_rate": 2.9843200865098675e-05, "loss": 0.474, "step": 47555 }, { "epoch": 1.3885782689303805, "grad_norm": 0.5004783398210298, "learning_rate": 2.9840497431738306e-05, "loss": 0.4608, "step": 47560 }, { "epoch": 1.3887242522006977, "grad_norm": 0.5224563364776393, "learning_rate": 2.983779399837794e-05, "loss": 0.4677, "step": 47565 }, { "epoch": 1.388870235471015, "grad_norm": 0.47447741770731483, "learning_rate": 2.9835090565017577e-05, "loss": 0.4455, "step": 47570 }, { "epoch": 1.3890162187413322, "grad_norm": 0.4919772864335579, "learning_rate": 2.9832387131657208e-05, "loss": 0.4404, "step": 47575 }, { "epoch": 1.3891622020116494, "grad_norm": 0.48754152948857715, "learning_rate": 2.982968369829684e-05, "loss": 0.4665, "step": 47580 }, { "epoch": 1.3893081852819666, "grad_norm": 0.48682049152518325, "learning_rate": 2.9826980264936472e-05, "loss": 0.4696, "step": 47585 }, { "epoch": 1.3894541685522839, "grad_norm": 0.49720315228256473, "learning_rate": 2.9824276831576103e-05, "loss": 0.4514, "step": 47590 }, { "epoch": 1.389600151822601, "grad_norm": 0.5003937756219772, "learning_rate": 2.9821573398215734e-05, "loss": 0.4425, "step": 47595 }, { "epoch": 1.3897461350929183, "grad_norm": 0.46276975183840646, "learning_rate": 2.9818869964855368e-05, "loss": 0.432, "step": 47600 }, { "epoch": 1.3898921183632356, "grad_norm": 0.5041840605976458, "learning_rate": 2.9816166531495e-05, "loss": 0.4522, "step": 47605 }, { "epoch": 1.3900381016335528, "grad_norm": 0.4867806165591307, "learning_rate": 2.9813463098134632e-05, "loss": 0.4412, "step": 47610 }, { "epoch": 1.39018408490387, "grad_norm": 0.48061531057884743, "learning_rate": 2.9810759664774263e-05, "loss": 0.4396, "step": 47615 }, { "epoch": 1.3903300681741873, "grad_norm": 0.44474901089796454, "learning_rate": 2.9808056231413894e-05, "loss": 0.4173, "step": 47620 }, { "epoch": 1.3904760514445045, "grad_norm": 0.4531246011886768, "learning_rate": 2.980535279805353e-05, "loss": 0.4321, "step": 47625 }, { "epoch": 1.3906220347148217, "grad_norm": 0.4562578262066704, "learning_rate": 2.9802649364693165e-05, "loss": 0.4535, "step": 47630 }, { "epoch": 1.390768017985139, "grad_norm": 0.42631991324094176, "learning_rate": 2.9799945931332796e-05, "loss": 0.4331, "step": 47635 }, { "epoch": 1.3909140012554562, "grad_norm": 0.5033009867648761, "learning_rate": 2.9797242497972426e-05, "loss": 0.4379, "step": 47640 }, { "epoch": 1.3910599845257734, "grad_norm": 0.4564528009391364, "learning_rate": 2.979453906461206e-05, "loss": 0.4516, "step": 47645 }, { "epoch": 1.3912059677960906, "grad_norm": 0.47629669650681145, "learning_rate": 2.979183563125169e-05, "loss": 0.4543, "step": 47650 }, { "epoch": 1.3913519510664079, "grad_norm": 0.4645837959945979, "learning_rate": 2.978913219789132e-05, "loss": 0.4434, "step": 47655 }, { "epoch": 1.391497934336725, "grad_norm": 0.4791793650786723, "learning_rate": 2.9786428764530956e-05, "loss": 0.4296, "step": 47660 }, { "epoch": 1.3916439176070423, "grad_norm": 0.514720059074033, "learning_rate": 2.9783725331170586e-05, "loss": 0.4534, "step": 47665 }, { "epoch": 1.3917899008773595, "grad_norm": 0.5054163082288181, "learning_rate": 2.9781021897810217e-05, "loss": 0.4563, "step": 47670 }, { "epoch": 1.3919358841476765, "grad_norm": 0.5300773603849389, "learning_rate": 2.977831846444985e-05, "loss": 0.4579, "step": 47675 }, { "epoch": 1.392081867417994, "grad_norm": 0.5290098534870349, "learning_rate": 2.9775615031089488e-05, "loss": 0.4812, "step": 47680 }, { "epoch": 1.392227850688311, "grad_norm": 0.502763499673683, "learning_rate": 2.977291159772912e-05, "loss": 0.465, "step": 47685 }, { "epoch": 1.3923738339586285, "grad_norm": 0.47540575453474876, "learning_rate": 2.9770208164368753e-05, "loss": 0.4495, "step": 47690 }, { "epoch": 1.3925198172289455, "grad_norm": 0.5236340005452633, "learning_rate": 2.9767504731008383e-05, "loss": 0.4724, "step": 47695 }, { "epoch": 1.392665800499263, "grad_norm": 0.44656370724677197, "learning_rate": 2.9764801297648014e-05, "loss": 0.4409, "step": 47700 }, { "epoch": 1.39281178376958, "grad_norm": 0.4613966553987285, "learning_rate": 2.9762097864287648e-05, "loss": 0.4398, "step": 47705 }, { "epoch": 1.3929577670398972, "grad_norm": 0.4634941711977414, "learning_rate": 2.975939443092728e-05, "loss": 0.4537, "step": 47710 }, { "epoch": 1.3931037503102144, "grad_norm": 0.4868185880596912, "learning_rate": 2.975669099756691e-05, "loss": 0.4487, "step": 47715 }, { "epoch": 1.3932497335805316, "grad_norm": 0.46111879876969203, "learning_rate": 2.9753987564206543e-05, "loss": 0.4373, "step": 47720 }, { "epoch": 1.3933957168508488, "grad_norm": 0.49462817286100624, "learning_rate": 2.9751284130846174e-05, "loss": 0.4584, "step": 47725 }, { "epoch": 1.393541700121166, "grad_norm": 0.5306039794005204, "learning_rate": 2.9748580697485805e-05, "loss": 0.4389, "step": 47730 }, { "epoch": 1.3936876833914833, "grad_norm": 0.4981428777130868, "learning_rate": 2.974587726412544e-05, "loss": 0.4567, "step": 47735 }, { "epoch": 1.3938336666618005, "grad_norm": 0.5024936752637973, "learning_rate": 2.9743173830765076e-05, "loss": 0.4461, "step": 47740 }, { "epoch": 1.3939796499321178, "grad_norm": 0.4949924434228375, "learning_rate": 2.9740470397404707e-05, "loss": 0.4524, "step": 47745 }, { "epoch": 1.394125633202435, "grad_norm": 0.5370258712924818, "learning_rate": 2.973776696404434e-05, "loss": 0.4718, "step": 47750 }, { "epoch": 1.3942716164727522, "grad_norm": 0.467008016351928, "learning_rate": 2.973506353068397e-05, "loss": 0.4451, "step": 47755 }, { "epoch": 1.3944175997430694, "grad_norm": 0.48318989346931357, "learning_rate": 2.9732360097323602e-05, "loss": 0.4377, "step": 47760 }, { "epoch": 1.3945635830133867, "grad_norm": 0.49223840214361253, "learning_rate": 2.9729656663963236e-05, "loss": 0.4513, "step": 47765 }, { "epoch": 1.394709566283704, "grad_norm": 0.5130981290017224, "learning_rate": 2.9726953230602866e-05, "loss": 0.4627, "step": 47770 }, { "epoch": 1.3948555495540211, "grad_norm": 0.5334025289459339, "learning_rate": 2.9724249797242497e-05, "loss": 0.4817, "step": 47775 }, { "epoch": 1.3950015328243384, "grad_norm": 0.4801950017672745, "learning_rate": 2.972154636388213e-05, "loss": 0.4559, "step": 47780 }, { "epoch": 1.3951475160946556, "grad_norm": 0.46656455221259646, "learning_rate": 2.9718842930521762e-05, "loss": 0.449, "step": 47785 }, { "epoch": 1.3952934993649728, "grad_norm": 0.4404937012094042, "learning_rate": 2.9716139497161392e-05, "loss": 0.4557, "step": 47790 }, { "epoch": 1.39543948263529, "grad_norm": 0.502789607303006, "learning_rate": 2.971343606380103e-05, "loss": 0.4368, "step": 47795 }, { "epoch": 1.3955854659056073, "grad_norm": 0.5017019064975299, "learning_rate": 2.9710732630440664e-05, "loss": 0.4654, "step": 47800 }, { "epoch": 1.3957314491759245, "grad_norm": 0.4547449244781721, "learning_rate": 2.9708029197080294e-05, "loss": 0.4314, "step": 47805 }, { "epoch": 1.3958774324462417, "grad_norm": 0.4507348231506363, "learning_rate": 2.9705325763719925e-05, "loss": 0.4345, "step": 47810 }, { "epoch": 1.396023415716559, "grad_norm": 0.5180883070592625, "learning_rate": 2.970262233035956e-05, "loss": 0.48, "step": 47815 }, { "epoch": 1.396169398986876, "grad_norm": 0.455852986633411, "learning_rate": 2.969991889699919e-05, "loss": 0.4545, "step": 47820 }, { "epoch": 1.3963153822571934, "grad_norm": 0.5108031159459059, "learning_rate": 2.9697215463638824e-05, "loss": 0.4757, "step": 47825 }, { "epoch": 1.3964613655275104, "grad_norm": 0.49611092679920255, "learning_rate": 2.9694512030278454e-05, "loss": 0.4365, "step": 47830 }, { "epoch": 1.3966073487978279, "grad_norm": 0.5080104564347302, "learning_rate": 2.9691808596918085e-05, "loss": 0.4488, "step": 47835 }, { "epoch": 1.396753332068145, "grad_norm": 0.5155372669689854, "learning_rate": 2.968910516355772e-05, "loss": 0.4563, "step": 47840 }, { "epoch": 1.3968993153384623, "grad_norm": 0.4744299550243923, "learning_rate": 2.968640173019735e-05, "loss": 0.4288, "step": 47845 }, { "epoch": 1.3970452986087794, "grad_norm": 0.4930710638196359, "learning_rate": 2.9683698296836987e-05, "loss": 0.4566, "step": 47850 }, { "epoch": 1.3971912818790966, "grad_norm": 0.5092153470335057, "learning_rate": 2.9680994863476618e-05, "loss": 0.4631, "step": 47855 }, { "epoch": 1.3973372651494138, "grad_norm": 0.4720518544375778, "learning_rate": 2.967829143011625e-05, "loss": 0.4304, "step": 47860 }, { "epoch": 1.397483248419731, "grad_norm": 0.4906431091976575, "learning_rate": 2.9675587996755882e-05, "loss": 0.4478, "step": 47865 }, { "epoch": 1.3976292316900483, "grad_norm": 0.5120199016423268, "learning_rate": 2.9672884563395513e-05, "loss": 0.4636, "step": 47870 }, { "epoch": 1.3977752149603655, "grad_norm": 0.5031688907051673, "learning_rate": 2.9670181130035147e-05, "loss": 0.4339, "step": 47875 }, { "epoch": 1.3979211982306827, "grad_norm": 0.4891454330301172, "learning_rate": 2.9667477696674777e-05, "loss": 0.4481, "step": 47880 }, { "epoch": 1.398067181501, "grad_norm": 0.47955799984275205, "learning_rate": 2.966477426331441e-05, "loss": 0.4352, "step": 47885 }, { "epoch": 1.3982131647713172, "grad_norm": 0.513885166524143, "learning_rate": 2.9662070829954042e-05, "loss": 0.455, "step": 47890 }, { "epoch": 1.3983591480416344, "grad_norm": 0.5020865047698272, "learning_rate": 2.9659367396593673e-05, "loss": 0.4571, "step": 47895 }, { "epoch": 1.3985051313119516, "grad_norm": 0.47373116298592155, "learning_rate": 2.9656663963233307e-05, "loss": 0.4281, "step": 47900 }, { "epoch": 1.3986511145822689, "grad_norm": 0.4572531164385971, "learning_rate": 2.9653960529872944e-05, "loss": 0.4653, "step": 47905 }, { "epoch": 1.398797097852586, "grad_norm": 0.4890631568030843, "learning_rate": 2.9651257096512575e-05, "loss": 0.4281, "step": 47910 }, { "epoch": 1.3989430811229033, "grad_norm": 0.49690852649982603, "learning_rate": 2.9648553663152205e-05, "loss": 0.4497, "step": 47915 }, { "epoch": 1.3990890643932206, "grad_norm": 0.5000143191715921, "learning_rate": 2.964585022979184e-05, "loss": 0.4514, "step": 47920 }, { "epoch": 1.3992350476635378, "grad_norm": 0.4880716536896593, "learning_rate": 2.964314679643147e-05, "loss": 0.4432, "step": 47925 }, { "epoch": 1.399381030933855, "grad_norm": 0.4924464141650717, "learning_rate": 2.96404433630711e-05, "loss": 0.4259, "step": 47930 }, { "epoch": 1.3995270142041722, "grad_norm": 0.5202156256307048, "learning_rate": 2.9637739929710735e-05, "loss": 0.4365, "step": 47935 }, { "epoch": 1.3996729974744895, "grad_norm": 0.4948842564643437, "learning_rate": 2.9635036496350365e-05, "loss": 0.4641, "step": 47940 }, { "epoch": 1.3998189807448067, "grad_norm": 0.5103751363271272, "learning_rate": 2.9632333062989996e-05, "loss": 0.4259, "step": 47945 }, { "epoch": 1.399964964015124, "grad_norm": 0.4834376321479281, "learning_rate": 2.962962962962963e-05, "loss": 0.4468, "step": 47950 }, { "epoch": 1.4001109472854412, "grad_norm": 0.469025554644861, "learning_rate": 2.962692619626926e-05, "loss": 0.4657, "step": 47955 }, { "epoch": 1.4002569305557584, "grad_norm": 0.4827338045530426, "learning_rate": 2.9624222762908895e-05, "loss": 0.4783, "step": 47960 }, { "epoch": 1.4004029138260754, "grad_norm": 0.4570107727056536, "learning_rate": 2.9621519329548532e-05, "loss": 0.4484, "step": 47965 }, { "epoch": 1.4005488970963929, "grad_norm": 0.4620758417712642, "learning_rate": 2.9618815896188163e-05, "loss": 0.4498, "step": 47970 }, { "epoch": 1.4006948803667099, "grad_norm": 0.46633022533394203, "learning_rate": 2.9616112462827793e-05, "loss": 0.4184, "step": 47975 }, { "epoch": 1.4008408636370273, "grad_norm": 0.47115544662714026, "learning_rate": 2.9613409029467427e-05, "loss": 0.4553, "step": 47980 }, { "epoch": 1.4009868469073443, "grad_norm": 0.5199050555484331, "learning_rate": 2.9610705596107058e-05, "loss": 0.4585, "step": 47985 }, { "epoch": 1.4011328301776618, "grad_norm": 0.467241319425953, "learning_rate": 2.960800216274669e-05, "loss": 0.4568, "step": 47990 }, { "epoch": 1.4012788134479788, "grad_norm": 0.521795896827875, "learning_rate": 2.9605298729386322e-05, "loss": 0.4705, "step": 47995 }, { "epoch": 1.401424796718296, "grad_norm": 0.5147140898220586, "learning_rate": 2.9602595296025953e-05, "loss": 0.4453, "step": 48000 }, { "epoch": 1.4015707799886132, "grad_norm": 0.44077247331465147, "learning_rate": 2.9599891862665584e-05, "loss": 0.4354, "step": 48005 }, { "epoch": 1.4017167632589305, "grad_norm": 0.46701270999829125, "learning_rate": 2.9597188429305218e-05, "loss": 0.4503, "step": 48010 }, { "epoch": 1.4018627465292477, "grad_norm": 0.4611002894112688, "learning_rate": 2.959448499594485e-05, "loss": 0.451, "step": 48015 }, { "epoch": 1.402008729799565, "grad_norm": 0.4619777436562035, "learning_rate": 2.9591781562584486e-05, "loss": 0.4358, "step": 48020 }, { "epoch": 1.4021547130698822, "grad_norm": 0.5041485399235327, "learning_rate": 2.958907812922412e-05, "loss": 0.4117, "step": 48025 }, { "epoch": 1.4023006963401994, "grad_norm": 0.4829671384666108, "learning_rate": 2.958637469586375e-05, "loss": 0.4582, "step": 48030 }, { "epoch": 1.4024466796105166, "grad_norm": 0.5049477904868259, "learning_rate": 2.958367126250338e-05, "loss": 0.4545, "step": 48035 }, { "epoch": 1.4025926628808338, "grad_norm": 0.46874312462860845, "learning_rate": 2.9580967829143015e-05, "loss": 0.4352, "step": 48040 }, { "epoch": 1.402738646151151, "grad_norm": 0.49914567515165864, "learning_rate": 2.9578264395782646e-05, "loss": 0.4609, "step": 48045 }, { "epoch": 1.4028846294214683, "grad_norm": 0.4612043038486216, "learning_rate": 2.9575560962422276e-05, "loss": 0.4278, "step": 48050 }, { "epoch": 1.4030306126917855, "grad_norm": 0.4926378551669985, "learning_rate": 2.957285752906191e-05, "loss": 0.4576, "step": 48055 }, { "epoch": 1.4031765959621028, "grad_norm": 0.4702887826950298, "learning_rate": 2.957015409570154e-05, "loss": 0.4271, "step": 48060 }, { "epoch": 1.40332257923242, "grad_norm": 0.45664925221312774, "learning_rate": 2.956745066234117e-05, "loss": 0.4559, "step": 48065 }, { "epoch": 1.4034685625027372, "grad_norm": 0.4979993915810676, "learning_rate": 2.9564747228980806e-05, "loss": 0.4623, "step": 48070 }, { "epoch": 1.4036145457730544, "grad_norm": 0.5039733052337133, "learning_rate": 2.9562043795620443e-05, "loss": 0.4614, "step": 48075 }, { "epoch": 1.4037605290433717, "grad_norm": 0.45383508441996995, "learning_rate": 2.9559340362260074e-05, "loss": 0.4309, "step": 48080 }, { "epoch": 1.403906512313689, "grad_norm": 0.477589711118883, "learning_rate": 2.9556636928899704e-05, "loss": 0.4515, "step": 48085 }, { "epoch": 1.4040524955840061, "grad_norm": 0.4643472037736894, "learning_rate": 2.9553933495539338e-05, "loss": 0.4578, "step": 48090 }, { "epoch": 1.4041984788543234, "grad_norm": 0.4664896206320066, "learning_rate": 2.955123006217897e-05, "loss": 0.4425, "step": 48095 }, { "epoch": 1.4043444621246406, "grad_norm": 0.4384204204277079, "learning_rate": 2.9548526628818603e-05, "loss": 0.4473, "step": 48100 }, { "epoch": 1.4044904453949578, "grad_norm": 0.5239863113396128, "learning_rate": 2.9545823195458233e-05, "loss": 0.441, "step": 48105 }, { "epoch": 1.4046364286652748, "grad_norm": 0.5123981693063284, "learning_rate": 2.9543119762097864e-05, "loss": 0.445, "step": 48110 }, { "epoch": 1.4047824119355923, "grad_norm": 0.4807601395000768, "learning_rate": 2.9540416328737498e-05, "loss": 0.4462, "step": 48115 }, { "epoch": 1.4049283952059093, "grad_norm": 0.4731873789991809, "learning_rate": 2.953771289537713e-05, "loss": 0.4756, "step": 48120 }, { "epoch": 1.4050743784762267, "grad_norm": 0.4277139032419876, "learning_rate": 2.953500946201676e-05, "loss": 0.4447, "step": 48125 }, { "epoch": 1.4052203617465437, "grad_norm": 0.46383338103940225, "learning_rate": 2.9532306028656393e-05, "loss": 0.4594, "step": 48130 }, { "epoch": 1.4053663450168612, "grad_norm": 0.46336523226156395, "learning_rate": 2.952960259529603e-05, "loss": 0.4413, "step": 48135 }, { "epoch": 1.4055123282871782, "grad_norm": 0.45062234608268453, "learning_rate": 2.952689916193566e-05, "loss": 0.3959, "step": 48140 }, { "epoch": 1.4056583115574954, "grad_norm": 0.53328769208047, "learning_rate": 2.9524195728575292e-05, "loss": 0.4851, "step": 48145 }, { "epoch": 1.4058042948278127, "grad_norm": 0.4584105745176164, "learning_rate": 2.9521492295214926e-05, "loss": 0.4452, "step": 48150 }, { "epoch": 1.40595027809813, "grad_norm": 0.44018102979135065, "learning_rate": 2.9518788861854557e-05, "loss": 0.428, "step": 48155 }, { "epoch": 1.4060962613684471, "grad_norm": 0.5055760012232617, "learning_rate": 2.951608542849419e-05, "loss": 0.4459, "step": 48160 }, { "epoch": 1.4062422446387643, "grad_norm": 0.46026638632036176, "learning_rate": 2.951338199513382e-05, "loss": 0.4205, "step": 48165 }, { "epoch": 1.4063882279090816, "grad_norm": 0.44540584960267493, "learning_rate": 2.9510678561773452e-05, "loss": 0.4347, "step": 48170 }, { "epoch": 1.4065342111793988, "grad_norm": 0.5201460544982812, "learning_rate": 2.9507975128413086e-05, "loss": 0.428, "step": 48175 }, { "epoch": 1.406680194449716, "grad_norm": 0.45237049462791645, "learning_rate": 2.9505271695052717e-05, "loss": 0.4361, "step": 48180 }, { "epoch": 1.4068261777200333, "grad_norm": 0.4745576615894896, "learning_rate": 2.9502568261692347e-05, "loss": 0.436, "step": 48185 }, { "epoch": 1.4069721609903505, "grad_norm": 0.5160343980258014, "learning_rate": 2.9499864828331985e-05, "loss": 0.4425, "step": 48190 }, { "epoch": 1.4071181442606677, "grad_norm": 0.5017713217435567, "learning_rate": 2.949716139497162e-05, "loss": 0.4527, "step": 48195 }, { "epoch": 1.407264127530985, "grad_norm": 0.4976925015709088, "learning_rate": 2.949445796161125e-05, "loss": 0.4716, "step": 48200 }, { "epoch": 1.4074101108013022, "grad_norm": 0.5149228849813028, "learning_rate": 2.949175452825088e-05, "loss": 0.4359, "step": 48205 }, { "epoch": 1.4075560940716194, "grad_norm": 0.4627529369390347, "learning_rate": 2.9489051094890514e-05, "loss": 0.4481, "step": 48210 }, { "epoch": 1.4077020773419366, "grad_norm": 0.4701188495376209, "learning_rate": 2.9486347661530144e-05, "loss": 0.4518, "step": 48215 }, { "epoch": 1.4078480606122539, "grad_norm": 0.45466122532348807, "learning_rate": 2.9483644228169775e-05, "loss": 0.4526, "step": 48220 }, { "epoch": 1.407994043882571, "grad_norm": 0.476525634796285, "learning_rate": 2.948094079480941e-05, "loss": 0.4635, "step": 48225 }, { "epoch": 1.4081400271528883, "grad_norm": 0.4933842489448802, "learning_rate": 2.947823736144904e-05, "loss": 0.4371, "step": 48230 }, { "epoch": 1.4082860104232056, "grad_norm": 0.4741957071641103, "learning_rate": 2.9475533928088674e-05, "loss": 0.4574, "step": 48235 }, { "epoch": 1.4084319936935228, "grad_norm": 0.49818000182249594, "learning_rate": 2.9472830494728304e-05, "loss": 0.4597, "step": 48240 }, { "epoch": 1.40857797696384, "grad_norm": 0.4441677585172342, "learning_rate": 2.9470127061367942e-05, "loss": 0.4274, "step": 48245 }, { "epoch": 1.4087239602341572, "grad_norm": 0.47806189111710035, "learning_rate": 2.9467423628007572e-05, "loss": 0.439, "step": 48250 }, { "epoch": 1.4088699435044743, "grad_norm": 0.4246222678436212, "learning_rate": 2.9464720194647206e-05, "loss": 0.4441, "step": 48255 }, { "epoch": 1.4090159267747917, "grad_norm": 0.43474634041911797, "learning_rate": 2.9462016761286837e-05, "loss": 0.4338, "step": 48260 }, { "epoch": 1.4091619100451087, "grad_norm": 0.4943902008447685, "learning_rate": 2.9459313327926468e-05, "loss": 0.4282, "step": 48265 }, { "epoch": 1.4093078933154262, "grad_norm": 0.47429384641488564, "learning_rate": 2.94566098945661e-05, "loss": 0.4526, "step": 48270 }, { "epoch": 1.4094538765857432, "grad_norm": 0.5072737152512384, "learning_rate": 2.9453906461205732e-05, "loss": 0.4764, "step": 48275 }, { "epoch": 1.4095998598560606, "grad_norm": 0.49153800470603004, "learning_rate": 2.9451203027845363e-05, "loss": 0.4283, "step": 48280 }, { "epoch": 1.4097458431263776, "grad_norm": 0.49279367353461156, "learning_rate": 2.9448499594484997e-05, "loss": 0.4497, "step": 48285 }, { "epoch": 1.4098918263966949, "grad_norm": 0.48776660524022125, "learning_rate": 2.9445796161124627e-05, "loss": 0.4465, "step": 48290 }, { "epoch": 1.410037809667012, "grad_norm": 0.4716641022016771, "learning_rate": 2.944309272776426e-05, "loss": 0.4495, "step": 48295 }, { "epoch": 1.4101837929373293, "grad_norm": 0.4501684091188804, "learning_rate": 2.9440389294403892e-05, "loss": 0.4512, "step": 48300 }, { "epoch": 1.4103297762076465, "grad_norm": 0.506857450549181, "learning_rate": 2.943768586104353e-05, "loss": 0.4812, "step": 48305 }, { "epoch": 1.4104757594779638, "grad_norm": 0.4696615279706331, "learning_rate": 2.943498242768316e-05, "loss": 0.4306, "step": 48310 }, { "epoch": 1.410621742748281, "grad_norm": 0.4921850324647551, "learning_rate": 2.9432278994322794e-05, "loss": 0.4505, "step": 48315 }, { "epoch": 1.4107677260185982, "grad_norm": 0.47097221045965726, "learning_rate": 2.9429575560962425e-05, "loss": 0.4347, "step": 48320 }, { "epoch": 1.4109137092889155, "grad_norm": 0.4751826851890008, "learning_rate": 2.9426872127602055e-05, "loss": 0.4402, "step": 48325 }, { "epoch": 1.4110596925592327, "grad_norm": 0.4867283660671921, "learning_rate": 2.942416869424169e-05, "loss": 0.4337, "step": 48330 }, { "epoch": 1.41120567582955, "grad_norm": 0.4975324157374477, "learning_rate": 2.942146526088132e-05, "loss": 0.4498, "step": 48335 }, { "epoch": 1.4113516590998672, "grad_norm": 0.4491537900993785, "learning_rate": 2.941876182752095e-05, "loss": 0.4101, "step": 48340 }, { "epoch": 1.4114976423701844, "grad_norm": 0.46998138119438276, "learning_rate": 2.9416058394160585e-05, "loss": 0.4496, "step": 48345 }, { "epoch": 1.4116436256405016, "grad_norm": 0.4837177522832341, "learning_rate": 2.9413354960800215e-05, "loss": 0.4376, "step": 48350 }, { "epoch": 1.4117896089108188, "grad_norm": 0.49352724972352213, "learning_rate": 2.9410651527439846e-05, "loss": 0.4607, "step": 48355 }, { "epoch": 1.411935592181136, "grad_norm": 0.4681051003454698, "learning_rate": 2.9407948094079483e-05, "loss": 0.4412, "step": 48360 }, { "epoch": 1.4120815754514533, "grad_norm": 0.5487114474793585, "learning_rate": 2.9405244660719117e-05, "loss": 0.4463, "step": 48365 }, { "epoch": 1.4122275587217705, "grad_norm": 0.489585934978896, "learning_rate": 2.9402541227358748e-05, "loss": 0.484, "step": 48370 }, { "epoch": 1.4123735419920878, "grad_norm": 0.4709825928054077, "learning_rate": 2.9399837793998382e-05, "loss": 0.4443, "step": 48375 }, { "epoch": 1.412519525262405, "grad_norm": 0.49453441236685697, "learning_rate": 2.9397134360638013e-05, "loss": 0.4263, "step": 48380 }, { "epoch": 1.4126655085327222, "grad_norm": 0.4593620639698839, "learning_rate": 2.9394430927277643e-05, "loss": 0.46, "step": 48385 }, { "epoch": 1.4128114918030394, "grad_norm": 0.5174906565035124, "learning_rate": 2.9391727493917277e-05, "loss": 0.4624, "step": 48390 }, { "epoch": 1.4129574750733567, "grad_norm": 0.45047394469277136, "learning_rate": 2.9389024060556908e-05, "loss": 0.4446, "step": 48395 }, { "epoch": 1.4131034583436737, "grad_norm": 0.4717194544907058, "learning_rate": 2.938632062719654e-05, "loss": 0.4127, "step": 48400 }, { "epoch": 1.4132494416139911, "grad_norm": 0.47239665547224735, "learning_rate": 2.9383617193836172e-05, "loss": 0.4161, "step": 48405 }, { "epoch": 1.4133954248843081, "grad_norm": 0.4805185595807189, "learning_rate": 2.9380913760475803e-05, "loss": 0.4429, "step": 48410 }, { "epoch": 1.4135414081546256, "grad_norm": 0.47678067507852606, "learning_rate": 2.937821032711544e-05, "loss": 0.4314, "step": 48415 }, { "epoch": 1.4136873914249426, "grad_norm": 0.5883095320092516, "learning_rate": 2.937550689375507e-05, "loss": 0.4232, "step": 48420 }, { "epoch": 1.41383337469526, "grad_norm": 0.48014368018855097, "learning_rate": 2.9372803460394705e-05, "loss": 0.4586, "step": 48425 }, { "epoch": 1.413979357965577, "grad_norm": 0.48065196168991847, "learning_rate": 2.9370100027034336e-05, "loss": 0.4684, "step": 48430 }, { "epoch": 1.4141253412358943, "grad_norm": 0.4706957746334619, "learning_rate": 2.936739659367397e-05, "loss": 0.4608, "step": 48435 }, { "epoch": 1.4142713245062115, "grad_norm": 0.48635100391737246, "learning_rate": 2.93646931603136e-05, "loss": 0.4576, "step": 48440 }, { "epoch": 1.4144173077765287, "grad_norm": 0.45160069258378854, "learning_rate": 2.936198972695323e-05, "loss": 0.4849, "step": 48445 }, { "epoch": 1.414563291046846, "grad_norm": 0.43226773624603604, "learning_rate": 2.9359286293592865e-05, "loss": 0.4161, "step": 48450 }, { "epoch": 1.4147092743171632, "grad_norm": 0.48128358440965563, "learning_rate": 2.9356582860232496e-05, "loss": 0.419, "step": 48455 }, { "epoch": 1.4148552575874804, "grad_norm": 0.48964503665929554, "learning_rate": 2.9353879426872126e-05, "loss": 0.4432, "step": 48460 }, { "epoch": 1.4150012408577977, "grad_norm": 0.5104771333576782, "learning_rate": 2.935117599351176e-05, "loss": 0.4478, "step": 48465 }, { "epoch": 1.4151472241281149, "grad_norm": 0.4604675147030741, "learning_rate": 2.934847256015139e-05, "loss": 0.4489, "step": 48470 }, { "epoch": 1.4152932073984321, "grad_norm": 0.5080203721017384, "learning_rate": 2.934576912679103e-05, "loss": 0.442, "step": 48475 }, { "epoch": 1.4154391906687493, "grad_norm": 0.440413187858064, "learning_rate": 2.934306569343066e-05, "loss": 0.4233, "step": 48480 }, { "epoch": 1.4155851739390666, "grad_norm": 0.45743632108155974, "learning_rate": 2.9340362260070293e-05, "loss": 0.4443, "step": 48485 }, { "epoch": 1.4157311572093838, "grad_norm": 0.47003521836642503, "learning_rate": 2.9337658826709924e-05, "loss": 0.4553, "step": 48490 }, { "epoch": 1.415877140479701, "grad_norm": 0.5139851328305408, "learning_rate": 2.9334955393349554e-05, "loss": 0.4197, "step": 48495 }, { "epoch": 1.4160231237500183, "grad_norm": 0.49300993714783214, "learning_rate": 2.9332251959989188e-05, "loss": 0.4509, "step": 48500 }, { "epoch": 1.4161691070203355, "grad_norm": 0.4676144591706108, "learning_rate": 2.932954852662882e-05, "loss": 0.4441, "step": 48505 }, { "epoch": 1.4163150902906527, "grad_norm": 0.49625750072161723, "learning_rate": 2.9326845093268453e-05, "loss": 0.4268, "step": 48510 }, { "epoch": 1.41646107356097, "grad_norm": 0.4750680773972779, "learning_rate": 2.9324141659908083e-05, "loss": 0.4432, "step": 48515 }, { "epoch": 1.4166070568312872, "grad_norm": 0.4721709217260838, "learning_rate": 2.9321438226547714e-05, "loss": 0.4558, "step": 48520 }, { "epoch": 1.4167530401016044, "grad_norm": 0.4848356405885455, "learning_rate": 2.9318734793187348e-05, "loss": 0.4184, "step": 48525 }, { "epoch": 1.4168990233719216, "grad_norm": 0.4591107371867237, "learning_rate": 2.9316031359826985e-05, "loss": 0.4331, "step": 48530 }, { "epoch": 1.4170450066422389, "grad_norm": 0.41763727234154085, "learning_rate": 2.9313327926466616e-05, "loss": 0.4288, "step": 48535 }, { "epoch": 1.417190989912556, "grad_norm": 0.4835223128982454, "learning_rate": 2.9310624493106247e-05, "loss": 0.4646, "step": 48540 }, { "epoch": 1.417336973182873, "grad_norm": 0.4710881134982788, "learning_rate": 2.930792105974588e-05, "loss": 0.4371, "step": 48545 }, { "epoch": 1.4174829564531906, "grad_norm": 0.44645967217193244, "learning_rate": 2.930521762638551e-05, "loss": 0.4475, "step": 48550 }, { "epoch": 1.4176289397235076, "grad_norm": 0.44930187451982706, "learning_rate": 2.9302514193025142e-05, "loss": 0.4187, "step": 48555 }, { "epoch": 1.417774922993825, "grad_norm": 0.4772978487520221, "learning_rate": 2.9299810759664776e-05, "loss": 0.4425, "step": 48560 }, { "epoch": 1.417920906264142, "grad_norm": 0.500293619837336, "learning_rate": 2.9297107326304407e-05, "loss": 0.4445, "step": 48565 }, { "epoch": 1.4180668895344595, "grad_norm": 0.44735756196442733, "learning_rate": 2.929440389294404e-05, "loss": 0.453, "step": 48570 }, { "epoch": 1.4182128728047765, "grad_norm": 0.4421161319857975, "learning_rate": 2.929170045958367e-05, "loss": 0.4572, "step": 48575 }, { "epoch": 1.418358856075094, "grad_norm": 0.47172176107113245, "learning_rate": 2.9288997026223302e-05, "loss": 0.4493, "step": 48580 }, { "epoch": 1.418504839345411, "grad_norm": 0.4278801894012857, "learning_rate": 2.928629359286294e-05, "loss": 0.4415, "step": 48585 }, { "epoch": 1.4186508226157282, "grad_norm": 0.48163528521419025, "learning_rate": 2.9283590159502573e-05, "loss": 0.446, "step": 48590 }, { "epoch": 1.4187968058860454, "grad_norm": 0.5130727907633725, "learning_rate": 2.9280886726142204e-05, "loss": 0.4614, "step": 48595 }, { "epoch": 1.4189427891563626, "grad_norm": 0.5330349096079627, "learning_rate": 2.9278183292781835e-05, "loss": 0.4432, "step": 48600 }, { "epoch": 1.4190887724266799, "grad_norm": 0.44712958706163763, "learning_rate": 2.927547985942147e-05, "loss": 0.4336, "step": 48605 }, { "epoch": 1.419234755696997, "grad_norm": 0.4615419232545603, "learning_rate": 2.92727764260611e-05, "loss": 0.4449, "step": 48610 }, { "epoch": 1.4193807389673143, "grad_norm": 0.4902925553063305, "learning_rate": 2.927007299270073e-05, "loss": 0.4468, "step": 48615 }, { "epoch": 1.4195267222376315, "grad_norm": 0.4821594856293841, "learning_rate": 2.9267369559340364e-05, "loss": 0.4476, "step": 48620 }, { "epoch": 1.4196727055079488, "grad_norm": 0.4986210731238918, "learning_rate": 2.9264666125979994e-05, "loss": 0.4553, "step": 48625 }, { "epoch": 1.419818688778266, "grad_norm": 0.4603690886914519, "learning_rate": 2.9261962692619625e-05, "loss": 0.4408, "step": 48630 }, { "epoch": 1.4199646720485832, "grad_norm": 0.44891631134426785, "learning_rate": 2.925925925925926e-05, "loss": 0.4092, "step": 48635 }, { "epoch": 1.4201106553189005, "grad_norm": 0.4635823239743726, "learning_rate": 2.925655582589889e-05, "loss": 0.4522, "step": 48640 }, { "epoch": 1.4202566385892177, "grad_norm": 0.5262299856403398, "learning_rate": 2.9253852392538527e-05, "loss": 0.4743, "step": 48645 }, { "epoch": 1.420402621859535, "grad_norm": 0.48336506837216353, "learning_rate": 2.925114895917816e-05, "loss": 0.4761, "step": 48650 }, { "epoch": 1.4205486051298521, "grad_norm": 0.5085474700825686, "learning_rate": 2.9248445525817792e-05, "loss": 0.4393, "step": 48655 }, { "epoch": 1.4206945884001694, "grad_norm": 0.4835622812312455, "learning_rate": 2.9245742092457422e-05, "loss": 0.4316, "step": 48660 }, { "epoch": 1.4208405716704866, "grad_norm": 0.49523525374839955, "learning_rate": 2.9243038659097056e-05, "loss": 0.4468, "step": 48665 }, { "epoch": 1.4209865549408038, "grad_norm": 0.47680845974844654, "learning_rate": 2.9240335225736687e-05, "loss": 0.4442, "step": 48670 }, { "epoch": 1.421132538211121, "grad_norm": 0.46336035682120524, "learning_rate": 2.9237631792376318e-05, "loss": 0.4348, "step": 48675 }, { "epoch": 1.4212785214814383, "grad_norm": 0.40801286695383177, "learning_rate": 2.923492835901595e-05, "loss": 0.4286, "step": 48680 }, { "epoch": 1.4214245047517555, "grad_norm": 0.5009574832607684, "learning_rate": 2.9232224925655582e-05, "loss": 0.4637, "step": 48685 }, { "epoch": 1.4215704880220728, "grad_norm": 0.49770801881802074, "learning_rate": 2.9229521492295213e-05, "loss": 0.4399, "step": 48690 }, { "epoch": 1.42171647129239, "grad_norm": 0.49238707664672454, "learning_rate": 2.9226818058934847e-05, "loss": 0.4544, "step": 48695 }, { "epoch": 1.421862454562707, "grad_norm": 0.5280371098090559, "learning_rate": 2.9224114625574484e-05, "loss": 0.4572, "step": 48700 }, { "epoch": 1.4220084378330244, "grad_norm": 0.5036626507517734, "learning_rate": 2.9221411192214115e-05, "loss": 0.4748, "step": 48705 }, { "epoch": 1.4221544211033414, "grad_norm": 0.5160878245516408, "learning_rate": 2.9218707758853746e-05, "loss": 0.4649, "step": 48710 }, { "epoch": 1.422300404373659, "grad_norm": 0.46792397068702124, "learning_rate": 2.921600432549338e-05, "loss": 0.4329, "step": 48715 }, { "epoch": 1.422446387643976, "grad_norm": 0.4901223682567954, "learning_rate": 2.921330089213301e-05, "loss": 0.4746, "step": 48720 }, { "epoch": 1.4225923709142934, "grad_norm": 0.4784995573427043, "learning_rate": 2.9210597458772644e-05, "loss": 0.4335, "step": 48725 }, { "epoch": 1.4227383541846104, "grad_norm": 0.5303587420928241, "learning_rate": 2.9207894025412275e-05, "loss": 0.4563, "step": 48730 }, { "epoch": 1.4228843374549276, "grad_norm": 0.5243615683852838, "learning_rate": 2.9205190592051905e-05, "loss": 0.4886, "step": 48735 }, { "epoch": 1.4230303207252448, "grad_norm": 0.467174368004916, "learning_rate": 2.920248715869154e-05, "loss": 0.4327, "step": 48740 }, { "epoch": 1.423176303995562, "grad_norm": 0.4356780279458707, "learning_rate": 2.919978372533117e-05, "loss": 0.4321, "step": 48745 }, { "epoch": 1.4233222872658793, "grad_norm": 0.4866972543852304, "learning_rate": 2.91970802919708e-05, "loss": 0.4698, "step": 48750 }, { "epoch": 1.4234682705361965, "grad_norm": 0.48205844841759277, "learning_rate": 2.9194376858610438e-05, "loss": 0.4257, "step": 48755 }, { "epoch": 1.4236142538065137, "grad_norm": 0.45764061291857916, "learning_rate": 2.9191673425250072e-05, "loss": 0.4368, "step": 48760 }, { "epoch": 1.423760237076831, "grad_norm": 0.5435152869649912, "learning_rate": 2.9188969991889703e-05, "loss": 0.4495, "step": 48765 }, { "epoch": 1.4239062203471482, "grad_norm": 0.4707182423150777, "learning_rate": 2.9186266558529333e-05, "loss": 0.4401, "step": 48770 }, { "epoch": 1.4240522036174654, "grad_norm": 0.48296288639039375, "learning_rate": 2.9183563125168967e-05, "loss": 0.4659, "step": 48775 }, { "epoch": 1.4241981868877827, "grad_norm": 0.5031012695351226, "learning_rate": 2.9180859691808598e-05, "loss": 0.4486, "step": 48780 }, { "epoch": 1.4243441701580999, "grad_norm": 0.4671935307892162, "learning_rate": 2.9178156258448232e-05, "loss": 0.4223, "step": 48785 }, { "epoch": 1.4244901534284171, "grad_norm": 0.5182989233163363, "learning_rate": 2.9175452825087863e-05, "loss": 0.4287, "step": 48790 }, { "epoch": 1.4246361366987343, "grad_norm": 0.4851902937968709, "learning_rate": 2.9172749391727493e-05, "loss": 0.4561, "step": 48795 }, { "epoch": 1.4247821199690516, "grad_norm": 0.47958003980908115, "learning_rate": 2.9170045958367127e-05, "loss": 0.4399, "step": 48800 }, { "epoch": 1.4249281032393688, "grad_norm": 0.47221380822298864, "learning_rate": 2.9167342525006758e-05, "loss": 0.4537, "step": 48805 }, { "epoch": 1.425074086509686, "grad_norm": 0.48929823958818636, "learning_rate": 2.916463909164639e-05, "loss": 0.4494, "step": 48810 }, { "epoch": 1.4252200697800033, "grad_norm": 1.004067709274712, "learning_rate": 2.9161935658286026e-05, "loss": 0.4501, "step": 48815 }, { "epoch": 1.4253660530503205, "grad_norm": 0.4819957593861183, "learning_rate": 2.915923222492566e-05, "loss": 0.4422, "step": 48820 }, { "epoch": 1.4255120363206377, "grad_norm": 0.45959995889658567, "learning_rate": 2.915652879156529e-05, "loss": 0.4146, "step": 48825 }, { "epoch": 1.425658019590955, "grad_norm": 0.5067016136698066, "learning_rate": 2.915382535820492e-05, "loss": 0.4484, "step": 48830 }, { "epoch": 1.4258040028612722, "grad_norm": 0.4109547026991331, "learning_rate": 2.9151121924844555e-05, "loss": 0.434, "step": 48835 }, { "epoch": 1.4259499861315894, "grad_norm": 0.4534187444224476, "learning_rate": 2.9148418491484186e-05, "loss": 0.4658, "step": 48840 }, { "epoch": 1.4260959694019064, "grad_norm": 0.5116742260112214, "learning_rate": 2.9145715058123816e-05, "loss": 0.4843, "step": 48845 }, { "epoch": 1.4262419526722239, "grad_norm": 0.4911993979356117, "learning_rate": 2.914301162476345e-05, "loss": 0.4238, "step": 48850 }, { "epoch": 1.4263879359425409, "grad_norm": 0.4745583671775028, "learning_rate": 2.914030819140308e-05, "loss": 0.4576, "step": 48855 }, { "epoch": 1.4265339192128583, "grad_norm": 0.4601987333098413, "learning_rate": 2.9137604758042715e-05, "loss": 0.4455, "step": 48860 }, { "epoch": 1.4266799024831753, "grad_norm": 0.4675168097676141, "learning_rate": 2.9134901324682346e-05, "loss": 0.4342, "step": 48865 }, { "epoch": 1.4268258857534928, "grad_norm": 0.4892465335173188, "learning_rate": 2.9132197891321983e-05, "loss": 0.416, "step": 48870 }, { "epoch": 1.4269718690238098, "grad_norm": 0.5022084017777937, "learning_rate": 2.9129494457961614e-05, "loss": 0.4497, "step": 48875 }, { "epoch": 1.427117852294127, "grad_norm": 0.489450664921605, "learning_rate": 2.9126791024601248e-05, "loss": 0.4596, "step": 48880 }, { "epoch": 1.4272638355644442, "grad_norm": 0.4927279400465472, "learning_rate": 2.912408759124088e-05, "loss": 0.4492, "step": 48885 }, { "epoch": 1.4274098188347615, "grad_norm": 0.4398685477519757, "learning_rate": 2.912138415788051e-05, "loss": 0.422, "step": 48890 }, { "epoch": 1.4275558021050787, "grad_norm": 0.4438141571527387, "learning_rate": 2.9118680724520143e-05, "loss": 0.4125, "step": 48895 }, { "epoch": 1.427701785375396, "grad_norm": 0.5734471453891462, "learning_rate": 2.9115977291159774e-05, "loss": 0.4346, "step": 48900 }, { "epoch": 1.4278477686457132, "grad_norm": 0.4863324171887272, "learning_rate": 2.9113273857799404e-05, "loss": 0.4431, "step": 48905 }, { "epoch": 1.4279937519160304, "grad_norm": 0.4591254979624301, "learning_rate": 2.9110570424439038e-05, "loss": 0.4562, "step": 48910 }, { "epoch": 1.4281397351863476, "grad_norm": 0.4870083684518283, "learning_rate": 2.910786699107867e-05, "loss": 0.4432, "step": 48915 }, { "epoch": 1.4282857184566649, "grad_norm": 0.48459377903982465, "learning_rate": 2.9105163557718303e-05, "loss": 0.4648, "step": 48920 }, { "epoch": 1.428431701726982, "grad_norm": 0.5073285113648327, "learning_rate": 2.910246012435794e-05, "loss": 0.464, "step": 48925 }, { "epoch": 1.4285776849972993, "grad_norm": 0.4781053722464929, "learning_rate": 2.909975669099757e-05, "loss": 0.4753, "step": 48930 }, { "epoch": 1.4287236682676165, "grad_norm": 0.4402797239592098, "learning_rate": 2.90970532576372e-05, "loss": 0.4308, "step": 48935 }, { "epoch": 1.4288696515379338, "grad_norm": 0.5225359958031994, "learning_rate": 2.9094349824276836e-05, "loss": 0.4644, "step": 48940 }, { "epoch": 1.429015634808251, "grad_norm": 0.4660200534047998, "learning_rate": 2.9091646390916466e-05, "loss": 0.4336, "step": 48945 }, { "epoch": 1.4291616180785682, "grad_norm": 0.4635874854993672, "learning_rate": 2.9088942957556097e-05, "loss": 0.4189, "step": 48950 }, { "epoch": 1.4293076013488855, "grad_norm": 0.48474563012841315, "learning_rate": 2.908623952419573e-05, "loss": 0.4708, "step": 48955 }, { "epoch": 1.4294535846192027, "grad_norm": 0.45971377793288987, "learning_rate": 2.908353609083536e-05, "loss": 0.4529, "step": 48960 }, { "epoch": 1.42959956788952, "grad_norm": 0.43486350465627904, "learning_rate": 2.9080832657474992e-05, "loss": 0.4323, "step": 48965 }, { "epoch": 1.4297455511598371, "grad_norm": 0.466606939446852, "learning_rate": 2.9078129224114626e-05, "loss": 0.4446, "step": 48970 }, { "epoch": 1.4298915344301544, "grad_norm": 0.4713477667677401, "learning_rate": 2.9075425790754257e-05, "loss": 0.41, "step": 48975 }, { "epoch": 1.4300375177004716, "grad_norm": 0.47990817266710134, "learning_rate": 2.9072722357393887e-05, "loss": 0.4744, "step": 48980 }, { "epoch": 1.4301835009707888, "grad_norm": 0.4718874916017603, "learning_rate": 2.9070018924033525e-05, "loss": 0.4471, "step": 48985 }, { "epoch": 1.4303294842411058, "grad_norm": 0.47111779401423853, "learning_rate": 2.906731549067316e-05, "loss": 0.4421, "step": 48990 }, { "epoch": 1.4304754675114233, "grad_norm": 0.502417184491574, "learning_rate": 2.906461205731279e-05, "loss": 0.4449, "step": 48995 }, { "epoch": 1.4306214507817403, "grad_norm": 0.47615906855168205, "learning_rate": 2.9061908623952423e-05, "loss": 0.442, "step": 49000 }, { "epoch": 1.4307674340520578, "grad_norm": 0.47955400361003303, "learning_rate": 2.9059205190592054e-05, "loss": 0.4982, "step": 49005 }, { "epoch": 1.4309134173223748, "grad_norm": 0.5021230984807312, "learning_rate": 2.9056501757231685e-05, "loss": 0.4541, "step": 49010 }, { "epoch": 1.4310594005926922, "grad_norm": 0.477228184200784, "learning_rate": 2.905379832387132e-05, "loss": 0.4462, "step": 49015 }, { "epoch": 1.4312053838630092, "grad_norm": 0.5408385753715033, "learning_rate": 2.905109489051095e-05, "loss": 0.4783, "step": 49020 }, { "epoch": 1.4313513671333264, "grad_norm": 0.4677619139286993, "learning_rate": 2.904839145715058e-05, "loss": 0.461, "step": 49025 }, { "epoch": 1.4314973504036437, "grad_norm": 0.49590275520616856, "learning_rate": 2.9045688023790214e-05, "loss": 0.4643, "step": 49030 }, { "epoch": 1.431643333673961, "grad_norm": 0.44388127598409943, "learning_rate": 2.9042984590429844e-05, "loss": 0.4277, "step": 49035 }, { "epoch": 1.4317893169442781, "grad_norm": 0.4432790259440848, "learning_rate": 2.9040281157069482e-05, "loss": 0.4369, "step": 49040 }, { "epoch": 1.4319353002145954, "grad_norm": 0.448784617072741, "learning_rate": 2.9037577723709112e-05, "loss": 0.4499, "step": 49045 }, { "epoch": 1.4320812834849126, "grad_norm": 0.46067242440777684, "learning_rate": 2.9034874290348746e-05, "loss": 0.4296, "step": 49050 }, { "epoch": 1.4322272667552298, "grad_norm": 0.48949460083539575, "learning_rate": 2.9032170856988377e-05, "loss": 0.4157, "step": 49055 }, { "epoch": 1.432373250025547, "grad_norm": 0.48772085513494967, "learning_rate": 2.902946742362801e-05, "loss": 0.4495, "step": 49060 }, { "epoch": 1.4325192332958643, "grad_norm": 0.4923743442910467, "learning_rate": 2.9026763990267642e-05, "loss": 0.4575, "step": 49065 }, { "epoch": 1.4326652165661815, "grad_norm": 0.4562843046485683, "learning_rate": 2.9024060556907272e-05, "loss": 0.4455, "step": 49070 }, { "epoch": 1.4328111998364987, "grad_norm": 0.46980650479545577, "learning_rate": 2.9021357123546906e-05, "loss": 0.4375, "step": 49075 }, { "epoch": 1.432957183106816, "grad_norm": 0.4768301146105397, "learning_rate": 2.9018653690186537e-05, "loss": 0.4504, "step": 49080 }, { "epoch": 1.4331031663771332, "grad_norm": 0.4965328184733734, "learning_rate": 2.9015950256826168e-05, "loss": 0.4266, "step": 49085 }, { "epoch": 1.4332491496474504, "grad_norm": 0.48411627711448646, "learning_rate": 2.90132468234658e-05, "loss": 0.4587, "step": 49090 }, { "epoch": 1.4333951329177677, "grad_norm": 0.5104751808757255, "learning_rate": 2.901054339010544e-05, "loss": 0.4434, "step": 49095 }, { "epoch": 1.4335411161880849, "grad_norm": 0.4746955051366579, "learning_rate": 2.900783995674507e-05, "loss": 0.4357, "step": 49100 }, { "epoch": 1.4336870994584021, "grad_norm": 0.5351252770350408, "learning_rate": 2.90051365233847e-05, "loss": 0.4561, "step": 49105 }, { "epoch": 1.4338330827287193, "grad_norm": 0.49390303281894943, "learning_rate": 2.9002433090024334e-05, "loss": 0.4657, "step": 49110 }, { "epoch": 1.4339790659990366, "grad_norm": 0.47130271356685044, "learning_rate": 2.8999729656663965e-05, "loss": 0.4449, "step": 49115 }, { "epoch": 1.4341250492693538, "grad_norm": 0.5352738613483186, "learning_rate": 2.8997026223303596e-05, "loss": 0.4575, "step": 49120 }, { "epoch": 1.434271032539671, "grad_norm": 0.4276376587740198, "learning_rate": 2.899432278994323e-05, "loss": 0.4331, "step": 49125 }, { "epoch": 1.4344170158099883, "grad_norm": 0.45775890341471653, "learning_rate": 2.899161935658286e-05, "loss": 0.4451, "step": 49130 }, { "epoch": 1.4345629990803053, "grad_norm": 0.4845362756532609, "learning_rate": 2.8988915923222494e-05, "loss": 0.4611, "step": 49135 }, { "epoch": 1.4347089823506227, "grad_norm": 0.47450897214153154, "learning_rate": 2.8986212489862125e-05, "loss": 0.4278, "step": 49140 }, { "epoch": 1.4348549656209397, "grad_norm": 0.49121132507311543, "learning_rate": 2.8983509056501755e-05, "loss": 0.4391, "step": 49145 }, { "epoch": 1.4350009488912572, "grad_norm": 0.4876687755510322, "learning_rate": 2.898080562314139e-05, "loss": 0.4374, "step": 49150 }, { "epoch": 1.4351469321615742, "grad_norm": 0.492716544577732, "learning_rate": 2.8978102189781027e-05, "loss": 0.4579, "step": 49155 }, { "epoch": 1.4352929154318916, "grad_norm": 0.5094663683229889, "learning_rate": 2.8975398756420657e-05, "loss": 0.4828, "step": 49160 }, { "epoch": 1.4354388987022086, "grad_norm": 0.504058652349089, "learning_rate": 2.8972695323060288e-05, "loss": 0.4516, "step": 49165 }, { "epoch": 1.4355848819725259, "grad_norm": 0.46983528168661964, "learning_rate": 2.8969991889699922e-05, "loss": 0.4379, "step": 49170 }, { "epoch": 1.435730865242843, "grad_norm": 0.5157707202658737, "learning_rate": 2.8967288456339553e-05, "loss": 0.4502, "step": 49175 }, { "epoch": 1.4358768485131603, "grad_norm": 0.4582135614697518, "learning_rate": 2.8964585022979183e-05, "loss": 0.4039, "step": 49180 }, { "epoch": 1.4360228317834776, "grad_norm": 0.47996505086833285, "learning_rate": 2.8961881589618817e-05, "loss": 0.455, "step": 49185 }, { "epoch": 1.4361688150537948, "grad_norm": 0.4890389615059212, "learning_rate": 2.8959178156258448e-05, "loss": 0.4373, "step": 49190 }, { "epoch": 1.436314798324112, "grad_norm": 0.5231873114110542, "learning_rate": 2.8956474722898082e-05, "loss": 0.47, "step": 49195 }, { "epoch": 1.4364607815944292, "grad_norm": 0.4582683098328184, "learning_rate": 2.8953771289537713e-05, "loss": 0.4298, "step": 49200 }, { "epoch": 1.4366067648647465, "grad_norm": 0.48523147045923926, "learning_rate": 2.8951067856177343e-05, "loss": 0.475, "step": 49205 }, { "epoch": 1.4367527481350637, "grad_norm": 0.49418994448320885, "learning_rate": 2.894836442281698e-05, "loss": 0.461, "step": 49210 }, { "epoch": 1.436898731405381, "grad_norm": 0.49032454922708507, "learning_rate": 2.8945660989456615e-05, "loss": 0.4346, "step": 49215 }, { "epoch": 1.4370447146756982, "grad_norm": 0.5179323571247175, "learning_rate": 2.8942957556096245e-05, "loss": 0.4709, "step": 49220 }, { "epoch": 1.4371906979460154, "grad_norm": 0.4731007311387694, "learning_rate": 2.8940254122735876e-05, "loss": 0.4384, "step": 49225 }, { "epoch": 1.4373366812163326, "grad_norm": 0.5159746967210957, "learning_rate": 2.893755068937551e-05, "loss": 0.4436, "step": 49230 }, { "epoch": 1.4374826644866499, "grad_norm": 0.4434675174665634, "learning_rate": 2.893484725601514e-05, "loss": 0.4283, "step": 49235 }, { "epoch": 1.437628647756967, "grad_norm": 0.4254506181476383, "learning_rate": 2.893214382265477e-05, "loss": 0.4505, "step": 49240 }, { "epoch": 1.4377746310272843, "grad_norm": 0.44193186637029597, "learning_rate": 2.8929440389294405e-05, "loss": 0.4413, "step": 49245 }, { "epoch": 1.4379206142976015, "grad_norm": 0.49692019535307247, "learning_rate": 2.8926736955934036e-05, "loss": 0.4313, "step": 49250 }, { "epoch": 1.4380665975679188, "grad_norm": 0.4658140174318907, "learning_rate": 2.8924033522573666e-05, "loss": 0.4215, "step": 49255 }, { "epoch": 1.438212580838236, "grad_norm": 0.499554536686165, "learning_rate": 2.89213300892133e-05, "loss": 0.455, "step": 49260 }, { "epoch": 1.4383585641085532, "grad_norm": 0.4653207019256959, "learning_rate": 2.8918626655852938e-05, "loss": 0.4522, "step": 49265 }, { "epoch": 1.4385045473788705, "grad_norm": 0.5194226489108906, "learning_rate": 2.891592322249257e-05, "loss": 0.4625, "step": 49270 }, { "epoch": 1.4386505306491877, "grad_norm": 0.5014249866228304, "learning_rate": 2.8913219789132202e-05, "loss": 0.4565, "step": 49275 }, { "epoch": 1.4387965139195047, "grad_norm": 0.5000336491345834, "learning_rate": 2.8910516355771833e-05, "loss": 0.4549, "step": 49280 }, { "epoch": 1.4389424971898221, "grad_norm": 0.4971267047369768, "learning_rate": 2.8907812922411464e-05, "loss": 0.4375, "step": 49285 }, { "epoch": 1.4390884804601392, "grad_norm": 0.47852869499390416, "learning_rate": 2.8905109489051098e-05, "loss": 0.4433, "step": 49290 }, { "epoch": 1.4392344637304566, "grad_norm": 0.46714630880927716, "learning_rate": 2.890240605569073e-05, "loss": 0.4372, "step": 49295 }, { "epoch": 1.4393804470007736, "grad_norm": 0.47788862398213255, "learning_rate": 2.889970262233036e-05, "loss": 0.4494, "step": 49300 }, { "epoch": 1.439526430271091, "grad_norm": 0.5215291719093881, "learning_rate": 2.8896999188969993e-05, "loss": 0.4813, "step": 49305 }, { "epoch": 1.439672413541408, "grad_norm": 0.4565962331652611, "learning_rate": 2.8894295755609624e-05, "loss": 0.4358, "step": 49310 }, { "epoch": 1.4398183968117253, "grad_norm": 0.5111765805593169, "learning_rate": 2.8891592322249254e-05, "loss": 0.4421, "step": 49315 }, { "epoch": 1.4399643800820425, "grad_norm": 0.5092692626246166, "learning_rate": 2.8888888888888888e-05, "loss": 0.4684, "step": 49320 }, { "epoch": 1.4401103633523598, "grad_norm": 0.4879511451754653, "learning_rate": 2.8886185455528526e-05, "loss": 0.421, "step": 49325 }, { "epoch": 1.440256346622677, "grad_norm": 0.48489043621207173, "learning_rate": 2.8883482022168156e-05, "loss": 0.4329, "step": 49330 }, { "epoch": 1.4404023298929942, "grad_norm": 0.4837650566404545, "learning_rate": 2.888077858880779e-05, "loss": 0.4595, "step": 49335 }, { "epoch": 1.4405483131633114, "grad_norm": 0.4969744170488091, "learning_rate": 2.887807515544742e-05, "loss": 0.4524, "step": 49340 }, { "epoch": 1.4406942964336287, "grad_norm": 0.5077416950740273, "learning_rate": 2.887537172208705e-05, "loss": 0.4118, "step": 49345 }, { "epoch": 1.440840279703946, "grad_norm": 0.5883635839266397, "learning_rate": 2.8872668288726686e-05, "loss": 0.4351, "step": 49350 }, { "epoch": 1.4409862629742631, "grad_norm": 0.4879021975410247, "learning_rate": 2.8869964855366316e-05, "loss": 0.4749, "step": 49355 }, { "epoch": 1.4411322462445804, "grad_norm": 0.43788513812777125, "learning_rate": 2.8867261422005947e-05, "loss": 0.4292, "step": 49360 }, { "epoch": 1.4412782295148976, "grad_norm": 0.47395885162111395, "learning_rate": 2.886455798864558e-05, "loss": 0.4596, "step": 49365 }, { "epoch": 1.4414242127852148, "grad_norm": 0.45769796918387506, "learning_rate": 2.886185455528521e-05, "loss": 0.4327, "step": 49370 }, { "epoch": 1.441570196055532, "grad_norm": 0.45673844439705324, "learning_rate": 2.8859151121924842e-05, "loss": 0.4474, "step": 49375 }, { "epoch": 1.4417161793258493, "grad_norm": 0.5081005566482554, "learning_rate": 2.885644768856448e-05, "loss": 0.4472, "step": 49380 }, { "epoch": 1.4418621625961665, "grad_norm": 0.47479905955356233, "learning_rate": 2.8853744255204113e-05, "loss": 0.4399, "step": 49385 }, { "epoch": 1.4420081458664837, "grad_norm": 0.5121551950569059, "learning_rate": 2.8851040821843744e-05, "loss": 0.4652, "step": 49390 }, { "epoch": 1.442154129136801, "grad_norm": 0.5251009469107168, "learning_rate": 2.8848337388483375e-05, "loss": 0.4637, "step": 49395 }, { "epoch": 1.4423001124071182, "grad_norm": 0.47778602914744195, "learning_rate": 2.884563395512301e-05, "loss": 0.4449, "step": 49400 }, { "epoch": 1.4424460956774354, "grad_norm": 0.49659995987043576, "learning_rate": 2.884293052176264e-05, "loss": 0.4412, "step": 49405 }, { "epoch": 1.4425920789477527, "grad_norm": 0.48319990685254965, "learning_rate": 2.8840227088402273e-05, "loss": 0.4429, "step": 49410 }, { "epoch": 1.4427380622180699, "grad_norm": 0.514207086272472, "learning_rate": 2.8837523655041904e-05, "loss": 0.4494, "step": 49415 }, { "epoch": 1.4428840454883871, "grad_norm": 0.5098542380116797, "learning_rate": 2.8834820221681535e-05, "loss": 0.4161, "step": 49420 }, { "epoch": 1.4430300287587041, "grad_norm": 0.47280864974199543, "learning_rate": 2.883211678832117e-05, "loss": 0.4444, "step": 49425 }, { "epoch": 1.4431760120290216, "grad_norm": 0.5020039608535007, "learning_rate": 2.88294133549608e-05, "loss": 0.444, "step": 49430 }, { "epoch": 1.4433219952993386, "grad_norm": 0.43424418592920977, "learning_rate": 2.8826709921600437e-05, "loss": 0.4188, "step": 49435 }, { "epoch": 1.443467978569656, "grad_norm": 0.4522818740715498, "learning_rate": 2.8824006488240067e-05, "loss": 0.4529, "step": 49440 }, { "epoch": 1.443613961839973, "grad_norm": 0.4735316910507371, "learning_rate": 2.88213030548797e-05, "loss": 0.4314, "step": 49445 }, { "epoch": 1.4437599451102905, "grad_norm": 0.4822713317574975, "learning_rate": 2.8818599621519332e-05, "loss": 0.4537, "step": 49450 }, { "epoch": 1.4439059283806075, "grad_norm": 0.47086718911291253, "learning_rate": 2.8815896188158963e-05, "loss": 0.4453, "step": 49455 }, { "epoch": 1.4440519116509247, "grad_norm": 0.514598548950419, "learning_rate": 2.8813192754798597e-05, "loss": 0.4113, "step": 49460 }, { "epoch": 1.444197894921242, "grad_norm": 0.47489376539537514, "learning_rate": 2.8810489321438227e-05, "loss": 0.4585, "step": 49465 }, { "epoch": 1.4443438781915592, "grad_norm": 0.4571098704481997, "learning_rate": 2.880778588807786e-05, "loss": 0.4519, "step": 49470 }, { "epoch": 1.4444898614618764, "grad_norm": 0.4441880625202417, "learning_rate": 2.8805082454717492e-05, "loss": 0.4462, "step": 49475 }, { "epoch": 1.4446358447321936, "grad_norm": 0.47848605196737193, "learning_rate": 2.8802379021357122e-05, "loss": 0.4577, "step": 49480 }, { "epoch": 1.4447818280025109, "grad_norm": 0.4575072417398263, "learning_rate": 2.8799675587996756e-05, "loss": 0.4559, "step": 49485 }, { "epoch": 1.444927811272828, "grad_norm": 0.4829988940346195, "learning_rate": 2.8796972154636387e-05, "loss": 0.4445, "step": 49490 }, { "epoch": 1.4450737945431453, "grad_norm": 0.49806751231032986, "learning_rate": 2.8794268721276024e-05, "loss": 0.4611, "step": 49495 }, { "epoch": 1.4452197778134626, "grad_norm": 0.4608943075530658, "learning_rate": 2.8791565287915655e-05, "loss": 0.4681, "step": 49500 }, { "epoch": 1.4453657610837798, "grad_norm": 0.4712173931939973, "learning_rate": 2.878886185455529e-05, "loss": 0.4344, "step": 49505 }, { "epoch": 1.445511744354097, "grad_norm": 0.5031015526620984, "learning_rate": 2.878615842119492e-05, "loss": 0.4644, "step": 49510 }, { "epoch": 1.4456577276244142, "grad_norm": 0.4749303187504347, "learning_rate": 2.878345498783455e-05, "loss": 0.4576, "step": 49515 }, { "epoch": 1.4458037108947315, "grad_norm": 0.4558643902239004, "learning_rate": 2.8780751554474184e-05, "loss": 0.44, "step": 49520 }, { "epoch": 1.4459496941650487, "grad_norm": 0.44830258614724816, "learning_rate": 2.8778048121113815e-05, "loss": 0.4229, "step": 49525 }, { "epoch": 1.446095677435366, "grad_norm": 0.4998479652188918, "learning_rate": 2.8775344687753446e-05, "loss": 0.4658, "step": 49530 }, { "epoch": 1.4462416607056832, "grad_norm": 0.48675802149960345, "learning_rate": 2.877264125439308e-05, "loss": 0.4456, "step": 49535 }, { "epoch": 1.4463876439760004, "grad_norm": 0.5152577063514578, "learning_rate": 2.876993782103271e-05, "loss": 0.4631, "step": 49540 }, { "epoch": 1.4465336272463176, "grad_norm": 0.5089891589809915, "learning_rate": 2.8767234387672344e-05, "loss": 0.48, "step": 49545 }, { "epoch": 1.4466796105166349, "grad_norm": 0.4644582193815741, "learning_rate": 2.876453095431198e-05, "loss": 0.4248, "step": 49550 }, { "epoch": 1.446825593786952, "grad_norm": 0.46813344697170667, "learning_rate": 2.8761827520951612e-05, "loss": 0.4493, "step": 49555 }, { "epoch": 1.4469715770572693, "grad_norm": 0.4520187636520457, "learning_rate": 2.8759124087591243e-05, "loss": 0.4281, "step": 49560 }, { "epoch": 1.4471175603275865, "grad_norm": 0.49374457384588466, "learning_rate": 2.8756420654230877e-05, "loss": 0.4259, "step": 49565 }, { "epoch": 1.4472635435979035, "grad_norm": 0.47121348012029984, "learning_rate": 2.8753717220870507e-05, "loss": 0.4096, "step": 49570 }, { "epoch": 1.447409526868221, "grad_norm": 0.4926868879572389, "learning_rate": 2.8751013787510138e-05, "loss": 0.4717, "step": 49575 }, { "epoch": 1.447555510138538, "grad_norm": 0.4876865598578321, "learning_rate": 2.8748310354149772e-05, "loss": 0.4335, "step": 49580 }, { "epoch": 1.4477014934088555, "grad_norm": 0.44866335630491516, "learning_rate": 2.8745606920789403e-05, "loss": 0.4435, "step": 49585 }, { "epoch": 1.4478474766791725, "grad_norm": 0.5175743783413529, "learning_rate": 2.8742903487429033e-05, "loss": 0.4532, "step": 49590 }, { "epoch": 1.44799345994949, "grad_norm": 0.5052115010383545, "learning_rate": 2.8740200054068667e-05, "loss": 0.4692, "step": 49595 }, { "epoch": 1.448139443219807, "grad_norm": 0.44927923586248714, "learning_rate": 2.8737496620708298e-05, "loss": 0.4375, "step": 49600 }, { "epoch": 1.4482854264901242, "grad_norm": 0.45972149870174817, "learning_rate": 2.8734793187347935e-05, "loss": 0.4536, "step": 49605 }, { "epoch": 1.4484314097604414, "grad_norm": 0.4942111478960929, "learning_rate": 2.873208975398757e-05, "loss": 0.4307, "step": 49610 }, { "epoch": 1.4485773930307586, "grad_norm": 0.4482247859599418, "learning_rate": 2.87293863206272e-05, "loss": 0.4113, "step": 49615 }, { "epoch": 1.4487233763010758, "grad_norm": 0.4831443246588162, "learning_rate": 2.872668288726683e-05, "loss": 0.435, "step": 49620 }, { "epoch": 1.448869359571393, "grad_norm": 0.47693642364049516, "learning_rate": 2.8723979453906465e-05, "loss": 0.4527, "step": 49625 }, { "epoch": 1.4490153428417103, "grad_norm": 0.5108420604997214, "learning_rate": 2.8721276020546095e-05, "loss": 0.4462, "step": 49630 }, { "epoch": 1.4491613261120275, "grad_norm": 0.4749656049449445, "learning_rate": 2.8718572587185726e-05, "loss": 0.4506, "step": 49635 }, { "epoch": 1.4493073093823448, "grad_norm": 0.4725536727345575, "learning_rate": 2.871586915382536e-05, "loss": 0.4734, "step": 49640 }, { "epoch": 1.449453292652662, "grad_norm": 0.47217130072402663, "learning_rate": 2.871316572046499e-05, "loss": 0.4576, "step": 49645 }, { "epoch": 1.4495992759229792, "grad_norm": 0.4773580008691058, "learning_rate": 2.871046228710462e-05, "loss": 0.4427, "step": 49650 }, { "epoch": 1.4497452591932964, "grad_norm": 0.4910797302632094, "learning_rate": 2.8707758853744255e-05, "loss": 0.4279, "step": 49655 }, { "epoch": 1.4498912424636137, "grad_norm": 0.5326071624151562, "learning_rate": 2.8705055420383886e-05, "loss": 0.4737, "step": 49660 }, { "epoch": 1.450037225733931, "grad_norm": 0.511704465537916, "learning_rate": 2.8702351987023523e-05, "loss": 0.4762, "step": 49665 }, { "epoch": 1.4501832090042481, "grad_norm": 0.45021775633894734, "learning_rate": 2.8699648553663154e-05, "loss": 0.4543, "step": 49670 }, { "epoch": 1.4503291922745654, "grad_norm": 0.43756088396815457, "learning_rate": 2.8696945120302788e-05, "loss": 0.4396, "step": 49675 }, { "epoch": 1.4504751755448826, "grad_norm": 0.5273790228551034, "learning_rate": 2.869424168694242e-05, "loss": 0.4573, "step": 49680 }, { "epoch": 1.4506211588151998, "grad_norm": 0.4935028986034702, "learning_rate": 2.8691538253582052e-05, "loss": 0.4758, "step": 49685 }, { "epoch": 1.450767142085517, "grad_norm": 0.4512310790774041, "learning_rate": 2.8688834820221683e-05, "loss": 0.4291, "step": 49690 }, { "epoch": 1.4509131253558343, "grad_norm": 0.44964815851272244, "learning_rate": 2.8686131386861314e-05, "loss": 0.4345, "step": 49695 }, { "epoch": 1.4510591086261515, "grad_norm": 0.5057289185367642, "learning_rate": 2.8683427953500948e-05, "loss": 0.4453, "step": 49700 }, { "epoch": 1.4512050918964687, "grad_norm": 0.5268092917676489, "learning_rate": 2.868072452014058e-05, "loss": 0.4685, "step": 49705 }, { "epoch": 1.451351075166786, "grad_norm": 0.5146547191497849, "learning_rate": 2.867802108678021e-05, "loss": 0.4474, "step": 49710 }, { "epoch": 1.451497058437103, "grad_norm": 0.4752243348353841, "learning_rate": 2.8675317653419843e-05, "loss": 0.4227, "step": 49715 }, { "epoch": 1.4516430417074204, "grad_norm": 0.46262983633248134, "learning_rate": 2.867261422005948e-05, "loss": 0.4348, "step": 49720 }, { "epoch": 1.4517890249777374, "grad_norm": 0.46307388803527266, "learning_rate": 2.866991078669911e-05, "loss": 0.4619, "step": 49725 }, { "epoch": 1.4519350082480549, "grad_norm": 0.47969488072687194, "learning_rate": 2.866720735333874e-05, "loss": 0.4631, "step": 49730 }, { "epoch": 1.4520809915183719, "grad_norm": 0.48893236463565914, "learning_rate": 2.8664503919978376e-05, "loss": 0.4315, "step": 49735 }, { "epoch": 1.4522269747886893, "grad_norm": 0.5174790784075911, "learning_rate": 2.8661800486618006e-05, "loss": 0.4431, "step": 49740 }, { "epoch": 1.4523729580590063, "grad_norm": 0.44782176620659775, "learning_rate": 2.865909705325764e-05, "loss": 0.4223, "step": 49745 }, { "epoch": 1.4525189413293236, "grad_norm": 0.4797796165764325, "learning_rate": 2.865639361989727e-05, "loss": 0.4481, "step": 49750 }, { "epoch": 1.4526649245996408, "grad_norm": 0.4648114798170328, "learning_rate": 2.86536901865369e-05, "loss": 0.4391, "step": 49755 }, { "epoch": 1.452810907869958, "grad_norm": 0.516976563176525, "learning_rate": 2.8650986753176536e-05, "loss": 0.4626, "step": 49760 }, { "epoch": 1.4529568911402753, "grad_norm": 0.49311003355066135, "learning_rate": 2.8648283319816166e-05, "loss": 0.4546, "step": 49765 }, { "epoch": 1.4531028744105925, "grad_norm": 0.50774027354224, "learning_rate": 2.8645579886455797e-05, "loss": 0.4398, "step": 49770 }, { "epoch": 1.4532488576809097, "grad_norm": 0.4692562963815571, "learning_rate": 2.8642876453095434e-05, "loss": 0.4632, "step": 49775 }, { "epoch": 1.453394840951227, "grad_norm": 0.44405811787241056, "learning_rate": 2.8640173019735068e-05, "loss": 0.4493, "step": 49780 }, { "epoch": 1.4535408242215442, "grad_norm": 0.5270746922802857, "learning_rate": 2.86374695863747e-05, "loss": 0.4755, "step": 49785 }, { "epoch": 1.4536868074918614, "grad_norm": 0.5126531206200395, "learning_rate": 2.863476615301433e-05, "loss": 0.4588, "step": 49790 }, { "epoch": 1.4538327907621786, "grad_norm": 0.46516211094601023, "learning_rate": 2.8632062719653963e-05, "loss": 0.4555, "step": 49795 }, { "epoch": 1.4539787740324959, "grad_norm": 0.5134803480126551, "learning_rate": 2.8629359286293594e-05, "loss": 0.4763, "step": 49800 }, { "epoch": 1.454124757302813, "grad_norm": 0.47294803533997454, "learning_rate": 2.8626655852933225e-05, "loss": 0.4365, "step": 49805 }, { "epoch": 1.4542707405731303, "grad_norm": 0.49086152708085445, "learning_rate": 2.862395241957286e-05, "loss": 0.4575, "step": 49810 }, { "epoch": 1.4544167238434476, "grad_norm": 0.47716796843599446, "learning_rate": 2.862124898621249e-05, "loss": 0.4221, "step": 49815 }, { "epoch": 1.4545627071137648, "grad_norm": 0.4853832906108028, "learning_rate": 2.8618545552852123e-05, "loss": 0.4512, "step": 49820 }, { "epoch": 1.454708690384082, "grad_norm": 0.5297583276703828, "learning_rate": 2.8615842119491754e-05, "loss": 0.463, "step": 49825 }, { "epoch": 1.4548546736543992, "grad_norm": 0.5010530187785689, "learning_rate": 2.8613138686131385e-05, "loss": 0.432, "step": 49830 }, { "epoch": 1.4550006569247165, "grad_norm": 0.48905220374067176, "learning_rate": 2.8610435252771022e-05, "loss": 0.4768, "step": 49835 }, { "epoch": 1.4551466401950337, "grad_norm": 0.4628892772315885, "learning_rate": 2.8607731819410656e-05, "loss": 0.445, "step": 49840 }, { "epoch": 1.455292623465351, "grad_norm": 0.4980540291504088, "learning_rate": 2.8605028386050287e-05, "loss": 0.4396, "step": 49845 }, { "epoch": 1.4554386067356682, "grad_norm": 0.4926449718541491, "learning_rate": 2.8602324952689917e-05, "loss": 0.4324, "step": 49850 }, { "epoch": 1.4555845900059854, "grad_norm": 0.4958472762943479, "learning_rate": 2.859962151932955e-05, "loss": 0.4299, "step": 49855 }, { "epoch": 1.4557305732763024, "grad_norm": 0.48707274742425416, "learning_rate": 2.8596918085969182e-05, "loss": 0.4396, "step": 49860 }, { "epoch": 1.4558765565466198, "grad_norm": 0.49940034740005823, "learning_rate": 2.8594214652608813e-05, "loss": 0.4695, "step": 49865 }, { "epoch": 1.4560225398169369, "grad_norm": 0.466305769334298, "learning_rate": 2.8591511219248447e-05, "loss": 0.4465, "step": 49870 }, { "epoch": 1.4561685230872543, "grad_norm": 0.4370406576446054, "learning_rate": 2.8588807785888077e-05, "loss": 0.4212, "step": 49875 }, { "epoch": 1.4563145063575713, "grad_norm": 0.5128080316765976, "learning_rate": 2.8586104352527708e-05, "loss": 0.462, "step": 49880 }, { "epoch": 1.4564604896278888, "grad_norm": 0.4463367842045413, "learning_rate": 2.8583400919167342e-05, "loss": 0.4561, "step": 49885 }, { "epoch": 1.4566064728982058, "grad_norm": 0.5170952775035396, "learning_rate": 2.858069748580698e-05, "loss": 0.4592, "step": 49890 }, { "epoch": 1.4567524561685232, "grad_norm": 0.5064858643573483, "learning_rate": 2.857799405244661e-05, "loss": 0.4658, "step": 49895 }, { "epoch": 1.4568984394388402, "grad_norm": 0.4991983631534972, "learning_rate": 2.8575290619086244e-05, "loss": 0.4696, "step": 49900 }, { "epoch": 1.4570444227091575, "grad_norm": 0.49593312123316813, "learning_rate": 2.8572587185725874e-05, "loss": 0.4713, "step": 49905 }, { "epoch": 1.4571904059794747, "grad_norm": 0.5081779985521244, "learning_rate": 2.8569883752365505e-05, "loss": 0.4494, "step": 49910 }, { "epoch": 1.457336389249792, "grad_norm": 0.49569889059744726, "learning_rate": 2.856718031900514e-05, "loss": 0.4603, "step": 49915 }, { "epoch": 1.4574823725201091, "grad_norm": 0.5423656351537839, "learning_rate": 2.856447688564477e-05, "loss": 0.4761, "step": 49920 }, { "epoch": 1.4576283557904264, "grad_norm": 0.45365183653155106, "learning_rate": 2.85617734522844e-05, "loss": 0.4362, "step": 49925 }, { "epoch": 1.4577743390607436, "grad_norm": 0.46642062000535667, "learning_rate": 2.8559070018924034e-05, "loss": 0.4182, "step": 49930 }, { "epoch": 1.4579203223310608, "grad_norm": 0.48233165676801765, "learning_rate": 2.8556366585563665e-05, "loss": 0.4285, "step": 49935 }, { "epoch": 1.458066305601378, "grad_norm": 0.4658914567035928, "learning_rate": 2.8553663152203296e-05, "loss": 0.4745, "step": 49940 }, { "epoch": 1.4582122888716953, "grad_norm": 0.4763074008719042, "learning_rate": 2.8550959718842933e-05, "loss": 0.4536, "step": 49945 }, { "epoch": 1.4583582721420125, "grad_norm": 0.5025123120425702, "learning_rate": 2.8548256285482567e-05, "loss": 0.4474, "step": 49950 }, { "epoch": 1.4585042554123298, "grad_norm": 0.4883024275113869, "learning_rate": 2.8545552852122198e-05, "loss": 0.4406, "step": 49955 }, { "epoch": 1.458650238682647, "grad_norm": 0.48412671992802825, "learning_rate": 2.854284941876183e-05, "loss": 0.4612, "step": 49960 }, { "epoch": 1.4587962219529642, "grad_norm": 0.4464627458452338, "learning_rate": 2.8540145985401462e-05, "loss": 0.4136, "step": 49965 }, { "epoch": 1.4589422052232814, "grad_norm": 0.5210729320240951, "learning_rate": 2.8537442552041093e-05, "loss": 0.4539, "step": 49970 }, { "epoch": 1.4590881884935987, "grad_norm": 0.4573934752116354, "learning_rate": 2.8534739118680727e-05, "loss": 0.4145, "step": 49975 }, { "epoch": 1.459234171763916, "grad_norm": 0.5019355455874552, "learning_rate": 2.8532035685320358e-05, "loss": 0.4481, "step": 49980 }, { "epoch": 1.4593801550342331, "grad_norm": 0.4592560450647137, "learning_rate": 2.8529332251959988e-05, "loss": 0.4473, "step": 49985 }, { "epoch": 1.4595261383045504, "grad_norm": 0.4655813443745121, "learning_rate": 2.8526628818599622e-05, "loss": 0.44, "step": 49990 }, { "epoch": 1.4596721215748676, "grad_norm": 0.48684933114388984, "learning_rate": 2.8523925385239253e-05, "loss": 0.4491, "step": 49995 }, { "epoch": 1.4598181048451848, "grad_norm": 0.5192346590441532, "learning_rate": 2.8521221951878883e-05, "loss": 0.4407, "step": 50000 }, { "epoch": 1.459964088115502, "grad_norm": 0.4949337263989498, "learning_rate": 2.851851851851852e-05, "loss": 0.4736, "step": 50005 }, { "epoch": 1.4601100713858193, "grad_norm": 0.492447517214787, "learning_rate": 2.8515815085158155e-05, "loss": 0.4418, "step": 50010 }, { "epoch": 1.4602560546561363, "grad_norm": 0.4775156796536137, "learning_rate": 2.8513111651797785e-05, "loss": 0.45, "step": 50015 }, { "epoch": 1.4604020379264537, "grad_norm": 0.48254924879610583, "learning_rate": 2.8510408218437416e-05, "loss": 0.4486, "step": 50020 }, { "epoch": 1.4605480211967707, "grad_norm": 0.4837995306132325, "learning_rate": 2.850770478507705e-05, "loss": 0.443, "step": 50025 }, { "epoch": 1.4606940044670882, "grad_norm": 0.46809216099005985, "learning_rate": 2.850500135171668e-05, "loss": 0.4412, "step": 50030 }, { "epoch": 1.4608399877374052, "grad_norm": 0.5182950865000908, "learning_rate": 2.8502297918356315e-05, "loss": 0.4693, "step": 50035 }, { "epoch": 1.4609859710077227, "grad_norm": 0.478633498270258, "learning_rate": 2.8499594484995945e-05, "loss": 0.4128, "step": 50040 }, { "epoch": 1.4611319542780397, "grad_norm": 0.4834278144152104, "learning_rate": 2.8496891051635576e-05, "loss": 0.4518, "step": 50045 }, { "epoch": 1.4612779375483569, "grad_norm": 0.47732727151361626, "learning_rate": 2.849418761827521e-05, "loss": 0.4296, "step": 50050 }, { "epoch": 1.4614239208186741, "grad_norm": 0.48028101221911057, "learning_rate": 2.849148418491484e-05, "loss": 0.451, "step": 50055 }, { "epoch": 1.4615699040889913, "grad_norm": 0.4986457262528594, "learning_rate": 2.8488780751554478e-05, "loss": 0.4803, "step": 50060 }, { "epoch": 1.4617158873593086, "grad_norm": 0.4769590570904923, "learning_rate": 2.848607731819411e-05, "loss": 0.4541, "step": 50065 }, { "epoch": 1.4618618706296258, "grad_norm": 0.48926756008871963, "learning_rate": 2.8483373884833743e-05, "loss": 0.4378, "step": 50070 }, { "epoch": 1.462007853899943, "grad_norm": 0.4595378125915204, "learning_rate": 2.8480670451473373e-05, "loss": 0.4465, "step": 50075 }, { "epoch": 1.4621538371702603, "grad_norm": 0.4898764962335953, "learning_rate": 2.8477967018113004e-05, "loss": 0.4473, "step": 50080 }, { "epoch": 1.4622998204405775, "grad_norm": 0.45945990012146914, "learning_rate": 2.8475263584752638e-05, "loss": 0.4419, "step": 50085 }, { "epoch": 1.4624458037108947, "grad_norm": 0.5128412010771577, "learning_rate": 2.847256015139227e-05, "loss": 0.444, "step": 50090 }, { "epoch": 1.462591786981212, "grad_norm": 0.5224025248282274, "learning_rate": 2.8469856718031902e-05, "loss": 0.4439, "step": 50095 }, { "epoch": 1.4627377702515292, "grad_norm": 0.47260771247682365, "learning_rate": 2.8467153284671533e-05, "loss": 0.4651, "step": 50100 }, { "epoch": 1.4628837535218464, "grad_norm": 0.48551017339048497, "learning_rate": 2.8464449851311164e-05, "loss": 0.4662, "step": 50105 }, { "epoch": 1.4630297367921636, "grad_norm": 0.4914508241584341, "learning_rate": 2.8461746417950798e-05, "loss": 0.432, "step": 50110 }, { "epoch": 1.4631757200624809, "grad_norm": 0.4331114635973596, "learning_rate": 2.8459042984590435e-05, "loss": 0.4375, "step": 50115 }, { "epoch": 1.463321703332798, "grad_norm": 0.4992618027879592, "learning_rate": 2.8456339551230066e-05, "loss": 0.4847, "step": 50120 }, { "epoch": 1.4634676866031153, "grad_norm": 0.4716682102096964, "learning_rate": 2.8453636117869696e-05, "loss": 0.4256, "step": 50125 }, { "epoch": 1.4636136698734326, "grad_norm": 0.44967547180908907, "learning_rate": 2.845093268450933e-05, "loss": 0.4062, "step": 50130 }, { "epoch": 1.4637596531437498, "grad_norm": 0.48215877723866646, "learning_rate": 2.844822925114896e-05, "loss": 0.4324, "step": 50135 }, { "epoch": 1.463905636414067, "grad_norm": 0.5109844550625534, "learning_rate": 2.844552581778859e-05, "loss": 0.4572, "step": 50140 }, { "epoch": 1.4640516196843842, "grad_norm": 0.5307550643061534, "learning_rate": 2.8442822384428226e-05, "loss": 0.4602, "step": 50145 }, { "epoch": 1.4641976029547015, "grad_norm": 0.5098126363843254, "learning_rate": 2.8440118951067856e-05, "loss": 0.4557, "step": 50150 }, { "epoch": 1.4643435862250187, "grad_norm": 0.4821435338999527, "learning_rate": 2.8437415517707487e-05, "loss": 0.4447, "step": 50155 }, { "epoch": 1.4644895694953357, "grad_norm": 0.4481445107592871, "learning_rate": 2.843471208434712e-05, "loss": 0.4628, "step": 50160 }, { "epoch": 1.4646355527656532, "grad_norm": 0.4782614698451477, "learning_rate": 2.843200865098675e-05, "loss": 0.4311, "step": 50165 }, { "epoch": 1.4647815360359702, "grad_norm": 0.4608343425966308, "learning_rate": 2.8429305217626386e-05, "loss": 0.4375, "step": 50170 }, { "epoch": 1.4649275193062876, "grad_norm": 0.5387301724462058, "learning_rate": 2.8426601784266023e-05, "loss": 0.46, "step": 50175 }, { "epoch": 1.4650735025766046, "grad_norm": 0.4553385699401515, "learning_rate": 2.8423898350905654e-05, "loss": 0.463, "step": 50180 }, { "epoch": 1.465219485846922, "grad_norm": 0.4727048316918873, "learning_rate": 2.8421194917545284e-05, "loss": 0.4439, "step": 50185 }, { "epoch": 1.465365469117239, "grad_norm": 0.5356574429768352, "learning_rate": 2.8418491484184918e-05, "loss": 0.4525, "step": 50190 }, { "epoch": 1.4655114523875563, "grad_norm": 0.457046446244823, "learning_rate": 2.841578805082455e-05, "loss": 0.4459, "step": 50195 }, { "epoch": 1.4656574356578735, "grad_norm": 0.43311360681944294, "learning_rate": 2.841308461746418e-05, "loss": 0.4294, "step": 50200 }, { "epoch": 1.4658034189281908, "grad_norm": 0.48562487895364936, "learning_rate": 2.8410381184103813e-05, "loss": 0.4355, "step": 50205 }, { "epoch": 1.465949402198508, "grad_norm": 0.4527203022470069, "learning_rate": 2.8407677750743444e-05, "loss": 0.4355, "step": 50210 }, { "epoch": 1.4660953854688252, "grad_norm": 0.4793105861712284, "learning_rate": 2.8404974317383075e-05, "loss": 0.4455, "step": 50215 }, { "epoch": 1.4662413687391425, "grad_norm": 0.5172336692568906, "learning_rate": 2.840227088402271e-05, "loss": 0.4505, "step": 50220 }, { "epoch": 1.4663873520094597, "grad_norm": 0.4694773800593438, "learning_rate": 2.839956745066234e-05, "loss": 0.4488, "step": 50225 }, { "epoch": 1.466533335279777, "grad_norm": 0.49793341816822173, "learning_rate": 2.8396864017301977e-05, "loss": 0.4202, "step": 50230 }, { "epoch": 1.4666793185500941, "grad_norm": 0.4651281090906742, "learning_rate": 2.839416058394161e-05, "loss": 0.451, "step": 50235 }, { "epoch": 1.4668253018204114, "grad_norm": 0.48657826164939044, "learning_rate": 2.839145715058124e-05, "loss": 0.4654, "step": 50240 }, { "epoch": 1.4669712850907286, "grad_norm": 0.4404370310093295, "learning_rate": 2.8388753717220872e-05, "loss": 0.4538, "step": 50245 }, { "epoch": 1.4671172683610458, "grad_norm": 0.5621984085155631, "learning_rate": 2.8386050283860506e-05, "loss": 0.4771, "step": 50250 }, { "epoch": 1.467263251631363, "grad_norm": 0.47015457006996686, "learning_rate": 2.8383346850500137e-05, "loss": 0.4261, "step": 50255 }, { "epoch": 1.4674092349016803, "grad_norm": 0.49320615371534454, "learning_rate": 2.8380643417139767e-05, "loss": 0.4636, "step": 50260 }, { "epoch": 1.4675552181719975, "grad_norm": 0.5118208436371519, "learning_rate": 2.83779399837794e-05, "loss": 0.4495, "step": 50265 }, { "epoch": 1.4677012014423148, "grad_norm": 0.4959243921805045, "learning_rate": 2.8375236550419032e-05, "loss": 0.446, "step": 50270 }, { "epoch": 1.467847184712632, "grad_norm": 0.5335631531927483, "learning_rate": 2.8372533117058663e-05, "loss": 0.4535, "step": 50275 }, { "epoch": 1.4679931679829492, "grad_norm": 0.496999114540193, "learning_rate": 2.8369829683698297e-05, "loss": 0.4396, "step": 50280 }, { "epoch": 1.4681391512532664, "grad_norm": 0.4574179270449955, "learning_rate": 2.8367126250337934e-05, "loss": 0.4655, "step": 50285 }, { "epoch": 1.4682851345235837, "grad_norm": 0.4760313334785554, "learning_rate": 2.8364422816977565e-05, "loss": 0.4391, "step": 50290 }, { "epoch": 1.468431117793901, "grad_norm": 0.47791363294096856, "learning_rate": 2.8361719383617195e-05, "loss": 0.4666, "step": 50295 }, { "epoch": 1.4685771010642181, "grad_norm": 0.4764672062181465, "learning_rate": 2.835901595025683e-05, "loss": 0.4443, "step": 50300 }, { "epoch": 1.4687230843345351, "grad_norm": 0.49752851467895975, "learning_rate": 2.835631251689646e-05, "loss": 0.412, "step": 50305 }, { "epoch": 1.4688690676048526, "grad_norm": 0.49271443636122336, "learning_rate": 2.8353609083536094e-05, "loss": 0.4386, "step": 50310 }, { "epoch": 1.4690150508751696, "grad_norm": 0.5047253867234199, "learning_rate": 2.8350905650175724e-05, "loss": 0.4443, "step": 50315 }, { "epoch": 1.469161034145487, "grad_norm": 0.4803365651698704, "learning_rate": 2.8348202216815355e-05, "loss": 0.4496, "step": 50320 }, { "epoch": 1.469307017415804, "grad_norm": 0.4782401862283148, "learning_rate": 2.834549878345499e-05, "loss": 0.4243, "step": 50325 }, { "epoch": 1.4694530006861215, "grad_norm": 0.47944074749961474, "learning_rate": 2.834279535009462e-05, "loss": 0.4694, "step": 50330 }, { "epoch": 1.4695989839564385, "grad_norm": 0.49396757775682437, "learning_rate": 2.834009191673425e-05, "loss": 0.4681, "step": 50335 }, { "epoch": 1.4697449672267557, "grad_norm": 0.5267541307601528, "learning_rate": 2.8337388483373888e-05, "loss": 0.453, "step": 50340 }, { "epoch": 1.469890950497073, "grad_norm": 0.47157196561814335, "learning_rate": 2.8334685050013522e-05, "loss": 0.422, "step": 50345 }, { "epoch": 1.4700369337673902, "grad_norm": 0.5022750030911914, "learning_rate": 2.8331981616653152e-05, "loss": 0.4564, "step": 50350 }, { "epoch": 1.4701829170377074, "grad_norm": 0.4766039808238142, "learning_rate": 2.8329278183292783e-05, "loss": 0.4439, "step": 50355 }, { "epoch": 1.4703289003080247, "grad_norm": 0.48741114023472604, "learning_rate": 2.8326574749932417e-05, "loss": 0.463, "step": 50360 }, { "epoch": 1.4704748835783419, "grad_norm": 0.5179094758783911, "learning_rate": 2.8323871316572048e-05, "loss": 0.4432, "step": 50365 }, { "epoch": 1.4706208668486591, "grad_norm": 0.4342469593624239, "learning_rate": 2.832116788321168e-05, "loss": 0.4508, "step": 50370 }, { "epoch": 1.4707668501189763, "grad_norm": 0.4874741631916088, "learning_rate": 2.8318464449851312e-05, "loss": 0.44, "step": 50375 }, { "epoch": 1.4709128333892936, "grad_norm": 0.4900722706858197, "learning_rate": 2.8315761016490943e-05, "loss": 0.4621, "step": 50380 }, { "epoch": 1.4710588166596108, "grad_norm": 0.49362411775883014, "learning_rate": 2.8313057583130577e-05, "loss": 0.4519, "step": 50385 }, { "epoch": 1.471204799929928, "grad_norm": 0.49121594708670324, "learning_rate": 2.8310354149770208e-05, "loss": 0.4681, "step": 50390 }, { "epoch": 1.4713507832002453, "grad_norm": 0.47595138482240223, "learning_rate": 2.8307650716409838e-05, "loss": 0.452, "step": 50395 }, { "epoch": 1.4714967664705625, "grad_norm": 0.47467887612118076, "learning_rate": 2.8304947283049476e-05, "loss": 0.4269, "step": 50400 }, { "epoch": 1.4716427497408797, "grad_norm": 0.4563121094786914, "learning_rate": 2.830224384968911e-05, "loss": 0.4078, "step": 50405 }, { "epoch": 1.471788733011197, "grad_norm": 0.4643708378213718, "learning_rate": 2.829954041632874e-05, "loss": 0.4327, "step": 50410 }, { "epoch": 1.4719347162815142, "grad_norm": 0.4854505810378335, "learning_rate": 2.829683698296837e-05, "loss": 0.4539, "step": 50415 }, { "epoch": 1.4720806995518314, "grad_norm": 0.48962413520711434, "learning_rate": 2.8294133549608005e-05, "loss": 0.4787, "step": 50420 }, { "epoch": 1.4722266828221486, "grad_norm": 0.46567530622338266, "learning_rate": 2.8291430116247635e-05, "loss": 0.434, "step": 50425 }, { "epoch": 1.4723726660924659, "grad_norm": 0.4796246481571831, "learning_rate": 2.8288726682887266e-05, "loss": 0.4455, "step": 50430 }, { "epoch": 1.472518649362783, "grad_norm": 0.5029202316146494, "learning_rate": 2.82860232495269e-05, "loss": 0.4649, "step": 50435 }, { "epoch": 1.4726646326331003, "grad_norm": 0.4849096413265157, "learning_rate": 2.828331981616653e-05, "loss": 0.4472, "step": 50440 }, { "epoch": 1.4728106159034176, "grad_norm": 0.46126670338195797, "learning_rate": 2.8280616382806165e-05, "loss": 0.4426, "step": 50445 }, { "epoch": 1.4729565991737346, "grad_norm": 0.4548184860724687, "learning_rate": 2.8277912949445795e-05, "loss": 0.4577, "step": 50450 }, { "epoch": 1.473102582444052, "grad_norm": 0.4667500234186149, "learning_rate": 2.8275209516085433e-05, "loss": 0.4513, "step": 50455 }, { "epoch": 1.473248565714369, "grad_norm": 0.46367278880833307, "learning_rate": 2.8272506082725063e-05, "loss": 0.4344, "step": 50460 }, { "epoch": 1.4733945489846865, "grad_norm": 0.47548737547937764, "learning_rate": 2.8269802649364697e-05, "loss": 0.4439, "step": 50465 }, { "epoch": 1.4735405322550035, "grad_norm": 0.49118622833682135, "learning_rate": 2.8267099216004328e-05, "loss": 0.4658, "step": 50470 }, { "epoch": 1.473686515525321, "grad_norm": 0.4741455639935117, "learning_rate": 2.826439578264396e-05, "loss": 0.4673, "step": 50475 }, { "epoch": 1.473832498795638, "grad_norm": 0.5082985802082788, "learning_rate": 2.8261692349283593e-05, "loss": 0.4359, "step": 50480 }, { "epoch": 1.4739784820659552, "grad_norm": 0.48960803000573677, "learning_rate": 2.8258988915923223e-05, "loss": 0.4405, "step": 50485 }, { "epoch": 1.4741244653362724, "grad_norm": 0.4508608599439477, "learning_rate": 2.8256285482562854e-05, "loss": 0.4486, "step": 50490 }, { "epoch": 1.4742704486065896, "grad_norm": 0.47896001196466786, "learning_rate": 2.8253582049202488e-05, "loss": 0.436, "step": 50495 }, { "epoch": 1.4744164318769069, "grad_norm": 0.4557611180378843, "learning_rate": 2.825087861584212e-05, "loss": 0.4505, "step": 50500 }, { "epoch": 1.474562415147224, "grad_norm": 0.5078102368981299, "learning_rate": 2.8248175182481753e-05, "loss": 0.443, "step": 50505 }, { "epoch": 1.4747083984175413, "grad_norm": 0.4898427106999037, "learning_rate": 2.824547174912139e-05, "loss": 0.4488, "step": 50510 }, { "epoch": 1.4748543816878585, "grad_norm": 0.467701770644146, "learning_rate": 2.824276831576102e-05, "loss": 0.4351, "step": 50515 }, { "epoch": 1.4750003649581758, "grad_norm": 0.49012597736866076, "learning_rate": 2.824006488240065e-05, "loss": 0.4636, "step": 50520 }, { "epoch": 1.475146348228493, "grad_norm": 0.4780606443544282, "learning_rate": 2.8237361449040285e-05, "loss": 0.4509, "step": 50525 }, { "epoch": 1.4752923314988102, "grad_norm": 0.5221950627368898, "learning_rate": 2.8234658015679916e-05, "loss": 0.4446, "step": 50530 }, { "epoch": 1.4754383147691275, "grad_norm": 0.4641984470205739, "learning_rate": 2.8231954582319546e-05, "loss": 0.4615, "step": 50535 }, { "epoch": 1.4755842980394447, "grad_norm": 0.46270881829687427, "learning_rate": 2.822925114895918e-05, "loss": 0.4635, "step": 50540 }, { "epoch": 1.475730281309762, "grad_norm": 0.49112019523091344, "learning_rate": 2.822654771559881e-05, "loss": 0.4519, "step": 50545 }, { "epoch": 1.4758762645800791, "grad_norm": 0.4575849437103681, "learning_rate": 2.822384428223844e-05, "loss": 0.4421, "step": 50550 }, { "epoch": 1.4760222478503964, "grad_norm": 0.4918185040649932, "learning_rate": 2.8221140848878076e-05, "loss": 0.4454, "step": 50555 }, { "epoch": 1.4761682311207136, "grad_norm": 0.4893902253481344, "learning_rate": 2.8218437415517706e-05, "loss": 0.4733, "step": 50560 }, { "epoch": 1.4763142143910308, "grad_norm": 0.48716573688417203, "learning_rate": 2.8215733982157337e-05, "loss": 0.4511, "step": 50565 }, { "epoch": 1.476460197661348, "grad_norm": 0.46799362340081063, "learning_rate": 2.8213030548796974e-05, "loss": 0.4657, "step": 50570 }, { "epoch": 1.4766061809316653, "grad_norm": 0.44909777080349467, "learning_rate": 2.821032711543661e-05, "loss": 0.4282, "step": 50575 }, { "epoch": 1.4767521642019825, "grad_norm": 0.46725712457317875, "learning_rate": 2.820762368207624e-05, "loss": 0.45, "step": 50580 }, { "epoch": 1.4768981474722997, "grad_norm": 0.4231811858336979, "learning_rate": 2.8204920248715873e-05, "loss": 0.4316, "step": 50585 }, { "epoch": 1.477044130742617, "grad_norm": 0.4858260461368962, "learning_rate": 2.8202216815355504e-05, "loss": 0.4563, "step": 50590 }, { "epoch": 1.477190114012934, "grad_norm": 0.5132627209665573, "learning_rate": 2.8199513381995134e-05, "loss": 0.4624, "step": 50595 }, { "epoch": 1.4773360972832514, "grad_norm": 0.4669764912388512, "learning_rate": 2.8196809948634768e-05, "loss": 0.4803, "step": 50600 }, { "epoch": 1.4774820805535684, "grad_norm": 0.46349190739405494, "learning_rate": 2.81941065152744e-05, "loss": 0.4546, "step": 50605 }, { "epoch": 1.477628063823886, "grad_norm": 0.48706128111447533, "learning_rate": 2.819140308191403e-05, "loss": 0.4486, "step": 50610 }, { "epoch": 1.477774047094203, "grad_norm": 0.48456972425363554, "learning_rate": 2.8188699648553663e-05, "loss": 0.4759, "step": 50615 }, { "epoch": 1.4779200303645204, "grad_norm": 0.462600099059944, "learning_rate": 2.8185996215193294e-05, "loss": 0.4443, "step": 50620 }, { "epoch": 1.4780660136348374, "grad_norm": 0.4543469202218931, "learning_rate": 2.818329278183293e-05, "loss": 0.4439, "step": 50625 }, { "epoch": 1.4782119969051546, "grad_norm": 0.48120799820807214, "learning_rate": 2.8180589348472562e-05, "loss": 0.4369, "step": 50630 }, { "epoch": 1.4783579801754718, "grad_norm": 0.4820250812555812, "learning_rate": 2.8177885915112196e-05, "loss": 0.4733, "step": 50635 }, { "epoch": 1.478503963445789, "grad_norm": 0.4491366372654031, "learning_rate": 2.8175182481751827e-05, "loss": 0.4499, "step": 50640 }, { "epoch": 1.4786499467161063, "grad_norm": 0.5046954436866652, "learning_rate": 2.817247904839146e-05, "loss": 0.4496, "step": 50645 }, { "epoch": 1.4787959299864235, "grad_norm": 0.49243219706966207, "learning_rate": 2.816977561503109e-05, "loss": 0.4297, "step": 50650 }, { "epoch": 1.4789419132567407, "grad_norm": 0.47188263710964773, "learning_rate": 2.8167072181670722e-05, "loss": 0.4436, "step": 50655 }, { "epoch": 1.479087896527058, "grad_norm": 0.4544704542485561, "learning_rate": 2.8164368748310356e-05, "loss": 0.4482, "step": 50660 }, { "epoch": 1.4792338797973752, "grad_norm": 0.49293816363993725, "learning_rate": 2.8161665314949987e-05, "loss": 0.4536, "step": 50665 }, { "epoch": 1.4793798630676924, "grad_norm": 0.46165351603348503, "learning_rate": 2.8158961881589617e-05, "loss": 0.4343, "step": 50670 }, { "epoch": 1.4795258463380097, "grad_norm": 0.48884532089205174, "learning_rate": 2.815625844822925e-05, "loss": 0.4457, "step": 50675 }, { "epoch": 1.4796718296083269, "grad_norm": 0.4591267126816621, "learning_rate": 2.815355501486889e-05, "loss": 0.4438, "step": 50680 }, { "epoch": 1.4798178128786441, "grad_norm": 0.4741988692541909, "learning_rate": 2.815085158150852e-05, "loss": 0.4422, "step": 50685 }, { "epoch": 1.4799637961489613, "grad_norm": 0.45774882079930995, "learning_rate": 2.814814814814815e-05, "loss": 0.4571, "step": 50690 }, { "epoch": 1.4801097794192786, "grad_norm": 0.46496057378559685, "learning_rate": 2.8145444714787784e-05, "loss": 0.454, "step": 50695 }, { "epoch": 1.4802557626895958, "grad_norm": 0.4898279830141311, "learning_rate": 2.8142741281427415e-05, "loss": 0.4552, "step": 50700 }, { "epoch": 1.480401745959913, "grad_norm": 0.4434426524746165, "learning_rate": 2.8140037848067045e-05, "loss": 0.4577, "step": 50705 }, { "epoch": 1.4805477292302303, "grad_norm": 0.4928880072567003, "learning_rate": 2.813733441470668e-05, "loss": 0.4475, "step": 50710 }, { "epoch": 1.4806937125005475, "grad_norm": 0.5227985295945065, "learning_rate": 2.813463098134631e-05, "loss": 0.4632, "step": 50715 }, { "epoch": 1.4808396957708647, "grad_norm": 0.5175711920184262, "learning_rate": 2.8131927547985944e-05, "loss": 0.4408, "step": 50720 }, { "epoch": 1.480985679041182, "grad_norm": 0.47751617339253877, "learning_rate": 2.8129224114625574e-05, "loss": 0.465, "step": 50725 }, { "epoch": 1.4811316623114992, "grad_norm": 0.4508423296387418, "learning_rate": 2.8126520681265205e-05, "loss": 0.4348, "step": 50730 }, { "epoch": 1.4812776455818164, "grad_norm": 0.4962693942172858, "learning_rate": 2.812381724790484e-05, "loss": 0.4758, "step": 50735 }, { "epoch": 1.4814236288521334, "grad_norm": 0.4551592642786554, "learning_rate": 2.8121113814544477e-05, "loss": 0.4411, "step": 50740 }, { "epoch": 1.4815696121224509, "grad_norm": 0.4412081282624496, "learning_rate": 2.8118410381184107e-05, "loss": 0.4146, "step": 50745 }, { "epoch": 1.4817155953927679, "grad_norm": 0.5443248337218665, "learning_rate": 2.8115706947823738e-05, "loss": 0.4552, "step": 50750 }, { "epoch": 1.4818615786630853, "grad_norm": 0.4945212685926349, "learning_rate": 2.8113003514463372e-05, "loss": 0.4569, "step": 50755 }, { "epoch": 1.4820075619334023, "grad_norm": 0.4747334298267192, "learning_rate": 2.8110300081103002e-05, "loss": 0.4397, "step": 50760 }, { "epoch": 1.4821535452037198, "grad_norm": 0.4912783093511498, "learning_rate": 2.8107596647742633e-05, "loss": 0.4337, "step": 50765 }, { "epoch": 1.4822995284740368, "grad_norm": 0.41824137357971997, "learning_rate": 2.8104893214382267e-05, "loss": 0.4317, "step": 50770 }, { "epoch": 1.482445511744354, "grad_norm": 0.4701917901344871, "learning_rate": 2.8102189781021898e-05, "loss": 0.4621, "step": 50775 }, { "epoch": 1.4825914950146712, "grad_norm": 0.47499850664270443, "learning_rate": 2.809948634766153e-05, "loss": 0.4377, "step": 50780 }, { "epoch": 1.4827374782849885, "grad_norm": 0.4471038052790417, "learning_rate": 2.8096782914301162e-05, "loss": 0.447, "step": 50785 }, { "epoch": 1.4828834615553057, "grad_norm": 0.4761795410815011, "learning_rate": 2.8094079480940793e-05, "loss": 0.4645, "step": 50790 }, { "epoch": 1.483029444825623, "grad_norm": 0.4764376837620997, "learning_rate": 2.809137604758043e-05, "loss": 0.4636, "step": 50795 }, { "epoch": 1.4831754280959402, "grad_norm": 0.44077422562521296, "learning_rate": 2.8088672614220064e-05, "loss": 0.4366, "step": 50800 }, { "epoch": 1.4833214113662574, "grad_norm": 0.45493940241534503, "learning_rate": 2.8085969180859695e-05, "loss": 0.4514, "step": 50805 }, { "epoch": 1.4834673946365746, "grad_norm": 0.4896409578276411, "learning_rate": 2.8083265747499326e-05, "loss": 0.4583, "step": 50810 }, { "epoch": 1.4836133779068919, "grad_norm": 0.5051139920347588, "learning_rate": 2.808056231413896e-05, "loss": 0.4667, "step": 50815 }, { "epoch": 1.483759361177209, "grad_norm": 0.4791147407586507, "learning_rate": 2.807785888077859e-05, "loss": 0.4522, "step": 50820 }, { "epoch": 1.4839053444475263, "grad_norm": 0.5236699248961834, "learning_rate": 2.807515544741822e-05, "loss": 0.4421, "step": 50825 }, { "epoch": 1.4840513277178435, "grad_norm": 0.47448462093529103, "learning_rate": 2.8072452014057855e-05, "loss": 0.4177, "step": 50830 }, { "epoch": 1.4841973109881608, "grad_norm": 0.5330741785987965, "learning_rate": 2.8069748580697485e-05, "loss": 0.4643, "step": 50835 }, { "epoch": 1.484343294258478, "grad_norm": 0.4561043876218257, "learning_rate": 2.8067045147337116e-05, "loss": 0.4432, "step": 50840 }, { "epoch": 1.4844892775287952, "grad_norm": 0.5084419297283072, "learning_rate": 2.806434171397675e-05, "loss": 0.444, "step": 50845 }, { "epoch": 1.4846352607991125, "grad_norm": 0.47303015919597435, "learning_rate": 2.8061638280616387e-05, "loss": 0.4452, "step": 50850 }, { "epoch": 1.4847812440694297, "grad_norm": 0.47192264556059044, "learning_rate": 2.8058934847256018e-05, "loss": 0.4476, "step": 50855 }, { "epoch": 1.484927227339747, "grad_norm": 0.5054971199219419, "learning_rate": 2.8056231413895652e-05, "loss": 0.4704, "step": 50860 }, { "epoch": 1.4850732106100641, "grad_norm": 0.4975350721168739, "learning_rate": 2.8053527980535283e-05, "loss": 0.4616, "step": 50865 }, { "epoch": 1.4852191938803814, "grad_norm": 0.4785829260154385, "learning_rate": 2.8050824547174913e-05, "loss": 0.4284, "step": 50870 }, { "epoch": 1.4853651771506986, "grad_norm": 0.46249783484826296, "learning_rate": 2.8048121113814547e-05, "loss": 0.4424, "step": 50875 }, { "epoch": 1.4855111604210158, "grad_norm": 0.5171881845005495, "learning_rate": 2.8045417680454178e-05, "loss": 0.4457, "step": 50880 }, { "epoch": 1.4856571436913328, "grad_norm": 0.4886927169394983, "learning_rate": 2.804271424709381e-05, "loss": 0.4553, "step": 50885 }, { "epoch": 1.4858031269616503, "grad_norm": 0.42998518080308046, "learning_rate": 2.8040010813733443e-05, "loss": 0.4245, "step": 50890 }, { "epoch": 1.4859491102319673, "grad_norm": 0.46963686403113275, "learning_rate": 2.8037307380373073e-05, "loss": 0.4232, "step": 50895 }, { "epoch": 1.4860950935022847, "grad_norm": 0.4489925810967602, "learning_rate": 2.8034603947012704e-05, "loss": 0.4294, "step": 50900 }, { "epoch": 1.4862410767726018, "grad_norm": 0.504679597757789, "learning_rate": 2.8031900513652338e-05, "loss": 0.4704, "step": 50905 }, { "epoch": 1.4863870600429192, "grad_norm": 0.4594000471163355, "learning_rate": 2.8029197080291975e-05, "loss": 0.4389, "step": 50910 }, { "epoch": 1.4865330433132362, "grad_norm": 0.469249202443518, "learning_rate": 2.8026493646931606e-05, "loss": 0.4602, "step": 50915 }, { "epoch": 1.4866790265835534, "grad_norm": 0.5272059698739648, "learning_rate": 2.802379021357124e-05, "loss": 0.4766, "step": 50920 }, { "epoch": 1.4868250098538707, "grad_norm": 0.5297749766250645, "learning_rate": 2.802108678021087e-05, "loss": 0.4303, "step": 50925 }, { "epoch": 1.486970993124188, "grad_norm": 0.48427842110496094, "learning_rate": 2.80183833468505e-05, "loss": 0.4621, "step": 50930 }, { "epoch": 1.4871169763945051, "grad_norm": 0.44262900110868514, "learning_rate": 2.8015679913490135e-05, "loss": 0.4111, "step": 50935 }, { "epoch": 1.4872629596648224, "grad_norm": 0.4572315037158865, "learning_rate": 2.8012976480129766e-05, "loss": 0.4551, "step": 50940 }, { "epoch": 1.4874089429351396, "grad_norm": 0.47119593462813797, "learning_rate": 2.8010273046769396e-05, "loss": 0.4253, "step": 50945 }, { "epoch": 1.4875549262054568, "grad_norm": 0.4697823143263162, "learning_rate": 2.800756961340903e-05, "loss": 0.4566, "step": 50950 }, { "epoch": 1.487700909475774, "grad_norm": 0.45871646065625626, "learning_rate": 2.800486618004866e-05, "loss": 0.458, "step": 50955 }, { "epoch": 1.4878468927460913, "grad_norm": 0.4983961728858193, "learning_rate": 2.800216274668829e-05, "loss": 0.4456, "step": 50960 }, { "epoch": 1.4879928760164085, "grad_norm": 0.45467833868317015, "learning_rate": 2.799945931332793e-05, "loss": 0.4177, "step": 50965 }, { "epoch": 1.4881388592867257, "grad_norm": 0.4372426098766042, "learning_rate": 2.7996755879967563e-05, "loss": 0.4265, "step": 50970 }, { "epoch": 1.488284842557043, "grad_norm": 0.4590147010666535, "learning_rate": 2.7994052446607194e-05, "loss": 0.456, "step": 50975 }, { "epoch": 1.4884308258273602, "grad_norm": 0.5101708598449883, "learning_rate": 2.7991349013246824e-05, "loss": 0.4335, "step": 50980 }, { "epoch": 1.4885768090976774, "grad_norm": 0.5081386579427453, "learning_rate": 2.798864557988646e-05, "loss": 0.4442, "step": 50985 }, { "epoch": 1.4887227923679947, "grad_norm": 0.5694979792826645, "learning_rate": 2.798594214652609e-05, "loss": 0.4537, "step": 50990 }, { "epoch": 1.4888687756383119, "grad_norm": 0.4475780809027783, "learning_rate": 2.7983238713165723e-05, "loss": 0.4464, "step": 50995 }, { "epoch": 1.489014758908629, "grad_norm": 0.4861890511181923, "learning_rate": 2.7980535279805354e-05, "loss": 0.4403, "step": 51000 }, { "epoch": 1.4891607421789463, "grad_norm": 0.49460639108672383, "learning_rate": 2.7977831846444984e-05, "loss": 0.4662, "step": 51005 }, { "epoch": 1.4893067254492636, "grad_norm": 0.46940285876145227, "learning_rate": 2.7975128413084618e-05, "loss": 0.4465, "step": 51010 }, { "epoch": 1.4894527087195808, "grad_norm": 0.45855067103288766, "learning_rate": 2.797242497972425e-05, "loss": 0.4417, "step": 51015 }, { "epoch": 1.489598691989898, "grad_norm": 0.4494589282664238, "learning_rate": 2.7969721546363886e-05, "loss": 0.4364, "step": 51020 }, { "epoch": 1.4897446752602153, "grad_norm": 0.49986569056792046, "learning_rate": 2.7967018113003517e-05, "loss": 0.4619, "step": 51025 }, { "epoch": 1.4898906585305323, "grad_norm": 0.48273483003214324, "learning_rate": 2.796431467964315e-05, "loss": 0.4594, "step": 51030 }, { "epoch": 1.4900366418008497, "grad_norm": 0.5044799746831241, "learning_rate": 2.796161124628278e-05, "loss": 0.4481, "step": 51035 }, { "epoch": 1.4901826250711667, "grad_norm": 0.4115112114728249, "learning_rate": 2.7958907812922412e-05, "loss": 0.4314, "step": 51040 }, { "epoch": 1.4903286083414842, "grad_norm": 0.48774407388878943, "learning_rate": 2.7956204379562046e-05, "loss": 0.49, "step": 51045 }, { "epoch": 1.4904745916118012, "grad_norm": 0.4782208108083093, "learning_rate": 2.7953500946201677e-05, "loss": 0.4288, "step": 51050 }, { "epoch": 1.4906205748821186, "grad_norm": 0.48687910501137327, "learning_rate": 2.7950797512841307e-05, "loss": 0.4554, "step": 51055 }, { "epoch": 1.4907665581524356, "grad_norm": 0.457075938043837, "learning_rate": 2.794809407948094e-05, "loss": 0.4473, "step": 51060 }, { "epoch": 1.490912541422753, "grad_norm": 0.5440722623045561, "learning_rate": 2.7945390646120572e-05, "loss": 0.4679, "step": 51065 }, { "epoch": 1.49105852469307, "grad_norm": 0.5051229280121259, "learning_rate": 2.7942687212760206e-05, "loss": 0.4287, "step": 51070 }, { "epoch": 1.4912045079633873, "grad_norm": 0.48162086394668036, "learning_rate": 2.7939983779399837e-05, "loss": 0.4393, "step": 51075 }, { "epoch": 1.4913504912337046, "grad_norm": 0.4533745827638275, "learning_rate": 2.7937280346039474e-05, "loss": 0.435, "step": 51080 }, { "epoch": 1.4914964745040218, "grad_norm": 0.468664206982917, "learning_rate": 2.7934576912679105e-05, "loss": 0.4397, "step": 51085 }, { "epoch": 1.491642457774339, "grad_norm": 0.47299485109062994, "learning_rate": 2.793187347931874e-05, "loss": 0.4412, "step": 51090 }, { "epoch": 1.4917884410446562, "grad_norm": 0.4882315617593812, "learning_rate": 2.792917004595837e-05, "loss": 0.4378, "step": 51095 }, { "epoch": 1.4919344243149735, "grad_norm": 0.4633794667761386, "learning_rate": 2.7926466612598e-05, "loss": 0.4319, "step": 51100 }, { "epoch": 1.4920804075852907, "grad_norm": 0.4657374529210538, "learning_rate": 2.7923763179237634e-05, "loss": 0.4338, "step": 51105 }, { "epoch": 1.492226390855608, "grad_norm": 0.46188932510082437, "learning_rate": 2.7921059745877265e-05, "loss": 0.4251, "step": 51110 }, { "epoch": 1.4923723741259252, "grad_norm": 0.4658281194780679, "learning_rate": 2.7918356312516895e-05, "loss": 0.4393, "step": 51115 }, { "epoch": 1.4925183573962424, "grad_norm": 0.4658746925042622, "learning_rate": 2.791565287915653e-05, "loss": 0.4429, "step": 51120 }, { "epoch": 1.4926643406665596, "grad_norm": 0.4495650359212601, "learning_rate": 2.791294944579616e-05, "loss": 0.4288, "step": 51125 }, { "epoch": 1.4928103239368768, "grad_norm": 0.5021797025043597, "learning_rate": 2.7910246012435794e-05, "loss": 0.4469, "step": 51130 }, { "epoch": 1.492956307207194, "grad_norm": 0.4887881285102812, "learning_rate": 2.790754257907543e-05, "loss": 0.4403, "step": 51135 }, { "epoch": 1.4931022904775113, "grad_norm": 0.4954576399998077, "learning_rate": 2.7904839145715062e-05, "loss": 0.4379, "step": 51140 }, { "epoch": 1.4932482737478285, "grad_norm": 0.4265951989709882, "learning_rate": 2.7902135712354693e-05, "loss": 0.4146, "step": 51145 }, { "epoch": 1.4933942570181458, "grad_norm": 0.48301347310654535, "learning_rate": 2.7899432278994327e-05, "loss": 0.4721, "step": 51150 }, { "epoch": 1.493540240288463, "grad_norm": 0.4462140729507215, "learning_rate": 2.7896728845633957e-05, "loss": 0.4295, "step": 51155 }, { "epoch": 1.4936862235587802, "grad_norm": 0.47069267601735165, "learning_rate": 2.7894025412273588e-05, "loss": 0.4413, "step": 51160 }, { "epoch": 1.4938322068290975, "grad_norm": 0.48715669162105524, "learning_rate": 2.7891321978913222e-05, "loss": 0.4704, "step": 51165 }, { "epoch": 1.4939781900994147, "grad_norm": 0.4823581482305352, "learning_rate": 2.7888618545552852e-05, "loss": 0.4294, "step": 51170 }, { "epoch": 1.494124173369732, "grad_norm": 0.49693196523490835, "learning_rate": 2.7885915112192483e-05, "loss": 0.4707, "step": 51175 }, { "epoch": 1.4942701566400491, "grad_norm": 0.4899515719132146, "learning_rate": 2.7883211678832117e-05, "loss": 0.4379, "step": 51180 }, { "epoch": 1.4944161399103661, "grad_norm": 0.4716628931951304, "learning_rate": 2.7880508245471748e-05, "loss": 0.4093, "step": 51185 }, { "epoch": 1.4945621231806836, "grad_norm": 0.541784877907182, "learning_rate": 2.7877804812111385e-05, "loss": 0.4274, "step": 51190 }, { "epoch": 1.4947081064510006, "grad_norm": 0.46452405838190036, "learning_rate": 2.7875101378751016e-05, "loss": 0.424, "step": 51195 }, { "epoch": 1.494854089721318, "grad_norm": 0.46815738421582903, "learning_rate": 2.787239794539065e-05, "loss": 0.4375, "step": 51200 }, { "epoch": 1.495000072991635, "grad_norm": 0.44820785174191824, "learning_rate": 2.786969451203028e-05, "loss": 0.4461, "step": 51205 }, { "epoch": 1.4951460562619525, "grad_norm": 0.44364365593732447, "learning_rate": 2.7866991078669914e-05, "loss": 0.456, "step": 51210 }, { "epoch": 1.4952920395322695, "grad_norm": 0.47176934812987775, "learning_rate": 2.7864287645309545e-05, "loss": 0.4625, "step": 51215 }, { "epoch": 1.4954380228025868, "grad_norm": 0.49550631341174267, "learning_rate": 2.7861584211949176e-05, "loss": 0.4159, "step": 51220 }, { "epoch": 1.495584006072904, "grad_norm": 0.4711901794545559, "learning_rate": 2.785888077858881e-05, "loss": 0.4667, "step": 51225 }, { "epoch": 1.4957299893432212, "grad_norm": 0.4878638374719147, "learning_rate": 2.785617734522844e-05, "loss": 0.4671, "step": 51230 }, { "epoch": 1.4958759726135384, "grad_norm": 0.46612125981313784, "learning_rate": 2.785347391186807e-05, "loss": 0.4204, "step": 51235 }, { "epoch": 1.4960219558838557, "grad_norm": 0.4365240540930781, "learning_rate": 2.7850770478507705e-05, "loss": 0.4336, "step": 51240 }, { "epoch": 1.496167939154173, "grad_norm": 0.4523760228777022, "learning_rate": 2.7848067045147335e-05, "loss": 0.4126, "step": 51245 }, { "epoch": 1.4963139224244901, "grad_norm": 0.4695237330464759, "learning_rate": 2.7845363611786973e-05, "loss": 0.4573, "step": 51250 }, { "epoch": 1.4964599056948074, "grad_norm": 0.4511122336185062, "learning_rate": 2.7842660178426603e-05, "loss": 0.4397, "step": 51255 }, { "epoch": 1.4966058889651246, "grad_norm": 0.4677201705298053, "learning_rate": 2.7839956745066238e-05, "loss": 0.4405, "step": 51260 }, { "epoch": 1.4967518722354418, "grad_norm": 0.47884940494681405, "learning_rate": 2.7837253311705868e-05, "loss": 0.4432, "step": 51265 }, { "epoch": 1.496897855505759, "grad_norm": 0.4954749326031134, "learning_rate": 2.7834549878345502e-05, "loss": 0.4441, "step": 51270 }, { "epoch": 1.4970438387760763, "grad_norm": 0.47217342116542055, "learning_rate": 2.7831846444985133e-05, "loss": 0.4332, "step": 51275 }, { "epoch": 1.4971898220463935, "grad_norm": 0.4173799529943908, "learning_rate": 2.7829143011624763e-05, "loss": 0.3978, "step": 51280 }, { "epoch": 1.4973358053167107, "grad_norm": 0.44206092342950737, "learning_rate": 2.7826439578264397e-05, "loss": 0.4273, "step": 51285 }, { "epoch": 1.497481788587028, "grad_norm": 0.46753268356753697, "learning_rate": 2.7823736144904028e-05, "loss": 0.4273, "step": 51290 }, { "epoch": 1.4976277718573452, "grad_norm": 0.4944839407940898, "learning_rate": 2.782103271154366e-05, "loss": 0.454, "step": 51295 }, { "epoch": 1.4977737551276624, "grad_norm": 0.4176845168843318, "learning_rate": 2.7818329278183293e-05, "loss": 0.4133, "step": 51300 }, { "epoch": 1.4979197383979796, "grad_norm": 0.4928149721497877, "learning_rate": 2.781562584482293e-05, "loss": 0.4278, "step": 51305 }, { "epoch": 1.4980657216682969, "grad_norm": 0.48548591949375725, "learning_rate": 2.781292241146256e-05, "loss": 0.4574, "step": 51310 }, { "epoch": 1.498211704938614, "grad_norm": 0.48197333619116733, "learning_rate": 2.781021897810219e-05, "loss": 0.4552, "step": 51315 }, { "epoch": 1.4983576882089313, "grad_norm": 0.5069687788930559, "learning_rate": 2.7807515544741825e-05, "loss": 0.4357, "step": 51320 }, { "epoch": 1.4985036714792486, "grad_norm": 0.4682656142418837, "learning_rate": 2.7804812111381456e-05, "loss": 0.4758, "step": 51325 }, { "epoch": 1.4986496547495656, "grad_norm": 0.517861284933087, "learning_rate": 2.7802108678021087e-05, "loss": 0.4378, "step": 51330 }, { "epoch": 1.498795638019883, "grad_norm": 0.4628812711359839, "learning_rate": 2.779940524466072e-05, "loss": 0.4159, "step": 51335 }, { "epoch": 1.4989416212902, "grad_norm": 0.4743178943708515, "learning_rate": 2.779670181130035e-05, "loss": 0.4682, "step": 51340 }, { "epoch": 1.4990876045605175, "grad_norm": 0.4781380046351853, "learning_rate": 2.7793998377939985e-05, "loss": 0.4336, "step": 51345 }, { "epoch": 1.4992335878308345, "grad_norm": 0.4343595835046883, "learning_rate": 2.7791294944579616e-05, "loss": 0.4215, "step": 51350 }, { "epoch": 1.499379571101152, "grad_norm": 0.5153120859276721, "learning_rate": 2.7788591511219246e-05, "loss": 0.4447, "step": 51355 }, { "epoch": 1.499525554371469, "grad_norm": 0.48069035977463687, "learning_rate": 2.7785888077858884e-05, "loss": 0.4218, "step": 51360 }, { "epoch": 1.4996715376417862, "grad_norm": 0.4679290501375135, "learning_rate": 2.7783184644498518e-05, "loss": 0.474, "step": 51365 }, { "epoch": 1.4998175209121034, "grad_norm": 0.5622616043893621, "learning_rate": 2.778048121113815e-05, "loss": 0.4735, "step": 51370 }, { "epoch": 1.4999635041824206, "grad_norm": 0.5197583653213929, "learning_rate": 2.777777777777778e-05, "loss": 0.4569, "step": 51375 }, { "epoch": 1.5001094874527379, "grad_norm": 0.4982928048363698, "learning_rate": 2.7775074344417413e-05, "loss": 0.4414, "step": 51380 }, { "epoch": 1.500255470723055, "grad_norm": 0.5121446690279012, "learning_rate": 2.7772370911057044e-05, "loss": 0.4423, "step": 51385 }, { "epoch": 1.5004014539933723, "grad_norm": 0.4837480256503168, "learning_rate": 2.7769667477696674e-05, "loss": 0.4101, "step": 51390 }, { "epoch": 1.5005474372636896, "grad_norm": 0.47082043432498294, "learning_rate": 2.776696404433631e-05, "loss": 0.4515, "step": 51395 }, { "epoch": 1.5006934205340068, "grad_norm": 0.47351262438794145, "learning_rate": 2.776426061097594e-05, "loss": 0.4462, "step": 51400 }, { "epoch": 1.500839403804324, "grad_norm": 0.519942741413186, "learning_rate": 2.7761557177615573e-05, "loss": 0.4685, "step": 51405 }, { "epoch": 1.5009853870746412, "grad_norm": 0.48305468108405913, "learning_rate": 2.7758853744255204e-05, "loss": 0.4261, "step": 51410 }, { "epoch": 1.5011313703449585, "grad_norm": 0.5244789669604478, "learning_rate": 2.7756150310894834e-05, "loss": 0.4607, "step": 51415 }, { "epoch": 1.5012773536152757, "grad_norm": 0.4445544086614769, "learning_rate": 2.775344687753447e-05, "loss": 0.4453, "step": 51420 }, { "epoch": 1.501423336885593, "grad_norm": 0.47186303802329055, "learning_rate": 2.7750743444174106e-05, "loss": 0.4491, "step": 51425 }, { "epoch": 1.5015693201559102, "grad_norm": 0.5212756754999087, "learning_rate": 2.7748040010813736e-05, "loss": 0.4738, "step": 51430 }, { "epoch": 1.5017153034262274, "grad_norm": 0.46789761585487794, "learning_rate": 2.7745336577453367e-05, "loss": 0.4255, "step": 51435 }, { "epoch": 1.5018612866965446, "grad_norm": 0.49501643245552346, "learning_rate": 2.7742633144093e-05, "loss": 0.4372, "step": 51440 }, { "epoch": 1.5020072699668618, "grad_norm": 0.48965367715289876, "learning_rate": 2.773992971073263e-05, "loss": 0.4633, "step": 51445 }, { "epoch": 1.502153253237179, "grad_norm": 0.4826241701282221, "learning_rate": 2.7737226277372262e-05, "loss": 0.4483, "step": 51450 }, { "epoch": 1.502299236507496, "grad_norm": 0.45288803355591967, "learning_rate": 2.7734522844011896e-05, "loss": 0.4124, "step": 51455 }, { "epoch": 1.5024452197778135, "grad_norm": 0.48666764898876547, "learning_rate": 2.7731819410651527e-05, "loss": 0.4713, "step": 51460 }, { "epoch": 1.5025912030481305, "grad_norm": 0.5061540804845774, "learning_rate": 2.7729115977291157e-05, "loss": 0.4512, "step": 51465 }, { "epoch": 1.502737186318448, "grad_norm": 0.4635604513303264, "learning_rate": 2.772641254393079e-05, "loss": 0.4527, "step": 51470 }, { "epoch": 1.502883169588765, "grad_norm": 0.5243367100189004, "learning_rate": 2.772370911057043e-05, "loss": 0.4671, "step": 51475 }, { "epoch": 1.5030291528590825, "grad_norm": 0.45048911580337614, "learning_rate": 2.772100567721006e-05, "loss": 0.4166, "step": 51480 }, { "epoch": 1.5031751361293995, "grad_norm": 0.46071099087142625, "learning_rate": 2.7718302243849693e-05, "loss": 0.4394, "step": 51485 }, { "epoch": 1.503321119399717, "grad_norm": 0.5284436359959389, "learning_rate": 2.7715598810489324e-05, "loss": 0.4554, "step": 51490 }, { "epoch": 1.503467102670034, "grad_norm": 0.47419955874394687, "learning_rate": 2.7712895377128955e-05, "loss": 0.4622, "step": 51495 }, { "epoch": 1.5036130859403514, "grad_norm": 0.5019933912114475, "learning_rate": 2.771019194376859e-05, "loss": 0.4747, "step": 51500 }, { "epoch": 1.5037590692106684, "grad_norm": 0.4715007296923691, "learning_rate": 2.770748851040822e-05, "loss": 0.4394, "step": 51505 }, { "epoch": 1.5039050524809858, "grad_norm": 0.48223765529972473, "learning_rate": 2.770478507704785e-05, "loss": 0.4355, "step": 51510 }, { "epoch": 1.5040510357513028, "grad_norm": 0.46413051183447096, "learning_rate": 2.7702081643687484e-05, "loss": 0.4326, "step": 51515 }, { "epoch": 1.5041970190216203, "grad_norm": 0.4608366140768776, "learning_rate": 2.7699378210327115e-05, "loss": 0.4414, "step": 51520 }, { "epoch": 1.5043430022919373, "grad_norm": 1.7488927924242663, "learning_rate": 2.7696674776966745e-05, "loss": 0.4438, "step": 51525 }, { "epoch": 1.5044889855622545, "grad_norm": 0.48831285600232777, "learning_rate": 2.7693971343606383e-05, "loss": 0.4341, "step": 51530 }, { "epoch": 1.5046349688325718, "grad_norm": 0.45103749157415707, "learning_rate": 2.7691267910246017e-05, "loss": 0.4531, "step": 51535 }, { "epoch": 1.504780952102889, "grad_norm": 0.44670287861466856, "learning_rate": 2.7688564476885647e-05, "loss": 0.4658, "step": 51540 }, { "epoch": 1.5049269353732062, "grad_norm": 0.4709898060452153, "learning_rate": 2.768586104352528e-05, "loss": 0.4301, "step": 51545 }, { "epoch": 1.5050729186435234, "grad_norm": 0.4527417449610704, "learning_rate": 2.7683157610164912e-05, "loss": 0.4271, "step": 51550 }, { "epoch": 1.5052189019138407, "grad_norm": 0.5219446216199715, "learning_rate": 2.7680454176804543e-05, "loss": 0.4647, "step": 51555 }, { "epoch": 1.505364885184158, "grad_norm": 0.4970006012932983, "learning_rate": 2.7677750743444177e-05, "loss": 0.4572, "step": 51560 }, { "epoch": 1.5055108684544751, "grad_norm": 0.47346247752698717, "learning_rate": 2.7675047310083807e-05, "loss": 0.4541, "step": 51565 }, { "epoch": 1.5056568517247924, "grad_norm": 0.46000136577724715, "learning_rate": 2.7672343876723438e-05, "loss": 0.4368, "step": 51570 }, { "epoch": 1.5058028349951096, "grad_norm": 0.436645081983147, "learning_rate": 2.7669640443363072e-05, "loss": 0.4361, "step": 51575 }, { "epoch": 1.5059488182654268, "grad_norm": 0.4948351089449958, "learning_rate": 2.7666937010002702e-05, "loss": 0.4573, "step": 51580 }, { "epoch": 1.506094801535744, "grad_norm": 0.44806413037211973, "learning_rate": 2.7664233576642333e-05, "loss": 0.4828, "step": 51585 }, { "epoch": 1.5062407848060613, "grad_norm": 0.4691738543083584, "learning_rate": 2.766153014328197e-05, "loss": 0.4292, "step": 51590 }, { "epoch": 1.5063867680763785, "grad_norm": 0.4959933742244982, "learning_rate": 2.7658826709921604e-05, "loss": 0.4514, "step": 51595 }, { "epoch": 1.5065327513466955, "grad_norm": 0.4964381177640824, "learning_rate": 2.7656123276561235e-05, "loss": 0.4304, "step": 51600 }, { "epoch": 1.506678734617013, "grad_norm": 0.5253907906639672, "learning_rate": 2.7653419843200866e-05, "loss": 0.4403, "step": 51605 }, { "epoch": 1.50682471788733, "grad_norm": 0.4895685635022199, "learning_rate": 2.76507164098405e-05, "loss": 0.4613, "step": 51610 }, { "epoch": 1.5069707011576474, "grad_norm": 0.48018946344757835, "learning_rate": 2.764801297648013e-05, "loss": 0.4358, "step": 51615 }, { "epoch": 1.5071166844279644, "grad_norm": 0.46407980371359575, "learning_rate": 2.7645309543119764e-05, "loss": 0.4278, "step": 51620 }, { "epoch": 1.5072626676982819, "grad_norm": 0.4826904572778277, "learning_rate": 2.7642606109759395e-05, "loss": 0.4631, "step": 51625 }, { "epoch": 1.5074086509685989, "grad_norm": 0.4496941913170047, "learning_rate": 2.7639902676399026e-05, "loss": 0.4467, "step": 51630 }, { "epoch": 1.5075546342389163, "grad_norm": 0.47561688499764837, "learning_rate": 2.763719924303866e-05, "loss": 0.4201, "step": 51635 }, { "epoch": 1.5077006175092333, "grad_norm": 0.539599941850428, "learning_rate": 2.763449580967829e-05, "loss": 0.4585, "step": 51640 }, { "epoch": 1.5078466007795508, "grad_norm": 0.4713195835271102, "learning_rate": 2.7631792376317928e-05, "loss": 0.4508, "step": 51645 }, { "epoch": 1.5079925840498678, "grad_norm": 0.4741283055309813, "learning_rate": 2.7629088942957558e-05, "loss": 0.4317, "step": 51650 }, { "epoch": 1.5081385673201853, "grad_norm": 0.5008098712411938, "learning_rate": 2.7626385509597192e-05, "loss": 0.4555, "step": 51655 }, { "epoch": 1.5082845505905023, "grad_norm": 0.5033231027290531, "learning_rate": 2.7623682076236823e-05, "loss": 0.4506, "step": 51660 }, { "epoch": 1.5084305338608197, "grad_norm": 0.4836995482336337, "learning_rate": 2.7620978642876454e-05, "loss": 0.448, "step": 51665 }, { "epoch": 1.5085765171311367, "grad_norm": 0.4811602656498828, "learning_rate": 2.7618275209516088e-05, "loss": 0.4355, "step": 51670 }, { "epoch": 1.508722500401454, "grad_norm": 0.5150757596531084, "learning_rate": 2.7615571776155718e-05, "loss": 0.4808, "step": 51675 }, { "epoch": 1.5088684836717712, "grad_norm": 0.43278990756216434, "learning_rate": 2.7612868342795352e-05, "loss": 0.424, "step": 51680 }, { "epoch": 1.5090144669420884, "grad_norm": 0.4567192022254889, "learning_rate": 2.7610164909434983e-05, "loss": 0.4403, "step": 51685 }, { "epoch": 1.5091604502124056, "grad_norm": 0.49120158614420073, "learning_rate": 2.7607461476074613e-05, "loss": 0.4436, "step": 51690 }, { "epoch": 1.5093064334827229, "grad_norm": 0.4879241429421243, "learning_rate": 2.7604758042714247e-05, "loss": 0.4349, "step": 51695 }, { "epoch": 1.50945241675304, "grad_norm": 0.45861998427753087, "learning_rate": 2.7602054609353885e-05, "loss": 0.469, "step": 51700 }, { "epoch": 1.5095984000233573, "grad_norm": 0.49177192498886363, "learning_rate": 2.7599351175993515e-05, "loss": 0.4479, "step": 51705 }, { "epoch": 1.5097443832936746, "grad_norm": 0.4545179068261489, "learning_rate": 2.7596647742633146e-05, "loss": 0.4556, "step": 51710 }, { "epoch": 1.5098903665639918, "grad_norm": 0.48445803901281365, "learning_rate": 2.759394430927278e-05, "loss": 0.4318, "step": 51715 }, { "epoch": 1.510036349834309, "grad_norm": 0.44925703662052996, "learning_rate": 2.759124087591241e-05, "loss": 0.4112, "step": 51720 }, { "epoch": 1.5101823331046262, "grad_norm": 0.5037515911976077, "learning_rate": 2.758853744255204e-05, "loss": 0.4352, "step": 51725 }, { "epoch": 1.5103283163749435, "grad_norm": 0.46194165424193645, "learning_rate": 2.7585834009191675e-05, "loss": 0.4355, "step": 51730 }, { "epoch": 1.5104742996452607, "grad_norm": 0.4454813067233519, "learning_rate": 2.7583130575831306e-05, "loss": 0.4695, "step": 51735 }, { "epoch": 1.510620282915578, "grad_norm": 0.46291873133197337, "learning_rate": 2.7580427142470937e-05, "loss": 0.4345, "step": 51740 }, { "epoch": 1.510766266185895, "grad_norm": 0.47220989326105706, "learning_rate": 2.757772370911057e-05, "loss": 0.4238, "step": 51745 }, { "epoch": 1.5109122494562124, "grad_norm": 0.48129237077435294, "learning_rate": 2.75750202757502e-05, "loss": 0.4276, "step": 51750 }, { "epoch": 1.5110582327265294, "grad_norm": 0.5150445508816592, "learning_rate": 2.7572316842389835e-05, "loss": 0.46, "step": 51755 }, { "epoch": 1.5112042159968468, "grad_norm": 0.4520548972235378, "learning_rate": 2.7569613409029473e-05, "loss": 0.4402, "step": 51760 }, { "epoch": 1.5113501992671639, "grad_norm": 0.46229946871945243, "learning_rate": 2.7566909975669103e-05, "loss": 0.4357, "step": 51765 }, { "epoch": 1.5114961825374813, "grad_norm": 0.5128930220633047, "learning_rate": 2.7564206542308734e-05, "loss": 0.4536, "step": 51770 }, { "epoch": 1.5116421658077983, "grad_norm": 0.5079676242489186, "learning_rate": 2.7561503108948368e-05, "loss": 0.4748, "step": 51775 }, { "epoch": 1.5117881490781158, "grad_norm": 0.4694872353047548, "learning_rate": 2.7558799675588e-05, "loss": 0.4285, "step": 51780 }, { "epoch": 1.5119341323484328, "grad_norm": 0.48949574591706474, "learning_rate": 2.755609624222763e-05, "loss": 0.4534, "step": 51785 }, { "epoch": 1.5120801156187502, "grad_norm": 0.4499953651783175, "learning_rate": 2.7553392808867263e-05, "loss": 0.4331, "step": 51790 }, { "epoch": 1.5122260988890672, "grad_norm": 0.509903564671674, "learning_rate": 2.7550689375506894e-05, "loss": 0.4346, "step": 51795 }, { "epoch": 1.5123720821593847, "grad_norm": 0.4746975902170162, "learning_rate": 2.7547985942146524e-05, "loss": 0.4491, "step": 51800 }, { "epoch": 1.5125180654297017, "grad_norm": 0.46885733604654406, "learning_rate": 2.754528250878616e-05, "loss": 0.4368, "step": 51805 }, { "epoch": 1.5126640487000191, "grad_norm": 0.5063413202651288, "learning_rate": 2.754257907542579e-05, "loss": 0.4454, "step": 51810 }, { "epoch": 1.5128100319703361, "grad_norm": 0.5228894383209761, "learning_rate": 2.7539875642065426e-05, "loss": 0.4685, "step": 51815 }, { "epoch": 1.5129560152406534, "grad_norm": 0.4821741237702595, "learning_rate": 2.753717220870506e-05, "loss": 0.452, "step": 51820 }, { "epoch": 1.5131019985109706, "grad_norm": 0.4801084208192356, "learning_rate": 2.753446877534469e-05, "loss": 0.4451, "step": 51825 }, { "epoch": 1.5132479817812878, "grad_norm": 0.4325221059732936, "learning_rate": 2.753176534198432e-05, "loss": 0.4528, "step": 51830 }, { "epoch": 1.513393965051605, "grad_norm": 0.4504369012184059, "learning_rate": 2.7529061908623956e-05, "loss": 0.4359, "step": 51835 }, { "epoch": 1.5135399483219223, "grad_norm": 0.48382858980340987, "learning_rate": 2.7526358475263586e-05, "loss": 0.4655, "step": 51840 }, { "epoch": 1.5136859315922395, "grad_norm": 0.5496433797480581, "learning_rate": 2.7523655041903217e-05, "loss": 0.4648, "step": 51845 }, { "epoch": 1.5138319148625567, "grad_norm": 0.49860382467212894, "learning_rate": 2.752095160854285e-05, "loss": 0.446, "step": 51850 }, { "epoch": 1.513977898132874, "grad_norm": 0.4859092615525953, "learning_rate": 2.751824817518248e-05, "loss": 0.4402, "step": 51855 }, { "epoch": 1.5141238814031912, "grad_norm": 0.5269547400091756, "learning_rate": 2.7515544741822112e-05, "loss": 0.4897, "step": 51860 }, { "epoch": 1.5142698646735084, "grad_norm": 0.4768360538721604, "learning_rate": 2.7512841308461746e-05, "loss": 0.4327, "step": 51865 }, { "epoch": 1.5144158479438257, "grad_norm": 0.44770486464271253, "learning_rate": 2.7510137875101384e-05, "loss": 0.4187, "step": 51870 }, { "epoch": 1.514561831214143, "grad_norm": 0.5024066653922444, "learning_rate": 2.7507434441741014e-05, "loss": 0.4588, "step": 51875 }, { "epoch": 1.5147078144844601, "grad_norm": 0.47014429580944955, "learning_rate": 2.7504731008380645e-05, "loss": 0.4464, "step": 51880 }, { "epoch": 1.5148537977547774, "grad_norm": 0.47055839497756424, "learning_rate": 2.750202757502028e-05, "loss": 0.4435, "step": 51885 }, { "epoch": 1.5149997810250944, "grad_norm": 0.43720104703289764, "learning_rate": 2.749932414165991e-05, "loss": 0.4032, "step": 51890 }, { "epoch": 1.5151457642954118, "grad_norm": 0.49977565067296975, "learning_rate": 2.7496620708299543e-05, "loss": 0.4362, "step": 51895 }, { "epoch": 1.5152917475657288, "grad_norm": 0.43559538967125194, "learning_rate": 2.7493917274939174e-05, "loss": 0.4283, "step": 51900 }, { "epoch": 1.5154377308360463, "grad_norm": 0.4726625031593412, "learning_rate": 2.7491213841578805e-05, "loss": 0.4604, "step": 51905 }, { "epoch": 1.5155837141063633, "grad_norm": 0.4983065948470317, "learning_rate": 2.748851040821844e-05, "loss": 0.4521, "step": 51910 }, { "epoch": 1.5157296973766807, "grad_norm": 0.47266314691516337, "learning_rate": 2.748580697485807e-05, "loss": 0.4327, "step": 51915 }, { "epoch": 1.5158756806469977, "grad_norm": 0.49916706206927686, "learning_rate": 2.74831035414977e-05, "loss": 0.4246, "step": 51920 }, { "epoch": 1.5160216639173152, "grad_norm": 0.4986568675619121, "learning_rate": 2.7480400108137334e-05, "loss": 0.4526, "step": 51925 }, { "epoch": 1.5161676471876322, "grad_norm": 0.4469151488864709, "learning_rate": 2.747769667477697e-05, "loss": 0.4383, "step": 51930 }, { "epoch": 1.5163136304579496, "grad_norm": 0.4808958232795277, "learning_rate": 2.7474993241416602e-05, "loss": 0.4513, "step": 51935 }, { "epoch": 1.5164596137282667, "grad_norm": 0.4767140311724184, "learning_rate": 2.7472289808056233e-05, "loss": 0.4365, "step": 51940 }, { "epoch": 1.516605596998584, "grad_norm": 0.4748920612498191, "learning_rate": 2.7469586374695867e-05, "loss": 0.4556, "step": 51945 }, { "epoch": 1.5167515802689011, "grad_norm": 0.4934362144050329, "learning_rate": 2.7466882941335497e-05, "loss": 0.4352, "step": 51950 }, { "epoch": 1.5168975635392186, "grad_norm": 0.498698100536019, "learning_rate": 2.746417950797513e-05, "loss": 0.4534, "step": 51955 }, { "epoch": 1.5170435468095356, "grad_norm": 0.5117632866628041, "learning_rate": 2.7461476074614762e-05, "loss": 0.4582, "step": 51960 }, { "epoch": 1.5171895300798528, "grad_norm": 0.4568221575033265, "learning_rate": 2.7458772641254393e-05, "loss": 0.4699, "step": 51965 }, { "epoch": 1.51733551335017, "grad_norm": 0.48977269794415795, "learning_rate": 2.7456069207894027e-05, "loss": 0.4198, "step": 51970 }, { "epoch": 1.5174814966204873, "grad_norm": 0.4831896870973301, "learning_rate": 2.7453365774533657e-05, "loss": 0.436, "step": 51975 }, { "epoch": 1.5176274798908045, "grad_norm": 0.4741628358092791, "learning_rate": 2.7450662341173288e-05, "loss": 0.4378, "step": 51980 }, { "epoch": 1.5177734631611217, "grad_norm": 0.46413726391178634, "learning_rate": 2.7447958907812925e-05, "loss": 0.414, "step": 51985 }, { "epoch": 1.517919446431439, "grad_norm": 0.4896354058573235, "learning_rate": 2.744525547445256e-05, "loss": 0.4427, "step": 51990 }, { "epoch": 1.5180654297017562, "grad_norm": 0.455317587065784, "learning_rate": 2.744255204109219e-05, "loss": 0.409, "step": 51995 }, { "epoch": 1.5182114129720734, "grad_norm": 0.4903758167681535, "learning_rate": 2.743984860773182e-05, "loss": 0.4486, "step": 52000 }, { "epoch": 1.5183573962423906, "grad_norm": 0.47646225102637063, "learning_rate": 2.7437145174371454e-05, "loss": 0.4396, "step": 52005 }, { "epoch": 1.5185033795127079, "grad_norm": 0.4732615317933084, "learning_rate": 2.7434441741011085e-05, "loss": 0.433, "step": 52010 }, { "epoch": 1.518649362783025, "grad_norm": 0.474149270567681, "learning_rate": 2.7431738307650716e-05, "loss": 0.4135, "step": 52015 }, { "epoch": 1.5187953460533423, "grad_norm": 0.529224764534112, "learning_rate": 2.742903487429035e-05, "loss": 0.4616, "step": 52020 }, { "epoch": 1.5189413293236596, "grad_norm": 0.4889664131400715, "learning_rate": 2.742633144092998e-05, "loss": 0.4725, "step": 52025 }, { "epoch": 1.5190873125939768, "grad_norm": 0.4923037245174727, "learning_rate": 2.7423628007569614e-05, "loss": 0.4543, "step": 52030 }, { "epoch": 1.5192332958642938, "grad_norm": 0.502112061300415, "learning_rate": 2.7420924574209245e-05, "loss": 0.4488, "step": 52035 }, { "epoch": 1.5193792791346112, "grad_norm": 0.46352340304881007, "learning_rate": 2.7418221140848882e-05, "loss": 0.4377, "step": 52040 }, { "epoch": 1.5195252624049282, "grad_norm": 0.49547198071771675, "learning_rate": 2.7415517707488513e-05, "loss": 0.4465, "step": 52045 }, { "epoch": 1.5196712456752457, "grad_norm": 0.45788648388146636, "learning_rate": 2.7412814274128147e-05, "loss": 0.4445, "step": 52050 }, { "epoch": 1.5198172289455627, "grad_norm": 0.49543939911596124, "learning_rate": 2.7410110840767778e-05, "loss": 0.466, "step": 52055 }, { "epoch": 1.5199632122158802, "grad_norm": 0.4546882997646067, "learning_rate": 2.7407407407407408e-05, "loss": 0.4468, "step": 52060 }, { "epoch": 1.5201091954861972, "grad_norm": 0.4751226675770214, "learning_rate": 2.7404703974047042e-05, "loss": 0.4346, "step": 52065 }, { "epoch": 1.5202551787565146, "grad_norm": 0.45940281022702134, "learning_rate": 2.7402000540686673e-05, "loss": 0.4367, "step": 52070 }, { "epoch": 1.5204011620268316, "grad_norm": 0.4751005350795448, "learning_rate": 2.7399297107326304e-05, "loss": 0.4434, "step": 52075 }, { "epoch": 1.520547145297149, "grad_norm": 0.4855820242897092, "learning_rate": 2.7396593673965938e-05, "loss": 0.4552, "step": 52080 }, { "epoch": 1.520693128567466, "grad_norm": 0.4682361640097379, "learning_rate": 2.7393890240605568e-05, "loss": 0.4546, "step": 52085 }, { "epoch": 1.5208391118377835, "grad_norm": 0.522344801055634, "learning_rate": 2.73911868072452e-05, "loss": 0.4437, "step": 52090 }, { "epoch": 1.5209850951081005, "grad_norm": 0.4651896807123587, "learning_rate": 2.7388483373884833e-05, "loss": 0.3959, "step": 52095 }, { "epoch": 1.521131078378418, "grad_norm": 0.4467095342735744, "learning_rate": 2.738577994052447e-05, "loss": 0.4312, "step": 52100 }, { "epoch": 1.521277061648735, "grad_norm": 0.4648068671560389, "learning_rate": 2.73830765071641e-05, "loss": 0.4121, "step": 52105 }, { "epoch": 1.5214230449190522, "grad_norm": 0.4793151205725525, "learning_rate": 2.7380373073803735e-05, "loss": 0.4629, "step": 52110 }, { "epoch": 1.5215690281893695, "grad_norm": 0.5122269320083426, "learning_rate": 2.7377669640443365e-05, "loss": 0.4446, "step": 52115 }, { "epoch": 1.5217150114596867, "grad_norm": 0.4757945574112688, "learning_rate": 2.7374966207082996e-05, "loss": 0.4546, "step": 52120 }, { "epoch": 1.521860994730004, "grad_norm": 0.4914391743915552, "learning_rate": 2.737226277372263e-05, "loss": 0.4544, "step": 52125 }, { "epoch": 1.5220069780003211, "grad_norm": 0.42254970580913986, "learning_rate": 2.736955934036226e-05, "loss": 0.4174, "step": 52130 }, { "epoch": 1.5221529612706384, "grad_norm": 0.4693574214730921, "learning_rate": 2.736685590700189e-05, "loss": 0.4142, "step": 52135 }, { "epoch": 1.5222989445409556, "grad_norm": 0.47373540461976127, "learning_rate": 2.7364152473641525e-05, "loss": 0.427, "step": 52140 }, { "epoch": 1.5224449278112728, "grad_norm": 0.44056932441237723, "learning_rate": 2.7361449040281156e-05, "loss": 0.3992, "step": 52145 }, { "epoch": 1.52259091108159, "grad_norm": 0.47147252472426193, "learning_rate": 2.7358745606920787e-05, "loss": 0.4379, "step": 52150 }, { "epoch": 1.5227368943519073, "grad_norm": 0.4810051222796399, "learning_rate": 2.7356042173560424e-05, "loss": 0.4328, "step": 52155 }, { "epoch": 1.5228828776222245, "grad_norm": 0.49931899047512857, "learning_rate": 2.7353338740200058e-05, "loss": 0.4358, "step": 52160 }, { "epoch": 1.5230288608925417, "grad_norm": 0.49297789823096083, "learning_rate": 2.735063530683969e-05, "loss": 0.4582, "step": 52165 }, { "epoch": 1.523174844162859, "grad_norm": 0.47371421317858153, "learning_rate": 2.7347931873479323e-05, "loss": 0.446, "step": 52170 }, { "epoch": 1.5233208274331762, "grad_norm": 0.46265074970212977, "learning_rate": 2.7345228440118953e-05, "loss": 0.443, "step": 52175 }, { "epoch": 1.5234668107034934, "grad_norm": 0.4716438896341251, "learning_rate": 2.7342525006758584e-05, "loss": 0.4611, "step": 52180 }, { "epoch": 1.5236127939738107, "grad_norm": 0.4774396869980689, "learning_rate": 2.7339821573398218e-05, "loss": 0.4417, "step": 52185 }, { "epoch": 1.5237587772441277, "grad_norm": 0.46095538235774336, "learning_rate": 2.733711814003785e-05, "loss": 0.4527, "step": 52190 }, { "epoch": 1.5239047605144451, "grad_norm": 0.4722563150357844, "learning_rate": 2.733441470667748e-05, "loss": 0.4585, "step": 52195 }, { "epoch": 1.5240507437847621, "grad_norm": 0.4617436838833524, "learning_rate": 2.7331711273317113e-05, "loss": 0.4546, "step": 52200 }, { "epoch": 1.5241967270550796, "grad_norm": 0.46880148192166193, "learning_rate": 2.7329007839956744e-05, "loss": 0.4288, "step": 52205 }, { "epoch": 1.5243427103253966, "grad_norm": 0.4969358350874494, "learning_rate": 2.732630440659638e-05, "loss": 0.427, "step": 52210 }, { "epoch": 1.524488693595714, "grad_norm": 0.4954483726370579, "learning_rate": 2.7323600973236012e-05, "loss": 0.4377, "step": 52215 }, { "epoch": 1.524634676866031, "grad_norm": 0.47714945185504143, "learning_rate": 2.7320897539875646e-05, "loss": 0.4568, "step": 52220 }, { "epoch": 1.5247806601363485, "grad_norm": 0.47121685419386866, "learning_rate": 2.7318194106515276e-05, "loss": 0.4406, "step": 52225 }, { "epoch": 1.5249266434066655, "grad_norm": 0.4807567071767096, "learning_rate": 2.7315490673154907e-05, "loss": 0.4376, "step": 52230 }, { "epoch": 1.525072626676983, "grad_norm": 0.45760072153038217, "learning_rate": 2.731278723979454e-05, "loss": 0.4411, "step": 52235 }, { "epoch": 1.5252186099473, "grad_norm": 0.4786463507444127, "learning_rate": 2.731008380643417e-05, "loss": 0.4514, "step": 52240 }, { "epoch": 1.5253645932176174, "grad_norm": 0.4862809459581493, "learning_rate": 2.7307380373073806e-05, "loss": 0.4286, "step": 52245 }, { "epoch": 1.5255105764879344, "grad_norm": 0.5000017954140689, "learning_rate": 2.7304676939713436e-05, "loss": 0.4336, "step": 52250 }, { "epoch": 1.5256565597582517, "grad_norm": 0.4606383249591316, "learning_rate": 2.7301973506353067e-05, "loss": 0.4553, "step": 52255 }, { "epoch": 1.5258025430285689, "grad_norm": 0.46417493658011494, "learning_rate": 2.72992700729927e-05, "loss": 0.4365, "step": 52260 }, { "epoch": 1.525948526298886, "grad_norm": 0.4775991285552999, "learning_rate": 2.729656663963233e-05, "loss": 0.4536, "step": 52265 }, { "epoch": 1.5260945095692033, "grad_norm": 0.4898190889441563, "learning_rate": 2.729386320627197e-05, "loss": 0.4663, "step": 52270 }, { "epoch": 1.5262404928395206, "grad_norm": 0.4824448743077239, "learning_rate": 2.72911597729116e-05, "loss": 0.4619, "step": 52275 }, { "epoch": 1.5263864761098378, "grad_norm": 0.4747943594443737, "learning_rate": 2.7288456339551234e-05, "loss": 0.4326, "step": 52280 }, { "epoch": 1.526532459380155, "grad_norm": 0.4713515867462905, "learning_rate": 2.7285752906190864e-05, "loss": 0.4472, "step": 52285 }, { "epoch": 1.5266784426504723, "grad_norm": 0.4852357010147105, "learning_rate": 2.7283049472830495e-05, "loss": 0.4752, "step": 52290 }, { "epoch": 1.5268244259207895, "grad_norm": 0.4822437041517449, "learning_rate": 2.728034603947013e-05, "loss": 0.4513, "step": 52295 }, { "epoch": 1.5269704091911067, "grad_norm": 0.5033449925245753, "learning_rate": 2.727764260610976e-05, "loss": 0.4627, "step": 52300 }, { "epoch": 1.527116392461424, "grad_norm": 0.4818948972678652, "learning_rate": 2.7274939172749394e-05, "loss": 0.4397, "step": 52305 }, { "epoch": 1.5272623757317412, "grad_norm": 0.47511737476616944, "learning_rate": 2.7272235739389024e-05, "loss": 0.4575, "step": 52310 }, { "epoch": 1.5274083590020584, "grad_norm": 0.4750052102328688, "learning_rate": 2.7269532306028655e-05, "loss": 0.4328, "step": 52315 }, { "epoch": 1.5275543422723756, "grad_norm": 0.5262105665029634, "learning_rate": 2.726682887266829e-05, "loss": 0.4719, "step": 52320 }, { "epoch": 1.5277003255426929, "grad_norm": 0.5135497356101019, "learning_rate": 2.7264125439307926e-05, "loss": 0.4627, "step": 52325 }, { "epoch": 1.52784630881301, "grad_norm": 0.5109062589357175, "learning_rate": 2.7261422005947557e-05, "loss": 0.4925, "step": 52330 }, { "epoch": 1.527992292083327, "grad_norm": 0.43358553600596506, "learning_rate": 2.7258718572587187e-05, "loss": 0.4467, "step": 52335 }, { "epoch": 1.5281382753536445, "grad_norm": 0.4913169471748213, "learning_rate": 2.725601513922682e-05, "loss": 0.449, "step": 52340 }, { "epoch": 1.5282842586239616, "grad_norm": 0.48043720764296866, "learning_rate": 2.7253311705866452e-05, "loss": 0.4663, "step": 52345 }, { "epoch": 1.528430241894279, "grad_norm": 0.4782244176910635, "learning_rate": 2.7250608272506083e-05, "loss": 0.4317, "step": 52350 }, { "epoch": 1.528576225164596, "grad_norm": 0.4602817818856186, "learning_rate": 2.7247904839145717e-05, "loss": 0.4466, "step": 52355 }, { "epoch": 1.5287222084349135, "grad_norm": 0.48733797328843603, "learning_rate": 2.7245201405785347e-05, "loss": 0.443, "step": 52360 }, { "epoch": 1.5288681917052305, "grad_norm": 0.4666819816632574, "learning_rate": 2.7242497972424978e-05, "loss": 0.4034, "step": 52365 }, { "epoch": 1.529014174975548, "grad_norm": 0.4634876417375128, "learning_rate": 2.7239794539064612e-05, "loss": 0.4432, "step": 52370 }, { "epoch": 1.529160158245865, "grad_norm": 0.4752971905661459, "learning_rate": 2.7237091105704243e-05, "loss": 0.4348, "step": 52375 }, { "epoch": 1.5293061415161824, "grad_norm": 0.4605155206172106, "learning_rate": 2.723438767234388e-05, "loss": 0.4752, "step": 52380 }, { "epoch": 1.5294521247864994, "grad_norm": 0.43096229633375877, "learning_rate": 2.7231684238983514e-05, "loss": 0.4366, "step": 52385 }, { "epoch": 1.5295981080568168, "grad_norm": 0.4624251311605074, "learning_rate": 2.7228980805623145e-05, "loss": 0.4227, "step": 52390 }, { "epoch": 1.5297440913271338, "grad_norm": 0.45461161961183577, "learning_rate": 2.7226277372262775e-05, "loss": 0.4299, "step": 52395 }, { "epoch": 1.529890074597451, "grad_norm": 0.46209333974190386, "learning_rate": 2.722357393890241e-05, "loss": 0.434, "step": 52400 }, { "epoch": 1.5300360578677683, "grad_norm": 0.44995206093380524, "learning_rate": 2.722087050554204e-05, "loss": 0.4129, "step": 52405 }, { "epoch": 1.5301820411380855, "grad_norm": 0.46885436194903435, "learning_rate": 2.721816707218167e-05, "loss": 0.45, "step": 52410 }, { "epoch": 1.5303280244084028, "grad_norm": 0.5125158232736626, "learning_rate": 2.7215463638821304e-05, "loss": 0.4553, "step": 52415 }, { "epoch": 1.53047400767872, "grad_norm": 0.4900751055199962, "learning_rate": 2.7212760205460935e-05, "loss": 0.4608, "step": 52420 }, { "epoch": 1.5306199909490372, "grad_norm": 0.515059629216995, "learning_rate": 2.7210056772100566e-05, "loss": 0.4206, "step": 52425 }, { "epoch": 1.5307659742193545, "grad_norm": 0.4223158943685712, "learning_rate": 2.72073533387402e-05, "loss": 0.4262, "step": 52430 }, { "epoch": 1.5309119574896717, "grad_norm": 0.4960661893003798, "learning_rate": 2.720464990537983e-05, "loss": 0.455, "step": 52435 }, { "epoch": 1.531057940759989, "grad_norm": 0.4729049798150573, "learning_rate": 2.7201946472019468e-05, "loss": 0.4211, "step": 52440 }, { "epoch": 1.5312039240303061, "grad_norm": 0.4589626735324094, "learning_rate": 2.7199243038659102e-05, "loss": 0.4504, "step": 52445 }, { "epoch": 1.5313499073006234, "grad_norm": 0.45936738302156005, "learning_rate": 2.7196539605298732e-05, "loss": 0.4475, "step": 52450 }, { "epoch": 1.5314958905709406, "grad_norm": 0.47936554853885205, "learning_rate": 2.7193836171938363e-05, "loss": 0.4516, "step": 52455 }, { "epoch": 1.5316418738412578, "grad_norm": 0.46765310713524905, "learning_rate": 2.7191132738577997e-05, "loss": 0.4402, "step": 52460 }, { "epoch": 1.531787857111575, "grad_norm": 0.5026801097129977, "learning_rate": 2.7188429305217628e-05, "loss": 0.4589, "step": 52465 }, { "epoch": 1.5319338403818923, "grad_norm": 0.4481377309884903, "learning_rate": 2.7185725871857258e-05, "loss": 0.4396, "step": 52470 }, { "epoch": 1.5320798236522095, "grad_norm": 0.49930508344645536, "learning_rate": 2.7183022438496892e-05, "loss": 0.4386, "step": 52475 }, { "epoch": 1.5322258069225265, "grad_norm": 0.47528686109137064, "learning_rate": 2.7180319005136523e-05, "loss": 0.4199, "step": 52480 }, { "epoch": 1.532371790192844, "grad_norm": 0.4861742358955004, "learning_rate": 2.7177615571776154e-05, "loss": 0.4502, "step": 52485 }, { "epoch": 1.532517773463161, "grad_norm": 0.47081093018298803, "learning_rate": 2.7174912138415788e-05, "loss": 0.4436, "step": 52490 }, { "epoch": 1.5326637567334784, "grad_norm": 0.4796817519732678, "learning_rate": 2.7172208705055425e-05, "loss": 0.4444, "step": 52495 }, { "epoch": 1.5328097400037954, "grad_norm": 0.5255248494618675, "learning_rate": 2.7169505271695056e-05, "loss": 0.4469, "step": 52500 }, { "epoch": 1.532955723274113, "grad_norm": 0.4435344860507208, "learning_rate": 2.7166801838334686e-05, "loss": 0.4396, "step": 52505 }, { "epoch": 1.53310170654443, "grad_norm": 0.4917136025025609, "learning_rate": 2.716409840497432e-05, "loss": 0.4678, "step": 52510 }, { "epoch": 1.5332476898147473, "grad_norm": 0.48708858269235633, "learning_rate": 2.716139497161395e-05, "loss": 0.4529, "step": 52515 }, { "epoch": 1.5333936730850644, "grad_norm": 0.4773286654824679, "learning_rate": 2.7158691538253585e-05, "loss": 0.4621, "step": 52520 }, { "epoch": 1.5335396563553818, "grad_norm": 0.4437966166582442, "learning_rate": 2.7155988104893215e-05, "loss": 0.4187, "step": 52525 }, { "epoch": 1.5336856396256988, "grad_norm": 0.4618415930231338, "learning_rate": 2.7153284671532846e-05, "loss": 0.445, "step": 52530 }, { "epoch": 1.5338316228960163, "grad_norm": 0.5066505505765981, "learning_rate": 2.715058123817248e-05, "loss": 0.4514, "step": 52535 }, { "epoch": 1.5339776061663333, "grad_norm": 0.4986783454484819, "learning_rate": 2.714787780481211e-05, "loss": 0.4517, "step": 52540 }, { "epoch": 1.5341235894366507, "grad_norm": 0.47180732591347135, "learning_rate": 2.714517437145174e-05, "loss": 0.4488, "step": 52545 }, { "epoch": 1.5342695727069677, "grad_norm": 0.4798734557752931, "learning_rate": 2.714247093809138e-05, "loss": 0.43, "step": 52550 }, { "epoch": 1.534415555977285, "grad_norm": 0.505410493146048, "learning_rate": 2.7139767504731013e-05, "loss": 0.4594, "step": 52555 }, { "epoch": 1.5345615392476022, "grad_norm": 0.46019839939705715, "learning_rate": 2.7137064071370643e-05, "loss": 0.4604, "step": 52560 }, { "epoch": 1.5347075225179194, "grad_norm": 0.43461059698842885, "learning_rate": 2.7134360638010274e-05, "loss": 0.438, "step": 52565 }, { "epoch": 1.5348535057882366, "grad_norm": 0.4678051809395435, "learning_rate": 2.7131657204649908e-05, "loss": 0.4489, "step": 52570 }, { "epoch": 1.5349994890585539, "grad_norm": 0.518423662808464, "learning_rate": 2.712895377128954e-05, "loss": 0.4592, "step": 52575 }, { "epoch": 1.535145472328871, "grad_norm": 0.4784067692431958, "learning_rate": 2.7126250337929173e-05, "loss": 0.4622, "step": 52580 }, { "epoch": 1.5352914555991883, "grad_norm": 0.47843863128820774, "learning_rate": 2.7123546904568803e-05, "loss": 0.4403, "step": 52585 }, { "epoch": 1.5354374388695056, "grad_norm": 0.45930983791803254, "learning_rate": 2.7120843471208434e-05, "loss": 0.4406, "step": 52590 }, { "epoch": 1.5355834221398228, "grad_norm": 0.46745477380113776, "learning_rate": 2.7118140037848068e-05, "loss": 0.4194, "step": 52595 }, { "epoch": 1.53572940541014, "grad_norm": 0.46163197947024087, "learning_rate": 2.71154366044877e-05, "loss": 0.4617, "step": 52600 }, { "epoch": 1.5358753886804573, "grad_norm": 0.46084244178482175, "learning_rate": 2.711273317112733e-05, "loss": 0.4365, "step": 52605 }, { "epoch": 1.5360213719507745, "grad_norm": 0.5068470479180595, "learning_rate": 2.7110029737766967e-05, "loss": 0.4678, "step": 52610 }, { "epoch": 1.5361673552210917, "grad_norm": 0.4994726625830031, "learning_rate": 2.71073263044066e-05, "loss": 0.4781, "step": 52615 }, { "epoch": 1.536313338491409, "grad_norm": 0.41282999929812136, "learning_rate": 2.710462287104623e-05, "loss": 0.3974, "step": 52620 }, { "epoch": 1.536459321761726, "grad_norm": 0.4457458460768944, "learning_rate": 2.7101919437685862e-05, "loss": 0.4037, "step": 52625 }, { "epoch": 1.5366053050320434, "grad_norm": 0.48797952187816934, "learning_rate": 2.7099216004325496e-05, "loss": 0.4095, "step": 52630 }, { "epoch": 1.5367512883023604, "grad_norm": 0.45389341713125175, "learning_rate": 2.7096512570965126e-05, "loss": 0.4329, "step": 52635 }, { "epoch": 1.5368972715726779, "grad_norm": 0.4966901139815532, "learning_rate": 2.7093809137604757e-05, "loss": 0.4524, "step": 52640 }, { "epoch": 1.5370432548429949, "grad_norm": 0.4512378432468506, "learning_rate": 2.709110570424439e-05, "loss": 0.4659, "step": 52645 }, { "epoch": 1.5371892381133123, "grad_norm": 0.4808772000607366, "learning_rate": 2.7088402270884022e-05, "loss": 0.4201, "step": 52650 }, { "epoch": 1.5373352213836293, "grad_norm": 0.5057752461507427, "learning_rate": 2.7085698837523656e-05, "loss": 0.4353, "step": 52655 }, { "epoch": 1.5374812046539468, "grad_norm": 0.46449729294282777, "learning_rate": 2.7082995404163286e-05, "loss": 0.4372, "step": 52660 }, { "epoch": 1.5376271879242638, "grad_norm": 0.48244875658160574, "learning_rate": 2.7080291970802924e-05, "loss": 0.4375, "step": 52665 }, { "epoch": 1.5377731711945812, "grad_norm": 0.49226143781905657, "learning_rate": 2.7077588537442554e-05, "loss": 0.4637, "step": 52670 }, { "epoch": 1.5379191544648982, "grad_norm": 0.5142092165993065, "learning_rate": 2.707488510408219e-05, "loss": 0.4541, "step": 52675 }, { "epoch": 1.5380651377352157, "grad_norm": 0.4459193627382799, "learning_rate": 2.707218167072182e-05, "loss": 0.4231, "step": 52680 }, { "epoch": 1.5382111210055327, "grad_norm": 0.44594933546630694, "learning_rate": 2.706947823736145e-05, "loss": 0.4178, "step": 52685 }, { "epoch": 1.5383571042758502, "grad_norm": 0.4297269709264221, "learning_rate": 2.7066774804001084e-05, "loss": 0.4393, "step": 52690 }, { "epoch": 1.5385030875461672, "grad_norm": 0.4343436975635642, "learning_rate": 2.7064071370640714e-05, "loss": 0.4308, "step": 52695 }, { "epoch": 1.5386490708164844, "grad_norm": 0.5208485452633219, "learning_rate": 2.7061367937280345e-05, "loss": 0.455, "step": 52700 }, { "epoch": 1.5387950540868016, "grad_norm": 0.4786061958469368, "learning_rate": 2.705866450391998e-05, "loss": 0.4377, "step": 52705 }, { "epoch": 1.5389410373571188, "grad_norm": 0.46526022747161816, "learning_rate": 2.705596107055961e-05, "loss": 0.4356, "step": 52710 }, { "epoch": 1.539087020627436, "grad_norm": 0.4930586972757469, "learning_rate": 2.7053257637199244e-05, "loss": 0.4502, "step": 52715 }, { "epoch": 1.5392330038977533, "grad_norm": 0.47952707984288606, "learning_rate": 2.705055420383888e-05, "loss": 0.4505, "step": 52720 }, { "epoch": 1.5393789871680705, "grad_norm": 0.4880150389577235, "learning_rate": 2.704785077047851e-05, "loss": 0.4598, "step": 52725 }, { "epoch": 1.5395249704383878, "grad_norm": 0.4300285122096621, "learning_rate": 2.7045147337118142e-05, "loss": 0.4317, "step": 52730 }, { "epoch": 1.539670953708705, "grad_norm": 0.5082068647105683, "learning_rate": 2.7042443903757776e-05, "loss": 0.4532, "step": 52735 }, { "epoch": 1.5398169369790222, "grad_norm": 0.485997795012132, "learning_rate": 2.7039740470397407e-05, "loss": 0.4353, "step": 52740 }, { "epoch": 1.5399629202493395, "grad_norm": 0.46070779941574014, "learning_rate": 2.7037037037037037e-05, "loss": 0.451, "step": 52745 }, { "epoch": 1.5401089035196567, "grad_norm": 0.4617896449783966, "learning_rate": 2.703433360367667e-05, "loss": 0.4491, "step": 52750 }, { "epoch": 1.540254886789974, "grad_norm": 0.4462439911599837, "learning_rate": 2.7031630170316302e-05, "loss": 0.4175, "step": 52755 }, { "epoch": 1.5404008700602911, "grad_norm": 0.5109462642974136, "learning_rate": 2.7028926736955933e-05, "loss": 0.472, "step": 52760 }, { "epoch": 1.5405468533306084, "grad_norm": 0.5350047572946538, "learning_rate": 2.7026223303595567e-05, "loss": 0.4544, "step": 52765 }, { "epoch": 1.5406928366009254, "grad_norm": 0.46748581066604034, "learning_rate": 2.7023519870235197e-05, "loss": 0.4443, "step": 52770 }, { "epoch": 1.5408388198712428, "grad_norm": 0.4460752819221477, "learning_rate": 2.7020816436874835e-05, "loss": 0.4151, "step": 52775 }, { "epoch": 1.5409848031415598, "grad_norm": 0.48162657763399147, "learning_rate": 2.7018113003514465e-05, "loss": 0.4375, "step": 52780 }, { "epoch": 1.5411307864118773, "grad_norm": 0.50132631615405, "learning_rate": 2.70154095701541e-05, "loss": 0.423, "step": 52785 }, { "epoch": 1.5412767696821943, "grad_norm": 0.47230459532611146, "learning_rate": 2.701270613679373e-05, "loss": 0.4491, "step": 52790 }, { "epoch": 1.5414227529525117, "grad_norm": 0.44109230176723263, "learning_rate": 2.7010002703433364e-05, "loss": 0.441, "step": 52795 }, { "epoch": 1.5415687362228288, "grad_norm": 0.498300589283736, "learning_rate": 2.7007299270072995e-05, "loss": 0.4624, "step": 52800 }, { "epoch": 1.5417147194931462, "grad_norm": 0.49866773780170204, "learning_rate": 2.7004595836712625e-05, "loss": 0.4303, "step": 52805 }, { "epoch": 1.5418607027634632, "grad_norm": 0.46668361636444405, "learning_rate": 2.700189240335226e-05, "loss": 0.4437, "step": 52810 }, { "epoch": 1.5420066860337807, "grad_norm": 0.4630916635027439, "learning_rate": 2.699918896999189e-05, "loss": 0.4384, "step": 52815 }, { "epoch": 1.5421526693040977, "grad_norm": 0.5258346012851544, "learning_rate": 2.699648553663152e-05, "loss": 0.465, "step": 52820 }, { "epoch": 1.5422986525744151, "grad_norm": 0.47567966184473326, "learning_rate": 2.6993782103271155e-05, "loss": 0.4453, "step": 52825 }, { "epoch": 1.5424446358447321, "grad_norm": 0.47728052507636115, "learning_rate": 2.6991078669910785e-05, "loss": 0.4407, "step": 52830 }, { "epoch": 1.5425906191150496, "grad_norm": 0.4551092511246869, "learning_rate": 2.6988375236550423e-05, "loss": 0.42, "step": 52835 }, { "epoch": 1.5427366023853666, "grad_norm": 0.47029978115239146, "learning_rate": 2.6985671803190053e-05, "loss": 0.4632, "step": 52840 }, { "epoch": 1.5428825856556838, "grad_norm": 0.4745262727804975, "learning_rate": 2.6982968369829687e-05, "loss": 0.4585, "step": 52845 }, { "epoch": 1.543028568926001, "grad_norm": 0.5042147972405588, "learning_rate": 2.6980264936469318e-05, "loss": 0.4567, "step": 52850 }, { "epoch": 1.5431745521963183, "grad_norm": 0.4633672148989011, "learning_rate": 2.6977561503108952e-05, "loss": 0.437, "step": 52855 }, { "epoch": 1.5433205354666355, "grad_norm": 0.47486007234964955, "learning_rate": 2.6974858069748582e-05, "loss": 0.438, "step": 52860 }, { "epoch": 1.5434665187369527, "grad_norm": 0.44179178750271747, "learning_rate": 2.6972154636388213e-05, "loss": 0.4289, "step": 52865 }, { "epoch": 1.54361250200727, "grad_norm": 0.4693119153530119, "learning_rate": 2.6969451203027847e-05, "loss": 0.4444, "step": 52870 }, { "epoch": 1.5437584852775872, "grad_norm": 0.4804003612832015, "learning_rate": 2.6966747769667478e-05, "loss": 0.4538, "step": 52875 }, { "epoch": 1.5439044685479044, "grad_norm": 0.5167094035851221, "learning_rate": 2.6964044336307108e-05, "loss": 0.47, "step": 52880 }, { "epoch": 1.5440504518182216, "grad_norm": 0.44734336733045466, "learning_rate": 2.6961340902946742e-05, "loss": 0.4473, "step": 52885 }, { "epoch": 1.5441964350885389, "grad_norm": 0.5023169401943378, "learning_rate": 2.695863746958638e-05, "loss": 0.459, "step": 52890 }, { "epoch": 1.544342418358856, "grad_norm": 0.4684776121344861, "learning_rate": 2.695593403622601e-05, "loss": 0.4712, "step": 52895 }, { "epoch": 1.5444884016291733, "grad_norm": 0.5648872137286173, "learning_rate": 2.695323060286564e-05, "loss": 0.4719, "step": 52900 }, { "epoch": 1.5446343848994906, "grad_norm": 0.4628125486073025, "learning_rate": 2.6950527169505275e-05, "loss": 0.4391, "step": 52905 }, { "epoch": 1.5447803681698078, "grad_norm": 0.45987595948731913, "learning_rate": 2.6947823736144906e-05, "loss": 0.4303, "step": 52910 }, { "epoch": 1.5449263514401248, "grad_norm": 0.4722643347684202, "learning_rate": 2.6945120302784536e-05, "loss": 0.431, "step": 52915 }, { "epoch": 1.5450723347104423, "grad_norm": 0.4470329418298518, "learning_rate": 2.694241686942417e-05, "loss": 0.4142, "step": 52920 }, { "epoch": 1.5452183179807593, "grad_norm": 0.4764008351833104, "learning_rate": 2.69397134360638e-05, "loss": 0.4456, "step": 52925 }, { "epoch": 1.5453643012510767, "grad_norm": 0.4899308356193716, "learning_rate": 2.6937010002703435e-05, "loss": 0.4457, "step": 52930 }, { "epoch": 1.5455102845213937, "grad_norm": 0.4520601110831969, "learning_rate": 2.6934306569343065e-05, "loss": 0.4648, "step": 52935 }, { "epoch": 1.5456562677917112, "grad_norm": 0.47314495568867465, "learning_rate": 2.6931603135982696e-05, "loss": 0.429, "step": 52940 }, { "epoch": 1.5458022510620282, "grad_norm": 0.4877245432683082, "learning_rate": 2.6928899702622334e-05, "loss": 0.4384, "step": 52945 }, { "epoch": 1.5459482343323456, "grad_norm": 0.5223745556635604, "learning_rate": 2.6926196269261968e-05, "loss": 0.4521, "step": 52950 }, { "epoch": 1.5460942176026626, "grad_norm": 0.5106144491276818, "learning_rate": 2.6923492835901598e-05, "loss": 0.4406, "step": 52955 }, { "epoch": 1.54624020087298, "grad_norm": 0.46978727394322933, "learning_rate": 2.692078940254123e-05, "loss": 0.4454, "step": 52960 }, { "epoch": 1.546386184143297, "grad_norm": 0.4864090676567043, "learning_rate": 2.6918085969180863e-05, "loss": 0.4304, "step": 52965 }, { "epoch": 1.5465321674136145, "grad_norm": 0.4673363220880871, "learning_rate": 2.6915382535820493e-05, "loss": 0.4504, "step": 52970 }, { "epoch": 1.5466781506839316, "grad_norm": 0.4975749522465468, "learning_rate": 2.6912679102460124e-05, "loss": 0.4439, "step": 52975 }, { "epoch": 1.546824133954249, "grad_norm": 0.4693007637169726, "learning_rate": 2.6909975669099758e-05, "loss": 0.4634, "step": 52980 }, { "epoch": 1.546970117224566, "grad_norm": 0.4894072865207688, "learning_rate": 2.690727223573939e-05, "loss": 0.4412, "step": 52985 }, { "epoch": 1.5471161004948832, "grad_norm": 0.4739147811066448, "learning_rate": 2.6904568802379023e-05, "loss": 0.4359, "step": 52990 }, { "epoch": 1.5472620837652005, "grad_norm": 0.45189676735833717, "learning_rate": 2.6901865369018653e-05, "loss": 0.4029, "step": 52995 }, { "epoch": 1.5474080670355177, "grad_norm": 0.4564203305138102, "learning_rate": 2.6899161935658284e-05, "loss": 0.4505, "step": 53000 }, { "epoch": 1.547554050305835, "grad_norm": 0.5385684566013771, "learning_rate": 2.689645850229792e-05, "loss": 0.4863, "step": 53005 }, { "epoch": 1.5477000335761522, "grad_norm": 0.4314079026517526, "learning_rate": 2.6893755068937555e-05, "loss": 0.4309, "step": 53010 }, { "epoch": 1.5478460168464694, "grad_norm": 0.438217899213374, "learning_rate": 2.6891051635577186e-05, "loss": 0.4335, "step": 53015 }, { "epoch": 1.5479920001167866, "grad_norm": 0.4822393716686528, "learning_rate": 2.6888348202216817e-05, "loss": 0.4562, "step": 53020 }, { "epoch": 1.5481379833871038, "grad_norm": 0.45259878034115053, "learning_rate": 2.688564476885645e-05, "loss": 0.4337, "step": 53025 }, { "epoch": 1.548283966657421, "grad_norm": 0.4878915836112576, "learning_rate": 2.688294133549608e-05, "loss": 0.4517, "step": 53030 }, { "epoch": 1.5484299499277383, "grad_norm": 0.42300605957665594, "learning_rate": 2.6880237902135712e-05, "loss": 0.4334, "step": 53035 }, { "epoch": 1.5485759331980555, "grad_norm": 0.5062280922028907, "learning_rate": 2.6877534468775346e-05, "loss": 0.4412, "step": 53040 }, { "epoch": 1.5487219164683728, "grad_norm": 0.4772619763504435, "learning_rate": 2.6874831035414976e-05, "loss": 0.4461, "step": 53045 }, { "epoch": 1.54886789973869, "grad_norm": 0.5002313621958772, "learning_rate": 2.6872127602054607e-05, "loss": 0.4271, "step": 53050 }, { "epoch": 1.5490138830090072, "grad_norm": 0.480790711192577, "learning_rate": 2.686942416869424e-05, "loss": 0.4385, "step": 53055 }, { "epoch": 1.5491598662793242, "grad_norm": 0.48488246487371045, "learning_rate": 2.686672073533388e-05, "loss": 0.4707, "step": 53060 }, { "epoch": 1.5493058495496417, "grad_norm": 0.47632148350672376, "learning_rate": 2.686401730197351e-05, "loss": 0.4544, "step": 53065 }, { "epoch": 1.5494518328199587, "grad_norm": 0.4581590000334291, "learning_rate": 2.6861313868613143e-05, "loss": 0.4885, "step": 53070 }, { "epoch": 1.5495978160902761, "grad_norm": 0.4963089356908818, "learning_rate": 2.6858610435252774e-05, "loss": 0.4481, "step": 53075 }, { "epoch": 1.5497437993605931, "grad_norm": 0.4847361926055064, "learning_rate": 2.6855907001892404e-05, "loss": 0.4305, "step": 53080 }, { "epoch": 1.5498897826309106, "grad_norm": 0.4724552566258036, "learning_rate": 2.685320356853204e-05, "loss": 0.4273, "step": 53085 }, { "epoch": 1.5500357659012276, "grad_norm": 0.5238607576870443, "learning_rate": 2.685050013517167e-05, "loss": 0.4577, "step": 53090 }, { "epoch": 1.550181749171545, "grad_norm": 0.5034634459222528, "learning_rate": 2.68477967018113e-05, "loss": 0.4545, "step": 53095 }, { "epoch": 1.550327732441862, "grad_norm": 0.4399675570764658, "learning_rate": 2.6845093268450934e-05, "loss": 0.4239, "step": 53100 }, { "epoch": 1.5504737157121795, "grad_norm": 0.4595852449059197, "learning_rate": 2.6842389835090564e-05, "loss": 0.4328, "step": 53105 }, { "epoch": 1.5506196989824965, "grad_norm": 0.4953884052599719, "learning_rate": 2.6839686401730195e-05, "loss": 0.479, "step": 53110 }, { "epoch": 1.550765682252814, "grad_norm": 0.5166190946281127, "learning_rate": 2.6836982968369832e-05, "loss": 0.4511, "step": 53115 }, { "epoch": 1.550911665523131, "grad_norm": 0.4961889115433943, "learning_rate": 2.6834279535009466e-05, "loss": 0.4314, "step": 53120 }, { "epoch": 1.5510576487934484, "grad_norm": 0.4858996844237249, "learning_rate": 2.6831576101649097e-05, "loss": 0.4533, "step": 53125 }, { "epoch": 1.5512036320637654, "grad_norm": 0.49421005778898947, "learning_rate": 2.682887266828873e-05, "loss": 0.4692, "step": 53130 }, { "epoch": 1.5513496153340827, "grad_norm": 0.5054966141086995, "learning_rate": 2.682616923492836e-05, "loss": 0.4426, "step": 53135 }, { "epoch": 1.5514955986044, "grad_norm": 0.4596101734796978, "learning_rate": 2.6823465801567992e-05, "loss": 0.393, "step": 53140 }, { "epoch": 1.5516415818747171, "grad_norm": 0.4237185331225668, "learning_rate": 2.6820762368207626e-05, "loss": 0.4522, "step": 53145 }, { "epoch": 1.5517875651450344, "grad_norm": 0.4896566025965358, "learning_rate": 2.6818058934847257e-05, "loss": 0.4542, "step": 53150 }, { "epoch": 1.5519335484153516, "grad_norm": 0.44673186814611865, "learning_rate": 2.6815355501486887e-05, "loss": 0.4197, "step": 53155 }, { "epoch": 1.5520795316856688, "grad_norm": 0.4866327451918987, "learning_rate": 2.681265206812652e-05, "loss": 0.4495, "step": 53160 }, { "epoch": 1.552225514955986, "grad_norm": 0.48025740640272274, "learning_rate": 2.6809948634766152e-05, "loss": 0.4277, "step": 53165 }, { "epoch": 1.5523714982263033, "grad_norm": 0.48739163124887647, "learning_rate": 2.6807245201405783e-05, "loss": 0.4667, "step": 53170 }, { "epoch": 1.5525174814966205, "grad_norm": 0.47235916674555956, "learning_rate": 2.680454176804542e-05, "loss": 0.4348, "step": 53175 }, { "epoch": 1.5526634647669377, "grad_norm": 0.47468793515617497, "learning_rate": 2.6801838334685054e-05, "loss": 0.4515, "step": 53180 }, { "epoch": 1.552809448037255, "grad_norm": 0.4552714894556816, "learning_rate": 2.6799134901324685e-05, "loss": 0.462, "step": 53185 }, { "epoch": 1.5529554313075722, "grad_norm": 0.4503252542700662, "learning_rate": 2.6796431467964315e-05, "loss": 0.4498, "step": 53190 }, { "epoch": 1.5531014145778894, "grad_norm": 0.4922518251902833, "learning_rate": 2.679372803460395e-05, "loss": 0.4602, "step": 53195 }, { "epoch": 1.5532473978482066, "grad_norm": 0.4564279066038499, "learning_rate": 2.679102460124358e-05, "loss": 0.4412, "step": 53200 }, { "epoch": 1.5533933811185237, "grad_norm": 0.501838048638997, "learning_rate": 2.6788321167883214e-05, "loss": 0.4524, "step": 53205 }, { "epoch": 1.553539364388841, "grad_norm": 0.44322733574198814, "learning_rate": 2.6785617734522845e-05, "loss": 0.4313, "step": 53210 }, { "epoch": 1.5536853476591581, "grad_norm": 0.4799716288587415, "learning_rate": 2.6782914301162475e-05, "loss": 0.454, "step": 53215 }, { "epoch": 1.5538313309294756, "grad_norm": 0.4520941950409102, "learning_rate": 2.678021086780211e-05, "loss": 0.4286, "step": 53220 }, { "epoch": 1.5539773141997926, "grad_norm": 0.48840698398476756, "learning_rate": 2.677750743444174e-05, "loss": 0.4712, "step": 53225 }, { "epoch": 1.55412329747011, "grad_norm": 0.4743327467762573, "learning_rate": 2.6774804001081377e-05, "loss": 0.4448, "step": 53230 }, { "epoch": 1.554269280740427, "grad_norm": 0.5055513528858943, "learning_rate": 2.6772100567721008e-05, "loss": 0.4603, "step": 53235 }, { "epoch": 1.5544152640107445, "grad_norm": 0.4528541059273707, "learning_rate": 2.6769397134360642e-05, "loss": 0.427, "step": 53240 }, { "epoch": 1.5545612472810615, "grad_norm": 0.49565304793354736, "learning_rate": 2.6766693701000273e-05, "loss": 0.4455, "step": 53245 }, { "epoch": 1.554707230551379, "grad_norm": 0.4949318420311779, "learning_rate": 2.6763990267639903e-05, "loss": 0.4373, "step": 53250 }, { "epoch": 1.554853213821696, "grad_norm": 0.4801934173459624, "learning_rate": 2.6761286834279537e-05, "loss": 0.4704, "step": 53255 }, { "epoch": 1.5549991970920134, "grad_norm": 0.47792436513679204, "learning_rate": 2.6758583400919168e-05, "loss": 0.4431, "step": 53260 }, { "epoch": 1.5551451803623304, "grad_norm": 0.49590285602687806, "learning_rate": 2.67558799675588e-05, "loss": 0.4711, "step": 53265 }, { "epoch": 1.5552911636326479, "grad_norm": 0.4479681369415372, "learning_rate": 2.6753176534198432e-05, "loss": 0.4213, "step": 53270 }, { "epoch": 1.5554371469029649, "grad_norm": 0.48129553200398106, "learning_rate": 2.6750473100838063e-05, "loss": 0.4578, "step": 53275 }, { "epoch": 1.555583130173282, "grad_norm": 0.4492956545890151, "learning_rate": 2.6747769667477697e-05, "loss": 0.4384, "step": 53280 }, { "epoch": 1.5557291134435993, "grad_norm": 0.4720136839414774, "learning_rate": 2.6745066234117334e-05, "loss": 0.4264, "step": 53285 }, { "epoch": 1.5558750967139165, "grad_norm": 0.49619825213152435, "learning_rate": 2.6742362800756965e-05, "loss": 0.4555, "step": 53290 }, { "epoch": 1.5560210799842338, "grad_norm": 0.4709626468209554, "learning_rate": 2.6739659367396596e-05, "loss": 0.4417, "step": 53295 }, { "epoch": 1.556167063254551, "grad_norm": 0.4421128729978188, "learning_rate": 2.673695593403623e-05, "loss": 0.4538, "step": 53300 }, { "epoch": 1.5563130465248682, "grad_norm": 0.44681416311059935, "learning_rate": 2.673425250067586e-05, "loss": 0.4824, "step": 53305 }, { "epoch": 1.5564590297951855, "grad_norm": 0.4789008466747817, "learning_rate": 2.673154906731549e-05, "loss": 0.4127, "step": 53310 }, { "epoch": 1.5566050130655027, "grad_norm": 0.43805575088081544, "learning_rate": 2.6728845633955125e-05, "loss": 0.4319, "step": 53315 }, { "epoch": 1.55675099633582, "grad_norm": 0.4720959224760747, "learning_rate": 2.6726142200594756e-05, "loss": 0.4531, "step": 53320 }, { "epoch": 1.5568969796061372, "grad_norm": 0.45453189873701894, "learning_rate": 2.6723438767234386e-05, "loss": 0.4434, "step": 53325 }, { "epoch": 1.5570429628764544, "grad_norm": 0.8836975056343312, "learning_rate": 2.672073533387402e-05, "loss": 0.4513, "step": 53330 }, { "epoch": 1.5571889461467716, "grad_norm": 0.44535245955326624, "learning_rate": 2.671803190051365e-05, "loss": 0.444, "step": 53335 }, { "epoch": 1.5573349294170888, "grad_norm": 0.46464372489837336, "learning_rate": 2.6715328467153285e-05, "loss": 0.4491, "step": 53340 }, { "epoch": 1.557480912687406, "grad_norm": 0.5020899958466204, "learning_rate": 2.6712625033792922e-05, "loss": 0.4123, "step": 53345 }, { "epoch": 1.557626895957723, "grad_norm": 0.4719197933333629, "learning_rate": 2.6709921600432553e-05, "loss": 0.4407, "step": 53350 }, { "epoch": 1.5577728792280405, "grad_norm": 0.4825046867775737, "learning_rate": 2.6707218167072184e-05, "loss": 0.4528, "step": 53355 }, { "epoch": 1.5579188624983575, "grad_norm": 0.5245917116563777, "learning_rate": 2.6704514733711818e-05, "loss": 0.4869, "step": 53360 }, { "epoch": 1.558064845768675, "grad_norm": 0.4976730755136499, "learning_rate": 2.6701811300351448e-05, "loss": 0.4375, "step": 53365 }, { "epoch": 1.558210829038992, "grad_norm": 0.4811017938062649, "learning_rate": 2.669910786699108e-05, "loss": 0.4296, "step": 53370 }, { "epoch": 1.5583568123093094, "grad_norm": 0.521027776723548, "learning_rate": 2.6696404433630713e-05, "loss": 0.4396, "step": 53375 }, { "epoch": 1.5585027955796265, "grad_norm": 0.4909789914421638, "learning_rate": 2.6693701000270343e-05, "loss": 0.4568, "step": 53380 }, { "epoch": 1.558648778849944, "grad_norm": 0.4659967641982829, "learning_rate": 2.6690997566909974e-05, "loss": 0.4675, "step": 53385 }, { "epoch": 1.558794762120261, "grad_norm": 0.4781289203779887, "learning_rate": 2.6688294133549608e-05, "loss": 0.4275, "step": 53390 }, { "epoch": 1.5589407453905784, "grad_norm": 0.4373801409382837, "learning_rate": 2.668559070018924e-05, "loss": 0.4006, "step": 53395 }, { "epoch": 1.5590867286608954, "grad_norm": 0.45777847978145153, "learning_rate": 2.6682887266828876e-05, "loss": 0.4355, "step": 53400 }, { "epoch": 1.5592327119312128, "grad_norm": 0.5168669681798989, "learning_rate": 2.6680183833468507e-05, "loss": 0.4221, "step": 53405 }, { "epoch": 1.5593786952015298, "grad_norm": 0.49745427503690465, "learning_rate": 2.667748040010814e-05, "loss": 0.4471, "step": 53410 }, { "epoch": 1.5595246784718473, "grad_norm": 0.5210386841960029, "learning_rate": 2.667477696674777e-05, "loss": 0.4537, "step": 53415 }, { "epoch": 1.5596706617421643, "grad_norm": 0.48775918141297453, "learning_rate": 2.6672073533387405e-05, "loss": 0.4253, "step": 53420 }, { "epoch": 1.5598166450124815, "grad_norm": 0.476632137465704, "learning_rate": 2.6669370100027036e-05, "loss": 0.4343, "step": 53425 }, { "epoch": 1.5599626282827987, "grad_norm": 0.48379006868765095, "learning_rate": 2.6666666666666667e-05, "loss": 0.4121, "step": 53430 }, { "epoch": 1.560108611553116, "grad_norm": 0.4580739706049475, "learning_rate": 2.66639632333063e-05, "loss": 0.413, "step": 53435 }, { "epoch": 1.5602545948234332, "grad_norm": 0.5193235254448659, "learning_rate": 2.666125979994593e-05, "loss": 0.4532, "step": 53440 }, { "epoch": 1.5604005780937504, "grad_norm": 0.4866744913559879, "learning_rate": 2.6658556366585562e-05, "loss": 0.4471, "step": 53445 }, { "epoch": 1.5605465613640677, "grad_norm": 0.4714022691327112, "learning_rate": 2.6655852933225196e-05, "loss": 0.4409, "step": 53450 }, { "epoch": 1.560692544634385, "grad_norm": 0.49493475094425843, "learning_rate": 2.6653149499864833e-05, "loss": 0.463, "step": 53455 }, { "epoch": 1.5608385279047021, "grad_norm": 0.4557926680311556, "learning_rate": 2.6650446066504464e-05, "loss": 0.4295, "step": 53460 }, { "epoch": 1.5609845111750194, "grad_norm": 0.4641486795940444, "learning_rate": 2.6647742633144095e-05, "loss": 0.4309, "step": 53465 }, { "epoch": 1.5611304944453366, "grad_norm": 0.4851042395789395, "learning_rate": 2.664503919978373e-05, "loss": 0.4244, "step": 53470 }, { "epoch": 1.5612764777156538, "grad_norm": 0.4746676027024199, "learning_rate": 2.664233576642336e-05, "loss": 0.4184, "step": 53475 }, { "epoch": 1.561422460985971, "grad_norm": 0.4872925045406447, "learning_rate": 2.6639632333062993e-05, "loss": 0.4433, "step": 53480 }, { "epoch": 1.5615684442562883, "grad_norm": 0.4855307633137149, "learning_rate": 2.6636928899702624e-05, "loss": 0.4479, "step": 53485 }, { "epoch": 1.5617144275266055, "grad_norm": 0.4513808168511977, "learning_rate": 2.6634225466342254e-05, "loss": 0.4183, "step": 53490 }, { "epoch": 1.5618604107969227, "grad_norm": 0.45575967905077397, "learning_rate": 2.663152203298189e-05, "loss": 0.423, "step": 53495 }, { "epoch": 1.56200639406724, "grad_norm": 0.496770054430942, "learning_rate": 2.662881859962152e-05, "loss": 0.4706, "step": 53500 }, { "epoch": 1.562152377337557, "grad_norm": 0.48935871561267386, "learning_rate": 2.662611516626115e-05, "loss": 0.4377, "step": 53505 }, { "epoch": 1.5622983606078744, "grad_norm": 0.48907161670754, "learning_rate": 2.6623411732900784e-05, "loss": 0.414, "step": 53510 }, { "epoch": 1.5624443438781914, "grad_norm": 0.4586735135142973, "learning_rate": 2.662070829954042e-05, "loss": 0.436, "step": 53515 }, { "epoch": 1.5625903271485089, "grad_norm": 0.48599966448186643, "learning_rate": 2.661800486618005e-05, "loss": 0.4538, "step": 53520 }, { "epoch": 1.5627363104188259, "grad_norm": 0.48096885062710787, "learning_rate": 2.6615301432819682e-05, "loss": 0.4441, "step": 53525 }, { "epoch": 1.5628822936891433, "grad_norm": 0.481272821673489, "learning_rate": 2.6612597999459316e-05, "loss": 0.4603, "step": 53530 }, { "epoch": 1.5630282769594603, "grad_norm": 0.4960542300035541, "learning_rate": 2.6609894566098947e-05, "loss": 0.4644, "step": 53535 }, { "epoch": 1.5631742602297778, "grad_norm": 0.49324988580780893, "learning_rate": 2.6607191132738578e-05, "loss": 0.4604, "step": 53540 }, { "epoch": 1.5633202435000948, "grad_norm": 0.47692607159513106, "learning_rate": 2.660448769937821e-05, "loss": 0.4238, "step": 53545 }, { "epoch": 1.5634662267704122, "grad_norm": 0.5405205629306326, "learning_rate": 2.6601784266017842e-05, "loss": 0.471, "step": 53550 }, { "epoch": 1.5636122100407293, "grad_norm": 0.5027985757595266, "learning_rate": 2.6599080832657476e-05, "loss": 0.4534, "step": 53555 }, { "epoch": 1.5637581933110467, "grad_norm": 0.4431105556152232, "learning_rate": 2.6596377399297107e-05, "loss": 0.4218, "step": 53560 }, { "epoch": 1.5639041765813637, "grad_norm": 0.5060537656711731, "learning_rate": 2.6593673965936737e-05, "loss": 0.4594, "step": 53565 }, { "epoch": 1.564050159851681, "grad_norm": 0.4871595338486942, "learning_rate": 2.6590970532576375e-05, "loss": 0.461, "step": 53570 }, { "epoch": 1.5641961431219982, "grad_norm": 0.4511650354232401, "learning_rate": 2.658826709921601e-05, "loss": 0.4232, "step": 53575 }, { "epoch": 1.5643421263923154, "grad_norm": 0.4658813835599209, "learning_rate": 2.658556366585564e-05, "loss": 0.4289, "step": 53580 }, { "epoch": 1.5644881096626326, "grad_norm": 0.5314005918007191, "learning_rate": 2.658286023249527e-05, "loss": 0.4408, "step": 53585 }, { "epoch": 1.5646340929329499, "grad_norm": 0.5441950777004073, "learning_rate": 2.6580156799134904e-05, "loss": 0.4665, "step": 53590 }, { "epoch": 1.564780076203267, "grad_norm": 0.4734487593284494, "learning_rate": 2.6577453365774535e-05, "loss": 0.4414, "step": 53595 }, { "epoch": 1.5649260594735843, "grad_norm": 0.4848184171261744, "learning_rate": 2.6574749932414165e-05, "loss": 0.4608, "step": 53600 }, { "epoch": 1.5650720427439015, "grad_norm": 0.47389701872641465, "learning_rate": 2.65720464990538e-05, "loss": 0.4655, "step": 53605 }, { "epoch": 1.5652180260142188, "grad_norm": 0.5143260873997071, "learning_rate": 2.656934306569343e-05, "loss": 0.4623, "step": 53610 }, { "epoch": 1.565364009284536, "grad_norm": 0.4740826231997358, "learning_rate": 2.6566639632333064e-05, "loss": 0.4518, "step": 53615 }, { "epoch": 1.5655099925548532, "grad_norm": 0.47078815479468344, "learning_rate": 2.6563936198972695e-05, "loss": 0.4535, "step": 53620 }, { "epoch": 1.5656559758251705, "grad_norm": 0.4855387015414626, "learning_rate": 2.6561232765612332e-05, "loss": 0.4392, "step": 53625 }, { "epoch": 1.5658019590954877, "grad_norm": 0.47166328064489477, "learning_rate": 2.6558529332251963e-05, "loss": 0.4383, "step": 53630 }, { "epoch": 1.565947942365805, "grad_norm": 0.4738242614186869, "learning_rate": 2.6555825898891597e-05, "loss": 0.4516, "step": 53635 }, { "epoch": 1.5660939256361222, "grad_norm": 0.47955394805906454, "learning_rate": 2.6553122465531227e-05, "loss": 0.4315, "step": 53640 }, { "epoch": 1.5662399089064394, "grad_norm": 0.46790138051779084, "learning_rate": 2.6550419032170858e-05, "loss": 0.4253, "step": 53645 }, { "epoch": 1.5663858921767564, "grad_norm": 0.44418415357418706, "learning_rate": 2.6547715598810492e-05, "loss": 0.402, "step": 53650 }, { "epoch": 1.5665318754470738, "grad_norm": 0.4845795195261664, "learning_rate": 2.6545012165450123e-05, "loss": 0.4426, "step": 53655 }, { "epoch": 1.5666778587173908, "grad_norm": 0.4972782231710503, "learning_rate": 2.6542308732089753e-05, "loss": 0.4631, "step": 53660 }, { "epoch": 1.5668238419877083, "grad_norm": 0.439582615020517, "learning_rate": 2.6539605298729387e-05, "loss": 0.4351, "step": 53665 }, { "epoch": 1.5669698252580253, "grad_norm": 0.45040494317008445, "learning_rate": 2.6536901865369018e-05, "loss": 0.447, "step": 53670 }, { "epoch": 1.5671158085283428, "grad_norm": 0.5115035153996893, "learning_rate": 2.653419843200865e-05, "loss": 0.4475, "step": 53675 }, { "epoch": 1.5672617917986598, "grad_norm": 0.47422811573550555, "learning_rate": 2.6531494998648282e-05, "loss": 0.4411, "step": 53680 }, { "epoch": 1.5674077750689772, "grad_norm": 0.43796573789937127, "learning_rate": 2.652879156528792e-05, "loss": 0.4449, "step": 53685 }, { "epoch": 1.5675537583392942, "grad_norm": 0.4283766531677837, "learning_rate": 2.652608813192755e-05, "loss": 0.4239, "step": 53690 }, { "epoch": 1.5676997416096117, "grad_norm": 0.4409141711900561, "learning_rate": 2.6523384698567184e-05, "loss": 0.4528, "step": 53695 }, { "epoch": 1.5678457248799287, "grad_norm": 0.4383491860078911, "learning_rate": 2.6520681265206815e-05, "loss": 0.4698, "step": 53700 }, { "epoch": 1.5679917081502461, "grad_norm": 0.46560242316753275, "learning_rate": 2.6517977831846446e-05, "loss": 0.4343, "step": 53705 }, { "epoch": 1.5681376914205631, "grad_norm": 0.4814127335537126, "learning_rate": 2.651527439848608e-05, "loss": 0.4554, "step": 53710 }, { "epoch": 1.5682836746908806, "grad_norm": 0.47031857324091586, "learning_rate": 2.651257096512571e-05, "loss": 0.4335, "step": 53715 }, { "epoch": 1.5684296579611976, "grad_norm": 0.501077027739636, "learning_rate": 2.650986753176534e-05, "loss": 0.4486, "step": 53720 }, { "epoch": 1.5685756412315148, "grad_norm": 0.4406273429372073, "learning_rate": 2.6507164098404975e-05, "loss": 0.4056, "step": 53725 }, { "epoch": 1.568721624501832, "grad_norm": 0.4676338885299495, "learning_rate": 2.6504460665044606e-05, "loss": 0.4658, "step": 53730 }, { "epoch": 1.5688676077721493, "grad_norm": 0.5013611314949085, "learning_rate": 2.6501757231684236e-05, "loss": 0.4195, "step": 53735 }, { "epoch": 1.5690135910424665, "grad_norm": 0.4753790386305886, "learning_rate": 2.6499053798323874e-05, "loss": 0.4611, "step": 53740 }, { "epoch": 1.5691595743127837, "grad_norm": 0.4886752259340086, "learning_rate": 2.6496350364963508e-05, "loss": 0.4344, "step": 53745 }, { "epoch": 1.569305557583101, "grad_norm": 0.4585555402632208, "learning_rate": 2.6493646931603138e-05, "loss": 0.4361, "step": 53750 }, { "epoch": 1.5694515408534182, "grad_norm": 0.5021697108181312, "learning_rate": 2.6490943498242772e-05, "loss": 0.4625, "step": 53755 }, { "epoch": 1.5695975241237354, "grad_norm": 0.5067174778160931, "learning_rate": 2.6488240064882403e-05, "loss": 0.4472, "step": 53760 }, { "epoch": 1.5697435073940527, "grad_norm": 0.4351416879451593, "learning_rate": 2.6485536631522034e-05, "loss": 0.4208, "step": 53765 }, { "epoch": 1.56988949066437, "grad_norm": 0.47938892858721593, "learning_rate": 2.6482833198161668e-05, "loss": 0.4453, "step": 53770 }, { "epoch": 1.5700354739346871, "grad_norm": 0.47906990854327713, "learning_rate": 2.6480129764801298e-05, "loss": 0.4115, "step": 53775 }, { "epoch": 1.5701814572050043, "grad_norm": 0.5229231349968971, "learning_rate": 2.647742633144093e-05, "loss": 0.4635, "step": 53780 }, { "epoch": 1.5703274404753216, "grad_norm": 0.43049614876429887, "learning_rate": 2.6474722898080563e-05, "loss": 0.4294, "step": 53785 }, { "epoch": 1.5704734237456388, "grad_norm": 0.49220175010482553, "learning_rate": 2.6472019464720193e-05, "loss": 0.451, "step": 53790 }, { "epoch": 1.5706194070159558, "grad_norm": 0.44622828285951804, "learning_rate": 2.646931603135983e-05, "loss": 0.4262, "step": 53795 }, { "epoch": 1.5707653902862733, "grad_norm": 0.5055846866970815, "learning_rate": 2.646661259799946e-05, "loss": 0.4636, "step": 53800 }, { "epoch": 1.5709113735565903, "grad_norm": 0.4840751532451393, "learning_rate": 2.6463909164639095e-05, "loss": 0.4146, "step": 53805 }, { "epoch": 1.5710573568269077, "grad_norm": 0.4378555339818526, "learning_rate": 2.6461205731278726e-05, "loss": 0.4427, "step": 53810 }, { "epoch": 1.5712033400972247, "grad_norm": 0.4871869243114812, "learning_rate": 2.6458502297918357e-05, "loss": 0.4471, "step": 53815 }, { "epoch": 1.5713493233675422, "grad_norm": 0.48882305316404295, "learning_rate": 2.645579886455799e-05, "loss": 0.4457, "step": 53820 }, { "epoch": 1.5714953066378592, "grad_norm": 0.49720593864941665, "learning_rate": 2.645309543119762e-05, "loss": 0.4585, "step": 53825 }, { "epoch": 1.5716412899081766, "grad_norm": 0.46895983782260175, "learning_rate": 2.6450391997837255e-05, "loss": 0.421, "step": 53830 }, { "epoch": 1.5717872731784936, "grad_norm": 0.4654806327608813, "learning_rate": 2.6447688564476886e-05, "loss": 0.4378, "step": 53835 }, { "epoch": 1.571933256448811, "grad_norm": 0.46102129061988345, "learning_rate": 2.6444985131116517e-05, "loss": 0.4525, "step": 53840 }, { "epoch": 1.572079239719128, "grad_norm": 0.5364998307383225, "learning_rate": 2.644228169775615e-05, "loss": 0.4286, "step": 53845 }, { "epoch": 1.5722252229894456, "grad_norm": 0.46417154220251183, "learning_rate": 2.643957826439578e-05, "loss": 0.4581, "step": 53850 }, { "epoch": 1.5723712062597626, "grad_norm": 0.4620314151591611, "learning_rate": 2.643687483103542e-05, "loss": 0.4626, "step": 53855 }, { "epoch": 1.57251718953008, "grad_norm": 0.44640286288938064, "learning_rate": 2.643417139767505e-05, "loss": 0.4711, "step": 53860 }, { "epoch": 1.572663172800397, "grad_norm": 0.4551880684257982, "learning_rate": 2.6431467964314683e-05, "loss": 0.4564, "step": 53865 }, { "epoch": 1.5728091560707143, "grad_norm": 0.4907491794406675, "learning_rate": 2.6428764530954314e-05, "loss": 0.4483, "step": 53870 }, { "epoch": 1.5729551393410315, "grad_norm": 0.4618960069256603, "learning_rate": 2.6426061097593945e-05, "loss": 0.4325, "step": 53875 }, { "epoch": 1.5731011226113487, "grad_norm": 0.5064475821258702, "learning_rate": 2.642335766423358e-05, "loss": 0.4426, "step": 53880 }, { "epoch": 1.573247105881666, "grad_norm": 0.48657387218180165, "learning_rate": 2.642065423087321e-05, "loss": 0.451, "step": 53885 }, { "epoch": 1.5733930891519832, "grad_norm": 0.4830920092637734, "learning_rate": 2.6417950797512843e-05, "loss": 0.439, "step": 53890 }, { "epoch": 1.5735390724223004, "grad_norm": 0.4466667789434152, "learning_rate": 2.6415247364152474e-05, "loss": 0.4419, "step": 53895 }, { "epoch": 1.5736850556926176, "grad_norm": 0.5016949917797742, "learning_rate": 2.6412543930792104e-05, "loss": 0.4271, "step": 53900 }, { "epoch": 1.5738310389629349, "grad_norm": 0.49305298797405245, "learning_rate": 2.640984049743174e-05, "loss": 0.4595, "step": 53905 }, { "epoch": 1.573977022233252, "grad_norm": 0.45977720008962575, "learning_rate": 2.6407137064071376e-05, "loss": 0.4085, "step": 53910 }, { "epoch": 1.5741230055035693, "grad_norm": 0.4992890939871395, "learning_rate": 2.6404433630711006e-05, "loss": 0.44, "step": 53915 }, { "epoch": 1.5742689887738865, "grad_norm": 0.46027459341399607, "learning_rate": 2.6401730197350637e-05, "loss": 0.4495, "step": 53920 }, { "epoch": 1.5744149720442038, "grad_norm": 0.4342806795564251, "learning_rate": 2.639902676399027e-05, "loss": 0.4031, "step": 53925 }, { "epoch": 1.574560955314521, "grad_norm": 0.4735739985491248, "learning_rate": 2.6396323330629902e-05, "loss": 0.4618, "step": 53930 }, { "epoch": 1.5747069385848382, "grad_norm": 0.45861877122727, "learning_rate": 2.6393619897269532e-05, "loss": 0.4602, "step": 53935 }, { "epoch": 1.5748529218551552, "grad_norm": 0.4605123030074402, "learning_rate": 2.6390916463909166e-05, "loss": 0.4296, "step": 53940 }, { "epoch": 1.5749989051254727, "grad_norm": 0.42555982092266365, "learning_rate": 2.6388213030548797e-05, "loss": 0.4383, "step": 53945 }, { "epoch": 1.5751448883957897, "grad_norm": 0.46113435663380814, "learning_rate": 2.6385509597188428e-05, "loss": 0.4666, "step": 53950 }, { "epoch": 1.5752908716661072, "grad_norm": 0.4787426033842967, "learning_rate": 2.638280616382806e-05, "loss": 0.4592, "step": 53955 }, { "epoch": 1.5754368549364242, "grad_norm": 0.48182305528810665, "learning_rate": 2.6380102730467692e-05, "loss": 0.4345, "step": 53960 }, { "epoch": 1.5755828382067416, "grad_norm": 0.44177231799190547, "learning_rate": 2.637739929710733e-05, "loss": 0.4502, "step": 53965 }, { "epoch": 1.5757288214770586, "grad_norm": 0.5166228157455749, "learning_rate": 2.6374695863746964e-05, "loss": 0.4639, "step": 53970 }, { "epoch": 1.575874804747376, "grad_norm": 0.4864123840403753, "learning_rate": 2.6371992430386594e-05, "loss": 0.4508, "step": 53975 }, { "epoch": 1.576020788017693, "grad_norm": 0.4756990210021842, "learning_rate": 2.6369288997026225e-05, "loss": 0.4451, "step": 53980 }, { "epoch": 1.5761667712880105, "grad_norm": 0.48198588291152494, "learning_rate": 2.636658556366586e-05, "loss": 0.4441, "step": 53985 }, { "epoch": 1.5763127545583275, "grad_norm": 0.46016735178785695, "learning_rate": 2.636388213030549e-05, "loss": 0.4311, "step": 53990 }, { "epoch": 1.576458737828645, "grad_norm": 0.4819268913034599, "learning_rate": 2.636117869694512e-05, "loss": 0.4578, "step": 53995 }, { "epoch": 1.576604721098962, "grad_norm": 0.5184203223048691, "learning_rate": 2.6358475263584754e-05, "loss": 0.4452, "step": 54000 }, { "epoch": 1.5767507043692794, "grad_norm": 0.465056690723789, "learning_rate": 2.6355771830224385e-05, "loss": 0.4243, "step": 54005 }, { "epoch": 1.5768966876395965, "grad_norm": 0.4777641331011182, "learning_rate": 2.6353068396864015e-05, "loss": 0.4362, "step": 54010 }, { "epoch": 1.5770426709099137, "grad_norm": 0.5083544037271892, "learning_rate": 2.635036496350365e-05, "loss": 0.4354, "step": 54015 }, { "epoch": 1.577188654180231, "grad_norm": 0.479185751230509, "learning_rate": 2.634766153014328e-05, "loss": 0.4586, "step": 54020 }, { "epoch": 1.5773346374505481, "grad_norm": 0.48053511624383716, "learning_rate": 2.6344958096782917e-05, "loss": 0.4737, "step": 54025 }, { "epoch": 1.5774806207208654, "grad_norm": 0.4616314675364491, "learning_rate": 2.634225466342255e-05, "loss": 0.4518, "step": 54030 }, { "epoch": 1.5776266039911826, "grad_norm": 0.45287402448725034, "learning_rate": 2.6339551230062182e-05, "loss": 0.409, "step": 54035 }, { "epoch": 1.5777725872614998, "grad_norm": 0.48658774640108793, "learning_rate": 2.6336847796701813e-05, "loss": 0.4477, "step": 54040 }, { "epoch": 1.577918570531817, "grad_norm": 0.4855629314406704, "learning_rate": 2.6334144363341447e-05, "loss": 0.4431, "step": 54045 }, { "epoch": 1.5780645538021343, "grad_norm": 0.47258937113857863, "learning_rate": 2.6331440929981077e-05, "loss": 0.401, "step": 54050 }, { "epoch": 1.5782105370724515, "grad_norm": 0.5097037983463744, "learning_rate": 2.6328737496620708e-05, "loss": 0.4323, "step": 54055 }, { "epoch": 1.5783565203427687, "grad_norm": 0.48675448990724207, "learning_rate": 2.6326034063260342e-05, "loss": 0.4303, "step": 54060 }, { "epoch": 1.578502503613086, "grad_norm": 0.45143203658905245, "learning_rate": 2.6323330629899973e-05, "loss": 0.459, "step": 54065 }, { "epoch": 1.5786484868834032, "grad_norm": 0.5055135926768723, "learning_rate": 2.6320627196539603e-05, "loss": 0.4478, "step": 54070 }, { "epoch": 1.5787944701537204, "grad_norm": 0.4651657138351517, "learning_rate": 2.6317923763179237e-05, "loss": 0.4594, "step": 54075 }, { "epoch": 1.5789404534240377, "grad_norm": 0.4356961564533336, "learning_rate": 2.6315220329818875e-05, "loss": 0.4147, "step": 54080 }, { "epoch": 1.5790864366943547, "grad_norm": 0.475484161985986, "learning_rate": 2.6312516896458505e-05, "loss": 0.4483, "step": 54085 }, { "epoch": 1.5792324199646721, "grad_norm": 0.46525977586173156, "learning_rate": 2.6309813463098136e-05, "loss": 0.4254, "step": 54090 }, { "epoch": 1.5793784032349891, "grad_norm": 0.47617422847358404, "learning_rate": 2.630711002973777e-05, "loss": 0.4568, "step": 54095 }, { "epoch": 1.5795243865053066, "grad_norm": 0.4807512116209738, "learning_rate": 2.63044065963774e-05, "loss": 0.456, "step": 54100 }, { "epoch": 1.5796703697756236, "grad_norm": 0.46466473921680035, "learning_rate": 2.6301703163017035e-05, "loss": 0.4245, "step": 54105 }, { "epoch": 1.579816353045941, "grad_norm": 0.4960928333784405, "learning_rate": 2.6298999729656665e-05, "loss": 0.4507, "step": 54110 }, { "epoch": 1.579962336316258, "grad_norm": 0.4874229802500232, "learning_rate": 2.6296296296296296e-05, "loss": 0.4311, "step": 54115 }, { "epoch": 1.5801083195865755, "grad_norm": 0.49759109216688074, "learning_rate": 2.629359286293593e-05, "loss": 0.4089, "step": 54120 }, { "epoch": 1.5802543028568925, "grad_norm": 0.4851404452865195, "learning_rate": 2.629088942957556e-05, "loss": 0.4463, "step": 54125 }, { "epoch": 1.58040028612721, "grad_norm": 0.47753653761451115, "learning_rate": 2.628818599621519e-05, "loss": 0.4284, "step": 54130 }, { "epoch": 1.580546269397527, "grad_norm": 0.49247676177155264, "learning_rate": 2.628548256285483e-05, "loss": 0.4393, "step": 54135 }, { "epoch": 1.5806922526678444, "grad_norm": 0.49898235008349334, "learning_rate": 2.6282779129494462e-05, "loss": 0.437, "step": 54140 }, { "epoch": 1.5808382359381614, "grad_norm": 0.4984446336066509, "learning_rate": 2.6280075696134093e-05, "loss": 0.4364, "step": 54145 }, { "epoch": 1.5809842192084789, "grad_norm": 0.48831218143369315, "learning_rate": 2.6277372262773724e-05, "loss": 0.409, "step": 54150 }, { "epoch": 1.5811302024787959, "grad_norm": 0.5462079829833247, "learning_rate": 2.6274668829413358e-05, "loss": 0.4718, "step": 54155 }, { "epoch": 1.581276185749113, "grad_norm": 0.4856158573039844, "learning_rate": 2.6271965396052988e-05, "loss": 0.4382, "step": 54160 }, { "epoch": 1.5814221690194303, "grad_norm": 0.4686775976114145, "learning_rate": 2.6269261962692622e-05, "loss": 0.4847, "step": 54165 }, { "epoch": 1.5815681522897476, "grad_norm": 0.5138849761001558, "learning_rate": 2.6266558529332253e-05, "loss": 0.4407, "step": 54170 }, { "epoch": 1.5817141355600648, "grad_norm": 0.4791577596780386, "learning_rate": 2.6263855095971884e-05, "loss": 0.4521, "step": 54175 }, { "epoch": 1.581860118830382, "grad_norm": 0.4678999459485991, "learning_rate": 2.6261151662611518e-05, "loss": 0.4357, "step": 54180 }, { "epoch": 1.5820061021006993, "grad_norm": 0.5311588623413387, "learning_rate": 2.6258448229251148e-05, "loss": 0.4762, "step": 54185 }, { "epoch": 1.5821520853710165, "grad_norm": 0.48923505247603943, "learning_rate": 2.625574479589078e-05, "loss": 0.4864, "step": 54190 }, { "epoch": 1.5822980686413337, "grad_norm": 0.4961625715900565, "learning_rate": 2.6253041362530416e-05, "loss": 0.481, "step": 54195 }, { "epoch": 1.582444051911651, "grad_norm": 0.4845579279432541, "learning_rate": 2.625033792917005e-05, "loss": 0.4443, "step": 54200 }, { "epoch": 1.5825900351819682, "grad_norm": 0.43589916966907777, "learning_rate": 2.624763449580968e-05, "loss": 0.4412, "step": 54205 }, { "epoch": 1.5827360184522854, "grad_norm": 0.4718138722656278, "learning_rate": 2.624493106244931e-05, "loss": 0.4385, "step": 54210 }, { "epoch": 1.5828820017226026, "grad_norm": 0.5073382382748475, "learning_rate": 2.6242227629088945e-05, "loss": 0.4678, "step": 54215 }, { "epoch": 1.5830279849929199, "grad_norm": 0.44979930840674626, "learning_rate": 2.6239524195728576e-05, "loss": 0.4595, "step": 54220 }, { "epoch": 1.583173968263237, "grad_norm": 0.45939707567162874, "learning_rate": 2.6236820762368207e-05, "loss": 0.43, "step": 54225 }, { "epoch": 1.583319951533554, "grad_norm": 0.4196851912859035, "learning_rate": 2.623411732900784e-05, "loss": 0.4095, "step": 54230 }, { "epoch": 1.5834659348038715, "grad_norm": 0.47013347884049944, "learning_rate": 2.623141389564747e-05, "loss": 0.4589, "step": 54235 }, { "epoch": 1.5836119180741886, "grad_norm": 0.49520088768097864, "learning_rate": 2.6228710462287105e-05, "loss": 0.4602, "step": 54240 }, { "epoch": 1.583757901344506, "grad_norm": 0.45366509627591833, "learning_rate": 2.6226007028926736e-05, "loss": 0.4707, "step": 54245 }, { "epoch": 1.583903884614823, "grad_norm": 0.4702170618681803, "learning_rate": 2.6223303595566373e-05, "loss": 0.443, "step": 54250 }, { "epoch": 1.5840498678851405, "grad_norm": 0.4494733731487653, "learning_rate": 2.6220600162206004e-05, "loss": 0.4361, "step": 54255 }, { "epoch": 1.5841958511554575, "grad_norm": 0.4802102694738193, "learning_rate": 2.6217896728845638e-05, "loss": 0.4307, "step": 54260 }, { "epoch": 1.584341834425775, "grad_norm": 0.4723342502836421, "learning_rate": 2.621519329548527e-05, "loss": 0.4569, "step": 54265 }, { "epoch": 1.584487817696092, "grad_norm": 0.5265192867514531, "learning_rate": 2.62124898621249e-05, "loss": 0.4634, "step": 54270 }, { "epoch": 1.5846338009664094, "grad_norm": 0.4856829022949693, "learning_rate": 2.6209786428764533e-05, "loss": 0.439, "step": 54275 }, { "epoch": 1.5847797842367264, "grad_norm": 0.47751276940305837, "learning_rate": 2.6207082995404164e-05, "loss": 0.4476, "step": 54280 }, { "epoch": 1.5849257675070438, "grad_norm": 0.48635878121213966, "learning_rate": 2.6204379562043795e-05, "loss": 0.4025, "step": 54285 }, { "epoch": 1.5850717507773608, "grad_norm": 0.48225735846608114, "learning_rate": 2.620167612868343e-05, "loss": 0.4355, "step": 54290 }, { "epoch": 1.5852177340476783, "grad_norm": 0.49883615975165874, "learning_rate": 2.619897269532306e-05, "loss": 0.4383, "step": 54295 }, { "epoch": 1.5853637173179953, "grad_norm": 0.5260711790940547, "learning_rate": 2.6196269261962693e-05, "loss": 0.4833, "step": 54300 }, { "epoch": 1.5855097005883125, "grad_norm": 0.5031844862650314, "learning_rate": 2.6193565828602327e-05, "loss": 0.4573, "step": 54305 }, { "epoch": 1.5856556838586298, "grad_norm": 0.4810845028543993, "learning_rate": 2.619086239524196e-05, "loss": 0.4594, "step": 54310 }, { "epoch": 1.585801667128947, "grad_norm": 0.4571040949627854, "learning_rate": 2.6188158961881592e-05, "loss": 0.4784, "step": 54315 }, { "epoch": 1.5859476503992642, "grad_norm": 0.47123173392045276, "learning_rate": 2.6185455528521226e-05, "loss": 0.4555, "step": 54320 }, { "epoch": 1.5860936336695814, "grad_norm": 0.4575071360196116, "learning_rate": 2.6182752095160856e-05, "loss": 0.4645, "step": 54325 }, { "epoch": 1.5862396169398987, "grad_norm": 0.4884886291068744, "learning_rate": 2.6180048661800487e-05, "loss": 0.439, "step": 54330 }, { "epoch": 1.586385600210216, "grad_norm": 0.48489812176798114, "learning_rate": 2.617734522844012e-05, "loss": 0.4342, "step": 54335 }, { "epoch": 1.5865315834805331, "grad_norm": 0.46255336461465246, "learning_rate": 2.6174641795079752e-05, "loss": 0.4227, "step": 54340 }, { "epoch": 1.5866775667508504, "grad_norm": 0.4913148087021517, "learning_rate": 2.6171938361719382e-05, "loss": 0.4553, "step": 54345 }, { "epoch": 1.5868235500211676, "grad_norm": 0.44402815749610697, "learning_rate": 2.6169234928359016e-05, "loss": 0.4421, "step": 54350 }, { "epoch": 1.5869695332914848, "grad_norm": 0.4408943275597555, "learning_rate": 2.6166531494998647e-05, "loss": 0.4371, "step": 54355 }, { "epoch": 1.587115516561802, "grad_norm": 0.4939996131549382, "learning_rate": 2.6163828061638278e-05, "loss": 0.4207, "step": 54360 }, { "epoch": 1.5872614998321193, "grad_norm": 0.5530209586714994, "learning_rate": 2.6161124628277915e-05, "loss": 0.4607, "step": 54365 }, { "epoch": 1.5874074831024365, "grad_norm": 0.5010118456712628, "learning_rate": 2.615842119491755e-05, "loss": 0.4476, "step": 54370 }, { "epoch": 1.5875534663727535, "grad_norm": 0.48155400830751, "learning_rate": 2.615571776155718e-05, "loss": 0.472, "step": 54375 }, { "epoch": 1.587699449643071, "grad_norm": 0.4659282181129585, "learning_rate": 2.6153014328196814e-05, "loss": 0.4636, "step": 54380 }, { "epoch": 1.587845432913388, "grad_norm": 0.5214502254098123, "learning_rate": 2.6150310894836444e-05, "loss": 0.4752, "step": 54385 }, { "epoch": 1.5879914161837054, "grad_norm": 0.4901785740161114, "learning_rate": 2.6147607461476075e-05, "loss": 0.4647, "step": 54390 }, { "epoch": 1.5881373994540224, "grad_norm": 0.48625911018876855, "learning_rate": 2.614490402811571e-05, "loss": 0.4401, "step": 54395 }, { "epoch": 1.5882833827243399, "grad_norm": 0.5074531385854022, "learning_rate": 2.614220059475534e-05, "loss": 0.468, "step": 54400 }, { "epoch": 1.588429365994657, "grad_norm": 0.4682741921666845, "learning_rate": 2.613949716139497e-05, "loss": 0.4369, "step": 54405 }, { "epoch": 1.5885753492649743, "grad_norm": 0.4664854027203276, "learning_rate": 2.6136793728034604e-05, "loss": 0.4265, "step": 54410 }, { "epoch": 1.5887213325352914, "grad_norm": 0.44443477127024383, "learning_rate": 2.6134090294674235e-05, "loss": 0.4336, "step": 54415 }, { "epoch": 1.5888673158056088, "grad_norm": 0.5107560616998189, "learning_rate": 2.6131386861313872e-05, "loss": 0.424, "step": 54420 }, { "epoch": 1.5890132990759258, "grad_norm": 0.5047883679578077, "learning_rate": 2.6128683427953503e-05, "loss": 0.4512, "step": 54425 }, { "epoch": 1.5891592823462433, "grad_norm": 0.44035319793331, "learning_rate": 2.6125979994593137e-05, "loss": 0.464, "step": 54430 }, { "epoch": 1.5893052656165603, "grad_norm": 0.4813195686790958, "learning_rate": 2.6123276561232767e-05, "loss": 0.4519, "step": 54435 }, { "epoch": 1.5894512488868777, "grad_norm": 0.4547551756976473, "learning_rate": 2.6120573127872398e-05, "loss": 0.4545, "step": 54440 }, { "epoch": 1.5895972321571947, "grad_norm": 0.49878295566840225, "learning_rate": 2.6117869694512032e-05, "loss": 0.4527, "step": 54445 }, { "epoch": 1.589743215427512, "grad_norm": 0.4602498869853487, "learning_rate": 2.6115166261151663e-05, "loss": 0.4554, "step": 54450 }, { "epoch": 1.5898891986978292, "grad_norm": 0.42330338089112823, "learning_rate": 2.6112462827791297e-05, "loss": 0.4249, "step": 54455 }, { "epoch": 1.5900351819681464, "grad_norm": 0.5390371656332783, "learning_rate": 2.6109759394430927e-05, "loss": 0.4651, "step": 54460 }, { "epoch": 1.5901811652384636, "grad_norm": 0.46659589191645706, "learning_rate": 2.6107055961070558e-05, "loss": 0.4309, "step": 54465 }, { "epoch": 1.5903271485087809, "grad_norm": 0.48760109135802254, "learning_rate": 2.6104352527710192e-05, "loss": 0.4703, "step": 54470 }, { "epoch": 1.590473131779098, "grad_norm": 0.45081817329436435, "learning_rate": 2.610164909434983e-05, "loss": 0.4429, "step": 54475 }, { "epoch": 1.5906191150494153, "grad_norm": 0.483850816762127, "learning_rate": 2.609894566098946e-05, "loss": 0.4314, "step": 54480 }, { "epoch": 1.5907650983197326, "grad_norm": 0.4908897609926977, "learning_rate": 2.609624222762909e-05, "loss": 0.4446, "step": 54485 }, { "epoch": 1.5909110815900498, "grad_norm": 0.5070584197377968, "learning_rate": 2.6093538794268725e-05, "loss": 0.4681, "step": 54490 }, { "epoch": 1.591057064860367, "grad_norm": 0.46612532756895797, "learning_rate": 2.6090835360908355e-05, "loss": 0.4372, "step": 54495 }, { "epoch": 1.5912030481306842, "grad_norm": 0.44117685694024983, "learning_rate": 2.6088131927547986e-05, "loss": 0.4529, "step": 54500 }, { "epoch": 1.5913490314010015, "grad_norm": 0.5294408612665295, "learning_rate": 2.608542849418762e-05, "loss": 0.4498, "step": 54505 }, { "epoch": 1.5914950146713187, "grad_norm": 0.4894806071767997, "learning_rate": 2.608272506082725e-05, "loss": 0.4273, "step": 54510 }, { "epoch": 1.591640997941636, "grad_norm": 0.44483322755646315, "learning_rate": 2.6080021627466885e-05, "loss": 0.4305, "step": 54515 }, { "epoch": 1.591786981211953, "grad_norm": 0.4370801546110815, "learning_rate": 2.6077318194106515e-05, "loss": 0.435, "step": 54520 }, { "epoch": 1.5919329644822704, "grad_norm": 0.5131544165341745, "learning_rate": 2.6074614760746146e-05, "loss": 0.4754, "step": 54525 }, { "epoch": 1.5920789477525874, "grad_norm": 0.5108836580327132, "learning_rate": 2.607191132738578e-05, "loss": 0.4375, "step": 54530 }, { "epoch": 1.5922249310229049, "grad_norm": 0.49344179889486744, "learning_rate": 2.6069207894025417e-05, "loss": 0.4405, "step": 54535 }, { "epoch": 1.5923709142932219, "grad_norm": 0.49521455099121037, "learning_rate": 2.6066504460665048e-05, "loss": 0.461, "step": 54540 }, { "epoch": 1.5925168975635393, "grad_norm": 0.5139984889262333, "learning_rate": 2.606380102730468e-05, "loss": 0.4623, "step": 54545 }, { "epoch": 1.5926628808338563, "grad_norm": 0.4678967169596536, "learning_rate": 2.6061097593944312e-05, "loss": 0.4443, "step": 54550 }, { "epoch": 1.5928088641041738, "grad_norm": 0.5016134399443284, "learning_rate": 2.6058394160583943e-05, "loss": 0.443, "step": 54555 }, { "epoch": 1.5929548473744908, "grad_norm": 0.4485227234948916, "learning_rate": 2.6055690727223574e-05, "loss": 0.4386, "step": 54560 }, { "epoch": 1.5931008306448082, "grad_norm": 0.5169752467426998, "learning_rate": 2.6052987293863208e-05, "loss": 0.4806, "step": 54565 }, { "epoch": 1.5932468139151252, "grad_norm": 0.48095110031181426, "learning_rate": 2.605028386050284e-05, "loss": 0.4374, "step": 54570 }, { "epoch": 1.5933927971854427, "grad_norm": 0.4662898567084806, "learning_rate": 2.604758042714247e-05, "loss": 0.444, "step": 54575 }, { "epoch": 1.5935387804557597, "grad_norm": 0.5132162165693245, "learning_rate": 2.6044876993782103e-05, "loss": 0.4561, "step": 54580 }, { "epoch": 1.5936847637260771, "grad_norm": 0.5003959131584215, "learning_rate": 2.6042173560421734e-05, "loss": 0.4445, "step": 54585 }, { "epoch": 1.5938307469963942, "grad_norm": 0.46203875905309433, "learning_rate": 2.603947012706137e-05, "loss": 0.4566, "step": 54590 }, { "epoch": 1.5939767302667114, "grad_norm": 0.5025441903523986, "learning_rate": 2.6036766693701005e-05, "loss": 0.4417, "step": 54595 }, { "epoch": 1.5941227135370286, "grad_norm": 0.4967802664972391, "learning_rate": 2.6034063260340636e-05, "loss": 0.4398, "step": 54600 }, { "epoch": 1.5942686968073458, "grad_norm": 0.4413322255531845, "learning_rate": 2.6031359826980266e-05, "loss": 0.425, "step": 54605 }, { "epoch": 1.594414680077663, "grad_norm": 0.47200999130688165, "learning_rate": 2.60286563936199e-05, "loss": 0.4622, "step": 54610 }, { "epoch": 1.5945606633479803, "grad_norm": 0.4512255296482244, "learning_rate": 2.602595296025953e-05, "loss": 0.445, "step": 54615 }, { "epoch": 1.5947066466182975, "grad_norm": 0.45736521860834506, "learning_rate": 2.602324952689916e-05, "loss": 0.4175, "step": 54620 }, { "epoch": 1.5948526298886148, "grad_norm": 0.4656284877651885, "learning_rate": 2.6020546093538795e-05, "loss": 0.4604, "step": 54625 }, { "epoch": 1.594998613158932, "grad_norm": 0.4416277815528914, "learning_rate": 2.6017842660178426e-05, "loss": 0.4238, "step": 54630 }, { "epoch": 1.5951445964292492, "grad_norm": 0.4840377099097738, "learning_rate": 2.6015139226818057e-05, "loss": 0.4403, "step": 54635 }, { "epoch": 1.5952905796995664, "grad_norm": 0.548868365616379, "learning_rate": 2.601243579345769e-05, "loss": 0.4471, "step": 54640 }, { "epoch": 1.5954365629698837, "grad_norm": 0.45890223667153063, "learning_rate": 2.6009732360097328e-05, "loss": 0.4235, "step": 54645 }, { "epoch": 1.595582546240201, "grad_norm": 0.47939329904911426, "learning_rate": 2.600702892673696e-05, "loss": 0.4332, "step": 54650 }, { "epoch": 1.5957285295105181, "grad_norm": 0.4481244107586797, "learning_rate": 2.6004325493376593e-05, "loss": 0.4359, "step": 54655 }, { "epoch": 1.5958745127808354, "grad_norm": 0.4843520650411607, "learning_rate": 2.6001622060016223e-05, "loss": 0.4409, "step": 54660 }, { "epoch": 1.5960204960511526, "grad_norm": 0.4517999353056325, "learning_rate": 2.5998918626655854e-05, "loss": 0.4345, "step": 54665 }, { "epoch": 1.5961664793214698, "grad_norm": 0.4621263664038348, "learning_rate": 2.5996215193295488e-05, "loss": 0.4696, "step": 54670 }, { "epoch": 1.5963124625917868, "grad_norm": 0.48487913393711096, "learning_rate": 2.599351175993512e-05, "loss": 0.4426, "step": 54675 }, { "epoch": 1.5964584458621043, "grad_norm": 0.5041803164739781, "learning_rate": 2.599080832657475e-05, "loss": 0.4598, "step": 54680 }, { "epoch": 1.5966044291324213, "grad_norm": 0.47401263767321544, "learning_rate": 2.5988104893214383e-05, "loss": 0.4539, "step": 54685 }, { "epoch": 1.5967504124027387, "grad_norm": 0.4653552529200222, "learning_rate": 2.5985401459854014e-05, "loss": 0.4206, "step": 54690 }, { "epoch": 1.5968963956730557, "grad_norm": 0.49884720156980594, "learning_rate": 2.5982698026493645e-05, "loss": 0.446, "step": 54695 }, { "epoch": 1.5970423789433732, "grad_norm": 0.4474101857403726, "learning_rate": 2.597999459313328e-05, "loss": 0.4622, "step": 54700 }, { "epoch": 1.5971883622136902, "grad_norm": 0.4704507698901001, "learning_rate": 2.5977291159772916e-05, "loss": 0.3945, "step": 54705 }, { "epoch": 1.5973343454840077, "grad_norm": 0.4565956717155697, "learning_rate": 2.5974587726412547e-05, "loss": 0.4341, "step": 54710 }, { "epoch": 1.5974803287543247, "grad_norm": 0.45555835876741413, "learning_rate": 2.5971884293052177e-05, "loss": 0.4449, "step": 54715 }, { "epoch": 1.5976263120246421, "grad_norm": 0.4854366227379145, "learning_rate": 2.596918085969181e-05, "loss": 0.4366, "step": 54720 }, { "epoch": 1.5977722952949591, "grad_norm": 0.47710321760679986, "learning_rate": 2.5966477426331442e-05, "loss": 0.4173, "step": 54725 }, { "epoch": 1.5979182785652766, "grad_norm": 0.4898039738170865, "learning_rate": 2.5963773992971076e-05, "loss": 0.4235, "step": 54730 }, { "epoch": 1.5980642618355936, "grad_norm": 0.46457177210123596, "learning_rate": 2.5961070559610706e-05, "loss": 0.4441, "step": 54735 }, { "epoch": 1.5982102451059108, "grad_norm": 0.4591638220618699, "learning_rate": 2.5958367126250337e-05, "loss": 0.4367, "step": 54740 }, { "epoch": 1.598356228376228, "grad_norm": 0.46626130760814, "learning_rate": 2.595566369288997e-05, "loss": 0.4509, "step": 54745 }, { "epoch": 1.5985022116465453, "grad_norm": 0.47373737641954883, "learning_rate": 2.5952960259529602e-05, "loss": 0.4143, "step": 54750 }, { "epoch": 1.5986481949168625, "grad_norm": 0.495784599105492, "learning_rate": 2.5950256826169232e-05, "loss": 0.4414, "step": 54755 }, { "epoch": 1.5987941781871797, "grad_norm": 0.47273282016364704, "learning_rate": 2.594755339280887e-05, "loss": 0.4393, "step": 54760 }, { "epoch": 1.598940161457497, "grad_norm": 0.4790334945734853, "learning_rate": 2.5944849959448504e-05, "loss": 0.4628, "step": 54765 }, { "epoch": 1.5990861447278142, "grad_norm": 0.46933149774445704, "learning_rate": 2.5942146526088134e-05, "loss": 0.4511, "step": 54770 }, { "epoch": 1.5992321279981314, "grad_norm": 0.4419553812038259, "learning_rate": 2.5939443092727765e-05, "loss": 0.4647, "step": 54775 }, { "epoch": 1.5993781112684486, "grad_norm": 0.5242317328685446, "learning_rate": 2.59367396593674e-05, "loss": 0.4457, "step": 54780 }, { "epoch": 1.5995240945387659, "grad_norm": 0.47353276659938953, "learning_rate": 2.593403622600703e-05, "loss": 0.4427, "step": 54785 }, { "epoch": 1.599670077809083, "grad_norm": 0.47602950988038667, "learning_rate": 2.5931332792646664e-05, "loss": 0.4472, "step": 54790 }, { "epoch": 1.5998160610794003, "grad_norm": 0.4984995933852232, "learning_rate": 2.5928629359286294e-05, "loss": 0.4675, "step": 54795 }, { "epoch": 1.5999620443497176, "grad_norm": 0.4799546473428432, "learning_rate": 2.5925925925925925e-05, "loss": 0.4762, "step": 54800 }, { "epoch": 1.6001080276200348, "grad_norm": 0.4533602073014357, "learning_rate": 2.592322249256556e-05, "loss": 0.4383, "step": 54805 }, { "epoch": 1.600254010890352, "grad_norm": 0.4630428566930383, "learning_rate": 2.592051905920519e-05, "loss": 0.4195, "step": 54810 }, { "epoch": 1.6003999941606692, "grad_norm": 0.47428434852612933, "learning_rate": 2.5917815625844827e-05, "loss": 0.4259, "step": 54815 }, { "epoch": 1.6005459774309863, "grad_norm": 0.5311347340755759, "learning_rate": 2.5915112192484458e-05, "loss": 0.4489, "step": 54820 }, { "epoch": 1.6006919607013037, "grad_norm": 0.4522132063678949, "learning_rate": 2.591240875912409e-05, "loss": 0.4527, "step": 54825 }, { "epoch": 1.6008379439716207, "grad_norm": 0.45848278723509694, "learning_rate": 2.5909705325763722e-05, "loss": 0.4618, "step": 54830 }, { "epoch": 1.6009839272419382, "grad_norm": 0.5314508496936695, "learning_rate": 2.5907001892403353e-05, "loss": 0.4765, "step": 54835 }, { "epoch": 1.6011299105122552, "grad_norm": 0.5141692763005209, "learning_rate": 2.5904298459042987e-05, "loss": 0.4416, "step": 54840 }, { "epoch": 1.6012758937825726, "grad_norm": 0.5064385403823912, "learning_rate": 2.5901595025682617e-05, "loss": 0.4577, "step": 54845 }, { "epoch": 1.6014218770528896, "grad_norm": 0.47208309701613355, "learning_rate": 2.5898891592322248e-05, "loss": 0.4264, "step": 54850 }, { "epoch": 1.601567860323207, "grad_norm": 0.5230356698931322, "learning_rate": 2.5896188158961882e-05, "loss": 0.4434, "step": 54855 }, { "epoch": 1.601713843593524, "grad_norm": 0.5183894518699272, "learning_rate": 2.5893484725601513e-05, "loss": 0.4405, "step": 54860 }, { "epoch": 1.6018598268638415, "grad_norm": 0.48878159473155114, "learning_rate": 2.5890781292241147e-05, "loss": 0.4359, "step": 54865 }, { "epoch": 1.6020058101341585, "grad_norm": 0.46122983455126515, "learning_rate": 2.5888077858880777e-05, "loss": 0.4442, "step": 54870 }, { "epoch": 1.602151793404476, "grad_norm": 0.49834934643055084, "learning_rate": 2.5885374425520415e-05, "loss": 0.4372, "step": 54875 }, { "epoch": 1.602297776674793, "grad_norm": 0.4798759335101808, "learning_rate": 2.5882670992160045e-05, "loss": 0.4714, "step": 54880 }, { "epoch": 1.6024437599451102, "grad_norm": 0.4862947184239922, "learning_rate": 2.587996755879968e-05, "loss": 0.4711, "step": 54885 }, { "epoch": 1.6025897432154275, "grad_norm": 0.47515105993799855, "learning_rate": 2.587726412543931e-05, "loss": 0.4578, "step": 54890 }, { "epoch": 1.6027357264857447, "grad_norm": 0.4966036597893771, "learning_rate": 2.587456069207894e-05, "loss": 0.4799, "step": 54895 }, { "epoch": 1.602881709756062, "grad_norm": 0.47389880174504295, "learning_rate": 2.5871857258718575e-05, "loss": 0.4707, "step": 54900 }, { "epoch": 1.6030276930263792, "grad_norm": 0.467375470880681, "learning_rate": 2.5869153825358205e-05, "loss": 0.4198, "step": 54905 }, { "epoch": 1.6031736762966964, "grad_norm": 0.49152554060723874, "learning_rate": 2.5866450391997836e-05, "loss": 0.44, "step": 54910 }, { "epoch": 1.6033196595670136, "grad_norm": 0.4899348957626451, "learning_rate": 2.586374695863747e-05, "loss": 0.4533, "step": 54915 }, { "epoch": 1.6034656428373308, "grad_norm": 0.45261592736467177, "learning_rate": 2.58610435252771e-05, "loss": 0.4137, "step": 54920 }, { "epoch": 1.603611626107648, "grad_norm": 0.49357888061856353, "learning_rate": 2.5858340091916735e-05, "loss": 0.4335, "step": 54925 }, { "epoch": 1.6037576093779653, "grad_norm": 0.4915865034131069, "learning_rate": 2.5855636658556372e-05, "loss": 0.4513, "step": 54930 }, { "epoch": 1.6039035926482825, "grad_norm": 0.4634681905796882, "learning_rate": 2.5852933225196003e-05, "loss": 0.439, "step": 54935 }, { "epoch": 1.6040495759185998, "grad_norm": 0.46744354959825535, "learning_rate": 2.5850229791835633e-05, "loss": 0.4179, "step": 54940 }, { "epoch": 1.604195559188917, "grad_norm": 0.45214860396763634, "learning_rate": 2.5847526358475267e-05, "loss": 0.4423, "step": 54945 }, { "epoch": 1.6043415424592342, "grad_norm": 0.44630594262998624, "learning_rate": 2.5844822925114898e-05, "loss": 0.4445, "step": 54950 }, { "epoch": 1.6044875257295514, "grad_norm": 0.45742580214811296, "learning_rate": 2.584211949175453e-05, "loss": 0.4297, "step": 54955 }, { "epoch": 1.6046335089998687, "grad_norm": 0.4616127859879465, "learning_rate": 2.5839416058394162e-05, "loss": 0.4375, "step": 54960 }, { "epoch": 1.6047794922701857, "grad_norm": 0.48035583560841494, "learning_rate": 2.5836712625033793e-05, "loss": 0.4448, "step": 54965 }, { "epoch": 1.6049254755405031, "grad_norm": 0.5241087168444549, "learning_rate": 2.5834009191673424e-05, "loss": 0.4625, "step": 54970 }, { "epoch": 1.6050714588108201, "grad_norm": 0.4624852426911447, "learning_rate": 2.5831305758313058e-05, "loss": 0.4504, "step": 54975 }, { "epoch": 1.6052174420811376, "grad_norm": 0.487351611586633, "learning_rate": 2.582860232495269e-05, "loss": 0.43, "step": 54980 }, { "epoch": 1.6053634253514546, "grad_norm": 0.4865243368052886, "learning_rate": 2.5825898891592326e-05, "loss": 0.4578, "step": 54985 }, { "epoch": 1.605509408621772, "grad_norm": 0.4423778485200212, "learning_rate": 2.5823195458231956e-05, "loss": 0.4575, "step": 54990 }, { "epoch": 1.605655391892089, "grad_norm": 0.5041878287232701, "learning_rate": 2.582049202487159e-05, "loss": 0.4303, "step": 54995 }, { "epoch": 1.6058013751624065, "grad_norm": 0.4958084565271019, "learning_rate": 2.581778859151122e-05, "loss": 0.4472, "step": 55000 }, { "epoch": 1.6059473584327235, "grad_norm": 0.47335913904801596, "learning_rate": 2.5815085158150855e-05, "loss": 0.4617, "step": 55005 }, { "epoch": 1.606093341703041, "grad_norm": 0.4840691586309788, "learning_rate": 2.5812381724790486e-05, "loss": 0.4316, "step": 55010 }, { "epoch": 1.606239324973358, "grad_norm": 0.5017829635639097, "learning_rate": 2.5809678291430116e-05, "loss": 0.4421, "step": 55015 }, { "epoch": 1.6063853082436754, "grad_norm": 0.44094674506337445, "learning_rate": 2.580697485806975e-05, "loss": 0.4378, "step": 55020 }, { "epoch": 1.6065312915139924, "grad_norm": 0.4833313015555902, "learning_rate": 2.580427142470938e-05, "loss": 0.4522, "step": 55025 }, { "epoch": 1.6066772747843099, "grad_norm": 0.45067668576531267, "learning_rate": 2.580156799134901e-05, "loss": 0.4463, "step": 55030 }, { "epoch": 1.606823258054627, "grad_norm": 0.4797556677791787, "learning_rate": 2.5798864557988646e-05, "loss": 0.4363, "step": 55035 }, { "epoch": 1.6069692413249441, "grad_norm": 0.4849436385120261, "learning_rate": 2.5796161124628276e-05, "loss": 0.461, "step": 55040 }, { "epoch": 1.6071152245952613, "grad_norm": 0.5075133277785938, "learning_rate": 2.5793457691267914e-05, "loss": 0.4748, "step": 55045 }, { "epoch": 1.6072612078655786, "grad_norm": 0.47467024563009236, "learning_rate": 2.5790754257907544e-05, "loss": 0.461, "step": 55050 }, { "epoch": 1.6074071911358958, "grad_norm": 0.47021904030888867, "learning_rate": 2.5788050824547178e-05, "loss": 0.4796, "step": 55055 }, { "epoch": 1.607553174406213, "grad_norm": 0.44292685305077706, "learning_rate": 2.578534739118681e-05, "loss": 0.4348, "step": 55060 }, { "epoch": 1.6076991576765303, "grad_norm": 0.47037407119234265, "learning_rate": 2.5782643957826443e-05, "loss": 0.4228, "step": 55065 }, { "epoch": 1.6078451409468475, "grad_norm": 0.4579722528313931, "learning_rate": 2.5779940524466073e-05, "loss": 0.409, "step": 55070 }, { "epoch": 1.6079911242171647, "grad_norm": 0.47004438692507217, "learning_rate": 2.5777237091105704e-05, "loss": 0.456, "step": 55075 }, { "epoch": 1.608137107487482, "grad_norm": 0.4444390571614667, "learning_rate": 2.5774533657745338e-05, "loss": 0.4478, "step": 55080 }, { "epoch": 1.6082830907577992, "grad_norm": 0.46118318069681946, "learning_rate": 2.577183022438497e-05, "loss": 0.4438, "step": 55085 }, { "epoch": 1.6084290740281164, "grad_norm": 0.510671424162827, "learning_rate": 2.57691267910246e-05, "loss": 0.4435, "step": 55090 }, { "epoch": 1.6085750572984336, "grad_norm": 0.4746682429879431, "learning_rate": 2.5766423357664233e-05, "loss": 0.4705, "step": 55095 }, { "epoch": 1.6087210405687509, "grad_norm": 0.4954845954309247, "learning_rate": 2.576371992430387e-05, "loss": 0.4397, "step": 55100 }, { "epoch": 1.608867023839068, "grad_norm": 0.48552659831725914, "learning_rate": 2.57610164909435e-05, "loss": 0.43, "step": 55105 }, { "epoch": 1.609013007109385, "grad_norm": 0.464598290473237, "learning_rate": 2.5758313057583132e-05, "loss": 0.4079, "step": 55110 }, { "epoch": 1.6091589903797026, "grad_norm": 0.4321936370399023, "learning_rate": 2.5755609624222766e-05, "loss": 0.447, "step": 55115 }, { "epoch": 1.6093049736500196, "grad_norm": 0.49812799978335204, "learning_rate": 2.5752906190862397e-05, "loss": 0.4302, "step": 55120 }, { "epoch": 1.609450956920337, "grad_norm": 0.4166811521815571, "learning_rate": 2.5750202757502027e-05, "loss": 0.4133, "step": 55125 }, { "epoch": 1.609596940190654, "grad_norm": 0.5149770276252514, "learning_rate": 2.574749932414166e-05, "loss": 0.454, "step": 55130 }, { "epoch": 1.6097429234609715, "grad_norm": 0.42941423788364025, "learning_rate": 2.5744795890781292e-05, "loss": 0.4537, "step": 55135 }, { "epoch": 1.6098889067312885, "grad_norm": 0.4613730671442342, "learning_rate": 2.5742092457420926e-05, "loss": 0.4538, "step": 55140 }, { "epoch": 1.610034890001606, "grad_norm": 0.4808196487974896, "learning_rate": 2.5739389024060556e-05, "loss": 0.4361, "step": 55145 }, { "epoch": 1.610180873271923, "grad_norm": 0.47725779373461896, "learning_rate": 2.5736685590700187e-05, "loss": 0.4548, "step": 55150 }, { "epoch": 1.6103268565422404, "grad_norm": 0.48009973540012246, "learning_rate": 2.5733982157339825e-05, "loss": 0.4043, "step": 55155 }, { "epoch": 1.6104728398125574, "grad_norm": 0.4793443116744497, "learning_rate": 2.573127872397946e-05, "loss": 0.424, "step": 55160 }, { "epoch": 1.6106188230828749, "grad_norm": 0.4736186170774014, "learning_rate": 2.572857529061909e-05, "loss": 0.4475, "step": 55165 }, { "epoch": 1.6107648063531919, "grad_norm": 0.45008947452308556, "learning_rate": 2.572587185725872e-05, "loss": 0.45, "step": 55170 }, { "epoch": 1.6109107896235093, "grad_norm": 0.4428322671472335, "learning_rate": 2.5723168423898354e-05, "loss": 0.4447, "step": 55175 }, { "epoch": 1.6110567728938263, "grad_norm": 0.4735890465467379, "learning_rate": 2.5720464990537984e-05, "loss": 0.459, "step": 55180 }, { "epoch": 1.6112027561641435, "grad_norm": 0.4830401815854205, "learning_rate": 2.5717761557177615e-05, "loss": 0.4609, "step": 55185 }, { "epoch": 1.6113487394344608, "grad_norm": 0.4672572095221575, "learning_rate": 2.571505812381725e-05, "loss": 0.4695, "step": 55190 }, { "epoch": 1.611494722704778, "grad_norm": 0.4936621528805712, "learning_rate": 2.571235469045688e-05, "loss": 0.4369, "step": 55195 }, { "epoch": 1.6116407059750952, "grad_norm": 0.4335518190440097, "learning_rate": 2.5709651257096514e-05, "loss": 0.4446, "step": 55200 }, { "epoch": 1.6117866892454125, "grad_norm": 0.47385092585258926, "learning_rate": 2.5706947823736144e-05, "loss": 0.441, "step": 55205 }, { "epoch": 1.6119326725157297, "grad_norm": 0.4218242515215509, "learning_rate": 2.5704244390375775e-05, "loss": 0.4541, "step": 55210 }, { "epoch": 1.612078655786047, "grad_norm": 0.4798200588820331, "learning_rate": 2.5701540957015412e-05, "loss": 0.446, "step": 55215 }, { "epoch": 1.6122246390563642, "grad_norm": 0.4538528347966073, "learning_rate": 2.5698837523655046e-05, "loss": 0.4292, "step": 55220 }, { "epoch": 1.6123706223266814, "grad_norm": 0.46873996308547666, "learning_rate": 2.5696134090294677e-05, "loss": 0.4288, "step": 55225 }, { "epoch": 1.6125166055969986, "grad_norm": 0.48424264537933454, "learning_rate": 2.5693430656934308e-05, "loss": 0.4491, "step": 55230 }, { "epoch": 1.6126625888673158, "grad_norm": 0.46182874987638384, "learning_rate": 2.569072722357394e-05, "loss": 0.4091, "step": 55235 }, { "epoch": 1.612808572137633, "grad_norm": 0.46300953083604446, "learning_rate": 2.5688023790213572e-05, "loss": 0.4496, "step": 55240 }, { "epoch": 1.6129545554079503, "grad_norm": 0.46523907451828356, "learning_rate": 2.5685320356853203e-05, "loss": 0.4291, "step": 55245 }, { "epoch": 1.6131005386782675, "grad_norm": 0.4654930815336479, "learning_rate": 2.5682616923492837e-05, "loss": 0.4476, "step": 55250 }, { "epoch": 1.6132465219485845, "grad_norm": 0.46233411568811617, "learning_rate": 2.5679913490132467e-05, "loss": 0.442, "step": 55255 }, { "epoch": 1.613392505218902, "grad_norm": 0.46695533940302425, "learning_rate": 2.5677210056772098e-05, "loss": 0.4306, "step": 55260 }, { "epoch": 1.613538488489219, "grad_norm": 0.46981456634127294, "learning_rate": 2.5674506623411732e-05, "loss": 0.4512, "step": 55265 }, { "epoch": 1.6136844717595364, "grad_norm": 0.4799763781449178, "learning_rate": 2.567180319005137e-05, "loss": 0.4264, "step": 55270 }, { "epoch": 1.6138304550298535, "grad_norm": 0.5136766624325063, "learning_rate": 2.5669099756691e-05, "loss": 0.4326, "step": 55275 }, { "epoch": 1.613976438300171, "grad_norm": 0.49532246564959515, "learning_rate": 2.5666396323330634e-05, "loss": 0.4483, "step": 55280 }, { "epoch": 1.614122421570488, "grad_norm": 0.4944323293248547, "learning_rate": 2.5663692889970265e-05, "loss": 0.4561, "step": 55285 }, { "epoch": 1.6142684048408054, "grad_norm": 0.439486689819915, "learning_rate": 2.5660989456609895e-05, "loss": 0.4287, "step": 55290 }, { "epoch": 1.6144143881111224, "grad_norm": 0.48395346689033697, "learning_rate": 2.565828602324953e-05, "loss": 0.4313, "step": 55295 }, { "epoch": 1.6145603713814398, "grad_norm": 0.49187433584691276, "learning_rate": 2.565558258988916e-05, "loss": 0.4369, "step": 55300 }, { "epoch": 1.6147063546517568, "grad_norm": 0.47617024782283285, "learning_rate": 2.565287915652879e-05, "loss": 0.4502, "step": 55305 }, { "epoch": 1.6148523379220743, "grad_norm": 0.49710213205834963, "learning_rate": 2.5650175723168425e-05, "loss": 0.4793, "step": 55310 }, { "epoch": 1.6149983211923913, "grad_norm": 0.4674623966260449, "learning_rate": 2.5647472289808055e-05, "loss": 0.4156, "step": 55315 }, { "epoch": 1.6151443044627087, "grad_norm": 0.4701360883940019, "learning_rate": 2.5644768856447686e-05, "loss": 0.4228, "step": 55320 }, { "epoch": 1.6152902877330257, "grad_norm": 0.4896646962750602, "learning_rate": 2.5642065423087323e-05, "loss": 0.4396, "step": 55325 }, { "epoch": 1.615436271003343, "grad_norm": 0.5336189826267674, "learning_rate": 2.5639361989726957e-05, "loss": 0.4658, "step": 55330 }, { "epoch": 1.6155822542736602, "grad_norm": 0.45738237837434326, "learning_rate": 2.5636658556366588e-05, "loss": 0.4467, "step": 55335 }, { "epoch": 1.6157282375439774, "grad_norm": 0.47229020802186045, "learning_rate": 2.5633955123006222e-05, "loss": 0.453, "step": 55340 }, { "epoch": 1.6158742208142947, "grad_norm": 0.4684064224209124, "learning_rate": 2.5631251689645853e-05, "loss": 0.433, "step": 55345 }, { "epoch": 1.6160202040846119, "grad_norm": 0.5168337892143794, "learning_rate": 2.5628548256285483e-05, "loss": 0.4318, "step": 55350 }, { "epoch": 1.6161661873549291, "grad_norm": 0.475089409861706, "learning_rate": 2.5625844822925117e-05, "loss": 0.4208, "step": 55355 }, { "epoch": 1.6163121706252463, "grad_norm": 0.48116137264666364, "learning_rate": 2.5623141389564748e-05, "loss": 0.4512, "step": 55360 }, { "epoch": 1.6164581538955636, "grad_norm": 0.4696013091227738, "learning_rate": 2.562043795620438e-05, "loss": 0.4633, "step": 55365 }, { "epoch": 1.6166041371658808, "grad_norm": 0.45460601054973726, "learning_rate": 2.5617734522844012e-05, "loss": 0.4483, "step": 55370 }, { "epoch": 1.616750120436198, "grad_norm": 0.49388449181107114, "learning_rate": 2.5615031089483643e-05, "loss": 0.4363, "step": 55375 }, { "epoch": 1.6168961037065153, "grad_norm": 0.42358420900143806, "learning_rate": 2.561232765612328e-05, "loss": 0.428, "step": 55380 }, { "epoch": 1.6170420869768325, "grad_norm": 0.46342937583442145, "learning_rate": 2.560962422276291e-05, "loss": 0.4417, "step": 55385 }, { "epoch": 1.6171880702471497, "grad_norm": 0.4541995227504051, "learning_rate": 2.5606920789402545e-05, "loss": 0.4241, "step": 55390 }, { "epoch": 1.617334053517467, "grad_norm": 0.47396012476815313, "learning_rate": 2.5604217356042176e-05, "loss": 0.4588, "step": 55395 }, { "epoch": 1.617480036787784, "grad_norm": 0.4565450681306639, "learning_rate": 2.5601513922681806e-05, "loss": 0.4473, "step": 55400 }, { "epoch": 1.6176260200581014, "grad_norm": 0.48452293043009076, "learning_rate": 2.559881048932144e-05, "loss": 0.4256, "step": 55405 }, { "epoch": 1.6177720033284184, "grad_norm": 0.4940952456107025, "learning_rate": 2.559610705596107e-05, "loss": 0.4328, "step": 55410 }, { "epoch": 1.6179179865987359, "grad_norm": 0.4648801533084296, "learning_rate": 2.5593403622600705e-05, "loss": 0.4856, "step": 55415 }, { "epoch": 1.6180639698690529, "grad_norm": 0.5094086206153948, "learning_rate": 2.5590700189240336e-05, "loss": 0.4493, "step": 55420 }, { "epoch": 1.6182099531393703, "grad_norm": 0.45074038350037027, "learning_rate": 2.5587996755879966e-05, "loss": 0.4433, "step": 55425 }, { "epoch": 1.6183559364096873, "grad_norm": 0.5217922063870871, "learning_rate": 2.55852933225196e-05, "loss": 0.4383, "step": 55430 }, { "epoch": 1.6185019196800048, "grad_norm": 0.4833923332658626, "learning_rate": 2.558258988915923e-05, "loss": 0.4321, "step": 55435 }, { "epoch": 1.6186479029503218, "grad_norm": 0.41908838544459487, "learning_rate": 2.5579886455798868e-05, "loss": 0.4284, "step": 55440 }, { "epoch": 1.6187938862206392, "grad_norm": 0.4975158512898154, "learning_rate": 2.55771830224385e-05, "loss": 0.4433, "step": 55445 }, { "epoch": 1.6189398694909563, "grad_norm": 0.4973253462084773, "learning_rate": 2.5574479589078133e-05, "loss": 0.4494, "step": 55450 }, { "epoch": 1.6190858527612737, "grad_norm": 0.5163655785942544, "learning_rate": 2.5571776155717764e-05, "loss": 0.4844, "step": 55455 }, { "epoch": 1.6192318360315907, "grad_norm": 0.43386428379821457, "learning_rate": 2.5569072722357394e-05, "loss": 0.4114, "step": 55460 }, { "epoch": 1.6193778193019082, "grad_norm": 0.5046555098934332, "learning_rate": 2.5566369288997028e-05, "loss": 0.455, "step": 55465 }, { "epoch": 1.6195238025722252, "grad_norm": 0.47473897774128326, "learning_rate": 2.556366585563666e-05, "loss": 0.4441, "step": 55470 }, { "epoch": 1.6196697858425424, "grad_norm": 0.4796970068089252, "learning_rate": 2.556096242227629e-05, "loss": 0.4252, "step": 55475 }, { "epoch": 1.6198157691128596, "grad_norm": 0.5351071898324035, "learning_rate": 2.5558258988915923e-05, "loss": 0.4376, "step": 55480 }, { "epoch": 1.6199617523831769, "grad_norm": 0.4868313787185003, "learning_rate": 2.5555555555555554e-05, "loss": 0.4434, "step": 55485 }, { "epoch": 1.620107735653494, "grad_norm": 0.4025950646760006, "learning_rate": 2.5552852122195188e-05, "loss": 0.4169, "step": 55490 }, { "epoch": 1.6202537189238113, "grad_norm": 0.45808920005039405, "learning_rate": 2.5550148688834825e-05, "loss": 0.401, "step": 55495 }, { "epoch": 1.6203997021941285, "grad_norm": 0.4758461035314294, "learning_rate": 2.5547445255474456e-05, "loss": 0.4583, "step": 55500 }, { "epoch": 1.6205456854644458, "grad_norm": 0.49253841424115047, "learning_rate": 2.5544741822114087e-05, "loss": 0.4379, "step": 55505 }, { "epoch": 1.620691668734763, "grad_norm": 0.5280842832869558, "learning_rate": 2.554203838875372e-05, "loss": 0.4561, "step": 55510 }, { "epoch": 1.6208376520050802, "grad_norm": 0.4524431635017447, "learning_rate": 2.553933495539335e-05, "loss": 0.4655, "step": 55515 }, { "epoch": 1.6209836352753975, "grad_norm": 0.5115049827624009, "learning_rate": 2.5536631522032982e-05, "loss": 0.4462, "step": 55520 }, { "epoch": 1.6211296185457147, "grad_norm": 0.47072820942835225, "learning_rate": 2.5533928088672616e-05, "loss": 0.4356, "step": 55525 }, { "epoch": 1.621275601816032, "grad_norm": 0.5189340892206027, "learning_rate": 2.5531224655312247e-05, "loss": 0.4362, "step": 55530 }, { "epoch": 1.6214215850863491, "grad_norm": 0.4899869158573971, "learning_rate": 2.5528521221951877e-05, "loss": 0.4733, "step": 55535 }, { "epoch": 1.6215675683566664, "grad_norm": 0.44499337852696036, "learning_rate": 2.552581778859151e-05, "loss": 0.4426, "step": 55540 }, { "epoch": 1.6217135516269834, "grad_norm": 0.5375936548493516, "learning_rate": 2.5523114355231142e-05, "loss": 0.4858, "step": 55545 }, { "epoch": 1.6218595348973008, "grad_norm": 0.5050380099446733, "learning_rate": 2.552041092187078e-05, "loss": 0.4321, "step": 55550 }, { "epoch": 1.6220055181676178, "grad_norm": 0.4482114072217962, "learning_rate": 2.5517707488510413e-05, "loss": 0.4307, "step": 55555 }, { "epoch": 1.6221515014379353, "grad_norm": 0.4570204009377139, "learning_rate": 2.5515004055150044e-05, "loss": 0.4604, "step": 55560 }, { "epoch": 1.6222974847082523, "grad_norm": 0.4427061837154166, "learning_rate": 2.5512300621789675e-05, "loss": 0.4263, "step": 55565 }, { "epoch": 1.6224434679785698, "grad_norm": 0.5213607360310256, "learning_rate": 2.550959718842931e-05, "loss": 0.4522, "step": 55570 }, { "epoch": 1.6225894512488868, "grad_norm": 0.45843612514626636, "learning_rate": 2.550689375506894e-05, "loss": 0.4385, "step": 55575 }, { "epoch": 1.6227354345192042, "grad_norm": 0.47312612286691097, "learning_rate": 2.550419032170857e-05, "loss": 0.4491, "step": 55580 }, { "epoch": 1.6228814177895212, "grad_norm": 0.4235015058896025, "learning_rate": 2.5501486888348204e-05, "loss": 0.4351, "step": 55585 }, { "epoch": 1.6230274010598387, "grad_norm": 0.4673743771443576, "learning_rate": 2.5498783454987834e-05, "loss": 0.4489, "step": 55590 }, { "epoch": 1.6231733843301557, "grad_norm": 0.460650545835503, "learning_rate": 2.5496080021627465e-05, "loss": 0.4409, "step": 55595 }, { "epoch": 1.6233193676004731, "grad_norm": 0.46982170156523806, "learning_rate": 2.54933765882671e-05, "loss": 0.4491, "step": 55600 }, { "epoch": 1.6234653508707901, "grad_norm": 0.4902601951523088, "learning_rate": 2.549067315490673e-05, "loss": 0.4679, "step": 55605 }, { "epoch": 1.6236113341411076, "grad_norm": 0.4824721872611151, "learning_rate": 2.5487969721546367e-05, "loss": 0.4235, "step": 55610 }, { "epoch": 1.6237573174114246, "grad_norm": 0.45726246432425555, "learning_rate": 2.5485266288185998e-05, "loss": 0.4313, "step": 55615 }, { "epoch": 1.6239033006817418, "grad_norm": 0.5015278347721202, "learning_rate": 2.5482562854825632e-05, "loss": 0.456, "step": 55620 }, { "epoch": 1.624049283952059, "grad_norm": 0.48482103724220343, "learning_rate": 2.5479859421465262e-05, "loss": 0.4153, "step": 55625 }, { "epoch": 1.6241952672223763, "grad_norm": 0.4541487343970074, "learning_rate": 2.5477155988104896e-05, "loss": 0.4441, "step": 55630 }, { "epoch": 1.6243412504926935, "grad_norm": 0.5011531841450454, "learning_rate": 2.5474452554744527e-05, "loss": 0.4424, "step": 55635 }, { "epoch": 1.6244872337630107, "grad_norm": 0.4446178250433684, "learning_rate": 2.5471749121384158e-05, "loss": 0.4477, "step": 55640 }, { "epoch": 1.624633217033328, "grad_norm": 0.4804181404982977, "learning_rate": 2.546904568802379e-05, "loss": 0.4402, "step": 55645 }, { "epoch": 1.6247792003036452, "grad_norm": 0.4485178166368814, "learning_rate": 2.5466342254663422e-05, "loss": 0.4318, "step": 55650 }, { "epoch": 1.6249251835739624, "grad_norm": 0.4799534898723637, "learning_rate": 2.5463638821303053e-05, "loss": 0.4563, "step": 55655 }, { "epoch": 1.6250711668442797, "grad_norm": 0.4644764092666092, "learning_rate": 2.5460935387942687e-05, "loss": 0.4421, "step": 55660 }, { "epoch": 1.6252171501145969, "grad_norm": 0.4595587357312757, "learning_rate": 2.5458231954582324e-05, "loss": 0.4844, "step": 55665 }, { "epoch": 1.6253631333849141, "grad_norm": 0.4794295590294186, "learning_rate": 2.5455528521221955e-05, "loss": 0.4463, "step": 55670 }, { "epoch": 1.6255091166552313, "grad_norm": 0.4509153414289474, "learning_rate": 2.5452825087861586e-05, "loss": 0.4487, "step": 55675 }, { "epoch": 1.6256550999255486, "grad_norm": 0.45246664793992386, "learning_rate": 2.545012165450122e-05, "loss": 0.4592, "step": 55680 }, { "epoch": 1.6258010831958658, "grad_norm": 0.47588160859823747, "learning_rate": 2.544741822114085e-05, "loss": 0.4163, "step": 55685 }, { "epoch": 1.6259470664661828, "grad_norm": 0.46916999700364487, "learning_rate": 2.5444714787780484e-05, "loss": 0.4312, "step": 55690 }, { "epoch": 1.6260930497365003, "grad_norm": 0.47277818039083996, "learning_rate": 2.5442011354420115e-05, "loss": 0.4451, "step": 55695 }, { "epoch": 1.6262390330068173, "grad_norm": 0.4930861682911349, "learning_rate": 2.5439307921059745e-05, "loss": 0.448, "step": 55700 }, { "epoch": 1.6263850162771347, "grad_norm": 0.4939882213566405, "learning_rate": 2.543660448769938e-05, "loss": 0.4585, "step": 55705 }, { "epoch": 1.6265309995474517, "grad_norm": 0.5105683375136401, "learning_rate": 2.543390105433901e-05, "loss": 0.4587, "step": 55710 }, { "epoch": 1.6266769828177692, "grad_norm": 0.43050618743707986, "learning_rate": 2.543119762097864e-05, "loss": 0.4193, "step": 55715 }, { "epoch": 1.6268229660880862, "grad_norm": 0.4810933173466332, "learning_rate": 2.5428494187618278e-05, "loss": 0.4509, "step": 55720 }, { "epoch": 1.6269689493584036, "grad_norm": 0.46166385884732325, "learning_rate": 2.5425790754257912e-05, "loss": 0.4338, "step": 55725 }, { "epoch": 1.6271149326287206, "grad_norm": 0.45156745243227525, "learning_rate": 2.5423087320897543e-05, "loss": 0.425, "step": 55730 }, { "epoch": 1.627260915899038, "grad_norm": 0.475660808816531, "learning_rate": 2.5420383887537173e-05, "loss": 0.444, "step": 55735 }, { "epoch": 1.627406899169355, "grad_norm": 0.4776441484500965, "learning_rate": 2.5417680454176807e-05, "loss": 0.436, "step": 55740 }, { "epoch": 1.6275528824396726, "grad_norm": 0.4631531443100666, "learning_rate": 2.5414977020816438e-05, "loss": 0.4415, "step": 55745 }, { "epoch": 1.6276988657099896, "grad_norm": 0.5152362552931159, "learning_rate": 2.541227358745607e-05, "loss": 0.4536, "step": 55750 }, { "epoch": 1.627844848980307, "grad_norm": 0.4574237587120854, "learning_rate": 2.5409570154095703e-05, "loss": 0.4311, "step": 55755 }, { "epoch": 1.627990832250624, "grad_norm": 0.4647491104310059, "learning_rate": 2.5406866720735333e-05, "loss": 0.4207, "step": 55760 }, { "epoch": 1.6281368155209412, "grad_norm": 0.4531793366731726, "learning_rate": 2.5404163287374967e-05, "loss": 0.4333, "step": 55765 }, { "epoch": 1.6282827987912585, "grad_norm": 0.4690126416521767, "learning_rate": 2.5401459854014598e-05, "loss": 0.4184, "step": 55770 }, { "epoch": 1.6284287820615757, "grad_norm": 0.44157615484104595, "learning_rate": 2.539875642065423e-05, "loss": 0.4326, "step": 55775 }, { "epoch": 1.628574765331893, "grad_norm": 0.47188231818109716, "learning_rate": 2.5396052987293866e-05, "loss": 0.435, "step": 55780 }, { "epoch": 1.6287207486022102, "grad_norm": 0.48515303501396373, "learning_rate": 2.53933495539335e-05, "loss": 0.4412, "step": 55785 }, { "epoch": 1.6288667318725274, "grad_norm": 0.5463952150141551, "learning_rate": 2.539064612057313e-05, "loss": 0.4786, "step": 55790 }, { "epoch": 1.6290127151428446, "grad_norm": 0.4916872162350509, "learning_rate": 2.538794268721276e-05, "loss": 0.4409, "step": 55795 }, { "epoch": 1.6291586984131619, "grad_norm": 0.48085573085431915, "learning_rate": 2.5385239253852395e-05, "loss": 0.4397, "step": 55800 }, { "epoch": 1.629304681683479, "grad_norm": 0.500802085840469, "learning_rate": 2.5382535820492026e-05, "loss": 0.435, "step": 55805 }, { "epoch": 1.6294506649537963, "grad_norm": 0.5131428151863845, "learning_rate": 2.5379832387131656e-05, "loss": 0.4617, "step": 55810 }, { "epoch": 1.6295966482241135, "grad_norm": 0.48577096939201103, "learning_rate": 2.537712895377129e-05, "loss": 0.4317, "step": 55815 }, { "epoch": 1.6297426314944308, "grad_norm": 0.48114414935861827, "learning_rate": 2.537442552041092e-05, "loss": 0.4652, "step": 55820 }, { "epoch": 1.629888614764748, "grad_norm": 0.49223583515037783, "learning_rate": 2.5371722087050555e-05, "loss": 0.4408, "step": 55825 }, { "epoch": 1.6300345980350652, "grad_norm": 0.4852989027889581, "learning_rate": 2.5369018653690186e-05, "loss": 0.4337, "step": 55830 }, { "epoch": 1.6301805813053822, "grad_norm": 0.48457302014993275, "learning_rate": 2.5366315220329823e-05, "loss": 0.454, "step": 55835 }, { "epoch": 1.6303265645756997, "grad_norm": 0.44420541718588563, "learning_rate": 2.5363611786969454e-05, "loss": 0.4452, "step": 55840 }, { "epoch": 1.6304725478460167, "grad_norm": 0.492327092626484, "learning_rate": 2.5360908353609088e-05, "loss": 0.4635, "step": 55845 }, { "epoch": 1.6306185311163341, "grad_norm": 0.533965273148643, "learning_rate": 2.535820492024872e-05, "loss": 0.4521, "step": 55850 }, { "epoch": 1.6307645143866512, "grad_norm": 0.5241616053953356, "learning_rate": 2.535550148688835e-05, "loss": 0.4424, "step": 55855 }, { "epoch": 1.6309104976569686, "grad_norm": 0.49214569111001133, "learning_rate": 2.5352798053527983e-05, "loss": 0.4629, "step": 55860 }, { "epoch": 1.6310564809272856, "grad_norm": 0.4708524296929319, "learning_rate": 2.5350094620167614e-05, "loss": 0.4514, "step": 55865 }, { "epoch": 1.631202464197603, "grad_norm": 0.4840291885752932, "learning_rate": 2.5347391186807244e-05, "loss": 0.4592, "step": 55870 }, { "epoch": 1.63134844746792, "grad_norm": 0.4766364410927163, "learning_rate": 2.5344687753446878e-05, "loss": 0.4286, "step": 55875 }, { "epoch": 1.6314944307382375, "grad_norm": 0.4729401961500682, "learning_rate": 2.534198432008651e-05, "loss": 0.4483, "step": 55880 }, { "epoch": 1.6316404140085545, "grad_norm": 0.4864186696520008, "learning_rate": 2.533928088672614e-05, "loss": 0.4631, "step": 55885 }, { "epoch": 1.631786397278872, "grad_norm": 0.4999766309526685, "learning_rate": 2.5336577453365777e-05, "loss": 0.452, "step": 55890 }, { "epoch": 1.631932380549189, "grad_norm": 0.5164672590975405, "learning_rate": 2.533387402000541e-05, "loss": 0.4431, "step": 55895 }, { "epoch": 1.6320783638195064, "grad_norm": 0.44963975762615027, "learning_rate": 2.533117058664504e-05, "loss": 0.4146, "step": 55900 }, { "epoch": 1.6322243470898234, "grad_norm": 0.4481564670782237, "learning_rate": 2.5328467153284675e-05, "loss": 0.4243, "step": 55905 }, { "epoch": 1.6323703303601407, "grad_norm": 0.4738501221533159, "learning_rate": 2.5325763719924306e-05, "loss": 0.4414, "step": 55910 }, { "epoch": 1.632516313630458, "grad_norm": 0.5041237610156473, "learning_rate": 2.5323060286563937e-05, "loss": 0.435, "step": 55915 }, { "epoch": 1.6326622969007751, "grad_norm": 0.4979558817097164, "learning_rate": 2.532035685320357e-05, "loss": 0.4483, "step": 55920 }, { "epoch": 1.6328082801710924, "grad_norm": 0.46740765417808405, "learning_rate": 2.53176534198432e-05, "loss": 0.4495, "step": 55925 }, { "epoch": 1.6329542634414096, "grad_norm": 0.4913844324946692, "learning_rate": 2.5314949986482832e-05, "loss": 0.4197, "step": 55930 }, { "epoch": 1.6331002467117268, "grad_norm": 0.5024182650364409, "learning_rate": 2.5312246553122466e-05, "loss": 0.4453, "step": 55935 }, { "epoch": 1.633246229982044, "grad_norm": 0.522399467715239, "learning_rate": 2.5309543119762097e-05, "loss": 0.4681, "step": 55940 }, { "epoch": 1.6333922132523613, "grad_norm": 0.4806168982802638, "learning_rate": 2.5306839686401727e-05, "loss": 0.4408, "step": 55945 }, { "epoch": 1.6335381965226785, "grad_norm": 0.48864155240601653, "learning_rate": 2.5304136253041365e-05, "loss": 0.4622, "step": 55950 }, { "epoch": 1.6336841797929957, "grad_norm": 0.4403938398250547, "learning_rate": 2.5301432819681e-05, "loss": 0.443, "step": 55955 }, { "epoch": 1.633830163063313, "grad_norm": 0.44318910402990286, "learning_rate": 2.529872938632063e-05, "loss": 0.4221, "step": 55960 }, { "epoch": 1.6339761463336302, "grad_norm": 0.5042011372268548, "learning_rate": 2.5296025952960263e-05, "loss": 0.4514, "step": 55965 }, { "epoch": 1.6341221296039474, "grad_norm": 0.48521169488325366, "learning_rate": 2.5293322519599894e-05, "loss": 0.4466, "step": 55970 }, { "epoch": 1.6342681128742647, "grad_norm": 0.4685044050772243, "learning_rate": 2.5290619086239525e-05, "loss": 0.433, "step": 55975 }, { "epoch": 1.6344140961445819, "grad_norm": 0.47863691028495947, "learning_rate": 2.528791565287916e-05, "loss": 0.4535, "step": 55980 }, { "epoch": 1.6345600794148991, "grad_norm": 0.44980973454333467, "learning_rate": 2.528521221951879e-05, "loss": 0.414, "step": 55985 }, { "epoch": 1.6347060626852161, "grad_norm": 0.5109496045280585, "learning_rate": 2.528250878615842e-05, "loss": 0.4303, "step": 55990 }, { "epoch": 1.6348520459555336, "grad_norm": 0.4996745626942361, "learning_rate": 2.5279805352798054e-05, "loss": 0.442, "step": 55995 }, { "epoch": 1.6349980292258506, "grad_norm": 0.49991804398852957, "learning_rate": 2.5277101919437684e-05, "loss": 0.462, "step": 56000 }, { "epoch": 1.635144012496168, "grad_norm": 0.46013559020699035, "learning_rate": 2.5274398486077322e-05, "loss": 0.4396, "step": 56005 }, { "epoch": 1.635289995766485, "grad_norm": 0.463676491008647, "learning_rate": 2.5271695052716952e-05, "loss": 0.447, "step": 56010 }, { "epoch": 1.6354359790368025, "grad_norm": 0.4505309027334372, "learning_rate": 2.5268991619356586e-05, "loss": 0.4101, "step": 56015 }, { "epoch": 1.6355819623071195, "grad_norm": 0.5078913464928122, "learning_rate": 2.5266288185996217e-05, "loss": 0.4444, "step": 56020 }, { "epoch": 1.635727945577437, "grad_norm": 0.5068541797458787, "learning_rate": 2.5263584752635848e-05, "loss": 0.4406, "step": 56025 }, { "epoch": 1.635873928847754, "grad_norm": 0.4537462784099775, "learning_rate": 2.5260881319275482e-05, "loss": 0.4368, "step": 56030 }, { "epoch": 1.6360199121180714, "grad_norm": 0.4448602586994528, "learning_rate": 2.5258177885915112e-05, "loss": 0.4238, "step": 56035 }, { "epoch": 1.6361658953883884, "grad_norm": 0.47850276990355606, "learning_rate": 2.5255474452554746e-05, "loss": 0.4391, "step": 56040 }, { "epoch": 1.6363118786587059, "grad_norm": 0.45975667259933933, "learning_rate": 2.5252771019194377e-05, "loss": 0.4417, "step": 56045 }, { "epoch": 1.6364578619290229, "grad_norm": 0.4937738503892987, "learning_rate": 2.5250067585834008e-05, "loss": 0.4451, "step": 56050 }, { "epoch": 1.63660384519934, "grad_norm": 0.4604650525494782, "learning_rate": 2.524736415247364e-05, "loss": 0.4366, "step": 56055 }, { "epoch": 1.6367498284696573, "grad_norm": 0.47422238855646803, "learning_rate": 2.524466071911328e-05, "loss": 0.4358, "step": 56060 }, { "epoch": 1.6368958117399746, "grad_norm": 0.47195731528392415, "learning_rate": 2.524195728575291e-05, "loss": 0.4472, "step": 56065 }, { "epoch": 1.6370417950102918, "grad_norm": 0.47174781575955, "learning_rate": 2.523925385239254e-05, "loss": 0.4274, "step": 56070 }, { "epoch": 1.637187778280609, "grad_norm": 0.46029394166118065, "learning_rate": 2.5236550419032174e-05, "loss": 0.4536, "step": 56075 }, { "epoch": 1.6373337615509262, "grad_norm": 0.5990973204846622, "learning_rate": 2.5233846985671805e-05, "loss": 0.4784, "step": 56080 }, { "epoch": 1.6374797448212435, "grad_norm": 0.46423499410973906, "learning_rate": 2.5231143552311436e-05, "loss": 0.4552, "step": 56085 }, { "epoch": 1.6376257280915607, "grad_norm": 0.4685773471255612, "learning_rate": 2.522844011895107e-05, "loss": 0.4063, "step": 56090 }, { "epoch": 1.637771711361878, "grad_norm": 0.4569363419520207, "learning_rate": 2.52257366855907e-05, "loss": 0.4336, "step": 56095 }, { "epoch": 1.6379176946321952, "grad_norm": 0.47201644834339473, "learning_rate": 2.5223033252230334e-05, "loss": 0.4379, "step": 56100 }, { "epoch": 1.6380636779025124, "grad_norm": 0.49971908095174594, "learning_rate": 2.5220329818869965e-05, "loss": 0.4284, "step": 56105 }, { "epoch": 1.6382096611728296, "grad_norm": 0.48770271793719044, "learning_rate": 2.5217626385509595e-05, "loss": 0.4462, "step": 56110 }, { "epoch": 1.6383556444431469, "grad_norm": 0.517031016248129, "learning_rate": 2.521492295214923e-05, "loss": 0.439, "step": 56115 }, { "epoch": 1.638501627713464, "grad_norm": 0.46964053484485724, "learning_rate": 2.5212219518788867e-05, "loss": 0.4929, "step": 56120 }, { "epoch": 1.6386476109837813, "grad_norm": 0.5040095713515828, "learning_rate": 2.5209516085428497e-05, "loss": 0.4489, "step": 56125 }, { "epoch": 1.6387935942540985, "grad_norm": 0.48138945434852487, "learning_rate": 2.5206812652068128e-05, "loss": 0.4482, "step": 56130 }, { "epoch": 1.6389395775244155, "grad_norm": 0.48593067826070085, "learning_rate": 2.5204109218707762e-05, "loss": 0.4568, "step": 56135 }, { "epoch": 1.639085560794733, "grad_norm": 0.46647637041439205, "learning_rate": 2.5201405785347393e-05, "loss": 0.4558, "step": 56140 }, { "epoch": 1.63923154406505, "grad_norm": 0.5023461059213208, "learning_rate": 2.5198702351987023e-05, "loss": 0.4718, "step": 56145 }, { "epoch": 1.6393775273353675, "grad_norm": 0.47001391091805744, "learning_rate": 2.5195998918626657e-05, "loss": 0.446, "step": 56150 }, { "epoch": 1.6395235106056845, "grad_norm": 0.5038075229438475, "learning_rate": 2.5193295485266288e-05, "loss": 0.4537, "step": 56155 }, { "epoch": 1.639669493876002, "grad_norm": 0.48784267416031163, "learning_rate": 2.519059205190592e-05, "loss": 0.4529, "step": 56160 }, { "epoch": 1.639815477146319, "grad_norm": 0.4661492824640706, "learning_rate": 2.5187888618545553e-05, "loss": 0.44, "step": 56165 }, { "epoch": 1.6399614604166364, "grad_norm": 0.49888841291925007, "learning_rate": 2.5185185185185183e-05, "loss": 0.4424, "step": 56170 }, { "epoch": 1.6401074436869534, "grad_norm": 0.48800552105994743, "learning_rate": 2.518248175182482e-05, "loss": 0.4364, "step": 56175 }, { "epoch": 1.6402534269572708, "grad_norm": 0.46034412272897146, "learning_rate": 2.5179778318464455e-05, "loss": 0.4527, "step": 56180 }, { "epoch": 1.6403994102275878, "grad_norm": 0.4740866576914018, "learning_rate": 2.5177074885104085e-05, "loss": 0.3988, "step": 56185 }, { "epoch": 1.6405453934979053, "grad_norm": 0.4763423025524689, "learning_rate": 2.5174371451743716e-05, "loss": 0.4283, "step": 56190 }, { "epoch": 1.6406913767682223, "grad_norm": 0.5190821873603054, "learning_rate": 2.517166801838335e-05, "loss": 0.4536, "step": 56195 }, { "epoch": 1.6408373600385397, "grad_norm": 0.44637503372134774, "learning_rate": 2.516896458502298e-05, "loss": 0.4273, "step": 56200 }, { "epoch": 1.6409833433088568, "grad_norm": 0.4811046166632253, "learning_rate": 2.516626115166261e-05, "loss": 0.4302, "step": 56205 }, { "epoch": 1.641129326579174, "grad_norm": 0.48336684493112964, "learning_rate": 2.5163557718302245e-05, "loss": 0.4506, "step": 56210 }, { "epoch": 1.6412753098494912, "grad_norm": 0.4358727399725329, "learning_rate": 2.5160854284941876e-05, "loss": 0.4233, "step": 56215 }, { "epoch": 1.6414212931198084, "grad_norm": 0.5312523086350527, "learning_rate": 2.5158150851581506e-05, "loss": 0.4713, "step": 56220 }, { "epoch": 1.6415672763901257, "grad_norm": 0.46682110706981067, "learning_rate": 2.515544741822114e-05, "loss": 0.4596, "step": 56225 }, { "epoch": 1.641713259660443, "grad_norm": 0.4408986303882481, "learning_rate": 2.5152743984860778e-05, "loss": 0.4641, "step": 56230 }, { "epoch": 1.6418592429307601, "grad_norm": 0.4231991108681277, "learning_rate": 2.515004055150041e-05, "loss": 0.4387, "step": 56235 }, { "epoch": 1.6420052262010774, "grad_norm": 0.4453396880232971, "learning_rate": 2.5147337118140042e-05, "loss": 0.4403, "step": 56240 }, { "epoch": 1.6421512094713946, "grad_norm": 0.4559292650140066, "learning_rate": 2.5144633684779673e-05, "loss": 0.4432, "step": 56245 }, { "epoch": 1.6422971927417118, "grad_norm": 0.4887087258718033, "learning_rate": 2.5141930251419304e-05, "loss": 0.4302, "step": 56250 }, { "epoch": 1.642443176012029, "grad_norm": 0.42761161824355204, "learning_rate": 2.5139226818058938e-05, "loss": 0.4089, "step": 56255 }, { "epoch": 1.6425891592823463, "grad_norm": 0.44061944601554076, "learning_rate": 2.513652338469857e-05, "loss": 0.4006, "step": 56260 }, { "epoch": 1.6427351425526635, "grad_norm": 0.4828068018834806, "learning_rate": 2.51338199513382e-05, "loss": 0.4114, "step": 56265 }, { "epoch": 1.6428811258229807, "grad_norm": 0.48942800001937486, "learning_rate": 2.5131116517977833e-05, "loss": 0.4626, "step": 56270 }, { "epoch": 1.643027109093298, "grad_norm": 0.42720731745286494, "learning_rate": 2.5128413084617464e-05, "loss": 0.4517, "step": 56275 }, { "epoch": 1.643173092363615, "grad_norm": 0.5239175953869095, "learning_rate": 2.5125709651257094e-05, "loss": 0.4817, "step": 56280 }, { "epoch": 1.6433190756339324, "grad_norm": 0.5308660038531786, "learning_rate": 2.5123006217896728e-05, "loss": 0.4436, "step": 56285 }, { "epoch": 1.6434650589042494, "grad_norm": 0.42437020480714815, "learning_rate": 2.5120302784536366e-05, "loss": 0.4324, "step": 56290 }, { "epoch": 1.6436110421745669, "grad_norm": 0.46400540146469094, "learning_rate": 2.5117599351175996e-05, "loss": 0.4278, "step": 56295 }, { "epoch": 1.643757025444884, "grad_norm": 0.5073921119608401, "learning_rate": 2.5114895917815627e-05, "loss": 0.4471, "step": 56300 }, { "epoch": 1.6439030087152013, "grad_norm": 0.4918702690382973, "learning_rate": 2.511219248445526e-05, "loss": 0.4311, "step": 56305 }, { "epoch": 1.6440489919855183, "grad_norm": 0.4768983508398442, "learning_rate": 2.510948905109489e-05, "loss": 0.4386, "step": 56310 }, { "epoch": 1.6441949752558358, "grad_norm": 0.453603436082665, "learning_rate": 2.5106785617734526e-05, "loss": 0.4261, "step": 56315 }, { "epoch": 1.6443409585261528, "grad_norm": 0.46975912523828345, "learning_rate": 2.5104082184374156e-05, "loss": 0.4553, "step": 56320 }, { "epoch": 1.6444869417964703, "grad_norm": 0.45124952620130426, "learning_rate": 2.5101378751013787e-05, "loss": 0.4739, "step": 56325 }, { "epoch": 1.6446329250667873, "grad_norm": 0.4634936271616632, "learning_rate": 2.509867531765342e-05, "loss": 0.4506, "step": 56330 }, { "epoch": 1.6447789083371047, "grad_norm": 0.44734449168824114, "learning_rate": 2.509597188429305e-05, "loss": 0.4419, "step": 56335 }, { "epoch": 1.6449248916074217, "grad_norm": 0.4707427711325131, "learning_rate": 2.5093268450932682e-05, "loss": 0.4372, "step": 56340 }, { "epoch": 1.6450708748777392, "grad_norm": 0.4882541767812738, "learning_rate": 2.509056501757232e-05, "loss": 0.4569, "step": 56345 }, { "epoch": 1.6452168581480562, "grad_norm": 0.49254927461374687, "learning_rate": 2.5087861584211953e-05, "loss": 0.4481, "step": 56350 }, { "epoch": 1.6453628414183734, "grad_norm": 0.4468963038409267, "learning_rate": 2.5085158150851584e-05, "loss": 0.3933, "step": 56355 }, { "epoch": 1.6455088246886906, "grad_norm": 0.4835306866944892, "learning_rate": 2.5082454717491215e-05, "loss": 0.4578, "step": 56360 }, { "epoch": 1.6456548079590079, "grad_norm": 0.4421451900910726, "learning_rate": 2.507975128413085e-05, "loss": 0.4151, "step": 56365 }, { "epoch": 1.645800791229325, "grad_norm": 0.48976073246973073, "learning_rate": 2.507704785077048e-05, "loss": 0.4413, "step": 56370 }, { "epoch": 1.6459467744996423, "grad_norm": 0.4969322244980067, "learning_rate": 2.5074344417410113e-05, "loss": 0.4638, "step": 56375 }, { "epoch": 1.6460927577699596, "grad_norm": 0.4946910779702852, "learning_rate": 2.5071640984049744e-05, "loss": 0.4449, "step": 56380 }, { "epoch": 1.6462387410402768, "grad_norm": 0.4990135658484927, "learning_rate": 2.5068937550689375e-05, "loss": 0.4602, "step": 56385 }, { "epoch": 1.646384724310594, "grad_norm": 0.435384513368081, "learning_rate": 2.506623411732901e-05, "loss": 0.4433, "step": 56390 }, { "epoch": 1.6465307075809112, "grad_norm": 0.5445502641391137, "learning_rate": 2.506353068396864e-05, "loss": 0.4741, "step": 56395 }, { "epoch": 1.6466766908512285, "grad_norm": 0.44589810464224855, "learning_rate": 2.5060827250608277e-05, "loss": 0.4492, "step": 56400 }, { "epoch": 1.6468226741215457, "grad_norm": 0.44908549774099604, "learning_rate": 2.5058123817247907e-05, "loss": 0.4306, "step": 56405 }, { "epoch": 1.646968657391863, "grad_norm": 0.5015317437397363, "learning_rate": 2.505542038388754e-05, "loss": 0.4546, "step": 56410 }, { "epoch": 1.6471146406621802, "grad_norm": 0.4587748390997033, "learning_rate": 2.5052716950527172e-05, "loss": 0.441, "step": 56415 }, { "epoch": 1.6472606239324974, "grad_norm": 0.4502983661741302, "learning_rate": 2.5050013517166802e-05, "loss": 0.4597, "step": 56420 }, { "epoch": 1.6474066072028144, "grad_norm": 0.4596964531393667, "learning_rate": 2.5047310083806436e-05, "loss": 0.426, "step": 56425 }, { "epoch": 1.6475525904731319, "grad_norm": 0.4869859381688115, "learning_rate": 2.5044606650446067e-05, "loss": 0.4586, "step": 56430 }, { "epoch": 1.6476985737434489, "grad_norm": 0.4524849819912982, "learning_rate": 2.5041903217085698e-05, "loss": 0.4155, "step": 56435 }, { "epoch": 1.6478445570137663, "grad_norm": 0.4676698555414947, "learning_rate": 2.5039199783725332e-05, "loss": 0.4299, "step": 56440 }, { "epoch": 1.6479905402840833, "grad_norm": 0.5282083486705358, "learning_rate": 2.5036496350364962e-05, "loss": 0.4729, "step": 56445 }, { "epoch": 1.6481365235544008, "grad_norm": 0.46614964905879297, "learning_rate": 2.5033792917004596e-05, "loss": 0.4302, "step": 56450 }, { "epoch": 1.6482825068247178, "grad_norm": 0.46843734903262885, "learning_rate": 2.5031089483644227e-05, "loss": 0.4421, "step": 56455 }, { "epoch": 1.6484284900950352, "grad_norm": 0.45206680849079356, "learning_rate": 2.5028386050283864e-05, "loss": 0.4264, "step": 56460 }, { "epoch": 1.6485744733653522, "grad_norm": 0.47958785042380947, "learning_rate": 2.5025682616923495e-05, "loss": 0.4358, "step": 56465 }, { "epoch": 1.6487204566356697, "grad_norm": 0.4637027982998353, "learning_rate": 2.502297918356313e-05, "loss": 0.4289, "step": 56470 }, { "epoch": 1.6488664399059867, "grad_norm": 0.5053453818195646, "learning_rate": 2.502027575020276e-05, "loss": 0.449, "step": 56475 }, { "epoch": 1.6490124231763041, "grad_norm": 0.4748910896858206, "learning_rate": 2.501757231684239e-05, "loss": 0.4205, "step": 56480 }, { "epoch": 1.6491584064466212, "grad_norm": 0.4947482921733393, "learning_rate": 2.5014868883482024e-05, "loss": 0.4327, "step": 56485 }, { "epoch": 1.6493043897169386, "grad_norm": 0.4743228619528006, "learning_rate": 2.5012165450121655e-05, "loss": 0.4537, "step": 56490 }, { "epoch": 1.6494503729872556, "grad_norm": 0.5037788087573186, "learning_rate": 2.5009462016761286e-05, "loss": 0.4335, "step": 56495 }, { "epoch": 1.6495963562575728, "grad_norm": 0.4709674204566856, "learning_rate": 2.500675858340092e-05, "loss": 0.4436, "step": 56500 }, { "epoch": 1.64974233952789, "grad_norm": 0.47351572908104445, "learning_rate": 2.500405515004055e-05, "loss": 0.4479, "step": 56505 }, { "epoch": 1.6498883227982073, "grad_norm": 0.46740859306050286, "learning_rate": 2.5001351716680184e-05, "loss": 0.4109, "step": 56510 }, { "epoch": 1.6500343060685245, "grad_norm": 0.4722374498919155, "learning_rate": 2.4998648283319818e-05, "loss": 0.4444, "step": 56515 }, { "epoch": 1.6501802893388418, "grad_norm": 0.4117646717767724, "learning_rate": 2.499594484995945e-05, "loss": 0.4387, "step": 56520 }, { "epoch": 1.650326272609159, "grad_norm": 0.42585192218503326, "learning_rate": 2.499324141659908e-05, "loss": 0.4408, "step": 56525 }, { "epoch": 1.6504722558794762, "grad_norm": 0.44096708688425656, "learning_rate": 2.4990537983238717e-05, "loss": 0.43, "step": 56530 }, { "epoch": 1.6506182391497934, "grad_norm": 0.46098568348568275, "learning_rate": 2.4987834549878347e-05, "loss": 0.4283, "step": 56535 }, { "epoch": 1.6507642224201107, "grad_norm": 0.5049916033188586, "learning_rate": 2.4985131116517978e-05, "loss": 0.4394, "step": 56540 }, { "epoch": 1.650910205690428, "grad_norm": 0.44937827858595597, "learning_rate": 2.4982427683157612e-05, "loss": 0.4275, "step": 56545 }, { "epoch": 1.6510561889607451, "grad_norm": 0.4764950197526633, "learning_rate": 2.4979724249797243e-05, "loss": 0.451, "step": 56550 }, { "epoch": 1.6512021722310624, "grad_norm": 0.46824383733863745, "learning_rate": 2.4977020816436877e-05, "loss": 0.4449, "step": 56555 }, { "epoch": 1.6513481555013796, "grad_norm": 0.49092987484779665, "learning_rate": 2.497431738307651e-05, "loss": 0.4556, "step": 56560 }, { "epoch": 1.6514941387716968, "grad_norm": 0.5285646886327064, "learning_rate": 2.497161394971614e-05, "loss": 0.4631, "step": 56565 }, { "epoch": 1.6516401220420138, "grad_norm": 0.4512626191597298, "learning_rate": 2.4968910516355772e-05, "loss": 0.4004, "step": 56570 }, { "epoch": 1.6517861053123313, "grad_norm": 0.49055133425701236, "learning_rate": 2.4966207082995406e-05, "loss": 0.4405, "step": 56575 }, { "epoch": 1.6519320885826483, "grad_norm": 0.46399833830532133, "learning_rate": 2.4963503649635037e-05, "loss": 0.4334, "step": 56580 }, { "epoch": 1.6520780718529657, "grad_norm": 0.47914876174963783, "learning_rate": 2.496080021627467e-05, "loss": 0.439, "step": 56585 }, { "epoch": 1.6522240551232827, "grad_norm": 0.4811193946245748, "learning_rate": 2.4958096782914305e-05, "loss": 0.4533, "step": 56590 }, { "epoch": 1.6523700383936002, "grad_norm": 0.4516439232914487, "learning_rate": 2.4955393349553935e-05, "loss": 0.4382, "step": 56595 }, { "epoch": 1.6525160216639172, "grad_norm": 0.5128760301070949, "learning_rate": 2.4952689916193566e-05, "loss": 0.4642, "step": 56600 }, { "epoch": 1.6526620049342347, "grad_norm": 0.44994891742514725, "learning_rate": 2.49499864828332e-05, "loss": 0.4171, "step": 56605 }, { "epoch": 1.6528079882045517, "grad_norm": 0.475249622570647, "learning_rate": 2.494728304947283e-05, "loss": 0.4193, "step": 56610 }, { "epoch": 1.652953971474869, "grad_norm": 0.516360455033222, "learning_rate": 2.4944579616112465e-05, "loss": 0.4602, "step": 56615 }, { "epoch": 1.6530999547451861, "grad_norm": 0.5081232915721751, "learning_rate": 2.49418761827521e-05, "loss": 0.4426, "step": 56620 }, { "epoch": 1.6532459380155036, "grad_norm": 0.4569573979352861, "learning_rate": 2.493917274939173e-05, "loss": 0.4489, "step": 56625 }, { "epoch": 1.6533919212858206, "grad_norm": 0.4214719064106656, "learning_rate": 2.493646931603136e-05, "loss": 0.4386, "step": 56630 }, { "epoch": 1.653537904556138, "grad_norm": 0.4330958075431969, "learning_rate": 2.4933765882670994e-05, "loss": 0.4312, "step": 56635 }, { "epoch": 1.653683887826455, "grad_norm": 0.47421982609556723, "learning_rate": 2.4931062449310628e-05, "loss": 0.4836, "step": 56640 }, { "epoch": 1.6538298710967723, "grad_norm": 0.47616279098117703, "learning_rate": 2.492835901595026e-05, "loss": 0.4613, "step": 56645 }, { "epoch": 1.6539758543670895, "grad_norm": 0.4759698629604598, "learning_rate": 2.492565558258989e-05, "loss": 0.4265, "step": 56650 }, { "epoch": 1.6541218376374067, "grad_norm": 0.4504236995572042, "learning_rate": 2.4922952149229523e-05, "loss": 0.4316, "step": 56655 }, { "epoch": 1.654267820907724, "grad_norm": 0.4663210136225586, "learning_rate": 2.4920248715869154e-05, "loss": 0.4431, "step": 56660 }, { "epoch": 1.6544138041780412, "grad_norm": 0.4656244976586634, "learning_rate": 2.4917545282508788e-05, "loss": 0.4366, "step": 56665 }, { "epoch": 1.6545597874483584, "grad_norm": 0.4556187817889672, "learning_rate": 2.4914841849148422e-05, "loss": 0.4399, "step": 56670 }, { "epoch": 1.6547057707186756, "grad_norm": 0.46724412879981103, "learning_rate": 2.4912138415788052e-05, "loss": 0.4295, "step": 56675 }, { "epoch": 1.6548517539889929, "grad_norm": 0.45891141892590653, "learning_rate": 2.4909434982427683e-05, "loss": 0.431, "step": 56680 }, { "epoch": 1.65499773725931, "grad_norm": 0.4570274852463105, "learning_rate": 2.4906731549067317e-05, "loss": 0.4671, "step": 56685 }, { "epoch": 1.6551437205296273, "grad_norm": 0.494087361929275, "learning_rate": 2.4904028115706948e-05, "loss": 0.4532, "step": 56690 }, { "epoch": 1.6552897037999446, "grad_norm": 0.502447081114578, "learning_rate": 2.490132468234658e-05, "loss": 0.4471, "step": 56695 }, { "epoch": 1.6554356870702618, "grad_norm": 0.4685441252862193, "learning_rate": 2.4898621248986216e-05, "loss": 0.4618, "step": 56700 }, { "epoch": 1.655581670340579, "grad_norm": 0.4930768220473413, "learning_rate": 2.4895917815625846e-05, "loss": 0.4696, "step": 56705 }, { "epoch": 1.6557276536108962, "grad_norm": 0.47400623066202324, "learning_rate": 2.4893214382265477e-05, "loss": 0.4459, "step": 56710 }, { "epoch": 1.6558736368812133, "grad_norm": 0.4704189133920239, "learning_rate": 2.489051094890511e-05, "loss": 0.4272, "step": 56715 }, { "epoch": 1.6560196201515307, "grad_norm": 0.4583041006215401, "learning_rate": 2.488780751554474e-05, "loss": 0.4345, "step": 56720 }, { "epoch": 1.6561656034218477, "grad_norm": 0.49537878269747176, "learning_rate": 2.4885104082184376e-05, "loss": 0.4553, "step": 56725 }, { "epoch": 1.6563115866921652, "grad_norm": 0.45209895748026174, "learning_rate": 2.488240064882401e-05, "loss": 0.4445, "step": 56730 }, { "epoch": 1.6564575699624822, "grad_norm": 0.492424829309199, "learning_rate": 2.487969721546364e-05, "loss": 0.4518, "step": 56735 }, { "epoch": 1.6566035532327996, "grad_norm": 0.487711769705705, "learning_rate": 2.487699378210327e-05, "loss": 0.4667, "step": 56740 }, { "epoch": 1.6567495365031166, "grad_norm": 0.4679416246414968, "learning_rate": 2.4874290348742905e-05, "loss": 0.4433, "step": 56745 }, { "epoch": 1.656895519773434, "grad_norm": 0.47497715956828823, "learning_rate": 2.4871586915382535e-05, "loss": 0.4301, "step": 56750 }, { "epoch": 1.657041503043751, "grad_norm": 0.46439372692048964, "learning_rate": 2.486888348202217e-05, "loss": 0.4329, "step": 56755 }, { "epoch": 1.6571874863140685, "grad_norm": 0.49548939183403923, "learning_rate": 2.4866180048661803e-05, "loss": 0.426, "step": 56760 }, { "epoch": 1.6573334695843855, "grad_norm": 0.4706856163267816, "learning_rate": 2.4863476615301434e-05, "loss": 0.4122, "step": 56765 }, { "epoch": 1.657479452854703, "grad_norm": 0.45600052382321987, "learning_rate": 2.4860773181941065e-05, "loss": 0.465, "step": 56770 }, { "epoch": 1.65762543612502, "grad_norm": 0.4790109639012559, "learning_rate": 2.48580697485807e-05, "loss": 0.4572, "step": 56775 }, { "epoch": 1.6577714193953375, "grad_norm": 0.49277255443084095, "learning_rate": 2.485536631522033e-05, "loss": 0.4573, "step": 56780 }, { "epoch": 1.6579174026656545, "grad_norm": 0.4767846748802105, "learning_rate": 2.4852662881859963e-05, "loss": 0.4425, "step": 56785 }, { "epoch": 1.6580633859359717, "grad_norm": 0.4887236535861622, "learning_rate": 2.4849959448499597e-05, "loss": 0.4614, "step": 56790 }, { "epoch": 1.658209369206289, "grad_norm": 0.4922454537049217, "learning_rate": 2.4847256015139228e-05, "loss": 0.421, "step": 56795 }, { "epoch": 1.6583553524766061, "grad_norm": 0.4996321750015129, "learning_rate": 2.484455258177886e-05, "loss": 0.4399, "step": 56800 }, { "epoch": 1.6585013357469234, "grad_norm": 0.45471412131691713, "learning_rate": 2.4841849148418493e-05, "loss": 0.4441, "step": 56805 }, { "epoch": 1.6586473190172406, "grad_norm": 0.46807402429343986, "learning_rate": 2.4839145715058127e-05, "loss": 0.4467, "step": 56810 }, { "epoch": 1.6587933022875578, "grad_norm": 0.47463853252858923, "learning_rate": 2.4836442281697757e-05, "loss": 0.4409, "step": 56815 }, { "epoch": 1.658939285557875, "grad_norm": 0.47459292095540434, "learning_rate": 2.483373884833739e-05, "loss": 0.5141, "step": 56820 }, { "epoch": 1.6590852688281923, "grad_norm": 0.5390773190842043, "learning_rate": 2.4831035414977022e-05, "loss": 0.436, "step": 56825 }, { "epoch": 1.6592312520985095, "grad_norm": 0.44374395177024967, "learning_rate": 2.4828331981616652e-05, "loss": 0.4314, "step": 56830 }, { "epoch": 1.6593772353688268, "grad_norm": 0.45744132648925795, "learning_rate": 2.4825628548256287e-05, "loss": 0.4648, "step": 56835 }, { "epoch": 1.659523218639144, "grad_norm": 0.48197812482122265, "learning_rate": 2.482292511489592e-05, "loss": 0.4163, "step": 56840 }, { "epoch": 1.6596692019094612, "grad_norm": 0.4160453095906456, "learning_rate": 2.482022168153555e-05, "loss": 0.4097, "step": 56845 }, { "epoch": 1.6598151851797784, "grad_norm": 0.48594171407276043, "learning_rate": 2.4817518248175185e-05, "loss": 0.4623, "step": 56850 }, { "epoch": 1.6599611684500957, "grad_norm": 0.46044139614570134, "learning_rate": 2.4814814814814816e-05, "loss": 0.4115, "step": 56855 }, { "epoch": 1.6601071517204127, "grad_norm": 0.49188241979506075, "learning_rate": 2.4812111381454446e-05, "loss": 0.4262, "step": 56860 }, { "epoch": 1.6602531349907301, "grad_norm": 0.48193510867272565, "learning_rate": 2.480940794809408e-05, "loss": 0.4119, "step": 56865 }, { "epoch": 1.6603991182610471, "grad_norm": 0.5117487859152546, "learning_rate": 2.4806704514733714e-05, "loss": 0.4606, "step": 56870 }, { "epoch": 1.6605451015313646, "grad_norm": 0.48222084284045974, "learning_rate": 2.4804001081373345e-05, "loss": 0.4201, "step": 56875 }, { "epoch": 1.6606910848016816, "grad_norm": 0.4867003171657106, "learning_rate": 2.480129764801298e-05, "loss": 0.4358, "step": 56880 }, { "epoch": 1.660837068071999, "grad_norm": 0.47815984735590433, "learning_rate": 2.479859421465261e-05, "loss": 0.4311, "step": 56885 }, { "epoch": 1.660983051342316, "grad_norm": 0.4408877271033787, "learning_rate": 2.479589078129224e-05, "loss": 0.4346, "step": 56890 }, { "epoch": 1.6611290346126335, "grad_norm": 0.4698842401793696, "learning_rate": 2.4793187347931878e-05, "loss": 0.4382, "step": 56895 }, { "epoch": 1.6612750178829505, "grad_norm": 0.4665823463864405, "learning_rate": 2.479048391457151e-05, "loss": 0.4356, "step": 56900 }, { "epoch": 1.661421001153268, "grad_norm": 0.4371258559921177, "learning_rate": 2.478778048121114e-05, "loss": 0.4372, "step": 56905 }, { "epoch": 1.661566984423585, "grad_norm": 0.4409510476883278, "learning_rate": 2.4785077047850773e-05, "loss": 0.4577, "step": 56910 }, { "epoch": 1.6617129676939024, "grad_norm": 0.49111802189787535, "learning_rate": 2.4782373614490404e-05, "loss": 0.4755, "step": 56915 }, { "epoch": 1.6618589509642194, "grad_norm": 0.5225137590501484, "learning_rate": 2.4779670181130034e-05, "loss": 0.4451, "step": 56920 }, { "epoch": 1.6620049342345369, "grad_norm": 0.4754189361734014, "learning_rate": 2.4776966747769668e-05, "loss": 0.4309, "step": 56925 }, { "epoch": 1.6621509175048539, "grad_norm": 0.4714488565673816, "learning_rate": 2.4774263314409302e-05, "loss": 0.4406, "step": 56930 }, { "epoch": 1.6622969007751711, "grad_norm": 0.49206573256418545, "learning_rate": 2.4771559881048933e-05, "loss": 0.4234, "step": 56935 }, { "epoch": 1.6624428840454883, "grad_norm": 0.4775662818506711, "learning_rate": 2.4768856447688567e-05, "loss": 0.457, "step": 56940 }, { "epoch": 1.6625888673158056, "grad_norm": 0.47112033950572, "learning_rate": 2.4766153014328197e-05, "loss": 0.4438, "step": 56945 }, { "epoch": 1.6627348505861228, "grad_norm": 0.4731837313547627, "learning_rate": 2.4763449580967828e-05, "loss": 0.468, "step": 56950 }, { "epoch": 1.66288083385644, "grad_norm": 0.4722768534168479, "learning_rate": 2.4760746147607462e-05, "loss": 0.4618, "step": 56955 }, { "epoch": 1.6630268171267573, "grad_norm": 0.47599784274805274, "learning_rate": 2.4758042714247096e-05, "loss": 0.459, "step": 56960 }, { "epoch": 1.6631728003970745, "grad_norm": 0.5008467162776044, "learning_rate": 2.4755339280886727e-05, "loss": 0.4557, "step": 56965 }, { "epoch": 1.6633187836673917, "grad_norm": 0.46064873606214, "learning_rate": 2.475263584752636e-05, "loss": 0.4427, "step": 56970 }, { "epoch": 1.663464766937709, "grad_norm": 0.5014958359310728, "learning_rate": 2.474993241416599e-05, "loss": 0.4364, "step": 56975 }, { "epoch": 1.6636107502080262, "grad_norm": 0.466904321771683, "learning_rate": 2.4747228980805625e-05, "loss": 0.4358, "step": 56980 }, { "epoch": 1.6637567334783434, "grad_norm": 0.46625739445778763, "learning_rate": 2.4744525547445256e-05, "loss": 0.4517, "step": 56985 }, { "epoch": 1.6639027167486606, "grad_norm": 0.42733098665478153, "learning_rate": 2.474182211408489e-05, "loss": 0.4171, "step": 56990 }, { "epoch": 1.6640487000189779, "grad_norm": 0.4489716807469848, "learning_rate": 2.473911868072452e-05, "loss": 0.4281, "step": 56995 }, { "epoch": 1.664194683289295, "grad_norm": 0.46737780146992175, "learning_rate": 2.4736415247364155e-05, "loss": 0.4554, "step": 57000 }, { "epoch": 1.664340666559612, "grad_norm": 0.49120659422000873, "learning_rate": 2.4733711814003785e-05, "loss": 0.4402, "step": 57005 }, { "epoch": 1.6644866498299296, "grad_norm": 0.4782784917514317, "learning_rate": 2.473100838064342e-05, "loss": 0.4455, "step": 57010 }, { "epoch": 1.6646326331002466, "grad_norm": 0.4661589503880283, "learning_rate": 2.472830494728305e-05, "loss": 0.4184, "step": 57015 }, { "epoch": 1.664778616370564, "grad_norm": 0.46363129700418876, "learning_rate": 2.4725601513922684e-05, "loss": 0.4285, "step": 57020 }, { "epoch": 1.664924599640881, "grad_norm": 0.4526091625032542, "learning_rate": 2.4722898080562315e-05, "loss": 0.4372, "step": 57025 }, { "epoch": 1.6650705829111985, "grad_norm": 0.4901718927099383, "learning_rate": 2.472019464720195e-05, "loss": 0.4561, "step": 57030 }, { "epoch": 1.6652165661815155, "grad_norm": 0.42641127622916963, "learning_rate": 2.471749121384158e-05, "loss": 0.4326, "step": 57035 }, { "epoch": 1.665362549451833, "grad_norm": 0.481836952339961, "learning_rate": 2.4714787780481213e-05, "loss": 0.4337, "step": 57040 }, { "epoch": 1.66550853272215, "grad_norm": 0.4698748096591807, "learning_rate": 2.4712084347120844e-05, "loss": 0.4318, "step": 57045 }, { "epoch": 1.6656545159924674, "grad_norm": 0.4717925685419586, "learning_rate": 2.4709380913760478e-05, "loss": 0.4388, "step": 57050 }, { "epoch": 1.6658004992627844, "grad_norm": 0.43928061897062937, "learning_rate": 2.470667748040011e-05, "loss": 0.4279, "step": 57055 }, { "epoch": 1.6659464825331018, "grad_norm": 0.4722250021432403, "learning_rate": 2.470397404703974e-05, "loss": 0.44, "step": 57060 }, { "epoch": 1.6660924658034189, "grad_norm": 0.47671101091246854, "learning_rate": 2.4701270613679376e-05, "loss": 0.4353, "step": 57065 }, { "epoch": 1.6662384490737363, "grad_norm": 0.4637351833703591, "learning_rate": 2.4698567180319007e-05, "loss": 0.4249, "step": 57070 }, { "epoch": 1.6663844323440533, "grad_norm": 0.45559408468401286, "learning_rate": 2.4695863746958638e-05, "loss": 0.4209, "step": 57075 }, { "epoch": 1.6665304156143705, "grad_norm": 0.4542615817146807, "learning_rate": 2.4693160313598272e-05, "loss": 0.4429, "step": 57080 }, { "epoch": 1.6666763988846878, "grad_norm": 0.48738960422530525, "learning_rate": 2.4690456880237902e-05, "loss": 0.4425, "step": 57085 }, { "epoch": 1.666822382155005, "grad_norm": 0.44671162109409185, "learning_rate": 2.4687753446877533e-05, "loss": 0.4099, "step": 57090 }, { "epoch": 1.6669683654253222, "grad_norm": 0.4708919451011401, "learning_rate": 2.468505001351717e-05, "loss": 0.4492, "step": 57095 }, { "epoch": 1.6671143486956395, "grad_norm": 0.4959647087336981, "learning_rate": 2.46823465801568e-05, "loss": 0.4495, "step": 57100 }, { "epoch": 1.6672603319659567, "grad_norm": 0.47363247280456583, "learning_rate": 2.467964314679643e-05, "loss": 0.443, "step": 57105 }, { "epoch": 1.667406315236274, "grad_norm": 0.464780216558311, "learning_rate": 2.4676939713436066e-05, "loss": 0.4487, "step": 57110 }, { "epoch": 1.6675522985065911, "grad_norm": 0.4989918795699005, "learning_rate": 2.4674236280075696e-05, "loss": 0.4223, "step": 57115 }, { "epoch": 1.6676982817769084, "grad_norm": 0.44908459751472973, "learning_rate": 2.4671532846715327e-05, "loss": 0.4557, "step": 57120 }, { "epoch": 1.6678442650472256, "grad_norm": 0.48827317028624406, "learning_rate": 2.4668829413354964e-05, "loss": 0.4323, "step": 57125 }, { "epoch": 1.6679902483175428, "grad_norm": 0.5086854951543809, "learning_rate": 2.4666125979994595e-05, "loss": 0.4745, "step": 57130 }, { "epoch": 1.66813623158786, "grad_norm": 0.4922608719004291, "learning_rate": 2.4663422546634226e-05, "loss": 0.449, "step": 57135 }, { "epoch": 1.6682822148581773, "grad_norm": 0.4730547872158946, "learning_rate": 2.466071911327386e-05, "loss": 0.4405, "step": 57140 }, { "epoch": 1.6684281981284945, "grad_norm": 0.45217797949639105, "learning_rate": 2.465801567991349e-05, "loss": 0.4456, "step": 57145 }, { "epoch": 1.6685741813988118, "grad_norm": 0.4521774588880305, "learning_rate": 2.4655312246553124e-05, "loss": 0.4225, "step": 57150 }, { "epoch": 1.668720164669129, "grad_norm": 0.43876675313909635, "learning_rate": 2.4652608813192758e-05, "loss": 0.4292, "step": 57155 }, { "epoch": 1.668866147939446, "grad_norm": 0.41796991623203106, "learning_rate": 2.464990537983239e-05, "loss": 0.4333, "step": 57160 }, { "epoch": 1.6690121312097634, "grad_norm": 0.49652716593220464, "learning_rate": 2.464720194647202e-05, "loss": 0.4395, "step": 57165 }, { "epoch": 1.6691581144800804, "grad_norm": 0.48877897893500905, "learning_rate": 2.4644498513111653e-05, "loss": 0.4335, "step": 57170 }, { "epoch": 1.669304097750398, "grad_norm": 0.46978049187769233, "learning_rate": 2.4641795079751284e-05, "loss": 0.4414, "step": 57175 }, { "epoch": 1.669450081020715, "grad_norm": 0.508245435480657, "learning_rate": 2.4639091646390918e-05, "loss": 0.4567, "step": 57180 }, { "epoch": 1.6695960642910324, "grad_norm": 0.47883507029155437, "learning_rate": 2.4636388213030552e-05, "loss": 0.424, "step": 57185 }, { "epoch": 1.6697420475613494, "grad_norm": 0.46902348331337573, "learning_rate": 2.4633684779670183e-05, "loss": 0.4625, "step": 57190 }, { "epoch": 1.6698880308316668, "grad_norm": 0.4930681460664508, "learning_rate": 2.4630981346309813e-05, "loss": 0.4542, "step": 57195 }, { "epoch": 1.6700340141019838, "grad_norm": 0.4803684610003657, "learning_rate": 2.4628277912949447e-05, "loss": 0.4305, "step": 57200 }, { "epoch": 1.6701799973723013, "grad_norm": 0.4808864940096352, "learning_rate": 2.4625574479589078e-05, "loss": 0.448, "step": 57205 }, { "epoch": 1.6703259806426183, "grad_norm": 0.5005285270206735, "learning_rate": 2.4622871046228712e-05, "loss": 0.4575, "step": 57210 }, { "epoch": 1.6704719639129357, "grad_norm": 0.44670870731306295, "learning_rate": 2.4620167612868346e-05, "loss": 0.4315, "step": 57215 }, { "epoch": 1.6706179471832527, "grad_norm": 0.44888038644646916, "learning_rate": 2.4617464179507977e-05, "loss": 0.4334, "step": 57220 }, { "epoch": 1.67076393045357, "grad_norm": 0.4550802963402769, "learning_rate": 2.4614760746147607e-05, "loss": 0.4278, "step": 57225 }, { "epoch": 1.6709099137238872, "grad_norm": 0.4207557462120044, "learning_rate": 2.461205731278724e-05, "loss": 0.3859, "step": 57230 }, { "epoch": 1.6710558969942044, "grad_norm": 0.4432757261356897, "learning_rate": 2.4609353879426875e-05, "loss": 0.4376, "step": 57235 }, { "epoch": 1.6712018802645217, "grad_norm": 0.5068449819698129, "learning_rate": 2.4606650446066506e-05, "loss": 0.45, "step": 57240 }, { "epoch": 1.6713478635348389, "grad_norm": 0.4889623708431098, "learning_rate": 2.460394701270614e-05, "loss": 0.4837, "step": 57245 }, { "epoch": 1.6714938468051561, "grad_norm": 0.470704984161827, "learning_rate": 2.460124357934577e-05, "loss": 0.4708, "step": 57250 }, { "epoch": 1.6716398300754733, "grad_norm": 0.49814514218792977, "learning_rate": 2.45985401459854e-05, "loss": 0.4434, "step": 57255 }, { "epoch": 1.6717858133457906, "grad_norm": 0.4994803622637055, "learning_rate": 2.4595836712625035e-05, "loss": 0.4451, "step": 57260 }, { "epoch": 1.6719317966161078, "grad_norm": 0.4556708474262653, "learning_rate": 2.459313327926467e-05, "loss": 0.4528, "step": 57265 }, { "epoch": 1.672077779886425, "grad_norm": 0.47996431205564116, "learning_rate": 2.45904298459043e-05, "loss": 0.4565, "step": 57270 }, { "epoch": 1.6722237631567423, "grad_norm": 0.49296159767419173, "learning_rate": 2.4587726412543934e-05, "loss": 0.4495, "step": 57275 }, { "epoch": 1.6723697464270595, "grad_norm": 0.45466856883159473, "learning_rate": 2.4585022979183564e-05, "loss": 0.4379, "step": 57280 }, { "epoch": 1.6725157296973767, "grad_norm": 0.4812618959087265, "learning_rate": 2.4582319545823195e-05, "loss": 0.4655, "step": 57285 }, { "epoch": 1.672661712967694, "grad_norm": 0.526100882833534, "learning_rate": 2.457961611246283e-05, "loss": 0.4628, "step": 57290 }, { "epoch": 1.6728076962380112, "grad_norm": 0.4708306978000663, "learning_rate": 2.4576912679102463e-05, "loss": 0.4364, "step": 57295 }, { "epoch": 1.6729536795083284, "grad_norm": 0.4745326160647823, "learning_rate": 2.4574209245742094e-05, "loss": 0.4283, "step": 57300 }, { "epoch": 1.6730996627786454, "grad_norm": 0.48688681546334056, "learning_rate": 2.4571505812381724e-05, "loss": 0.4159, "step": 57305 }, { "epoch": 1.6732456460489629, "grad_norm": 0.48239655927488506, "learning_rate": 2.456880237902136e-05, "loss": 0.4198, "step": 57310 }, { "epoch": 1.6733916293192799, "grad_norm": 0.46200657716417476, "learning_rate": 2.456609894566099e-05, "loss": 0.4501, "step": 57315 }, { "epoch": 1.6735376125895973, "grad_norm": 0.4863692882703548, "learning_rate": 2.4563395512300623e-05, "loss": 0.4566, "step": 57320 }, { "epoch": 1.6736835958599143, "grad_norm": 0.4946139181549754, "learning_rate": 2.4560692078940257e-05, "loss": 0.4386, "step": 57325 }, { "epoch": 1.6738295791302318, "grad_norm": 0.46631196473707043, "learning_rate": 2.4557988645579888e-05, "loss": 0.4206, "step": 57330 }, { "epoch": 1.6739755624005488, "grad_norm": 0.4541780271668449, "learning_rate": 2.4555285212219518e-05, "loss": 0.468, "step": 57335 }, { "epoch": 1.6741215456708662, "grad_norm": 0.4431181725941671, "learning_rate": 2.4552581778859152e-05, "loss": 0.4314, "step": 57340 }, { "epoch": 1.6742675289411832, "grad_norm": 0.4577329159769645, "learning_rate": 2.4549878345498783e-05, "loss": 0.4146, "step": 57345 }, { "epoch": 1.6744135122115007, "grad_norm": 0.5233069379468245, "learning_rate": 2.4547174912138417e-05, "loss": 0.4541, "step": 57350 }, { "epoch": 1.6745594954818177, "grad_norm": 0.46931900431179074, "learning_rate": 2.454447147877805e-05, "loss": 0.4287, "step": 57355 }, { "epoch": 1.6747054787521352, "grad_norm": 0.4741704150984065, "learning_rate": 2.454176804541768e-05, "loss": 0.4181, "step": 57360 }, { "epoch": 1.6748514620224522, "grad_norm": 0.4873660165946071, "learning_rate": 2.4539064612057312e-05, "loss": 0.4317, "step": 57365 }, { "epoch": 1.6749974452927694, "grad_norm": 0.4819229217960249, "learning_rate": 2.4536361178696946e-05, "loss": 0.4696, "step": 57370 }, { "epoch": 1.6751434285630866, "grad_norm": 0.45366426424744455, "learning_rate": 2.4533657745336577e-05, "loss": 0.4116, "step": 57375 }, { "epoch": 1.6752894118334039, "grad_norm": 0.534020026151532, "learning_rate": 2.453095431197621e-05, "loss": 0.4638, "step": 57380 }, { "epoch": 1.675435395103721, "grad_norm": 0.45525604204064585, "learning_rate": 2.4528250878615845e-05, "loss": 0.4179, "step": 57385 }, { "epoch": 1.6755813783740383, "grad_norm": 0.49827503729263395, "learning_rate": 2.4525547445255475e-05, "loss": 0.439, "step": 57390 }, { "epoch": 1.6757273616443555, "grad_norm": 0.5041313265438357, "learning_rate": 2.4522844011895106e-05, "loss": 0.4363, "step": 57395 }, { "epoch": 1.6758733449146728, "grad_norm": 0.46776604750593553, "learning_rate": 2.452014057853474e-05, "loss": 0.4609, "step": 57400 }, { "epoch": 1.67601932818499, "grad_norm": 0.47338919511905647, "learning_rate": 2.4517437145174374e-05, "loss": 0.4337, "step": 57405 }, { "epoch": 1.6761653114553072, "grad_norm": 0.46240202687922755, "learning_rate": 2.4514733711814005e-05, "loss": 0.4348, "step": 57410 }, { "epoch": 1.6763112947256245, "grad_norm": 0.5050417162874375, "learning_rate": 2.451203027845364e-05, "loss": 0.4592, "step": 57415 }, { "epoch": 1.6764572779959417, "grad_norm": 0.5015773546454595, "learning_rate": 2.450932684509327e-05, "loss": 0.4483, "step": 57420 }, { "epoch": 1.676603261266259, "grad_norm": 0.49051086634408075, "learning_rate": 2.45066234117329e-05, "loss": 0.4571, "step": 57425 }, { "epoch": 1.6767492445365761, "grad_norm": 0.4525290940680051, "learning_rate": 2.4503919978372534e-05, "loss": 0.4372, "step": 57430 }, { "epoch": 1.6768952278068934, "grad_norm": 0.4851812518671584, "learning_rate": 2.4501216545012168e-05, "loss": 0.4354, "step": 57435 }, { "epoch": 1.6770412110772106, "grad_norm": 0.4902088827104363, "learning_rate": 2.44985131116518e-05, "loss": 0.4136, "step": 57440 }, { "epoch": 1.6771871943475278, "grad_norm": 0.4927615444939488, "learning_rate": 2.4495809678291433e-05, "loss": 0.4417, "step": 57445 }, { "epoch": 1.6773331776178448, "grad_norm": 0.4745245881054251, "learning_rate": 2.4493106244931063e-05, "loss": 0.4476, "step": 57450 }, { "epoch": 1.6774791608881623, "grad_norm": 0.4685111763419823, "learning_rate": 2.4490402811570694e-05, "loss": 0.4563, "step": 57455 }, { "epoch": 1.6776251441584793, "grad_norm": 0.48993484286968464, "learning_rate": 2.4487699378210328e-05, "loss": 0.4475, "step": 57460 }, { "epoch": 1.6777711274287967, "grad_norm": 0.5192683224498738, "learning_rate": 2.4484995944849962e-05, "loss": 0.4486, "step": 57465 }, { "epoch": 1.6779171106991138, "grad_norm": 0.4765498951151913, "learning_rate": 2.4482292511489592e-05, "loss": 0.4335, "step": 57470 }, { "epoch": 1.6780630939694312, "grad_norm": 0.4840397053637984, "learning_rate": 2.4479589078129227e-05, "loss": 0.4408, "step": 57475 }, { "epoch": 1.6782090772397482, "grad_norm": 0.473409733416577, "learning_rate": 2.4476885644768857e-05, "loss": 0.4511, "step": 57480 }, { "epoch": 1.6783550605100657, "grad_norm": 0.4790274990832236, "learning_rate": 2.4474182211408488e-05, "loss": 0.4208, "step": 57485 }, { "epoch": 1.6785010437803827, "grad_norm": 0.4735762441663759, "learning_rate": 2.4471478778048125e-05, "loss": 0.4349, "step": 57490 }, { "epoch": 1.6786470270507001, "grad_norm": 0.45783635127396594, "learning_rate": 2.4468775344687756e-05, "loss": 0.4582, "step": 57495 }, { "epoch": 1.6787930103210171, "grad_norm": 0.47498347559433757, "learning_rate": 2.4466071911327386e-05, "loss": 0.4639, "step": 57500 }, { "epoch": 1.6789389935913346, "grad_norm": 0.48919635173081977, "learning_rate": 2.446336847796702e-05, "loss": 0.4504, "step": 57505 }, { "epoch": 1.6790849768616516, "grad_norm": 0.43036053838284927, "learning_rate": 2.446066504460665e-05, "loss": 0.4238, "step": 57510 }, { "epoch": 1.679230960131969, "grad_norm": 0.4635181729533374, "learning_rate": 2.445796161124628e-05, "loss": 0.4261, "step": 57515 }, { "epoch": 1.679376943402286, "grad_norm": 0.46303352724530417, "learning_rate": 2.445525817788592e-05, "loss": 0.4418, "step": 57520 }, { "epoch": 1.6795229266726033, "grad_norm": 0.4601061316193282, "learning_rate": 2.445255474452555e-05, "loss": 0.4548, "step": 57525 }, { "epoch": 1.6796689099429205, "grad_norm": 0.4473368701357224, "learning_rate": 2.444985131116518e-05, "loss": 0.481, "step": 57530 }, { "epoch": 1.6798148932132377, "grad_norm": 0.45613045696145504, "learning_rate": 2.4447147877804814e-05, "loss": 0.4269, "step": 57535 }, { "epoch": 1.679960876483555, "grad_norm": 0.49755725713450333, "learning_rate": 2.4444444444444445e-05, "loss": 0.4569, "step": 57540 }, { "epoch": 1.6801068597538722, "grad_norm": 0.5003038351664721, "learning_rate": 2.4441741011084076e-05, "loss": 0.4377, "step": 57545 }, { "epoch": 1.6802528430241894, "grad_norm": 0.46338286960434305, "learning_rate": 2.4439037577723713e-05, "loss": 0.4392, "step": 57550 }, { "epoch": 1.6803988262945067, "grad_norm": 0.48354188202283477, "learning_rate": 2.4436334144363344e-05, "loss": 0.4658, "step": 57555 }, { "epoch": 1.6805448095648239, "grad_norm": 0.49950461119539974, "learning_rate": 2.4433630711002974e-05, "loss": 0.4465, "step": 57560 }, { "epoch": 1.6806907928351411, "grad_norm": 0.4882950984995475, "learning_rate": 2.4430927277642608e-05, "loss": 0.4479, "step": 57565 }, { "epoch": 1.6808367761054583, "grad_norm": 0.4650397578576874, "learning_rate": 2.442822384428224e-05, "loss": 0.4179, "step": 57570 }, { "epoch": 1.6809827593757756, "grad_norm": 0.47380899220832784, "learning_rate": 2.4425520410921873e-05, "loss": 0.4352, "step": 57575 }, { "epoch": 1.6811287426460928, "grad_norm": 0.49609897595076463, "learning_rate": 2.4422816977561503e-05, "loss": 0.4382, "step": 57580 }, { "epoch": 1.68127472591641, "grad_norm": 0.4681655869149722, "learning_rate": 2.4420113544201137e-05, "loss": 0.4626, "step": 57585 }, { "epoch": 1.6814207091867273, "grad_norm": 0.4362979928351901, "learning_rate": 2.4417410110840768e-05, "loss": 0.4263, "step": 57590 }, { "epoch": 1.6815666924570443, "grad_norm": 0.4885064306860862, "learning_rate": 2.4414706677480402e-05, "loss": 0.4227, "step": 57595 }, { "epoch": 1.6817126757273617, "grad_norm": 0.46484965421726643, "learning_rate": 2.4412003244120033e-05, "loss": 0.4315, "step": 57600 }, { "epoch": 1.6818586589976787, "grad_norm": 0.4723687461623009, "learning_rate": 2.4409299810759667e-05, "loss": 0.4573, "step": 57605 }, { "epoch": 1.6820046422679962, "grad_norm": 0.44996692089680407, "learning_rate": 2.4406596377399297e-05, "loss": 0.4432, "step": 57610 }, { "epoch": 1.6821506255383132, "grad_norm": 0.49886283060761566, "learning_rate": 2.440389294403893e-05, "loss": 0.437, "step": 57615 }, { "epoch": 1.6822966088086306, "grad_norm": 0.4741474906171807, "learning_rate": 2.4401189510678562e-05, "loss": 0.4456, "step": 57620 }, { "epoch": 1.6824425920789476, "grad_norm": 0.4797096504203245, "learning_rate": 2.4398486077318196e-05, "loss": 0.4423, "step": 57625 }, { "epoch": 1.682588575349265, "grad_norm": 0.4070497020927791, "learning_rate": 2.4395782643957827e-05, "loss": 0.4578, "step": 57630 }, { "epoch": 1.682734558619582, "grad_norm": 0.4510726169941429, "learning_rate": 2.439307921059746e-05, "loss": 0.449, "step": 57635 }, { "epoch": 1.6828805418898996, "grad_norm": 0.4815053319576482, "learning_rate": 2.439037577723709e-05, "loss": 0.4329, "step": 57640 }, { "epoch": 1.6830265251602166, "grad_norm": 0.49222896258823357, "learning_rate": 2.4387672343876725e-05, "loss": 0.4579, "step": 57645 }, { "epoch": 1.683172508430534, "grad_norm": 0.44218490608385735, "learning_rate": 2.4384968910516356e-05, "loss": 0.4438, "step": 57650 }, { "epoch": 1.683318491700851, "grad_norm": 0.49591172625227575, "learning_rate": 2.438226547715599e-05, "loss": 0.4333, "step": 57655 }, { "epoch": 1.6834644749711685, "grad_norm": 0.4777659724222559, "learning_rate": 2.4379562043795624e-05, "loss": 0.4122, "step": 57660 }, { "epoch": 1.6836104582414855, "grad_norm": 0.4849938865166754, "learning_rate": 2.4376858610435255e-05, "loss": 0.4034, "step": 57665 }, { "epoch": 1.6837564415118027, "grad_norm": 0.48788525855284653, "learning_rate": 2.4374155177074885e-05, "loss": 0.472, "step": 57670 }, { "epoch": 1.68390242478212, "grad_norm": 0.44043549294287226, "learning_rate": 2.437145174371452e-05, "loss": 0.4549, "step": 57675 }, { "epoch": 1.6840484080524372, "grad_norm": 0.4498955616684933, "learning_rate": 2.436874831035415e-05, "loss": 0.4235, "step": 57680 }, { "epoch": 1.6841943913227544, "grad_norm": 0.4645448061487231, "learning_rate": 2.436604487699378e-05, "loss": 0.4443, "step": 57685 }, { "epoch": 1.6843403745930716, "grad_norm": 0.49575938956490817, "learning_rate": 2.4363341443633418e-05, "loss": 0.4496, "step": 57690 }, { "epoch": 1.6844863578633889, "grad_norm": 0.47062782353617405, "learning_rate": 2.436063801027305e-05, "loss": 0.4421, "step": 57695 }, { "epoch": 1.684632341133706, "grad_norm": 0.5228849157403928, "learning_rate": 2.435793457691268e-05, "loss": 0.4715, "step": 57700 }, { "epoch": 1.6847783244040233, "grad_norm": 0.3946258705576903, "learning_rate": 2.4355231143552313e-05, "loss": 0.4044, "step": 57705 }, { "epoch": 1.6849243076743405, "grad_norm": 0.45460778564351234, "learning_rate": 2.4352527710191944e-05, "loss": 0.4439, "step": 57710 }, { "epoch": 1.6850702909446578, "grad_norm": 0.49123730366025203, "learning_rate": 2.4349824276831574e-05, "loss": 0.4529, "step": 57715 }, { "epoch": 1.685216274214975, "grad_norm": 0.4636260475079183, "learning_rate": 2.4347120843471212e-05, "loss": 0.4566, "step": 57720 }, { "epoch": 1.6853622574852922, "grad_norm": 0.5068623027662392, "learning_rate": 2.4344417410110842e-05, "loss": 0.4275, "step": 57725 }, { "epoch": 1.6855082407556095, "grad_norm": 0.4530620265579184, "learning_rate": 2.4341713976750473e-05, "loss": 0.4386, "step": 57730 }, { "epoch": 1.6856542240259267, "grad_norm": 0.47714436149902195, "learning_rate": 2.4339010543390107e-05, "loss": 0.4701, "step": 57735 }, { "epoch": 1.6858002072962437, "grad_norm": 0.44847526997086173, "learning_rate": 2.4336307110029738e-05, "loss": 0.4418, "step": 57740 }, { "epoch": 1.6859461905665611, "grad_norm": 0.5109963367719136, "learning_rate": 2.433360367666937e-05, "loss": 0.4239, "step": 57745 }, { "epoch": 1.6860921738368781, "grad_norm": 0.4818766792930208, "learning_rate": 2.4330900243309006e-05, "loss": 0.4461, "step": 57750 }, { "epoch": 1.6862381571071956, "grad_norm": 0.519332410270032, "learning_rate": 2.4328196809948636e-05, "loss": 0.4462, "step": 57755 }, { "epoch": 1.6863841403775126, "grad_norm": 0.4992002288648333, "learning_rate": 2.4325493376588267e-05, "loss": 0.4334, "step": 57760 }, { "epoch": 1.68653012364783, "grad_norm": 0.49330783729827726, "learning_rate": 2.43227899432279e-05, "loss": 0.4394, "step": 57765 }, { "epoch": 1.686676106918147, "grad_norm": 0.52510475669949, "learning_rate": 2.432008650986753e-05, "loss": 0.4401, "step": 57770 }, { "epoch": 1.6868220901884645, "grad_norm": 0.4287596837846171, "learning_rate": 2.4317383076507166e-05, "loss": 0.4422, "step": 57775 }, { "epoch": 1.6869680734587815, "grad_norm": 0.5178812075242422, "learning_rate": 2.43146796431468e-05, "loss": 0.4625, "step": 57780 }, { "epoch": 1.687114056729099, "grad_norm": 0.44964126369631496, "learning_rate": 2.431197620978643e-05, "loss": 0.4407, "step": 57785 }, { "epoch": 1.687260039999416, "grad_norm": 0.4703683192862401, "learning_rate": 2.430927277642606e-05, "loss": 0.4226, "step": 57790 }, { "epoch": 1.6874060232697334, "grad_norm": 0.5233299297978793, "learning_rate": 2.4306569343065695e-05, "loss": 0.4448, "step": 57795 }, { "epoch": 1.6875520065400504, "grad_norm": 0.4381971695710777, "learning_rate": 2.4303865909705325e-05, "loss": 0.4312, "step": 57800 }, { "epoch": 1.687697989810368, "grad_norm": 0.48662916469302836, "learning_rate": 2.430116247634496e-05, "loss": 0.4349, "step": 57805 }, { "epoch": 1.687843973080685, "grad_norm": 0.4596415576585456, "learning_rate": 2.4298459042984593e-05, "loss": 0.4293, "step": 57810 }, { "epoch": 1.6879899563510021, "grad_norm": 0.4470906871938331, "learning_rate": 2.4295755609624224e-05, "loss": 0.4231, "step": 57815 }, { "epoch": 1.6881359396213194, "grad_norm": 0.5379910475496585, "learning_rate": 2.4293052176263855e-05, "loss": 0.4434, "step": 57820 }, { "epoch": 1.6882819228916366, "grad_norm": 0.5397161859042601, "learning_rate": 2.429034874290349e-05, "loss": 0.4489, "step": 57825 }, { "epoch": 1.6884279061619538, "grad_norm": 0.45268650156861384, "learning_rate": 2.4287645309543123e-05, "loss": 0.4201, "step": 57830 }, { "epoch": 1.688573889432271, "grad_norm": 0.47610908223884446, "learning_rate": 2.4284941876182753e-05, "loss": 0.4148, "step": 57835 }, { "epoch": 1.6887198727025883, "grad_norm": 0.5115843583899452, "learning_rate": 2.4282238442822387e-05, "loss": 0.4652, "step": 57840 }, { "epoch": 1.6888658559729055, "grad_norm": 0.4715036684232393, "learning_rate": 2.4279535009462018e-05, "loss": 0.4365, "step": 57845 }, { "epoch": 1.6890118392432227, "grad_norm": 0.4639220513698371, "learning_rate": 2.427683157610165e-05, "loss": 0.4364, "step": 57850 }, { "epoch": 1.68915782251354, "grad_norm": 0.45587928623630536, "learning_rate": 2.4274128142741283e-05, "loss": 0.4364, "step": 57855 }, { "epoch": 1.6893038057838572, "grad_norm": 0.4836762800255483, "learning_rate": 2.4271424709380917e-05, "loss": 0.4377, "step": 57860 }, { "epoch": 1.6894497890541744, "grad_norm": 0.4701413150824606, "learning_rate": 2.4268721276020547e-05, "loss": 0.4443, "step": 57865 }, { "epoch": 1.6895957723244917, "grad_norm": 0.5015617276754109, "learning_rate": 2.426601784266018e-05, "loss": 0.4528, "step": 57870 }, { "epoch": 1.6897417555948089, "grad_norm": 0.4622711947116225, "learning_rate": 2.4263314409299812e-05, "loss": 0.454, "step": 57875 }, { "epoch": 1.689887738865126, "grad_norm": 0.42733201554286576, "learning_rate": 2.4260610975939443e-05, "loss": 0.4337, "step": 57880 }, { "epoch": 1.6900337221354431, "grad_norm": 0.4853023635787602, "learning_rate": 2.4257907542579077e-05, "loss": 0.4581, "step": 57885 }, { "epoch": 1.6901797054057606, "grad_norm": 0.5043036516479332, "learning_rate": 2.425520410921871e-05, "loss": 0.4385, "step": 57890 }, { "epoch": 1.6903256886760776, "grad_norm": 0.44226894990432725, "learning_rate": 2.425250067585834e-05, "loss": 0.4198, "step": 57895 }, { "epoch": 1.690471671946395, "grad_norm": 0.4952339794041037, "learning_rate": 2.4249797242497975e-05, "loss": 0.466, "step": 57900 }, { "epoch": 1.690617655216712, "grad_norm": 0.44813443356100924, "learning_rate": 2.4247093809137606e-05, "loss": 0.4608, "step": 57905 }, { "epoch": 1.6907636384870295, "grad_norm": 0.4820053434230484, "learning_rate": 2.4244390375777236e-05, "loss": 0.4335, "step": 57910 }, { "epoch": 1.6909096217573465, "grad_norm": 0.48155803411298365, "learning_rate": 2.424168694241687e-05, "loss": 0.4429, "step": 57915 }, { "epoch": 1.691055605027664, "grad_norm": 0.42930307063210765, "learning_rate": 2.4238983509056504e-05, "loss": 0.4394, "step": 57920 }, { "epoch": 1.691201588297981, "grad_norm": 0.4618865990166971, "learning_rate": 2.4236280075696135e-05, "loss": 0.4371, "step": 57925 }, { "epoch": 1.6913475715682984, "grad_norm": 0.48153383365491575, "learning_rate": 2.423357664233577e-05, "loss": 0.4456, "step": 57930 }, { "epoch": 1.6914935548386154, "grad_norm": 0.4720002274730781, "learning_rate": 2.42308732089754e-05, "loss": 0.4164, "step": 57935 }, { "epoch": 1.6916395381089329, "grad_norm": 0.4658293987773609, "learning_rate": 2.422816977561503e-05, "loss": 0.4382, "step": 57940 }, { "epoch": 1.6917855213792499, "grad_norm": 0.4843111609508382, "learning_rate": 2.4225466342254664e-05, "loss": 0.455, "step": 57945 }, { "epoch": 1.6919315046495673, "grad_norm": 0.4293813716537582, "learning_rate": 2.42227629088943e-05, "loss": 0.4169, "step": 57950 }, { "epoch": 1.6920774879198843, "grad_norm": 0.48043179353126536, "learning_rate": 2.422005947553393e-05, "loss": 0.4377, "step": 57955 }, { "epoch": 1.6922234711902016, "grad_norm": 0.4979629347561012, "learning_rate": 2.421735604217356e-05, "loss": 0.4257, "step": 57960 }, { "epoch": 1.6923694544605188, "grad_norm": 0.462568222862848, "learning_rate": 2.4214652608813194e-05, "loss": 0.4241, "step": 57965 }, { "epoch": 1.692515437730836, "grad_norm": 0.4755046035921417, "learning_rate": 2.4211949175452824e-05, "loss": 0.4464, "step": 57970 }, { "epoch": 1.6926614210011532, "grad_norm": 0.4715987155786095, "learning_rate": 2.4209245742092458e-05, "loss": 0.4377, "step": 57975 }, { "epoch": 1.6928074042714705, "grad_norm": 0.4442712669021609, "learning_rate": 2.4206542308732092e-05, "loss": 0.4234, "step": 57980 }, { "epoch": 1.6929533875417877, "grad_norm": 0.5013575220251977, "learning_rate": 2.4203838875371723e-05, "loss": 0.4275, "step": 57985 }, { "epoch": 1.693099370812105, "grad_norm": 0.48487976767501567, "learning_rate": 2.4201135442011353e-05, "loss": 0.4339, "step": 57990 }, { "epoch": 1.6932453540824222, "grad_norm": 0.49790101428700567, "learning_rate": 2.4198432008650988e-05, "loss": 0.4321, "step": 57995 }, { "epoch": 1.6933913373527394, "grad_norm": 0.49478583194599274, "learning_rate": 2.419572857529062e-05, "loss": 0.4243, "step": 58000 }, { "epoch": 1.6935373206230566, "grad_norm": 0.44549482738341944, "learning_rate": 2.4193025141930252e-05, "loss": 0.4253, "step": 58005 }, { "epoch": 1.6936833038933738, "grad_norm": 0.5164859265527273, "learning_rate": 2.4190321708569886e-05, "loss": 0.4392, "step": 58010 }, { "epoch": 1.693829287163691, "grad_norm": 0.474201119293214, "learning_rate": 2.4187618275209517e-05, "loss": 0.4364, "step": 58015 }, { "epoch": 1.6939752704340083, "grad_norm": 0.4933059316904374, "learning_rate": 2.4184914841849147e-05, "loss": 0.4638, "step": 58020 }, { "epoch": 1.6941212537043255, "grad_norm": 0.4821082908216501, "learning_rate": 2.418221140848878e-05, "loss": 0.4335, "step": 58025 }, { "epoch": 1.6942672369746425, "grad_norm": 0.4823319512096831, "learning_rate": 2.4179507975128415e-05, "loss": 0.4666, "step": 58030 }, { "epoch": 1.69441322024496, "grad_norm": 0.4362061872380024, "learning_rate": 2.4176804541768046e-05, "loss": 0.4412, "step": 58035 }, { "epoch": 1.694559203515277, "grad_norm": 0.45609758358978886, "learning_rate": 2.417410110840768e-05, "loss": 0.4521, "step": 58040 }, { "epoch": 1.6947051867855945, "grad_norm": 0.5386623279046885, "learning_rate": 2.417139767504731e-05, "loss": 0.4447, "step": 58045 }, { "epoch": 1.6948511700559115, "grad_norm": 0.4766221993704168, "learning_rate": 2.416869424168694e-05, "loss": 0.4358, "step": 58050 }, { "epoch": 1.694997153326229, "grad_norm": 0.5007596338561329, "learning_rate": 2.4165990808326575e-05, "loss": 0.4594, "step": 58055 }, { "epoch": 1.695143136596546, "grad_norm": 0.48463640258505686, "learning_rate": 2.416328737496621e-05, "loss": 0.4606, "step": 58060 }, { "epoch": 1.6952891198668634, "grad_norm": 0.47350678199766966, "learning_rate": 2.416058394160584e-05, "loss": 0.463, "step": 58065 }, { "epoch": 1.6954351031371804, "grad_norm": 0.481843390042811, "learning_rate": 2.4157880508245474e-05, "loss": 0.4593, "step": 58070 }, { "epoch": 1.6955810864074978, "grad_norm": 0.4879594750296457, "learning_rate": 2.4155177074885105e-05, "loss": 0.4344, "step": 58075 }, { "epoch": 1.6957270696778148, "grad_norm": 0.46326673637364807, "learning_rate": 2.4152473641524735e-05, "loss": 0.4474, "step": 58080 }, { "epoch": 1.6958730529481323, "grad_norm": 0.4618031042953875, "learning_rate": 2.4149770208164373e-05, "loss": 0.4355, "step": 58085 }, { "epoch": 1.6960190362184493, "grad_norm": 0.4526237294548608, "learning_rate": 2.4147066774804003e-05, "loss": 0.4103, "step": 58090 }, { "epoch": 1.6961650194887667, "grad_norm": 0.4501689670941438, "learning_rate": 2.4144363341443634e-05, "loss": 0.4415, "step": 58095 }, { "epoch": 1.6963110027590838, "grad_norm": 0.40307634733222697, "learning_rate": 2.4141659908083268e-05, "loss": 0.4244, "step": 58100 }, { "epoch": 1.696456986029401, "grad_norm": 0.4795844745512309, "learning_rate": 2.41389564747229e-05, "loss": 0.4384, "step": 58105 }, { "epoch": 1.6966029692997182, "grad_norm": 0.4417679377834812, "learning_rate": 2.413625304136253e-05, "loss": 0.4295, "step": 58110 }, { "epoch": 1.6967489525700354, "grad_norm": 0.46105647637943126, "learning_rate": 2.4133549608002167e-05, "loss": 0.4443, "step": 58115 }, { "epoch": 1.6968949358403527, "grad_norm": 0.44425553855938316, "learning_rate": 2.4130846174641797e-05, "loss": 0.4351, "step": 58120 }, { "epoch": 1.69704091911067, "grad_norm": 0.4461541702223373, "learning_rate": 2.4128142741281428e-05, "loss": 0.4272, "step": 58125 }, { "epoch": 1.6971869023809871, "grad_norm": 0.4946959654763197, "learning_rate": 2.4125439307921062e-05, "loss": 0.4275, "step": 58130 }, { "epoch": 1.6973328856513044, "grad_norm": 0.5085363275567824, "learning_rate": 2.4122735874560692e-05, "loss": 0.4486, "step": 58135 }, { "epoch": 1.6974788689216216, "grad_norm": 0.4912140377892966, "learning_rate": 2.4120032441200323e-05, "loss": 0.4466, "step": 58140 }, { "epoch": 1.6976248521919388, "grad_norm": 0.5000697833515493, "learning_rate": 2.411732900783996e-05, "loss": 0.4506, "step": 58145 }, { "epoch": 1.697770835462256, "grad_norm": 0.49156139312911135, "learning_rate": 2.411462557447959e-05, "loss": 0.458, "step": 58150 }, { "epoch": 1.6979168187325733, "grad_norm": 0.4715508132976585, "learning_rate": 2.411192214111922e-05, "loss": 0.4602, "step": 58155 }, { "epoch": 1.6980628020028905, "grad_norm": 0.450703775594059, "learning_rate": 2.4109218707758856e-05, "loss": 0.4356, "step": 58160 }, { "epoch": 1.6982087852732077, "grad_norm": 0.502363496229097, "learning_rate": 2.4106515274398486e-05, "loss": 0.4389, "step": 58165 }, { "epoch": 1.698354768543525, "grad_norm": 0.5439677328907733, "learning_rate": 2.410381184103812e-05, "loss": 0.4823, "step": 58170 }, { "epoch": 1.698500751813842, "grad_norm": 0.4578571339876259, "learning_rate": 2.4101108407677754e-05, "loss": 0.4377, "step": 58175 }, { "epoch": 1.6986467350841594, "grad_norm": 0.4882756685844646, "learning_rate": 2.4098404974317385e-05, "loss": 0.4576, "step": 58180 }, { "epoch": 1.6987927183544764, "grad_norm": 0.4735526318740564, "learning_rate": 2.4095701540957016e-05, "loss": 0.4299, "step": 58185 }, { "epoch": 1.6989387016247939, "grad_norm": 0.47825950670576584, "learning_rate": 2.409299810759665e-05, "loss": 0.4233, "step": 58190 }, { "epoch": 1.6990846848951109, "grad_norm": 0.49294338658563336, "learning_rate": 2.409029467423628e-05, "loss": 0.4658, "step": 58195 }, { "epoch": 1.6992306681654283, "grad_norm": 0.44816156025587967, "learning_rate": 2.4087591240875914e-05, "loss": 0.4197, "step": 58200 }, { "epoch": 1.6993766514357453, "grad_norm": 0.46103907393338306, "learning_rate": 2.4084887807515545e-05, "loss": 0.4708, "step": 58205 }, { "epoch": 1.6995226347060628, "grad_norm": 0.4685498741700941, "learning_rate": 2.408218437415518e-05, "loss": 0.4592, "step": 58210 }, { "epoch": 1.6996686179763798, "grad_norm": 0.4561856437552305, "learning_rate": 2.407948094079481e-05, "loss": 0.4396, "step": 58215 }, { "epoch": 1.6998146012466973, "grad_norm": 0.48613403059892873, "learning_rate": 2.4076777507434443e-05, "loss": 0.4266, "step": 58220 }, { "epoch": 1.6999605845170143, "grad_norm": 0.4656973916579035, "learning_rate": 2.4074074074074074e-05, "loss": 0.4417, "step": 58225 }, { "epoch": 1.7001065677873317, "grad_norm": 0.4605800436617728, "learning_rate": 2.4071370640713708e-05, "loss": 0.4355, "step": 58230 }, { "epoch": 1.7002525510576487, "grad_norm": 0.46031065182303865, "learning_rate": 2.406866720735334e-05, "loss": 0.421, "step": 58235 }, { "epoch": 1.7003985343279662, "grad_norm": 0.45593670167923717, "learning_rate": 2.4065963773992973e-05, "loss": 0.4277, "step": 58240 }, { "epoch": 1.7005445175982832, "grad_norm": 0.45335080738465683, "learning_rate": 2.4063260340632603e-05, "loss": 0.4458, "step": 58245 }, { "epoch": 1.7006905008686004, "grad_norm": 0.5061407940600384, "learning_rate": 2.4060556907272237e-05, "loss": 0.4484, "step": 58250 }, { "epoch": 1.7008364841389176, "grad_norm": 0.47508766194750274, "learning_rate": 2.405785347391187e-05, "loss": 0.4274, "step": 58255 }, { "epoch": 1.7009824674092349, "grad_norm": 0.4658563309620526, "learning_rate": 2.4055150040551502e-05, "loss": 0.4402, "step": 58260 }, { "epoch": 1.701128450679552, "grad_norm": 0.5005036726993393, "learning_rate": 2.4052446607191133e-05, "loss": 0.4699, "step": 58265 }, { "epoch": 1.7012744339498693, "grad_norm": 0.5062138681437247, "learning_rate": 2.4049743173830767e-05, "loss": 0.4563, "step": 58270 }, { "epoch": 1.7014204172201866, "grad_norm": 0.42159316088400284, "learning_rate": 2.4047039740470397e-05, "loss": 0.4298, "step": 58275 }, { "epoch": 1.7015664004905038, "grad_norm": 0.47839788341427236, "learning_rate": 2.404433630711003e-05, "loss": 0.4754, "step": 58280 }, { "epoch": 1.701712383760821, "grad_norm": 0.4422503105861303, "learning_rate": 2.4041632873749665e-05, "loss": 0.4397, "step": 58285 }, { "epoch": 1.7018583670311382, "grad_norm": 0.47656417445744276, "learning_rate": 2.4038929440389296e-05, "loss": 0.4389, "step": 58290 }, { "epoch": 1.7020043503014555, "grad_norm": 0.46457059940982015, "learning_rate": 2.4036226007028927e-05, "loss": 0.4212, "step": 58295 }, { "epoch": 1.7021503335717727, "grad_norm": 0.5209228076011653, "learning_rate": 2.403352257366856e-05, "loss": 0.4669, "step": 58300 }, { "epoch": 1.70229631684209, "grad_norm": 0.45258484553600203, "learning_rate": 2.403081914030819e-05, "loss": 0.4507, "step": 58305 }, { "epoch": 1.7024423001124072, "grad_norm": 0.46885158109207203, "learning_rate": 2.4028115706947825e-05, "loss": 0.425, "step": 58310 }, { "epoch": 1.7025882833827244, "grad_norm": 0.47518704197516315, "learning_rate": 2.402541227358746e-05, "loss": 0.4334, "step": 58315 }, { "epoch": 1.7027342666530414, "grad_norm": 0.48804457338738955, "learning_rate": 2.402270884022709e-05, "loss": 0.4483, "step": 58320 }, { "epoch": 1.7028802499233588, "grad_norm": 0.4886864058013687, "learning_rate": 2.402000540686672e-05, "loss": 0.4455, "step": 58325 }, { "epoch": 1.7030262331936759, "grad_norm": 0.4567027347270024, "learning_rate": 2.4017301973506354e-05, "loss": 0.4642, "step": 58330 }, { "epoch": 1.7031722164639933, "grad_norm": 0.4757728477598552, "learning_rate": 2.4014598540145985e-05, "loss": 0.4353, "step": 58335 }, { "epoch": 1.7033181997343103, "grad_norm": 0.4864714143441116, "learning_rate": 2.401189510678562e-05, "loss": 0.4618, "step": 58340 }, { "epoch": 1.7034641830046278, "grad_norm": 0.4743939644944442, "learning_rate": 2.4009191673425253e-05, "loss": 0.4375, "step": 58345 }, { "epoch": 1.7036101662749448, "grad_norm": 0.4779141667719394, "learning_rate": 2.4006488240064884e-05, "loss": 0.4435, "step": 58350 }, { "epoch": 1.7037561495452622, "grad_norm": 0.4875085205206901, "learning_rate": 2.4003784806704514e-05, "loss": 0.4532, "step": 58355 }, { "epoch": 1.7039021328155792, "grad_norm": 0.4628672676004039, "learning_rate": 2.400108137334415e-05, "loss": 0.4327, "step": 58360 }, { "epoch": 1.7040481160858967, "grad_norm": 0.5006720642098826, "learning_rate": 2.399837793998378e-05, "loss": 0.4333, "step": 58365 }, { "epoch": 1.7041940993562137, "grad_norm": 0.4928492004475997, "learning_rate": 2.3995674506623413e-05, "loss": 0.4522, "step": 58370 }, { "epoch": 1.7043400826265311, "grad_norm": 0.43353241313318674, "learning_rate": 2.3992971073263047e-05, "loss": 0.4368, "step": 58375 }, { "epoch": 1.7044860658968481, "grad_norm": 0.4761056151441301, "learning_rate": 2.3990267639902678e-05, "loss": 0.4519, "step": 58380 }, { "epoch": 1.7046320491671656, "grad_norm": 0.5123213256982408, "learning_rate": 2.3987564206542308e-05, "loss": 0.4735, "step": 58385 }, { "epoch": 1.7047780324374826, "grad_norm": 0.4696163174387757, "learning_rate": 2.3984860773181942e-05, "loss": 0.4539, "step": 58390 }, { "epoch": 1.7049240157077998, "grad_norm": 0.4885504890146322, "learning_rate": 2.3982157339821573e-05, "loss": 0.4365, "step": 58395 }, { "epoch": 1.705069998978117, "grad_norm": 0.49886874271184034, "learning_rate": 2.3979453906461207e-05, "loss": 0.4831, "step": 58400 }, { "epoch": 1.7052159822484343, "grad_norm": 0.5181844165309045, "learning_rate": 2.397675047310084e-05, "loss": 0.4708, "step": 58405 }, { "epoch": 1.7053619655187515, "grad_norm": 0.4709765984309053, "learning_rate": 2.397404703974047e-05, "loss": 0.4504, "step": 58410 }, { "epoch": 1.7055079487890688, "grad_norm": 0.5090246892977971, "learning_rate": 2.3971343606380102e-05, "loss": 0.4733, "step": 58415 }, { "epoch": 1.705653932059386, "grad_norm": 0.4223508666092475, "learning_rate": 2.3968640173019736e-05, "loss": 0.4158, "step": 58420 }, { "epoch": 1.7057999153297032, "grad_norm": 0.49066799885732193, "learning_rate": 2.396593673965937e-05, "loss": 0.4552, "step": 58425 }, { "epoch": 1.7059458986000204, "grad_norm": 0.4662655593899223, "learning_rate": 2.3963233306299e-05, "loss": 0.4806, "step": 58430 }, { "epoch": 1.7060918818703377, "grad_norm": 0.501134684464807, "learning_rate": 2.3960529872938635e-05, "loss": 0.4562, "step": 58435 }, { "epoch": 1.706237865140655, "grad_norm": 0.4863251174363092, "learning_rate": 2.3957826439578265e-05, "loss": 0.467, "step": 58440 }, { "epoch": 1.7063838484109721, "grad_norm": 0.4772105816090938, "learning_rate": 2.3955123006217896e-05, "loss": 0.4452, "step": 58445 }, { "epoch": 1.7065298316812894, "grad_norm": 0.46091694212990236, "learning_rate": 2.395241957285753e-05, "loss": 0.431, "step": 58450 }, { "epoch": 1.7066758149516066, "grad_norm": 0.487938932006253, "learning_rate": 2.3949716139497164e-05, "loss": 0.429, "step": 58455 }, { "epoch": 1.7068217982219238, "grad_norm": 0.5425034153922154, "learning_rate": 2.3947012706136795e-05, "loss": 0.4828, "step": 58460 }, { "epoch": 1.706967781492241, "grad_norm": 0.5155396951071604, "learning_rate": 2.394430927277643e-05, "loss": 0.444, "step": 58465 }, { "epoch": 1.7071137647625583, "grad_norm": 0.45845474017043075, "learning_rate": 2.394160583941606e-05, "loss": 0.4392, "step": 58470 }, { "epoch": 1.7072597480328753, "grad_norm": 0.5048831913834558, "learning_rate": 2.393890240605569e-05, "loss": 0.4233, "step": 58475 }, { "epoch": 1.7074057313031927, "grad_norm": 0.4882679763748233, "learning_rate": 2.3936198972695324e-05, "loss": 0.4626, "step": 58480 }, { "epoch": 1.7075517145735097, "grad_norm": 0.48724011025484276, "learning_rate": 2.3933495539334958e-05, "loss": 0.4429, "step": 58485 }, { "epoch": 1.7076976978438272, "grad_norm": 0.46404373314778846, "learning_rate": 2.393079210597459e-05, "loss": 0.4384, "step": 58490 }, { "epoch": 1.7078436811141442, "grad_norm": 0.4410793909602669, "learning_rate": 2.3928088672614223e-05, "loss": 0.432, "step": 58495 }, { "epoch": 1.7079896643844616, "grad_norm": 0.4638124242814348, "learning_rate": 2.3925385239253853e-05, "loss": 0.4336, "step": 58500 }, { "epoch": 1.7081356476547787, "grad_norm": 0.3997154681802492, "learning_rate": 2.3922681805893484e-05, "loss": 0.4024, "step": 58505 }, { "epoch": 1.708281630925096, "grad_norm": 0.4909630806694501, "learning_rate": 2.3919978372533118e-05, "loss": 0.4155, "step": 58510 }, { "epoch": 1.7084276141954131, "grad_norm": 0.4525987740949037, "learning_rate": 2.3917274939172752e-05, "loss": 0.4504, "step": 58515 }, { "epoch": 1.7085735974657306, "grad_norm": 0.4442428879185515, "learning_rate": 2.3914571505812383e-05, "loss": 0.415, "step": 58520 }, { "epoch": 1.7087195807360476, "grad_norm": 0.45779705577494817, "learning_rate": 2.3911868072452017e-05, "loss": 0.4327, "step": 58525 }, { "epoch": 1.708865564006365, "grad_norm": 0.44151709933980315, "learning_rate": 2.3909164639091647e-05, "loss": 0.4298, "step": 58530 }, { "epoch": 1.709011547276682, "grad_norm": 0.4798839996033929, "learning_rate": 2.3906461205731278e-05, "loss": 0.4648, "step": 58535 }, { "epoch": 1.7091575305469993, "grad_norm": 0.5022895361725467, "learning_rate": 2.3903757772370912e-05, "loss": 0.4309, "step": 58540 }, { "epoch": 1.7093035138173165, "grad_norm": 0.5061786808049457, "learning_rate": 2.3901054339010546e-05, "loss": 0.4654, "step": 58545 }, { "epoch": 1.7094494970876337, "grad_norm": 0.48594721314471817, "learning_rate": 2.3898350905650176e-05, "loss": 0.464, "step": 58550 }, { "epoch": 1.709595480357951, "grad_norm": 0.4714175010545967, "learning_rate": 2.389564747228981e-05, "loss": 0.4455, "step": 58555 }, { "epoch": 1.7097414636282682, "grad_norm": 0.4747799176389913, "learning_rate": 2.389294403892944e-05, "loss": 0.4462, "step": 58560 }, { "epoch": 1.7098874468985854, "grad_norm": 0.49409188638582235, "learning_rate": 2.389024060556907e-05, "loss": 0.4412, "step": 58565 }, { "epoch": 1.7100334301689026, "grad_norm": 0.5187041477373289, "learning_rate": 2.3887537172208706e-05, "loss": 0.4467, "step": 58570 }, { "epoch": 1.7101794134392199, "grad_norm": 0.4522253947049944, "learning_rate": 2.388483373884834e-05, "loss": 0.4279, "step": 58575 }, { "epoch": 1.710325396709537, "grad_norm": 0.44881942966171684, "learning_rate": 2.388213030548797e-05, "loss": 0.396, "step": 58580 }, { "epoch": 1.7104713799798543, "grad_norm": 0.4500223185167436, "learning_rate": 2.3879426872127604e-05, "loss": 0.4128, "step": 58585 }, { "epoch": 1.7106173632501716, "grad_norm": 0.529375251901339, "learning_rate": 2.3876723438767235e-05, "loss": 0.4594, "step": 58590 }, { "epoch": 1.7107633465204888, "grad_norm": 0.46315713712019685, "learning_rate": 2.387402000540687e-05, "loss": 0.4503, "step": 58595 }, { "epoch": 1.710909329790806, "grad_norm": 0.4655546138187538, "learning_rate": 2.38713165720465e-05, "loss": 0.4373, "step": 58600 }, { "epoch": 1.7110553130611232, "grad_norm": 0.5053132061458789, "learning_rate": 2.3868613138686134e-05, "loss": 0.4374, "step": 58605 }, { "epoch": 1.7112012963314405, "grad_norm": 0.5038706029494964, "learning_rate": 2.3865909705325764e-05, "loss": 0.4246, "step": 58610 }, { "epoch": 1.7113472796017577, "grad_norm": 0.4571918289451146, "learning_rate": 2.3863206271965395e-05, "loss": 0.4372, "step": 58615 }, { "epoch": 1.7114932628720747, "grad_norm": 0.4804490676986941, "learning_rate": 2.386050283860503e-05, "loss": 0.4592, "step": 58620 }, { "epoch": 1.7116392461423922, "grad_norm": 0.4695077699757087, "learning_rate": 2.3857799405244663e-05, "loss": 0.4377, "step": 58625 }, { "epoch": 1.7117852294127092, "grad_norm": 0.5108310449008163, "learning_rate": 2.3855095971884293e-05, "loss": 0.4458, "step": 58630 }, { "epoch": 1.7119312126830266, "grad_norm": 0.48906120658643626, "learning_rate": 2.3852392538523928e-05, "loss": 0.4484, "step": 58635 }, { "epoch": 1.7120771959533436, "grad_norm": 0.5312529104375373, "learning_rate": 2.3849689105163558e-05, "loss": 0.452, "step": 58640 }, { "epoch": 1.712223179223661, "grad_norm": 0.49286453868197405, "learning_rate": 2.384698567180319e-05, "loss": 0.4516, "step": 58645 }, { "epoch": 1.712369162493978, "grad_norm": 0.48825326820181925, "learning_rate": 2.3844282238442823e-05, "loss": 0.4508, "step": 58650 }, { "epoch": 1.7125151457642955, "grad_norm": 0.45778303426049977, "learning_rate": 2.3841578805082457e-05, "loss": 0.445, "step": 58655 }, { "epoch": 1.7126611290346125, "grad_norm": 0.45960597034965606, "learning_rate": 2.3838875371722087e-05, "loss": 0.43, "step": 58660 }, { "epoch": 1.71280711230493, "grad_norm": 0.5063286630917873, "learning_rate": 2.383617193836172e-05, "loss": 0.4399, "step": 58665 }, { "epoch": 1.712953095575247, "grad_norm": 0.4385361801721305, "learning_rate": 2.3833468505001352e-05, "loss": 0.4052, "step": 58670 }, { "epoch": 1.7130990788455644, "grad_norm": 0.47949785769628456, "learning_rate": 2.3830765071640983e-05, "loss": 0.4352, "step": 58675 }, { "epoch": 1.7132450621158815, "grad_norm": 0.48725299013807905, "learning_rate": 2.382806163828062e-05, "loss": 0.4405, "step": 58680 }, { "epoch": 1.713391045386199, "grad_norm": 0.4354502626417322, "learning_rate": 2.382535820492025e-05, "loss": 0.4365, "step": 58685 }, { "epoch": 1.713537028656516, "grad_norm": 0.47859333328227566, "learning_rate": 2.382265477155988e-05, "loss": 0.4278, "step": 58690 }, { "epoch": 1.7136830119268331, "grad_norm": 0.510674578277651, "learning_rate": 2.3819951338199515e-05, "loss": 0.479, "step": 58695 }, { "epoch": 1.7138289951971504, "grad_norm": 0.5115374440167018, "learning_rate": 2.3817247904839146e-05, "loss": 0.4615, "step": 58700 }, { "epoch": 1.7139749784674676, "grad_norm": 0.5288826053429065, "learning_rate": 2.3814544471478777e-05, "loss": 0.4436, "step": 58705 }, { "epoch": 1.7141209617377848, "grad_norm": 0.47009187169843164, "learning_rate": 2.3811841038118414e-05, "loss": 0.4514, "step": 58710 }, { "epoch": 1.714266945008102, "grad_norm": 0.4519422841550515, "learning_rate": 2.3809137604758045e-05, "loss": 0.4244, "step": 58715 }, { "epoch": 1.7144129282784193, "grad_norm": 0.5183675686539223, "learning_rate": 2.3806434171397675e-05, "loss": 0.4628, "step": 58720 }, { "epoch": 1.7145589115487365, "grad_norm": 0.43534680108835916, "learning_rate": 2.380373073803731e-05, "loss": 0.4265, "step": 58725 }, { "epoch": 1.7147048948190537, "grad_norm": 0.4457553910208186, "learning_rate": 2.380102730467694e-05, "loss": 0.4191, "step": 58730 }, { "epoch": 1.714850878089371, "grad_norm": 0.495826997719146, "learning_rate": 2.379832387131657e-05, "loss": 0.4465, "step": 58735 }, { "epoch": 1.7149968613596882, "grad_norm": 0.47356515759154744, "learning_rate": 2.3795620437956208e-05, "loss": 0.4312, "step": 58740 }, { "epoch": 1.7151428446300054, "grad_norm": 0.5038859373848922, "learning_rate": 2.379291700459584e-05, "loss": 0.4527, "step": 58745 }, { "epoch": 1.7152888279003227, "grad_norm": 0.4833081883055163, "learning_rate": 2.379021357123547e-05, "loss": 0.447, "step": 58750 }, { "epoch": 1.71543481117064, "grad_norm": 0.4685249460523055, "learning_rate": 2.3787510137875103e-05, "loss": 0.4858, "step": 58755 }, { "epoch": 1.7155807944409571, "grad_norm": 0.48942972207300817, "learning_rate": 2.3784806704514734e-05, "loss": 0.4208, "step": 58760 }, { "epoch": 1.7157267777112741, "grad_norm": 0.47138925321183484, "learning_rate": 2.3782103271154368e-05, "loss": 0.4519, "step": 58765 }, { "epoch": 1.7158727609815916, "grad_norm": 0.5298634403508535, "learning_rate": 2.3779399837794002e-05, "loss": 0.4749, "step": 58770 }, { "epoch": 1.7160187442519086, "grad_norm": 0.47616821228425266, "learning_rate": 2.3776696404433632e-05, "loss": 0.4248, "step": 58775 }, { "epoch": 1.716164727522226, "grad_norm": 0.45149599497774257, "learning_rate": 2.3773992971073263e-05, "loss": 0.4214, "step": 58780 }, { "epoch": 1.716310710792543, "grad_norm": 0.4884664413673193, "learning_rate": 2.3771289537712897e-05, "loss": 0.4308, "step": 58785 }, { "epoch": 1.7164566940628605, "grad_norm": 0.4628749995535947, "learning_rate": 2.3768586104352528e-05, "loss": 0.4331, "step": 58790 }, { "epoch": 1.7166026773331775, "grad_norm": 0.48147037073117566, "learning_rate": 2.376588267099216e-05, "loss": 0.4482, "step": 58795 }, { "epoch": 1.716748660603495, "grad_norm": 0.4909952101893859, "learning_rate": 2.3763179237631796e-05, "loss": 0.4591, "step": 58800 }, { "epoch": 1.716894643873812, "grad_norm": 0.5020602696963905, "learning_rate": 2.3760475804271426e-05, "loss": 0.4604, "step": 58805 }, { "epoch": 1.7170406271441294, "grad_norm": 0.4718709324528392, "learning_rate": 2.3757772370911057e-05, "loss": 0.4115, "step": 58810 }, { "epoch": 1.7171866104144464, "grad_norm": 0.4724284394077713, "learning_rate": 2.375506893755069e-05, "loss": 0.4691, "step": 58815 }, { "epoch": 1.7173325936847639, "grad_norm": 0.49177758470746413, "learning_rate": 2.375236550419032e-05, "loss": 0.4436, "step": 58820 }, { "epoch": 1.7174785769550809, "grad_norm": 0.43002675583978855, "learning_rate": 2.3749662070829956e-05, "loss": 0.4482, "step": 58825 }, { "epoch": 1.7176245602253983, "grad_norm": 0.45334845840773286, "learning_rate": 2.374695863746959e-05, "loss": 0.4643, "step": 58830 }, { "epoch": 1.7177705434957153, "grad_norm": 0.492646496634359, "learning_rate": 2.374425520410922e-05, "loss": 0.4571, "step": 58835 }, { "epoch": 1.7179165267660326, "grad_norm": 0.4574034739801075, "learning_rate": 2.374155177074885e-05, "loss": 0.4145, "step": 58840 }, { "epoch": 1.7180625100363498, "grad_norm": 0.49964756764196344, "learning_rate": 2.3738848337388485e-05, "loss": 0.4733, "step": 58845 }, { "epoch": 1.718208493306667, "grad_norm": 0.44956226510133723, "learning_rate": 2.373614490402812e-05, "loss": 0.4201, "step": 58850 }, { "epoch": 1.7183544765769843, "grad_norm": 0.45269277637105915, "learning_rate": 2.373344147066775e-05, "loss": 0.4462, "step": 58855 }, { "epoch": 1.7185004598473015, "grad_norm": 0.48189467772680555, "learning_rate": 2.373073803730738e-05, "loss": 0.4655, "step": 58860 }, { "epoch": 1.7186464431176187, "grad_norm": 0.48162441611839635, "learning_rate": 2.3728034603947014e-05, "loss": 0.4378, "step": 58865 }, { "epoch": 1.718792426387936, "grad_norm": 0.4633720046463976, "learning_rate": 2.3725331170586645e-05, "loss": 0.427, "step": 58870 }, { "epoch": 1.7189384096582532, "grad_norm": 0.4845763759057171, "learning_rate": 2.372262773722628e-05, "loss": 0.4546, "step": 58875 }, { "epoch": 1.7190843929285704, "grad_norm": 0.47619757449796696, "learning_rate": 2.3719924303865913e-05, "loss": 0.4254, "step": 58880 }, { "epoch": 1.7192303761988876, "grad_norm": 0.4579872768620352, "learning_rate": 2.3717220870505543e-05, "loss": 0.4286, "step": 58885 }, { "epoch": 1.7193763594692049, "grad_norm": 0.4624664486413781, "learning_rate": 2.3714517437145174e-05, "loss": 0.4501, "step": 58890 }, { "epoch": 1.719522342739522, "grad_norm": 0.47149383785186333, "learning_rate": 2.3711814003784808e-05, "loss": 0.4705, "step": 58895 }, { "epoch": 1.7196683260098393, "grad_norm": 0.5007448935269071, "learning_rate": 2.370911057042444e-05, "loss": 0.4437, "step": 58900 }, { "epoch": 1.7198143092801566, "grad_norm": 0.4622529599142686, "learning_rate": 2.3706407137064073e-05, "loss": 0.4412, "step": 58905 }, { "epoch": 1.7199602925504736, "grad_norm": 0.462275694285261, "learning_rate": 2.3703703703703707e-05, "loss": 0.4514, "step": 58910 }, { "epoch": 1.720106275820791, "grad_norm": 0.46801907805587695, "learning_rate": 2.3701000270343337e-05, "loss": 0.4386, "step": 58915 }, { "epoch": 1.720252259091108, "grad_norm": 0.4822036357917023, "learning_rate": 2.3698296836982968e-05, "loss": 0.4673, "step": 58920 }, { "epoch": 1.7203982423614255, "grad_norm": 0.42859145037495255, "learning_rate": 2.3695593403622602e-05, "loss": 0.4098, "step": 58925 }, { "epoch": 1.7205442256317425, "grad_norm": 0.443243639370887, "learning_rate": 2.3692889970262233e-05, "loss": 0.4336, "step": 58930 }, { "epoch": 1.72069020890206, "grad_norm": 0.45073079798671867, "learning_rate": 2.3690186536901867e-05, "loss": 0.4301, "step": 58935 }, { "epoch": 1.720836192172377, "grad_norm": 0.499065694931809, "learning_rate": 2.36874831035415e-05, "loss": 0.4544, "step": 58940 }, { "epoch": 1.7209821754426944, "grad_norm": 0.48134596839681765, "learning_rate": 2.368477967018113e-05, "loss": 0.436, "step": 58945 }, { "epoch": 1.7211281587130114, "grad_norm": 0.4694439288358946, "learning_rate": 2.3682076236820762e-05, "loss": 0.4701, "step": 58950 }, { "epoch": 1.7212741419833288, "grad_norm": 0.4873096682535309, "learning_rate": 2.3679372803460396e-05, "loss": 0.4386, "step": 58955 }, { "epoch": 1.7214201252536458, "grad_norm": 0.46593962437037245, "learning_rate": 2.3676669370100026e-05, "loss": 0.4342, "step": 58960 }, { "epoch": 1.7215661085239633, "grad_norm": 0.5239704536502404, "learning_rate": 2.367396593673966e-05, "loss": 0.4546, "step": 58965 }, { "epoch": 1.7217120917942803, "grad_norm": 0.46338105972633625, "learning_rate": 2.3671262503379294e-05, "loss": 0.4505, "step": 58970 }, { "epoch": 1.7218580750645978, "grad_norm": 0.48142031528673435, "learning_rate": 2.3668559070018925e-05, "loss": 0.4229, "step": 58975 }, { "epoch": 1.7220040583349148, "grad_norm": 0.45277718208412826, "learning_rate": 2.3665855636658556e-05, "loss": 0.441, "step": 58980 }, { "epoch": 1.722150041605232, "grad_norm": 0.4810701533378495, "learning_rate": 2.366315220329819e-05, "loss": 0.4405, "step": 58985 }, { "epoch": 1.7222960248755492, "grad_norm": 0.4953888510514417, "learning_rate": 2.366044876993782e-05, "loss": 0.4553, "step": 58990 }, { "epoch": 1.7224420081458665, "grad_norm": 0.4778765758321882, "learning_rate": 2.3657745336577454e-05, "loss": 0.4569, "step": 58995 }, { "epoch": 1.7225879914161837, "grad_norm": 0.4980142196651409, "learning_rate": 2.365504190321709e-05, "loss": 0.4285, "step": 59000 }, { "epoch": 1.722733974686501, "grad_norm": 0.4563130951608166, "learning_rate": 2.365233846985672e-05, "loss": 0.4261, "step": 59005 }, { "epoch": 1.7228799579568181, "grad_norm": 0.4524956538906106, "learning_rate": 2.364963503649635e-05, "loss": 0.4412, "step": 59010 }, { "epoch": 1.7230259412271354, "grad_norm": 0.42428095960423184, "learning_rate": 2.3646931603135984e-05, "loss": 0.4385, "step": 59015 }, { "epoch": 1.7231719244974526, "grad_norm": 0.43383443256742166, "learning_rate": 2.3644228169775618e-05, "loss": 0.4271, "step": 59020 }, { "epoch": 1.7233179077677698, "grad_norm": 0.4609776037867648, "learning_rate": 2.3641524736415248e-05, "loss": 0.4489, "step": 59025 }, { "epoch": 1.723463891038087, "grad_norm": 0.49504100667995177, "learning_rate": 2.3638821303054882e-05, "loss": 0.443, "step": 59030 }, { "epoch": 1.7236098743084043, "grad_norm": 0.4713598785653674, "learning_rate": 2.3636117869694513e-05, "loss": 0.4182, "step": 59035 }, { "epoch": 1.7237558575787215, "grad_norm": 0.4627580205780155, "learning_rate": 2.3633414436334144e-05, "loss": 0.4201, "step": 59040 }, { "epoch": 1.7239018408490387, "grad_norm": 0.5029369469919537, "learning_rate": 2.3630711002973778e-05, "loss": 0.452, "step": 59045 }, { "epoch": 1.724047824119356, "grad_norm": 0.47747583536885513, "learning_rate": 2.362800756961341e-05, "loss": 0.4407, "step": 59050 }, { "epoch": 1.724193807389673, "grad_norm": 0.4692933714515509, "learning_rate": 2.3625304136253042e-05, "loss": 0.4307, "step": 59055 }, { "epoch": 1.7243397906599904, "grad_norm": 0.4689424794100078, "learning_rate": 2.3622600702892676e-05, "loss": 0.4484, "step": 59060 }, { "epoch": 1.7244857739303074, "grad_norm": 0.4688357518699064, "learning_rate": 2.3619897269532307e-05, "loss": 0.4489, "step": 59065 }, { "epoch": 1.724631757200625, "grad_norm": 0.44447426048375194, "learning_rate": 2.3617193836171937e-05, "loss": 0.4318, "step": 59070 }, { "epoch": 1.724777740470942, "grad_norm": 0.4913819213516453, "learning_rate": 2.3614490402811575e-05, "loss": 0.4239, "step": 59075 }, { "epoch": 1.7249237237412594, "grad_norm": 0.46821937912371847, "learning_rate": 2.3611786969451205e-05, "loss": 0.4496, "step": 59080 }, { "epoch": 1.7250697070115764, "grad_norm": 0.427315958310795, "learning_rate": 2.3609083536090836e-05, "loss": 0.4364, "step": 59085 }, { "epoch": 1.7252156902818938, "grad_norm": 0.3903358867169912, "learning_rate": 2.360638010273047e-05, "loss": 0.4589, "step": 59090 }, { "epoch": 1.7253616735522108, "grad_norm": 0.47966250220461754, "learning_rate": 2.36036766693701e-05, "loss": 0.4378, "step": 59095 }, { "epoch": 1.7255076568225283, "grad_norm": 0.487847844006976, "learning_rate": 2.360097323600973e-05, "loss": 0.452, "step": 59100 }, { "epoch": 1.7256536400928453, "grad_norm": 0.47474008420860786, "learning_rate": 2.359826980264937e-05, "loss": 0.4519, "step": 59105 }, { "epoch": 1.7257996233631627, "grad_norm": 0.4806184728394066, "learning_rate": 2.3595566369289e-05, "loss": 0.4483, "step": 59110 }, { "epoch": 1.7259456066334797, "grad_norm": 0.5383389575721867, "learning_rate": 2.359286293592863e-05, "loss": 0.45, "step": 59115 }, { "epoch": 1.7260915899037972, "grad_norm": 0.45908080027410797, "learning_rate": 2.3590159502568264e-05, "loss": 0.4406, "step": 59120 }, { "epoch": 1.7262375731741142, "grad_norm": 0.4561773016080101, "learning_rate": 2.3587456069207895e-05, "loss": 0.4168, "step": 59125 }, { "epoch": 1.7263835564444314, "grad_norm": 0.5180894472999849, "learning_rate": 2.3584752635847525e-05, "loss": 0.4433, "step": 59130 }, { "epoch": 1.7265295397147487, "grad_norm": 0.47258875848079207, "learning_rate": 2.358204920248716e-05, "loss": 0.4386, "step": 59135 }, { "epoch": 1.7266755229850659, "grad_norm": 0.46437231975776044, "learning_rate": 2.3579345769126793e-05, "loss": 0.441, "step": 59140 }, { "epoch": 1.726821506255383, "grad_norm": 0.5014464503333944, "learning_rate": 2.3576642335766424e-05, "loss": 0.4563, "step": 59145 }, { "epoch": 1.7269674895257003, "grad_norm": 0.4975507110989036, "learning_rate": 2.3573938902406058e-05, "loss": 0.453, "step": 59150 }, { "epoch": 1.7271134727960176, "grad_norm": 0.46048115011231233, "learning_rate": 2.357123546904569e-05, "loss": 0.4338, "step": 59155 }, { "epoch": 1.7272594560663348, "grad_norm": 0.4993186868943449, "learning_rate": 2.3568532035685323e-05, "loss": 0.4293, "step": 59160 }, { "epoch": 1.727405439336652, "grad_norm": 0.45790795543680934, "learning_rate": 2.3565828602324953e-05, "loss": 0.4404, "step": 59165 }, { "epoch": 1.7275514226069693, "grad_norm": 0.4785996547426396, "learning_rate": 2.3563125168964587e-05, "loss": 0.4442, "step": 59170 }, { "epoch": 1.7276974058772865, "grad_norm": 0.4538844728761172, "learning_rate": 2.3560421735604218e-05, "loss": 0.4473, "step": 59175 }, { "epoch": 1.7278433891476037, "grad_norm": 0.44357849299378727, "learning_rate": 2.3557718302243852e-05, "loss": 0.4445, "step": 59180 }, { "epoch": 1.727989372417921, "grad_norm": 0.479912587599391, "learning_rate": 2.3555014868883482e-05, "loss": 0.4579, "step": 59185 }, { "epoch": 1.7281353556882382, "grad_norm": 0.4695596517331735, "learning_rate": 2.3552311435523116e-05, "loss": 0.4471, "step": 59190 }, { "epoch": 1.7282813389585554, "grad_norm": 0.496819717259692, "learning_rate": 2.3549608002162747e-05, "loss": 0.4386, "step": 59195 }, { "epoch": 1.7284273222288724, "grad_norm": 0.4685407747171291, "learning_rate": 2.354690456880238e-05, "loss": 0.4341, "step": 59200 }, { "epoch": 1.7285733054991899, "grad_norm": 0.5043069441298614, "learning_rate": 2.354420113544201e-05, "loss": 0.4501, "step": 59205 }, { "epoch": 1.7287192887695069, "grad_norm": 0.443788466957687, "learning_rate": 2.3541497702081646e-05, "loss": 0.4274, "step": 59210 }, { "epoch": 1.7288652720398243, "grad_norm": 0.45720923577758343, "learning_rate": 2.3538794268721276e-05, "loss": 0.4173, "step": 59215 }, { "epoch": 1.7290112553101413, "grad_norm": 0.47222581782262074, "learning_rate": 2.353609083536091e-05, "loss": 0.4364, "step": 59220 }, { "epoch": 1.7291572385804588, "grad_norm": 0.49107471379654316, "learning_rate": 2.353338740200054e-05, "loss": 0.4336, "step": 59225 }, { "epoch": 1.7293032218507758, "grad_norm": 0.48401729616067396, "learning_rate": 2.3530683968640175e-05, "loss": 0.43, "step": 59230 }, { "epoch": 1.7294492051210932, "grad_norm": 0.5096678539982439, "learning_rate": 2.3527980535279806e-05, "loss": 0.4595, "step": 59235 }, { "epoch": 1.7295951883914102, "grad_norm": 0.4525652463728195, "learning_rate": 2.352527710191944e-05, "loss": 0.4222, "step": 59240 }, { "epoch": 1.7297411716617277, "grad_norm": 0.5000944524479904, "learning_rate": 2.3522573668559074e-05, "loss": 0.4402, "step": 59245 }, { "epoch": 1.7298871549320447, "grad_norm": 0.487380466475998, "learning_rate": 2.3519870235198704e-05, "loss": 0.4397, "step": 59250 }, { "epoch": 1.7300331382023622, "grad_norm": 0.4621037469916854, "learning_rate": 2.3517166801838335e-05, "loss": 0.4211, "step": 59255 }, { "epoch": 1.7301791214726792, "grad_norm": 0.4600637873336356, "learning_rate": 2.351446336847797e-05, "loss": 0.3954, "step": 59260 }, { "epoch": 1.7303251047429966, "grad_norm": 0.45567145844498, "learning_rate": 2.35117599351176e-05, "loss": 0.4258, "step": 59265 }, { "epoch": 1.7304710880133136, "grad_norm": 0.47282425686387414, "learning_rate": 2.350905650175723e-05, "loss": 0.4399, "step": 59270 }, { "epoch": 1.7306170712836308, "grad_norm": 0.48797130008033085, "learning_rate": 2.3506353068396868e-05, "loss": 0.4483, "step": 59275 }, { "epoch": 1.730763054553948, "grad_norm": 0.503505406896274, "learning_rate": 2.3503649635036498e-05, "loss": 0.462, "step": 59280 }, { "epoch": 1.7309090378242653, "grad_norm": 0.45895975632463465, "learning_rate": 2.350094620167613e-05, "loss": 0.4746, "step": 59285 }, { "epoch": 1.7310550210945825, "grad_norm": 0.43823953926389114, "learning_rate": 2.3498242768315763e-05, "loss": 0.4179, "step": 59290 }, { "epoch": 1.7312010043648998, "grad_norm": 0.4653231698881014, "learning_rate": 2.3495539334955393e-05, "loss": 0.4385, "step": 59295 }, { "epoch": 1.731346987635217, "grad_norm": 0.473405495901032, "learning_rate": 2.3492835901595024e-05, "loss": 0.4275, "step": 59300 }, { "epoch": 1.7314929709055342, "grad_norm": 0.5139365040847095, "learning_rate": 2.349013246823466e-05, "loss": 0.4511, "step": 59305 }, { "epoch": 1.7316389541758515, "grad_norm": 0.4629533664413975, "learning_rate": 2.3487429034874292e-05, "loss": 0.4379, "step": 59310 }, { "epoch": 1.7317849374461687, "grad_norm": 0.4710089092143705, "learning_rate": 2.3484725601513923e-05, "loss": 0.4427, "step": 59315 }, { "epoch": 1.731930920716486, "grad_norm": 0.4447763469243903, "learning_rate": 2.3482022168153557e-05, "loss": 0.4128, "step": 59320 }, { "epoch": 1.7320769039868031, "grad_norm": 0.43795976167659323, "learning_rate": 2.3479318734793187e-05, "loss": 0.4465, "step": 59325 }, { "epoch": 1.7322228872571204, "grad_norm": 0.47096737263823274, "learning_rate": 2.347661530143282e-05, "loss": 0.4377, "step": 59330 }, { "epoch": 1.7323688705274376, "grad_norm": 0.45144387512555434, "learning_rate": 2.3473911868072455e-05, "loss": 0.444, "step": 59335 }, { "epoch": 1.7325148537977548, "grad_norm": 0.47422620967766166, "learning_rate": 2.3471208434712086e-05, "loss": 0.4121, "step": 59340 }, { "epoch": 1.7326608370680718, "grad_norm": 0.4480573116864509, "learning_rate": 2.3468505001351717e-05, "loss": 0.4442, "step": 59345 }, { "epoch": 1.7328068203383893, "grad_norm": 0.4781079953687695, "learning_rate": 2.346580156799135e-05, "loss": 0.4303, "step": 59350 }, { "epoch": 1.7329528036087063, "grad_norm": 0.4969316747645235, "learning_rate": 2.346309813463098e-05, "loss": 0.4445, "step": 59355 }, { "epoch": 1.7330987868790237, "grad_norm": 0.4425005809127839, "learning_rate": 2.3460394701270615e-05, "loss": 0.4203, "step": 59360 }, { "epoch": 1.7332447701493408, "grad_norm": 0.42567686080278777, "learning_rate": 2.345769126791025e-05, "loss": 0.4451, "step": 59365 }, { "epoch": 1.7333907534196582, "grad_norm": 0.4964900987949272, "learning_rate": 2.345498783454988e-05, "loss": 0.4148, "step": 59370 }, { "epoch": 1.7335367366899752, "grad_norm": 0.4623939039403833, "learning_rate": 2.345228440118951e-05, "loss": 0.4436, "step": 59375 }, { "epoch": 1.7336827199602927, "grad_norm": 0.42404148624200266, "learning_rate": 2.3449580967829144e-05, "loss": 0.3963, "step": 59380 }, { "epoch": 1.7338287032306097, "grad_norm": 0.4479211343601255, "learning_rate": 2.3446877534468775e-05, "loss": 0.4765, "step": 59385 }, { "epoch": 1.7339746865009271, "grad_norm": 0.4618620517137619, "learning_rate": 2.344417410110841e-05, "loss": 0.4888, "step": 59390 }, { "epoch": 1.7341206697712441, "grad_norm": 0.4479672082077732, "learning_rate": 2.3441470667748043e-05, "loss": 0.4364, "step": 59395 }, { "epoch": 1.7342666530415616, "grad_norm": 0.5000509568497191, "learning_rate": 2.3438767234387674e-05, "loss": 0.4438, "step": 59400 }, { "epoch": 1.7344126363118786, "grad_norm": 0.48108329810679656, "learning_rate": 2.3436063801027304e-05, "loss": 0.4406, "step": 59405 }, { "epoch": 1.734558619582196, "grad_norm": 0.4361033726548839, "learning_rate": 2.343336036766694e-05, "loss": 0.4538, "step": 59410 }, { "epoch": 1.734704602852513, "grad_norm": 0.5112405643060869, "learning_rate": 2.3430656934306572e-05, "loss": 0.4495, "step": 59415 }, { "epoch": 1.7348505861228303, "grad_norm": 0.4584078644277668, "learning_rate": 2.3427953500946203e-05, "loss": 0.4206, "step": 59420 }, { "epoch": 1.7349965693931475, "grad_norm": 0.44920395994400775, "learning_rate": 2.3425250067585837e-05, "loss": 0.4351, "step": 59425 }, { "epoch": 1.7351425526634647, "grad_norm": 0.46725051734975676, "learning_rate": 2.3422546634225468e-05, "loss": 0.4375, "step": 59430 }, { "epoch": 1.735288535933782, "grad_norm": 0.5064774927267365, "learning_rate": 2.3419843200865098e-05, "loss": 0.4582, "step": 59435 }, { "epoch": 1.7354345192040992, "grad_norm": 0.45586570565381196, "learning_rate": 2.3417139767504732e-05, "loss": 0.4279, "step": 59440 }, { "epoch": 1.7355805024744164, "grad_norm": 0.43075125461544955, "learning_rate": 2.3414436334144366e-05, "loss": 0.4501, "step": 59445 }, { "epoch": 1.7357264857447336, "grad_norm": 0.50952944390664, "learning_rate": 2.3411732900783997e-05, "loss": 0.4565, "step": 59450 }, { "epoch": 1.7358724690150509, "grad_norm": 0.47898495573657496, "learning_rate": 2.340902946742363e-05, "loss": 0.478, "step": 59455 }, { "epoch": 1.736018452285368, "grad_norm": 0.523301260058934, "learning_rate": 2.340632603406326e-05, "loss": 0.4463, "step": 59460 }, { "epoch": 1.7361644355556853, "grad_norm": 0.5322731835538635, "learning_rate": 2.3403622600702892e-05, "loss": 0.4642, "step": 59465 }, { "epoch": 1.7363104188260026, "grad_norm": 0.5287860028515432, "learning_rate": 2.3400919167342526e-05, "loss": 0.4544, "step": 59470 }, { "epoch": 1.7364564020963198, "grad_norm": 0.4765648038810929, "learning_rate": 2.339821573398216e-05, "loss": 0.4554, "step": 59475 }, { "epoch": 1.736602385366637, "grad_norm": 0.44617803130898703, "learning_rate": 2.339551230062179e-05, "loss": 0.4226, "step": 59480 }, { "epoch": 1.7367483686369543, "grad_norm": 0.515242538429381, "learning_rate": 2.3392808867261425e-05, "loss": 0.4399, "step": 59485 }, { "epoch": 1.7368943519072713, "grad_norm": 0.46177340347786405, "learning_rate": 2.3390105433901055e-05, "loss": 0.4346, "step": 59490 }, { "epoch": 1.7370403351775887, "grad_norm": 0.4441693174492394, "learning_rate": 2.3387402000540686e-05, "loss": 0.4234, "step": 59495 }, { "epoch": 1.7371863184479057, "grad_norm": 0.49344722118433965, "learning_rate": 2.338469856718032e-05, "loss": 0.4513, "step": 59500 }, { "epoch": 1.7373323017182232, "grad_norm": 0.5112159655029942, "learning_rate": 2.3381995133819954e-05, "loss": 0.4418, "step": 59505 }, { "epoch": 1.7374782849885402, "grad_norm": 0.4542343872497552, "learning_rate": 2.3379291700459585e-05, "loss": 0.4357, "step": 59510 }, { "epoch": 1.7376242682588576, "grad_norm": 0.4958169495171898, "learning_rate": 2.3376588267099215e-05, "loss": 0.4435, "step": 59515 }, { "epoch": 1.7377702515291746, "grad_norm": 0.45428108366405306, "learning_rate": 2.337388483373885e-05, "loss": 0.4424, "step": 59520 }, { "epoch": 1.737916234799492, "grad_norm": 0.4485193660437179, "learning_rate": 2.337118140037848e-05, "loss": 0.4263, "step": 59525 }, { "epoch": 1.738062218069809, "grad_norm": 0.5023035178387749, "learning_rate": 2.3368477967018114e-05, "loss": 0.4628, "step": 59530 }, { "epoch": 1.7382082013401265, "grad_norm": 0.49303902740551225, "learning_rate": 2.3365774533657748e-05, "loss": 0.462, "step": 59535 }, { "epoch": 1.7383541846104436, "grad_norm": 0.4785425989092736, "learning_rate": 2.336307110029738e-05, "loss": 0.4197, "step": 59540 }, { "epoch": 1.738500167880761, "grad_norm": 0.48983680896145887, "learning_rate": 2.336036766693701e-05, "loss": 0.4589, "step": 59545 }, { "epoch": 1.738646151151078, "grad_norm": 0.45840623731432867, "learning_rate": 2.3357664233576643e-05, "loss": 0.4468, "step": 59550 }, { "epoch": 1.7387921344213955, "grad_norm": 0.4898348519869097, "learning_rate": 2.3354960800216274e-05, "loss": 0.4451, "step": 59555 }, { "epoch": 1.7389381176917125, "grad_norm": 0.4467478257413099, "learning_rate": 2.3352257366855908e-05, "loss": 0.4231, "step": 59560 }, { "epoch": 1.7390841009620297, "grad_norm": 0.46193934313200186, "learning_rate": 2.3349553933495542e-05, "loss": 0.4058, "step": 59565 }, { "epoch": 1.739230084232347, "grad_norm": 0.5512124135905796, "learning_rate": 2.3346850500135173e-05, "loss": 0.4492, "step": 59570 }, { "epoch": 1.7393760675026642, "grad_norm": 0.45370779331355654, "learning_rate": 2.3344147066774803e-05, "loss": 0.4358, "step": 59575 }, { "epoch": 1.7395220507729814, "grad_norm": 0.4978456999354111, "learning_rate": 2.3341443633414437e-05, "loss": 0.4582, "step": 59580 }, { "epoch": 1.7396680340432986, "grad_norm": 0.48456888585976565, "learning_rate": 2.333874020005407e-05, "loss": 0.4428, "step": 59585 }, { "epoch": 1.7398140173136158, "grad_norm": 0.43044280228416715, "learning_rate": 2.3336036766693702e-05, "loss": 0.4554, "step": 59590 }, { "epoch": 1.739960000583933, "grad_norm": 0.4462222982969771, "learning_rate": 2.3333333333333336e-05, "loss": 0.4554, "step": 59595 }, { "epoch": 1.7401059838542503, "grad_norm": 0.49540548330554635, "learning_rate": 2.3330629899972966e-05, "loss": 0.4652, "step": 59600 }, { "epoch": 1.7402519671245675, "grad_norm": 0.4640881152011339, "learning_rate": 2.3327926466612597e-05, "loss": 0.4522, "step": 59605 }, { "epoch": 1.7403979503948848, "grad_norm": 0.47819632768202264, "learning_rate": 2.332522303325223e-05, "loss": 0.4408, "step": 59610 }, { "epoch": 1.740543933665202, "grad_norm": 0.5135660322541004, "learning_rate": 2.3322519599891865e-05, "loss": 0.4398, "step": 59615 }, { "epoch": 1.7406899169355192, "grad_norm": 0.4861542235124362, "learning_rate": 2.3319816166531496e-05, "loss": 0.431, "step": 59620 }, { "epoch": 1.7408359002058365, "grad_norm": 0.5328556586216099, "learning_rate": 2.331711273317113e-05, "loss": 0.4421, "step": 59625 }, { "epoch": 1.7409818834761537, "grad_norm": 0.47520208034675365, "learning_rate": 2.331440929981076e-05, "loss": 0.4123, "step": 59630 }, { "epoch": 1.741127866746471, "grad_norm": 0.4650017242685451, "learning_rate": 2.331170586645039e-05, "loss": 0.4353, "step": 59635 }, { "epoch": 1.7412738500167881, "grad_norm": 0.49766610073688794, "learning_rate": 2.3309002433090025e-05, "loss": 0.4555, "step": 59640 }, { "epoch": 1.7414198332871051, "grad_norm": 0.5138416674710159, "learning_rate": 2.330629899972966e-05, "loss": 0.4708, "step": 59645 }, { "epoch": 1.7415658165574226, "grad_norm": 0.4826836778083326, "learning_rate": 2.330359556636929e-05, "loss": 0.4334, "step": 59650 }, { "epoch": 1.7417117998277396, "grad_norm": 0.4359715382445705, "learning_rate": 2.3300892133008924e-05, "loss": 0.4484, "step": 59655 }, { "epoch": 1.741857783098057, "grad_norm": 0.48344929141492404, "learning_rate": 2.3298188699648554e-05, "loss": 0.4408, "step": 59660 }, { "epoch": 1.742003766368374, "grad_norm": 0.46951982643559914, "learning_rate": 2.3295485266288185e-05, "loss": 0.4505, "step": 59665 }, { "epoch": 1.7421497496386915, "grad_norm": 0.48518308751147227, "learning_rate": 2.3292781832927822e-05, "loss": 0.4245, "step": 59670 }, { "epoch": 1.7422957329090085, "grad_norm": 0.46411850323316944, "learning_rate": 2.3290078399567453e-05, "loss": 0.4478, "step": 59675 }, { "epoch": 1.742441716179326, "grad_norm": 0.48858733879742977, "learning_rate": 2.3287374966207084e-05, "loss": 0.4393, "step": 59680 }, { "epoch": 1.742587699449643, "grad_norm": 0.48548657489214764, "learning_rate": 2.3284671532846718e-05, "loss": 0.4718, "step": 59685 }, { "epoch": 1.7427336827199604, "grad_norm": 0.5009648829561767, "learning_rate": 2.3281968099486348e-05, "loss": 0.4301, "step": 59690 }, { "epoch": 1.7428796659902774, "grad_norm": 0.4640694124788908, "learning_rate": 2.327926466612598e-05, "loss": 0.4402, "step": 59695 }, { "epoch": 1.743025649260595, "grad_norm": 0.4731591220066795, "learning_rate": 2.3276561232765616e-05, "loss": 0.4199, "step": 59700 }, { "epoch": 1.743171632530912, "grad_norm": 0.4676217152139334, "learning_rate": 2.3273857799405247e-05, "loss": 0.4451, "step": 59705 }, { "epoch": 1.7433176158012291, "grad_norm": 0.4768672422333322, "learning_rate": 2.3271154366044877e-05, "loss": 0.4596, "step": 59710 }, { "epoch": 1.7434635990715464, "grad_norm": 0.4662607849884085, "learning_rate": 2.326845093268451e-05, "loss": 0.4525, "step": 59715 }, { "epoch": 1.7436095823418636, "grad_norm": 0.5367743433039541, "learning_rate": 2.3265747499324142e-05, "loss": 0.4345, "step": 59720 }, { "epoch": 1.7437555656121808, "grad_norm": 0.48959977138900695, "learning_rate": 2.3263044065963773e-05, "loss": 0.4217, "step": 59725 }, { "epoch": 1.743901548882498, "grad_norm": 0.48126970254654133, "learning_rate": 2.326034063260341e-05, "loss": 0.448, "step": 59730 }, { "epoch": 1.7440475321528153, "grad_norm": 0.4587697973440071, "learning_rate": 2.325763719924304e-05, "loss": 0.4491, "step": 59735 }, { "epoch": 1.7441935154231325, "grad_norm": 0.4888812662253052, "learning_rate": 2.325493376588267e-05, "loss": 0.4655, "step": 59740 }, { "epoch": 1.7443394986934497, "grad_norm": 0.4754717002582362, "learning_rate": 2.3252230332522305e-05, "loss": 0.444, "step": 59745 }, { "epoch": 1.744485481963767, "grad_norm": 0.4495795978134223, "learning_rate": 2.3249526899161936e-05, "loss": 0.4565, "step": 59750 }, { "epoch": 1.7446314652340842, "grad_norm": 0.4990034062639064, "learning_rate": 2.324682346580157e-05, "loss": 0.4527, "step": 59755 }, { "epoch": 1.7447774485044014, "grad_norm": 0.49188990935672755, "learning_rate": 2.3244120032441204e-05, "loss": 0.4588, "step": 59760 }, { "epoch": 1.7449234317747186, "grad_norm": 0.4235902591006984, "learning_rate": 2.3241416599080835e-05, "loss": 0.4069, "step": 59765 }, { "epoch": 1.7450694150450359, "grad_norm": 0.46499746173739914, "learning_rate": 2.3238713165720465e-05, "loss": 0.4261, "step": 59770 }, { "epoch": 1.745215398315353, "grad_norm": 0.45863527961952144, "learning_rate": 2.32360097323601e-05, "loss": 0.4292, "step": 59775 }, { "epoch": 1.7453613815856703, "grad_norm": 0.47790069915886946, "learning_rate": 2.323330629899973e-05, "loss": 0.4116, "step": 59780 }, { "epoch": 1.7455073648559876, "grad_norm": 0.47416615811498847, "learning_rate": 2.3230602865639364e-05, "loss": 0.4386, "step": 59785 }, { "epoch": 1.7456533481263046, "grad_norm": 0.4456670020077189, "learning_rate": 2.3227899432278994e-05, "loss": 0.4241, "step": 59790 }, { "epoch": 1.745799331396622, "grad_norm": 0.48351525756963776, "learning_rate": 2.322519599891863e-05, "loss": 0.4373, "step": 59795 }, { "epoch": 1.745945314666939, "grad_norm": 0.4889814088302132, "learning_rate": 2.322249256555826e-05, "loss": 0.4194, "step": 59800 }, { "epoch": 1.7460912979372565, "grad_norm": 0.4799252959276066, "learning_rate": 2.3219789132197893e-05, "loss": 0.449, "step": 59805 }, { "epoch": 1.7462372812075735, "grad_norm": 0.5092466815021348, "learning_rate": 2.3217085698837524e-05, "loss": 0.4433, "step": 59810 }, { "epoch": 1.746383264477891, "grad_norm": 0.5158591459288633, "learning_rate": 2.3214382265477158e-05, "loss": 0.4525, "step": 59815 }, { "epoch": 1.746529247748208, "grad_norm": 0.4965769035076882, "learning_rate": 2.321167883211679e-05, "loss": 0.4377, "step": 59820 }, { "epoch": 1.7466752310185254, "grad_norm": 0.42586772688202096, "learning_rate": 2.3208975398756422e-05, "loss": 0.4305, "step": 59825 }, { "epoch": 1.7468212142888424, "grad_norm": 0.4995940855342533, "learning_rate": 2.3206271965396053e-05, "loss": 0.4172, "step": 59830 }, { "epoch": 1.7469671975591599, "grad_norm": 0.43045772620419687, "learning_rate": 2.3203568532035687e-05, "loss": 0.4145, "step": 59835 }, { "epoch": 1.7471131808294769, "grad_norm": 0.49578997207540315, "learning_rate": 2.320086509867532e-05, "loss": 0.4716, "step": 59840 }, { "epoch": 1.7472591640997943, "grad_norm": 0.4577022321927191, "learning_rate": 2.319816166531495e-05, "loss": 0.4446, "step": 59845 }, { "epoch": 1.7474051473701113, "grad_norm": 0.46286415553610916, "learning_rate": 2.3195458231954582e-05, "loss": 0.4093, "step": 59850 }, { "epoch": 1.7475511306404286, "grad_norm": 0.45988494600861984, "learning_rate": 2.3192754798594216e-05, "loss": 0.4283, "step": 59855 }, { "epoch": 1.7476971139107458, "grad_norm": 0.49536478056950684, "learning_rate": 2.3190051365233847e-05, "loss": 0.4167, "step": 59860 }, { "epoch": 1.747843097181063, "grad_norm": 0.5099697470040662, "learning_rate": 2.318734793187348e-05, "loss": 0.4307, "step": 59865 }, { "epoch": 1.7479890804513802, "grad_norm": 0.4658109818738108, "learning_rate": 2.3184644498513115e-05, "loss": 0.4465, "step": 59870 }, { "epoch": 1.7481350637216975, "grad_norm": 0.4682970635689444, "learning_rate": 2.3181941065152746e-05, "loss": 0.4339, "step": 59875 }, { "epoch": 1.7482810469920147, "grad_norm": 0.4652069393594191, "learning_rate": 2.3179237631792376e-05, "loss": 0.4199, "step": 59880 }, { "epoch": 1.748427030262332, "grad_norm": 0.492685730588585, "learning_rate": 2.317653419843201e-05, "loss": 0.4427, "step": 59885 }, { "epoch": 1.7485730135326492, "grad_norm": 0.43357535515222434, "learning_rate": 2.317383076507164e-05, "loss": 0.4218, "step": 59890 }, { "epoch": 1.7487189968029664, "grad_norm": 0.46598019312833605, "learning_rate": 2.317112733171127e-05, "loss": 0.4499, "step": 59895 }, { "epoch": 1.7488649800732836, "grad_norm": 0.5024076454807366, "learning_rate": 2.316842389835091e-05, "loss": 0.4577, "step": 59900 }, { "epoch": 1.7490109633436008, "grad_norm": 0.48806321187294804, "learning_rate": 2.316572046499054e-05, "loss": 0.4636, "step": 59905 }, { "epoch": 1.749156946613918, "grad_norm": 0.545717729731797, "learning_rate": 2.316301703163017e-05, "loss": 0.4526, "step": 59910 }, { "epoch": 1.7493029298842353, "grad_norm": 0.4450904924048401, "learning_rate": 2.3160313598269804e-05, "loss": 0.428, "step": 59915 }, { "epoch": 1.7494489131545525, "grad_norm": 0.485036579337452, "learning_rate": 2.3157610164909435e-05, "loss": 0.4219, "step": 59920 }, { "epoch": 1.7495948964248698, "grad_norm": 0.49231320413100155, "learning_rate": 2.315490673154907e-05, "loss": 0.4359, "step": 59925 }, { "epoch": 1.749740879695187, "grad_norm": 0.43197412524097867, "learning_rate": 2.3152203298188703e-05, "loss": 0.4263, "step": 59930 }, { "epoch": 1.749886862965504, "grad_norm": 0.4535558665958972, "learning_rate": 2.3149499864828333e-05, "loss": 0.4194, "step": 59935 }, { "epoch": 1.7500328462358214, "grad_norm": 0.47515939890572834, "learning_rate": 2.3146796431467964e-05, "loss": 0.4689, "step": 59940 }, { "epoch": 1.7501788295061385, "grad_norm": 0.4506311320924466, "learning_rate": 2.3144092998107598e-05, "loss": 0.4403, "step": 59945 }, { "epoch": 1.750324812776456, "grad_norm": 0.45090376344448824, "learning_rate": 2.314138956474723e-05, "loss": 0.4339, "step": 59950 }, { "epoch": 1.750470796046773, "grad_norm": 0.4777225945556648, "learning_rate": 2.3138686131386863e-05, "loss": 0.429, "step": 59955 }, { "epoch": 1.7506167793170904, "grad_norm": 0.5024865873357349, "learning_rate": 2.3135982698026497e-05, "loss": 0.4581, "step": 59960 }, { "epoch": 1.7507627625874074, "grad_norm": 0.45908417352328384, "learning_rate": 2.3133279264666127e-05, "loss": 0.4334, "step": 59965 }, { "epoch": 1.7509087458577248, "grad_norm": 0.4507728584049989, "learning_rate": 2.3130575831305758e-05, "loss": 0.457, "step": 59970 }, { "epoch": 1.7510547291280418, "grad_norm": 0.4843648759274744, "learning_rate": 2.3127872397945392e-05, "loss": 0.4397, "step": 59975 }, { "epoch": 1.7512007123983593, "grad_norm": 0.5217798903307169, "learning_rate": 2.3125168964585023e-05, "loss": 0.4593, "step": 59980 }, { "epoch": 1.7513466956686763, "grad_norm": 0.49671949115953823, "learning_rate": 2.3122465531224657e-05, "loss": 0.424, "step": 59985 }, { "epoch": 1.7514926789389937, "grad_norm": 0.453959991311483, "learning_rate": 2.311976209786429e-05, "loss": 0.4692, "step": 59990 }, { "epoch": 1.7516386622093107, "grad_norm": 0.4759110367912787, "learning_rate": 2.311705866450392e-05, "loss": 0.4258, "step": 59995 }, { "epoch": 1.7517846454796282, "grad_norm": 0.45050141962220086, "learning_rate": 2.3114355231143552e-05, "loss": 0.4333, "step": 60000 }, { "epoch": 1.7519306287499452, "grad_norm": 0.4305081532646169, "learning_rate": 2.3111651797783186e-05, "loss": 0.397, "step": 60005 }, { "epoch": 1.7520766120202624, "grad_norm": 0.5008681166130875, "learning_rate": 2.310894836442282e-05, "loss": 0.4457, "step": 60010 }, { "epoch": 1.7522225952905797, "grad_norm": 0.49041846547743007, "learning_rate": 2.310624493106245e-05, "loss": 0.4238, "step": 60015 }, { "epoch": 1.752368578560897, "grad_norm": 0.48187237087512724, "learning_rate": 2.3103541497702084e-05, "loss": 0.43, "step": 60020 }, { "epoch": 1.7525145618312141, "grad_norm": 0.4894790819602646, "learning_rate": 2.3100838064341715e-05, "loss": 0.4441, "step": 60025 }, { "epoch": 1.7526605451015314, "grad_norm": 0.5135045693637442, "learning_rate": 2.3098134630981346e-05, "loss": 0.4621, "step": 60030 }, { "epoch": 1.7528065283718486, "grad_norm": 0.48912442275987117, "learning_rate": 2.309543119762098e-05, "loss": 0.4792, "step": 60035 }, { "epoch": 1.7529525116421658, "grad_norm": 0.522554672493372, "learning_rate": 2.3092727764260614e-05, "loss": 0.4458, "step": 60040 }, { "epoch": 1.753098494912483, "grad_norm": 0.4800704762579457, "learning_rate": 2.3090024330900244e-05, "loss": 0.433, "step": 60045 }, { "epoch": 1.7532444781828003, "grad_norm": 0.4550974613570047, "learning_rate": 2.308732089753988e-05, "loss": 0.419, "step": 60050 }, { "epoch": 1.7533904614531175, "grad_norm": 0.49220691725317467, "learning_rate": 2.308461746417951e-05, "loss": 0.4513, "step": 60055 }, { "epoch": 1.7535364447234347, "grad_norm": 0.5099431250778907, "learning_rate": 2.308191403081914e-05, "loss": 0.4509, "step": 60060 }, { "epoch": 1.753682427993752, "grad_norm": 0.4277865467145995, "learning_rate": 2.3079210597458774e-05, "loss": 0.4292, "step": 60065 }, { "epoch": 1.7538284112640692, "grad_norm": 0.49731987534678374, "learning_rate": 2.3076507164098408e-05, "loss": 0.4432, "step": 60070 }, { "epoch": 1.7539743945343864, "grad_norm": 0.45042094217001166, "learning_rate": 2.3073803730738038e-05, "loss": 0.4213, "step": 60075 }, { "epoch": 1.7541203778047034, "grad_norm": 0.4924514729061652, "learning_rate": 2.3071100297377672e-05, "loss": 0.4409, "step": 60080 }, { "epoch": 1.7542663610750209, "grad_norm": 0.4922740479476866, "learning_rate": 2.3068396864017303e-05, "loss": 0.4347, "step": 60085 }, { "epoch": 1.7544123443453379, "grad_norm": 0.5147840778591788, "learning_rate": 2.3065693430656934e-05, "loss": 0.4335, "step": 60090 }, { "epoch": 1.7545583276156553, "grad_norm": 0.45287963083017074, "learning_rate": 2.3062989997296568e-05, "loss": 0.4287, "step": 60095 }, { "epoch": 1.7547043108859723, "grad_norm": 0.4870874141754653, "learning_rate": 2.30602865639362e-05, "loss": 0.42, "step": 60100 }, { "epoch": 1.7548502941562898, "grad_norm": 0.42964763568852504, "learning_rate": 2.3057583130575832e-05, "loss": 0.4009, "step": 60105 }, { "epoch": 1.7549962774266068, "grad_norm": 0.4229156296513743, "learning_rate": 2.3054879697215466e-05, "loss": 0.4315, "step": 60110 }, { "epoch": 1.7551422606969243, "grad_norm": 0.485913702581892, "learning_rate": 2.3052176263855097e-05, "loss": 0.4654, "step": 60115 }, { "epoch": 1.7552882439672413, "grad_norm": 0.43502678649015325, "learning_rate": 2.3049472830494727e-05, "loss": 0.4281, "step": 60120 }, { "epoch": 1.7554342272375587, "grad_norm": 0.47209087481003364, "learning_rate": 2.304676939713436e-05, "loss": 0.4278, "step": 60125 }, { "epoch": 1.7555802105078757, "grad_norm": 0.5191420799812076, "learning_rate": 2.3044065963773995e-05, "loss": 0.4642, "step": 60130 }, { "epoch": 1.7557261937781932, "grad_norm": 0.4465120110212485, "learning_rate": 2.3041362530413626e-05, "loss": 0.4541, "step": 60135 }, { "epoch": 1.7558721770485102, "grad_norm": 0.4366513244909253, "learning_rate": 2.303865909705326e-05, "loss": 0.4464, "step": 60140 }, { "epoch": 1.7560181603188276, "grad_norm": 0.47026994975045955, "learning_rate": 2.303595566369289e-05, "loss": 0.4584, "step": 60145 }, { "epoch": 1.7561641435891446, "grad_norm": 0.4703394203551629, "learning_rate": 2.303325223033252e-05, "loss": 0.4365, "step": 60150 }, { "epoch": 1.7563101268594619, "grad_norm": 0.5015946559424928, "learning_rate": 2.3030548796972155e-05, "loss": 0.4432, "step": 60155 }, { "epoch": 1.756456110129779, "grad_norm": 0.5212857917904994, "learning_rate": 2.302784536361179e-05, "loss": 0.4417, "step": 60160 }, { "epoch": 1.7566020934000963, "grad_norm": 0.4913614227318778, "learning_rate": 2.302514193025142e-05, "loss": 0.4384, "step": 60165 }, { "epoch": 1.7567480766704135, "grad_norm": 0.45664370313474306, "learning_rate": 2.302243849689105e-05, "loss": 0.4259, "step": 60170 }, { "epoch": 1.7568940599407308, "grad_norm": 0.4624229081185574, "learning_rate": 2.3019735063530685e-05, "loss": 0.4118, "step": 60175 }, { "epoch": 1.757040043211048, "grad_norm": 0.45770692690591835, "learning_rate": 2.301703163017032e-05, "loss": 0.4108, "step": 60180 }, { "epoch": 1.7571860264813652, "grad_norm": 0.5138493961201783, "learning_rate": 2.301432819680995e-05, "loss": 0.4519, "step": 60185 }, { "epoch": 1.7573320097516825, "grad_norm": 0.4315718550071392, "learning_rate": 2.3011624763449583e-05, "loss": 0.4224, "step": 60190 }, { "epoch": 1.7574779930219997, "grad_norm": 0.49261482655605365, "learning_rate": 2.3008921330089214e-05, "loss": 0.4435, "step": 60195 }, { "epoch": 1.757623976292317, "grad_norm": 0.49173678152076983, "learning_rate": 2.3006217896728845e-05, "loss": 0.4477, "step": 60200 }, { "epoch": 1.7577699595626342, "grad_norm": 0.4615695831905919, "learning_rate": 2.300351446336848e-05, "loss": 0.4503, "step": 60205 }, { "epoch": 1.7579159428329514, "grad_norm": 0.48453627075625433, "learning_rate": 2.3000811030008113e-05, "loss": 0.4648, "step": 60210 }, { "epoch": 1.7580619261032686, "grad_norm": 0.45855194292850854, "learning_rate": 2.2998107596647743e-05, "loss": 0.4544, "step": 60215 }, { "epoch": 1.7582079093735858, "grad_norm": 0.48364853633270016, "learning_rate": 2.2995404163287377e-05, "loss": 0.4379, "step": 60220 }, { "epoch": 1.7583538926439028, "grad_norm": 0.5049273140471537, "learning_rate": 2.2992700729927008e-05, "loss": 0.4309, "step": 60225 }, { "epoch": 1.7584998759142203, "grad_norm": 0.42642832366000805, "learning_rate": 2.298999729656664e-05, "loss": 0.4359, "step": 60230 }, { "epoch": 1.7586458591845373, "grad_norm": 0.49587953170588245, "learning_rate": 2.2987293863206272e-05, "loss": 0.4344, "step": 60235 }, { "epoch": 1.7587918424548548, "grad_norm": 0.4988963649199969, "learning_rate": 2.2984590429845906e-05, "loss": 0.4455, "step": 60240 }, { "epoch": 1.7589378257251718, "grad_norm": 0.46976940488277413, "learning_rate": 2.2981886996485537e-05, "loss": 0.4335, "step": 60245 }, { "epoch": 1.7590838089954892, "grad_norm": 0.44879599453519015, "learning_rate": 2.297918356312517e-05, "loss": 0.4393, "step": 60250 }, { "epoch": 1.7592297922658062, "grad_norm": 0.47164991163811, "learning_rate": 2.29764801297648e-05, "loss": 0.4384, "step": 60255 }, { "epoch": 1.7593757755361237, "grad_norm": 0.441907673688133, "learning_rate": 2.2973776696404432e-05, "loss": 0.4256, "step": 60260 }, { "epoch": 1.7595217588064407, "grad_norm": 0.4945365314429553, "learning_rate": 2.297107326304407e-05, "loss": 0.4712, "step": 60265 }, { "epoch": 1.7596677420767581, "grad_norm": 0.4542736803575402, "learning_rate": 2.29683698296837e-05, "loss": 0.4061, "step": 60270 }, { "epoch": 1.7598137253470751, "grad_norm": 0.4678189485426474, "learning_rate": 2.296566639632333e-05, "loss": 0.445, "step": 60275 }, { "epoch": 1.7599597086173926, "grad_norm": 0.47560250318439784, "learning_rate": 2.2962962962962965e-05, "loss": 0.4258, "step": 60280 }, { "epoch": 1.7601056918877096, "grad_norm": 0.4539094739888601, "learning_rate": 2.2960259529602596e-05, "loss": 0.4528, "step": 60285 }, { "epoch": 1.760251675158027, "grad_norm": 0.49426275093771976, "learning_rate": 2.2957556096242226e-05, "loss": 0.4308, "step": 60290 }, { "epoch": 1.760397658428344, "grad_norm": 0.47063978096892645, "learning_rate": 2.2954852662881864e-05, "loss": 0.4347, "step": 60295 }, { "epoch": 1.7605436416986613, "grad_norm": 0.481908518896442, "learning_rate": 2.2952149229521494e-05, "loss": 0.4339, "step": 60300 }, { "epoch": 1.7606896249689785, "grad_norm": 0.4987462685601428, "learning_rate": 2.2949445796161125e-05, "loss": 0.4529, "step": 60305 }, { "epoch": 1.7608356082392957, "grad_norm": 0.5213944310956036, "learning_rate": 2.294674236280076e-05, "loss": 0.4511, "step": 60310 }, { "epoch": 1.760981591509613, "grad_norm": 0.48977157565374213, "learning_rate": 2.294403892944039e-05, "loss": 0.4512, "step": 60315 }, { "epoch": 1.7611275747799302, "grad_norm": 0.5018512844702948, "learning_rate": 2.294133549608002e-05, "loss": 0.427, "step": 60320 }, { "epoch": 1.7612735580502474, "grad_norm": 0.47776303790633196, "learning_rate": 2.2938632062719658e-05, "loss": 0.4611, "step": 60325 }, { "epoch": 1.7614195413205647, "grad_norm": 0.5530918593114083, "learning_rate": 2.2935928629359288e-05, "loss": 0.4745, "step": 60330 }, { "epoch": 1.761565524590882, "grad_norm": 0.46855718594352863, "learning_rate": 2.293322519599892e-05, "loss": 0.4108, "step": 60335 }, { "epoch": 1.7617115078611991, "grad_norm": 0.46338475697608555, "learning_rate": 2.2930521762638553e-05, "loss": 0.4718, "step": 60340 }, { "epoch": 1.7618574911315164, "grad_norm": 0.45799830084999926, "learning_rate": 2.2927818329278183e-05, "loss": 0.4335, "step": 60345 }, { "epoch": 1.7620034744018336, "grad_norm": 0.4800354131540089, "learning_rate": 2.2925114895917817e-05, "loss": 0.4137, "step": 60350 }, { "epoch": 1.7621494576721508, "grad_norm": 0.49925441746627575, "learning_rate": 2.292241146255745e-05, "loss": 0.4176, "step": 60355 }, { "epoch": 1.762295440942468, "grad_norm": 0.4997239364717553, "learning_rate": 2.2919708029197082e-05, "loss": 0.4557, "step": 60360 }, { "epoch": 1.7624414242127853, "grad_norm": 0.4950138126547336, "learning_rate": 2.2917004595836713e-05, "loss": 0.4375, "step": 60365 }, { "epoch": 1.7625874074831023, "grad_norm": 0.49583646214771276, "learning_rate": 2.2914301162476347e-05, "loss": 0.4493, "step": 60370 }, { "epoch": 1.7627333907534197, "grad_norm": 0.4971107281331704, "learning_rate": 2.2911597729115977e-05, "loss": 0.4494, "step": 60375 }, { "epoch": 1.7628793740237367, "grad_norm": 0.4575660539954119, "learning_rate": 2.290889429575561e-05, "loss": 0.4243, "step": 60380 }, { "epoch": 1.7630253572940542, "grad_norm": 0.44070252329840304, "learning_rate": 2.2906190862395245e-05, "loss": 0.4004, "step": 60385 }, { "epoch": 1.7631713405643712, "grad_norm": 0.48031170588424205, "learning_rate": 2.2903487429034876e-05, "loss": 0.4097, "step": 60390 }, { "epoch": 1.7633173238346886, "grad_norm": 0.450178735097004, "learning_rate": 2.2900783995674507e-05, "loss": 0.4207, "step": 60395 }, { "epoch": 1.7634633071050057, "grad_norm": 0.47690401629403417, "learning_rate": 2.289808056231414e-05, "loss": 0.4244, "step": 60400 }, { "epoch": 1.763609290375323, "grad_norm": 0.4768027667876226, "learning_rate": 2.289537712895377e-05, "loss": 0.4334, "step": 60405 }, { "epoch": 1.76375527364564, "grad_norm": 0.45619050841203995, "learning_rate": 2.2892673695593405e-05, "loss": 0.4341, "step": 60410 }, { "epoch": 1.7639012569159576, "grad_norm": 0.44435100351992896, "learning_rate": 2.2889970262233036e-05, "loss": 0.4337, "step": 60415 }, { "epoch": 1.7640472401862746, "grad_norm": 0.4855040900677122, "learning_rate": 2.288726682887267e-05, "loss": 0.4458, "step": 60420 }, { "epoch": 1.764193223456592, "grad_norm": 0.49046897243088344, "learning_rate": 2.28845633955123e-05, "loss": 0.4476, "step": 60425 }, { "epoch": 1.764339206726909, "grad_norm": 0.44920371933641057, "learning_rate": 2.2881859962151934e-05, "loss": 0.4253, "step": 60430 }, { "epoch": 1.7644851899972265, "grad_norm": 0.4797177306143298, "learning_rate": 2.287915652879157e-05, "loss": 0.4817, "step": 60435 }, { "epoch": 1.7646311732675435, "grad_norm": 0.4934024473700916, "learning_rate": 2.28764530954312e-05, "loss": 0.4457, "step": 60440 }, { "epoch": 1.7647771565378607, "grad_norm": 0.45519421019791767, "learning_rate": 2.287374966207083e-05, "loss": 0.4379, "step": 60445 }, { "epoch": 1.764923139808178, "grad_norm": 0.4592483161424408, "learning_rate": 2.2871046228710464e-05, "loss": 0.4324, "step": 60450 }, { "epoch": 1.7650691230784952, "grad_norm": 0.49118159178838006, "learning_rate": 2.2868342795350094e-05, "loss": 0.4443, "step": 60455 }, { "epoch": 1.7652151063488124, "grad_norm": 0.47216109292508945, "learning_rate": 2.286563936198973e-05, "loss": 0.4166, "step": 60460 }, { "epoch": 1.7653610896191296, "grad_norm": 0.5206657906905072, "learning_rate": 2.2862935928629362e-05, "loss": 0.4372, "step": 60465 }, { "epoch": 1.7655070728894469, "grad_norm": 0.4873123581854389, "learning_rate": 2.2860232495268993e-05, "loss": 0.4379, "step": 60470 }, { "epoch": 1.765653056159764, "grad_norm": 0.4831851622857345, "learning_rate": 2.2857529061908624e-05, "loss": 0.4432, "step": 60475 }, { "epoch": 1.7657990394300813, "grad_norm": 0.45186639576935594, "learning_rate": 2.2854825628548258e-05, "loss": 0.4214, "step": 60480 }, { "epoch": 1.7659450227003985, "grad_norm": 0.4852620246732485, "learning_rate": 2.2852122195187888e-05, "loss": 0.423, "step": 60485 }, { "epoch": 1.7660910059707158, "grad_norm": 0.4207150912369212, "learning_rate": 2.2849418761827522e-05, "loss": 0.4233, "step": 60490 }, { "epoch": 1.766236989241033, "grad_norm": 0.46474536919190146, "learning_rate": 2.2846715328467156e-05, "loss": 0.4255, "step": 60495 }, { "epoch": 1.7663829725113502, "grad_norm": 0.4842224386961826, "learning_rate": 2.2844011895106787e-05, "loss": 0.4352, "step": 60500 }, { "epoch": 1.7665289557816675, "grad_norm": 0.47091911317789253, "learning_rate": 2.2841308461746418e-05, "loss": 0.4329, "step": 60505 }, { "epoch": 1.7666749390519847, "grad_norm": 0.48707357208449065, "learning_rate": 2.283860502838605e-05, "loss": 0.4135, "step": 60510 }, { "epoch": 1.7668209223223017, "grad_norm": 0.49470397911257535, "learning_rate": 2.2835901595025682e-05, "loss": 0.4478, "step": 60515 }, { "epoch": 1.7669669055926192, "grad_norm": 0.48366692597590916, "learning_rate": 2.2833198161665316e-05, "loss": 0.4385, "step": 60520 }, { "epoch": 1.7671128888629362, "grad_norm": 0.5020101999369971, "learning_rate": 2.283049472830495e-05, "loss": 0.4655, "step": 60525 }, { "epoch": 1.7672588721332536, "grad_norm": 0.4337433108583918, "learning_rate": 2.282779129494458e-05, "loss": 0.4123, "step": 60530 }, { "epoch": 1.7674048554035706, "grad_norm": 0.5110585953755038, "learning_rate": 2.282508786158421e-05, "loss": 0.4201, "step": 60535 }, { "epoch": 1.767550838673888, "grad_norm": 0.4969300805586985, "learning_rate": 2.2822384428223845e-05, "loss": 0.4597, "step": 60540 }, { "epoch": 1.767696821944205, "grad_norm": 0.45653867336827914, "learning_rate": 2.2819680994863476e-05, "loss": 0.4317, "step": 60545 }, { "epoch": 1.7678428052145225, "grad_norm": 0.4650008531302411, "learning_rate": 2.281697756150311e-05, "loss": 0.436, "step": 60550 }, { "epoch": 1.7679887884848395, "grad_norm": 0.5081924270608025, "learning_rate": 2.2814274128142744e-05, "loss": 0.4673, "step": 60555 }, { "epoch": 1.768134771755157, "grad_norm": 0.47707637833879785, "learning_rate": 2.2811570694782375e-05, "loss": 0.4436, "step": 60560 }, { "epoch": 1.768280755025474, "grad_norm": 0.46870075012366996, "learning_rate": 2.2808867261422005e-05, "loss": 0.4264, "step": 60565 }, { "epoch": 1.7684267382957914, "grad_norm": 0.5157173037829444, "learning_rate": 2.280616382806164e-05, "loss": 0.4509, "step": 60570 }, { "epoch": 1.7685727215661085, "grad_norm": 0.46410015118153847, "learning_rate": 2.280346039470127e-05, "loss": 0.4405, "step": 60575 }, { "epoch": 1.768718704836426, "grad_norm": 0.46069157339658917, "learning_rate": 2.2800756961340904e-05, "loss": 0.4177, "step": 60580 }, { "epoch": 1.768864688106743, "grad_norm": 0.4450784259313821, "learning_rate": 2.2798053527980538e-05, "loss": 0.4562, "step": 60585 }, { "epoch": 1.7690106713770601, "grad_norm": 0.5167071646518596, "learning_rate": 2.279535009462017e-05, "loss": 0.4595, "step": 60590 }, { "epoch": 1.7691566546473774, "grad_norm": 0.47595347019385015, "learning_rate": 2.27926466612598e-05, "loss": 0.4291, "step": 60595 }, { "epoch": 1.7693026379176946, "grad_norm": 0.48938748305621305, "learning_rate": 2.2789943227899433e-05, "loss": 0.4363, "step": 60600 }, { "epoch": 1.7694486211880118, "grad_norm": 0.4618634063052278, "learning_rate": 2.2787239794539067e-05, "loss": 0.4513, "step": 60605 }, { "epoch": 1.769594604458329, "grad_norm": 0.4743384726003595, "learning_rate": 2.2784536361178698e-05, "loss": 0.4372, "step": 60610 }, { "epoch": 1.7697405877286463, "grad_norm": 0.49290220337828883, "learning_rate": 2.2781832927818332e-05, "loss": 0.4447, "step": 60615 }, { "epoch": 1.7698865709989635, "grad_norm": 0.49743757631808255, "learning_rate": 2.2779129494457963e-05, "loss": 0.429, "step": 60620 }, { "epoch": 1.7700325542692807, "grad_norm": 0.47440446077803955, "learning_rate": 2.2776426061097593e-05, "loss": 0.4204, "step": 60625 }, { "epoch": 1.770178537539598, "grad_norm": 0.5242703183134899, "learning_rate": 2.2773722627737227e-05, "loss": 0.4235, "step": 60630 }, { "epoch": 1.7703245208099152, "grad_norm": 0.4995926865453311, "learning_rate": 2.277101919437686e-05, "loss": 0.4392, "step": 60635 }, { "epoch": 1.7704705040802324, "grad_norm": 0.4830004554262603, "learning_rate": 2.2768315761016492e-05, "loss": 0.4381, "step": 60640 }, { "epoch": 1.7706164873505497, "grad_norm": 0.47360989832829326, "learning_rate": 2.2765612327656126e-05, "loss": 0.4212, "step": 60645 }, { "epoch": 1.770762470620867, "grad_norm": 0.49256107526165444, "learning_rate": 2.2762908894295756e-05, "loss": 0.4211, "step": 60650 }, { "epoch": 1.7709084538911841, "grad_norm": 0.45521203222026646, "learning_rate": 2.2760205460935387e-05, "loss": 0.4375, "step": 60655 }, { "epoch": 1.7710544371615011, "grad_norm": 0.49728730638865637, "learning_rate": 2.275750202757502e-05, "loss": 0.4348, "step": 60660 }, { "epoch": 1.7712004204318186, "grad_norm": 0.4577064140626221, "learning_rate": 2.2754798594214655e-05, "loss": 0.422, "step": 60665 }, { "epoch": 1.7713464037021356, "grad_norm": 0.4892019934554924, "learning_rate": 2.2752095160854286e-05, "loss": 0.4296, "step": 60670 }, { "epoch": 1.771492386972453, "grad_norm": 0.4905360783164589, "learning_rate": 2.274939172749392e-05, "loss": 0.445, "step": 60675 }, { "epoch": 1.77163837024277, "grad_norm": 0.5272497694680038, "learning_rate": 2.274668829413355e-05, "loss": 0.4644, "step": 60680 }, { "epoch": 1.7717843535130875, "grad_norm": 0.495214446006452, "learning_rate": 2.274398486077318e-05, "loss": 0.434, "step": 60685 }, { "epoch": 1.7719303367834045, "grad_norm": 0.4644558858685788, "learning_rate": 2.2741281427412815e-05, "loss": 0.4348, "step": 60690 }, { "epoch": 1.772076320053722, "grad_norm": 0.48692361169989007, "learning_rate": 2.273857799405245e-05, "loss": 0.4532, "step": 60695 }, { "epoch": 1.772222303324039, "grad_norm": 0.4925294657444424, "learning_rate": 2.273587456069208e-05, "loss": 0.4708, "step": 60700 }, { "epoch": 1.7723682865943564, "grad_norm": 0.4844872752557856, "learning_rate": 2.2733171127331714e-05, "loss": 0.435, "step": 60705 }, { "epoch": 1.7725142698646734, "grad_norm": 0.4803294825484957, "learning_rate": 2.2730467693971344e-05, "loss": 0.4453, "step": 60710 }, { "epoch": 1.7726602531349909, "grad_norm": 0.5103852309446388, "learning_rate": 2.2727764260610975e-05, "loss": 0.4588, "step": 60715 }, { "epoch": 1.7728062364053079, "grad_norm": 0.4944917236937353, "learning_rate": 2.272506082725061e-05, "loss": 0.4702, "step": 60720 }, { "epoch": 1.7729522196756253, "grad_norm": 0.436762190425501, "learning_rate": 2.2722357393890243e-05, "loss": 0.4298, "step": 60725 }, { "epoch": 1.7730982029459423, "grad_norm": 0.486069011288724, "learning_rate": 2.2719653960529874e-05, "loss": 0.426, "step": 60730 }, { "epoch": 1.7732441862162596, "grad_norm": 0.538141286632376, "learning_rate": 2.2716950527169508e-05, "loss": 0.4439, "step": 60735 }, { "epoch": 1.7733901694865768, "grad_norm": 0.44852805605571583, "learning_rate": 2.2714247093809138e-05, "loss": 0.3829, "step": 60740 }, { "epoch": 1.773536152756894, "grad_norm": 0.47342998845086603, "learning_rate": 2.271154366044877e-05, "loss": 0.4412, "step": 60745 }, { "epoch": 1.7736821360272113, "grad_norm": 0.49752139968799747, "learning_rate": 2.2708840227088403e-05, "loss": 0.4416, "step": 60750 }, { "epoch": 1.7738281192975285, "grad_norm": 0.44750033553027624, "learning_rate": 2.2706136793728037e-05, "loss": 0.4056, "step": 60755 }, { "epoch": 1.7739741025678457, "grad_norm": 0.4969186626634302, "learning_rate": 2.2703433360367667e-05, "loss": 0.4594, "step": 60760 }, { "epoch": 1.774120085838163, "grad_norm": 0.4461658767534498, "learning_rate": 2.27007299270073e-05, "loss": 0.4406, "step": 60765 }, { "epoch": 1.7742660691084802, "grad_norm": 0.4748854306484342, "learning_rate": 2.2698026493646932e-05, "loss": 0.4534, "step": 60770 }, { "epoch": 1.7744120523787974, "grad_norm": 0.4543741608865272, "learning_rate": 2.2695323060286566e-05, "loss": 0.4417, "step": 60775 }, { "epoch": 1.7745580356491146, "grad_norm": 0.4518307459466068, "learning_rate": 2.2692619626926197e-05, "loss": 0.4099, "step": 60780 }, { "epoch": 1.7747040189194319, "grad_norm": 0.46572714001728016, "learning_rate": 2.268991619356583e-05, "loss": 0.4279, "step": 60785 }, { "epoch": 1.774850002189749, "grad_norm": 0.4798937833390479, "learning_rate": 2.268721276020546e-05, "loss": 0.4275, "step": 60790 }, { "epoch": 1.7749959854600663, "grad_norm": 0.454749235571179, "learning_rate": 2.2684509326845095e-05, "loss": 0.4426, "step": 60795 }, { "epoch": 1.7751419687303835, "grad_norm": 0.48718230977259075, "learning_rate": 2.2681805893484726e-05, "loss": 0.4264, "step": 60800 }, { "epoch": 1.7752879520007008, "grad_norm": 0.47282719498697673, "learning_rate": 2.267910246012436e-05, "loss": 0.4051, "step": 60805 }, { "epoch": 1.775433935271018, "grad_norm": 0.43549960829440354, "learning_rate": 2.267639902676399e-05, "loss": 0.4446, "step": 60810 }, { "epoch": 1.775579918541335, "grad_norm": 0.45237931386383, "learning_rate": 2.2673695593403625e-05, "loss": 0.4341, "step": 60815 }, { "epoch": 1.7757259018116525, "grad_norm": 0.4528816283097299, "learning_rate": 2.2670992160043255e-05, "loss": 0.4433, "step": 60820 }, { "epoch": 1.7758718850819695, "grad_norm": 0.4747040433205621, "learning_rate": 2.2668288726682886e-05, "loss": 0.4456, "step": 60825 }, { "epoch": 1.776017868352287, "grad_norm": 0.49267351155274075, "learning_rate": 2.266558529332252e-05, "loss": 0.4334, "step": 60830 }, { "epoch": 1.776163851622604, "grad_norm": 0.45197499918577466, "learning_rate": 2.2662881859962154e-05, "loss": 0.4449, "step": 60835 }, { "epoch": 1.7763098348929214, "grad_norm": 0.4431547479825563, "learning_rate": 2.2660178426601785e-05, "loss": 0.429, "step": 60840 }, { "epoch": 1.7764558181632384, "grad_norm": 0.48264020835389604, "learning_rate": 2.265747499324142e-05, "loss": 0.4329, "step": 60845 }, { "epoch": 1.7766018014335558, "grad_norm": 0.4790124889584304, "learning_rate": 2.265477155988105e-05, "loss": 0.4622, "step": 60850 }, { "epoch": 1.7767477847038728, "grad_norm": 0.4872461084963947, "learning_rate": 2.265206812652068e-05, "loss": 0.4368, "step": 60855 }, { "epoch": 1.7768937679741903, "grad_norm": 0.4403589423519534, "learning_rate": 2.2649364693160317e-05, "loss": 0.4601, "step": 60860 }, { "epoch": 1.7770397512445073, "grad_norm": 0.49187078001782736, "learning_rate": 2.2646661259799948e-05, "loss": 0.4418, "step": 60865 }, { "epoch": 1.7771857345148248, "grad_norm": 0.5102990719941806, "learning_rate": 2.264395782643958e-05, "loss": 0.4424, "step": 60870 }, { "epoch": 1.7773317177851418, "grad_norm": 0.4673147543517679, "learning_rate": 2.2641254393079212e-05, "loss": 0.4349, "step": 60875 }, { "epoch": 1.777477701055459, "grad_norm": 0.5163994080988328, "learning_rate": 2.2638550959718843e-05, "loss": 0.4639, "step": 60880 }, { "epoch": 1.7776236843257762, "grad_norm": 0.495750297786446, "learning_rate": 2.2635847526358474e-05, "loss": 0.4521, "step": 60885 }, { "epoch": 1.7777696675960935, "grad_norm": 0.46993482582203366, "learning_rate": 2.263314409299811e-05, "loss": 0.4581, "step": 60890 }, { "epoch": 1.7779156508664107, "grad_norm": 0.460297378169422, "learning_rate": 2.263044065963774e-05, "loss": 0.4592, "step": 60895 }, { "epoch": 1.778061634136728, "grad_norm": 0.4546889022549384, "learning_rate": 2.2627737226277372e-05, "loss": 0.4518, "step": 60900 }, { "epoch": 1.7782076174070451, "grad_norm": 0.46205553313361947, "learning_rate": 2.2625033792917006e-05, "loss": 0.4485, "step": 60905 }, { "epoch": 1.7783536006773624, "grad_norm": 0.46085971757516, "learning_rate": 2.2622330359556637e-05, "loss": 0.4389, "step": 60910 }, { "epoch": 1.7784995839476796, "grad_norm": 0.5011531233952367, "learning_rate": 2.2619626926196268e-05, "loss": 0.4193, "step": 60915 }, { "epoch": 1.7786455672179968, "grad_norm": 0.4830104786600827, "learning_rate": 2.2616923492835905e-05, "loss": 0.4484, "step": 60920 }, { "epoch": 1.778791550488314, "grad_norm": 0.4855113308252899, "learning_rate": 2.2614220059475536e-05, "loss": 0.4451, "step": 60925 }, { "epoch": 1.7789375337586313, "grad_norm": 0.48216147320713465, "learning_rate": 2.2611516626115166e-05, "loss": 0.4725, "step": 60930 }, { "epoch": 1.7790835170289485, "grad_norm": 0.48614631106302875, "learning_rate": 2.26088131927548e-05, "loss": 0.423, "step": 60935 }, { "epoch": 1.7792295002992657, "grad_norm": 0.46891590933902305, "learning_rate": 2.260610975939443e-05, "loss": 0.4412, "step": 60940 }, { "epoch": 1.779375483569583, "grad_norm": 0.45443600373925874, "learning_rate": 2.2603406326034065e-05, "loss": 0.4448, "step": 60945 }, { "epoch": 1.7795214668399002, "grad_norm": 0.4855840347189831, "learning_rate": 2.26007028926737e-05, "loss": 0.4519, "step": 60950 }, { "epoch": 1.7796674501102174, "grad_norm": 0.4862734154602773, "learning_rate": 2.259799945931333e-05, "loss": 0.4318, "step": 60955 }, { "epoch": 1.7798134333805344, "grad_norm": 0.4486993138835877, "learning_rate": 2.259529602595296e-05, "loss": 0.4282, "step": 60960 }, { "epoch": 1.7799594166508519, "grad_norm": 0.44098211765865997, "learning_rate": 2.2592592592592594e-05, "loss": 0.4357, "step": 60965 }, { "epoch": 1.780105399921169, "grad_norm": 0.48730657585519277, "learning_rate": 2.2589889159232225e-05, "loss": 0.4192, "step": 60970 }, { "epoch": 1.7802513831914863, "grad_norm": 0.47414286970464914, "learning_rate": 2.258718572587186e-05, "loss": 0.4361, "step": 60975 }, { "epoch": 1.7803973664618034, "grad_norm": 0.4441150536626117, "learning_rate": 2.2584482292511493e-05, "loss": 0.4375, "step": 60980 }, { "epoch": 1.7805433497321208, "grad_norm": 0.4420229758742115, "learning_rate": 2.2581778859151123e-05, "loss": 0.4375, "step": 60985 }, { "epoch": 1.7806893330024378, "grad_norm": 0.44787286490706485, "learning_rate": 2.2579075425790754e-05, "loss": 0.4125, "step": 60990 }, { "epoch": 1.7808353162727553, "grad_norm": 0.4641709452937312, "learning_rate": 2.2576371992430388e-05, "loss": 0.4349, "step": 60995 }, { "epoch": 1.7809812995430723, "grad_norm": 0.4810322138065801, "learning_rate": 2.257366855907002e-05, "loss": 0.4317, "step": 61000 }, { "epoch": 1.7811272828133897, "grad_norm": 0.4778641847431289, "learning_rate": 2.2570965125709653e-05, "loss": 0.4702, "step": 61005 }, { "epoch": 1.7812732660837067, "grad_norm": 0.47649593899925813, "learning_rate": 2.2568261692349287e-05, "loss": 0.442, "step": 61010 }, { "epoch": 1.7814192493540242, "grad_norm": 0.4613631891167365, "learning_rate": 2.2565558258988917e-05, "loss": 0.4502, "step": 61015 }, { "epoch": 1.7815652326243412, "grad_norm": 0.48762275197435345, "learning_rate": 2.2562854825628548e-05, "loss": 0.4042, "step": 61020 }, { "epoch": 1.7817112158946584, "grad_norm": 0.46423208735790517, "learning_rate": 2.2560151392268182e-05, "loss": 0.4436, "step": 61025 }, { "epoch": 1.7818571991649756, "grad_norm": 0.4454030507633903, "learning_rate": 2.2557447958907816e-05, "loss": 0.4093, "step": 61030 }, { "epoch": 1.7820031824352929, "grad_norm": 0.4474823037878576, "learning_rate": 2.2554744525547447e-05, "loss": 0.421, "step": 61035 }, { "epoch": 1.78214916570561, "grad_norm": 0.44569070690919643, "learning_rate": 2.255204109218708e-05, "loss": 0.4265, "step": 61040 }, { "epoch": 1.7822951489759273, "grad_norm": 0.47273270955113084, "learning_rate": 2.254933765882671e-05, "loss": 0.4272, "step": 61045 }, { "epoch": 1.7824411322462446, "grad_norm": 0.48812895396215156, "learning_rate": 2.2546634225466342e-05, "loss": 0.452, "step": 61050 }, { "epoch": 1.7825871155165618, "grad_norm": 0.42543368423285044, "learning_rate": 2.2543930792105976e-05, "loss": 0.4223, "step": 61055 }, { "epoch": 1.782733098786879, "grad_norm": 0.476409880669018, "learning_rate": 2.254122735874561e-05, "loss": 0.4407, "step": 61060 }, { "epoch": 1.7828790820571963, "grad_norm": 0.5164900793316625, "learning_rate": 2.253852392538524e-05, "loss": 0.4595, "step": 61065 }, { "epoch": 1.7830250653275135, "grad_norm": 0.4544098303734319, "learning_rate": 2.253582049202487e-05, "loss": 0.4048, "step": 61070 }, { "epoch": 1.7831710485978307, "grad_norm": 0.46399848713403463, "learning_rate": 2.2533117058664505e-05, "loss": 0.4489, "step": 61075 }, { "epoch": 1.783317031868148, "grad_norm": 0.4607985348914235, "learning_rate": 2.2530413625304136e-05, "loss": 0.4204, "step": 61080 }, { "epoch": 1.7834630151384652, "grad_norm": 0.4890266214605412, "learning_rate": 2.252771019194377e-05, "loss": 0.4287, "step": 61085 }, { "epoch": 1.7836089984087824, "grad_norm": 0.5195861860707084, "learning_rate": 2.2525006758583404e-05, "loss": 0.4439, "step": 61090 }, { "epoch": 1.7837549816790996, "grad_norm": 0.4955908188231051, "learning_rate": 2.2522303325223034e-05, "loss": 0.4292, "step": 61095 }, { "epoch": 1.7839009649494169, "grad_norm": 0.4402294263595353, "learning_rate": 2.2519599891862665e-05, "loss": 0.4262, "step": 61100 }, { "epoch": 1.7840469482197339, "grad_norm": 0.473385836802877, "learning_rate": 2.25168964585023e-05, "loss": 0.4158, "step": 61105 }, { "epoch": 1.7841929314900513, "grad_norm": 0.4586337653736434, "learning_rate": 2.251419302514193e-05, "loss": 0.4033, "step": 61110 }, { "epoch": 1.7843389147603683, "grad_norm": 0.5031310774459206, "learning_rate": 2.2511489591781564e-05, "loss": 0.4422, "step": 61115 }, { "epoch": 1.7844848980306858, "grad_norm": 0.4831593743754851, "learning_rate": 2.2508786158421198e-05, "loss": 0.451, "step": 61120 }, { "epoch": 1.7846308813010028, "grad_norm": 0.4828710414939829, "learning_rate": 2.2506082725060828e-05, "loss": 0.4664, "step": 61125 }, { "epoch": 1.7847768645713202, "grad_norm": 0.48436024288069135, "learning_rate": 2.250337929170046e-05, "loss": 0.4365, "step": 61130 }, { "epoch": 1.7849228478416372, "grad_norm": 0.4516986035573471, "learning_rate": 2.2500675858340093e-05, "loss": 0.4313, "step": 61135 }, { "epoch": 1.7850688311119547, "grad_norm": 0.4353322451105843, "learning_rate": 2.2497972424979724e-05, "loss": 0.4359, "step": 61140 }, { "epoch": 1.7852148143822717, "grad_norm": 0.46807707987978997, "learning_rate": 2.2495268991619358e-05, "loss": 0.4584, "step": 61145 }, { "epoch": 1.7853607976525891, "grad_norm": 0.4859485233412024, "learning_rate": 2.249256555825899e-05, "loss": 0.4423, "step": 61150 }, { "epoch": 1.7855067809229062, "grad_norm": 0.5031920714466279, "learning_rate": 2.2489862124898622e-05, "loss": 0.4553, "step": 61155 }, { "epoch": 1.7856527641932236, "grad_norm": 0.46365206898682093, "learning_rate": 2.2487158691538253e-05, "loss": 0.4546, "step": 61160 }, { "epoch": 1.7857987474635406, "grad_norm": 0.4618670541297103, "learning_rate": 2.2484455258177887e-05, "loss": 0.4462, "step": 61165 }, { "epoch": 1.785944730733858, "grad_norm": 0.4865295321939115, "learning_rate": 2.2481751824817517e-05, "loss": 0.4558, "step": 61170 }, { "epoch": 1.786090714004175, "grad_norm": 0.47717381688574734, "learning_rate": 2.247904839145715e-05, "loss": 0.4226, "step": 61175 }, { "epoch": 1.7862366972744923, "grad_norm": 0.5354335217423495, "learning_rate": 2.2476344958096785e-05, "loss": 0.4577, "step": 61180 }, { "epoch": 1.7863826805448095, "grad_norm": 0.5023534999127401, "learning_rate": 2.2473641524736416e-05, "loss": 0.4549, "step": 61185 }, { "epoch": 1.7865286638151268, "grad_norm": 0.498787986730735, "learning_rate": 2.2470938091376047e-05, "loss": 0.4525, "step": 61190 }, { "epoch": 1.786674647085444, "grad_norm": 0.450033055675567, "learning_rate": 2.246823465801568e-05, "loss": 0.4352, "step": 61195 }, { "epoch": 1.7868206303557612, "grad_norm": 0.4310629022741371, "learning_rate": 2.2465531224655315e-05, "loss": 0.4169, "step": 61200 }, { "epoch": 1.7869666136260784, "grad_norm": 0.42188833132049847, "learning_rate": 2.2462827791294945e-05, "loss": 0.4251, "step": 61205 }, { "epoch": 1.7871125968963957, "grad_norm": 0.5106939166216186, "learning_rate": 2.246012435793458e-05, "loss": 0.4709, "step": 61210 }, { "epoch": 1.787258580166713, "grad_norm": 0.44549121725974544, "learning_rate": 2.245742092457421e-05, "loss": 0.435, "step": 61215 }, { "epoch": 1.7874045634370301, "grad_norm": 0.49035993278337775, "learning_rate": 2.245471749121384e-05, "loss": 0.4571, "step": 61220 }, { "epoch": 1.7875505467073474, "grad_norm": 0.46111140548946017, "learning_rate": 2.2452014057853475e-05, "loss": 0.4334, "step": 61225 }, { "epoch": 1.7876965299776646, "grad_norm": 0.48485838525632696, "learning_rate": 2.244931062449311e-05, "loss": 0.424, "step": 61230 }, { "epoch": 1.7878425132479818, "grad_norm": 0.4436654259187103, "learning_rate": 2.244660719113274e-05, "loss": 0.4245, "step": 61235 }, { "epoch": 1.787988496518299, "grad_norm": 0.4711744711649219, "learning_rate": 2.2443903757772373e-05, "loss": 0.4531, "step": 61240 }, { "epoch": 1.7881344797886163, "grad_norm": 0.4650691890584466, "learning_rate": 2.2441200324412004e-05, "loss": 0.443, "step": 61245 }, { "epoch": 1.7882804630589333, "grad_norm": 0.45811147077954106, "learning_rate": 2.2438496891051635e-05, "loss": 0.4447, "step": 61250 }, { "epoch": 1.7884264463292507, "grad_norm": 0.5010283928690165, "learning_rate": 2.243579345769127e-05, "loss": 0.4382, "step": 61255 }, { "epoch": 1.7885724295995677, "grad_norm": 0.4521668266388003, "learning_rate": 2.2433090024330903e-05, "loss": 0.4123, "step": 61260 }, { "epoch": 1.7887184128698852, "grad_norm": 0.4912927736453406, "learning_rate": 2.2430386590970533e-05, "loss": 0.4118, "step": 61265 }, { "epoch": 1.7888643961402022, "grad_norm": 0.49775873624577327, "learning_rate": 2.2427683157610167e-05, "loss": 0.469, "step": 61270 }, { "epoch": 1.7890103794105197, "grad_norm": 0.5010574232911427, "learning_rate": 2.2424979724249798e-05, "loss": 0.4274, "step": 61275 }, { "epoch": 1.7891563626808367, "grad_norm": 0.49612303079547315, "learning_rate": 2.242227629088943e-05, "loss": 0.4302, "step": 61280 }, { "epoch": 1.7893023459511541, "grad_norm": 0.4571491313549205, "learning_rate": 2.2419572857529066e-05, "loss": 0.4216, "step": 61285 }, { "epoch": 1.7894483292214711, "grad_norm": 0.45301678432812, "learning_rate": 2.2416869424168696e-05, "loss": 0.458, "step": 61290 }, { "epoch": 1.7895943124917886, "grad_norm": 0.439448874139226, "learning_rate": 2.2414165990808327e-05, "loss": 0.4406, "step": 61295 }, { "epoch": 1.7897402957621056, "grad_norm": 0.48727120658352463, "learning_rate": 2.241146255744796e-05, "loss": 0.4392, "step": 61300 }, { "epoch": 1.789886279032423, "grad_norm": 0.48443232149639337, "learning_rate": 2.2408759124087592e-05, "loss": 0.462, "step": 61305 }, { "epoch": 1.79003226230274, "grad_norm": 0.48432714969731677, "learning_rate": 2.2406055690727222e-05, "loss": 0.4505, "step": 61310 }, { "epoch": 1.7901782455730575, "grad_norm": 0.4389789572203218, "learning_rate": 2.240335225736686e-05, "loss": 0.4389, "step": 61315 }, { "epoch": 1.7903242288433745, "grad_norm": 0.46053261687977437, "learning_rate": 2.240064882400649e-05, "loss": 0.4427, "step": 61320 }, { "epoch": 1.7904702121136917, "grad_norm": 0.44423614672588674, "learning_rate": 2.239794539064612e-05, "loss": 0.4404, "step": 61325 }, { "epoch": 1.790616195384009, "grad_norm": 0.49522501636859484, "learning_rate": 2.2395241957285755e-05, "loss": 0.4644, "step": 61330 }, { "epoch": 1.7907621786543262, "grad_norm": 0.5105675454584923, "learning_rate": 2.2392538523925386e-05, "loss": 0.4239, "step": 61335 }, { "epoch": 1.7909081619246434, "grad_norm": 0.4877019930159752, "learning_rate": 2.2389835090565016e-05, "loss": 0.4343, "step": 61340 }, { "epoch": 1.7910541451949606, "grad_norm": 0.5014527202493372, "learning_rate": 2.238713165720465e-05, "loss": 0.4429, "step": 61345 }, { "epoch": 1.7912001284652779, "grad_norm": 0.5085726884513465, "learning_rate": 2.2384428223844284e-05, "loss": 0.4242, "step": 61350 }, { "epoch": 1.791346111735595, "grad_norm": 0.4655857542273992, "learning_rate": 2.2381724790483915e-05, "loss": 0.433, "step": 61355 }, { "epoch": 1.7914920950059123, "grad_norm": 0.5280721685238429, "learning_rate": 2.237902135712355e-05, "loss": 0.4942, "step": 61360 }, { "epoch": 1.7916380782762296, "grad_norm": 0.4628498968094886, "learning_rate": 2.237631792376318e-05, "loss": 0.434, "step": 61365 }, { "epoch": 1.7917840615465468, "grad_norm": 0.45864029952814045, "learning_rate": 2.2373614490402814e-05, "loss": 0.4535, "step": 61370 }, { "epoch": 1.791930044816864, "grad_norm": 0.45733801727236373, "learning_rate": 2.2370911057042444e-05, "loss": 0.4439, "step": 61375 }, { "epoch": 1.7920760280871812, "grad_norm": 0.4585425665439279, "learning_rate": 2.2368207623682078e-05, "loss": 0.4104, "step": 61380 }, { "epoch": 1.7922220113574985, "grad_norm": 0.5034231772703714, "learning_rate": 2.236550419032171e-05, "loss": 0.4445, "step": 61385 }, { "epoch": 1.7923679946278157, "grad_norm": 0.5236591936181914, "learning_rate": 2.2362800756961343e-05, "loss": 0.4436, "step": 61390 }, { "epoch": 1.7925139778981327, "grad_norm": 0.46114087413525323, "learning_rate": 2.2360097323600973e-05, "loss": 0.4334, "step": 61395 }, { "epoch": 1.7926599611684502, "grad_norm": 0.4307309828833691, "learning_rate": 2.2357393890240607e-05, "loss": 0.4073, "step": 61400 }, { "epoch": 1.7928059444387672, "grad_norm": 0.5034573602310181, "learning_rate": 2.2354690456880238e-05, "loss": 0.4568, "step": 61405 }, { "epoch": 1.7929519277090846, "grad_norm": 0.4780595930576841, "learning_rate": 2.2351987023519872e-05, "loss": 0.4328, "step": 61410 }, { "epoch": 1.7930979109794016, "grad_norm": 0.46777403868989487, "learning_rate": 2.2349283590159503e-05, "loss": 0.4236, "step": 61415 }, { "epoch": 1.793243894249719, "grad_norm": 0.47552164255220025, "learning_rate": 2.2346580156799137e-05, "loss": 0.4376, "step": 61420 }, { "epoch": 1.793389877520036, "grad_norm": 0.4592535058227235, "learning_rate": 2.2343876723438767e-05, "loss": 0.4378, "step": 61425 }, { "epoch": 1.7935358607903535, "grad_norm": 0.512449500458084, "learning_rate": 2.23411732900784e-05, "loss": 0.4408, "step": 61430 }, { "epoch": 1.7936818440606705, "grad_norm": 0.4487947998850759, "learning_rate": 2.2338469856718032e-05, "loss": 0.4217, "step": 61435 }, { "epoch": 1.793827827330988, "grad_norm": 0.4414610320413094, "learning_rate": 2.2335766423357666e-05, "loss": 0.4198, "step": 61440 }, { "epoch": 1.793973810601305, "grad_norm": 0.4672868275590746, "learning_rate": 2.2333062989997297e-05, "loss": 0.4165, "step": 61445 }, { "epoch": 1.7941197938716225, "grad_norm": 0.47031634097704406, "learning_rate": 2.233035955663693e-05, "loss": 0.4393, "step": 61450 }, { "epoch": 1.7942657771419395, "grad_norm": 0.5278348285831964, "learning_rate": 2.2327656123276565e-05, "loss": 0.4663, "step": 61455 }, { "epoch": 1.794411760412257, "grad_norm": 0.4775399138843955, "learning_rate": 2.2324952689916195e-05, "loss": 0.4418, "step": 61460 }, { "epoch": 1.794557743682574, "grad_norm": 0.4661377345229991, "learning_rate": 2.2322249256555826e-05, "loss": 0.4468, "step": 61465 }, { "epoch": 1.7947037269528912, "grad_norm": 0.5456230401529519, "learning_rate": 2.231954582319546e-05, "loss": 0.4719, "step": 61470 }, { "epoch": 1.7948497102232084, "grad_norm": 0.4492116922894826, "learning_rate": 2.231684238983509e-05, "loss": 0.4461, "step": 61475 }, { "epoch": 1.7949956934935256, "grad_norm": 0.4860058411933293, "learning_rate": 2.231413895647472e-05, "loss": 0.4207, "step": 61480 }, { "epoch": 1.7951416767638428, "grad_norm": 0.4441854059967834, "learning_rate": 2.231143552311436e-05, "loss": 0.4046, "step": 61485 }, { "epoch": 1.79528766003416, "grad_norm": 0.47873058253984213, "learning_rate": 2.230873208975399e-05, "loss": 0.4329, "step": 61490 }, { "epoch": 1.7954336433044773, "grad_norm": 0.5109264939362523, "learning_rate": 2.230602865639362e-05, "loss": 0.4634, "step": 61495 }, { "epoch": 1.7955796265747945, "grad_norm": 0.4280695107949201, "learning_rate": 2.2303325223033254e-05, "loss": 0.421, "step": 61500 }, { "epoch": 1.7957256098451118, "grad_norm": 0.4728909283657651, "learning_rate": 2.2300621789672884e-05, "loss": 0.4088, "step": 61505 }, { "epoch": 1.795871593115429, "grad_norm": 0.5128019733893989, "learning_rate": 2.2297918356312515e-05, "loss": 0.4273, "step": 61510 }, { "epoch": 1.7960175763857462, "grad_norm": 0.4614201176640777, "learning_rate": 2.2295214922952152e-05, "loss": 0.4267, "step": 61515 }, { "epoch": 1.7961635596560634, "grad_norm": 0.47066459569381497, "learning_rate": 2.2292511489591783e-05, "loss": 0.4447, "step": 61520 }, { "epoch": 1.7963095429263807, "grad_norm": 0.4743279454474077, "learning_rate": 2.2289808056231414e-05, "loss": 0.4531, "step": 61525 }, { "epoch": 1.796455526196698, "grad_norm": 0.4742577835047998, "learning_rate": 2.2287104622871048e-05, "loss": 0.4305, "step": 61530 }, { "epoch": 1.7966015094670151, "grad_norm": 0.4602965489273305, "learning_rate": 2.2284401189510678e-05, "loss": 0.4291, "step": 61535 }, { "epoch": 1.7967474927373321, "grad_norm": 0.4793265629608443, "learning_rate": 2.2281697756150312e-05, "loss": 0.4268, "step": 61540 }, { "epoch": 1.7968934760076496, "grad_norm": 0.4558266308178956, "learning_rate": 2.2278994322789946e-05, "loss": 0.413, "step": 61545 }, { "epoch": 1.7970394592779666, "grad_norm": 0.48277178851413244, "learning_rate": 2.2276290889429577e-05, "loss": 0.4386, "step": 61550 }, { "epoch": 1.797185442548284, "grad_norm": 0.4320994697477604, "learning_rate": 2.2273587456069208e-05, "loss": 0.4492, "step": 61555 }, { "epoch": 1.797331425818601, "grad_norm": 0.4643880828170141, "learning_rate": 2.227088402270884e-05, "loss": 0.4429, "step": 61560 }, { "epoch": 1.7974774090889185, "grad_norm": 0.43925238647539644, "learning_rate": 2.2268180589348472e-05, "loss": 0.4147, "step": 61565 }, { "epoch": 1.7976233923592355, "grad_norm": 0.4771203686688166, "learning_rate": 2.2265477155988106e-05, "loss": 0.4287, "step": 61570 }, { "epoch": 1.797769375629553, "grad_norm": 0.4650431449579039, "learning_rate": 2.226277372262774e-05, "loss": 0.4048, "step": 61575 }, { "epoch": 1.79791535889987, "grad_norm": 0.4957597970529412, "learning_rate": 2.226007028926737e-05, "loss": 0.4386, "step": 61580 }, { "epoch": 1.7980613421701874, "grad_norm": 0.4853083299631392, "learning_rate": 2.2257366855907e-05, "loss": 0.4721, "step": 61585 }, { "epoch": 1.7982073254405044, "grad_norm": 0.49566267714745543, "learning_rate": 2.2254663422546635e-05, "loss": 0.4316, "step": 61590 }, { "epoch": 1.7983533087108219, "grad_norm": 0.4459739823202336, "learning_rate": 2.225195998918627e-05, "loss": 0.465, "step": 61595 }, { "epoch": 1.798499291981139, "grad_norm": 0.5087782738816993, "learning_rate": 2.22492565558259e-05, "loss": 0.4609, "step": 61600 }, { "epoch": 1.7986452752514563, "grad_norm": 0.43685216567298846, "learning_rate": 2.2246553122465534e-05, "loss": 0.4441, "step": 61605 }, { "epoch": 1.7987912585217734, "grad_norm": 0.47320006747768073, "learning_rate": 2.2243849689105165e-05, "loss": 0.4323, "step": 61610 }, { "epoch": 1.7989372417920906, "grad_norm": 0.49556712051847945, "learning_rate": 2.2241146255744795e-05, "loss": 0.4337, "step": 61615 }, { "epoch": 1.7990832250624078, "grad_norm": 0.4288342750831567, "learning_rate": 2.223844282238443e-05, "loss": 0.4111, "step": 61620 }, { "epoch": 1.799229208332725, "grad_norm": 0.4541492605350652, "learning_rate": 2.2235739389024063e-05, "loss": 0.4217, "step": 61625 }, { "epoch": 1.7993751916030423, "grad_norm": 0.5021624200155705, "learning_rate": 2.2233035955663694e-05, "loss": 0.4544, "step": 61630 }, { "epoch": 1.7995211748733595, "grad_norm": 0.4651465112408157, "learning_rate": 2.2230332522303328e-05, "loss": 0.4081, "step": 61635 }, { "epoch": 1.7996671581436767, "grad_norm": 0.46241502738583484, "learning_rate": 2.222762908894296e-05, "loss": 0.43, "step": 61640 }, { "epoch": 1.799813141413994, "grad_norm": 0.4498019392882979, "learning_rate": 2.222492565558259e-05, "loss": 0.4318, "step": 61645 }, { "epoch": 1.7999591246843112, "grad_norm": 0.503033123619256, "learning_rate": 2.2222222222222223e-05, "loss": 0.4484, "step": 61650 }, { "epoch": 1.8001051079546284, "grad_norm": 0.47049071156310446, "learning_rate": 2.2219518788861857e-05, "loss": 0.4323, "step": 61655 }, { "epoch": 1.8002510912249456, "grad_norm": 0.45992646727048025, "learning_rate": 2.2216815355501488e-05, "loss": 0.4287, "step": 61660 }, { "epoch": 1.8003970744952629, "grad_norm": 0.4874004521592929, "learning_rate": 2.2214111922141122e-05, "loss": 0.4125, "step": 61665 }, { "epoch": 1.80054305776558, "grad_norm": 0.4871943579850022, "learning_rate": 2.2211408488780753e-05, "loss": 0.5022, "step": 61670 }, { "epoch": 1.8006890410358973, "grad_norm": 0.486437050543227, "learning_rate": 2.2208705055420383e-05, "loss": 0.4178, "step": 61675 }, { "epoch": 1.8008350243062146, "grad_norm": 0.45487050703076043, "learning_rate": 2.2206001622060017e-05, "loss": 0.4395, "step": 61680 }, { "epoch": 1.8009810075765316, "grad_norm": 0.42257453842164105, "learning_rate": 2.220329818869965e-05, "loss": 0.415, "step": 61685 }, { "epoch": 1.801126990846849, "grad_norm": 0.5216205124318283, "learning_rate": 2.2200594755339282e-05, "loss": 0.4416, "step": 61690 }, { "epoch": 1.801272974117166, "grad_norm": 0.49297174737541183, "learning_rate": 2.2197891321978916e-05, "loss": 0.4194, "step": 61695 }, { "epoch": 1.8014189573874835, "grad_norm": 0.4802452758314474, "learning_rate": 2.2195187888618546e-05, "loss": 0.4593, "step": 61700 }, { "epoch": 1.8015649406578005, "grad_norm": 0.44791495628903305, "learning_rate": 2.2192484455258177e-05, "loss": 0.4567, "step": 61705 }, { "epoch": 1.801710923928118, "grad_norm": 0.4755878546382485, "learning_rate": 2.218978102189781e-05, "loss": 0.4379, "step": 61710 }, { "epoch": 1.801856907198435, "grad_norm": 0.48225789741449193, "learning_rate": 2.2187077588537445e-05, "loss": 0.4443, "step": 61715 }, { "epoch": 1.8020028904687524, "grad_norm": 0.4725068849520954, "learning_rate": 2.2184374155177076e-05, "loss": 0.4425, "step": 61720 }, { "epoch": 1.8021488737390694, "grad_norm": 0.5170904322988223, "learning_rate": 2.2181670721816706e-05, "loss": 0.4309, "step": 61725 }, { "epoch": 1.8022948570093869, "grad_norm": 0.44435357279761656, "learning_rate": 2.217896728845634e-05, "loss": 0.4319, "step": 61730 }, { "epoch": 1.8024408402797039, "grad_norm": 0.5029985722032847, "learning_rate": 2.217626385509597e-05, "loss": 0.4455, "step": 61735 }, { "epoch": 1.8025868235500213, "grad_norm": 0.5098653638194487, "learning_rate": 2.2173560421735605e-05, "loss": 0.4335, "step": 61740 }, { "epoch": 1.8027328068203383, "grad_norm": 0.5162544343760017, "learning_rate": 2.217085698837524e-05, "loss": 0.4658, "step": 61745 }, { "epoch": 1.8028787900906558, "grad_norm": 0.4249379537284754, "learning_rate": 2.216815355501487e-05, "loss": 0.4267, "step": 61750 }, { "epoch": 1.8030247733609728, "grad_norm": 0.46119676895790435, "learning_rate": 2.21654501216545e-05, "loss": 0.4395, "step": 61755 }, { "epoch": 1.80317075663129, "grad_norm": 0.4716488433080659, "learning_rate": 2.2162746688294134e-05, "loss": 0.4875, "step": 61760 }, { "epoch": 1.8033167399016072, "grad_norm": 0.4934338223061732, "learning_rate": 2.2160043254933768e-05, "loss": 0.4223, "step": 61765 }, { "epoch": 1.8034627231719245, "grad_norm": 0.46268772535617386, "learning_rate": 2.21573398215734e-05, "loss": 0.4307, "step": 61770 }, { "epoch": 1.8036087064422417, "grad_norm": 0.5314741763285726, "learning_rate": 2.2154636388213033e-05, "loss": 0.4654, "step": 61775 }, { "epoch": 1.803754689712559, "grad_norm": 0.49531782944151265, "learning_rate": 2.2151932954852664e-05, "loss": 0.4625, "step": 61780 }, { "epoch": 1.8039006729828762, "grad_norm": 0.4823049698688246, "learning_rate": 2.2149229521492294e-05, "loss": 0.4525, "step": 61785 }, { "epoch": 1.8040466562531934, "grad_norm": 0.49694283422570606, "learning_rate": 2.2146526088131928e-05, "loss": 0.4435, "step": 61790 }, { "epoch": 1.8041926395235106, "grad_norm": 0.48610689536005847, "learning_rate": 2.2143822654771562e-05, "loss": 0.4188, "step": 61795 }, { "epoch": 1.8043386227938278, "grad_norm": 0.4938383525311308, "learning_rate": 2.2141119221411193e-05, "loss": 0.4618, "step": 61800 }, { "epoch": 1.804484606064145, "grad_norm": 0.45708909547633775, "learning_rate": 2.2138415788050827e-05, "loss": 0.419, "step": 61805 }, { "epoch": 1.8046305893344623, "grad_norm": 0.4711868847079947, "learning_rate": 2.2135712354690457e-05, "loss": 0.4358, "step": 61810 }, { "epoch": 1.8047765726047795, "grad_norm": 0.46729582259328967, "learning_rate": 2.2133008921330088e-05, "loss": 0.4269, "step": 61815 }, { "epoch": 1.8049225558750968, "grad_norm": 0.41543926778291707, "learning_rate": 2.2130305487969722e-05, "loss": 0.4028, "step": 61820 }, { "epoch": 1.805068539145414, "grad_norm": 0.45388636343848465, "learning_rate": 2.2127602054609356e-05, "loss": 0.4314, "step": 61825 }, { "epoch": 1.805214522415731, "grad_norm": 0.47655797823999074, "learning_rate": 2.2124898621248987e-05, "loss": 0.423, "step": 61830 }, { "epoch": 1.8053605056860484, "grad_norm": 0.4234940508298702, "learning_rate": 2.212219518788862e-05, "loss": 0.4236, "step": 61835 }, { "epoch": 1.8055064889563655, "grad_norm": 0.4687998071751849, "learning_rate": 2.211949175452825e-05, "loss": 0.4432, "step": 61840 }, { "epoch": 1.805652472226683, "grad_norm": 0.4402839063262211, "learning_rate": 2.2116788321167882e-05, "loss": 0.414, "step": 61845 }, { "epoch": 1.805798455497, "grad_norm": 0.45723931033364107, "learning_rate": 2.211408488780752e-05, "loss": 0.4279, "step": 61850 }, { "epoch": 1.8059444387673174, "grad_norm": 0.4385140645884417, "learning_rate": 2.211138145444715e-05, "loss": 0.4215, "step": 61855 }, { "epoch": 1.8060904220376344, "grad_norm": 0.47150077721948663, "learning_rate": 2.210867802108678e-05, "loss": 0.4507, "step": 61860 }, { "epoch": 1.8062364053079518, "grad_norm": 0.48146387998501966, "learning_rate": 2.2105974587726415e-05, "loss": 0.4471, "step": 61865 }, { "epoch": 1.8063823885782688, "grad_norm": 0.4658403475537928, "learning_rate": 2.2103271154366045e-05, "loss": 0.4067, "step": 61870 }, { "epoch": 1.8065283718485863, "grad_norm": 0.47358759458044, "learning_rate": 2.2100567721005676e-05, "loss": 0.447, "step": 61875 }, { "epoch": 1.8066743551189033, "grad_norm": 0.44967117911902954, "learning_rate": 2.2097864287645313e-05, "loss": 0.4542, "step": 61880 }, { "epoch": 1.8068203383892207, "grad_norm": 0.468289386624159, "learning_rate": 2.2095160854284944e-05, "loss": 0.4161, "step": 61885 }, { "epoch": 1.8069663216595377, "grad_norm": 0.4697460132318704, "learning_rate": 2.2092457420924575e-05, "loss": 0.442, "step": 61890 }, { "epoch": 1.8071123049298552, "grad_norm": 0.5007799122921727, "learning_rate": 2.208975398756421e-05, "loss": 0.4557, "step": 61895 }, { "epoch": 1.8072582882001722, "grad_norm": 0.49845604552065087, "learning_rate": 2.208705055420384e-05, "loss": 0.451, "step": 61900 }, { "epoch": 1.8074042714704894, "grad_norm": 0.43928780646687965, "learning_rate": 2.208434712084347e-05, "loss": 0.4432, "step": 61905 }, { "epoch": 1.8075502547408067, "grad_norm": 0.4738218153129075, "learning_rate": 2.2081643687483107e-05, "loss": 0.4309, "step": 61910 }, { "epoch": 1.807696238011124, "grad_norm": 0.5242094665075403, "learning_rate": 2.2078940254122738e-05, "loss": 0.4603, "step": 61915 }, { "epoch": 1.8078422212814411, "grad_norm": 0.4956248952100477, "learning_rate": 2.207623682076237e-05, "loss": 0.4411, "step": 61920 }, { "epoch": 1.8079882045517583, "grad_norm": 0.4862977288506569, "learning_rate": 2.2073533387402002e-05, "loss": 0.4323, "step": 61925 }, { "epoch": 1.8081341878220756, "grad_norm": 0.4573188595964045, "learning_rate": 2.2070829954041633e-05, "loss": 0.4268, "step": 61930 }, { "epoch": 1.8082801710923928, "grad_norm": 0.5007729734449617, "learning_rate": 2.2068126520681267e-05, "loss": 0.4305, "step": 61935 }, { "epoch": 1.80842615436271, "grad_norm": 0.4821564619519732, "learning_rate": 2.20654230873209e-05, "loss": 0.4205, "step": 61940 }, { "epoch": 1.8085721376330273, "grad_norm": 0.5220721128092807, "learning_rate": 2.2062719653960532e-05, "loss": 0.4573, "step": 61945 }, { "epoch": 1.8087181209033445, "grad_norm": 0.5327713678618944, "learning_rate": 2.2060016220600162e-05, "loss": 0.4457, "step": 61950 }, { "epoch": 1.8088641041736617, "grad_norm": 0.48641393157968865, "learning_rate": 2.2057312787239796e-05, "loss": 0.4551, "step": 61955 }, { "epoch": 1.809010087443979, "grad_norm": 0.44013569164946675, "learning_rate": 2.2054609353879427e-05, "loss": 0.4165, "step": 61960 }, { "epoch": 1.8091560707142962, "grad_norm": 0.4870790634272592, "learning_rate": 2.205190592051906e-05, "loss": 0.447, "step": 61965 }, { "epoch": 1.8093020539846134, "grad_norm": 0.43517083304904364, "learning_rate": 2.2049202487158695e-05, "loss": 0.4212, "step": 61970 }, { "epoch": 1.8094480372549304, "grad_norm": 0.48100249334278716, "learning_rate": 2.2046499053798326e-05, "loss": 0.446, "step": 61975 }, { "epoch": 1.8095940205252479, "grad_norm": 0.4619321416983614, "learning_rate": 2.2043795620437956e-05, "loss": 0.4516, "step": 61980 }, { "epoch": 1.8097400037955649, "grad_norm": 0.4653265737908831, "learning_rate": 2.204109218707759e-05, "loss": 0.4231, "step": 61985 }, { "epoch": 1.8098859870658823, "grad_norm": 0.4534592984629897, "learning_rate": 2.203838875371722e-05, "loss": 0.4201, "step": 61990 }, { "epoch": 1.8100319703361993, "grad_norm": 0.45979821275333943, "learning_rate": 2.2035685320356855e-05, "loss": 0.4497, "step": 61995 }, { "epoch": 1.8101779536065168, "grad_norm": 0.46073440032861457, "learning_rate": 2.2032981886996485e-05, "loss": 0.4281, "step": 62000 }, { "epoch": 1.8103239368768338, "grad_norm": 0.4968885174072132, "learning_rate": 2.203027845363612e-05, "loss": 0.4702, "step": 62005 }, { "epoch": 1.8104699201471512, "grad_norm": 0.5735897519443379, "learning_rate": 2.202757502027575e-05, "loss": 0.4643, "step": 62010 }, { "epoch": 1.8106159034174683, "grad_norm": 0.47746122460091717, "learning_rate": 2.2024871586915384e-05, "loss": 0.4265, "step": 62015 }, { "epoch": 1.8107618866877857, "grad_norm": 0.49660932193759233, "learning_rate": 2.2022168153555018e-05, "loss": 0.4497, "step": 62020 }, { "epoch": 1.8109078699581027, "grad_norm": 0.4849489516461984, "learning_rate": 2.201946472019465e-05, "loss": 0.4416, "step": 62025 }, { "epoch": 1.8110538532284202, "grad_norm": 0.4607101140636883, "learning_rate": 2.201676128683428e-05, "loss": 0.4235, "step": 62030 }, { "epoch": 1.8111998364987372, "grad_norm": 0.47893871134481014, "learning_rate": 2.2014057853473913e-05, "loss": 0.4421, "step": 62035 }, { "epoch": 1.8113458197690546, "grad_norm": 0.4998053636353214, "learning_rate": 2.2011354420113544e-05, "loss": 0.4244, "step": 62040 }, { "epoch": 1.8114918030393716, "grad_norm": 0.4634025647165174, "learning_rate": 2.2008650986753178e-05, "loss": 0.4377, "step": 62045 }, { "epoch": 1.8116377863096889, "grad_norm": 0.4630758747646623, "learning_rate": 2.2005947553392812e-05, "loss": 0.431, "step": 62050 }, { "epoch": 1.811783769580006, "grad_norm": 0.473753986987549, "learning_rate": 2.2003244120032443e-05, "loss": 0.4329, "step": 62055 }, { "epoch": 1.8119297528503233, "grad_norm": 0.4901385919462993, "learning_rate": 2.2000540686672073e-05, "loss": 0.434, "step": 62060 }, { "epoch": 1.8120757361206405, "grad_norm": 0.4971437360141997, "learning_rate": 2.1997837253311707e-05, "loss": 0.4825, "step": 62065 }, { "epoch": 1.8122217193909578, "grad_norm": 0.499056137047652, "learning_rate": 2.1995133819951338e-05, "loss": 0.4233, "step": 62070 }, { "epoch": 1.812367702661275, "grad_norm": 0.45508997347346675, "learning_rate": 2.1992430386590972e-05, "loss": 0.447, "step": 62075 }, { "epoch": 1.8125136859315922, "grad_norm": 0.47042721656586045, "learning_rate": 2.1989726953230606e-05, "loss": 0.4276, "step": 62080 }, { "epoch": 1.8126596692019095, "grad_norm": 0.47634394953522785, "learning_rate": 2.1987023519870237e-05, "loss": 0.4246, "step": 62085 }, { "epoch": 1.8128056524722267, "grad_norm": 0.44113405707639664, "learning_rate": 2.1984320086509867e-05, "loss": 0.4483, "step": 62090 }, { "epoch": 1.812951635742544, "grad_norm": 0.4932381769854479, "learning_rate": 2.19816166531495e-05, "loss": 0.4532, "step": 62095 }, { "epoch": 1.8130976190128612, "grad_norm": 0.456765940555331, "learning_rate": 2.1978913219789132e-05, "loss": 0.4444, "step": 62100 }, { "epoch": 1.8132436022831784, "grad_norm": 0.4807945064716675, "learning_rate": 2.1976209786428766e-05, "loss": 0.4501, "step": 62105 }, { "epoch": 1.8133895855534956, "grad_norm": 0.4301912356667747, "learning_rate": 2.19735063530684e-05, "loss": 0.404, "step": 62110 }, { "epoch": 1.8135355688238128, "grad_norm": 0.4830181756111246, "learning_rate": 2.197080291970803e-05, "loss": 0.4144, "step": 62115 }, { "epoch": 1.81368155209413, "grad_norm": 0.44088317041499675, "learning_rate": 2.196809948634766e-05, "loss": 0.4083, "step": 62120 }, { "epoch": 1.8138275353644473, "grad_norm": 0.4455999418876753, "learning_rate": 2.1965396052987295e-05, "loss": 0.4304, "step": 62125 }, { "epoch": 1.8139735186347643, "grad_norm": 0.45697886870402343, "learning_rate": 2.1962692619626926e-05, "loss": 0.4395, "step": 62130 }, { "epoch": 1.8141195019050818, "grad_norm": 0.49674395805682564, "learning_rate": 2.195998918626656e-05, "loss": 0.4441, "step": 62135 }, { "epoch": 1.8142654851753988, "grad_norm": 0.5010842279485067, "learning_rate": 2.1957285752906194e-05, "loss": 0.4532, "step": 62140 }, { "epoch": 1.8144114684457162, "grad_norm": 0.440797457502008, "learning_rate": 2.1954582319545824e-05, "loss": 0.4509, "step": 62145 }, { "epoch": 1.8145574517160332, "grad_norm": 0.5365479804176394, "learning_rate": 2.1951878886185455e-05, "loss": 0.4748, "step": 62150 }, { "epoch": 1.8147034349863507, "grad_norm": 0.48082429438120555, "learning_rate": 2.194917545282509e-05, "loss": 0.458, "step": 62155 }, { "epoch": 1.8148494182566677, "grad_norm": 0.464755773548773, "learning_rate": 2.194647201946472e-05, "loss": 0.4497, "step": 62160 }, { "epoch": 1.8149954015269851, "grad_norm": 0.45092879922408613, "learning_rate": 2.1943768586104354e-05, "loss": 0.4302, "step": 62165 }, { "epoch": 1.8151413847973021, "grad_norm": 0.48208236553046463, "learning_rate": 2.1941065152743988e-05, "loss": 0.445, "step": 62170 }, { "epoch": 1.8152873680676196, "grad_norm": 0.47063529289754547, "learning_rate": 2.1938361719383618e-05, "loss": 0.462, "step": 62175 }, { "epoch": 1.8154333513379366, "grad_norm": 0.4951370938629852, "learning_rate": 2.193565828602325e-05, "loss": 0.4103, "step": 62180 }, { "epoch": 1.815579334608254, "grad_norm": 0.48112749475958344, "learning_rate": 2.1932954852662883e-05, "loss": 0.4209, "step": 62185 }, { "epoch": 1.815725317878571, "grad_norm": 0.4092574029038914, "learning_rate": 2.1930251419302517e-05, "loss": 0.3977, "step": 62190 }, { "epoch": 1.8158713011488883, "grad_norm": 0.48409898650885363, "learning_rate": 2.1927547985942148e-05, "loss": 0.4402, "step": 62195 }, { "epoch": 1.8160172844192055, "grad_norm": 0.5004737962565744, "learning_rate": 2.192484455258178e-05, "loss": 0.4509, "step": 62200 }, { "epoch": 1.8161632676895227, "grad_norm": 0.4483816628159387, "learning_rate": 2.1922141119221412e-05, "loss": 0.4434, "step": 62205 }, { "epoch": 1.81630925095984, "grad_norm": 0.4595545463071256, "learning_rate": 2.1919437685861043e-05, "loss": 0.414, "step": 62210 }, { "epoch": 1.8164552342301572, "grad_norm": 0.4966935973681346, "learning_rate": 2.1916734252500677e-05, "loss": 0.4321, "step": 62215 }, { "epoch": 1.8166012175004744, "grad_norm": 0.4633442816453909, "learning_rate": 2.191403081914031e-05, "loss": 0.4552, "step": 62220 }, { "epoch": 1.8167472007707917, "grad_norm": 0.4357046920308189, "learning_rate": 2.191132738577994e-05, "loss": 0.4211, "step": 62225 }, { "epoch": 1.8168931840411089, "grad_norm": 0.478495141904459, "learning_rate": 2.1908623952419575e-05, "loss": 0.4711, "step": 62230 }, { "epoch": 1.8170391673114261, "grad_norm": 0.4438791634120056, "learning_rate": 2.1905920519059206e-05, "loss": 0.4051, "step": 62235 }, { "epoch": 1.8171851505817433, "grad_norm": 0.4707406200125205, "learning_rate": 2.1903217085698837e-05, "loss": 0.4461, "step": 62240 }, { "epoch": 1.8173311338520606, "grad_norm": 0.47615940265841655, "learning_rate": 2.190051365233847e-05, "loss": 0.4347, "step": 62245 }, { "epoch": 1.8174771171223778, "grad_norm": 0.42674947485540987, "learning_rate": 2.1897810218978105e-05, "loss": 0.4139, "step": 62250 }, { "epoch": 1.817623100392695, "grad_norm": 0.4950006095276758, "learning_rate": 2.1895106785617735e-05, "loss": 0.4456, "step": 62255 }, { "epoch": 1.8177690836630123, "grad_norm": 0.45094820899236043, "learning_rate": 2.189240335225737e-05, "loss": 0.4333, "step": 62260 }, { "epoch": 1.8179150669333295, "grad_norm": 0.48949835575270284, "learning_rate": 2.1889699918897e-05, "loss": 0.4494, "step": 62265 }, { "epoch": 1.8180610502036467, "grad_norm": 0.46264468920997565, "learning_rate": 2.188699648553663e-05, "loss": 0.4562, "step": 62270 }, { "epoch": 1.8182070334739637, "grad_norm": 0.45992423913449587, "learning_rate": 2.1884293052176265e-05, "loss": 0.4168, "step": 62275 }, { "epoch": 1.8183530167442812, "grad_norm": 0.4876619983898045, "learning_rate": 2.18815896188159e-05, "loss": 0.4239, "step": 62280 }, { "epoch": 1.8184990000145982, "grad_norm": 0.4540510180906051, "learning_rate": 2.187888618545553e-05, "loss": 0.4412, "step": 62285 }, { "epoch": 1.8186449832849156, "grad_norm": 0.4402761707162937, "learning_rate": 2.1876182752095163e-05, "loss": 0.3966, "step": 62290 }, { "epoch": 1.8187909665552326, "grad_norm": 0.46650728414503556, "learning_rate": 2.1873479318734794e-05, "loss": 0.4523, "step": 62295 }, { "epoch": 1.81893694982555, "grad_norm": 0.46700551100430887, "learning_rate": 2.1870775885374425e-05, "loss": 0.4268, "step": 62300 }, { "epoch": 1.819082933095867, "grad_norm": 0.4885261335456337, "learning_rate": 2.186807245201406e-05, "loss": 0.4463, "step": 62305 }, { "epoch": 1.8192289163661846, "grad_norm": 0.48256071790776334, "learning_rate": 2.1865369018653693e-05, "loss": 0.4547, "step": 62310 }, { "epoch": 1.8193748996365016, "grad_norm": 0.47404747442425754, "learning_rate": 2.1862665585293323e-05, "loss": 0.4477, "step": 62315 }, { "epoch": 1.819520882906819, "grad_norm": 0.5025276384578408, "learning_rate": 2.1859962151932957e-05, "loss": 0.4362, "step": 62320 }, { "epoch": 1.819666866177136, "grad_norm": 0.45199455456783455, "learning_rate": 2.1857258718572588e-05, "loss": 0.4134, "step": 62325 }, { "epoch": 1.8198128494474535, "grad_norm": 0.49187686458038143, "learning_rate": 2.185455528521222e-05, "loss": 0.4578, "step": 62330 }, { "epoch": 1.8199588327177705, "grad_norm": 0.46830718746446753, "learning_rate": 2.1851851851851852e-05, "loss": 0.4062, "step": 62335 }, { "epoch": 1.8201048159880877, "grad_norm": 0.44950662330190666, "learning_rate": 2.1849148418491486e-05, "loss": 0.4394, "step": 62340 }, { "epoch": 1.820250799258405, "grad_norm": 0.47609527166559673, "learning_rate": 2.1846444985131117e-05, "loss": 0.4425, "step": 62345 }, { "epoch": 1.8203967825287222, "grad_norm": 0.5178734018039945, "learning_rate": 2.184374155177075e-05, "loss": 0.4449, "step": 62350 }, { "epoch": 1.8205427657990394, "grad_norm": 0.49819007651094743, "learning_rate": 2.1841038118410382e-05, "loss": 0.4123, "step": 62355 }, { "epoch": 1.8206887490693566, "grad_norm": 0.5042324906455595, "learning_rate": 2.1838334685050016e-05, "loss": 0.4669, "step": 62360 }, { "epoch": 1.8208347323396739, "grad_norm": 0.4747554555736755, "learning_rate": 2.1835631251689646e-05, "loss": 0.4174, "step": 62365 }, { "epoch": 1.820980715609991, "grad_norm": 0.47483068986388705, "learning_rate": 2.183292781832928e-05, "loss": 0.4386, "step": 62370 }, { "epoch": 1.8211266988803083, "grad_norm": 0.45007960405962516, "learning_rate": 2.183022438496891e-05, "loss": 0.4608, "step": 62375 }, { "epoch": 1.8212726821506255, "grad_norm": 0.4891868182672334, "learning_rate": 2.182752095160854e-05, "loss": 0.4366, "step": 62380 }, { "epoch": 1.8214186654209428, "grad_norm": 0.4789210421645299, "learning_rate": 2.1824817518248176e-05, "loss": 0.4357, "step": 62385 }, { "epoch": 1.82156464869126, "grad_norm": 0.48813333056103636, "learning_rate": 2.182211408488781e-05, "loss": 0.4759, "step": 62390 }, { "epoch": 1.8217106319615772, "grad_norm": 0.45766815484249307, "learning_rate": 2.181941065152744e-05, "loss": 0.4366, "step": 62395 }, { "epoch": 1.8218566152318945, "grad_norm": 0.45238535123145474, "learning_rate": 2.1816707218167074e-05, "loss": 0.424, "step": 62400 }, { "epoch": 1.8220025985022117, "grad_norm": 0.5254889462492559, "learning_rate": 2.1814003784806705e-05, "loss": 0.4131, "step": 62405 }, { "epoch": 1.822148581772529, "grad_norm": 0.4610294597321306, "learning_rate": 2.1811300351446336e-05, "loss": 0.4438, "step": 62410 }, { "epoch": 1.8222945650428461, "grad_norm": 0.47948343270168425, "learning_rate": 2.180859691808597e-05, "loss": 0.4483, "step": 62415 }, { "epoch": 1.8224405483131632, "grad_norm": 0.4832292172051537, "learning_rate": 2.1805893484725604e-05, "loss": 0.4541, "step": 62420 }, { "epoch": 1.8225865315834806, "grad_norm": 0.5065636624538978, "learning_rate": 2.1803190051365234e-05, "loss": 0.4452, "step": 62425 }, { "epoch": 1.8227325148537976, "grad_norm": 0.46912394237491006, "learning_rate": 2.1800486618004868e-05, "loss": 0.4546, "step": 62430 }, { "epoch": 1.822878498124115, "grad_norm": 0.4470705757430273, "learning_rate": 2.17977831846445e-05, "loss": 0.457, "step": 62435 }, { "epoch": 1.823024481394432, "grad_norm": 0.4672153466294485, "learning_rate": 2.179507975128413e-05, "loss": 0.4395, "step": 62440 }, { "epoch": 1.8231704646647495, "grad_norm": 0.4535248411141423, "learning_rate": 2.1792376317923767e-05, "loss": 0.4242, "step": 62445 }, { "epoch": 1.8233164479350665, "grad_norm": 0.48794332035997556, "learning_rate": 2.1789672884563397e-05, "loss": 0.4414, "step": 62450 }, { "epoch": 1.823462431205384, "grad_norm": 0.4751111317730928, "learning_rate": 2.1786969451203028e-05, "loss": 0.4375, "step": 62455 }, { "epoch": 1.823608414475701, "grad_norm": 0.4915775364230506, "learning_rate": 2.1784266017842662e-05, "loss": 0.4381, "step": 62460 }, { "epoch": 1.8237543977460184, "grad_norm": 0.4982984822310313, "learning_rate": 2.1781562584482293e-05, "loss": 0.4305, "step": 62465 }, { "epoch": 1.8239003810163354, "grad_norm": 0.46535729337829346, "learning_rate": 2.1778859151121923e-05, "loss": 0.4516, "step": 62470 }, { "epoch": 1.824046364286653, "grad_norm": 0.46458988079448366, "learning_rate": 2.177615571776156e-05, "loss": 0.4263, "step": 62475 }, { "epoch": 1.82419234755697, "grad_norm": 0.4560002786189917, "learning_rate": 2.177345228440119e-05, "loss": 0.4281, "step": 62480 }, { "epoch": 1.8243383308272874, "grad_norm": 0.47718017085579195, "learning_rate": 2.1770748851040822e-05, "loss": 0.4347, "step": 62485 }, { "epoch": 1.8244843140976044, "grad_norm": 0.5015921762080487, "learning_rate": 2.1768045417680456e-05, "loss": 0.4488, "step": 62490 }, { "epoch": 1.8246302973679216, "grad_norm": 0.482740512029779, "learning_rate": 2.1765341984320087e-05, "loss": 0.4361, "step": 62495 }, { "epoch": 1.8247762806382388, "grad_norm": 0.4641798724412186, "learning_rate": 2.1762638550959717e-05, "loss": 0.4224, "step": 62500 }, { "epoch": 1.824922263908556, "grad_norm": 0.48659941619162483, "learning_rate": 2.1759935117599355e-05, "loss": 0.4124, "step": 62505 }, { "epoch": 1.8250682471788733, "grad_norm": 0.4689675172632853, "learning_rate": 2.1757231684238985e-05, "loss": 0.4474, "step": 62510 }, { "epoch": 1.8252142304491905, "grad_norm": 0.448414038836869, "learning_rate": 2.1754528250878616e-05, "loss": 0.4356, "step": 62515 }, { "epoch": 1.8253602137195077, "grad_norm": 0.49745157587198646, "learning_rate": 2.175182481751825e-05, "loss": 0.4498, "step": 62520 }, { "epoch": 1.825506196989825, "grad_norm": 0.4429453902594463, "learning_rate": 2.174912138415788e-05, "loss": 0.4352, "step": 62525 }, { "epoch": 1.8256521802601422, "grad_norm": 0.4760143015492704, "learning_rate": 2.1746417950797515e-05, "loss": 0.4296, "step": 62530 }, { "epoch": 1.8257981635304594, "grad_norm": 0.5403047489230562, "learning_rate": 2.174371451743715e-05, "loss": 0.4707, "step": 62535 }, { "epoch": 1.8259441468007767, "grad_norm": 0.45245610233272376, "learning_rate": 2.174101108407678e-05, "loss": 0.4364, "step": 62540 }, { "epoch": 1.8260901300710939, "grad_norm": 0.44269460471908584, "learning_rate": 2.173830765071641e-05, "loss": 0.4442, "step": 62545 }, { "epoch": 1.8262361133414111, "grad_norm": 0.47627069638609715, "learning_rate": 2.1735604217356044e-05, "loss": 0.4586, "step": 62550 }, { "epoch": 1.8263820966117283, "grad_norm": 0.4609447765353593, "learning_rate": 2.1732900783995674e-05, "loss": 0.4291, "step": 62555 }, { "epoch": 1.8265280798820456, "grad_norm": 0.48913507744401413, "learning_rate": 2.173019735063531e-05, "loss": 0.4173, "step": 62560 }, { "epoch": 1.8266740631523626, "grad_norm": 0.4888160939139551, "learning_rate": 2.1727493917274942e-05, "loss": 0.449, "step": 62565 }, { "epoch": 1.82682004642268, "grad_norm": 0.4672591369946901, "learning_rate": 2.1724790483914573e-05, "loss": 0.4613, "step": 62570 }, { "epoch": 1.826966029692997, "grad_norm": 0.4689671783806216, "learning_rate": 2.1722087050554204e-05, "loss": 0.4456, "step": 62575 }, { "epoch": 1.8271120129633145, "grad_norm": 0.4984724711759228, "learning_rate": 2.1719383617193838e-05, "loss": 0.4223, "step": 62580 }, { "epoch": 1.8272579962336315, "grad_norm": 0.47319206994955965, "learning_rate": 2.171668018383347e-05, "loss": 0.4353, "step": 62585 }, { "epoch": 1.827403979503949, "grad_norm": 0.4665088086971665, "learning_rate": 2.1713976750473102e-05, "loss": 0.4336, "step": 62590 }, { "epoch": 1.827549962774266, "grad_norm": 0.4594176517524144, "learning_rate": 2.1711273317112736e-05, "loss": 0.4209, "step": 62595 }, { "epoch": 1.8276959460445834, "grad_norm": 0.4299758757748378, "learning_rate": 2.1708569883752367e-05, "loss": 0.433, "step": 62600 }, { "epoch": 1.8278419293149004, "grad_norm": 0.45056121549573735, "learning_rate": 2.1705866450391998e-05, "loss": 0.4155, "step": 62605 }, { "epoch": 1.8279879125852179, "grad_norm": 0.462696290441514, "learning_rate": 2.170316301703163e-05, "loss": 0.419, "step": 62610 }, { "epoch": 1.8281338958555349, "grad_norm": 0.5273150094106395, "learning_rate": 2.1700459583671266e-05, "loss": 0.4264, "step": 62615 }, { "epoch": 1.8282798791258523, "grad_norm": 0.46107849799441925, "learning_rate": 2.1697756150310896e-05, "loss": 0.4407, "step": 62620 }, { "epoch": 1.8284258623961693, "grad_norm": 0.4391786656916674, "learning_rate": 2.169505271695053e-05, "loss": 0.4354, "step": 62625 }, { "epoch": 1.8285718456664868, "grad_norm": 0.4758769481774941, "learning_rate": 2.169234928359016e-05, "loss": 0.446, "step": 62630 }, { "epoch": 1.8287178289368038, "grad_norm": 0.49617528437758457, "learning_rate": 2.168964585022979e-05, "loss": 0.454, "step": 62635 }, { "epoch": 1.828863812207121, "grad_norm": 0.48546317412891005, "learning_rate": 2.1686942416869425e-05, "loss": 0.4495, "step": 62640 }, { "epoch": 1.8290097954774382, "grad_norm": 0.4768819432812759, "learning_rate": 2.168423898350906e-05, "loss": 0.4473, "step": 62645 }, { "epoch": 1.8291557787477555, "grad_norm": 0.393221342939978, "learning_rate": 2.168153555014869e-05, "loss": 0.4088, "step": 62650 }, { "epoch": 1.8293017620180727, "grad_norm": 0.4881853956356559, "learning_rate": 2.167883211678832e-05, "loss": 0.447, "step": 62655 }, { "epoch": 1.82944774528839, "grad_norm": 0.4922742140252252, "learning_rate": 2.1676128683427955e-05, "loss": 0.4172, "step": 62660 }, { "epoch": 1.8295937285587072, "grad_norm": 0.5136873666860735, "learning_rate": 2.1673425250067585e-05, "loss": 0.4436, "step": 62665 }, { "epoch": 1.8297397118290244, "grad_norm": 0.47539232071395243, "learning_rate": 2.167072181670722e-05, "loss": 0.427, "step": 62670 }, { "epoch": 1.8298856950993416, "grad_norm": 0.4641736651737967, "learning_rate": 2.1668018383346853e-05, "loss": 0.4229, "step": 62675 }, { "epoch": 1.8300316783696589, "grad_norm": 0.46054435094432145, "learning_rate": 2.1665314949986484e-05, "loss": 0.4436, "step": 62680 }, { "epoch": 1.830177661639976, "grad_norm": 0.4694197881963668, "learning_rate": 2.1662611516626115e-05, "loss": 0.434, "step": 62685 }, { "epoch": 1.8303236449102933, "grad_norm": 0.46747639149076803, "learning_rate": 2.165990808326575e-05, "loss": 0.4881, "step": 62690 }, { "epoch": 1.8304696281806105, "grad_norm": 0.46326643960716235, "learning_rate": 2.165720464990538e-05, "loss": 0.4306, "step": 62695 }, { "epoch": 1.8306156114509278, "grad_norm": 0.4735556301507108, "learning_rate": 2.1654501216545013e-05, "loss": 0.4483, "step": 62700 }, { "epoch": 1.830761594721245, "grad_norm": 0.4945068434136905, "learning_rate": 2.1651797783184647e-05, "loss": 0.462, "step": 62705 }, { "epoch": 1.830907577991562, "grad_norm": 0.5295862993939356, "learning_rate": 2.1649094349824278e-05, "loss": 0.4377, "step": 62710 }, { "epoch": 1.8310535612618795, "grad_norm": 0.4557732846843135, "learning_rate": 2.164639091646391e-05, "loss": 0.4385, "step": 62715 }, { "epoch": 1.8311995445321965, "grad_norm": 0.472842459949868, "learning_rate": 2.1643687483103543e-05, "loss": 0.4567, "step": 62720 }, { "epoch": 1.831345527802514, "grad_norm": 0.4600171323277963, "learning_rate": 2.1640984049743173e-05, "loss": 0.4148, "step": 62725 }, { "epoch": 1.831491511072831, "grad_norm": 0.44388963725078007, "learning_rate": 2.1638280616382807e-05, "loss": 0.405, "step": 62730 }, { "epoch": 1.8316374943431484, "grad_norm": 0.43354273836403134, "learning_rate": 2.163557718302244e-05, "loss": 0.4557, "step": 62735 }, { "epoch": 1.8317834776134654, "grad_norm": 0.4778984458533936, "learning_rate": 2.1632873749662072e-05, "loss": 0.4295, "step": 62740 }, { "epoch": 1.8319294608837828, "grad_norm": 0.4425131963892422, "learning_rate": 2.1630170316301702e-05, "loss": 0.41, "step": 62745 }, { "epoch": 1.8320754441540998, "grad_norm": 0.42567791751486606, "learning_rate": 2.1627466882941336e-05, "loss": 0.414, "step": 62750 }, { "epoch": 1.8322214274244173, "grad_norm": 0.4666860382444187, "learning_rate": 2.1624763449580967e-05, "loss": 0.4597, "step": 62755 }, { "epoch": 1.8323674106947343, "grad_norm": 0.48088079146434454, "learning_rate": 2.16220600162206e-05, "loss": 0.4204, "step": 62760 }, { "epoch": 1.8325133939650518, "grad_norm": 0.49821489586239215, "learning_rate": 2.1619356582860235e-05, "loss": 0.4441, "step": 62765 }, { "epoch": 1.8326593772353688, "grad_norm": 0.4679949721222889, "learning_rate": 2.1616653149499866e-05, "loss": 0.4372, "step": 62770 }, { "epoch": 1.8328053605056862, "grad_norm": 0.4111808806061903, "learning_rate": 2.1613949716139496e-05, "loss": 0.4025, "step": 62775 }, { "epoch": 1.8329513437760032, "grad_norm": 0.5067395891385075, "learning_rate": 2.161124628277913e-05, "loss": 0.4248, "step": 62780 }, { "epoch": 1.8330973270463204, "grad_norm": 0.4521508334353167, "learning_rate": 2.1608542849418764e-05, "loss": 0.4484, "step": 62785 }, { "epoch": 1.8332433103166377, "grad_norm": 0.43299537383001746, "learning_rate": 2.1605839416058395e-05, "loss": 0.442, "step": 62790 }, { "epoch": 1.833389293586955, "grad_norm": 0.4697900812681173, "learning_rate": 2.160313598269803e-05, "loss": 0.4234, "step": 62795 }, { "epoch": 1.8335352768572721, "grad_norm": 0.4911989173358307, "learning_rate": 2.160043254933766e-05, "loss": 0.4168, "step": 62800 }, { "epoch": 1.8336812601275894, "grad_norm": 0.4849457126701538, "learning_rate": 2.159772911597729e-05, "loss": 0.4568, "step": 62805 }, { "epoch": 1.8338272433979066, "grad_norm": 0.4642830646680742, "learning_rate": 2.1595025682616924e-05, "loss": 0.4364, "step": 62810 }, { "epoch": 1.8339732266682238, "grad_norm": 0.4620863531993163, "learning_rate": 2.1592322249256558e-05, "loss": 0.411, "step": 62815 }, { "epoch": 1.834119209938541, "grad_norm": 0.4844602126816977, "learning_rate": 2.158961881589619e-05, "loss": 0.4433, "step": 62820 }, { "epoch": 1.8342651932088583, "grad_norm": 0.44755935392618434, "learning_rate": 2.1586915382535823e-05, "loss": 0.4564, "step": 62825 }, { "epoch": 1.8344111764791755, "grad_norm": 0.47878578540282235, "learning_rate": 2.1584211949175454e-05, "loss": 0.4353, "step": 62830 }, { "epoch": 1.8345571597494927, "grad_norm": 0.45534090513843517, "learning_rate": 2.1581508515815084e-05, "loss": 0.401, "step": 62835 }, { "epoch": 1.83470314301981, "grad_norm": 0.4479542859263585, "learning_rate": 2.1578805082454718e-05, "loss": 0.4545, "step": 62840 }, { "epoch": 1.8348491262901272, "grad_norm": 0.4541403008412395, "learning_rate": 2.1576101649094352e-05, "loss": 0.4527, "step": 62845 }, { "epoch": 1.8349951095604444, "grad_norm": 0.47844751175155703, "learning_rate": 2.1573398215733983e-05, "loss": 0.4267, "step": 62850 }, { "epoch": 1.8351410928307614, "grad_norm": 0.48610416067784, "learning_rate": 2.1570694782373617e-05, "loss": 0.4219, "step": 62855 }, { "epoch": 1.8352870761010789, "grad_norm": 0.4446015876110646, "learning_rate": 2.1567991349013247e-05, "loss": 0.4317, "step": 62860 }, { "epoch": 1.835433059371396, "grad_norm": 0.466736809310463, "learning_rate": 2.1565287915652878e-05, "loss": 0.4455, "step": 62865 }, { "epoch": 1.8355790426417133, "grad_norm": 0.5030579467724936, "learning_rate": 2.1562584482292515e-05, "loss": 0.4614, "step": 62870 }, { "epoch": 1.8357250259120304, "grad_norm": 0.5011501819476597, "learning_rate": 2.1559881048932146e-05, "loss": 0.4198, "step": 62875 }, { "epoch": 1.8358710091823478, "grad_norm": 0.45665559316871424, "learning_rate": 2.1557177615571777e-05, "loss": 0.4133, "step": 62880 }, { "epoch": 1.8360169924526648, "grad_norm": 0.4704536788429925, "learning_rate": 2.155447418221141e-05, "loss": 0.4408, "step": 62885 }, { "epoch": 1.8361629757229823, "grad_norm": 0.4880897005347294, "learning_rate": 2.155177074885104e-05, "loss": 0.4395, "step": 62890 }, { "epoch": 1.8363089589932993, "grad_norm": 0.4664170594509058, "learning_rate": 2.1549067315490672e-05, "loss": 0.4408, "step": 62895 }, { "epoch": 1.8364549422636167, "grad_norm": 0.4676028916108966, "learning_rate": 2.1546363882130306e-05, "loss": 0.4085, "step": 62900 }, { "epoch": 1.8366009255339337, "grad_norm": 0.5238773834806523, "learning_rate": 2.154366044876994e-05, "loss": 0.4699, "step": 62905 }, { "epoch": 1.8367469088042512, "grad_norm": 0.4530741708611304, "learning_rate": 2.154095701540957e-05, "loss": 0.4325, "step": 62910 }, { "epoch": 1.8368928920745682, "grad_norm": 0.5002911938114462, "learning_rate": 2.1538253582049205e-05, "loss": 0.4678, "step": 62915 }, { "epoch": 1.8370388753448856, "grad_norm": 0.4700739233511883, "learning_rate": 2.1535550148688835e-05, "loss": 0.4531, "step": 62920 }, { "epoch": 1.8371848586152026, "grad_norm": 0.48989531098274547, "learning_rate": 2.1532846715328466e-05, "loss": 0.4584, "step": 62925 }, { "epoch": 1.8373308418855199, "grad_norm": 0.4685746270177156, "learning_rate": 2.15301432819681e-05, "loss": 0.4279, "step": 62930 }, { "epoch": 1.837476825155837, "grad_norm": 0.4553093445588456, "learning_rate": 2.1527439848607734e-05, "loss": 0.443, "step": 62935 }, { "epoch": 1.8376228084261543, "grad_norm": 0.473787134358528, "learning_rate": 2.1524736415247365e-05, "loss": 0.4356, "step": 62940 }, { "epoch": 1.8377687916964716, "grad_norm": 0.4743302019421062, "learning_rate": 2.1522032981887e-05, "loss": 0.4315, "step": 62945 }, { "epoch": 1.8379147749667888, "grad_norm": 0.47030745610900493, "learning_rate": 2.151932954852663e-05, "loss": 0.43, "step": 62950 }, { "epoch": 1.838060758237106, "grad_norm": 0.49925507026263544, "learning_rate": 2.1516626115166263e-05, "loss": 0.4388, "step": 62955 }, { "epoch": 1.8382067415074232, "grad_norm": 0.5119240098679402, "learning_rate": 2.1513922681805894e-05, "loss": 0.444, "step": 62960 }, { "epoch": 1.8383527247777405, "grad_norm": 0.5006558306055338, "learning_rate": 2.1511219248445528e-05, "loss": 0.4636, "step": 62965 }, { "epoch": 1.8384987080480577, "grad_norm": 0.4953364087667069, "learning_rate": 2.150851581508516e-05, "loss": 0.4508, "step": 62970 }, { "epoch": 1.838644691318375, "grad_norm": 0.45592371342392113, "learning_rate": 2.1505812381724792e-05, "loss": 0.4156, "step": 62975 }, { "epoch": 1.8387906745886922, "grad_norm": 0.4732647061530096, "learning_rate": 2.1503108948364423e-05, "loss": 0.4328, "step": 62980 }, { "epoch": 1.8389366578590094, "grad_norm": 0.42419836319570176, "learning_rate": 2.1500405515004057e-05, "loss": 0.4289, "step": 62985 }, { "epoch": 1.8390826411293266, "grad_norm": 0.47196609943419726, "learning_rate": 2.1497702081643688e-05, "loss": 0.4378, "step": 62990 }, { "epoch": 1.8392286243996439, "grad_norm": 0.4710018654717741, "learning_rate": 2.1494998648283322e-05, "loss": 0.4568, "step": 62995 }, { "epoch": 1.8393746076699609, "grad_norm": 0.5016343583647371, "learning_rate": 2.1492295214922952e-05, "loss": 0.4524, "step": 63000 }, { "epoch": 1.8395205909402783, "grad_norm": 0.45275098052721313, "learning_rate": 2.1489591781562586e-05, "loss": 0.4265, "step": 63005 }, { "epoch": 1.8396665742105953, "grad_norm": 0.467018687158303, "learning_rate": 2.1486888348202217e-05, "loss": 0.4313, "step": 63010 }, { "epoch": 1.8398125574809128, "grad_norm": 0.43722407055867324, "learning_rate": 2.148418491484185e-05, "loss": 0.4279, "step": 63015 }, { "epoch": 1.8399585407512298, "grad_norm": 0.44386058675929563, "learning_rate": 2.148148148148148e-05, "loss": 0.4154, "step": 63020 }, { "epoch": 1.8401045240215472, "grad_norm": 0.46583814802140205, "learning_rate": 2.1478778048121116e-05, "loss": 0.4506, "step": 63025 }, { "epoch": 1.8402505072918642, "grad_norm": 0.4348419887899876, "learning_rate": 2.1476074614760746e-05, "loss": 0.4028, "step": 63030 }, { "epoch": 1.8403964905621817, "grad_norm": 0.48984709666413084, "learning_rate": 2.1473371181400377e-05, "loss": 0.4318, "step": 63035 }, { "epoch": 1.8405424738324987, "grad_norm": 0.44223345262030245, "learning_rate": 2.1470667748040014e-05, "loss": 0.4545, "step": 63040 }, { "epoch": 1.8406884571028161, "grad_norm": 0.4582148613586315, "learning_rate": 2.1467964314679645e-05, "loss": 0.4214, "step": 63045 }, { "epoch": 1.8408344403731332, "grad_norm": 0.47431081372684825, "learning_rate": 2.1465260881319276e-05, "loss": 0.4099, "step": 63050 }, { "epoch": 1.8409804236434506, "grad_norm": 0.46987515830748594, "learning_rate": 2.146255744795891e-05, "loss": 0.4357, "step": 63055 }, { "epoch": 1.8411264069137676, "grad_norm": 0.4778877902482961, "learning_rate": 2.145985401459854e-05, "loss": 0.4275, "step": 63060 }, { "epoch": 1.841272390184085, "grad_norm": 0.4416920611284718, "learning_rate": 2.145715058123817e-05, "loss": 0.4265, "step": 63065 }, { "epoch": 1.841418373454402, "grad_norm": 0.4452377107941582, "learning_rate": 2.1454447147877808e-05, "loss": 0.4392, "step": 63070 }, { "epoch": 1.8415643567247193, "grad_norm": 0.4339000017968843, "learning_rate": 2.145174371451744e-05, "loss": 0.3915, "step": 63075 }, { "epoch": 1.8417103399950365, "grad_norm": 0.4552342592134896, "learning_rate": 2.144904028115707e-05, "loss": 0.4496, "step": 63080 }, { "epoch": 1.8418563232653538, "grad_norm": 0.4648927386803185, "learning_rate": 2.1446336847796703e-05, "loss": 0.4127, "step": 63085 }, { "epoch": 1.842002306535671, "grad_norm": 0.4728386692171227, "learning_rate": 2.1443633414436334e-05, "loss": 0.4383, "step": 63090 }, { "epoch": 1.8421482898059882, "grad_norm": 0.4588831436870615, "learning_rate": 2.1440929981075965e-05, "loss": 0.4153, "step": 63095 }, { "epoch": 1.8422942730763054, "grad_norm": 0.43416315314684234, "learning_rate": 2.1438226547715602e-05, "loss": 0.4432, "step": 63100 }, { "epoch": 1.8424402563466227, "grad_norm": 0.4611490227168159, "learning_rate": 2.1435523114355233e-05, "loss": 0.4249, "step": 63105 }, { "epoch": 1.84258623961694, "grad_norm": 0.47893341608909024, "learning_rate": 2.1432819680994863e-05, "loss": 0.4681, "step": 63110 }, { "epoch": 1.8427322228872571, "grad_norm": 0.46069038844606286, "learning_rate": 2.1430116247634497e-05, "loss": 0.4255, "step": 63115 }, { "epoch": 1.8428782061575744, "grad_norm": 0.47421571634846915, "learning_rate": 2.1427412814274128e-05, "loss": 0.4297, "step": 63120 }, { "epoch": 1.8430241894278916, "grad_norm": 0.4756395617331648, "learning_rate": 2.1424709380913762e-05, "loss": 0.4499, "step": 63125 }, { "epoch": 1.8431701726982088, "grad_norm": 0.5213934245532748, "learning_rate": 2.1422005947553396e-05, "loss": 0.4291, "step": 63130 }, { "epoch": 1.843316155968526, "grad_norm": 0.47616995856002264, "learning_rate": 2.1419302514193027e-05, "loss": 0.4179, "step": 63135 }, { "epoch": 1.8434621392388433, "grad_norm": 0.4323329846132473, "learning_rate": 2.1416599080832657e-05, "loss": 0.4129, "step": 63140 }, { "epoch": 1.8436081225091603, "grad_norm": 0.4528828932452525, "learning_rate": 2.141389564747229e-05, "loss": 0.4341, "step": 63145 }, { "epoch": 1.8437541057794777, "grad_norm": 0.4341926758089506, "learning_rate": 2.1411192214111922e-05, "loss": 0.4062, "step": 63150 }, { "epoch": 1.8439000890497947, "grad_norm": 0.5066116838277887, "learning_rate": 2.1408488780751556e-05, "loss": 0.4357, "step": 63155 }, { "epoch": 1.8440460723201122, "grad_norm": 0.45257213634778165, "learning_rate": 2.140578534739119e-05, "loss": 0.4078, "step": 63160 }, { "epoch": 1.8441920555904292, "grad_norm": 0.4447610218477061, "learning_rate": 2.140308191403082e-05, "loss": 0.4426, "step": 63165 }, { "epoch": 1.8443380388607467, "grad_norm": 0.5028840833591386, "learning_rate": 2.140037848067045e-05, "loss": 0.4417, "step": 63170 }, { "epoch": 1.8444840221310637, "grad_norm": 0.45926112667785013, "learning_rate": 2.1397675047310085e-05, "loss": 0.4227, "step": 63175 }, { "epoch": 1.8446300054013811, "grad_norm": 0.45241058931236083, "learning_rate": 2.1394971613949716e-05, "loss": 0.4187, "step": 63180 }, { "epoch": 1.8447759886716981, "grad_norm": 0.47872724287689994, "learning_rate": 2.139226818058935e-05, "loss": 0.4442, "step": 63185 }, { "epoch": 1.8449219719420156, "grad_norm": 0.4722627687305312, "learning_rate": 2.1389564747228984e-05, "loss": 0.4394, "step": 63190 }, { "epoch": 1.8450679552123326, "grad_norm": 0.4757724375843699, "learning_rate": 2.1386861313868614e-05, "loss": 0.417, "step": 63195 }, { "epoch": 1.84521393848265, "grad_norm": 0.4720480046277469, "learning_rate": 2.1384157880508245e-05, "loss": 0.4396, "step": 63200 }, { "epoch": 1.845359921752967, "grad_norm": 0.4932589812270805, "learning_rate": 2.138145444714788e-05, "loss": 0.4133, "step": 63205 }, { "epoch": 1.8455059050232845, "grad_norm": 0.5116880130585717, "learning_rate": 2.1378751013787513e-05, "loss": 0.4616, "step": 63210 }, { "epoch": 1.8456518882936015, "grad_norm": 0.456786750694663, "learning_rate": 2.1376047580427144e-05, "loss": 0.4205, "step": 63215 }, { "epoch": 1.8457978715639187, "grad_norm": 0.4759397190403565, "learning_rate": 2.1373344147066778e-05, "loss": 0.4321, "step": 63220 }, { "epoch": 1.845943854834236, "grad_norm": 0.47713888819873757, "learning_rate": 2.137064071370641e-05, "loss": 0.4305, "step": 63225 }, { "epoch": 1.8460898381045532, "grad_norm": 0.44086246616414004, "learning_rate": 2.136793728034604e-05, "loss": 0.4213, "step": 63230 }, { "epoch": 1.8462358213748704, "grad_norm": 0.484982580150312, "learning_rate": 2.1365233846985673e-05, "loss": 0.4278, "step": 63235 }, { "epoch": 1.8463818046451876, "grad_norm": 0.4402640412949319, "learning_rate": 2.1362530413625307e-05, "loss": 0.4379, "step": 63240 }, { "epoch": 1.8465277879155049, "grad_norm": 0.47096612788070114, "learning_rate": 2.1359826980264938e-05, "loss": 0.4194, "step": 63245 }, { "epoch": 1.846673771185822, "grad_norm": 0.45528763536389144, "learning_rate": 2.135712354690457e-05, "loss": 0.418, "step": 63250 }, { "epoch": 1.8468197544561393, "grad_norm": 0.4991301904707215, "learning_rate": 2.1354420113544202e-05, "loss": 0.4409, "step": 63255 }, { "epoch": 1.8469657377264566, "grad_norm": 0.46617285064847225, "learning_rate": 2.1351716680183833e-05, "loss": 0.4539, "step": 63260 }, { "epoch": 1.8471117209967738, "grad_norm": 0.4996839030110328, "learning_rate": 2.1349013246823467e-05, "loss": 0.4523, "step": 63265 }, { "epoch": 1.847257704267091, "grad_norm": 0.43617469728731206, "learning_rate": 2.13463098134631e-05, "loss": 0.4461, "step": 63270 }, { "epoch": 1.8474036875374082, "grad_norm": 0.4943396976437374, "learning_rate": 2.134360638010273e-05, "loss": 0.4377, "step": 63275 }, { "epoch": 1.8475496708077255, "grad_norm": 0.49143299378545063, "learning_rate": 2.1340902946742362e-05, "loss": 0.417, "step": 63280 }, { "epoch": 1.8476956540780427, "grad_norm": 0.47884395121124096, "learning_rate": 2.1338199513381996e-05, "loss": 0.444, "step": 63285 }, { "epoch": 1.84784163734836, "grad_norm": 0.4962853387024381, "learning_rate": 2.1335496080021627e-05, "loss": 0.4762, "step": 63290 }, { "epoch": 1.8479876206186772, "grad_norm": 0.4438231630077998, "learning_rate": 2.133279264666126e-05, "loss": 0.4323, "step": 63295 }, { "epoch": 1.8481336038889942, "grad_norm": 0.4724170150554386, "learning_rate": 2.1330089213300895e-05, "loss": 0.4221, "step": 63300 }, { "epoch": 1.8482795871593116, "grad_norm": 0.47080585200480785, "learning_rate": 2.1327385779940525e-05, "loss": 0.4333, "step": 63305 }, { "epoch": 1.8484255704296286, "grad_norm": 0.47297871120182633, "learning_rate": 2.1324682346580156e-05, "loss": 0.4447, "step": 63310 }, { "epoch": 1.848571553699946, "grad_norm": 0.4622349618082653, "learning_rate": 2.132197891321979e-05, "loss": 0.4462, "step": 63315 }, { "epoch": 1.848717536970263, "grad_norm": 0.42931818802696475, "learning_rate": 2.131927547985942e-05, "loss": 0.4117, "step": 63320 }, { "epoch": 1.8488635202405805, "grad_norm": 0.47807663701390524, "learning_rate": 2.1316572046499055e-05, "loss": 0.4572, "step": 63325 }, { "epoch": 1.8490095035108975, "grad_norm": 0.4666909658954357, "learning_rate": 2.131386861313869e-05, "loss": 0.4466, "step": 63330 }, { "epoch": 1.849155486781215, "grad_norm": 0.511981148675476, "learning_rate": 2.131116517977832e-05, "loss": 0.452, "step": 63335 }, { "epoch": 1.849301470051532, "grad_norm": 0.4475297880835008, "learning_rate": 2.130846174641795e-05, "loss": 0.4295, "step": 63340 }, { "epoch": 1.8494474533218495, "grad_norm": 0.458622601579339, "learning_rate": 2.1305758313057584e-05, "loss": 0.432, "step": 63345 }, { "epoch": 1.8495934365921665, "grad_norm": 0.47798782891365804, "learning_rate": 2.1303054879697215e-05, "loss": 0.4482, "step": 63350 }, { "epoch": 1.849739419862484, "grad_norm": 0.4755425492036228, "learning_rate": 2.130035144633685e-05, "loss": 0.4451, "step": 63355 }, { "epoch": 1.849885403132801, "grad_norm": 0.509987526273295, "learning_rate": 2.1297648012976483e-05, "loss": 0.4473, "step": 63360 }, { "epoch": 1.8500313864031181, "grad_norm": 0.44619588392773774, "learning_rate": 2.1294944579616113e-05, "loss": 0.4451, "step": 63365 }, { "epoch": 1.8501773696734354, "grad_norm": 0.47238626198201894, "learning_rate": 2.1292241146255744e-05, "loss": 0.4401, "step": 63370 }, { "epoch": 1.8503233529437526, "grad_norm": 0.5185041996408883, "learning_rate": 2.1289537712895378e-05, "loss": 0.447, "step": 63375 }, { "epoch": 1.8504693362140698, "grad_norm": 0.4283491909611087, "learning_rate": 2.1286834279535012e-05, "loss": 0.425, "step": 63380 }, { "epoch": 1.850615319484387, "grad_norm": 0.4739019786265291, "learning_rate": 2.1284130846174642e-05, "loss": 0.4221, "step": 63385 }, { "epoch": 1.8507613027547043, "grad_norm": 0.4522448288340067, "learning_rate": 2.1281427412814276e-05, "loss": 0.4503, "step": 63390 }, { "epoch": 1.8509072860250215, "grad_norm": 0.48887759594943214, "learning_rate": 2.1278723979453907e-05, "loss": 0.4298, "step": 63395 }, { "epoch": 1.8510532692953388, "grad_norm": 0.4693649606717114, "learning_rate": 2.1276020546093538e-05, "loss": 0.4368, "step": 63400 }, { "epoch": 1.851199252565656, "grad_norm": 0.46428386397535965, "learning_rate": 2.1273317112733172e-05, "loss": 0.4562, "step": 63405 }, { "epoch": 1.8513452358359732, "grad_norm": 0.49703712454324483, "learning_rate": 2.1270613679372806e-05, "loss": 0.4291, "step": 63410 }, { "epoch": 1.8514912191062904, "grad_norm": 0.42059769051189455, "learning_rate": 2.1267910246012436e-05, "loss": 0.4146, "step": 63415 }, { "epoch": 1.8516372023766077, "grad_norm": 0.48982441283946704, "learning_rate": 2.126520681265207e-05, "loss": 0.4572, "step": 63420 }, { "epoch": 1.851783185646925, "grad_norm": 0.47284081519983895, "learning_rate": 2.12625033792917e-05, "loss": 0.4027, "step": 63425 }, { "epoch": 1.8519291689172421, "grad_norm": 0.45930406254566813, "learning_rate": 2.125979994593133e-05, "loss": 0.4141, "step": 63430 }, { "epoch": 1.8520751521875594, "grad_norm": 0.48317834951504013, "learning_rate": 2.1257096512570966e-05, "loss": 0.4411, "step": 63435 }, { "epoch": 1.8522211354578766, "grad_norm": 0.4989287206350935, "learning_rate": 2.12543930792106e-05, "loss": 0.4551, "step": 63440 }, { "epoch": 1.8523671187281936, "grad_norm": 0.4309292830246462, "learning_rate": 2.125168964585023e-05, "loss": 0.4145, "step": 63445 }, { "epoch": 1.852513101998511, "grad_norm": 0.47269800329059913, "learning_rate": 2.1248986212489864e-05, "loss": 0.4449, "step": 63450 }, { "epoch": 1.852659085268828, "grad_norm": 0.4638458364169805, "learning_rate": 2.1246282779129495e-05, "loss": 0.419, "step": 63455 }, { "epoch": 1.8528050685391455, "grad_norm": 0.44791039077677997, "learning_rate": 2.1243579345769126e-05, "loss": 0.4386, "step": 63460 }, { "epoch": 1.8529510518094625, "grad_norm": 0.49464317308722505, "learning_rate": 2.1240875912408763e-05, "loss": 0.4509, "step": 63465 }, { "epoch": 1.85309703507978, "grad_norm": 0.5096623616305634, "learning_rate": 2.1238172479048394e-05, "loss": 0.4398, "step": 63470 }, { "epoch": 1.853243018350097, "grad_norm": 0.4678224163677381, "learning_rate": 2.1235469045688024e-05, "loss": 0.4564, "step": 63475 }, { "epoch": 1.8533890016204144, "grad_norm": 0.4410303072698728, "learning_rate": 2.1232765612327658e-05, "loss": 0.4291, "step": 63480 }, { "epoch": 1.8535349848907314, "grad_norm": 0.47685683842642457, "learning_rate": 2.123006217896729e-05, "loss": 0.4154, "step": 63485 }, { "epoch": 1.8536809681610489, "grad_norm": 0.4278835916509605, "learning_rate": 2.122735874560692e-05, "loss": 0.4221, "step": 63490 }, { "epoch": 1.8538269514313659, "grad_norm": 0.49566077049535123, "learning_rate": 2.1224655312246557e-05, "loss": 0.4386, "step": 63495 }, { "epoch": 1.8539729347016833, "grad_norm": 0.4776287173511954, "learning_rate": 2.1221951878886187e-05, "loss": 0.4251, "step": 63500 }, { "epoch": 1.8541481146260639, "grad_norm": 0.44167830932970975, "learning_rate": 2.1219248445525818e-05, "loss": 0.4415, "step": 63505 }, { "epoch": 1.854294097896381, "grad_norm": 0.4724282889178904, "learning_rate": 2.1216545012165452e-05, "loss": 0.4236, "step": 63510 }, { "epoch": 1.8544400811666983, "grad_norm": 0.47307565351050124, "learning_rate": 2.1213841578805083e-05, "loss": 0.4163, "step": 63515 }, { "epoch": 1.8545860644370156, "grad_norm": 0.4391724943039593, "learning_rate": 2.1211138145444713e-05, "loss": 0.4473, "step": 63520 }, { "epoch": 1.8547320477073326, "grad_norm": 0.46034917704117695, "learning_rate": 2.120843471208435e-05, "loss": 0.4135, "step": 63525 }, { "epoch": 1.85487803097765, "grad_norm": 0.4716445126786017, "learning_rate": 2.120573127872398e-05, "loss": 0.4226, "step": 63530 }, { "epoch": 1.855024014247967, "grad_norm": 0.46588551651786897, "learning_rate": 2.1203027845363612e-05, "loss": 0.437, "step": 63535 }, { "epoch": 1.8551699975182845, "grad_norm": 0.5170486834682828, "learning_rate": 2.1200324412003246e-05, "loss": 0.4591, "step": 63540 }, { "epoch": 1.8553159807886015, "grad_norm": 0.46372313975514806, "learning_rate": 2.1197620978642877e-05, "loss": 0.4189, "step": 63545 }, { "epoch": 1.855461964058919, "grad_norm": 0.49033375777536037, "learning_rate": 2.119491754528251e-05, "loss": 0.4192, "step": 63550 }, { "epoch": 1.855607947329236, "grad_norm": 0.43828171066924726, "learning_rate": 2.119221411192214e-05, "loss": 0.417, "step": 63555 }, { "epoch": 1.8557539305995534, "grad_norm": 0.47635517151120826, "learning_rate": 2.1189510678561775e-05, "loss": 0.4358, "step": 63560 }, { "epoch": 1.8558999138698704, "grad_norm": 0.4978161122390206, "learning_rate": 2.1186807245201406e-05, "loss": 0.4599, "step": 63565 }, { "epoch": 1.8560458971401879, "grad_norm": 0.445605606376615, "learning_rate": 2.118410381184104e-05, "loss": 0.4146, "step": 63570 }, { "epoch": 1.8561918804105049, "grad_norm": 0.5340192570142587, "learning_rate": 2.118140037848067e-05, "loss": 0.469, "step": 63575 }, { "epoch": 1.8563378636808223, "grad_norm": 0.5195990141173851, "learning_rate": 2.1178696945120305e-05, "loss": 0.4697, "step": 63580 }, { "epoch": 1.8564838469511393, "grad_norm": 0.4366625482043803, "learning_rate": 2.1175993511759935e-05, "loss": 0.4287, "step": 63585 }, { "epoch": 1.8566298302214568, "grad_norm": 0.4701660219053347, "learning_rate": 2.117329007839957e-05, "loss": 0.4197, "step": 63590 }, { "epoch": 1.8567758134917738, "grad_norm": 0.5000809559436281, "learning_rate": 2.11705866450392e-05, "loss": 0.429, "step": 63595 }, { "epoch": 1.856921796762091, "grad_norm": 0.4339718647090168, "learning_rate": 2.1167883211678834e-05, "loss": 0.4204, "step": 63600 }, { "epoch": 1.8570677800324082, "grad_norm": 0.47998746291671096, "learning_rate": 2.1165179778318464e-05, "loss": 0.4428, "step": 63605 }, { "epoch": 1.8572137633027255, "grad_norm": 0.46431746727934065, "learning_rate": 2.11624763449581e-05, "loss": 0.4244, "step": 63610 }, { "epoch": 1.8573597465730427, "grad_norm": 0.4358486507485605, "learning_rate": 2.115977291159773e-05, "loss": 0.4374, "step": 63615 }, { "epoch": 1.85750572984336, "grad_norm": 0.5293909423020262, "learning_rate": 2.1157069478237363e-05, "loss": 0.4546, "step": 63620 }, { "epoch": 1.8576517131136772, "grad_norm": 0.4470969830375389, "learning_rate": 2.1154366044876994e-05, "loss": 0.393, "step": 63625 }, { "epoch": 1.8577976963839944, "grad_norm": 0.48878170688472655, "learning_rate": 2.1151662611516628e-05, "loss": 0.4428, "step": 63630 }, { "epoch": 1.8579436796543116, "grad_norm": 0.4893959071239906, "learning_rate": 2.1148959178156262e-05, "loss": 0.4517, "step": 63635 }, { "epoch": 1.8580896629246288, "grad_norm": 0.45028543269160054, "learning_rate": 2.1146255744795892e-05, "loss": 0.4135, "step": 63640 }, { "epoch": 1.858235646194946, "grad_norm": 0.4402171240287573, "learning_rate": 2.1143552311435523e-05, "loss": 0.4393, "step": 63645 }, { "epoch": 1.8583816294652633, "grad_norm": 0.5011366338927474, "learning_rate": 2.1140848878075157e-05, "loss": 0.4102, "step": 63650 }, { "epoch": 1.8585276127355805, "grad_norm": 0.4815076785581964, "learning_rate": 2.1138145444714788e-05, "loss": 0.409, "step": 63655 }, { "epoch": 1.8586735960058978, "grad_norm": 0.5067842067976963, "learning_rate": 2.113544201135442e-05, "loss": 0.45, "step": 63660 }, { "epoch": 1.858819579276215, "grad_norm": 0.4402917965242073, "learning_rate": 2.1132738577994056e-05, "loss": 0.4471, "step": 63665 }, { "epoch": 1.858965562546532, "grad_norm": 0.4958388346831523, "learning_rate": 2.1130035144633686e-05, "loss": 0.4181, "step": 63670 }, { "epoch": 1.8591115458168495, "grad_norm": 0.4968623621153604, "learning_rate": 2.1127331711273317e-05, "loss": 0.4463, "step": 63675 }, { "epoch": 1.8592575290871665, "grad_norm": 0.45679918361049493, "learning_rate": 2.112462827791295e-05, "loss": 0.4467, "step": 63680 }, { "epoch": 1.859403512357484, "grad_norm": 0.45235494320508673, "learning_rate": 2.112192484455258e-05, "loss": 0.4348, "step": 63685 }, { "epoch": 1.859549495627801, "grad_norm": 0.4668928968310921, "learning_rate": 2.1119221411192212e-05, "loss": 0.4328, "step": 63690 }, { "epoch": 1.8596954788981184, "grad_norm": 0.4527222830579077, "learning_rate": 2.111651797783185e-05, "loss": 0.4188, "step": 63695 }, { "epoch": 1.8598414621684354, "grad_norm": 0.5017659026950714, "learning_rate": 2.111381454447148e-05, "loss": 0.4442, "step": 63700 }, { "epoch": 1.8599874454387528, "grad_norm": 0.4758063094251618, "learning_rate": 2.111111111111111e-05, "loss": 0.45, "step": 63705 }, { "epoch": 1.8601334287090698, "grad_norm": 0.47419970647955706, "learning_rate": 2.1108407677750745e-05, "loss": 0.4251, "step": 63710 }, { "epoch": 1.8602794119793873, "grad_norm": 0.5070385439089584, "learning_rate": 2.1105704244390375e-05, "loss": 0.4449, "step": 63715 }, { "epoch": 1.8604253952497043, "grad_norm": 0.47987650999195625, "learning_rate": 2.110300081103001e-05, "loss": 0.4303, "step": 63720 }, { "epoch": 1.8605713785200217, "grad_norm": 0.5402888778371638, "learning_rate": 2.1100297377669643e-05, "loss": 0.4382, "step": 63725 }, { "epoch": 1.8607173617903388, "grad_norm": 0.5007302778758548, "learning_rate": 2.1097593944309274e-05, "loss": 0.4555, "step": 63730 }, { "epoch": 1.8608633450606562, "grad_norm": 0.44150564789011526, "learning_rate": 2.1094890510948905e-05, "loss": 0.4191, "step": 63735 }, { "epoch": 1.8610093283309732, "grad_norm": 0.48976330273564866, "learning_rate": 2.109218707758854e-05, "loss": 0.441, "step": 63740 }, { "epoch": 1.8611553116012904, "grad_norm": 0.4585231758052562, "learning_rate": 2.108948364422817e-05, "loss": 0.4409, "step": 63745 }, { "epoch": 1.8613012948716077, "grad_norm": 0.46969709682895816, "learning_rate": 2.1086780210867803e-05, "loss": 0.4368, "step": 63750 }, { "epoch": 1.861447278141925, "grad_norm": 0.4697733673701981, "learning_rate": 2.1084076777507437e-05, "loss": 0.4461, "step": 63755 }, { "epoch": 1.8615932614122421, "grad_norm": 0.46588155537062376, "learning_rate": 2.1081373344147068e-05, "loss": 0.4564, "step": 63760 }, { "epoch": 1.8617392446825594, "grad_norm": 0.47225126851044497, "learning_rate": 2.10786699107867e-05, "loss": 0.4354, "step": 63765 }, { "epoch": 1.8618852279528766, "grad_norm": 0.4954734239169421, "learning_rate": 2.1075966477426333e-05, "loss": 0.4524, "step": 63770 }, { "epoch": 1.8620312112231938, "grad_norm": 0.4545601077054836, "learning_rate": 2.1073263044065963e-05, "loss": 0.4479, "step": 63775 }, { "epoch": 1.862177194493511, "grad_norm": 0.5175768193764253, "learning_rate": 2.1070559610705597e-05, "loss": 0.468, "step": 63780 }, { "epoch": 1.8623231777638283, "grad_norm": 0.49928322930901353, "learning_rate": 2.106785617734523e-05, "loss": 0.4487, "step": 63785 }, { "epoch": 1.8624691610341455, "grad_norm": 0.4557963585324458, "learning_rate": 2.1065152743984862e-05, "loss": 0.4096, "step": 63790 }, { "epoch": 1.8626151443044627, "grad_norm": 0.47806007495372776, "learning_rate": 2.1062449310624492e-05, "loss": 0.4308, "step": 63795 }, { "epoch": 1.86276112757478, "grad_norm": 0.4732701497694826, "learning_rate": 2.1059745877264126e-05, "loss": 0.4349, "step": 63800 }, { "epoch": 1.8629071108450972, "grad_norm": 0.5291968479442167, "learning_rate": 2.105704244390376e-05, "loss": 0.435, "step": 63805 }, { "epoch": 1.8630530941154144, "grad_norm": 0.4431525694721416, "learning_rate": 2.105433901054339e-05, "loss": 0.4216, "step": 63810 }, { "epoch": 1.8631990773857317, "grad_norm": 0.47555397447086106, "learning_rate": 2.1051635577183025e-05, "loss": 0.4382, "step": 63815 }, { "epoch": 1.8633450606560489, "grad_norm": 0.4771757194408605, "learning_rate": 2.1048932143822656e-05, "loss": 0.4348, "step": 63820 }, { "epoch": 1.8634910439263659, "grad_norm": 0.4583868771204389, "learning_rate": 2.1046228710462286e-05, "loss": 0.448, "step": 63825 }, { "epoch": 1.8636370271966833, "grad_norm": 0.48340078196532404, "learning_rate": 2.104352527710192e-05, "loss": 0.4464, "step": 63830 }, { "epoch": 1.8637830104670003, "grad_norm": 0.44122968071803836, "learning_rate": 2.1040821843741554e-05, "loss": 0.4408, "step": 63835 }, { "epoch": 1.8639289937373178, "grad_norm": 0.4843163034526863, "learning_rate": 2.1038118410381185e-05, "loss": 0.4539, "step": 63840 }, { "epoch": 1.8640749770076348, "grad_norm": 0.4512137643728659, "learning_rate": 2.103541497702082e-05, "loss": 0.4676, "step": 63845 }, { "epoch": 1.8642209602779523, "grad_norm": 0.4399628166415711, "learning_rate": 2.103271154366045e-05, "loss": 0.4456, "step": 63850 }, { "epoch": 1.8643669435482693, "grad_norm": 0.4859375522493375, "learning_rate": 2.103000811030008e-05, "loss": 0.4375, "step": 63855 }, { "epoch": 1.8645129268185867, "grad_norm": 0.49114962582287064, "learning_rate": 2.1027304676939714e-05, "loss": 0.4338, "step": 63860 }, { "epoch": 1.8646589100889037, "grad_norm": 0.48549256844870625, "learning_rate": 2.102460124357935e-05, "loss": 0.4572, "step": 63865 }, { "epoch": 1.8648048933592212, "grad_norm": 0.48360953962640285, "learning_rate": 2.102189781021898e-05, "loss": 0.4497, "step": 63870 }, { "epoch": 1.8649508766295382, "grad_norm": 0.49100821962709823, "learning_rate": 2.1019194376858613e-05, "loss": 0.4448, "step": 63875 }, { "epoch": 1.8650968598998556, "grad_norm": 0.4910899971464472, "learning_rate": 2.1016490943498244e-05, "loss": 0.4529, "step": 63880 }, { "epoch": 1.8652428431701726, "grad_norm": 0.5060257973951838, "learning_rate": 2.1013787510137874e-05, "loss": 0.4418, "step": 63885 }, { "epoch": 1.8653888264404899, "grad_norm": 0.47135490278514364, "learning_rate": 2.1011084076777508e-05, "loss": 0.4316, "step": 63890 }, { "epoch": 1.865534809710807, "grad_norm": 0.45570837192233343, "learning_rate": 2.1008380643417142e-05, "loss": 0.4236, "step": 63895 }, { "epoch": 1.8656807929811243, "grad_norm": 0.46228277028063613, "learning_rate": 2.1005677210056773e-05, "loss": 0.4247, "step": 63900 }, { "epoch": 1.8658267762514416, "grad_norm": 0.4819510305787906, "learning_rate": 2.1002973776696407e-05, "loss": 0.4372, "step": 63905 }, { "epoch": 1.8659727595217588, "grad_norm": 0.45041936653985165, "learning_rate": 2.1000270343336037e-05, "loss": 0.4186, "step": 63910 }, { "epoch": 1.866118742792076, "grad_norm": 0.496553668293295, "learning_rate": 2.0997566909975668e-05, "loss": 0.4411, "step": 63915 }, { "epoch": 1.8662647260623932, "grad_norm": 0.41297399617038144, "learning_rate": 2.0994863476615302e-05, "loss": 0.44, "step": 63920 }, { "epoch": 1.8664107093327105, "grad_norm": 0.4356199154057724, "learning_rate": 2.0992160043254936e-05, "loss": 0.461, "step": 63925 }, { "epoch": 1.8665566926030277, "grad_norm": 0.4533578111863458, "learning_rate": 2.0989456609894567e-05, "loss": 0.4399, "step": 63930 }, { "epoch": 1.866702675873345, "grad_norm": 0.5137702332869882, "learning_rate": 2.0986753176534197e-05, "loss": 0.4301, "step": 63935 }, { "epoch": 1.8668486591436622, "grad_norm": 0.5090426815825732, "learning_rate": 2.098404974317383e-05, "loss": 0.4539, "step": 63940 }, { "epoch": 1.8669946424139794, "grad_norm": 0.487273618101368, "learning_rate": 2.0981346309813462e-05, "loss": 0.4447, "step": 63945 }, { "epoch": 1.8671406256842966, "grad_norm": 0.4664400857930426, "learning_rate": 2.0978642876453096e-05, "loss": 0.4096, "step": 63950 }, { "epoch": 1.8672866089546138, "grad_norm": 0.4947155949377883, "learning_rate": 2.097593944309273e-05, "loss": 0.4562, "step": 63955 }, { "epoch": 1.867432592224931, "grad_norm": 0.42825168781285033, "learning_rate": 2.097323600973236e-05, "loss": 0.4312, "step": 63960 }, { "epoch": 1.8675785754952483, "grad_norm": 0.47006368801737874, "learning_rate": 2.097053257637199e-05, "loss": 0.4378, "step": 63965 }, { "epoch": 1.8677245587655653, "grad_norm": 0.4673057216536053, "learning_rate": 2.0967829143011625e-05, "loss": 0.4452, "step": 63970 }, { "epoch": 1.8678705420358828, "grad_norm": 0.4968742784277149, "learning_rate": 2.096512570965126e-05, "loss": 0.4125, "step": 63975 }, { "epoch": 1.8680165253061998, "grad_norm": 0.4767670756141696, "learning_rate": 2.096242227629089e-05, "loss": 0.4477, "step": 63980 }, { "epoch": 1.8681625085765172, "grad_norm": 0.4718990103896659, "learning_rate": 2.0959718842930524e-05, "loss": 0.4441, "step": 63985 }, { "epoch": 1.8683084918468342, "grad_norm": 0.45503074775690294, "learning_rate": 2.0957015409570155e-05, "loss": 0.4377, "step": 63990 }, { "epoch": 1.8684544751171517, "grad_norm": 0.5086951742674879, "learning_rate": 2.0954311976209785e-05, "loss": 0.435, "step": 63995 }, { "epoch": 1.8686004583874687, "grad_norm": 0.4807822048554939, "learning_rate": 2.095160854284942e-05, "loss": 0.4503, "step": 64000 }, { "epoch": 1.8687464416577861, "grad_norm": 0.4487596327519098, "learning_rate": 2.0948905109489053e-05, "loss": 0.4301, "step": 64005 }, { "epoch": 1.8688924249281031, "grad_norm": 0.49867220962887987, "learning_rate": 2.0946201676128684e-05, "loss": 0.4331, "step": 64010 }, { "epoch": 1.8690384081984206, "grad_norm": 0.48430151931831966, "learning_rate": 2.0943498242768318e-05, "loss": 0.4334, "step": 64015 }, { "epoch": 1.8691843914687376, "grad_norm": 0.452313587671772, "learning_rate": 2.094079480940795e-05, "loss": 0.421, "step": 64020 }, { "epoch": 1.869330374739055, "grad_norm": 0.45803784284465754, "learning_rate": 2.093809137604758e-05, "loss": 0.4067, "step": 64025 }, { "epoch": 1.869476358009372, "grad_norm": 0.44490424058538336, "learning_rate": 2.0935387942687216e-05, "loss": 0.4201, "step": 64030 }, { "epoch": 1.8696223412796893, "grad_norm": 0.4644887802151254, "learning_rate": 2.0932684509326847e-05, "loss": 0.4413, "step": 64035 }, { "epoch": 1.8697683245500065, "grad_norm": 0.49426327729626085, "learning_rate": 2.0929981075966478e-05, "loss": 0.4591, "step": 64040 }, { "epoch": 1.8699143078203238, "grad_norm": 0.5025600270935134, "learning_rate": 2.0927277642606112e-05, "loss": 0.4527, "step": 64045 }, { "epoch": 1.870060291090641, "grad_norm": 0.4590268011238425, "learning_rate": 2.0924574209245742e-05, "loss": 0.4414, "step": 64050 }, { "epoch": 1.8702062743609582, "grad_norm": 0.485409239920007, "learning_rate": 2.0921870775885373e-05, "loss": 0.4274, "step": 64055 }, { "epoch": 1.8703522576312754, "grad_norm": 0.4780757122559185, "learning_rate": 2.091916734252501e-05, "loss": 0.4353, "step": 64060 }, { "epoch": 1.8704982409015927, "grad_norm": 0.44597308025843124, "learning_rate": 2.091646390916464e-05, "loss": 0.4324, "step": 64065 }, { "epoch": 1.87064422417191, "grad_norm": 0.48505206993509564, "learning_rate": 2.091376047580427e-05, "loss": 0.4343, "step": 64070 }, { "epoch": 1.8707902074422271, "grad_norm": 0.4868026338526691, "learning_rate": 2.0911057042443906e-05, "loss": 0.4847, "step": 64075 }, { "epoch": 1.8709361907125444, "grad_norm": 0.4693001180795616, "learning_rate": 2.0908353609083536e-05, "loss": 0.4415, "step": 64080 }, { "epoch": 1.8710821739828616, "grad_norm": 0.4769479787881654, "learning_rate": 2.0905650175723167e-05, "loss": 0.4541, "step": 64085 }, { "epoch": 1.8712281572531788, "grad_norm": 0.5067980939642429, "learning_rate": 2.0902946742362804e-05, "loss": 0.4406, "step": 64090 }, { "epoch": 1.871374140523496, "grad_norm": 0.4611764547864435, "learning_rate": 2.0900243309002435e-05, "loss": 0.4172, "step": 64095 }, { "epoch": 1.8715201237938133, "grad_norm": 0.431333668902388, "learning_rate": 2.0897539875642066e-05, "loss": 0.4399, "step": 64100 }, { "epoch": 1.8716661070641305, "grad_norm": 0.5463979978766419, "learning_rate": 2.08948364422817e-05, "loss": 0.4508, "step": 64105 }, { "epoch": 1.8718120903344477, "grad_norm": 0.45742543079564896, "learning_rate": 2.089213300892133e-05, "loss": 0.45, "step": 64110 }, { "epoch": 1.8719580736047647, "grad_norm": 0.46587557931581314, "learning_rate": 2.0889429575560964e-05, "loss": 0.4288, "step": 64115 }, { "epoch": 1.8721040568750822, "grad_norm": 0.4921492036263722, "learning_rate": 2.0886726142200598e-05, "loss": 0.4185, "step": 64120 }, { "epoch": 1.8722500401453992, "grad_norm": 0.46422392081001534, "learning_rate": 2.088402270884023e-05, "loss": 0.4388, "step": 64125 }, { "epoch": 1.8723960234157166, "grad_norm": 0.5290021026978233, "learning_rate": 2.088131927547986e-05, "loss": 0.4798, "step": 64130 }, { "epoch": 1.8725420066860337, "grad_norm": 0.47013419040912546, "learning_rate": 2.0878615842119493e-05, "loss": 0.449, "step": 64135 }, { "epoch": 1.872687989956351, "grad_norm": 0.4519798454379729, "learning_rate": 2.0875912408759124e-05, "loss": 0.4041, "step": 64140 }, { "epoch": 1.8728339732266681, "grad_norm": 0.5026760957962269, "learning_rate": 2.0873208975398758e-05, "loss": 0.4325, "step": 64145 }, { "epoch": 1.8729799564969856, "grad_norm": 0.4549179737944019, "learning_rate": 2.0870505542038392e-05, "loss": 0.4322, "step": 64150 }, { "epoch": 1.8731259397673026, "grad_norm": 0.4983109323261003, "learning_rate": 2.0867802108678023e-05, "loss": 0.4076, "step": 64155 }, { "epoch": 1.87327192303762, "grad_norm": 0.5061774087051195, "learning_rate": 2.0865098675317653e-05, "loss": 0.4344, "step": 64160 }, { "epoch": 1.873417906307937, "grad_norm": 0.4394746590676137, "learning_rate": 2.0862395241957287e-05, "loss": 0.4277, "step": 64165 }, { "epoch": 1.8735638895782545, "grad_norm": 0.5187225377510737, "learning_rate": 2.0859691808596918e-05, "loss": 0.45, "step": 64170 }, { "epoch": 1.8737098728485715, "grad_norm": 0.4705990415359207, "learning_rate": 2.0856988375236552e-05, "loss": 0.43, "step": 64175 }, { "epoch": 1.873855856118889, "grad_norm": 0.46385474902781293, "learning_rate": 2.0854284941876186e-05, "loss": 0.4, "step": 64180 }, { "epoch": 1.874001839389206, "grad_norm": 0.44840970771160404, "learning_rate": 2.0851581508515817e-05, "loss": 0.414, "step": 64185 }, { "epoch": 1.8741478226595232, "grad_norm": 0.4770783878864096, "learning_rate": 2.0848878075155447e-05, "loss": 0.4343, "step": 64190 }, { "epoch": 1.8742938059298404, "grad_norm": 0.4980022178146427, "learning_rate": 2.084617464179508e-05, "loss": 0.4258, "step": 64195 }, { "epoch": 1.8744397892001576, "grad_norm": 0.441717181220187, "learning_rate": 2.0843471208434715e-05, "loss": 0.4238, "step": 64200 }, { "epoch": 1.8745857724704749, "grad_norm": 0.4532448979354987, "learning_rate": 2.0840767775074346e-05, "loss": 0.4294, "step": 64205 }, { "epoch": 1.874731755740792, "grad_norm": 0.5436205313940531, "learning_rate": 2.0838064341713977e-05, "loss": 0.44, "step": 64210 }, { "epoch": 1.8748777390111093, "grad_norm": 0.4871281640326442, "learning_rate": 2.083536090835361e-05, "loss": 0.4437, "step": 64215 }, { "epoch": 1.8750237222814266, "grad_norm": 0.4697934568845883, "learning_rate": 2.083265747499324e-05, "loss": 0.4314, "step": 64220 }, { "epoch": 1.8751697055517438, "grad_norm": 0.48353643775504757, "learning_rate": 2.0829954041632875e-05, "loss": 0.4588, "step": 64225 }, { "epoch": 1.875315688822061, "grad_norm": 0.4676133844779865, "learning_rate": 2.082725060827251e-05, "loss": 0.4585, "step": 64230 }, { "epoch": 1.8754616720923782, "grad_norm": 0.4535544871333398, "learning_rate": 2.082454717491214e-05, "loss": 0.4254, "step": 64235 }, { "epoch": 1.8756076553626955, "grad_norm": 0.48135156902500636, "learning_rate": 2.082184374155177e-05, "loss": 0.4288, "step": 64240 }, { "epoch": 1.8757536386330127, "grad_norm": 0.44227481131848945, "learning_rate": 2.0819140308191404e-05, "loss": 0.4327, "step": 64245 }, { "epoch": 1.87589962190333, "grad_norm": 0.45586768560336, "learning_rate": 2.0816436874831035e-05, "loss": 0.428, "step": 64250 }, { "epoch": 1.8760456051736472, "grad_norm": 0.47416908430408716, "learning_rate": 2.081373344147067e-05, "loss": 0.4211, "step": 64255 }, { "epoch": 1.8761915884439642, "grad_norm": 0.47982198251555247, "learning_rate": 2.0811030008110303e-05, "loss": 0.4353, "step": 64260 }, { "epoch": 1.8763375717142816, "grad_norm": 0.4599112508235172, "learning_rate": 2.0808326574749934e-05, "loss": 0.4327, "step": 64265 }, { "epoch": 1.8764835549845986, "grad_norm": 0.45015582630423295, "learning_rate": 2.0805623141389564e-05, "loss": 0.4394, "step": 64270 }, { "epoch": 1.876629538254916, "grad_norm": 0.4842473016007481, "learning_rate": 2.08029197080292e-05, "loss": 0.4218, "step": 64275 }, { "epoch": 1.876775521525233, "grad_norm": 0.4335521783453005, "learning_rate": 2.080021627466883e-05, "loss": 0.4333, "step": 64280 }, { "epoch": 1.8769215047955505, "grad_norm": 0.4650136304783581, "learning_rate": 2.0797512841308463e-05, "loss": 0.4216, "step": 64285 }, { "epoch": 1.8770674880658675, "grad_norm": 0.476241198863834, "learning_rate": 2.0794809407948097e-05, "loss": 0.4265, "step": 64290 }, { "epoch": 1.877213471336185, "grad_norm": 0.4243972948960792, "learning_rate": 2.0792105974587728e-05, "loss": 0.418, "step": 64295 }, { "epoch": 1.877359454606502, "grad_norm": 0.471696321116278, "learning_rate": 2.0789402541227358e-05, "loss": 0.4437, "step": 64300 }, { "epoch": 1.8775054378768194, "grad_norm": 0.4424944860647615, "learning_rate": 2.0786699107866992e-05, "loss": 0.4219, "step": 64305 }, { "epoch": 1.8776514211471365, "grad_norm": 0.4819119079885477, "learning_rate": 2.0783995674506623e-05, "loss": 0.4345, "step": 64310 }, { "epoch": 1.877797404417454, "grad_norm": 0.47964635918273374, "learning_rate": 2.0781292241146257e-05, "loss": 0.4513, "step": 64315 }, { "epoch": 1.877943387687771, "grad_norm": 0.5357622952817663, "learning_rate": 2.077858880778589e-05, "loss": 0.4258, "step": 64320 }, { "epoch": 1.8780893709580884, "grad_norm": 0.493801490402222, "learning_rate": 2.077588537442552e-05, "loss": 0.4437, "step": 64325 }, { "epoch": 1.8782353542284054, "grad_norm": 0.4463068317056311, "learning_rate": 2.0773181941065152e-05, "loss": 0.418, "step": 64330 }, { "epoch": 1.8783813374987226, "grad_norm": 0.5075520518850325, "learning_rate": 2.0770478507704786e-05, "loss": 0.4433, "step": 64335 }, { "epoch": 1.8785273207690398, "grad_norm": 0.5179215045860027, "learning_rate": 2.0767775074344417e-05, "loss": 0.449, "step": 64340 }, { "epoch": 1.878673304039357, "grad_norm": 0.4472436880343865, "learning_rate": 2.076507164098405e-05, "loss": 0.4242, "step": 64345 }, { "epoch": 1.8788192873096743, "grad_norm": 0.5157845082047336, "learning_rate": 2.0762368207623685e-05, "loss": 0.4284, "step": 64350 }, { "epoch": 1.8789652705799915, "grad_norm": 0.46645442588935543, "learning_rate": 2.0759664774263315e-05, "loss": 0.4081, "step": 64355 }, { "epoch": 1.8791112538503087, "grad_norm": 0.48921366687523193, "learning_rate": 2.0756961340902946e-05, "loss": 0.4193, "step": 64360 }, { "epoch": 1.879257237120626, "grad_norm": 0.5127017568578228, "learning_rate": 2.075425790754258e-05, "loss": 0.4358, "step": 64365 }, { "epoch": 1.8794032203909432, "grad_norm": 0.4816104761597466, "learning_rate": 2.0751554474182214e-05, "loss": 0.4458, "step": 64370 }, { "epoch": 1.8795492036612604, "grad_norm": 0.4041453791760713, "learning_rate": 2.0748851040821845e-05, "loss": 0.4059, "step": 64375 }, { "epoch": 1.8796951869315777, "grad_norm": 0.4876109614329305, "learning_rate": 2.074614760746148e-05, "loss": 0.4271, "step": 64380 }, { "epoch": 1.879841170201895, "grad_norm": 0.4549294426108778, "learning_rate": 2.074344417410111e-05, "loss": 0.4071, "step": 64385 }, { "epoch": 1.8799871534722121, "grad_norm": 0.4855612219254747, "learning_rate": 2.074074074074074e-05, "loss": 0.4403, "step": 64390 }, { "epoch": 1.8801331367425294, "grad_norm": 0.4647470217411422, "learning_rate": 2.0738037307380374e-05, "loss": 0.4279, "step": 64395 }, { "epoch": 1.8802791200128466, "grad_norm": 0.4894141871315403, "learning_rate": 2.0735333874020008e-05, "loss": 0.4875, "step": 64400 }, { "epoch": 1.8804251032831636, "grad_norm": 0.47537543275462435, "learning_rate": 2.073263044065964e-05, "loss": 0.4258, "step": 64405 }, { "epoch": 1.880571086553481, "grad_norm": 0.46287902488329885, "learning_rate": 2.0729927007299273e-05, "loss": 0.4314, "step": 64410 }, { "epoch": 1.880717069823798, "grad_norm": 0.43754289721246375, "learning_rate": 2.0727223573938903e-05, "loss": 0.4183, "step": 64415 }, { "epoch": 1.8808630530941155, "grad_norm": 0.4747905902434745, "learning_rate": 2.0724520140578534e-05, "loss": 0.4375, "step": 64420 }, { "epoch": 1.8810090363644325, "grad_norm": 0.469630804637427, "learning_rate": 2.0721816707218168e-05, "loss": 0.418, "step": 64425 }, { "epoch": 1.88115501963475, "grad_norm": 0.4465677963256573, "learning_rate": 2.0719113273857802e-05, "loss": 0.4192, "step": 64430 }, { "epoch": 1.881301002905067, "grad_norm": 0.46943494055341645, "learning_rate": 2.0716409840497432e-05, "loss": 0.4408, "step": 64435 }, { "epoch": 1.8814469861753844, "grad_norm": 0.45802572532191416, "learning_rate": 2.0713706407137066e-05, "loss": 0.4489, "step": 64440 }, { "epoch": 1.8815929694457014, "grad_norm": 0.4790301104020668, "learning_rate": 2.0711002973776697e-05, "loss": 0.4369, "step": 64445 }, { "epoch": 1.8817389527160189, "grad_norm": 0.45204074671692535, "learning_rate": 2.0708299540416328e-05, "loss": 0.4284, "step": 64450 }, { "epoch": 1.8818849359863359, "grad_norm": 0.4640215688882995, "learning_rate": 2.0705596107055962e-05, "loss": 0.4538, "step": 64455 }, { "epoch": 1.8820309192566533, "grad_norm": 0.48391381968734304, "learning_rate": 2.0702892673695596e-05, "loss": 0.4364, "step": 64460 }, { "epoch": 1.8821769025269703, "grad_norm": 0.4707924608738866, "learning_rate": 2.0700189240335226e-05, "loss": 0.4203, "step": 64465 }, { "epoch": 1.8823228857972878, "grad_norm": 0.47375086937594224, "learning_rate": 2.069748580697486e-05, "loss": 0.4471, "step": 64470 }, { "epoch": 1.8824688690676048, "grad_norm": 0.466460607082021, "learning_rate": 2.069478237361449e-05, "loss": 0.4192, "step": 64475 }, { "epoch": 1.882614852337922, "grad_norm": 0.45136090391461214, "learning_rate": 2.069207894025412e-05, "loss": 0.4559, "step": 64480 }, { "epoch": 1.8827608356082393, "grad_norm": 0.4892917842082665, "learning_rate": 2.0689375506893756e-05, "loss": 0.4428, "step": 64485 }, { "epoch": 1.8829068188785565, "grad_norm": 0.44644687492579, "learning_rate": 2.068667207353339e-05, "loss": 0.4397, "step": 64490 }, { "epoch": 1.8830528021488737, "grad_norm": 0.41738175946887845, "learning_rate": 2.068396864017302e-05, "loss": 0.4178, "step": 64495 }, { "epoch": 1.883198785419191, "grad_norm": 0.48037742288684904, "learning_rate": 2.0681265206812654e-05, "loss": 0.4578, "step": 64500 }, { "epoch": 1.8833447686895082, "grad_norm": 0.5283991222564123, "learning_rate": 2.0678561773452285e-05, "loss": 0.4307, "step": 64505 }, { "epoch": 1.8834907519598254, "grad_norm": 0.46281955298822486, "learning_rate": 2.0675858340091916e-05, "loss": 0.4333, "step": 64510 }, { "epoch": 1.8836367352301426, "grad_norm": 0.4441724587116658, "learning_rate": 2.067315490673155e-05, "loss": 0.4012, "step": 64515 }, { "epoch": 1.8837827185004599, "grad_norm": 0.48860703082641227, "learning_rate": 2.0670451473371184e-05, "loss": 0.4664, "step": 64520 }, { "epoch": 1.883928701770777, "grad_norm": 0.46212418024030766, "learning_rate": 2.0667748040010814e-05, "loss": 0.4195, "step": 64525 }, { "epoch": 1.8840746850410943, "grad_norm": 0.4677240428187396, "learning_rate": 2.0665044606650448e-05, "loss": 0.431, "step": 64530 }, { "epoch": 1.8842206683114116, "grad_norm": 0.4706060848216194, "learning_rate": 2.066234117329008e-05, "loss": 0.4532, "step": 64535 }, { "epoch": 1.8843666515817288, "grad_norm": 0.5093753465680886, "learning_rate": 2.0659637739929713e-05, "loss": 0.4315, "step": 64540 }, { "epoch": 1.884512634852046, "grad_norm": 0.46396915040344905, "learning_rate": 2.0656934306569343e-05, "loss": 0.4223, "step": 64545 }, { "epoch": 1.884658618122363, "grad_norm": 0.45896575210410195, "learning_rate": 2.0654230873208977e-05, "loss": 0.4206, "step": 64550 }, { "epoch": 1.8848046013926805, "grad_norm": 0.4705668734630411, "learning_rate": 2.0651527439848608e-05, "loss": 0.4354, "step": 64555 }, { "epoch": 1.8849505846629975, "grad_norm": 0.4558599359203314, "learning_rate": 2.0648824006488242e-05, "loss": 0.4355, "step": 64560 }, { "epoch": 1.885096567933315, "grad_norm": 0.4771284522027175, "learning_rate": 2.0646120573127873e-05, "loss": 0.4476, "step": 64565 }, { "epoch": 1.885242551203632, "grad_norm": 0.5105088916583856, "learning_rate": 2.0643417139767507e-05, "loss": 0.4464, "step": 64570 }, { "epoch": 1.8853885344739494, "grad_norm": 0.4697733053437297, "learning_rate": 2.0640713706407137e-05, "loss": 0.4632, "step": 64575 }, { "epoch": 1.8855345177442664, "grad_norm": 0.4839044057841436, "learning_rate": 2.063801027304677e-05, "loss": 0.4491, "step": 64580 }, { "epoch": 1.8856805010145838, "grad_norm": 0.47845021868745813, "learning_rate": 2.0635306839686402e-05, "loss": 0.4368, "step": 64585 }, { "epoch": 1.8858264842849009, "grad_norm": 0.4790770545013751, "learning_rate": 2.0632603406326033e-05, "loss": 0.4276, "step": 64590 }, { "epoch": 1.8859724675552183, "grad_norm": 0.48521071531089716, "learning_rate": 2.0629899972965667e-05, "loss": 0.4354, "step": 64595 }, { "epoch": 1.8861184508255353, "grad_norm": 0.46494698606427226, "learning_rate": 2.06271965396053e-05, "loss": 0.461, "step": 64600 }, { "epoch": 1.8862644340958528, "grad_norm": 0.4582316306513355, "learning_rate": 2.062449310624493e-05, "loss": 0.4193, "step": 64605 }, { "epoch": 1.8864104173661698, "grad_norm": 0.4949871658680338, "learning_rate": 2.0621789672884565e-05, "loss": 0.4221, "step": 64610 }, { "epoch": 1.8865564006364872, "grad_norm": 0.46576693097896926, "learning_rate": 2.0619086239524196e-05, "loss": 0.4555, "step": 64615 }, { "epoch": 1.8867023839068042, "grad_norm": 0.469954815813003, "learning_rate": 2.0616382806163827e-05, "loss": 0.4205, "step": 64620 }, { "epoch": 1.8868483671771215, "grad_norm": 0.48398297049231875, "learning_rate": 2.0613679372803464e-05, "loss": 0.4417, "step": 64625 }, { "epoch": 1.8869943504474387, "grad_norm": 0.541718273618436, "learning_rate": 2.0610975939443095e-05, "loss": 0.4691, "step": 64630 }, { "epoch": 1.887140333717756, "grad_norm": 0.4972696390333044, "learning_rate": 2.0608272506082725e-05, "loss": 0.4435, "step": 64635 }, { "epoch": 1.8872863169880731, "grad_norm": 0.4768846940083719, "learning_rate": 2.060556907272236e-05, "loss": 0.4443, "step": 64640 }, { "epoch": 1.8874323002583904, "grad_norm": 0.4967006872114074, "learning_rate": 2.060286563936199e-05, "loss": 0.4358, "step": 64645 }, { "epoch": 1.8875782835287076, "grad_norm": 0.4529073822033832, "learning_rate": 2.060016220600162e-05, "loss": 0.419, "step": 64650 }, { "epoch": 1.8877242667990248, "grad_norm": 0.4923152269819851, "learning_rate": 2.0597458772641258e-05, "loss": 0.422, "step": 64655 }, { "epoch": 1.887870250069342, "grad_norm": 0.4887102847148715, "learning_rate": 2.059475533928089e-05, "loss": 0.4591, "step": 64660 }, { "epoch": 1.8880162333396593, "grad_norm": 0.49536648708968856, "learning_rate": 2.059205190592052e-05, "loss": 0.4339, "step": 64665 }, { "epoch": 1.8881622166099765, "grad_norm": 0.4900506537716809, "learning_rate": 2.0589348472560153e-05, "loss": 0.4493, "step": 64670 }, { "epoch": 1.8883081998802937, "grad_norm": 0.4695299564813563, "learning_rate": 2.0586645039199784e-05, "loss": 0.4351, "step": 64675 }, { "epoch": 1.888454183150611, "grad_norm": 0.48159733023317264, "learning_rate": 2.0583941605839414e-05, "loss": 0.4474, "step": 64680 }, { "epoch": 1.8886001664209282, "grad_norm": 0.4549613863521019, "learning_rate": 2.0581238172479052e-05, "loss": 0.3932, "step": 64685 }, { "epoch": 1.8887461496912454, "grad_norm": 0.4975102093175359, "learning_rate": 2.0578534739118682e-05, "loss": 0.427, "step": 64690 }, { "epoch": 1.8888921329615624, "grad_norm": 0.4752961704505255, "learning_rate": 2.0575831305758313e-05, "loss": 0.4657, "step": 64695 }, { "epoch": 1.88903811623188, "grad_norm": 0.4341492787401196, "learning_rate": 2.0573127872397947e-05, "loss": 0.4367, "step": 64700 }, { "epoch": 1.889184099502197, "grad_norm": 0.4412870984705128, "learning_rate": 2.0570424439037578e-05, "loss": 0.4351, "step": 64705 }, { "epoch": 1.8893300827725144, "grad_norm": 0.4675275332196027, "learning_rate": 2.056772100567721e-05, "loss": 0.4504, "step": 64710 }, { "epoch": 1.8894760660428314, "grad_norm": 0.4502683728676758, "learning_rate": 2.0565017572316846e-05, "loss": 0.4327, "step": 64715 }, { "epoch": 1.8896220493131488, "grad_norm": 0.47847724507370454, "learning_rate": 2.0562314138956476e-05, "loss": 0.4286, "step": 64720 }, { "epoch": 1.8897680325834658, "grad_norm": 0.47354696990915335, "learning_rate": 2.0559610705596107e-05, "loss": 0.4547, "step": 64725 }, { "epoch": 1.8899140158537833, "grad_norm": 0.46382102126453306, "learning_rate": 2.055690727223574e-05, "loss": 0.4443, "step": 64730 }, { "epoch": 1.8900599991241003, "grad_norm": 0.46499931113350085, "learning_rate": 2.055420383887537e-05, "loss": 0.4328, "step": 64735 }, { "epoch": 1.8902059823944177, "grad_norm": 0.535077062913819, "learning_rate": 2.0551500405515006e-05, "loss": 0.4596, "step": 64740 }, { "epoch": 1.8903519656647347, "grad_norm": 0.48002488389895287, "learning_rate": 2.054879697215464e-05, "loss": 0.4144, "step": 64745 }, { "epoch": 1.8904979489350522, "grad_norm": 0.45982019699636667, "learning_rate": 2.054609353879427e-05, "loss": 0.4409, "step": 64750 }, { "epoch": 1.8906439322053692, "grad_norm": 0.4815530101078641, "learning_rate": 2.05433901054339e-05, "loss": 0.4182, "step": 64755 }, { "epoch": 1.8907899154756866, "grad_norm": 0.45658290917533056, "learning_rate": 2.0540686672073535e-05, "loss": 0.4386, "step": 64760 }, { "epoch": 1.8909358987460037, "grad_norm": 0.47619166694430604, "learning_rate": 2.0537983238713165e-05, "loss": 0.4405, "step": 64765 }, { "epoch": 1.8910818820163209, "grad_norm": 0.4929571976763987, "learning_rate": 2.05352798053528e-05, "loss": 0.4279, "step": 64770 }, { "epoch": 1.891227865286638, "grad_norm": 0.5051871413103762, "learning_rate": 2.0532576371992433e-05, "loss": 0.4492, "step": 64775 }, { "epoch": 1.8913738485569553, "grad_norm": 0.45898505619023927, "learning_rate": 2.0529872938632064e-05, "loss": 0.444, "step": 64780 }, { "epoch": 1.8915198318272726, "grad_norm": 0.44426571167741713, "learning_rate": 2.0527169505271695e-05, "loss": 0.4189, "step": 64785 }, { "epoch": 1.8916658150975898, "grad_norm": 0.5081914686977896, "learning_rate": 2.052446607191133e-05, "loss": 0.4538, "step": 64790 }, { "epoch": 1.891811798367907, "grad_norm": 0.48915205027012654, "learning_rate": 2.0521762638550963e-05, "loss": 0.4214, "step": 64795 }, { "epoch": 1.8919577816382243, "grad_norm": 0.4780124168119497, "learning_rate": 2.0519059205190593e-05, "loss": 0.4214, "step": 64800 }, { "epoch": 1.8921037649085415, "grad_norm": 0.5061376616445873, "learning_rate": 2.0516355771830227e-05, "loss": 0.4419, "step": 64805 }, { "epoch": 1.8922497481788587, "grad_norm": 0.44714129589385787, "learning_rate": 2.0513652338469858e-05, "loss": 0.4177, "step": 64810 }, { "epoch": 1.892395731449176, "grad_norm": 0.5094463629811099, "learning_rate": 2.051094890510949e-05, "loss": 0.443, "step": 64815 }, { "epoch": 1.8925417147194932, "grad_norm": 0.4721672491461279, "learning_rate": 2.0508245471749123e-05, "loss": 0.431, "step": 64820 }, { "epoch": 1.8926876979898104, "grad_norm": 0.48604911433017056, "learning_rate": 2.0505542038388757e-05, "loss": 0.4161, "step": 64825 }, { "epoch": 1.8928336812601276, "grad_norm": 0.448438235159758, "learning_rate": 2.0502838605028387e-05, "loss": 0.4355, "step": 64830 }, { "epoch": 1.8929796645304449, "grad_norm": 0.46257834475943843, "learning_rate": 2.050013517166802e-05, "loss": 0.4331, "step": 64835 }, { "epoch": 1.8931256478007619, "grad_norm": 0.47950280359398384, "learning_rate": 2.0497431738307652e-05, "loss": 0.4323, "step": 64840 }, { "epoch": 1.8932716310710793, "grad_norm": 0.48132449250653364, "learning_rate": 2.0494728304947282e-05, "loss": 0.4312, "step": 64845 }, { "epoch": 1.8934176143413963, "grad_norm": 0.47221303654677327, "learning_rate": 2.0492024871586917e-05, "loss": 0.4249, "step": 64850 }, { "epoch": 1.8935635976117138, "grad_norm": 0.4336685865210764, "learning_rate": 2.048932143822655e-05, "loss": 0.4535, "step": 64855 }, { "epoch": 1.8937095808820308, "grad_norm": 0.5338066454301459, "learning_rate": 2.048661800486618e-05, "loss": 0.4353, "step": 64860 }, { "epoch": 1.8938555641523482, "grad_norm": 0.45862239348561973, "learning_rate": 2.0483914571505812e-05, "loss": 0.4507, "step": 64865 }, { "epoch": 1.8940015474226652, "grad_norm": 0.488150283154586, "learning_rate": 2.0481211138145446e-05, "loss": 0.4562, "step": 64870 }, { "epoch": 1.8941475306929827, "grad_norm": 0.4398433015299401, "learning_rate": 2.0478507704785076e-05, "loss": 0.4364, "step": 64875 }, { "epoch": 1.8942935139632997, "grad_norm": 0.4973883546521565, "learning_rate": 2.047580427142471e-05, "loss": 0.4243, "step": 64880 }, { "epoch": 1.8944394972336172, "grad_norm": 0.46702828821473474, "learning_rate": 2.0473100838064344e-05, "loss": 0.4391, "step": 64885 }, { "epoch": 1.8945854805039342, "grad_norm": 0.48592789575459205, "learning_rate": 2.0470397404703975e-05, "loss": 0.4646, "step": 64890 }, { "epoch": 1.8947314637742516, "grad_norm": 0.4675070428636055, "learning_rate": 2.0467693971343606e-05, "loss": 0.4363, "step": 64895 }, { "epoch": 1.8948774470445686, "grad_norm": 0.4295334426102992, "learning_rate": 2.046499053798324e-05, "loss": 0.432, "step": 64900 }, { "epoch": 1.895023430314886, "grad_norm": 0.45125248913823723, "learning_rate": 2.046228710462287e-05, "loss": 0.4374, "step": 64905 }, { "epoch": 1.895169413585203, "grad_norm": 0.4627308254916572, "learning_rate": 2.0459583671262504e-05, "loss": 0.4318, "step": 64910 }, { "epoch": 1.8953153968555203, "grad_norm": 0.43132762459862695, "learning_rate": 2.045688023790214e-05, "loss": 0.4047, "step": 64915 }, { "epoch": 1.8954613801258375, "grad_norm": 0.46692693895639154, "learning_rate": 2.045417680454177e-05, "loss": 0.447, "step": 64920 }, { "epoch": 1.8956073633961548, "grad_norm": 0.4633894527695983, "learning_rate": 2.04514733711814e-05, "loss": 0.4157, "step": 64925 }, { "epoch": 1.895753346666472, "grad_norm": 0.4465632614296682, "learning_rate": 2.0448769937821034e-05, "loss": 0.4091, "step": 64930 }, { "epoch": 1.8958993299367892, "grad_norm": 0.4850791678834252, "learning_rate": 2.0446066504460664e-05, "loss": 0.4091, "step": 64935 }, { "epoch": 1.8960453132071065, "grad_norm": 0.47444696751520793, "learning_rate": 2.0443363071100298e-05, "loss": 0.4373, "step": 64940 }, { "epoch": 1.8961912964774237, "grad_norm": 0.45625940091915246, "learning_rate": 2.0440659637739932e-05, "loss": 0.4391, "step": 64945 }, { "epoch": 1.896337279747741, "grad_norm": 0.4825886866210479, "learning_rate": 2.0437956204379563e-05, "loss": 0.4316, "step": 64950 }, { "epoch": 1.8964832630180581, "grad_norm": 0.5203971555995601, "learning_rate": 2.0435252771019193e-05, "loss": 0.4452, "step": 64955 }, { "epoch": 1.8966292462883754, "grad_norm": 0.46367085283145626, "learning_rate": 2.0432549337658827e-05, "loss": 0.4404, "step": 64960 }, { "epoch": 1.8967752295586926, "grad_norm": 0.5126366170331762, "learning_rate": 2.042984590429846e-05, "loss": 0.4526, "step": 64965 }, { "epoch": 1.8969212128290098, "grad_norm": 0.4680076071502339, "learning_rate": 2.0427142470938092e-05, "loss": 0.4471, "step": 64970 }, { "epoch": 1.897067196099327, "grad_norm": 0.4753086122908035, "learning_rate": 2.0424439037577726e-05, "loss": 0.4317, "step": 64975 }, { "epoch": 1.8972131793696443, "grad_norm": 0.4913021946041503, "learning_rate": 2.0421735604217357e-05, "loss": 0.4651, "step": 64980 }, { "epoch": 1.8973591626399613, "grad_norm": 0.4767800373297185, "learning_rate": 2.0419032170856987e-05, "loss": 0.4436, "step": 64985 }, { "epoch": 1.8975051459102787, "grad_norm": 0.4695239944010894, "learning_rate": 2.041632873749662e-05, "loss": 0.4461, "step": 64990 }, { "epoch": 1.8976511291805958, "grad_norm": 0.4752003373029207, "learning_rate": 2.0413625304136255e-05, "loss": 0.451, "step": 64995 }, { "epoch": 1.8977971124509132, "grad_norm": 0.4629404834460614, "learning_rate": 2.0410921870775886e-05, "loss": 0.4521, "step": 65000 }, { "epoch": 1.8979430957212302, "grad_norm": 0.4837767235940646, "learning_rate": 2.040821843741552e-05, "loss": 0.4384, "step": 65005 }, { "epoch": 1.8980890789915477, "grad_norm": 0.42717691662946383, "learning_rate": 2.040551500405515e-05, "loss": 0.4305, "step": 65010 }, { "epoch": 1.8982350622618647, "grad_norm": 0.46787123924056256, "learning_rate": 2.040281157069478e-05, "loss": 0.4319, "step": 65015 }, { "epoch": 1.8983810455321821, "grad_norm": 0.45961846096636877, "learning_rate": 2.0400108137334415e-05, "loss": 0.4433, "step": 65020 }, { "epoch": 1.8985270288024991, "grad_norm": 0.46394811084358717, "learning_rate": 2.039740470397405e-05, "loss": 0.4562, "step": 65025 }, { "epoch": 1.8986730120728166, "grad_norm": 0.4487901231243196, "learning_rate": 2.039470127061368e-05, "loss": 0.4678, "step": 65030 }, { "epoch": 1.8988189953431336, "grad_norm": 0.4787125700439389, "learning_rate": 2.0391997837253314e-05, "loss": 0.4445, "step": 65035 }, { "epoch": 1.898964978613451, "grad_norm": 0.45730629773802267, "learning_rate": 2.0389294403892945e-05, "loss": 0.4102, "step": 65040 }, { "epoch": 1.899110961883768, "grad_norm": 0.46284090000242123, "learning_rate": 2.0386590970532575e-05, "loss": 0.4386, "step": 65045 }, { "epoch": 1.8992569451540855, "grad_norm": 0.47378062851299485, "learning_rate": 2.0383887537172213e-05, "loss": 0.4354, "step": 65050 }, { "epoch": 1.8994029284244025, "grad_norm": 0.4672565396377489, "learning_rate": 2.0381184103811843e-05, "loss": 0.4322, "step": 65055 }, { "epoch": 1.8995489116947197, "grad_norm": 0.4893271740774658, "learning_rate": 2.0378480670451474e-05, "loss": 0.438, "step": 65060 }, { "epoch": 1.899694894965037, "grad_norm": 0.47432557044344426, "learning_rate": 2.0375777237091108e-05, "loss": 0.4297, "step": 65065 }, { "epoch": 1.8998408782353542, "grad_norm": 0.4612851931995668, "learning_rate": 2.037307380373074e-05, "loss": 0.4334, "step": 65070 }, { "epoch": 1.8999868615056714, "grad_norm": 0.43004282708383346, "learning_rate": 2.037037037037037e-05, "loss": 0.4204, "step": 65075 }, { "epoch": 1.9001328447759886, "grad_norm": 0.5000532302930388, "learning_rate": 2.0367666937010006e-05, "loss": 0.4589, "step": 65080 }, { "epoch": 1.9002788280463059, "grad_norm": 0.47179123910808957, "learning_rate": 2.0364963503649637e-05, "loss": 0.4177, "step": 65085 }, { "epoch": 1.900424811316623, "grad_norm": 0.4638127847392239, "learning_rate": 2.0362260070289268e-05, "loss": 0.4114, "step": 65090 }, { "epoch": 1.9005707945869403, "grad_norm": 0.47825649090977246, "learning_rate": 2.0359556636928902e-05, "loss": 0.4438, "step": 65095 }, { "epoch": 1.9007167778572576, "grad_norm": 0.44437551220619254, "learning_rate": 2.0356853203568532e-05, "loss": 0.4443, "step": 65100 }, { "epoch": 1.9008627611275748, "grad_norm": 0.47637465709770466, "learning_rate": 2.0354149770208163e-05, "loss": 0.4143, "step": 65105 }, { "epoch": 1.901008744397892, "grad_norm": 0.4656341687667718, "learning_rate": 2.0351446336847797e-05, "loss": 0.4532, "step": 65110 }, { "epoch": 1.9011547276682093, "grad_norm": 0.44343076641758167, "learning_rate": 2.034874290348743e-05, "loss": 0.4488, "step": 65115 }, { "epoch": 1.9013007109385265, "grad_norm": 0.5003422876442651, "learning_rate": 2.034603947012706e-05, "loss": 0.455, "step": 65120 }, { "epoch": 1.9014466942088437, "grad_norm": 0.44433601079904383, "learning_rate": 2.0343336036766696e-05, "loss": 0.4509, "step": 65125 }, { "epoch": 1.901592677479161, "grad_norm": 0.4943082947789179, "learning_rate": 2.0340632603406326e-05, "loss": 0.4651, "step": 65130 }, { "epoch": 1.9017386607494782, "grad_norm": 0.47979725090664355, "learning_rate": 2.033792917004596e-05, "loss": 0.4698, "step": 65135 }, { "epoch": 1.9018846440197952, "grad_norm": 0.46820070578583317, "learning_rate": 2.033522573668559e-05, "loss": 0.431, "step": 65140 }, { "epoch": 1.9020306272901126, "grad_norm": 0.5091605739093817, "learning_rate": 2.0332522303325225e-05, "loss": 0.4464, "step": 65145 }, { "epoch": 1.9021766105604296, "grad_norm": 0.5306067314282995, "learning_rate": 2.0329818869964856e-05, "loss": 0.4152, "step": 65150 }, { "epoch": 1.902322593830747, "grad_norm": 0.4493708841100698, "learning_rate": 2.032711543660449e-05, "loss": 0.4334, "step": 65155 }, { "epoch": 1.902468577101064, "grad_norm": 0.48971918125638725, "learning_rate": 2.032441200324412e-05, "loss": 0.4314, "step": 65160 }, { "epoch": 1.9026145603713815, "grad_norm": 0.49403742295535896, "learning_rate": 2.0321708569883754e-05, "loss": 0.4525, "step": 65165 }, { "epoch": 1.9027605436416986, "grad_norm": 0.4526905710276178, "learning_rate": 2.0319005136523385e-05, "loss": 0.4096, "step": 65170 }, { "epoch": 1.902906526912016, "grad_norm": 0.5187752492376841, "learning_rate": 2.031630170316302e-05, "loss": 0.4326, "step": 65175 }, { "epoch": 1.903052510182333, "grad_norm": 0.4945089081076463, "learning_rate": 2.031359826980265e-05, "loss": 0.4114, "step": 65180 }, { "epoch": 1.9031984934526505, "grad_norm": 0.4674052108487019, "learning_rate": 2.0310894836442283e-05, "loss": 0.4283, "step": 65185 }, { "epoch": 1.9033444767229675, "grad_norm": 0.5170825691915367, "learning_rate": 2.0308191403081914e-05, "loss": 0.4356, "step": 65190 }, { "epoch": 1.903490459993285, "grad_norm": 0.48573620009929985, "learning_rate": 2.0305487969721548e-05, "loss": 0.4254, "step": 65195 }, { "epoch": 1.903636443263602, "grad_norm": 0.4519742862927547, "learning_rate": 2.030278453636118e-05, "loss": 0.4081, "step": 65200 }, { "epoch": 1.9037824265339192, "grad_norm": 0.49217310255346103, "learning_rate": 2.0300081103000813e-05, "loss": 0.4199, "step": 65205 }, { "epoch": 1.9039284098042364, "grad_norm": 0.4991571007147312, "learning_rate": 2.0297377669640443e-05, "loss": 0.4529, "step": 65210 }, { "epoch": 1.9040743930745536, "grad_norm": 0.510556333833962, "learning_rate": 2.0294674236280077e-05, "loss": 0.4442, "step": 65215 }, { "epoch": 1.9042203763448708, "grad_norm": 0.47652718340070216, "learning_rate": 2.029197080291971e-05, "loss": 0.4344, "step": 65220 }, { "epoch": 1.904366359615188, "grad_norm": 0.4777618476167842, "learning_rate": 2.0289267369559342e-05, "loss": 0.4345, "step": 65225 }, { "epoch": 1.9045123428855053, "grad_norm": 0.47584145256416815, "learning_rate": 2.0286563936198973e-05, "loss": 0.4052, "step": 65230 }, { "epoch": 1.9046583261558225, "grad_norm": 0.4737611107848053, "learning_rate": 2.0283860502838607e-05, "loss": 0.4226, "step": 65235 }, { "epoch": 1.9048043094261398, "grad_norm": 0.49440194317475855, "learning_rate": 2.0281157069478237e-05, "loss": 0.4139, "step": 65240 }, { "epoch": 1.904950292696457, "grad_norm": 0.4124830119808645, "learning_rate": 2.0278453636117868e-05, "loss": 0.4143, "step": 65245 }, { "epoch": 1.9050962759667742, "grad_norm": 0.48140941046196295, "learning_rate": 2.0275750202757505e-05, "loss": 0.4489, "step": 65250 }, { "epoch": 1.9052422592370915, "grad_norm": 0.4384902375766436, "learning_rate": 2.0273046769397136e-05, "loss": 0.4305, "step": 65255 }, { "epoch": 1.9053882425074087, "grad_norm": 0.4954869725474716, "learning_rate": 2.0270343336036767e-05, "loss": 0.4561, "step": 65260 }, { "epoch": 1.905534225777726, "grad_norm": 0.46630486631127216, "learning_rate": 2.02676399026764e-05, "loss": 0.4198, "step": 65265 }, { "epoch": 1.9056802090480431, "grad_norm": 0.44282498465723885, "learning_rate": 2.026493646931603e-05, "loss": 0.4228, "step": 65270 }, { "epoch": 1.9058261923183604, "grad_norm": 0.5182793505187201, "learning_rate": 2.0262233035955662e-05, "loss": 0.4425, "step": 65275 }, { "epoch": 1.9059721755886776, "grad_norm": 0.5072045382628322, "learning_rate": 2.02595296025953e-05, "loss": 0.4297, "step": 65280 }, { "epoch": 1.9061181588589946, "grad_norm": 0.512337024356989, "learning_rate": 2.025682616923493e-05, "loss": 0.4639, "step": 65285 }, { "epoch": 1.906264142129312, "grad_norm": 0.4552444262490686, "learning_rate": 2.025412273587456e-05, "loss": 0.4271, "step": 65290 }, { "epoch": 1.906410125399629, "grad_norm": 0.46345930907510646, "learning_rate": 2.0251419302514194e-05, "loss": 0.4625, "step": 65295 }, { "epoch": 1.9065561086699465, "grad_norm": 0.44637200711108843, "learning_rate": 2.0248715869153825e-05, "loss": 0.4423, "step": 65300 }, { "epoch": 1.9067020919402635, "grad_norm": 0.48284283365637604, "learning_rate": 2.024601243579346e-05, "loss": 0.44, "step": 65305 }, { "epoch": 1.906848075210581, "grad_norm": 0.5006998493600332, "learning_rate": 2.0243309002433093e-05, "loss": 0.4217, "step": 65310 }, { "epoch": 1.906994058480898, "grad_norm": 0.4922222804767115, "learning_rate": 2.0240605569072724e-05, "loss": 0.4461, "step": 65315 }, { "epoch": 1.9071400417512154, "grad_norm": 0.4733212349119824, "learning_rate": 2.0237902135712354e-05, "loss": 0.4032, "step": 65320 }, { "epoch": 1.9072860250215324, "grad_norm": 0.49664603385534917, "learning_rate": 2.023519870235199e-05, "loss": 0.4187, "step": 65325 }, { "epoch": 1.90743200829185, "grad_norm": 0.47256375270192086, "learning_rate": 2.023249526899162e-05, "loss": 0.4225, "step": 65330 }, { "epoch": 1.907577991562167, "grad_norm": 0.5263819075243543, "learning_rate": 2.0229791835631253e-05, "loss": 0.4282, "step": 65335 }, { "epoch": 1.9077239748324843, "grad_norm": 0.4340807922888956, "learning_rate": 2.0227088402270887e-05, "loss": 0.4346, "step": 65340 }, { "epoch": 1.9078699581028014, "grad_norm": 0.4702026053379517, "learning_rate": 2.0224384968910518e-05, "loss": 0.4355, "step": 65345 }, { "epoch": 1.9080159413731188, "grad_norm": 0.4998595492560549, "learning_rate": 2.0221681535550148e-05, "loss": 0.4448, "step": 65350 }, { "epoch": 1.9081619246434358, "grad_norm": 0.49832872185954225, "learning_rate": 2.0218978102189782e-05, "loss": 0.4331, "step": 65355 }, { "epoch": 1.908307907913753, "grad_norm": 0.49566210083313683, "learning_rate": 2.0216274668829413e-05, "loss": 0.421, "step": 65360 }, { "epoch": 1.9084538911840703, "grad_norm": 0.4311857837019058, "learning_rate": 2.0213571235469047e-05, "loss": 0.4243, "step": 65365 }, { "epoch": 1.9085998744543875, "grad_norm": 0.4685475647384923, "learning_rate": 2.021086780210868e-05, "loss": 0.4243, "step": 65370 }, { "epoch": 1.9087458577247047, "grad_norm": 0.500205977922958, "learning_rate": 2.020816436874831e-05, "loss": 0.4191, "step": 65375 }, { "epoch": 1.908891840995022, "grad_norm": 0.48523498876120064, "learning_rate": 2.0205460935387942e-05, "loss": 0.4346, "step": 65380 }, { "epoch": 1.9090378242653392, "grad_norm": 0.4728371351605864, "learning_rate": 2.0202757502027576e-05, "loss": 0.4264, "step": 65385 }, { "epoch": 1.9091838075356564, "grad_norm": 0.466057556425706, "learning_rate": 2.020005406866721e-05, "loss": 0.4376, "step": 65390 }, { "epoch": 1.9093297908059736, "grad_norm": 0.4866766957858001, "learning_rate": 2.019735063530684e-05, "loss": 0.4558, "step": 65395 }, { "epoch": 1.9094757740762909, "grad_norm": 0.4945533181281668, "learning_rate": 2.0194647201946475e-05, "loss": 0.4593, "step": 65400 }, { "epoch": 1.909621757346608, "grad_norm": 0.4799881175225122, "learning_rate": 2.0191943768586105e-05, "loss": 0.4414, "step": 65405 }, { "epoch": 1.9097677406169253, "grad_norm": 0.5124348336036462, "learning_rate": 2.0189240335225736e-05, "loss": 0.4686, "step": 65410 }, { "epoch": 1.9099137238872426, "grad_norm": 0.4869647227041416, "learning_rate": 2.018653690186537e-05, "loss": 0.4679, "step": 65415 }, { "epoch": 1.9100597071575598, "grad_norm": 0.43033810502139946, "learning_rate": 2.0183833468505004e-05, "loss": 0.4499, "step": 65420 }, { "epoch": 1.910205690427877, "grad_norm": 0.47032420116237594, "learning_rate": 2.0181130035144635e-05, "loss": 0.4533, "step": 65425 }, { "epoch": 1.910351673698194, "grad_norm": 0.46615137464760453, "learning_rate": 2.017842660178427e-05, "loss": 0.41, "step": 65430 }, { "epoch": 1.9104976569685115, "grad_norm": 0.42872489777316203, "learning_rate": 2.01757231684239e-05, "loss": 0.4176, "step": 65435 }, { "epoch": 1.9106436402388285, "grad_norm": 0.4631829272445126, "learning_rate": 2.017301973506353e-05, "loss": 0.4518, "step": 65440 }, { "epoch": 1.910789623509146, "grad_norm": 0.5113983331170333, "learning_rate": 2.0170316301703164e-05, "loss": 0.4365, "step": 65445 }, { "epoch": 1.910935606779463, "grad_norm": 0.462863359514459, "learning_rate": 2.0167612868342798e-05, "loss": 0.4217, "step": 65450 }, { "epoch": 1.9110815900497804, "grad_norm": 0.4835766581917346, "learning_rate": 2.016490943498243e-05, "loss": 0.4524, "step": 65455 }, { "epoch": 1.9112275733200974, "grad_norm": 0.4693907796642201, "learning_rate": 2.0162206001622063e-05, "loss": 0.455, "step": 65460 }, { "epoch": 1.9113735565904149, "grad_norm": 0.47659005616990624, "learning_rate": 2.0159502568261693e-05, "loss": 0.4236, "step": 65465 }, { "epoch": 1.9115195398607319, "grad_norm": 0.4410900130562375, "learning_rate": 2.0156799134901324e-05, "loss": 0.4115, "step": 65470 }, { "epoch": 1.9116655231310493, "grad_norm": 0.46937696672902013, "learning_rate": 2.0154095701540958e-05, "loss": 0.4259, "step": 65475 }, { "epoch": 1.9118115064013663, "grad_norm": 0.4937979962471369, "learning_rate": 2.0151392268180592e-05, "loss": 0.445, "step": 65480 }, { "epoch": 1.9119574896716838, "grad_norm": 0.4735430209565013, "learning_rate": 2.0148688834820222e-05, "loss": 0.4333, "step": 65485 }, { "epoch": 1.9121034729420008, "grad_norm": 0.479638002026295, "learning_rate": 2.0145985401459853e-05, "loss": 0.4136, "step": 65490 }, { "epoch": 1.9122494562123182, "grad_norm": 0.41287583320248405, "learning_rate": 2.0143281968099487e-05, "loss": 0.4214, "step": 65495 }, { "epoch": 1.9123954394826352, "grad_norm": 0.4704655307481378, "learning_rate": 2.0140578534739118e-05, "loss": 0.4317, "step": 65500 }, { "epoch": 1.9125414227529525, "grad_norm": 0.47570224263648764, "learning_rate": 2.0137875101378752e-05, "loss": 0.4084, "step": 65505 }, { "epoch": 1.9126874060232697, "grad_norm": 0.45114264884512123, "learning_rate": 2.0135171668018386e-05, "loss": 0.4157, "step": 65510 }, { "epoch": 1.912833389293587, "grad_norm": 0.5226297341877771, "learning_rate": 2.0132468234658016e-05, "loss": 0.4504, "step": 65515 }, { "epoch": 1.9129793725639042, "grad_norm": 0.4643105627964212, "learning_rate": 2.0129764801297647e-05, "loss": 0.4369, "step": 65520 }, { "epoch": 1.9131253558342214, "grad_norm": 0.4639834707484759, "learning_rate": 2.012706136793728e-05, "loss": 0.4523, "step": 65525 }, { "epoch": 1.9132713391045386, "grad_norm": 0.5093653453599115, "learning_rate": 2.012435793457691e-05, "loss": 0.4068, "step": 65530 }, { "epoch": 1.9134173223748558, "grad_norm": 0.4417911550283858, "learning_rate": 2.0121654501216546e-05, "loss": 0.4223, "step": 65535 }, { "epoch": 1.913563305645173, "grad_norm": 0.47553427657901237, "learning_rate": 2.011895106785618e-05, "loss": 0.4564, "step": 65540 }, { "epoch": 1.9137092889154903, "grad_norm": 0.4790655335643574, "learning_rate": 2.011624763449581e-05, "loss": 0.4775, "step": 65545 }, { "epoch": 1.9138552721858075, "grad_norm": 0.4590220605102313, "learning_rate": 2.011354420113544e-05, "loss": 0.4523, "step": 65550 }, { "epoch": 1.9140012554561248, "grad_norm": 0.493635855007525, "learning_rate": 2.0110840767775075e-05, "loss": 0.4346, "step": 65555 }, { "epoch": 1.914147238726442, "grad_norm": 0.4763109102092465, "learning_rate": 2.010813733441471e-05, "loss": 0.4185, "step": 65560 }, { "epoch": 1.9142932219967592, "grad_norm": 0.44887653030043845, "learning_rate": 2.010543390105434e-05, "loss": 0.4155, "step": 65565 }, { "epoch": 1.9144392052670764, "grad_norm": 0.49219071535518405, "learning_rate": 2.0102730467693974e-05, "loss": 0.443, "step": 65570 }, { "epoch": 1.9145851885373935, "grad_norm": 0.48662760784281645, "learning_rate": 2.0100027034333604e-05, "loss": 0.4346, "step": 65575 }, { "epoch": 1.914731171807711, "grad_norm": 0.4484565021396057, "learning_rate": 2.0097323600973235e-05, "loss": 0.4453, "step": 65580 }, { "epoch": 1.914877155078028, "grad_norm": 0.4676711554815729, "learning_rate": 2.009462016761287e-05, "loss": 0.4678, "step": 65585 }, { "epoch": 1.9150231383483454, "grad_norm": 0.505111388842296, "learning_rate": 2.0091916734252503e-05, "loss": 0.425, "step": 65590 }, { "epoch": 1.9151691216186624, "grad_norm": 0.4816597598176447, "learning_rate": 2.0089213300892133e-05, "loss": 0.4444, "step": 65595 }, { "epoch": 1.9153151048889798, "grad_norm": 0.4560654481132095, "learning_rate": 2.0086509867531767e-05, "loss": 0.4202, "step": 65600 }, { "epoch": 1.9154610881592968, "grad_norm": 0.49411470542937197, "learning_rate": 2.0083806434171398e-05, "loss": 0.4265, "step": 65605 }, { "epoch": 1.9156070714296143, "grad_norm": 0.4594905271462482, "learning_rate": 2.008110300081103e-05, "loss": 0.4523, "step": 65610 }, { "epoch": 1.9157530546999313, "grad_norm": 0.4448092514342808, "learning_rate": 2.0078399567450663e-05, "loss": 0.4296, "step": 65615 }, { "epoch": 1.9158990379702487, "grad_norm": 0.45906384065935213, "learning_rate": 2.0075696134090297e-05, "loss": 0.4221, "step": 65620 }, { "epoch": 1.9160450212405657, "grad_norm": 0.5220685243902605, "learning_rate": 2.0072992700729927e-05, "loss": 0.4296, "step": 65625 }, { "epoch": 1.9161910045108832, "grad_norm": 0.49208779251696894, "learning_rate": 2.007028926736956e-05, "loss": 0.4087, "step": 65630 }, { "epoch": 1.9163369877812002, "grad_norm": 0.4957086451848641, "learning_rate": 2.0067585834009192e-05, "loss": 0.4238, "step": 65635 }, { "epoch": 1.9164829710515177, "grad_norm": 0.530317025671773, "learning_rate": 2.0064882400648823e-05, "loss": 0.4294, "step": 65640 }, { "epoch": 1.9166289543218347, "grad_norm": 0.4775908733033913, "learning_rate": 2.006217896728846e-05, "loss": 0.4408, "step": 65645 }, { "epoch": 1.916774937592152, "grad_norm": 0.4457945154345406, "learning_rate": 2.005947553392809e-05, "loss": 0.4594, "step": 65650 }, { "epoch": 1.9169209208624691, "grad_norm": 0.43260211085484224, "learning_rate": 2.005677210056772e-05, "loss": 0.4187, "step": 65655 }, { "epoch": 1.9170669041327864, "grad_norm": 0.47822563840872434, "learning_rate": 2.0054068667207355e-05, "loss": 0.4355, "step": 65660 }, { "epoch": 1.9172128874031036, "grad_norm": 0.41117375901678893, "learning_rate": 2.0051365233846986e-05, "loss": 0.4076, "step": 65665 }, { "epoch": 1.9173588706734208, "grad_norm": 0.48442412806177776, "learning_rate": 2.0048661800486617e-05, "loss": 0.4463, "step": 65670 }, { "epoch": 1.917504853943738, "grad_norm": 0.5319965870816862, "learning_rate": 2.0045958367126254e-05, "loss": 0.4726, "step": 65675 }, { "epoch": 1.9176508372140553, "grad_norm": 0.45690160190604806, "learning_rate": 2.0043254933765885e-05, "loss": 0.4369, "step": 65680 }, { "epoch": 1.9177968204843725, "grad_norm": 0.42508103392484325, "learning_rate": 2.0040551500405515e-05, "loss": 0.4185, "step": 65685 }, { "epoch": 1.9179428037546897, "grad_norm": 0.4068885682260853, "learning_rate": 2.003784806704515e-05, "loss": 0.3885, "step": 65690 }, { "epoch": 1.918088787025007, "grad_norm": 0.48559688386783423, "learning_rate": 2.003514463368478e-05, "loss": 0.4231, "step": 65695 }, { "epoch": 1.9182347702953242, "grad_norm": 0.4874949959660918, "learning_rate": 2.003244120032441e-05, "loss": 0.4469, "step": 65700 }, { "epoch": 1.9183807535656414, "grad_norm": 0.5077936435459551, "learning_rate": 2.0029737766964048e-05, "loss": 0.4318, "step": 65705 }, { "epoch": 1.9185267368359586, "grad_norm": 0.4696389851388362, "learning_rate": 2.002703433360368e-05, "loss": 0.4336, "step": 65710 }, { "epoch": 1.9186727201062759, "grad_norm": 0.4554773124749724, "learning_rate": 2.002433090024331e-05, "loss": 0.4391, "step": 65715 }, { "epoch": 1.9188187033765929, "grad_norm": 0.45740194678818674, "learning_rate": 2.0021627466882943e-05, "loss": 0.4148, "step": 65720 }, { "epoch": 1.9189646866469103, "grad_norm": 0.543579703438731, "learning_rate": 2.0018924033522574e-05, "loss": 0.4632, "step": 65725 }, { "epoch": 1.9191106699172273, "grad_norm": 0.4882810214278807, "learning_rate": 2.0016220600162208e-05, "loss": 0.4374, "step": 65730 }, { "epoch": 1.9192566531875448, "grad_norm": 0.49722804877567256, "learning_rate": 2.0013517166801842e-05, "loss": 0.4229, "step": 65735 }, { "epoch": 1.9194026364578618, "grad_norm": 0.47847330764252954, "learning_rate": 2.0010813733441472e-05, "loss": 0.4296, "step": 65740 }, { "epoch": 1.9195486197281793, "grad_norm": 0.4477677961785368, "learning_rate": 2.0008110300081103e-05, "loss": 0.4311, "step": 65745 }, { "epoch": 1.9196946029984963, "grad_norm": 0.45520314410233714, "learning_rate": 2.0005406866720737e-05, "loss": 0.4363, "step": 65750 }, { "epoch": 1.9198405862688137, "grad_norm": 0.5064391139472093, "learning_rate": 2.0002703433360368e-05, "loss": 0.4622, "step": 65755 }, { "epoch": 1.9199865695391307, "grad_norm": 0.4562304607220954, "learning_rate": 2e-05, "loss": 0.4433, "step": 65760 }, { "epoch": 1.9201325528094482, "grad_norm": 0.4591499147325528, "learning_rate": 1.9997296566639632e-05, "loss": 0.4279, "step": 65765 }, { "epoch": 1.9202785360797652, "grad_norm": 0.49010467550686687, "learning_rate": 1.9994593133279266e-05, "loss": 0.4213, "step": 65770 }, { "epoch": 1.9204245193500826, "grad_norm": 0.44590278446484105, "learning_rate": 1.9991889699918897e-05, "loss": 0.4342, "step": 65775 }, { "epoch": 1.9205705026203996, "grad_norm": 0.4920784455597093, "learning_rate": 1.998918626655853e-05, "loss": 0.4422, "step": 65780 }, { "epoch": 1.920716485890717, "grad_norm": 0.5061630066863222, "learning_rate": 1.998648283319816e-05, "loss": 0.4095, "step": 65785 }, { "epoch": 1.920862469161034, "grad_norm": 0.42857692158479127, "learning_rate": 1.9983779399837796e-05, "loss": 0.3858, "step": 65790 }, { "epoch": 1.9210084524313513, "grad_norm": 0.46227481151377914, "learning_rate": 1.9981075966477426e-05, "loss": 0.4013, "step": 65795 }, { "epoch": 1.9211544357016686, "grad_norm": 0.48009500211358785, "learning_rate": 1.997837253311706e-05, "loss": 0.4486, "step": 65800 }, { "epoch": 1.9213004189719858, "grad_norm": 0.4871470449760335, "learning_rate": 1.997566909975669e-05, "loss": 0.4352, "step": 65805 }, { "epoch": 1.921446402242303, "grad_norm": 0.46493966522422037, "learning_rate": 1.9972965666396325e-05, "loss": 0.4165, "step": 65810 }, { "epoch": 1.9215923855126202, "grad_norm": 0.47802337443903287, "learning_rate": 1.997026223303596e-05, "loss": 0.4313, "step": 65815 }, { "epoch": 1.9217383687829375, "grad_norm": 0.4772010696950242, "learning_rate": 1.996755879967559e-05, "loss": 0.4272, "step": 65820 }, { "epoch": 1.9218843520532547, "grad_norm": 0.436494557703409, "learning_rate": 1.996485536631522e-05, "loss": 0.4314, "step": 65825 }, { "epoch": 1.922030335323572, "grad_norm": 0.45307914722181114, "learning_rate": 1.9962151932954854e-05, "loss": 0.4538, "step": 65830 }, { "epoch": 1.9221763185938892, "grad_norm": 0.471606632403763, "learning_rate": 1.9959448499594485e-05, "loss": 0.4361, "step": 65835 }, { "epoch": 1.9223223018642064, "grad_norm": 0.4424127298218058, "learning_rate": 1.995674506623412e-05, "loss": 0.4384, "step": 65840 }, { "epoch": 1.9224682851345236, "grad_norm": 0.46487746344062597, "learning_rate": 1.9954041632873753e-05, "loss": 0.4411, "step": 65845 }, { "epoch": 1.9226142684048408, "grad_norm": 0.458406898985117, "learning_rate": 1.9951338199513383e-05, "loss": 0.4243, "step": 65850 }, { "epoch": 1.922760251675158, "grad_norm": 0.4683282742641431, "learning_rate": 1.9948634766153014e-05, "loss": 0.4541, "step": 65855 }, { "epoch": 1.9229062349454753, "grad_norm": 0.46409519404799165, "learning_rate": 1.9945931332792648e-05, "loss": 0.4517, "step": 65860 }, { "epoch": 1.9230522182157923, "grad_norm": 0.49804247012402525, "learning_rate": 1.994322789943228e-05, "loss": 0.4345, "step": 65865 }, { "epoch": 1.9231982014861098, "grad_norm": 0.46345955786395593, "learning_rate": 1.9940524466071913e-05, "loss": 0.4086, "step": 65870 }, { "epoch": 1.9233441847564268, "grad_norm": 0.4619214356280396, "learning_rate": 1.9937821032711547e-05, "loss": 0.4073, "step": 65875 }, { "epoch": 1.9234901680267442, "grad_norm": 0.48681460697304735, "learning_rate": 1.9935117599351177e-05, "loss": 0.3995, "step": 65880 }, { "epoch": 1.9236361512970612, "grad_norm": 0.4971125154060376, "learning_rate": 1.9932414165990808e-05, "loss": 0.4514, "step": 65885 }, { "epoch": 1.9237821345673787, "grad_norm": 0.45866363129128807, "learning_rate": 1.9929710732630442e-05, "loss": 0.4124, "step": 65890 }, { "epoch": 1.9239281178376957, "grad_norm": 0.4583545886690288, "learning_rate": 1.9927007299270073e-05, "loss": 0.4391, "step": 65895 }, { "epoch": 1.9240741011080131, "grad_norm": 0.42558481630697553, "learning_rate": 1.9924303865909707e-05, "loss": 0.4239, "step": 65900 }, { "epoch": 1.9242200843783301, "grad_norm": 0.4705178444910174, "learning_rate": 1.992160043254934e-05, "loss": 0.4147, "step": 65905 }, { "epoch": 1.9243660676486476, "grad_norm": 0.48647069800540893, "learning_rate": 1.991889699918897e-05, "loss": 0.4791, "step": 65910 }, { "epoch": 1.9245120509189646, "grad_norm": 0.45702030079067785, "learning_rate": 1.9916193565828602e-05, "loss": 0.437, "step": 65915 }, { "epoch": 1.924658034189282, "grad_norm": 0.46802932824396065, "learning_rate": 1.9913490132468236e-05, "loss": 0.3986, "step": 65920 }, { "epoch": 1.924804017459599, "grad_norm": 0.4626379413450076, "learning_rate": 1.9910786699107866e-05, "loss": 0.4418, "step": 65925 }, { "epoch": 1.9249500007299165, "grad_norm": 0.4816730247078401, "learning_rate": 1.99080832657475e-05, "loss": 0.3937, "step": 65930 }, { "epoch": 1.9250959840002335, "grad_norm": 0.44543357877358725, "learning_rate": 1.9905379832387134e-05, "loss": 0.4212, "step": 65935 }, { "epoch": 1.9252419672705507, "grad_norm": 0.46016329458025396, "learning_rate": 1.9902676399026765e-05, "loss": 0.4383, "step": 65940 }, { "epoch": 1.925387950540868, "grad_norm": 0.42695614325834363, "learning_rate": 1.9899972965666396e-05, "loss": 0.4313, "step": 65945 }, { "epoch": 1.9255339338111852, "grad_norm": 0.45574022464159236, "learning_rate": 1.989726953230603e-05, "loss": 0.4329, "step": 65950 }, { "epoch": 1.9256799170815024, "grad_norm": 0.4987169304693251, "learning_rate": 1.989456609894566e-05, "loss": 0.4452, "step": 65955 }, { "epoch": 1.9258259003518197, "grad_norm": 0.4644840167093027, "learning_rate": 1.9891862665585294e-05, "loss": 0.4833, "step": 65960 }, { "epoch": 1.925971883622137, "grad_norm": 0.4686235075068213, "learning_rate": 1.988915923222493e-05, "loss": 0.4258, "step": 65965 }, { "epoch": 1.9261178668924541, "grad_norm": 0.4316722309646686, "learning_rate": 1.988645579886456e-05, "loss": 0.4252, "step": 65970 }, { "epoch": 1.9262638501627714, "grad_norm": 0.4387551933579705, "learning_rate": 1.988375236550419e-05, "loss": 0.4021, "step": 65975 }, { "epoch": 1.9264098334330886, "grad_norm": 0.5119824283569563, "learning_rate": 1.9881048932143824e-05, "loss": 0.4627, "step": 65980 }, { "epoch": 1.9265558167034058, "grad_norm": 0.4593771697014655, "learning_rate": 1.9878345498783458e-05, "loss": 0.4449, "step": 65985 }, { "epoch": 1.926701799973723, "grad_norm": 0.4545544376361265, "learning_rate": 1.9875642065423088e-05, "loss": 0.429, "step": 65990 }, { "epoch": 1.9268477832440403, "grad_norm": 0.43085056483082207, "learning_rate": 1.9872938632062722e-05, "loss": 0.4459, "step": 65995 }, { "epoch": 1.9269937665143575, "grad_norm": 0.44858916224389295, "learning_rate": 1.9870235198702353e-05, "loss": 0.4027, "step": 66000 }, { "epoch": 1.9271397497846747, "grad_norm": 0.45202527847987456, "learning_rate": 1.9867531765341983e-05, "loss": 0.4248, "step": 66005 }, { "epoch": 1.9272857330549917, "grad_norm": 0.49199276324937125, "learning_rate": 1.9864828331981617e-05, "loss": 0.4418, "step": 66010 }, { "epoch": 1.9274317163253092, "grad_norm": 0.45676450370518357, "learning_rate": 1.986212489862125e-05, "loss": 0.4281, "step": 66015 }, { "epoch": 1.9275776995956262, "grad_norm": 0.4601517431401451, "learning_rate": 1.9859421465260882e-05, "loss": 0.4136, "step": 66020 }, { "epoch": 1.9277236828659436, "grad_norm": 0.4580391350016572, "learning_rate": 1.9856718031900516e-05, "loss": 0.413, "step": 66025 }, { "epoch": 1.9278696661362607, "grad_norm": 0.4894776570595746, "learning_rate": 1.9854014598540147e-05, "loss": 0.4397, "step": 66030 }, { "epoch": 1.928015649406578, "grad_norm": 0.8269626542247047, "learning_rate": 1.9851311165179777e-05, "loss": 0.4485, "step": 66035 }, { "epoch": 1.928161632676895, "grad_norm": 0.506131574769666, "learning_rate": 1.984860773181941e-05, "loss": 0.4537, "step": 66040 }, { "epoch": 1.9283076159472126, "grad_norm": 0.5137175238658054, "learning_rate": 1.9845904298459045e-05, "loss": 0.4447, "step": 66045 }, { "epoch": 1.9284535992175296, "grad_norm": 0.48691007743119635, "learning_rate": 1.9843200865098676e-05, "loss": 0.4313, "step": 66050 }, { "epoch": 1.928599582487847, "grad_norm": 0.4513663432674768, "learning_rate": 1.984049743173831e-05, "loss": 0.4311, "step": 66055 }, { "epoch": 1.928745565758164, "grad_norm": 0.4668603311920325, "learning_rate": 1.983779399837794e-05, "loss": 0.4331, "step": 66060 }, { "epoch": 1.9288915490284815, "grad_norm": 0.4586004182709445, "learning_rate": 1.983509056501757e-05, "loss": 0.4503, "step": 66065 }, { "epoch": 1.9290375322987985, "grad_norm": 0.47419991474832224, "learning_rate": 1.9832387131657205e-05, "loss": 0.4349, "step": 66070 }, { "epoch": 1.929183515569116, "grad_norm": 0.5037134546891857, "learning_rate": 1.982968369829684e-05, "loss": 0.4267, "step": 66075 }, { "epoch": 1.929329498839433, "grad_norm": 0.48132593171467897, "learning_rate": 1.982698026493647e-05, "loss": 0.4369, "step": 66080 }, { "epoch": 1.9294754821097502, "grad_norm": 0.4529761949897349, "learning_rate": 1.9824276831576104e-05, "loss": 0.4105, "step": 66085 }, { "epoch": 1.9296214653800674, "grad_norm": 0.4239050248792494, "learning_rate": 1.9821573398215735e-05, "loss": 0.4353, "step": 66090 }, { "epoch": 1.9297674486503846, "grad_norm": 0.4854238514013806, "learning_rate": 1.9818869964855365e-05, "loss": 0.4418, "step": 66095 }, { "epoch": 1.9299134319207019, "grad_norm": 0.45745148735493724, "learning_rate": 1.9816166531495e-05, "loss": 0.4391, "step": 66100 }, { "epoch": 1.930059415191019, "grad_norm": 0.45162238766928353, "learning_rate": 1.9813463098134633e-05, "loss": 0.4239, "step": 66105 }, { "epoch": 1.9302053984613363, "grad_norm": 0.4804365240556892, "learning_rate": 1.9810759664774264e-05, "loss": 0.4495, "step": 66110 }, { "epoch": 1.9303513817316535, "grad_norm": 0.4618671740851368, "learning_rate": 1.9808056231413898e-05, "loss": 0.434, "step": 66115 }, { "epoch": 1.9304973650019708, "grad_norm": 0.5282917678901382, "learning_rate": 1.980535279805353e-05, "loss": 0.4429, "step": 66120 }, { "epoch": 1.930643348272288, "grad_norm": 0.5046790529700146, "learning_rate": 1.980264936469316e-05, "loss": 0.4448, "step": 66125 }, { "epoch": 1.9307893315426052, "grad_norm": 0.5222707556735722, "learning_rate": 1.9799945931332793e-05, "loss": 0.4524, "step": 66130 }, { "epoch": 1.9309353148129225, "grad_norm": 0.4819705553811347, "learning_rate": 1.9797242497972427e-05, "loss": 0.4676, "step": 66135 }, { "epoch": 1.9310812980832397, "grad_norm": 0.7392262736082409, "learning_rate": 1.9794539064612058e-05, "loss": 0.4628, "step": 66140 }, { "epoch": 1.931227281353557, "grad_norm": 0.446389475729934, "learning_rate": 1.979183563125169e-05, "loss": 0.439, "step": 66145 }, { "epoch": 1.9313732646238742, "grad_norm": 0.4668101834862061, "learning_rate": 1.9789132197891322e-05, "loss": 0.4335, "step": 66150 }, { "epoch": 1.9315192478941912, "grad_norm": 0.46608051213846297, "learning_rate": 1.9786428764530956e-05, "loss": 0.4305, "step": 66155 }, { "epoch": 1.9316652311645086, "grad_norm": 0.43872420242907023, "learning_rate": 1.9783725331170587e-05, "loss": 0.437, "step": 66160 }, { "epoch": 1.9318112144348256, "grad_norm": 0.4945557221694447, "learning_rate": 1.978102189781022e-05, "loss": 0.4678, "step": 66165 }, { "epoch": 1.931957197705143, "grad_norm": 0.4530322577792134, "learning_rate": 1.977831846444985e-05, "loss": 0.4335, "step": 66170 }, { "epoch": 1.93210318097546, "grad_norm": 0.4480498851815168, "learning_rate": 1.9775615031089482e-05, "loss": 0.412, "step": 66175 }, { "epoch": 1.9322491642457775, "grad_norm": 0.49342298118853317, "learning_rate": 1.9772911597729116e-05, "loss": 0.4233, "step": 66180 }, { "epoch": 1.9323951475160945, "grad_norm": 0.5004818344451356, "learning_rate": 1.977020816436875e-05, "loss": 0.4431, "step": 66185 }, { "epoch": 1.932541130786412, "grad_norm": 0.48680530225006413, "learning_rate": 1.976750473100838e-05, "loss": 0.4536, "step": 66190 }, { "epoch": 1.932687114056729, "grad_norm": 0.5176545214492518, "learning_rate": 1.9764801297648015e-05, "loss": 0.4638, "step": 66195 }, { "epoch": 1.9328330973270464, "grad_norm": 0.5102671642315993, "learning_rate": 1.9762097864287646e-05, "loss": 0.4247, "step": 66200 }, { "epoch": 1.9329790805973635, "grad_norm": 0.46301374833258685, "learning_rate": 1.9759394430927276e-05, "loss": 0.45, "step": 66205 }, { "epoch": 1.933125063867681, "grad_norm": 0.4601055438304051, "learning_rate": 1.975669099756691e-05, "loss": 0.4161, "step": 66210 }, { "epoch": 1.933271047137998, "grad_norm": 0.4649326158605355, "learning_rate": 1.9753987564206544e-05, "loss": 0.4373, "step": 66215 }, { "epoch": 1.9334170304083154, "grad_norm": 0.41200609450052356, "learning_rate": 1.9751284130846175e-05, "loss": 0.4203, "step": 66220 }, { "epoch": 1.9335630136786324, "grad_norm": 0.5069546291504635, "learning_rate": 1.974858069748581e-05, "loss": 0.417, "step": 66225 }, { "epoch": 1.9337089969489496, "grad_norm": 0.4642138312234277, "learning_rate": 1.974587726412544e-05, "loss": 0.4099, "step": 66230 }, { "epoch": 1.9338549802192668, "grad_norm": 0.45606808613646216, "learning_rate": 1.974317383076507e-05, "loss": 0.4157, "step": 66235 }, { "epoch": 1.934000963489584, "grad_norm": 0.5045281348478435, "learning_rate": 1.9740470397404707e-05, "loss": 0.4337, "step": 66240 }, { "epoch": 1.9341469467599013, "grad_norm": 0.45601981795743707, "learning_rate": 1.9737766964044338e-05, "loss": 0.4211, "step": 66245 }, { "epoch": 1.9342929300302185, "grad_norm": 0.49460323960738295, "learning_rate": 1.973506353068397e-05, "loss": 0.4521, "step": 66250 }, { "epoch": 1.9344389133005357, "grad_norm": 0.48479335759777026, "learning_rate": 1.9732360097323603e-05, "loss": 0.4452, "step": 66255 }, { "epoch": 1.934584896570853, "grad_norm": 0.47482189194582103, "learning_rate": 1.9729656663963233e-05, "loss": 0.4316, "step": 66260 }, { "epoch": 1.9347308798411702, "grad_norm": 0.43389159927564463, "learning_rate": 1.9726953230602864e-05, "loss": 0.4015, "step": 66265 }, { "epoch": 1.9348768631114874, "grad_norm": 0.45875957198714334, "learning_rate": 1.97242497972425e-05, "loss": 0.4558, "step": 66270 }, { "epoch": 1.9350228463818047, "grad_norm": 0.5086977751206756, "learning_rate": 1.9721546363882132e-05, "loss": 0.4256, "step": 66275 }, { "epoch": 1.935168829652122, "grad_norm": 0.5017728228613493, "learning_rate": 1.9718842930521763e-05, "loss": 0.4472, "step": 66280 }, { "epoch": 1.9353148129224391, "grad_norm": 0.4740504055532066, "learning_rate": 1.9716139497161397e-05, "loss": 0.4393, "step": 66285 }, { "epoch": 1.9354607961927563, "grad_norm": 0.48669512187034564, "learning_rate": 1.9713436063801027e-05, "loss": 0.437, "step": 66290 }, { "epoch": 1.9356067794630736, "grad_norm": 0.5049154390800584, "learning_rate": 1.9710732630440658e-05, "loss": 0.4429, "step": 66295 }, { "epoch": 1.9357527627333908, "grad_norm": 0.48764070705905876, "learning_rate": 1.9708029197080295e-05, "loss": 0.433, "step": 66300 }, { "epoch": 1.935898746003708, "grad_norm": 0.4608116353170131, "learning_rate": 1.9705325763719926e-05, "loss": 0.4281, "step": 66305 }, { "epoch": 1.936044729274025, "grad_norm": 0.43593821427933027, "learning_rate": 1.9702622330359557e-05, "loss": 0.3913, "step": 66310 }, { "epoch": 1.9361907125443425, "grad_norm": 0.5003419131066141, "learning_rate": 1.969991889699919e-05, "loss": 0.4472, "step": 66315 }, { "epoch": 1.9363366958146595, "grad_norm": 0.4827627768830777, "learning_rate": 1.969721546363882e-05, "loss": 0.4339, "step": 66320 }, { "epoch": 1.936482679084977, "grad_norm": 0.47825080758364535, "learning_rate": 1.9694512030278455e-05, "loss": 0.4351, "step": 66325 }, { "epoch": 1.936628662355294, "grad_norm": 0.46917838506549936, "learning_rate": 1.969180859691809e-05, "loss": 0.4393, "step": 66330 }, { "epoch": 1.9367746456256114, "grad_norm": 0.46537058313792695, "learning_rate": 1.968910516355772e-05, "loss": 0.4425, "step": 66335 }, { "epoch": 1.9369206288959284, "grad_norm": 0.5276634208991274, "learning_rate": 1.968640173019735e-05, "loss": 0.4709, "step": 66340 }, { "epoch": 1.9370666121662459, "grad_norm": 0.47070255242282083, "learning_rate": 1.9683698296836984e-05, "loss": 0.421, "step": 66345 }, { "epoch": 1.9372125954365629, "grad_norm": 0.4432658093099156, "learning_rate": 1.9680994863476615e-05, "loss": 0.4192, "step": 66350 }, { "epoch": 1.9373585787068803, "grad_norm": 0.48760646513651706, "learning_rate": 1.967829143011625e-05, "loss": 0.4344, "step": 66355 }, { "epoch": 1.9375045619771973, "grad_norm": 0.48256629868382206, "learning_rate": 1.9675587996755883e-05, "loss": 0.4439, "step": 66360 }, { "epoch": 1.9376505452475148, "grad_norm": 0.5084679411492751, "learning_rate": 1.9672884563395514e-05, "loss": 0.4447, "step": 66365 }, { "epoch": 1.9377965285178318, "grad_norm": 0.46856991811987436, "learning_rate": 1.9670181130035144e-05, "loss": 0.4277, "step": 66370 }, { "epoch": 1.937942511788149, "grad_norm": 0.4437611556681702, "learning_rate": 1.966747769667478e-05, "loss": 0.4461, "step": 66375 }, { "epoch": 1.9380884950584663, "grad_norm": 0.4702122779014433, "learning_rate": 1.966477426331441e-05, "loss": 0.4126, "step": 66380 }, { "epoch": 1.9382344783287835, "grad_norm": 0.4757710127174221, "learning_rate": 1.9662070829954043e-05, "loss": 0.4488, "step": 66385 }, { "epoch": 1.9383804615991007, "grad_norm": 0.43847921262570905, "learning_rate": 1.9659367396593677e-05, "loss": 0.44, "step": 66390 }, { "epoch": 1.938526444869418, "grad_norm": 0.4651771574304187, "learning_rate": 1.9656663963233308e-05, "loss": 0.4419, "step": 66395 }, { "epoch": 1.9386724281397352, "grad_norm": 0.46396631091782925, "learning_rate": 1.9653960529872938e-05, "loss": 0.4149, "step": 66400 }, { "epoch": 1.9388184114100524, "grad_norm": 0.47193922485014866, "learning_rate": 1.9651257096512572e-05, "loss": 0.4482, "step": 66405 }, { "epoch": 1.9389643946803696, "grad_norm": 0.4473528929221774, "learning_rate": 1.9648553663152206e-05, "loss": 0.4167, "step": 66410 }, { "epoch": 1.9391103779506869, "grad_norm": 0.4712732768748084, "learning_rate": 1.9645850229791837e-05, "loss": 0.4558, "step": 66415 }, { "epoch": 1.939256361221004, "grad_norm": 0.47575991514601385, "learning_rate": 1.9643146796431468e-05, "loss": 0.4284, "step": 66420 }, { "epoch": 1.9394023444913213, "grad_norm": 0.4542324366968975, "learning_rate": 1.96404433630711e-05, "loss": 0.4174, "step": 66425 }, { "epoch": 1.9395483277616385, "grad_norm": 0.49425800253715935, "learning_rate": 1.9637739929710732e-05, "loss": 0.4247, "step": 66430 }, { "epoch": 1.9396943110319558, "grad_norm": 0.460475122075688, "learning_rate": 1.9635036496350366e-05, "loss": 0.4163, "step": 66435 }, { "epoch": 1.939840294302273, "grad_norm": 0.43445624810391636, "learning_rate": 1.963233306299e-05, "loss": 0.427, "step": 66440 }, { "epoch": 1.9399862775725902, "grad_norm": 0.46509482195708984, "learning_rate": 1.962962962962963e-05, "loss": 0.4496, "step": 66445 }, { "epoch": 1.9401322608429075, "grad_norm": 0.5151447776110593, "learning_rate": 1.962692619626926e-05, "loss": 0.431, "step": 66450 }, { "epoch": 1.9402782441132245, "grad_norm": 0.48139806590013906, "learning_rate": 1.9624222762908895e-05, "loss": 0.4209, "step": 66455 }, { "epoch": 1.940424227383542, "grad_norm": 0.4861419846882829, "learning_rate": 1.9621519329548526e-05, "loss": 0.4303, "step": 66460 }, { "epoch": 1.940570210653859, "grad_norm": 0.4935811774614265, "learning_rate": 1.961881589618816e-05, "loss": 0.433, "step": 66465 }, { "epoch": 1.9407161939241764, "grad_norm": 0.4722486868213957, "learning_rate": 1.9616112462827794e-05, "loss": 0.4244, "step": 66470 }, { "epoch": 1.9408621771944934, "grad_norm": 0.5009125969879223, "learning_rate": 1.9613409029467425e-05, "loss": 0.4633, "step": 66475 }, { "epoch": 1.9410081604648108, "grad_norm": 0.4702105375653207, "learning_rate": 1.9610705596107055e-05, "loss": 0.4628, "step": 66480 }, { "epoch": 1.9411541437351278, "grad_norm": 0.4486392346539244, "learning_rate": 1.960800216274669e-05, "loss": 0.4363, "step": 66485 }, { "epoch": 1.9413001270054453, "grad_norm": 0.4818196273057273, "learning_rate": 1.960529872938632e-05, "loss": 0.4593, "step": 66490 }, { "epoch": 1.9414461102757623, "grad_norm": 0.4345383672945172, "learning_rate": 1.9602595296025954e-05, "loss": 0.4607, "step": 66495 }, { "epoch": 1.9415920935460798, "grad_norm": 0.4921985870402857, "learning_rate": 1.9599891862665588e-05, "loss": 0.4351, "step": 66500 }, { "epoch": 1.9417380768163968, "grad_norm": 0.518967862086592, "learning_rate": 1.959718842930522e-05, "loss": 0.4315, "step": 66505 }, { "epoch": 1.9418840600867142, "grad_norm": 0.46703523769514343, "learning_rate": 1.959448499594485e-05, "loss": 0.3968, "step": 66510 }, { "epoch": 1.9420300433570312, "grad_norm": 0.48033338775246814, "learning_rate": 1.9591781562584483e-05, "loss": 0.426, "step": 66515 }, { "epoch": 1.9421760266273485, "grad_norm": 0.5009820983410163, "learning_rate": 1.9589078129224114e-05, "loss": 0.4399, "step": 66520 }, { "epoch": 1.9423220098976657, "grad_norm": 0.4646353383775937, "learning_rate": 1.9586374695863748e-05, "loss": 0.4459, "step": 66525 }, { "epoch": 1.942467993167983, "grad_norm": 0.457561632137756, "learning_rate": 1.9583671262503382e-05, "loss": 0.4314, "step": 66530 }, { "epoch": 1.9426139764383001, "grad_norm": 0.48494408863249217, "learning_rate": 1.9580967829143013e-05, "loss": 0.4553, "step": 66535 }, { "epoch": 1.9427599597086174, "grad_norm": 0.4894594704218985, "learning_rate": 1.9578264395782643e-05, "loss": 0.4604, "step": 66540 }, { "epoch": 1.9429059429789346, "grad_norm": 0.4472922988127795, "learning_rate": 1.9575560962422277e-05, "loss": 0.426, "step": 66545 }, { "epoch": 1.9430519262492518, "grad_norm": 0.45514464341551936, "learning_rate": 1.957285752906191e-05, "loss": 0.4242, "step": 66550 }, { "epoch": 1.943197909519569, "grad_norm": 0.43263243275547014, "learning_rate": 1.9570154095701542e-05, "loss": 0.4202, "step": 66555 }, { "epoch": 1.9433438927898863, "grad_norm": 0.46407475770105655, "learning_rate": 1.9567450662341176e-05, "loss": 0.4192, "step": 66560 }, { "epoch": 1.9434898760602035, "grad_norm": 0.4857123427072103, "learning_rate": 1.9564747228980806e-05, "loss": 0.4402, "step": 66565 }, { "epoch": 1.9436358593305207, "grad_norm": 0.46655979279633014, "learning_rate": 1.9562043795620437e-05, "loss": 0.4192, "step": 66570 }, { "epoch": 1.943781842600838, "grad_norm": 0.4530806440441216, "learning_rate": 1.955934036226007e-05, "loss": 0.4501, "step": 66575 }, { "epoch": 1.9439278258711552, "grad_norm": 0.4722060711094884, "learning_rate": 1.9556636928899705e-05, "loss": 0.434, "step": 66580 }, { "epoch": 1.9440738091414724, "grad_norm": 0.5097021785004215, "learning_rate": 1.9553933495539336e-05, "loss": 0.4258, "step": 66585 }, { "epoch": 1.9442197924117897, "grad_norm": 0.45461163888653694, "learning_rate": 1.955123006217897e-05, "loss": 0.4194, "step": 66590 }, { "epoch": 1.944365775682107, "grad_norm": 0.48266661345407896, "learning_rate": 1.95485266288186e-05, "loss": 0.4337, "step": 66595 }, { "epoch": 1.944511758952424, "grad_norm": 0.463061381749929, "learning_rate": 1.954582319545823e-05, "loss": 0.4485, "step": 66600 }, { "epoch": 1.9446577422227413, "grad_norm": 0.5206732830192512, "learning_rate": 1.9543119762097865e-05, "loss": 0.46, "step": 66605 }, { "epoch": 1.9448037254930584, "grad_norm": 0.48362064258473214, "learning_rate": 1.95404163287375e-05, "loss": 0.4508, "step": 66610 }, { "epoch": 1.9449497087633758, "grad_norm": 0.44859618884306063, "learning_rate": 1.953771289537713e-05, "loss": 0.4277, "step": 66615 }, { "epoch": 1.9450956920336928, "grad_norm": 0.4812071205932206, "learning_rate": 1.9535009462016764e-05, "loss": 0.4633, "step": 66620 }, { "epoch": 1.9452416753040103, "grad_norm": 0.46168182972583927, "learning_rate": 1.9532306028656394e-05, "loss": 0.4384, "step": 66625 }, { "epoch": 1.9453876585743273, "grad_norm": 0.4742625581319816, "learning_rate": 1.9529602595296025e-05, "loss": 0.4496, "step": 66630 }, { "epoch": 1.9455336418446447, "grad_norm": 0.4903160692255326, "learning_rate": 1.9526899161935662e-05, "loss": 0.4399, "step": 66635 }, { "epoch": 1.9456796251149617, "grad_norm": 0.43650591779907855, "learning_rate": 1.9524195728575293e-05, "loss": 0.4449, "step": 66640 }, { "epoch": 1.9458256083852792, "grad_norm": 0.5019401611655184, "learning_rate": 1.9521492295214923e-05, "loss": 0.4196, "step": 66645 }, { "epoch": 1.9459715916555962, "grad_norm": 0.4756730300603292, "learning_rate": 1.9518788861854557e-05, "loss": 0.4172, "step": 66650 }, { "epoch": 1.9461175749259136, "grad_norm": 0.4391066959622048, "learning_rate": 1.9516085428494188e-05, "loss": 0.4319, "step": 66655 }, { "epoch": 1.9462635581962306, "grad_norm": 0.48275222035949406, "learning_rate": 1.951338199513382e-05, "loss": 0.4295, "step": 66660 }, { "epoch": 1.946409541466548, "grad_norm": 0.47516470449142667, "learning_rate": 1.9510678561773453e-05, "loss": 0.4394, "step": 66665 }, { "epoch": 1.946555524736865, "grad_norm": 0.47623211444783664, "learning_rate": 1.9507975128413087e-05, "loss": 0.4265, "step": 66670 }, { "epoch": 1.9467015080071823, "grad_norm": 0.5215512968237026, "learning_rate": 1.9505271695052717e-05, "loss": 0.4486, "step": 66675 }, { "epoch": 1.9468474912774996, "grad_norm": 0.487275048688075, "learning_rate": 1.950256826169235e-05, "loss": 0.4277, "step": 66680 }, { "epoch": 1.9469934745478168, "grad_norm": 0.4610399956493259, "learning_rate": 1.9499864828331982e-05, "loss": 0.4205, "step": 66685 }, { "epoch": 1.947139457818134, "grad_norm": 0.47206820323357, "learning_rate": 1.9497161394971613e-05, "loss": 0.4414, "step": 66690 }, { "epoch": 1.9472854410884513, "grad_norm": 0.43328981399979977, "learning_rate": 1.9494457961611247e-05, "loss": 0.4148, "step": 66695 }, { "epoch": 1.9474314243587685, "grad_norm": 0.4860603128206996, "learning_rate": 1.949175452825088e-05, "loss": 0.4286, "step": 66700 }, { "epoch": 1.9475774076290857, "grad_norm": 0.4512230348516876, "learning_rate": 1.948905109489051e-05, "loss": 0.4217, "step": 66705 }, { "epoch": 1.947723390899403, "grad_norm": 0.4780068187960924, "learning_rate": 1.9486347661530145e-05, "loss": 0.4424, "step": 66710 }, { "epoch": 1.9478693741697202, "grad_norm": 0.43645584049914615, "learning_rate": 1.9483644228169776e-05, "loss": 0.4191, "step": 66715 }, { "epoch": 1.9480153574400374, "grad_norm": 0.4747563573238879, "learning_rate": 1.948094079480941e-05, "loss": 0.4235, "step": 66720 }, { "epoch": 1.9481613407103546, "grad_norm": 0.5193864931552916, "learning_rate": 1.947823736144904e-05, "loss": 0.4234, "step": 66725 }, { "epoch": 1.9483073239806719, "grad_norm": 0.4235910569878267, "learning_rate": 1.9475533928088675e-05, "loss": 0.4192, "step": 66730 }, { "epoch": 1.948453307250989, "grad_norm": 0.4728112490000478, "learning_rate": 1.9472830494728305e-05, "loss": 0.4231, "step": 66735 }, { "epoch": 1.9485992905213063, "grad_norm": 0.44421072696883396, "learning_rate": 1.947012706136794e-05, "loss": 0.4377, "step": 66740 }, { "epoch": 1.9487452737916233, "grad_norm": 0.49439162109732704, "learning_rate": 1.946742362800757e-05, "loss": 0.4255, "step": 66745 }, { "epoch": 1.9488912570619408, "grad_norm": 0.4618736616814282, "learning_rate": 1.9464720194647204e-05, "loss": 0.4558, "step": 66750 }, { "epoch": 1.9490372403322578, "grad_norm": 0.45170141310628426, "learning_rate": 1.9462016761286834e-05, "loss": 0.4136, "step": 66755 }, { "epoch": 1.9491832236025752, "grad_norm": 0.45464815156554705, "learning_rate": 1.945931332792647e-05, "loss": 0.4357, "step": 66760 }, { "epoch": 1.9493292068728922, "grad_norm": 0.45948039045708666, "learning_rate": 1.94566098945661e-05, "loss": 0.4284, "step": 66765 }, { "epoch": 1.9494751901432097, "grad_norm": 0.44222308196352517, "learning_rate": 1.9453906461205733e-05, "loss": 0.4248, "step": 66770 }, { "epoch": 1.9496211734135267, "grad_norm": 0.49876762360426713, "learning_rate": 1.9451203027845364e-05, "loss": 0.4212, "step": 66775 }, { "epoch": 1.9497671566838441, "grad_norm": 0.46863450496488596, "learning_rate": 1.9448499594484998e-05, "loss": 0.4427, "step": 66780 }, { "epoch": 1.9499131399541612, "grad_norm": 0.48248909146229674, "learning_rate": 1.944579616112463e-05, "loss": 0.4234, "step": 66785 }, { "epoch": 1.9500591232244786, "grad_norm": 0.4745001615168795, "learning_rate": 1.9443092727764262e-05, "loss": 0.4372, "step": 66790 }, { "epoch": 1.9502051064947956, "grad_norm": 0.4523588891005042, "learning_rate": 1.9440389294403893e-05, "loss": 0.4475, "step": 66795 }, { "epoch": 1.950351089765113, "grad_norm": 0.5007186618826078, "learning_rate": 1.9437685861043524e-05, "loss": 0.4286, "step": 66800 }, { "epoch": 1.95049707303543, "grad_norm": 0.46298361301149155, "learning_rate": 1.943498242768316e-05, "loss": 0.4355, "step": 66805 }, { "epoch": 1.9506430563057475, "grad_norm": 0.433477791511256, "learning_rate": 1.943227899432279e-05, "loss": 0.4229, "step": 66810 }, { "epoch": 1.9507890395760645, "grad_norm": 0.44550553224995154, "learning_rate": 1.9429575560962422e-05, "loss": 0.4138, "step": 66815 }, { "epoch": 1.9509350228463818, "grad_norm": 0.4364289761143205, "learning_rate": 1.9426872127602056e-05, "loss": 0.398, "step": 66820 }, { "epoch": 1.951081006116699, "grad_norm": 0.46381501207659187, "learning_rate": 1.9424168694241687e-05, "loss": 0.4453, "step": 66825 }, { "epoch": 1.9512269893870162, "grad_norm": 0.4961285848958563, "learning_rate": 1.9421465260881318e-05, "loss": 0.4448, "step": 66830 }, { "epoch": 1.9513729726573334, "grad_norm": 0.47568128890134403, "learning_rate": 1.9418761827520955e-05, "loss": 0.4296, "step": 66835 }, { "epoch": 1.9515189559276507, "grad_norm": 0.43611353695048555, "learning_rate": 1.9416058394160586e-05, "loss": 0.4191, "step": 66840 }, { "epoch": 1.951664939197968, "grad_norm": 0.4623146750367495, "learning_rate": 1.9413354960800216e-05, "loss": 0.4288, "step": 66845 }, { "epoch": 1.9518109224682851, "grad_norm": 0.4678564448522301, "learning_rate": 1.941065152743985e-05, "loss": 0.4547, "step": 66850 }, { "epoch": 1.9519569057386024, "grad_norm": 0.49194022729331516, "learning_rate": 1.940794809407948e-05, "loss": 0.446, "step": 66855 }, { "epoch": 1.9521028890089196, "grad_norm": 0.4716097872255591, "learning_rate": 1.940524466071911e-05, "loss": 0.4605, "step": 66860 }, { "epoch": 1.9522488722792368, "grad_norm": 0.4388645100123368, "learning_rate": 1.940254122735875e-05, "loss": 0.4248, "step": 66865 }, { "epoch": 1.952394855549554, "grad_norm": 0.4791589303343053, "learning_rate": 1.939983779399838e-05, "loss": 0.4329, "step": 66870 }, { "epoch": 1.9525408388198713, "grad_norm": 0.4949507760164726, "learning_rate": 1.939713436063801e-05, "loss": 0.4051, "step": 66875 }, { "epoch": 1.9526868220901885, "grad_norm": 0.4741415148223383, "learning_rate": 1.9394430927277644e-05, "loss": 0.4286, "step": 66880 }, { "epoch": 1.9528328053605057, "grad_norm": 0.5007184222971136, "learning_rate": 1.9391727493917275e-05, "loss": 0.4243, "step": 66885 }, { "epoch": 1.9529787886308227, "grad_norm": 0.5215806815373079, "learning_rate": 1.938902406055691e-05, "loss": 0.4513, "step": 66890 }, { "epoch": 1.9531247719011402, "grad_norm": 0.48524549996980815, "learning_rate": 1.9386320627196543e-05, "loss": 0.4182, "step": 66895 }, { "epoch": 1.9532707551714572, "grad_norm": 0.45715985867700515, "learning_rate": 1.9383617193836173e-05, "loss": 0.4455, "step": 66900 }, { "epoch": 1.9534167384417747, "grad_norm": 0.4621722673619075, "learning_rate": 1.9380913760475804e-05, "loss": 0.4281, "step": 66905 }, { "epoch": 1.9535627217120917, "grad_norm": 0.4565422257052516, "learning_rate": 1.9378210327115438e-05, "loss": 0.4315, "step": 66910 }, { "epoch": 1.9537087049824091, "grad_norm": 0.4825205000939185, "learning_rate": 1.937550689375507e-05, "loss": 0.4555, "step": 66915 }, { "epoch": 1.9538546882527261, "grad_norm": 0.4784495007758648, "learning_rate": 1.9372803460394703e-05, "loss": 0.4108, "step": 66920 }, { "epoch": 1.9540006715230436, "grad_norm": 0.44957254421143994, "learning_rate": 1.9370100027034337e-05, "loss": 0.4273, "step": 66925 }, { "epoch": 1.9541466547933606, "grad_norm": 0.44809090195400797, "learning_rate": 1.9367396593673967e-05, "loss": 0.403, "step": 66930 }, { "epoch": 1.954292638063678, "grad_norm": 0.46263339246916113, "learning_rate": 1.9364693160313598e-05, "loss": 0.4114, "step": 66935 }, { "epoch": 1.954438621333995, "grad_norm": 0.44248756152643187, "learning_rate": 1.9361989726953232e-05, "loss": 0.4287, "step": 66940 }, { "epoch": 1.9545846046043125, "grad_norm": 0.4386027071262157, "learning_rate": 1.9359286293592863e-05, "loss": 0.4423, "step": 66945 }, { "epoch": 1.9547305878746295, "grad_norm": 0.4564631184737646, "learning_rate": 1.9356582860232497e-05, "loss": 0.4457, "step": 66950 }, { "epoch": 1.954876571144947, "grad_norm": 0.4640237446642203, "learning_rate": 1.935387942687213e-05, "loss": 0.4389, "step": 66955 }, { "epoch": 1.955022554415264, "grad_norm": 0.46733471189378056, "learning_rate": 1.935117599351176e-05, "loss": 0.454, "step": 66960 }, { "epoch": 1.9551685376855812, "grad_norm": 0.4657806880844029, "learning_rate": 1.9348472560151392e-05, "loss": 0.4201, "step": 66965 }, { "epoch": 1.9553145209558984, "grad_norm": 0.4559916365191144, "learning_rate": 1.9345769126791026e-05, "loss": 0.4239, "step": 66970 }, { "epoch": 1.9554605042262156, "grad_norm": 0.44473653817579073, "learning_rate": 1.934306569343066e-05, "loss": 0.4176, "step": 66975 }, { "epoch": 1.9556064874965329, "grad_norm": 0.48462334661055145, "learning_rate": 1.934036226007029e-05, "loss": 0.4307, "step": 66980 }, { "epoch": 1.95575247076685, "grad_norm": 0.4724737355004577, "learning_rate": 1.9337658826709924e-05, "loss": 0.4486, "step": 66985 }, { "epoch": 1.9558984540371673, "grad_norm": 0.506801479553056, "learning_rate": 1.9334955393349555e-05, "loss": 0.4385, "step": 66990 }, { "epoch": 1.9560444373074846, "grad_norm": 0.5199315325588263, "learning_rate": 1.9332251959989186e-05, "loss": 0.4411, "step": 66995 }, { "epoch": 1.9561904205778018, "grad_norm": 0.4971087990732308, "learning_rate": 1.932954852662882e-05, "loss": 0.4218, "step": 67000 }, { "epoch": 1.956336403848119, "grad_norm": 0.44633285625946606, "learning_rate": 1.9326845093268454e-05, "loss": 0.3996, "step": 67005 }, { "epoch": 1.9564823871184363, "grad_norm": 0.49330310828549684, "learning_rate": 1.9324141659908084e-05, "loss": 0.4359, "step": 67010 }, { "epoch": 1.9566283703887535, "grad_norm": 0.4435083815953087, "learning_rate": 1.932143822654772e-05, "loss": 0.4121, "step": 67015 }, { "epoch": 1.9567743536590707, "grad_norm": 0.47039800591424724, "learning_rate": 1.931873479318735e-05, "loss": 0.4327, "step": 67020 }, { "epoch": 1.956920336929388, "grad_norm": 0.497969578804799, "learning_rate": 1.931603135982698e-05, "loss": 0.4462, "step": 67025 }, { "epoch": 1.9570663201997052, "grad_norm": 0.4617779838855539, "learning_rate": 1.9313327926466614e-05, "loss": 0.4202, "step": 67030 }, { "epoch": 1.9572123034700222, "grad_norm": 0.46966844894672394, "learning_rate": 1.9310624493106248e-05, "loss": 0.4128, "step": 67035 }, { "epoch": 1.9573582867403396, "grad_norm": 0.4522063534107626, "learning_rate": 1.9307921059745878e-05, "loss": 0.4306, "step": 67040 }, { "epoch": 1.9575042700106566, "grad_norm": 0.4791213762143186, "learning_rate": 1.9305217626385512e-05, "loss": 0.4483, "step": 67045 }, { "epoch": 1.957650253280974, "grad_norm": 0.41302889956003863, "learning_rate": 1.9302514193025143e-05, "loss": 0.4304, "step": 67050 }, { "epoch": 1.957796236551291, "grad_norm": 0.48016463712341284, "learning_rate": 1.9299810759664774e-05, "loss": 0.4637, "step": 67055 }, { "epoch": 1.9579422198216085, "grad_norm": 0.4491053487357722, "learning_rate": 1.9297107326304408e-05, "loss": 0.4225, "step": 67060 }, { "epoch": 1.9580882030919255, "grad_norm": 0.41084120410846786, "learning_rate": 1.929440389294404e-05, "loss": 0.4516, "step": 67065 }, { "epoch": 1.958234186362243, "grad_norm": 0.4848632219352932, "learning_rate": 1.9291700459583672e-05, "loss": 0.427, "step": 67070 }, { "epoch": 1.95838016963256, "grad_norm": 0.48014750679176615, "learning_rate": 1.9288997026223303e-05, "loss": 0.4169, "step": 67075 }, { "epoch": 1.9585261529028775, "grad_norm": 0.5020987650321226, "learning_rate": 1.9286293592862937e-05, "loss": 0.4332, "step": 67080 }, { "epoch": 1.9586721361731945, "grad_norm": 0.47311299716182875, "learning_rate": 1.9283590159502567e-05, "loss": 0.451, "step": 67085 }, { "epoch": 1.958818119443512, "grad_norm": 0.4821928125220141, "learning_rate": 1.92808867261422e-05, "loss": 0.444, "step": 67090 }, { "epoch": 1.958964102713829, "grad_norm": 0.4662723737707363, "learning_rate": 1.9278183292781835e-05, "loss": 0.4473, "step": 67095 }, { "epoch": 1.9591100859841464, "grad_norm": 0.4691458930590292, "learning_rate": 1.9275479859421466e-05, "loss": 0.4025, "step": 67100 }, { "epoch": 1.9592560692544634, "grad_norm": 0.5593364183031018, "learning_rate": 1.9272776426061097e-05, "loss": 0.4492, "step": 67105 }, { "epoch": 1.9594020525247806, "grad_norm": 0.46043430397173357, "learning_rate": 1.927007299270073e-05, "loss": 0.4299, "step": 67110 }, { "epoch": 1.9595480357950978, "grad_norm": 0.4589783889058613, "learning_rate": 1.926736955934036e-05, "loss": 0.4493, "step": 67115 }, { "epoch": 1.959694019065415, "grad_norm": 0.49059667663483475, "learning_rate": 1.9264666125979995e-05, "loss": 0.4033, "step": 67120 }, { "epoch": 1.9598400023357323, "grad_norm": 0.4478079220341509, "learning_rate": 1.926196269261963e-05, "loss": 0.4323, "step": 67125 }, { "epoch": 1.9599859856060495, "grad_norm": 0.4750122893572119, "learning_rate": 1.925925925925926e-05, "loss": 0.4464, "step": 67130 }, { "epoch": 1.9601319688763668, "grad_norm": 0.45181384330599483, "learning_rate": 1.925655582589889e-05, "loss": 0.4352, "step": 67135 }, { "epoch": 1.960277952146684, "grad_norm": 0.4414114839714568, "learning_rate": 1.9253852392538525e-05, "loss": 0.4039, "step": 67140 }, { "epoch": 1.9604239354170012, "grad_norm": 0.49045335102919757, "learning_rate": 1.925114895917816e-05, "loss": 0.4258, "step": 67145 }, { "epoch": 1.9605699186873184, "grad_norm": 0.47521071404037446, "learning_rate": 1.924844552581779e-05, "loss": 0.4547, "step": 67150 }, { "epoch": 1.9607159019576357, "grad_norm": 0.4983964553971018, "learning_rate": 1.9245742092457423e-05, "loss": 0.4321, "step": 67155 }, { "epoch": 1.960861885227953, "grad_norm": 0.41854000138380887, "learning_rate": 1.9243038659097054e-05, "loss": 0.395, "step": 67160 }, { "epoch": 1.9610078684982701, "grad_norm": 0.45489984771503567, "learning_rate": 1.9240335225736684e-05, "loss": 0.4371, "step": 67165 }, { "epoch": 1.9611538517685874, "grad_norm": 0.48692271880297144, "learning_rate": 1.923763179237632e-05, "loss": 0.4156, "step": 67170 }, { "epoch": 1.9612998350389046, "grad_norm": 0.5253814595400825, "learning_rate": 1.9234928359015953e-05, "loss": 0.4403, "step": 67175 }, { "epoch": 1.9614458183092216, "grad_norm": 0.5169254639578449, "learning_rate": 1.9232224925655583e-05, "loss": 0.4674, "step": 67180 }, { "epoch": 1.961591801579539, "grad_norm": 0.4855914751770692, "learning_rate": 1.9229521492295217e-05, "loss": 0.4456, "step": 67185 }, { "epoch": 1.961737784849856, "grad_norm": 0.4876048332851905, "learning_rate": 1.9226818058934848e-05, "loss": 0.4361, "step": 67190 }, { "epoch": 1.9618837681201735, "grad_norm": 0.4353391889673495, "learning_rate": 1.922411462557448e-05, "loss": 0.4159, "step": 67195 }, { "epoch": 1.9620297513904905, "grad_norm": 0.4610533703055461, "learning_rate": 1.9221411192214112e-05, "loss": 0.4358, "step": 67200 }, { "epoch": 1.962175734660808, "grad_norm": 0.5111421713397851, "learning_rate": 1.9218707758853746e-05, "loss": 0.4303, "step": 67205 }, { "epoch": 1.962321717931125, "grad_norm": 0.4780912186457236, "learning_rate": 1.9216004325493377e-05, "loss": 0.4279, "step": 67210 }, { "epoch": 1.9624677012014424, "grad_norm": 0.49728530097656487, "learning_rate": 1.921330089213301e-05, "loss": 0.4265, "step": 67215 }, { "epoch": 1.9626136844717594, "grad_norm": 0.4740882940165465, "learning_rate": 1.921059745877264e-05, "loss": 0.434, "step": 67220 }, { "epoch": 1.9627596677420769, "grad_norm": 0.45918464498500167, "learning_rate": 1.9207894025412272e-05, "loss": 0.434, "step": 67225 }, { "epoch": 1.962905651012394, "grad_norm": 0.48179982595413845, "learning_rate": 1.920519059205191e-05, "loss": 0.4109, "step": 67230 }, { "epoch": 1.9630516342827113, "grad_norm": 0.49664957990117625, "learning_rate": 1.920248715869154e-05, "loss": 0.4392, "step": 67235 }, { "epoch": 1.9631976175530284, "grad_norm": 0.45119787843160947, "learning_rate": 1.919978372533117e-05, "loss": 0.4189, "step": 67240 }, { "epoch": 1.9633436008233458, "grad_norm": 0.46632058686721667, "learning_rate": 1.9197080291970805e-05, "loss": 0.4398, "step": 67245 }, { "epoch": 1.9634895840936628, "grad_norm": 0.43731148670347353, "learning_rate": 1.9194376858610436e-05, "loss": 0.4157, "step": 67250 }, { "epoch": 1.96363556736398, "grad_norm": 0.4721166679102722, "learning_rate": 1.9191673425250066e-05, "loss": 0.4488, "step": 67255 }, { "epoch": 1.9637815506342973, "grad_norm": 0.46011441894253413, "learning_rate": 1.9188969991889704e-05, "loss": 0.4263, "step": 67260 }, { "epoch": 1.9639275339046145, "grad_norm": 0.4375814365604172, "learning_rate": 1.9186266558529334e-05, "loss": 0.4271, "step": 67265 }, { "epoch": 1.9640735171749317, "grad_norm": 0.4774387594815021, "learning_rate": 1.9183563125168965e-05, "loss": 0.4388, "step": 67270 }, { "epoch": 1.964219500445249, "grad_norm": 0.5021163475752081, "learning_rate": 1.91808596918086e-05, "loss": 0.4488, "step": 67275 }, { "epoch": 1.9643654837155662, "grad_norm": 0.45495815942410234, "learning_rate": 1.917815625844823e-05, "loss": 0.4401, "step": 67280 }, { "epoch": 1.9645114669858834, "grad_norm": 0.47417132370058024, "learning_rate": 1.917545282508786e-05, "loss": 0.455, "step": 67285 }, { "epoch": 1.9646574502562006, "grad_norm": 0.46928587277273265, "learning_rate": 1.9172749391727497e-05, "loss": 0.4384, "step": 67290 }, { "epoch": 1.9648034335265179, "grad_norm": 0.48235308939614036, "learning_rate": 1.9170045958367128e-05, "loss": 0.4401, "step": 67295 }, { "epoch": 1.964949416796835, "grad_norm": 0.48870272892551275, "learning_rate": 1.916734252500676e-05, "loss": 0.428, "step": 67300 }, { "epoch": 1.9650954000671523, "grad_norm": 0.47029040150210727, "learning_rate": 1.9164639091646393e-05, "loss": 0.4362, "step": 67305 }, { "epoch": 1.9652413833374696, "grad_norm": 0.45677445922743776, "learning_rate": 1.9161935658286023e-05, "loss": 0.4097, "step": 67310 }, { "epoch": 1.9653873666077868, "grad_norm": 0.4807552076908751, "learning_rate": 1.9159232224925657e-05, "loss": 0.4372, "step": 67315 }, { "epoch": 1.965533349878104, "grad_norm": 0.505409626931124, "learning_rate": 1.9156528791565288e-05, "loss": 0.4462, "step": 67320 }, { "epoch": 1.965679333148421, "grad_norm": 0.4901986673977388, "learning_rate": 1.9153825358204922e-05, "loss": 0.4278, "step": 67325 }, { "epoch": 1.9658253164187385, "grad_norm": 0.4623538785921461, "learning_rate": 1.9151121924844553e-05, "loss": 0.4292, "step": 67330 }, { "epoch": 1.9659712996890555, "grad_norm": 0.4757410419917369, "learning_rate": 1.9148418491484187e-05, "loss": 0.4371, "step": 67335 }, { "epoch": 1.966117282959373, "grad_norm": 0.5089097520810217, "learning_rate": 1.9145715058123817e-05, "loss": 0.4586, "step": 67340 }, { "epoch": 1.96626326622969, "grad_norm": 0.4445928613109882, "learning_rate": 1.914301162476345e-05, "loss": 0.43, "step": 67345 }, { "epoch": 1.9664092495000074, "grad_norm": 0.49564908011003966, "learning_rate": 1.9140308191403082e-05, "loss": 0.4364, "step": 67350 }, { "epoch": 1.9665552327703244, "grad_norm": 0.43554855384490343, "learning_rate": 1.9137604758042716e-05, "loss": 0.4374, "step": 67355 }, { "epoch": 1.9667012160406419, "grad_norm": 0.47975192811722606, "learning_rate": 1.9134901324682347e-05, "loss": 0.4401, "step": 67360 }, { "epoch": 1.9668471993109589, "grad_norm": 0.4778530784998812, "learning_rate": 1.913219789132198e-05, "loss": 0.4045, "step": 67365 }, { "epoch": 1.9669931825812763, "grad_norm": 0.47825283209583624, "learning_rate": 1.912949445796161e-05, "loss": 0.4201, "step": 67370 }, { "epoch": 1.9671391658515933, "grad_norm": 0.47617391747919524, "learning_rate": 1.9126791024601245e-05, "loss": 0.4013, "step": 67375 }, { "epoch": 1.9672851491219108, "grad_norm": 0.4853997290966832, "learning_rate": 1.9124087591240876e-05, "loss": 0.455, "step": 67380 }, { "epoch": 1.9674311323922278, "grad_norm": 0.4785062148945646, "learning_rate": 1.912138415788051e-05, "loss": 0.4195, "step": 67385 }, { "epoch": 1.9675771156625452, "grad_norm": 0.467996254992297, "learning_rate": 1.911868072452014e-05, "loss": 0.4083, "step": 67390 }, { "epoch": 1.9677230989328622, "grad_norm": 0.4619535997501002, "learning_rate": 1.9115977291159774e-05, "loss": 0.4217, "step": 67395 }, { "epoch": 1.9678690822031795, "grad_norm": 0.5107396553867634, "learning_rate": 1.911327385779941e-05, "loss": 0.4426, "step": 67400 }, { "epoch": 1.9680150654734967, "grad_norm": 0.4718988600064016, "learning_rate": 1.911057042443904e-05, "loss": 0.4306, "step": 67405 }, { "epoch": 1.968161048743814, "grad_norm": 0.4741119419285584, "learning_rate": 1.910786699107867e-05, "loss": 0.4573, "step": 67410 }, { "epoch": 1.9683070320141312, "grad_norm": 0.47139074908680867, "learning_rate": 1.9105163557718304e-05, "loss": 0.4201, "step": 67415 }, { "epoch": 1.9684530152844484, "grad_norm": 0.4718582147276606, "learning_rate": 1.9102460124357934e-05, "loss": 0.4431, "step": 67420 }, { "epoch": 1.9685989985547656, "grad_norm": 0.4569761256674544, "learning_rate": 1.909975669099757e-05, "loss": 0.4151, "step": 67425 }, { "epoch": 1.9687449818250828, "grad_norm": 0.49154806814015545, "learning_rate": 1.9097053257637202e-05, "loss": 0.4469, "step": 67430 }, { "epoch": 1.9688909650954, "grad_norm": 0.47042455935532146, "learning_rate": 1.9094349824276833e-05, "loss": 0.4124, "step": 67435 }, { "epoch": 1.9690369483657173, "grad_norm": 0.48587035710332144, "learning_rate": 1.9091646390916464e-05, "loss": 0.4267, "step": 67440 }, { "epoch": 1.9691829316360345, "grad_norm": 0.46122324559410194, "learning_rate": 1.9088942957556098e-05, "loss": 0.4049, "step": 67445 }, { "epoch": 1.9693289149063518, "grad_norm": 0.4204360313519031, "learning_rate": 1.9086239524195728e-05, "loss": 0.432, "step": 67450 }, { "epoch": 1.969474898176669, "grad_norm": 0.5075998064710671, "learning_rate": 1.908353609083536e-05, "loss": 0.4323, "step": 67455 }, { "epoch": 1.9696208814469862, "grad_norm": 0.4615510003282979, "learning_rate": 1.9080832657474996e-05, "loss": 0.4233, "step": 67460 }, { "epoch": 1.9697668647173034, "grad_norm": 0.46198988911906125, "learning_rate": 1.9078129224114627e-05, "loss": 0.434, "step": 67465 }, { "epoch": 1.9699128479876205, "grad_norm": 0.49318638068714277, "learning_rate": 1.9075425790754258e-05, "loss": 0.4452, "step": 67470 }, { "epoch": 1.970058831257938, "grad_norm": 0.5152198014076655, "learning_rate": 1.907272235739389e-05, "loss": 0.4226, "step": 67475 }, { "epoch": 1.970204814528255, "grad_norm": 0.42779215818498956, "learning_rate": 1.9070018924033522e-05, "loss": 0.3756, "step": 67480 }, { "epoch": 1.9703507977985724, "grad_norm": 0.45791429456264576, "learning_rate": 1.9067315490673156e-05, "loss": 0.4527, "step": 67485 }, { "epoch": 1.9704967810688894, "grad_norm": 0.45912196715367726, "learning_rate": 1.906461205731279e-05, "loss": 0.421, "step": 67490 }, { "epoch": 1.9706427643392068, "grad_norm": 0.4826828411026722, "learning_rate": 1.906190862395242e-05, "loss": 0.4369, "step": 67495 }, { "epoch": 1.9707887476095238, "grad_norm": 0.4672272146585742, "learning_rate": 1.905920519059205e-05, "loss": 0.4403, "step": 67500 }, { "epoch": 1.9709347308798413, "grad_norm": 0.4149953438438813, "learning_rate": 1.9056501757231685e-05, "loss": 0.4114, "step": 67505 }, { "epoch": 1.9710807141501583, "grad_norm": 0.5155035961821762, "learning_rate": 1.9053798323871316e-05, "loss": 0.4543, "step": 67510 }, { "epoch": 1.9712266974204757, "grad_norm": 0.4623445095866058, "learning_rate": 1.905109489051095e-05, "loss": 0.4163, "step": 67515 }, { "epoch": 1.9713726806907927, "grad_norm": 0.5028323477784078, "learning_rate": 1.9048391457150584e-05, "loss": 0.4729, "step": 67520 }, { "epoch": 1.9715186639611102, "grad_norm": 0.49088064810255155, "learning_rate": 1.9045688023790215e-05, "loss": 0.4184, "step": 67525 }, { "epoch": 1.9716646472314272, "grad_norm": 0.47647261126009555, "learning_rate": 1.9042984590429845e-05, "loss": 0.4256, "step": 67530 }, { "epoch": 1.9718106305017447, "grad_norm": 0.45146046182623417, "learning_rate": 1.904028115706948e-05, "loss": 0.4033, "step": 67535 }, { "epoch": 1.9719566137720617, "grad_norm": 0.4525687937151315, "learning_rate": 1.903757772370911e-05, "loss": 0.4462, "step": 67540 }, { "epoch": 1.972102597042379, "grad_norm": 0.44134257296379426, "learning_rate": 1.9034874290348744e-05, "loss": 0.4042, "step": 67545 }, { "epoch": 1.9722485803126961, "grad_norm": 0.43544963251528535, "learning_rate": 1.9032170856988378e-05, "loss": 0.414, "step": 67550 }, { "epoch": 1.9723945635830133, "grad_norm": 0.480051017626034, "learning_rate": 1.902946742362801e-05, "loss": 0.4222, "step": 67555 }, { "epoch": 1.9725405468533306, "grad_norm": 0.48864921727393207, "learning_rate": 1.902676399026764e-05, "loss": 0.4266, "step": 67560 }, { "epoch": 1.9726865301236478, "grad_norm": 0.46760491379176927, "learning_rate": 1.9024060556907273e-05, "loss": 0.4376, "step": 67565 }, { "epoch": 1.972832513393965, "grad_norm": 0.5062580918613504, "learning_rate": 1.9021357123546907e-05, "loss": 0.446, "step": 67570 }, { "epoch": 1.9729784966642823, "grad_norm": 0.4975250017151806, "learning_rate": 1.9018653690186538e-05, "loss": 0.4494, "step": 67575 }, { "epoch": 1.9731244799345995, "grad_norm": 0.46280310509474337, "learning_rate": 1.9015950256826172e-05, "loss": 0.4126, "step": 67580 }, { "epoch": 1.9732704632049167, "grad_norm": 0.43476708356050203, "learning_rate": 1.9013246823465803e-05, "loss": 0.449, "step": 67585 }, { "epoch": 1.973416446475234, "grad_norm": 0.4705024160741304, "learning_rate": 1.9010543390105433e-05, "loss": 0.4301, "step": 67590 }, { "epoch": 1.9735624297455512, "grad_norm": 0.460381899874644, "learning_rate": 1.9007839956745067e-05, "loss": 0.443, "step": 67595 }, { "epoch": 1.9737084130158684, "grad_norm": 0.45709674373447895, "learning_rate": 1.90051365233847e-05, "loss": 0.414, "step": 67600 }, { "epoch": 1.9738543962861856, "grad_norm": 0.4320600024623145, "learning_rate": 1.9002433090024332e-05, "loss": 0.4235, "step": 67605 }, { "epoch": 1.9740003795565029, "grad_norm": 0.45249510154041767, "learning_rate": 1.8999729656663966e-05, "loss": 0.4234, "step": 67610 }, { "epoch": 1.97414636282682, "grad_norm": 0.47643218286755257, "learning_rate": 1.8997026223303596e-05, "loss": 0.4007, "step": 67615 }, { "epoch": 1.9742923460971373, "grad_norm": 0.4713511663635621, "learning_rate": 1.8994322789943227e-05, "loss": 0.4429, "step": 67620 }, { "epoch": 1.9744383293674543, "grad_norm": 0.4774665537847088, "learning_rate": 1.899161935658286e-05, "loss": 0.4435, "step": 67625 }, { "epoch": 1.9745843126377718, "grad_norm": 0.44353921135513213, "learning_rate": 1.8988915923222495e-05, "loss": 0.4448, "step": 67630 }, { "epoch": 1.9747302959080888, "grad_norm": 0.5097393746630401, "learning_rate": 1.8986212489862126e-05, "loss": 0.4373, "step": 67635 }, { "epoch": 1.9748762791784062, "grad_norm": 0.4980841370076106, "learning_rate": 1.898350905650176e-05, "loss": 0.4123, "step": 67640 }, { "epoch": 1.9750222624487233, "grad_norm": 0.4786218959835488, "learning_rate": 1.898080562314139e-05, "loss": 0.4477, "step": 67645 }, { "epoch": 1.9751682457190407, "grad_norm": 0.4753511532057125, "learning_rate": 1.897810218978102e-05, "loss": 0.4387, "step": 67650 }, { "epoch": 1.9753142289893577, "grad_norm": 0.49048339033324495, "learning_rate": 1.8975398756420655e-05, "loss": 0.4672, "step": 67655 }, { "epoch": 1.9754602122596752, "grad_norm": 0.4610747893638659, "learning_rate": 1.897269532306029e-05, "loss": 0.4344, "step": 67660 }, { "epoch": 1.9756061955299922, "grad_norm": 0.45341175234798153, "learning_rate": 1.896999188969992e-05, "loss": 0.4093, "step": 67665 }, { "epoch": 1.9757521788003096, "grad_norm": 0.49902114172194445, "learning_rate": 1.8967288456339554e-05, "loss": 0.4278, "step": 67670 }, { "epoch": 1.9758981620706266, "grad_norm": 0.48708097158279845, "learning_rate": 1.8964585022979184e-05, "loss": 0.4421, "step": 67675 }, { "epoch": 1.976044145340944, "grad_norm": 0.47367130722548, "learning_rate": 1.8961881589618815e-05, "loss": 0.4459, "step": 67680 }, { "epoch": 1.976190128611261, "grad_norm": 0.45354734890930387, "learning_rate": 1.895917815625845e-05, "loss": 0.4247, "step": 67685 }, { "epoch": 1.9763361118815783, "grad_norm": 0.4060596004429562, "learning_rate": 1.8956474722898083e-05, "loss": 0.4132, "step": 67690 }, { "epoch": 1.9764820951518955, "grad_norm": 0.4760261839364815, "learning_rate": 1.8953771289537714e-05, "loss": 0.4375, "step": 67695 }, { "epoch": 1.9766280784222128, "grad_norm": 0.44399267357032285, "learning_rate": 1.8951067856177344e-05, "loss": 0.4157, "step": 67700 }, { "epoch": 1.97677406169253, "grad_norm": 0.47762104251154897, "learning_rate": 1.8948364422816978e-05, "loss": 0.4284, "step": 67705 }, { "epoch": 1.9769200449628472, "grad_norm": 0.4498993826835442, "learning_rate": 1.894566098945661e-05, "loss": 0.4277, "step": 67710 }, { "epoch": 1.9770660282331645, "grad_norm": 0.4626064543558897, "learning_rate": 1.8942957556096243e-05, "loss": 0.4454, "step": 67715 }, { "epoch": 1.9772120115034817, "grad_norm": 0.477103757012497, "learning_rate": 1.8940254122735877e-05, "loss": 0.443, "step": 67720 }, { "epoch": 1.977357994773799, "grad_norm": 0.4986148175832113, "learning_rate": 1.8937550689375507e-05, "loss": 0.4507, "step": 67725 }, { "epoch": 1.9775039780441162, "grad_norm": 0.4937921642269721, "learning_rate": 1.8934847256015138e-05, "loss": 0.4507, "step": 67730 }, { "epoch": 1.9776499613144334, "grad_norm": 0.4571798217203037, "learning_rate": 1.8932143822654772e-05, "loss": 0.4384, "step": 67735 }, { "epoch": 1.9777959445847506, "grad_norm": 0.49662779585932343, "learning_rate": 1.8929440389294406e-05, "loss": 0.4188, "step": 67740 }, { "epoch": 1.9779419278550678, "grad_norm": 0.4552589324943479, "learning_rate": 1.8926736955934037e-05, "loss": 0.4181, "step": 67745 }, { "epoch": 1.978087911125385, "grad_norm": 0.45221398072114993, "learning_rate": 1.892403352257367e-05, "loss": 0.3975, "step": 67750 }, { "epoch": 1.9782338943957023, "grad_norm": 0.4915091961598702, "learning_rate": 1.89213300892133e-05, "loss": 0.4637, "step": 67755 }, { "epoch": 1.9783798776660195, "grad_norm": 0.4699644740307115, "learning_rate": 1.8918626655852932e-05, "loss": 0.4346, "step": 67760 }, { "epoch": 1.9785258609363368, "grad_norm": 0.43538841860361116, "learning_rate": 1.8915923222492566e-05, "loss": 0.4241, "step": 67765 }, { "epoch": 1.9786718442066538, "grad_norm": 0.46235362029087285, "learning_rate": 1.89132197891322e-05, "loss": 0.4324, "step": 67770 }, { "epoch": 1.9788178274769712, "grad_norm": 0.48751166769244425, "learning_rate": 1.891051635577183e-05, "loss": 0.4536, "step": 67775 }, { "epoch": 1.9789638107472882, "grad_norm": 0.5160352990706543, "learning_rate": 1.8907812922411465e-05, "loss": 0.4248, "step": 67780 }, { "epoch": 1.9791097940176057, "grad_norm": 0.45994051000113473, "learning_rate": 1.8905109489051095e-05, "loss": 0.4394, "step": 67785 }, { "epoch": 1.9792557772879227, "grad_norm": 0.4770721620602089, "learning_rate": 1.8902406055690726e-05, "loss": 0.4243, "step": 67790 }, { "epoch": 1.9794017605582401, "grad_norm": 0.4756771127514102, "learning_rate": 1.889970262233036e-05, "loss": 0.4477, "step": 67795 }, { "epoch": 1.9795477438285571, "grad_norm": 0.4289158064868029, "learning_rate": 1.8896999188969994e-05, "loss": 0.4216, "step": 67800 }, { "epoch": 1.9796937270988746, "grad_norm": 0.49686876732884544, "learning_rate": 1.8894295755609624e-05, "loss": 0.4561, "step": 67805 }, { "epoch": 1.9798397103691916, "grad_norm": 0.4845795854186067, "learning_rate": 1.889159232224926e-05, "loss": 0.46, "step": 67810 }, { "epoch": 1.979985693639509, "grad_norm": 0.46341416103717786, "learning_rate": 1.888888888888889e-05, "loss": 0.4291, "step": 67815 }, { "epoch": 1.980131676909826, "grad_norm": 0.4931349567901228, "learning_rate": 1.888618545552852e-05, "loss": 0.4285, "step": 67820 }, { "epoch": 1.9802776601801435, "grad_norm": 0.4440092988571265, "learning_rate": 1.8883482022168157e-05, "loss": 0.4026, "step": 67825 }, { "epoch": 1.9804236434504605, "grad_norm": 0.5203120684767681, "learning_rate": 1.8880778588807788e-05, "loss": 0.4725, "step": 67830 }, { "epoch": 1.980569626720778, "grad_norm": 0.46995282396284177, "learning_rate": 1.887807515544742e-05, "loss": 0.4464, "step": 67835 }, { "epoch": 1.980715609991095, "grad_norm": 0.4943649364194581, "learning_rate": 1.8875371722087052e-05, "loss": 0.4449, "step": 67840 }, { "epoch": 1.9808615932614122, "grad_norm": 0.4445169562897856, "learning_rate": 1.8872668288726683e-05, "loss": 0.4245, "step": 67845 }, { "epoch": 1.9810075765317294, "grad_norm": 0.5035720321787457, "learning_rate": 1.8869964855366314e-05, "loss": 0.4462, "step": 67850 }, { "epoch": 1.9811535598020467, "grad_norm": 0.4923308401239882, "learning_rate": 1.886726142200595e-05, "loss": 0.4224, "step": 67855 }, { "epoch": 1.981299543072364, "grad_norm": 0.45476595218598687, "learning_rate": 1.886455798864558e-05, "loss": 0.4085, "step": 67860 }, { "epoch": 1.9814455263426811, "grad_norm": 0.5041638742975371, "learning_rate": 1.8861854555285212e-05, "loss": 0.4309, "step": 67865 }, { "epoch": 1.9815915096129983, "grad_norm": 0.4334624217409952, "learning_rate": 1.8859151121924846e-05, "loss": 0.433, "step": 67870 }, { "epoch": 1.9817374928833156, "grad_norm": 0.47409252062167534, "learning_rate": 1.8856447688564477e-05, "loss": 0.4473, "step": 67875 }, { "epoch": 1.9818834761536328, "grad_norm": 0.49570523707655617, "learning_rate": 1.8853744255204108e-05, "loss": 0.4236, "step": 67880 }, { "epoch": 1.98202945942395, "grad_norm": 0.45590463720416946, "learning_rate": 1.8851040821843745e-05, "loss": 0.4366, "step": 67885 }, { "epoch": 1.9821754426942673, "grad_norm": 0.5137907704247299, "learning_rate": 1.8848337388483376e-05, "loss": 0.4563, "step": 67890 }, { "epoch": 1.9823214259645845, "grad_norm": 0.4756144496895895, "learning_rate": 1.8845633955123006e-05, "loss": 0.4155, "step": 67895 }, { "epoch": 1.9824674092349017, "grad_norm": 0.5393373201889908, "learning_rate": 1.884293052176264e-05, "loss": 0.4521, "step": 67900 }, { "epoch": 1.982613392505219, "grad_norm": 0.5066959377631775, "learning_rate": 1.884022708840227e-05, "loss": 0.4734, "step": 67905 }, { "epoch": 1.9827593757755362, "grad_norm": 0.4939295132013026, "learning_rate": 1.8837523655041905e-05, "loss": 0.4232, "step": 67910 }, { "epoch": 1.9829053590458532, "grad_norm": 0.5162885699992161, "learning_rate": 1.883482022168154e-05, "loss": 0.443, "step": 67915 }, { "epoch": 1.9830513423161706, "grad_norm": 0.48054891757952434, "learning_rate": 1.883211678832117e-05, "loss": 0.4113, "step": 67920 }, { "epoch": 1.9831973255864876, "grad_norm": 0.4595479986640496, "learning_rate": 1.88294133549608e-05, "loss": 0.4443, "step": 67925 }, { "epoch": 1.983343308856805, "grad_norm": 0.46182549609929646, "learning_rate": 1.8826709921600434e-05, "loss": 0.4375, "step": 67930 }, { "epoch": 1.983489292127122, "grad_norm": 0.4735729977722949, "learning_rate": 1.8824006488240065e-05, "loss": 0.433, "step": 67935 }, { "epoch": 1.9836352753974396, "grad_norm": 0.47453255163386965, "learning_rate": 1.88213030548797e-05, "loss": 0.4349, "step": 67940 }, { "epoch": 1.9837812586677566, "grad_norm": 0.4789491807920786, "learning_rate": 1.8818599621519333e-05, "loss": 0.4195, "step": 67945 }, { "epoch": 1.983927241938074, "grad_norm": 0.49120172964573566, "learning_rate": 1.8815896188158963e-05, "loss": 0.4465, "step": 67950 }, { "epoch": 1.984073225208391, "grad_norm": 0.49752453068979086, "learning_rate": 1.8813192754798594e-05, "loss": 0.457, "step": 67955 }, { "epoch": 1.9842192084787085, "grad_norm": 0.47419098926281766, "learning_rate": 1.8810489321438228e-05, "loss": 0.4091, "step": 67960 }, { "epoch": 1.9843651917490255, "grad_norm": 0.5137530140154073, "learning_rate": 1.880778588807786e-05, "loss": 0.437, "step": 67965 }, { "epoch": 1.984511175019343, "grad_norm": 0.45533112688584265, "learning_rate": 1.8805082454717493e-05, "loss": 0.4419, "step": 67970 }, { "epoch": 1.98465715828966, "grad_norm": 0.47986906526368556, "learning_rate": 1.8802379021357123e-05, "loss": 0.4314, "step": 67975 }, { "epoch": 1.9848031415599774, "grad_norm": 0.5196303943422933, "learning_rate": 1.8799675587996757e-05, "loss": 0.4276, "step": 67980 }, { "epoch": 1.9849491248302944, "grad_norm": 0.473281290930801, "learning_rate": 1.8796972154636388e-05, "loss": 0.4135, "step": 67985 }, { "epoch": 1.9850951081006116, "grad_norm": 0.5228242102556345, "learning_rate": 1.8794268721276022e-05, "loss": 0.4432, "step": 67990 }, { "epoch": 1.9852410913709289, "grad_norm": 0.4581471794675477, "learning_rate": 1.8791565287915656e-05, "loss": 0.4302, "step": 67995 }, { "epoch": 1.985387074641246, "grad_norm": 0.47046951191135994, "learning_rate": 1.8788861854555287e-05, "loss": 0.4572, "step": 68000 }, { "epoch": 1.9855330579115633, "grad_norm": 0.4583056182157188, "learning_rate": 1.8786158421194917e-05, "loss": 0.4437, "step": 68005 }, { "epoch": 1.9856790411818805, "grad_norm": 0.4731029719716598, "learning_rate": 1.878345498783455e-05, "loss": 0.4674, "step": 68010 }, { "epoch": 1.9858250244521978, "grad_norm": 0.49385249666959846, "learning_rate": 1.8780751554474182e-05, "loss": 0.4551, "step": 68015 }, { "epoch": 1.985971007722515, "grad_norm": 0.48754673258458675, "learning_rate": 1.8778048121113816e-05, "loss": 0.4481, "step": 68020 }, { "epoch": 1.9861169909928322, "grad_norm": 0.46947932189174113, "learning_rate": 1.877534468775345e-05, "loss": 0.4306, "step": 68025 }, { "epoch": 1.9862629742631495, "grad_norm": 0.4830690410200502, "learning_rate": 1.877264125439308e-05, "loss": 0.4152, "step": 68030 }, { "epoch": 1.9864089575334667, "grad_norm": 0.4568243376400924, "learning_rate": 1.876993782103271e-05, "loss": 0.4362, "step": 68035 }, { "epoch": 1.986554940803784, "grad_norm": 0.4591719471026851, "learning_rate": 1.8767234387672345e-05, "loss": 0.453, "step": 68040 }, { "epoch": 1.9867009240741011, "grad_norm": 0.464568805823233, "learning_rate": 1.8764530954311976e-05, "loss": 0.4259, "step": 68045 }, { "epoch": 1.9868469073444184, "grad_norm": 0.45865175810095815, "learning_rate": 1.876182752095161e-05, "loss": 0.4402, "step": 68050 }, { "epoch": 1.9869928906147356, "grad_norm": 0.4983031613418212, "learning_rate": 1.8759124087591244e-05, "loss": 0.4313, "step": 68055 }, { "epoch": 1.9871388738850526, "grad_norm": 0.5000026326709218, "learning_rate": 1.8756420654230874e-05, "loss": 0.4504, "step": 68060 }, { "epoch": 1.98728485715537, "grad_norm": 0.4544558519881497, "learning_rate": 1.8753717220870505e-05, "loss": 0.422, "step": 68065 }, { "epoch": 1.987430840425687, "grad_norm": 0.49395533222886356, "learning_rate": 1.875101378751014e-05, "loss": 0.4404, "step": 68070 }, { "epoch": 1.9875768236960045, "grad_norm": 0.4779721448628186, "learning_rate": 1.874831035414977e-05, "loss": 0.4285, "step": 68075 }, { "epoch": 1.9877228069663215, "grad_norm": 0.46586545627215775, "learning_rate": 1.8745606920789404e-05, "loss": 0.4399, "step": 68080 }, { "epoch": 1.987868790236639, "grad_norm": 0.4352384087258315, "learning_rate": 1.8742903487429038e-05, "loss": 0.4288, "step": 68085 }, { "epoch": 1.988014773506956, "grad_norm": 0.470254983083869, "learning_rate": 1.8740200054068668e-05, "loss": 0.4414, "step": 68090 }, { "epoch": 1.9881607567772734, "grad_norm": 0.43229841586741113, "learning_rate": 1.87374966207083e-05, "loss": 0.4207, "step": 68095 }, { "epoch": 1.9883067400475904, "grad_norm": 0.48348967799478443, "learning_rate": 1.8734793187347933e-05, "loss": 0.437, "step": 68100 }, { "epoch": 1.988452723317908, "grad_norm": 0.4497402142432747, "learning_rate": 1.8732089753987564e-05, "loss": 0.4407, "step": 68105 }, { "epoch": 1.988598706588225, "grad_norm": 0.4423685937036642, "learning_rate": 1.8729386320627198e-05, "loss": 0.4111, "step": 68110 }, { "epoch": 1.9887446898585424, "grad_norm": 0.45909729380077435, "learning_rate": 1.872668288726683e-05, "loss": 0.4363, "step": 68115 }, { "epoch": 1.9888906731288594, "grad_norm": 0.47426088096830693, "learning_rate": 1.8723979453906462e-05, "loss": 0.4163, "step": 68120 }, { "epoch": 1.9890366563991768, "grad_norm": 0.46718394034947813, "learning_rate": 1.8721276020546093e-05, "loss": 0.4124, "step": 68125 }, { "epoch": 1.9891826396694938, "grad_norm": 0.5794544777542956, "learning_rate": 1.8718572587185727e-05, "loss": 0.46, "step": 68130 }, { "epoch": 1.989328622939811, "grad_norm": 0.5020566995576036, "learning_rate": 1.8715869153825357e-05, "loss": 0.4472, "step": 68135 }, { "epoch": 1.9894746062101283, "grad_norm": 0.48916902404719914, "learning_rate": 1.871316572046499e-05, "loss": 0.441, "step": 68140 }, { "epoch": 1.9896205894804455, "grad_norm": 0.5200014586865397, "learning_rate": 1.8710462287104625e-05, "loss": 0.4553, "step": 68145 }, { "epoch": 1.9897665727507627, "grad_norm": 0.43474079890558875, "learning_rate": 1.8707758853744256e-05, "loss": 0.4285, "step": 68150 }, { "epoch": 1.98991255602108, "grad_norm": 0.4930281541960497, "learning_rate": 1.8705055420383887e-05, "loss": 0.4341, "step": 68155 }, { "epoch": 1.9900585392913972, "grad_norm": 0.45679578944168286, "learning_rate": 1.870235198702352e-05, "loss": 0.4258, "step": 68160 }, { "epoch": 1.9902045225617144, "grad_norm": 0.4638893232182964, "learning_rate": 1.8699648553663155e-05, "loss": 0.4305, "step": 68165 }, { "epoch": 1.9903505058320317, "grad_norm": 0.5027509250639755, "learning_rate": 1.8696945120302785e-05, "loss": 0.4456, "step": 68170 }, { "epoch": 1.9904964891023489, "grad_norm": 0.47753220567719995, "learning_rate": 1.869424168694242e-05, "loss": 0.434, "step": 68175 }, { "epoch": 1.9906424723726661, "grad_norm": 0.47522707065056835, "learning_rate": 1.869153825358205e-05, "loss": 0.4502, "step": 68180 }, { "epoch": 1.9907884556429833, "grad_norm": 0.4358912360282838, "learning_rate": 1.868883482022168e-05, "loss": 0.4066, "step": 68185 }, { "epoch": 1.9909344389133006, "grad_norm": 0.4405058164070727, "learning_rate": 1.8686131386861315e-05, "loss": 0.4203, "step": 68190 }, { "epoch": 1.9910804221836178, "grad_norm": 0.5011990736991863, "learning_rate": 1.868342795350095e-05, "loss": 0.4368, "step": 68195 }, { "epoch": 1.991226405453935, "grad_norm": 0.4827954543404659, "learning_rate": 1.868072452014058e-05, "loss": 0.4293, "step": 68200 }, { "epoch": 1.991372388724252, "grad_norm": 0.45442559842996855, "learning_rate": 1.8678021086780213e-05, "loss": 0.3956, "step": 68205 }, { "epoch": 1.9915183719945695, "grad_norm": 0.4548833530769022, "learning_rate": 1.8675317653419844e-05, "loss": 0.4197, "step": 68210 }, { "epoch": 1.9916643552648865, "grad_norm": 0.41977260279478007, "learning_rate": 1.8672614220059474e-05, "loss": 0.4329, "step": 68215 }, { "epoch": 1.991810338535204, "grad_norm": 0.4552337929215215, "learning_rate": 1.866991078669911e-05, "loss": 0.4211, "step": 68220 }, { "epoch": 1.991956321805521, "grad_norm": 0.4437230895441444, "learning_rate": 1.8667207353338743e-05, "loss": 0.4277, "step": 68225 }, { "epoch": 1.9921023050758384, "grad_norm": 0.4625062108758829, "learning_rate": 1.8664503919978373e-05, "loss": 0.4374, "step": 68230 }, { "epoch": 1.9922482883461554, "grad_norm": 0.4760984606983902, "learning_rate": 1.8661800486618007e-05, "loss": 0.444, "step": 68235 }, { "epoch": 1.9923942716164729, "grad_norm": 0.46014050767128306, "learning_rate": 1.8659097053257638e-05, "loss": 0.4226, "step": 68240 }, { "epoch": 1.9925402548867899, "grad_norm": 0.4387799418504589, "learning_rate": 1.865639361989727e-05, "loss": 0.4238, "step": 68245 }, { "epoch": 1.9926862381571073, "grad_norm": 0.49909084639244167, "learning_rate": 1.8653690186536902e-05, "loss": 0.4455, "step": 68250 }, { "epoch": 1.9928322214274243, "grad_norm": 0.4872142317578402, "learning_rate": 1.8650986753176536e-05, "loss": 0.4225, "step": 68255 }, { "epoch": 1.9929782046977418, "grad_norm": 0.475981076543006, "learning_rate": 1.8648283319816167e-05, "loss": 0.4566, "step": 68260 }, { "epoch": 1.9931241879680588, "grad_norm": 0.48392340249956384, "learning_rate": 1.86455798864558e-05, "loss": 0.4268, "step": 68265 }, { "epoch": 1.9932701712383762, "grad_norm": 0.508478284435826, "learning_rate": 1.864287645309543e-05, "loss": 0.4438, "step": 68270 }, { "epoch": 1.9934161545086932, "grad_norm": 0.5351478831943837, "learning_rate": 1.8640173019735062e-05, "loss": 0.4526, "step": 68275 }, { "epoch": 1.9935621377790105, "grad_norm": 0.46099628902344714, "learning_rate": 1.8637469586374696e-05, "loss": 0.4595, "step": 68280 }, { "epoch": 1.9937081210493277, "grad_norm": 0.4463903338778986, "learning_rate": 1.863476615301433e-05, "loss": 0.4491, "step": 68285 }, { "epoch": 1.993854104319645, "grad_norm": 0.46530613755133676, "learning_rate": 1.863206271965396e-05, "loss": 0.438, "step": 68290 }, { "epoch": 1.9940000875899622, "grad_norm": 0.47356364174521814, "learning_rate": 1.8629359286293595e-05, "loss": 0.4319, "step": 68295 }, { "epoch": 1.9941460708602794, "grad_norm": 0.4669272012521625, "learning_rate": 1.8626655852933226e-05, "loss": 0.4358, "step": 68300 }, { "epoch": 1.9942920541305966, "grad_norm": 0.48444598651641835, "learning_rate": 1.8623952419572856e-05, "loss": 0.4022, "step": 68305 }, { "epoch": 1.9944380374009139, "grad_norm": 0.46284331950180857, "learning_rate": 1.862124898621249e-05, "loss": 0.4293, "step": 68310 }, { "epoch": 1.994584020671231, "grad_norm": 0.48929326509313065, "learning_rate": 1.8618545552852124e-05, "loss": 0.4351, "step": 68315 }, { "epoch": 1.9947300039415483, "grad_norm": 0.4691417140325544, "learning_rate": 1.8615842119491755e-05, "loss": 0.4186, "step": 68320 }, { "epoch": 1.9948759872118655, "grad_norm": 0.4938800626322855, "learning_rate": 1.861313868613139e-05, "loss": 0.4474, "step": 68325 }, { "epoch": 1.9950219704821828, "grad_norm": 0.46423272522731973, "learning_rate": 1.861043525277102e-05, "loss": 0.4408, "step": 68330 }, { "epoch": 1.9951679537525, "grad_norm": 0.4682801639509049, "learning_rate": 1.8607731819410654e-05, "loss": 0.4026, "step": 68335 }, { "epoch": 1.9953139370228172, "grad_norm": 0.4657350814355299, "learning_rate": 1.8605028386050284e-05, "loss": 0.4335, "step": 68340 }, { "epoch": 1.9954599202931345, "grad_norm": 0.48214571939075695, "learning_rate": 1.8602324952689918e-05, "loss": 0.4597, "step": 68345 }, { "epoch": 1.9956059035634515, "grad_norm": 0.45611032215018976, "learning_rate": 1.859962151932955e-05, "loss": 0.4162, "step": 68350 }, { "epoch": 1.995751886833769, "grad_norm": 0.46168338167451, "learning_rate": 1.859691808596918e-05, "loss": 0.4118, "step": 68355 }, { "epoch": 1.995897870104086, "grad_norm": 0.4948556443077006, "learning_rate": 1.8594214652608813e-05, "loss": 0.4194, "step": 68360 }, { "epoch": 1.9960438533744034, "grad_norm": 0.41178936233427194, "learning_rate": 1.8591511219248447e-05, "loss": 0.3938, "step": 68365 }, { "epoch": 1.9961898366447204, "grad_norm": 0.46272619381579333, "learning_rate": 1.8588807785888078e-05, "loss": 0.4316, "step": 68370 }, { "epoch": 1.9963358199150378, "grad_norm": 0.469958552886636, "learning_rate": 1.8586104352527712e-05, "loss": 0.4656, "step": 68375 }, { "epoch": 1.9964818031853548, "grad_norm": 0.4827167531121255, "learning_rate": 1.8583400919167343e-05, "loss": 0.4416, "step": 68380 }, { "epoch": 1.9966277864556723, "grad_norm": 0.47872245734646435, "learning_rate": 1.8580697485806973e-05, "loss": 0.4684, "step": 68385 }, { "epoch": 1.9967737697259893, "grad_norm": 0.515308385371639, "learning_rate": 1.8577994052446607e-05, "loss": 0.4436, "step": 68390 }, { "epoch": 1.9969197529963068, "grad_norm": 0.4623180638631848, "learning_rate": 1.857529061908624e-05, "loss": 0.4435, "step": 68395 }, { "epoch": 1.9970657362666238, "grad_norm": 0.4580109992652903, "learning_rate": 1.8572587185725872e-05, "loss": 0.4216, "step": 68400 }, { "epoch": 1.9972117195369412, "grad_norm": 0.480487148501135, "learning_rate": 1.8569883752365506e-05, "loss": 0.4376, "step": 68405 }, { "epoch": 1.9973577028072582, "grad_norm": 0.456014734719796, "learning_rate": 1.8567180319005137e-05, "loss": 0.4289, "step": 68410 }, { "epoch": 1.9975036860775757, "grad_norm": 0.5094511850319844, "learning_rate": 1.8564476885644767e-05, "loss": 0.4412, "step": 68415 }, { "epoch": 1.9976496693478927, "grad_norm": 0.4706236044375761, "learning_rate": 1.8561773452284405e-05, "loss": 0.4214, "step": 68420 }, { "epoch": 1.99779565261821, "grad_norm": 0.44837721329239516, "learning_rate": 1.8559070018924035e-05, "loss": 0.4259, "step": 68425 }, { "epoch": 1.9979416358885271, "grad_norm": 0.47151855170555224, "learning_rate": 1.8556366585563666e-05, "loss": 0.4034, "step": 68430 }, { "epoch": 1.9980876191588444, "grad_norm": 0.4546003768007102, "learning_rate": 1.85536631522033e-05, "loss": 0.4156, "step": 68435 }, { "epoch": 1.9982336024291616, "grad_norm": 0.4623077259369671, "learning_rate": 1.855095971884293e-05, "loss": 0.4089, "step": 68440 }, { "epoch": 1.9983795856994788, "grad_norm": 0.4830061744510788, "learning_rate": 1.854825628548256e-05, "loss": 0.4516, "step": 68445 }, { "epoch": 1.998525568969796, "grad_norm": 0.5072429186149813, "learning_rate": 1.85455528521222e-05, "loss": 0.4253, "step": 68450 }, { "epoch": 1.9986715522401133, "grad_norm": 0.4641710181534908, "learning_rate": 1.854284941876183e-05, "loss": 0.4414, "step": 68455 }, { "epoch": 1.9988175355104305, "grad_norm": 0.4554287467338292, "learning_rate": 1.854014598540146e-05, "loss": 0.3977, "step": 68460 }, { "epoch": 1.9989635187807477, "grad_norm": 0.46454367073974295, "learning_rate": 1.8537442552041094e-05, "loss": 0.437, "step": 68465 }, { "epoch": 1.999109502051065, "grad_norm": 0.4930084591997668, "learning_rate": 1.8534739118680724e-05, "loss": 0.436, "step": 68470 }, { "epoch": 1.9992554853213822, "grad_norm": 0.4771972489578986, "learning_rate": 1.8532035685320355e-05, "loss": 0.4341, "step": 68475 }, { "epoch": 1.9994014685916994, "grad_norm": 0.4947208165989544, "learning_rate": 1.8529332251959992e-05, "loss": 0.4738, "step": 68480 }, { "epoch": 1.9995474518620167, "grad_norm": 0.46152875822775885, "learning_rate": 1.8526628818599623e-05, "loss": 0.4421, "step": 68485 }, { "epoch": 1.9996934351323339, "grad_norm": 0.43301637810088206, "learning_rate": 1.8523925385239254e-05, "loss": 0.4221, "step": 68490 }, { "epoch": 1.999839418402651, "grad_norm": 0.45666084815453756, "learning_rate": 1.8521221951878888e-05, "loss": 0.4261, "step": 68495 }, { "epoch": 1.9999854016729683, "grad_norm": 0.5284864422056024, "learning_rate": 1.8518518518518518e-05, "loss": 0.4486, "step": 68500 }, { "epoch": 2.000145983270317, "grad_norm": 0.3876727041961795, "learning_rate": 1.8515815085158152e-05, "loss": 0.3886, "step": 68505 }, { "epoch": 2.0002919665406345, "grad_norm": 0.4385458026389408, "learning_rate": 1.8513111651797786e-05, "loss": 0.279, "step": 68510 }, { "epoch": 2.0004379498109515, "grad_norm": 0.502832313164971, "learning_rate": 1.8510408218437417e-05, "loss": 0.2858, "step": 68515 }, { "epoch": 2.000583933081269, "grad_norm": 0.5179084112602427, "learning_rate": 1.8507704785077048e-05, "loss": 0.2523, "step": 68520 }, { "epoch": 2.000729916351586, "grad_norm": 0.4900735651266831, "learning_rate": 1.850500135171668e-05, "loss": 0.2554, "step": 68525 }, { "epoch": 2.0008758996219034, "grad_norm": 0.4718031047733712, "learning_rate": 1.8502297918356312e-05, "loss": 0.2673, "step": 68530 }, { "epoch": 2.0010218828922204, "grad_norm": 0.5459594416597204, "learning_rate": 1.8499594484995946e-05, "loss": 0.2716, "step": 68535 }, { "epoch": 2.001167866162538, "grad_norm": 0.47731358452902906, "learning_rate": 1.849689105163558e-05, "loss": 0.2549, "step": 68540 }, { "epoch": 2.001313849432855, "grad_norm": 0.5399733708879908, "learning_rate": 1.849418761827521e-05, "loss": 0.2684, "step": 68545 }, { "epoch": 2.0014598327031723, "grad_norm": 0.5222731734679024, "learning_rate": 1.849148418491484e-05, "loss": 0.2774, "step": 68550 }, { "epoch": 2.0016058159734893, "grad_norm": 0.49756671625868093, "learning_rate": 1.8488780751554475e-05, "loss": 0.2607, "step": 68555 }, { "epoch": 2.0017517992438068, "grad_norm": 0.5416489586974037, "learning_rate": 1.8486077318194106e-05, "loss": 0.2825, "step": 68560 }, { "epoch": 2.0018977825141238, "grad_norm": 0.48369257966299695, "learning_rate": 1.848337388483374e-05, "loss": 0.2492, "step": 68565 }, { "epoch": 2.002043765784441, "grad_norm": 0.5111322790324022, "learning_rate": 1.8480670451473374e-05, "loss": 0.2709, "step": 68570 }, { "epoch": 2.002189749054758, "grad_norm": 0.49495077994037223, "learning_rate": 1.8477967018113005e-05, "loss": 0.2529, "step": 68575 }, { "epoch": 2.0023357323250757, "grad_norm": 0.5534406574455781, "learning_rate": 1.8475263584752635e-05, "loss": 0.2754, "step": 68580 }, { "epoch": 2.0024817155953927, "grad_norm": 0.5077939416394099, "learning_rate": 1.847256015139227e-05, "loss": 0.2752, "step": 68585 }, { "epoch": 2.00262769886571, "grad_norm": 0.507036517126107, "learning_rate": 1.8469856718031903e-05, "loss": 0.245, "step": 68590 }, { "epoch": 2.002773682136027, "grad_norm": 0.5112860734912756, "learning_rate": 1.8467153284671534e-05, "loss": 0.2537, "step": 68595 }, { "epoch": 2.0029196654063446, "grad_norm": 0.5748904904057897, "learning_rate": 1.8464449851311168e-05, "loss": 0.2653, "step": 68600 }, { "epoch": 2.0030656486766616, "grad_norm": 0.5283647147977051, "learning_rate": 1.84617464179508e-05, "loss": 0.256, "step": 68605 }, { "epoch": 2.003211631946979, "grad_norm": 0.5610449529146964, "learning_rate": 1.845904298459043e-05, "loss": 0.2666, "step": 68610 }, { "epoch": 2.003357615217296, "grad_norm": 0.4919475503942428, "learning_rate": 1.8456339551230063e-05, "loss": 0.2682, "step": 68615 }, { "epoch": 2.0035035984876135, "grad_norm": 0.5142242636013745, "learning_rate": 1.8453636117869697e-05, "loss": 0.2787, "step": 68620 }, { "epoch": 2.0036495817579305, "grad_norm": 0.5552384467750344, "learning_rate": 1.8450932684509328e-05, "loss": 0.258, "step": 68625 }, { "epoch": 2.003795565028248, "grad_norm": 0.5630972823735394, "learning_rate": 1.844822925114896e-05, "loss": 0.288, "step": 68630 }, { "epoch": 2.003941548298565, "grad_norm": 0.5528216946487311, "learning_rate": 1.8445525817788593e-05, "loss": 0.2679, "step": 68635 }, { "epoch": 2.0040875315688824, "grad_norm": 0.5601595884385057, "learning_rate": 1.8442822384428223e-05, "loss": 0.2815, "step": 68640 }, { "epoch": 2.0042335148391994, "grad_norm": 0.5311429916323038, "learning_rate": 1.8440118951067857e-05, "loss": 0.2557, "step": 68645 }, { "epoch": 2.0043794981095164, "grad_norm": 0.533203359941316, "learning_rate": 1.843741551770749e-05, "loss": 0.2576, "step": 68650 }, { "epoch": 2.004525481379834, "grad_norm": 0.4820575070800396, "learning_rate": 1.8434712084347122e-05, "loss": 0.2538, "step": 68655 }, { "epoch": 2.004671464650151, "grad_norm": 0.5392622663217009, "learning_rate": 1.8432008650986752e-05, "loss": 0.2448, "step": 68660 }, { "epoch": 2.0048174479204683, "grad_norm": 0.5870156710254073, "learning_rate": 1.8429305217626386e-05, "loss": 0.2702, "step": 68665 }, { "epoch": 2.0049634311907854, "grad_norm": 0.5692812487794061, "learning_rate": 1.8426601784266017e-05, "loss": 0.2611, "step": 68670 }, { "epoch": 2.005109414461103, "grad_norm": 0.5127125947961917, "learning_rate": 1.842389835090565e-05, "loss": 0.2691, "step": 68675 }, { "epoch": 2.00525539773142, "grad_norm": 0.5745563719017434, "learning_rate": 1.8421194917545285e-05, "loss": 0.2564, "step": 68680 }, { "epoch": 2.0054013810017373, "grad_norm": 0.5190769033427312, "learning_rate": 1.8418491484184916e-05, "loss": 0.2538, "step": 68685 }, { "epoch": 2.0055473642720543, "grad_norm": 0.5208910195189462, "learning_rate": 1.8415788050824546e-05, "loss": 0.255, "step": 68690 }, { "epoch": 2.0056933475423717, "grad_norm": 0.5259228971933395, "learning_rate": 1.841308461746418e-05, "loss": 0.2572, "step": 68695 }, { "epoch": 2.0058393308126887, "grad_norm": 0.5526332600621976, "learning_rate": 1.841038118410381e-05, "loss": 0.2559, "step": 68700 }, { "epoch": 2.005985314083006, "grad_norm": 0.5385808213693465, "learning_rate": 1.8407677750743445e-05, "loss": 0.2431, "step": 68705 }, { "epoch": 2.006131297353323, "grad_norm": 0.5501367642202083, "learning_rate": 1.840497431738308e-05, "loss": 0.2558, "step": 68710 }, { "epoch": 2.0062772806236406, "grad_norm": 0.5442270746917717, "learning_rate": 1.840227088402271e-05, "loss": 0.2626, "step": 68715 }, { "epoch": 2.0064232638939576, "grad_norm": 0.5492923134606666, "learning_rate": 1.839956745066234e-05, "loss": 0.24, "step": 68720 }, { "epoch": 2.006569247164275, "grad_norm": 0.5255758058246478, "learning_rate": 1.8396864017301974e-05, "loss": 0.2498, "step": 68725 }, { "epoch": 2.006715230434592, "grad_norm": 0.5530733009331315, "learning_rate": 1.8394160583941605e-05, "loss": 0.2683, "step": 68730 }, { "epoch": 2.0068612137049096, "grad_norm": 0.5907309255323735, "learning_rate": 1.839145715058124e-05, "loss": 0.2851, "step": 68735 }, { "epoch": 2.0070071969752266, "grad_norm": 0.5329431699417222, "learning_rate": 1.8388753717220873e-05, "loss": 0.2577, "step": 68740 }, { "epoch": 2.007153180245544, "grad_norm": 0.5818742196484281, "learning_rate": 1.8386050283860504e-05, "loss": 0.2548, "step": 68745 }, { "epoch": 2.007299163515861, "grad_norm": 0.5472441285314247, "learning_rate": 1.8383346850500134e-05, "loss": 0.2768, "step": 68750 }, { "epoch": 2.0074451467861785, "grad_norm": 0.5383298457177913, "learning_rate": 1.8380643417139768e-05, "loss": 0.264, "step": 68755 }, { "epoch": 2.0075911300564955, "grad_norm": 0.5329399841762028, "learning_rate": 1.8377939983779402e-05, "loss": 0.2632, "step": 68760 }, { "epoch": 2.007737113326813, "grad_norm": 0.5418880385433823, "learning_rate": 1.8375236550419033e-05, "loss": 0.2735, "step": 68765 }, { "epoch": 2.00788309659713, "grad_norm": 0.5481234494407202, "learning_rate": 1.8372533117058667e-05, "loss": 0.2643, "step": 68770 }, { "epoch": 2.0080290798674474, "grad_norm": 0.5719367568063858, "learning_rate": 1.8369829683698297e-05, "loss": 0.2744, "step": 68775 }, { "epoch": 2.0081750631377644, "grad_norm": 0.5426472789514328, "learning_rate": 1.8367126250337928e-05, "loss": 0.2655, "step": 68780 }, { "epoch": 2.008321046408082, "grad_norm": 0.5447650707829128, "learning_rate": 1.8364422816977562e-05, "loss": 0.2657, "step": 68785 }, { "epoch": 2.008467029678399, "grad_norm": 0.5453306120398367, "learning_rate": 1.8361719383617196e-05, "loss": 0.272, "step": 68790 }, { "epoch": 2.008613012948716, "grad_norm": 0.5474650801707504, "learning_rate": 1.8359015950256827e-05, "loss": 0.2699, "step": 68795 }, { "epoch": 2.0087589962190333, "grad_norm": 0.5537009156475432, "learning_rate": 1.835631251689646e-05, "loss": 0.2652, "step": 68800 }, { "epoch": 2.0089049794893503, "grad_norm": 0.5337105297292642, "learning_rate": 1.835360908353609e-05, "loss": 0.2464, "step": 68805 }, { "epoch": 2.0090509627596678, "grad_norm": 0.5396297433065221, "learning_rate": 1.8350905650175722e-05, "loss": 0.2531, "step": 68810 }, { "epoch": 2.0091969460299848, "grad_norm": 0.5032516824038654, "learning_rate": 1.8348202216815356e-05, "loss": 0.2387, "step": 68815 }, { "epoch": 2.0093429293003022, "grad_norm": 0.5665899861757823, "learning_rate": 1.834549878345499e-05, "loss": 0.2477, "step": 68820 }, { "epoch": 2.0094889125706192, "grad_norm": 0.503351354877878, "learning_rate": 1.834279535009462e-05, "loss": 0.2576, "step": 68825 }, { "epoch": 2.0096348958409367, "grad_norm": 0.5640228778358748, "learning_rate": 1.8340091916734255e-05, "loss": 0.2667, "step": 68830 }, { "epoch": 2.0097808791112537, "grad_norm": 0.5324307238935669, "learning_rate": 1.8337388483373885e-05, "loss": 0.2712, "step": 68835 }, { "epoch": 2.009926862381571, "grad_norm": 0.5276217738096486, "learning_rate": 1.8334685050013516e-05, "loss": 0.2492, "step": 68840 }, { "epoch": 2.010072845651888, "grad_norm": 0.5948230033760108, "learning_rate": 1.8331981616653153e-05, "loss": 0.2667, "step": 68845 }, { "epoch": 2.0102188289222056, "grad_norm": 0.5463279759455141, "learning_rate": 1.8329278183292784e-05, "loss": 0.2653, "step": 68850 }, { "epoch": 2.0103648121925226, "grad_norm": 0.5397319546938228, "learning_rate": 1.8326574749932414e-05, "loss": 0.2664, "step": 68855 }, { "epoch": 2.01051079546284, "grad_norm": 0.522607113485795, "learning_rate": 1.832387131657205e-05, "loss": 0.2597, "step": 68860 }, { "epoch": 2.010656778733157, "grad_norm": 0.5309602453671903, "learning_rate": 1.832116788321168e-05, "loss": 0.268, "step": 68865 }, { "epoch": 2.0108027620034745, "grad_norm": 0.5358700962789038, "learning_rate": 1.831846444985131e-05, "loss": 0.2569, "step": 68870 }, { "epoch": 2.0109487452737915, "grad_norm": 0.54848228650006, "learning_rate": 1.8315761016490944e-05, "loss": 0.2589, "step": 68875 }, { "epoch": 2.011094728544109, "grad_norm": 0.5340214482577722, "learning_rate": 1.8313057583130578e-05, "loss": 0.2641, "step": 68880 }, { "epoch": 2.011240711814426, "grad_norm": 0.5065311371728343, "learning_rate": 1.831035414977021e-05, "loss": 0.2566, "step": 68885 }, { "epoch": 2.0113866950847434, "grad_norm": 0.540885581770776, "learning_rate": 1.8307650716409842e-05, "loss": 0.2532, "step": 68890 }, { "epoch": 2.0115326783550604, "grad_norm": 0.578257222388948, "learning_rate": 1.8304947283049473e-05, "loss": 0.2746, "step": 68895 }, { "epoch": 2.011678661625378, "grad_norm": 0.5550896324343964, "learning_rate": 1.8302243849689104e-05, "loss": 0.2627, "step": 68900 }, { "epoch": 2.011824644895695, "grad_norm": 0.5368619485769245, "learning_rate": 1.8299540416328738e-05, "loss": 0.2672, "step": 68905 }, { "epoch": 2.0119706281660124, "grad_norm": 0.5845401785702533, "learning_rate": 1.829683698296837e-05, "loss": 0.2855, "step": 68910 }, { "epoch": 2.0121166114363294, "grad_norm": 0.49799362283084864, "learning_rate": 1.8294133549608002e-05, "loss": 0.2525, "step": 68915 }, { "epoch": 2.012262594706647, "grad_norm": 0.5864453909009711, "learning_rate": 1.8291430116247636e-05, "loss": 0.281, "step": 68920 }, { "epoch": 2.012408577976964, "grad_norm": 0.5375572967700541, "learning_rate": 1.8288726682887267e-05, "loss": 0.2607, "step": 68925 }, { "epoch": 2.0125545612472813, "grad_norm": 0.5462508911145556, "learning_rate": 1.82860232495269e-05, "loss": 0.2527, "step": 68930 }, { "epoch": 2.0127005445175983, "grad_norm": 0.556419462333745, "learning_rate": 1.828331981616653e-05, "loss": 0.2364, "step": 68935 }, { "epoch": 2.0128465277879153, "grad_norm": 0.5673280808936322, "learning_rate": 1.8280616382806166e-05, "loss": 0.2471, "step": 68940 }, { "epoch": 2.0129925110582327, "grad_norm": 0.5512869793748977, "learning_rate": 1.8277912949445796e-05, "loss": 0.2621, "step": 68945 }, { "epoch": 2.0131384943285497, "grad_norm": 0.5225353246017469, "learning_rate": 1.827520951608543e-05, "loss": 0.2573, "step": 68950 }, { "epoch": 2.013284477598867, "grad_norm": 0.5169067413587262, "learning_rate": 1.827250608272506e-05, "loss": 0.2601, "step": 68955 }, { "epoch": 2.013430460869184, "grad_norm": 0.5272015888296615, "learning_rate": 1.8269802649364695e-05, "loss": 0.2516, "step": 68960 }, { "epoch": 2.0135764441395017, "grad_norm": 0.4880842966604581, "learning_rate": 1.8267099216004325e-05, "loss": 0.2528, "step": 68965 }, { "epoch": 2.0137224274098187, "grad_norm": 0.5272999699337914, "learning_rate": 1.826439578264396e-05, "loss": 0.2573, "step": 68970 }, { "epoch": 2.013868410680136, "grad_norm": 0.5436944121617542, "learning_rate": 1.826169234928359e-05, "loss": 0.2581, "step": 68975 }, { "epoch": 2.014014393950453, "grad_norm": 0.5638031776843899, "learning_rate": 1.8258988915923224e-05, "loss": 0.2873, "step": 68980 }, { "epoch": 2.0141603772207706, "grad_norm": 0.5008992436554749, "learning_rate": 1.8256285482562855e-05, "loss": 0.2546, "step": 68985 }, { "epoch": 2.0143063604910876, "grad_norm": 0.550294269738232, "learning_rate": 1.825358204920249e-05, "loss": 0.2649, "step": 68990 }, { "epoch": 2.014452343761405, "grad_norm": 0.5439296760790343, "learning_rate": 1.825087861584212e-05, "loss": 0.2549, "step": 68995 }, { "epoch": 2.014598327031722, "grad_norm": 0.5639164774777046, "learning_rate": 1.8248175182481753e-05, "loss": 0.2485, "step": 69000 }, { "epoch": 2.0147443103020395, "grad_norm": 0.5526925809650047, "learning_rate": 1.8245471749121384e-05, "loss": 0.2701, "step": 69005 }, { "epoch": 2.0148902935723565, "grad_norm": 0.530865691632625, "learning_rate": 1.8242768315761015e-05, "loss": 0.2649, "step": 69010 }, { "epoch": 2.015036276842674, "grad_norm": 0.5611580681988179, "learning_rate": 1.8240064882400652e-05, "loss": 0.25, "step": 69015 }, { "epoch": 2.015182260112991, "grad_norm": 0.5347013224611257, "learning_rate": 1.8237361449040283e-05, "loss": 0.2543, "step": 69020 }, { "epoch": 2.0153282433833084, "grad_norm": 0.5937768152242854, "learning_rate": 1.8234658015679913e-05, "loss": 0.2674, "step": 69025 }, { "epoch": 2.0154742266536254, "grad_norm": 0.6054956722047687, "learning_rate": 1.8231954582319547e-05, "loss": 0.2587, "step": 69030 }, { "epoch": 2.015620209923943, "grad_norm": 0.5514134277807985, "learning_rate": 1.8229251148959178e-05, "loss": 0.2686, "step": 69035 }, { "epoch": 2.01576619319426, "grad_norm": 0.5580216007120119, "learning_rate": 1.822654771559881e-05, "loss": 0.2557, "step": 69040 }, { "epoch": 2.0159121764645773, "grad_norm": 0.5110979643880502, "learning_rate": 1.8223844282238446e-05, "loss": 0.2719, "step": 69045 }, { "epoch": 2.0160581597348943, "grad_norm": 0.5573544140750081, "learning_rate": 1.8221140848878077e-05, "loss": 0.261, "step": 69050 }, { "epoch": 2.016204143005212, "grad_norm": 0.5454617365624365, "learning_rate": 1.8218437415517707e-05, "loss": 0.2559, "step": 69055 }, { "epoch": 2.016350126275529, "grad_norm": 0.6562981447991536, "learning_rate": 1.821573398215734e-05, "loss": 0.2793, "step": 69060 }, { "epoch": 2.0164961095458462, "grad_norm": 0.5672553806400067, "learning_rate": 1.8213030548796972e-05, "loss": 0.2535, "step": 69065 }, { "epoch": 2.0166420928161632, "grad_norm": 0.5636905414053239, "learning_rate": 1.8210327115436606e-05, "loss": 0.2429, "step": 69070 }, { "epoch": 2.0167880760864807, "grad_norm": 0.5884001952612795, "learning_rate": 1.820762368207624e-05, "loss": 0.2445, "step": 69075 }, { "epoch": 2.0169340593567977, "grad_norm": 0.552676002754238, "learning_rate": 1.820492024871587e-05, "loss": 0.2768, "step": 69080 }, { "epoch": 2.017080042627115, "grad_norm": 0.559343124816177, "learning_rate": 1.82022168153555e-05, "loss": 0.2407, "step": 69085 }, { "epoch": 2.017226025897432, "grad_norm": 0.49990047641864105, "learning_rate": 1.8199513381995135e-05, "loss": 0.2688, "step": 69090 }, { "epoch": 2.017372009167749, "grad_norm": 0.5199662158052673, "learning_rate": 1.8196809948634766e-05, "loss": 0.2524, "step": 69095 }, { "epoch": 2.0175179924380666, "grad_norm": 0.5265239471971934, "learning_rate": 1.81941065152744e-05, "loss": 0.2441, "step": 69100 }, { "epoch": 2.0176639757083836, "grad_norm": 0.5693921606752445, "learning_rate": 1.8191403081914034e-05, "loss": 0.2638, "step": 69105 }, { "epoch": 2.017809958978701, "grad_norm": 0.5106704959244464, "learning_rate": 1.8188699648553664e-05, "loss": 0.2636, "step": 69110 }, { "epoch": 2.017955942249018, "grad_norm": 0.5782248439496352, "learning_rate": 1.8185996215193295e-05, "loss": 0.2664, "step": 69115 }, { "epoch": 2.0181019255193355, "grad_norm": 0.591078519189885, "learning_rate": 1.818329278183293e-05, "loss": 0.2566, "step": 69120 }, { "epoch": 2.0182479087896525, "grad_norm": 0.5401420423669987, "learning_rate": 1.818058934847256e-05, "loss": 0.2622, "step": 69125 }, { "epoch": 2.01839389205997, "grad_norm": 0.5579321330313461, "learning_rate": 1.8177885915112194e-05, "loss": 0.2529, "step": 69130 }, { "epoch": 2.018539875330287, "grad_norm": 0.5490438012211428, "learning_rate": 1.8175182481751828e-05, "loss": 0.2712, "step": 69135 }, { "epoch": 2.0186858586006045, "grad_norm": 0.5478944809279405, "learning_rate": 1.8172479048391458e-05, "loss": 0.2505, "step": 69140 }, { "epoch": 2.0188318418709215, "grad_norm": 0.576502020788605, "learning_rate": 1.816977561503109e-05, "loss": 0.2637, "step": 69145 }, { "epoch": 2.018977825141239, "grad_norm": 0.5691453637719013, "learning_rate": 1.8167072181670723e-05, "loss": 0.2578, "step": 69150 }, { "epoch": 2.019123808411556, "grad_norm": 0.5599588617571882, "learning_rate": 1.8164368748310357e-05, "loss": 0.2501, "step": 69155 }, { "epoch": 2.0192697916818734, "grad_norm": 0.5579698934263823, "learning_rate": 1.8161665314949988e-05, "loss": 0.267, "step": 69160 }, { "epoch": 2.0194157749521904, "grad_norm": 0.5166142813288819, "learning_rate": 1.815896188158962e-05, "loss": 0.2556, "step": 69165 }, { "epoch": 2.019561758222508, "grad_norm": 0.5909635762919501, "learning_rate": 1.8156258448229252e-05, "loss": 0.2578, "step": 69170 }, { "epoch": 2.019707741492825, "grad_norm": 0.535894130069577, "learning_rate": 1.8153555014868883e-05, "loss": 0.2434, "step": 69175 }, { "epoch": 2.0198537247631423, "grad_norm": 0.5103987218111526, "learning_rate": 1.8150851581508517e-05, "loss": 0.2554, "step": 69180 }, { "epoch": 2.0199997080334593, "grad_norm": 0.5705401803194482, "learning_rate": 1.814814814814815e-05, "loss": 0.2618, "step": 69185 }, { "epoch": 2.0201456913037767, "grad_norm": 0.5198771909275443, "learning_rate": 1.814544471478778e-05, "loss": 0.2331, "step": 69190 }, { "epoch": 2.0202916745740938, "grad_norm": 0.5493415328285413, "learning_rate": 1.8142741281427415e-05, "loss": 0.2659, "step": 69195 }, { "epoch": 2.020437657844411, "grad_norm": 0.5615270757498516, "learning_rate": 1.8140037848067046e-05, "loss": 0.2544, "step": 69200 }, { "epoch": 2.020583641114728, "grad_norm": 0.5242929291024021, "learning_rate": 1.8137334414706677e-05, "loss": 0.2504, "step": 69205 }, { "epoch": 2.0207296243850457, "grad_norm": 0.5781582051331761, "learning_rate": 1.813463098134631e-05, "loss": 0.2541, "step": 69210 }, { "epoch": 2.0208756076553627, "grad_norm": 0.6091833104819091, "learning_rate": 1.8131927547985945e-05, "loss": 0.2588, "step": 69215 }, { "epoch": 2.02102159092568, "grad_norm": 0.5167283787869079, "learning_rate": 1.8129224114625575e-05, "loss": 0.2478, "step": 69220 }, { "epoch": 2.021167574195997, "grad_norm": 0.554477195874328, "learning_rate": 1.812652068126521e-05, "loss": 0.2619, "step": 69225 }, { "epoch": 2.021313557466314, "grad_norm": 0.556809239455426, "learning_rate": 1.812381724790484e-05, "loss": 0.2735, "step": 69230 }, { "epoch": 2.0214595407366316, "grad_norm": 0.5465793517666349, "learning_rate": 1.812111381454447e-05, "loss": 0.2629, "step": 69235 }, { "epoch": 2.0216055240069486, "grad_norm": 0.542088395661873, "learning_rate": 1.8118410381184105e-05, "loss": 0.2456, "step": 69240 }, { "epoch": 2.021751507277266, "grad_norm": 0.5280541268921662, "learning_rate": 1.811570694782374e-05, "loss": 0.2653, "step": 69245 }, { "epoch": 2.021897490547583, "grad_norm": 0.6390509451175171, "learning_rate": 1.811300351446337e-05, "loss": 0.2506, "step": 69250 }, { "epoch": 2.0220434738179005, "grad_norm": 0.5208913221378131, "learning_rate": 1.8110300081103003e-05, "loss": 0.2555, "step": 69255 }, { "epoch": 2.0221894570882175, "grad_norm": 0.564902958296493, "learning_rate": 1.8107596647742634e-05, "loss": 0.2401, "step": 69260 }, { "epoch": 2.022335440358535, "grad_norm": 0.519546398765382, "learning_rate": 1.8104893214382265e-05, "loss": 0.2532, "step": 69265 }, { "epoch": 2.022481423628852, "grad_norm": 0.592888654675512, "learning_rate": 1.81021897810219e-05, "loss": 0.2542, "step": 69270 }, { "epoch": 2.0226274068991694, "grad_norm": 0.5410989404180124, "learning_rate": 1.8099486347661533e-05, "loss": 0.2493, "step": 69275 }, { "epoch": 2.0227733901694864, "grad_norm": 0.5542513449707255, "learning_rate": 1.8096782914301163e-05, "loss": 0.2654, "step": 69280 }, { "epoch": 2.022919373439804, "grad_norm": 0.5845867840907811, "learning_rate": 1.8094079480940794e-05, "loss": 0.2467, "step": 69285 }, { "epoch": 2.023065356710121, "grad_norm": 0.5924529035611288, "learning_rate": 1.8091376047580428e-05, "loss": 0.2556, "step": 69290 }, { "epoch": 2.0232113399804383, "grad_norm": 0.5397301491161561, "learning_rate": 1.808867261422006e-05, "loss": 0.2372, "step": 69295 }, { "epoch": 2.0233573232507553, "grad_norm": 0.5316437697894896, "learning_rate": 1.8085969180859692e-05, "loss": 0.2574, "step": 69300 }, { "epoch": 2.023503306521073, "grad_norm": 0.5322897260706482, "learning_rate": 1.8083265747499326e-05, "loss": 0.2721, "step": 69305 }, { "epoch": 2.02364928979139, "grad_norm": 0.57091841237333, "learning_rate": 1.8080562314138957e-05, "loss": 0.2584, "step": 69310 }, { "epoch": 2.0237952730617073, "grad_norm": 0.5410569395767824, "learning_rate": 1.8077858880778588e-05, "loss": 0.253, "step": 69315 }, { "epoch": 2.0239412563320243, "grad_norm": 0.5810916255741372, "learning_rate": 1.8075155447418222e-05, "loss": 0.259, "step": 69320 }, { "epoch": 2.0240872396023417, "grad_norm": 0.5165921549365602, "learning_rate": 1.8072452014057856e-05, "loss": 0.264, "step": 69325 }, { "epoch": 2.0242332228726587, "grad_norm": 0.5593297007250375, "learning_rate": 1.8069748580697486e-05, "loss": 0.2561, "step": 69330 }, { "epoch": 2.024379206142976, "grad_norm": 0.5666653776408456, "learning_rate": 1.806704514733712e-05, "loss": 0.2497, "step": 69335 }, { "epoch": 2.024525189413293, "grad_norm": 0.5633057250749478, "learning_rate": 1.806434171397675e-05, "loss": 0.2595, "step": 69340 }, { "epoch": 2.0246711726836106, "grad_norm": 0.5299868187384803, "learning_rate": 1.806163828061638e-05, "loss": 0.2631, "step": 69345 }, { "epoch": 2.0248171559539276, "grad_norm": 0.5629111632051862, "learning_rate": 1.8058934847256016e-05, "loss": 0.2691, "step": 69350 }, { "epoch": 2.024963139224245, "grad_norm": 0.5241851738323693, "learning_rate": 1.805623141389565e-05, "loss": 0.2692, "step": 69355 }, { "epoch": 2.025109122494562, "grad_norm": 0.5561495576688819, "learning_rate": 1.805352798053528e-05, "loss": 0.2677, "step": 69360 }, { "epoch": 2.0252551057648795, "grad_norm": 0.541115946261979, "learning_rate": 1.8050824547174914e-05, "loss": 0.261, "step": 69365 }, { "epoch": 2.0254010890351966, "grad_norm": 0.5458023819923594, "learning_rate": 1.8048121113814545e-05, "loss": 0.2604, "step": 69370 }, { "epoch": 2.025547072305514, "grad_norm": 0.5657222649197912, "learning_rate": 1.8045417680454175e-05, "loss": 0.268, "step": 69375 }, { "epoch": 2.025693055575831, "grad_norm": 0.5522179626324646, "learning_rate": 1.804271424709381e-05, "loss": 0.2679, "step": 69380 }, { "epoch": 2.025839038846148, "grad_norm": 0.5378346897415573, "learning_rate": 1.8040010813733444e-05, "loss": 0.2563, "step": 69385 }, { "epoch": 2.0259850221164655, "grad_norm": 0.5179331282404024, "learning_rate": 1.8037307380373074e-05, "loss": 0.2658, "step": 69390 }, { "epoch": 2.0261310053867825, "grad_norm": 0.5950824791863818, "learning_rate": 1.8034603947012708e-05, "loss": 0.2445, "step": 69395 }, { "epoch": 2.0262769886571, "grad_norm": 0.5875303588110579, "learning_rate": 1.803190051365234e-05, "loss": 0.2477, "step": 69400 }, { "epoch": 2.026422971927417, "grad_norm": 0.5475549172224143, "learning_rate": 1.802919708029197e-05, "loss": 0.2634, "step": 69405 }, { "epoch": 2.0265689551977344, "grad_norm": 0.5360711642228964, "learning_rate": 1.8026493646931607e-05, "loss": 0.2554, "step": 69410 }, { "epoch": 2.0267149384680514, "grad_norm": 0.5551655722325746, "learning_rate": 1.8023790213571237e-05, "loss": 0.2568, "step": 69415 }, { "epoch": 2.026860921738369, "grad_norm": 0.5061922426093043, "learning_rate": 1.8021086780210868e-05, "loss": 0.2726, "step": 69420 }, { "epoch": 2.027006905008686, "grad_norm": 0.5159067785202464, "learning_rate": 1.8018383346850502e-05, "loss": 0.247, "step": 69425 }, { "epoch": 2.0271528882790033, "grad_norm": 0.5519218438897141, "learning_rate": 1.8015679913490133e-05, "loss": 0.2454, "step": 69430 }, { "epoch": 2.0272988715493203, "grad_norm": 0.5496014510097902, "learning_rate": 1.8012976480129763e-05, "loss": 0.252, "step": 69435 }, { "epoch": 2.0274448548196378, "grad_norm": 0.5458108640599105, "learning_rate": 1.80102730467694e-05, "loss": 0.2632, "step": 69440 }, { "epoch": 2.0275908380899548, "grad_norm": 0.5623704361565596, "learning_rate": 1.800756961340903e-05, "loss": 0.2525, "step": 69445 }, { "epoch": 2.0277368213602722, "grad_norm": 0.5183750293134802, "learning_rate": 1.8004866180048662e-05, "loss": 0.2586, "step": 69450 }, { "epoch": 2.0278828046305892, "grad_norm": 0.5522908424401551, "learning_rate": 1.8002162746688296e-05, "loss": 0.2454, "step": 69455 }, { "epoch": 2.0280287879009067, "grad_norm": 0.5484888545160228, "learning_rate": 1.7999459313327927e-05, "loss": 0.2573, "step": 69460 }, { "epoch": 2.0281747711712237, "grad_norm": 0.539194120740263, "learning_rate": 1.7996755879967557e-05, "loss": 0.2661, "step": 69465 }, { "epoch": 2.028320754441541, "grad_norm": 0.5188852213105439, "learning_rate": 1.7994052446607195e-05, "loss": 0.2584, "step": 69470 }, { "epoch": 2.028466737711858, "grad_norm": 0.5270613089113291, "learning_rate": 1.7991349013246825e-05, "loss": 0.2643, "step": 69475 }, { "epoch": 2.0286127209821756, "grad_norm": 0.5851886126965221, "learning_rate": 1.7988645579886456e-05, "loss": 0.2558, "step": 69480 }, { "epoch": 2.0287587042524926, "grad_norm": 0.5719946424169482, "learning_rate": 1.798594214652609e-05, "loss": 0.2435, "step": 69485 }, { "epoch": 2.02890468752281, "grad_norm": 0.565349611331831, "learning_rate": 1.798323871316572e-05, "loss": 0.2494, "step": 69490 }, { "epoch": 2.029050670793127, "grad_norm": 0.5850802435322757, "learning_rate": 1.7980535279805354e-05, "loss": 0.2578, "step": 69495 }, { "epoch": 2.0291966540634445, "grad_norm": 0.6091971476273514, "learning_rate": 1.797783184644499e-05, "loss": 0.2602, "step": 69500 }, { "epoch": 2.0293426373337615, "grad_norm": 0.4970578175674246, "learning_rate": 1.797512841308462e-05, "loss": 0.2624, "step": 69505 }, { "epoch": 2.029488620604079, "grad_norm": 0.5301379298764692, "learning_rate": 1.797242497972425e-05, "loss": 0.2802, "step": 69510 }, { "epoch": 2.029634603874396, "grad_norm": 0.57720406608212, "learning_rate": 1.7969721546363884e-05, "loss": 0.2461, "step": 69515 }, { "epoch": 2.0297805871447134, "grad_norm": 0.577142483295449, "learning_rate": 1.7967018113003514e-05, "loss": 0.2684, "step": 69520 }, { "epoch": 2.0299265704150304, "grad_norm": 0.5444843108195481, "learning_rate": 1.796431467964315e-05, "loss": 0.2531, "step": 69525 }, { "epoch": 2.0300725536853474, "grad_norm": 0.5529310953177328, "learning_rate": 1.796161124628278e-05, "loss": 0.2702, "step": 69530 }, { "epoch": 2.030218536955665, "grad_norm": 0.5330285182997203, "learning_rate": 1.7958907812922413e-05, "loss": 0.2516, "step": 69535 }, { "epoch": 2.030364520225982, "grad_norm": 0.5951709891923228, "learning_rate": 1.7956204379562044e-05, "loss": 0.2505, "step": 69540 }, { "epoch": 2.0305105034962994, "grad_norm": 0.5488732276906798, "learning_rate": 1.7953500946201678e-05, "loss": 0.2485, "step": 69545 }, { "epoch": 2.0306564867666164, "grad_norm": 0.5556871739874589, "learning_rate": 1.7950797512841308e-05, "loss": 0.2535, "step": 69550 }, { "epoch": 2.030802470036934, "grad_norm": 0.5608408324760653, "learning_rate": 1.7948094079480942e-05, "loss": 0.2483, "step": 69555 }, { "epoch": 2.030948453307251, "grad_norm": 0.5729915832768321, "learning_rate": 1.7945390646120573e-05, "loss": 0.2703, "step": 69560 }, { "epoch": 2.0310944365775683, "grad_norm": 0.5767960772757276, "learning_rate": 1.7942687212760207e-05, "loss": 0.2511, "step": 69565 }, { "epoch": 2.0312404198478853, "grad_norm": 0.553495248970355, "learning_rate": 1.7939983779399838e-05, "loss": 0.2813, "step": 69570 }, { "epoch": 2.0313864031182027, "grad_norm": 0.5966729498125741, "learning_rate": 1.793728034603947e-05, "loss": 0.2625, "step": 69575 }, { "epoch": 2.0315323863885197, "grad_norm": 0.5294056249728689, "learning_rate": 1.7934576912679106e-05, "loss": 0.2382, "step": 69580 }, { "epoch": 2.031678369658837, "grad_norm": 0.5477399458445814, "learning_rate": 1.7931873479318736e-05, "loss": 0.2719, "step": 69585 }, { "epoch": 2.031824352929154, "grad_norm": 0.5786717149525858, "learning_rate": 1.7929170045958367e-05, "loss": 0.2526, "step": 69590 }, { "epoch": 2.0319703361994716, "grad_norm": 0.5487096884423615, "learning_rate": 1.7926466612598e-05, "loss": 0.2532, "step": 69595 }, { "epoch": 2.0321163194697887, "grad_norm": 0.5935321797736214, "learning_rate": 1.792376317923763e-05, "loss": 0.2666, "step": 69600 }, { "epoch": 2.032262302740106, "grad_norm": 0.5358355547762735, "learning_rate": 1.7921059745877265e-05, "loss": 0.2427, "step": 69605 }, { "epoch": 2.032408286010423, "grad_norm": 0.5328715844376172, "learning_rate": 1.79183563125169e-05, "loss": 0.2527, "step": 69610 }, { "epoch": 2.0325542692807406, "grad_norm": 0.5587694744076795, "learning_rate": 1.791565287915653e-05, "loss": 0.2565, "step": 69615 }, { "epoch": 2.0327002525510576, "grad_norm": 0.6038771296659746, "learning_rate": 1.791294944579616e-05, "loss": 0.2524, "step": 69620 }, { "epoch": 2.032846235821375, "grad_norm": 0.5149474600873039, "learning_rate": 1.7910246012435795e-05, "loss": 0.2593, "step": 69625 }, { "epoch": 2.032992219091692, "grad_norm": 0.5780985307890173, "learning_rate": 1.7907542579075425e-05, "loss": 0.2558, "step": 69630 }, { "epoch": 2.0331382023620095, "grad_norm": 0.5176798770201351, "learning_rate": 1.790483914571506e-05, "loss": 0.2553, "step": 69635 }, { "epoch": 2.0332841856323265, "grad_norm": 0.5702973164086017, "learning_rate": 1.7902135712354693e-05, "loss": 0.26, "step": 69640 }, { "epoch": 2.033430168902644, "grad_norm": 0.5470503089188385, "learning_rate": 1.7899432278994324e-05, "loss": 0.2445, "step": 69645 }, { "epoch": 2.033576152172961, "grad_norm": 0.5874535634897536, "learning_rate": 1.7896728845633955e-05, "loss": 0.2583, "step": 69650 }, { "epoch": 2.0337221354432784, "grad_norm": 0.5608532856508786, "learning_rate": 1.789402541227359e-05, "loss": 0.2701, "step": 69655 }, { "epoch": 2.0338681187135954, "grad_norm": 0.5356637380956619, "learning_rate": 1.789132197891322e-05, "loss": 0.2524, "step": 69660 }, { "epoch": 2.034014101983913, "grad_norm": 0.5641416267523822, "learning_rate": 1.7888618545552853e-05, "loss": 0.2529, "step": 69665 }, { "epoch": 2.03416008525423, "grad_norm": 0.561500978998433, "learning_rate": 1.7885915112192487e-05, "loss": 0.2575, "step": 69670 }, { "epoch": 2.034306068524547, "grad_norm": 0.577780501759419, "learning_rate": 1.7883211678832118e-05, "loss": 0.2452, "step": 69675 }, { "epoch": 2.0344520517948643, "grad_norm": 0.5389297937767559, "learning_rate": 1.788050824547175e-05, "loss": 0.2522, "step": 69680 }, { "epoch": 2.0345980350651813, "grad_norm": 0.5465746085800832, "learning_rate": 1.7877804812111383e-05, "loss": 0.2534, "step": 69685 }, { "epoch": 2.034744018335499, "grad_norm": 0.5143112620333204, "learning_rate": 1.7875101378751013e-05, "loss": 0.2526, "step": 69690 }, { "epoch": 2.034890001605816, "grad_norm": 0.5958252627986108, "learning_rate": 1.7872397945390647e-05, "loss": 0.2752, "step": 69695 }, { "epoch": 2.0350359848761332, "grad_norm": 0.5511249170035112, "learning_rate": 1.786969451203028e-05, "loss": 0.2703, "step": 69700 }, { "epoch": 2.0351819681464502, "grad_norm": 0.5220384002746009, "learning_rate": 1.7866991078669912e-05, "loss": 0.2375, "step": 69705 }, { "epoch": 2.0353279514167677, "grad_norm": 0.5493164293151082, "learning_rate": 1.7864287645309542e-05, "loss": 0.249, "step": 69710 }, { "epoch": 2.0354739346870847, "grad_norm": 0.5074192693562612, "learning_rate": 1.7861584211949176e-05, "loss": 0.2561, "step": 69715 }, { "epoch": 2.035619917957402, "grad_norm": 0.4992244800338131, "learning_rate": 1.7858880778588807e-05, "loss": 0.2557, "step": 69720 }, { "epoch": 2.035765901227719, "grad_norm": 0.557975383864823, "learning_rate": 1.785617734522844e-05, "loss": 0.2525, "step": 69725 }, { "epoch": 2.0359118844980366, "grad_norm": 0.5031355283298925, "learning_rate": 1.7853473911868075e-05, "loss": 0.2399, "step": 69730 }, { "epoch": 2.0360578677683536, "grad_norm": 0.5497730683016532, "learning_rate": 1.7850770478507706e-05, "loss": 0.2591, "step": 69735 }, { "epoch": 2.036203851038671, "grad_norm": 0.549247794935198, "learning_rate": 1.7848067045147336e-05, "loss": 0.2662, "step": 69740 }, { "epoch": 2.036349834308988, "grad_norm": 0.5748532465163952, "learning_rate": 1.784536361178697e-05, "loss": 0.2821, "step": 69745 }, { "epoch": 2.0364958175793055, "grad_norm": 0.5670453112131927, "learning_rate": 1.7842660178426604e-05, "loss": 0.2405, "step": 69750 }, { "epoch": 2.0366418008496225, "grad_norm": 0.6113058625874097, "learning_rate": 1.7839956745066235e-05, "loss": 0.2615, "step": 69755 }, { "epoch": 2.03678778411994, "grad_norm": 0.5911607794903224, "learning_rate": 1.783725331170587e-05, "loss": 0.2709, "step": 69760 }, { "epoch": 2.036933767390257, "grad_norm": 0.5791586505514481, "learning_rate": 1.78345498783455e-05, "loss": 0.2386, "step": 69765 }, { "epoch": 2.0370797506605745, "grad_norm": 0.6236653811636265, "learning_rate": 1.783184644498513e-05, "loss": 0.2621, "step": 69770 }, { "epoch": 2.0372257339308915, "grad_norm": 0.6242294393324741, "learning_rate": 1.7829143011624764e-05, "loss": 0.2633, "step": 69775 }, { "epoch": 2.037371717201209, "grad_norm": 0.6140855837599626, "learning_rate": 1.7826439578264398e-05, "loss": 0.2588, "step": 69780 }, { "epoch": 2.037517700471526, "grad_norm": 0.5696186170653283, "learning_rate": 1.782373614490403e-05, "loss": 0.25, "step": 69785 }, { "epoch": 2.0376636837418434, "grad_norm": 0.5661887609033068, "learning_rate": 1.7821032711543663e-05, "loss": 0.2569, "step": 69790 }, { "epoch": 2.0378096670121604, "grad_norm": 0.6089583980100448, "learning_rate": 1.7818329278183294e-05, "loss": 0.2773, "step": 69795 }, { "epoch": 2.037955650282478, "grad_norm": 0.5754387231871269, "learning_rate": 1.7815625844822924e-05, "loss": 0.2686, "step": 69800 }, { "epoch": 2.038101633552795, "grad_norm": 0.5939524392174755, "learning_rate": 1.7812922411462558e-05, "loss": 0.2551, "step": 69805 }, { "epoch": 2.0382476168231123, "grad_norm": 0.5725999524503049, "learning_rate": 1.7810218978102192e-05, "loss": 0.2652, "step": 69810 }, { "epoch": 2.0383936000934293, "grad_norm": 0.5868149145519129, "learning_rate": 1.7807515544741823e-05, "loss": 0.2478, "step": 69815 }, { "epoch": 2.0385395833637463, "grad_norm": 0.6133502768476486, "learning_rate": 1.7804812111381457e-05, "loss": 0.2579, "step": 69820 }, { "epoch": 2.0386855666340638, "grad_norm": 0.5427900812873306, "learning_rate": 1.7802108678021087e-05, "loss": 0.2535, "step": 69825 }, { "epoch": 2.0388315499043808, "grad_norm": 0.5391604329496543, "learning_rate": 1.7799405244660718e-05, "loss": 0.2369, "step": 69830 }, { "epoch": 2.038977533174698, "grad_norm": 0.6285959327954249, "learning_rate": 1.7796701811300352e-05, "loss": 0.2641, "step": 69835 }, { "epoch": 2.039123516445015, "grad_norm": 0.5735125602825613, "learning_rate": 1.7793998377939986e-05, "loss": 0.2688, "step": 69840 }, { "epoch": 2.0392694997153327, "grad_norm": 0.5297468660655217, "learning_rate": 1.7791294944579617e-05, "loss": 0.2452, "step": 69845 }, { "epoch": 2.0394154829856497, "grad_norm": 0.5564750515343729, "learning_rate": 1.778859151121925e-05, "loss": 0.242, "step": 69850 }, { "epoch": 2.039561466255967, "grad_norm": 0.5684847481784975, "learning_rate": 1.778588807785888e-05, "loss": 0.2568, "step": 69855 }, { "epoch": 2.039707449526284, "grad_norm": 0.5594400656505467, "learning_rate": 1.7783184644498512e-05, "loss": 0.2536, "step": 69860 }, { "epoch": 2.0398534327966016, "grad_norm": 0.5465075719298252, "learning_rate": 1.7780481211138146e-05, "loss": 0.2619, "step": 69865 }, { "epoch": 2.0399994160669186, "grad_norm": 0.5253353718744815, "learning_rate": 1.777777777777778e-05, "loss": 0.2529, "step": 69870 }, { "epoch": 2.040145399337236, "grad_norm": 0.5879867838767496, "learning_rate": 1.777507434441741e-05, "loss": 0.2432, "step": 69875 }, { "epoch": 2.040291382607553, "grad_norm": 0.5453580027989783, "learning_rate": 1.7772370911057045e-05, "loss": 0.2661, "step": 69880 }, { "epoch": 2.0404373658778705, "grad_norm": 0.612127360331986, "learning_rate": 1.7769667477696675e-05, "loss": 0.271, "step": 69885 }, { "epoch": 2.0405833491481875, "grad_norm": 0.5428867035458829, "learning_rate": 1.7766964044336306e-05, "loss": 0.2662, "step": 69890 }, { "epoch": 2.040729332418505, "grad_norm": 0.5694633221663263, "learning_rate": 1.776426061097594e-05, "loss": 0.2539, "step": 69895 }, { "epoch": 2.040875315688822, "grad_norm": 0.5302156025891226, "learning_rate": 1.7761557177615574e-05, "loss": 0.2531, "step": 69900 }, { "epoch": 2.0410212989591394, "grad_norm": 0.5965091295776629, "learning_rate": 1.7758853744255205e-05, "loss": 0.2545, "step": 69905 }, { "epoch": 2.0411672822294564, "grad_norm": 0.5981672766263111, "learning_rate": 1.7756150310894835e-05, "loss": 0.2628, "step": 69910 }, { "epoch": 2.041313265499774, "grad_norm": 0.5686323286626901, "learning_rate": 1.775344687753447e-05, "loss": 0.2501, "step": 69915 }, { "epoch": 2.041459248770091, "grad_norm": 0.5642495660061008, "learning_rate": 1.7750743444174103e-05, "loss": 0.2482, "step": 69920 }, { "epoch": 2.0416052320404083, "grad_norm": 0.5974159424221358, "learning_rate": 1.7748040010813734e-05, "loss": 0.2643, "step": 69925 }, { "epoch": 2.0417512153107253, "grad_norm": 0.5591868572698491, "learning_rate": 1.7745336577453368e-05, "loss": 0.259, "step": 69930 }, { "epoch": 2.041897198581043, "grad_norm": 0.5752904874397816, "learning_rate": 1.7742633144093e-05, "loss": 0.2766, "step": 69935 }, { "epoch": 2.04204318185136, "grad_norm": 0.5495941451116233, "learning_rate": 1.773992971073263e-05, "loss": 0.2599, "step": 69940 }, { "epoch": 2.0421891651216773, "grad_norm": 0.6388625452562422, "learning_rate": 1.7737226277372263e-05, "loss": 0.2521, "step": 69945 }, { "epoch": 2.0423351483919943, "grad_norm": 0.5216457510176963, "learning_rate": 1.7734522844011897e-05, "loss": 0.2609, "step": 69950 }, { "epoch": 2.0424811316623117, "grad_norm": 0.5965626096610197, "learning_rate": 1.7731819410651528e-05, "loss": 0.2723, "step": 69955 }, { "epoch": 2.0426271149326287, "grad_norm": 0.5252334208112073, "learning_rate": 1.7729115977291162e-05, "loss": 0.2362, "step": 69960 }, { "epoch": 2.0427730982029457, "grad_norm": 0.5610476753296354, "learning_rate": 1.7726412543930792e-05, "loss": 0.262, "step": 69965 }, { "epoch": 2.042919081473263, "grad_norm": 0.5833800694349406, "learning_rate": 1.7723709110570423e-05, "loss": 0.2563, "step": 69970 }, { "epoch": 2.04306506474358, "grad_norm": 0.4905594016768396, "learning_rate": 1.7721005677210057e-05, "loss": 0.2338, "step": 69975 }, { "epoch": 2.0432110480138976, "grad_norm": 0.5425912124336738, "learning_rate": 1.771830224384969e-05, "loss": 0.2554, "step": 69980 }, { "epoch": 2.0433570312842146, "grad_norm": 0.5598813903591603, "learning_rate": 1.771559881048932e-05, "loss": 0.2603, "step": 69985 }, { "epoch": 2.043503014554532, "grad_norm": 0.5759090644001339, "learning_rate": 1.7712895377128956e-05, "loss": 0.251, "step": 69990 }, { "epoch": 2.043648997824849, "grad_norm": 0.5429368157441149, "learning_rate": 1.7710191943768586e-05, "loss": 0.2369, "step": 69995 }, { "epoch": 2.0437949810951666, "grad_norm": 0.5823121421579159, "learning_rate": 1.7707488510408217e-05, "loss": 0.2577, "step": 70000 }, { "epoch": 2.0439409643654836, "grad_norm": 0.4919349868601757, "learning_rate": 1.7704785077047854e-05, "loss": 0.2428, "step": 70005 }, { "epoch": 2.044086947635801, "grad_norm": 0.5401871434322058, "learning_rate": 1.7702081643687485e-05, "loss": 0.2485, "step": 70010 }, { "epoch": 2.044232930906118, "grad_norm": 0.5185343760980361, "learning_rate": 1.7699378210327115e-05, "loss": 0.2481, "step": 70015 }, { "epoch": 2.0443789141764355, "grad_norm": 0.5669799248009048, "learning_rate": 1.769667477696675e-05, "loss": 0.2447, "step": 70020 }, { "epoch": 2.0445248974467525, "grad_norm": 0.5487486760477317, "learning_rate": 1.769397134360638e-05, "loss": 0.2393, "step": 70025 }, { "epoch": 2.04467088071707, "grad_norm": 0.5701507798338015, "learning_rate": 1.769126791024601e-05, "loss": 0.2551, "step": 70030 }, { "epoch": 2.044816863987387, "grad_norm": 0.6075598406422364, "learning_rate": 1.7688564476885648e-05, "loss": 0.2539, "step": 70035 }, { "epoch": 2.0449628472577044, "grad_norm": 0.5179023661844634, "learning_rate": 1.768586104352528e-05, "loss": 0.243, "step": 70040 }, { "epoch": 2.0451088305280214, "grad_norm": 0.5018397548248471, "learning_rate": 1.768315761016491e-05, "loss": 0.2372, "step": 70045 }, { "epoch": 2.045254813798339, "grad_norm": 0.5122853776726903, "learning_rate": 1.7680454176804543e-05, "loss": 0.2546, "step": 70050 }, { "epoch": 2.045400797068656, "grad_norm": 0.5636110236539355, "learning_rate": 1.7677750743444174e-05, "loss": 0.2605, "step": 70055 }, { "epoch": 2.0455467803389733, "grad_norm": 0.592506442847941, "learning_rate": 1.7675047310083805e-05, "loss": 0.2621, "step": 70060 }, { "epoch": 2.0456927636092903, "grad_norm": 0.5159522806100246, "learning_rate": 1.7672343876723442e-05, "loss": 0.2539, "step": 70065 }, { "epoch": 2.0458387468796078, "grad_norm": 0.5764311045232103, "learning_rate": 1.7669640443363073e-05, "loss": 0.2663, "step": 70070 }, { "epoch": 2.0459847301499248, "grad_norm": 0.5650517239995579, "learning_rate": 1.7666937010002703e-05, "loss": 0.2695, "step": 70075 }, { "epoch": 2.046130713420242, "grad_norm": 0.5641378450870491, "learning_rate": 1.7664233576642337e-05, "loss": 0.2565, "step": 70080 }, { "epoch": 2.0462766966905592, "grad_norm": 0.5661540506022429, "learning_rate": 1.7661530143281968e-05, "loss": 0.2565, "step": 70085 }, { "epoch": 2.0464226799608767, "grad_norm": 0.5442832811629831, "learning_rate": 1.7658826709921602e-05, "loss": 0.2661, "step": 70090 }, { "epoch": 2.0465686632311937, "grad_norm": 0.5501048620462586, "learning_rate": 1.7656123276561236e-05, "loss": 0.2531, "step": 70095 }, { "epoch": 2.046714646501511, "grad_norm": 0.5862863014902318, "learning_rate": 1.7653419843200867e-05, "loss": 0.2748, "step": 70100 }, { "epoch": 2.046860629771828, "grad_norm": 0.5965613478129936, "learning_rate": 1.7650716409840497e-05, "loss": 0.2633, "step": 70105 }, { "epoch": 2.0470066130421456, "grad_norm": 0.5747113037363147, "learning_rate": 1.764801297648013e-05, "loss": 0.2526, "step": 70110 }, { "epoch": 2.0471525963124626, "grad_norm": 0.5347917899874772, "learning_rate": 1.7645309543119762e-05, "loss": 0.2557, "step": 70115 }, { "epoch": 2.0472985795827796, "grad_norm": 0.557700798173357, "learning_rate": 1.7642606109759396e-05, "loss": 0.2508, "step": 70120 }, { "epoch": 2.047444562853097, "grad_norm": 0.6035946661016456, "learning_rate": 1.763990267639903e-05, "loss": 0.2675, "step": 70125 }, { "epoch": 2.047590546123414, "grad_norm": 0.5660046281272992, "learning_rate": 1.763719924303866e-05, "loss": 0.2573, "step": 70130 }, { "epoch": 2.0477365293937315, "grad_norm": 0.6069876818637074, "learning_rate": 1.763449580967829e-05, "loss": 0.2489, "step": 70135 }, { "epoch": 2.0478825126640485, "grad_norm": 0.5635282434866883, "learning_rate": 1.7631792376317925e-05, "loss": 0.2431, "step": 70140 }, { "epoch": 2.048028495934366, "grad_norm": 0.5866386708171647, "learning_rate": 1.7629088942957556e-05, "loss": 0.2603, "step": 70145 }, { "epoch": 2.048174479204683, "grad_norm": 0.5960824114167578, "learning_rate": 1.762638550959719e-05, "loss": 0.2515, "step": 70150 }, { "epoch": 2.0483204624750004, "grad_norm": 0.5469336025730724, "learning_rate": 1.7623682076236824e-05, "loss": 0.2633, "step": 70155 }, { "epoch": 2.0484664457453174, "grad_norm": 0.5641077419499025, "learning_rate": 1.7620978642876454e-05, "loss": 0.2419, "step": 70160 }, { "epoch": 2.048612429015635, "grad_norm": 0.5860060736897671, "learning_rate": 1.7618275209516085e-05, "loss": 0.285, "step": 70165 }, { "epoch": 2.048758412285952, "grad_norm": 0.5836298491206752, "learning_rate": 1.761557177615572e-05, "loss": 0.2663, "step": 70170 }, { "epoch": 2.0489043955562694, "grad_norm": 0.5385621015499508, "learning_rate": 1.7612868342795353e-05, "loss": 0.2564, "step": 70175 }, { "epoch": 2.0490503788265864, "grad_norm": 0.5802409655582005, "learning_rate": 1.7610164909434984e-05, "loss": 0.2811, "step": 70180 }, { "epoch": 2.049196362096904, "grad_norm": 0.5459918179867888, "learning_rate": 1.7607461476074614e-05, "loss": 0.255, "step": 70185 }, { "epoch": 2.049342345367221, "grad_norm": 0.5336823923452177, "learning_rate": 1.7604758042714248e-05, "loss": 0.2477, "step": 70190 }, { "epoch": 2.0494883286375383, "grad_norm": 0.5291797345076625, "learning_rate": 1.760205460935388e-05, "loss": 0.2433, "step": 70195 }, { "epoch": 2.0496343119078553, "grad_norm": 0.5396616551348051, "learning_rate": 1.7599351175993513e-05, "loss": 0.2449, "step": 70200 }, { "epoch": 2.0497802951781727, "grad_norm": 0.5416242282376276, "learning_rate": 1.7596647742633147e-05, "loss": 0.2627, "step": 70205 }, { "epoch": 2.0499262784484897, "grad_norm": 0.5716271910435838, "learning_rate": 1.7593944309272778e-05, "loss": 0.2491, "step": 70210 }, { "epoch": 2.050072261718807, "grad_norm": 0.5428216924395988, "learning_rate": 1.7591240875912408e-05, "loss": 0.2508, "step": 70215 }, { "epoch": 2.050218244989124, "grad_norm": 0.5816326654597241, "learning_rate": 1.7588537442552042e-05, "loss": 0.2601, "step": 70220 }, { "epoch": 2.0503642282594416, "grad_norm": 0.5633757909650516, "learning_rate": 1.7585834009191673e-05, "loss": 0.254, "step": 70225 }, { "epoch": 2.0505102115297587, "grad_norm": 0.570774276024939, "learning_rate": 1.7583130575831307e-05, "loss": 0.2553, "step": 70230 }, { "epoch": 2.050656194800076, "grad_norm": 0.5408921494393211, "learning_rate": 1.758042714247094e-05, "loss": 0.2529, "step": 70235 }, { "epoch": 2.050802178070393, "grad_norm": 0.5604526639296796, "learning_rate": 1.757772370911057e-05, "loss": 0.2521, "step": 70240 }, { "epoch": 2.0509481613407106, "grad_norm": 0.5522910348843872, "learning_rate": 1.7575020275750202e-05, "loss": 0.2616, "step": 70245 }, { "epoch": 2.0510941446110276, "grad_norm": 0.6240777791717101, "learning_rate": 1.7572316842389836e-05, "loss": 0.2708, "step": 70250 }, { "epoch": 2.0512401278813446, "grad_norm": 0.5537425128240068, "learning_rate": 1.7569613409029467e-05, "loss": 0.2598, "step": 70255 }, { "epoch": 2.051386111151662, "grad_norm": 0.547010087517646, "learning_rate": 1.75669099756691e-05, "loss": 0.2658, "step": 70260 }, { "epoch": 2.051532094421979, "grad_norm": 0.5420803958579449, "learning_rate": 1.7564206542308735e-05, "loss": 0.2458, "step": 70265 }, { "epoch": 2.0516780776922965, "grad_norm": 0.576973371668563, "learning_rate": 1.7561503108948365e-05, "loss": 0.2689, "step": 70270 }, { "epoch": 2.0518240609626135, "grad_norm": 0.5688163251689271, "learning_rate": 1.7558799675587996e-05, "loss": 0.2473, "step": 70275 }, { "epoch": 2.051970044232931, "grad_norm": 0.5611999058640041, "learning_rate": 1.755609624222763e-05, "loss": 0.2535, "step": 70280 }, { "epoch": 2.052116027503248, "grad_norm": 0.5742127643065037, "learning_rate": 1.755339280886726e-05, "loss": 0.2471, "step": 70285 }, { "epoch": 2.0522620107735654, "grad_norm": 0.5370107637928034, "learning_rate": 1.7550689375506895e-05, "loss": 0.2436, "step": 70290 }, { "epoch": 2.0524079940438824, "grad_norm": 0.5866235595274171, "learning_rate": 1.754798594214653e-05, "loss": 0.2636, "step": 70295 }, { "epoch": 2.0525539773142, "grad_norm": 0.579291665747817, "learning_rate": 1.754528250878616e-05, "loss": 0.259, "step": 70300 }, { "epoch": 2.052699960584517, "grad_norm": 0.5613609621228283, "learning_rate": 1.754257907542579e-05, "loss": 0.2633, "step": 70305 }, { "epoch": 2.0528459438548343, "grad_norm": 0.5453365694418902, "learning_rate": 1.7539875642065424e-05, "loss": 0.256, "step": 70310 }, { "epoch": 2.0529919271251513, "grad_norm": 0.554698568947294, "learning_rate": 1.7537172208705055e-05, "loss": 0.2547, "step": 70315 }, { "epoch": 2.053137910395469, "grad_norm": 0.5619680460534003, "learning_rate": 1.753446877534469e-05, "loss": 0.2573, "step": 70320 }, { "epoch": 2.053283893665786, "grad_norm": 0.5432020031210935, "learning_rate": 1.7531765341984323e-05, "loss": 0.269, "step": 70325 }, { "epoch": 2.0534298769361032, "grad_norm": 0.5736484112698105, "learning_rate": 1.7529061908623953e-05, "loss": 0.2569, "step": 70330 }, { "epoch": 2.0535758602064202, "grad_norm": 0.5708840383396939, "learning_rate": 1.7526358475263584e-05, "loss": 0.2476, "step": 70335 }, { "epoch": 2.0537218434767377, "grad_norm": 0.5924432204536236, "learning_rate": 1.7523655041903218e-05, "loss": 0.246, "step": 70340 }, { "epoch": 2.0538678267470547, "grad_norm": 0.5270652106121236, "learning_rate": 1.7520951608542852e-05, "loss": 0.246, "step": 70345 }, { "epoch": 2.054013810017372, "grad_norm": 0.5743693009545658, "learning_rate": 1.7518248175182482e-05, "loss": 0.2622, "step": 70350 }, { "epoch": 2.054159793287689, "grad_norm": 0.5622389778987148, "learning_rate": 1.7515544741822116e-05, "loss": 0.2486, "step": 70355 }, { "epoch": 2.0543057765580066, "grad_norm": 0.5520336935801831, "learning_rate": 1.7512841308461747e-05, "loss": 0.2565, "step": 70360 }, { "epoch": 2.0544517598283236, "grad_norm": 0.5714824798515139, "learning_rate": 1.7510137875101378e-05, "loss": 0.2644, "step": 70365 }, { "epoch": 2.054597743098641, "grad_norm": 0.5531110291806226, "learning_rate": 1.7507434441741012e-05, "loss": 0.243, "step": 70370 }, { "epoch": 2.054743726368958, "grad_norm": 0.5610362499554049, "learning_rate": 1.7504731008380646e-05, "loss": 0.2555, "step": 70375 }, { "epoch": 2.0548897096392755, "grad_norm": 0.5674116728283157, "learning_rate": 1.7502027575020276e-05, "loss": 0.248, "step": 70380 }, { "epoch": 2.0550356929095925, "grad_norm": 0.5630665525373405, "learning_rate": 1.749932414165991e-05, "loss": 0.2714, "step": 70385 }, { "epoch": 2.05518167617991, "grad_norm": 0.5834188022060628, "learning_rate": 1.749662070829954e-05, "loss": 0.2397, "step": 70390 }, { "epoch": 2.055327659450227, "grad_norm": 0.5914036704840039, "learning_rate": 1.749391727493917e-05, "loss": 0.2769, "step": 70395 }, { "epoch": 2.0554736427205444, "grad_norm": 0.5741947969419722, "learning_rate": 1.7491213841578806e-05, "loss": 0.2578, "step": 70400 }, { "epoch": 2.0556196259908615, "grad_norm": 0.5338801074134186, "learning_rate": 1.748851040821844e-05, "loss": 0.2656, "step": 70405 }, { "epoch": 2.0557656092611785, "grad_norm": 0.6077472144631401, "learning_rate": 1.748580697485807e-05, "loss": 0.2416, "step": 70410 }, { "epoch": 2.055911592531496, "grad_norm": 0.5688611163800523, "learning_rate": 1.7483103541497704e-05, "loss": 0.242, "step": 70415 }, { "epoch": 2.056057575801813, "grad_norm": 0.5057077552347423, "learning_rate": 1.7480400108137335e-05, "loss": 0.2506, "step": 70420 }, { "epoch": 2.0562035590721304, "grad_norm": 0.5555471071944117, "learning_rate": 1.7477696674776966e-05, "loss": 0.2566, "step": 70425 }, { "epoch": 2.0563495423424474, "grad_norm": 0.5412937750018646, "learning_rate": 1.74749932414166e-05, "loss": 0.2325, "step": 70430 }, { "epoch": 2.056495525612765, "grad_norm": 0.5362018528121458, "learning_rate": 1.7472289808056234e-05, "loss": 0.2564, "step": 70435 }, { "epoch": 2.056641508883082, "grad_norm": 0.5576700079556952, "learning_rate": 1.7469586374695864e-05, "loss": 0.2443, "step": 70440 }, { "epoch": 2.0567874921533993, "grad_norm": 0.5732822316440254, "learning_rate": 1.7466882941335498e-05, "loss": 0.2769, "step": 70445 }, { "epoch": 2.0569334754237163, "grad_norm": 0.567233270900955, "learning_rate": 1.746417950797513e-05, "loss": 0.2366, "step": 70450 }, { "epoch": 2.0570794586940337, "grad_norm": 0.5309347165949767, "learning_rate": 1.746147607461476e-05, "loss": 0.2604, "step": 70455 }, { "epoch": 2.0572254419643508, "grad_norm": 0.5168731312695538, "learning_rate": 1.7458772641254393e-05, "loss": 0.2533, "step": 70460 }, { "epoch": 2.057371425234668, "grad_norm": 0.5673799197948834, "learning_rate": 1.7456069207894027e-05, "loss": 0.2663, "step": 70465 }, { "epoch": 2.057517408504985, "grad_norm": 0.5737144562323658, "learning_rate": 1.7453365774533658e-05, "loss": 0.2487, "step": 70470 }, { "epoch": 2.0576633917753027, "grad_norm": 0.5735872544092164, "learning_rate": 1.7450662341173292e-05, "loss": 0.2455, "step": 70475 }, { "epoch": 2.0578093750456197, "grad_norm": 0.6085818935857181, "learning_rate": 1.7447958907812923e-05, "loss": 0.2552, "step": 70480 }, { "epoch": 2.057955358315937, "grad_norm": 0.5429642155657894, "learning_rate": 1.7445255474452553e-05, "loss": 0.2366, "step": 70485 }, { "epoch": 2.058101341586254, "grad_norm": 0.5757377761221292, "learning_rate": 1.7442552041092187e-05, "loss": 0.259, "step": 70490 }, { "epoch": 2.0582473248565716, "grad_norm": 0.5518352266238756, "learning_rate": 1.743984860773182e-05, "loss": 0.2446, "step": 70495 }, { "epoch": 2.0583933081268886, "grad_norm": 0.5555965670518305, "learning_rate": 1.7437145174371452e-05, "loss": 0.2659, "step": 70500 }, { "epoch": 2.058539291397206, "grad_norm": 0.5760085227280395, "learning_rate": 1.7434441741011086e-05, "loss": 0.2636, "step": 70505 }, { "epoch": 2.058685274667523, "grad_norm": 0.5910114011444778, "learning_rate": 1.7431738307650717e-05, "loss": 0.2594, "step": 70510 }, { "epoch": 2.0588312579378405, "grad_norm": 0.5543807056708234, "learning_rate": 1.742903487429035e-05, "loss": 0.2656, "step": 70515 }, { "epoch": 2.0589772412081575, "grad_norm": 0.5289830226131707, "learning_rate": 1.742633144092998e-05, "loss": 0.245, "step": 70520 }, { "epoch": 2.059123224478475, "grad_norm": 0.5316266550357499, "learning_rate": 1.7423628007569615e-05, "loss": 0.245, "step": 70525 }, { "epoch": 2.059269207748792, "grad_norm": 0.5034991535290197, "learning_rate": 1.7420924574209246e-05, "loss": 0.2561, "step": 70530 }, { "epoch": 2.0594151910191094, "grad_norm": 0.5864622581181248, "learning_rate": 1.741822114084888e-05, "loss": 0.2639, "step": 70535 }, { "epoch": 2.0595611742894264, "grad_norm": 0.6114763390193048, "learning_rate": 1.741551770748851e-05, "loss": 0.2477, "step": 70540 }, { "epoch": 2.0597071575597434, "grad_norm": 0.6276431288216361, "learning_rate": 1.7412814274128145e-05, "loss": 0.2648, "step": 70545 }, { "epoch": 2.059853140830061, "grad_norm": 0.5968584653966267, "learning_rate": 1.7410110840767775e-05, "loss": 0.2759, "step": 70550 }, { "epoch": 2.059999124100378, "grad_norm": 0.5795534191413539, "learning_rate": 1.740740740740741e-05, "loss": 0.2629, "step": 70555 }, { "epoch": 2.0601451073706953, "grad_norm": 0.5267063283684663, "learning_rate": 1.740470397404704e-05, "loss": 0.2437, "step": 70560 }, { "epoch": 2.0602910906410123, "grad_norm": 0.5745794014222706, "learning_rate": 1.740200054068667e-05, "loss": 0.2677, "step": 70565 }, { "epoch": 2.06043707391133, "grad_norm": 0.5901688981407794, "learning_rate": 1.7399297107326304e-05, "loss": 0.2542, "step": 70570 }, { "epoch": 2.060583057181647, "grad_norm": 0.5531918062959663, "learning_rate": 1.739659367396594e-05, "loss": 0.2713, "step": 70575 }, { "epoch": 2.0607290404519643, "grad_norm": 0.5357450202854559, "learning_rate": 1.739389024060557e-05, "loss": 0.262, "step": 70580 }, { "epoch": 2.0608750237222813, "grad_norm": 0.5372740211391135, "learning_rate": 1.7391186807245203e-05, "loss": 0.256, "step": 70585 }, { "epoch": 2.0610210069925987, "grad_norm": 0.5674479871917464, "learning_rate": 1.7388483373884834e-05, "loss": 0.2648, "step": 70590 }, { "epoch": 2.0611669902629157, "grad_norm": 0.5990306291005183, "learning_rate": 1.7385779940524464e-05, "loss": 0.258, "step": 70595 }, { "epoch": 2.061312973533233, "grad_norm": 0.5812927235824612, "learning_rate": 1.7383076507164102e-05, "loss": 0.2569, "step": 70600 }, { "epoch": 2.06145895680355, "grad_norm": 0.5886809464792524, "learning_rate": 1.7380373073803732e-05, "loss": 0.2668, "step": 70605 }, { "epoch": 2.0616049400738676, "grad_norm": 0.5480202226138939, "learning_rate": 1.7377669640443363e-05, "loss": 0.2635, "step": 70610 }, { "epoch": 2.0617509233441846, "grad_norm": 0.505356564956042, "learning_rate": 1.7374966207082997e-05, "loss": 0.2421, "step": 70615 }, { "epoch": 2.061896906614502, "grad_norm": 0.5878079705142512, "learning_rate": 1.7372262773722628e-05, "loss": 0.2433, "step": 70620 }, { "epoch": 2.062042889884819, "grad_norm": 0.595452055140936, "learning_rate": 1.7369559340362258e-05, "loss": 0.2473, "step": 70625 }, { "epoch": 2.0621888731551365, "grad_norm": 0.5557089578081637, "learning_rate": 1.7366855907001896e-05, "loss": 0.2485, "step": 70630 }, { "epoch": 2.0623348564254536, "grad_norm": 0.5827384306322011, "learning_rate": 1.7364152473641526e-05, "loss": 0.28, "step": 70635 }, { "epoch": 2.062480839695771, "grad_norm": 0.5814862369720682, "learning_rate": 1.7361449040281157e-05, "loss": 0.2592, "step": 70640 }, { "epoch": 2.062626822966088, "grad_norm": 0.5746883480497005, "learning_rate": 1.735874560692079e-05, "loss": 0.2509, "step": 70645 }, { "epoch": 2.0627728062364055, "grad_norm": 0.5971280543675732, "learning_rate": 1.735604217356042e-05, "loss": 0.2538, "step": 70650 }, { "epoch": 2.0629187895067225, "grad_norm": 0.54320941582112, "learning_rate": 1.7353338740200052e-05, "loss": 0.2362, "step": 70655 }, { "epoch": 2.06306477277704, "grad_norm": 0.5955258388355574, "learning_rate": 1.735063530683969e-05, "loss": 0.2468, "step": 70660 }, { "epoch": 2.063210756047357, "grad_norm": 0.5774514274029147, "learning_rate": 1.734793187347932e-05, "loss": 0.2587, "step": 70665 }, { "epoch": 2.0633567393176744, "grad_norm": 0.5492988672275311, "learning_rate": 1.734522844011895e-05, "loss": 0.2439, "step": 70670 }, { "epoch": 2.0635027225879914, "grad_norm": 0.6004373694088906, "learning_rate": 1.7342525006758585e-05, "loss": 0.2661, "step": 70675 }, { "epoch": 2.063648705858309, "grad_norm": 0.5591829461217086, "learning_rate": 1.7339821573398215e-05, "loss": 0.2631, "step": 70680 }, { "epoch": 2.063794689128626, "grad_norm": 0.5272599380338587, "learning_rate": 1.733711814003785e-05, "loss": 0.2649, "step": 70685 }, { "epoch": 2.0639406723989433, "grad_norm": 0.631375366756533, "learning_rate": 1.7334414706677483e-05, "loss": 0.2452, "step": 70690 }, { "epoch": 2.0640866556692603, "grad_norm": 0.5982532165334354, "learning_rate": 1.7331711273317114e-05, "loss": 0.2596, "step": 70695 }, { "epoch": 2.0642326389395773, "grad_norm": 0.5321076132526306, "learning_rate": 1.7329007839956745e-05, "loss": 0.2564, "step": 70700 }, { "epoch": 2.0643786222098948, "grad_norm": 0.5354058985798174, "learning_rate": 1.732630440659638e-05, "loss": 0.2711, "step": 70705 }, { "epoch": 2.0645246054802118, "grad_norm": 0.5527051951005042, "learning_rate": 1.732360097323601e-05, "loss": 0.2631, "step": 70710 }, { "epoch": 2.0646705887505292, "grad_norm": 0.5427129300942919, "learning_rate": 1.7320897539875643e-05, "loss": 0.2566, "step": 70715 }, { "epoch": 2.0648165720208462, "grad_norm": 0.5303202584647696, "learning_rate": 1.7318194106515277e-05, "loss": 0.2473, "step": 70720 }, { "epoch": 2.0649625552911637, "grad_norm": 0.5881516783662519, "learning_rate": 1.7315490673154908e-05, "loss": 0.2642, "step": 70725 }, { "epoch": 2.0651085385614807, "grad_norm": 0.502008732561882, "learning_rate": 1.731278723979454e-05, "loss": 0.2269, "step": 70730 }, { "epoch": 2.065254521831798, "grad_norm": 0.6156695075563069, "learning_rate": 1.7310083806434173e-05, "loss": 0.2564, "step": 70735 }, { "epoch": 2.065400505102115, "grad_norm": 0.5489895325176636, "learning_rate": 1.7307380373073803e-05, "loss": 0.258, "step": 70740 }, { "epoch": 2.0655464883724326, "grad_norm": 0.5571287696849646, "learning_rate": 1.7304676939713437e-05, "loss": 0.2657, "step": 70745 }, { "epoch": 2.0656924716427496, "grad_norm": 0.5530810251260544, "learning_rate": 1.730197350635307e-05, "loss": 0.2513, "step": 70750 }, { "epoch": 2.065838454913067, "grad_norm": 0.5487979757690981, "learning_rate": 1.7299270072992702e-05, "loss": 0.2637, "step": 70755 }, { "epoch": 2.065984438183384, "grad_norm": 0.5676586862266507, "learning_rate": 1.7296566639632332e-05, "loss": 0.2588, "step": 70760 }, { "epoch": 2.0661304214537015, "grad_norm": 0.5310711509745674, "learning_rate": 1.7293863206271966e-05, "loss": 0.2588, "step": 70765 }, { "epoch": 2.0662764047240185, "grad_norm": 0.5554973868513455, "learning_rate": 1.72911597729116e-05, "loss": 0.2517, "step": 70770 }, { "epoch": 2.066422387994336, "grad_norm": 0.5956369879681835, "learning_rate": 1.728845633955123e-05, "loss": 0.2512, "step": 70775 }, { "epoch": 2.066568371264653, "grad_norm": 0.5555098680670153, "learning_rate": 1.7285752906190865e-05, "loss": 0.268, "step": 70780 }, { "epoch": 2.0667143545349704, "grad_norm": 0.5451752094428652, "learning_rate": 1.7283049472830496e-05, "loss": 0.2415, "step": 70785 }, { "epoch": 2.0668603378052874, "grad_norm": 0.5813227640234335, "learning_rate": 1.7280346039470126e-05, "loss": 0.2588, "step": 70790 }, { "epoch": 2.067006321075605, "grad_norm": 0.567535021102739, "learning_rate": 1.727764260610976e-05, "loss": 0.2508, "step": 70795 }, { "epoch": 2.067152304345922, "grad_norm": 0.5561163819481147, "learning_rate": 1.7274939172749394e-05, "loss": 0.2683, "step": 70800 }, { "epoch": 2.0672982876162393, "grad_norm": 0.585303526638044, "learning_rate": 1.7272235739389025e-05, "loss": 0.2631, "step": 70805 }, { "epoch": 2.0674442708865564, "grad_norm": 0.5691216786472557, "learning_rate": 1.726953230602866e-05, "loss": 0.257, "step": 70810 }, { "epoch": 2.067590254156874, "grad_norm": 0.5489802541342436, "learning_rate": 1.726682887266829e-05, "loss": 0.2359, "step": 70815 }, { "epoch": 2.067736237427191, "grad_norm": 0.5352565761622011, "learning_rate": 1.726412543930792e-05, "loss": 0.2508, "step": 70820 }, { "epoch": 2.0678822206975083, "grad_norm": 0.6065718605287843, "learning_rate": 1.7261422005947554e-05, "loss": 0.2497, "step": 70825 }, { "epoch": 2.0680282039678253, "grad_norm": 0.5646132272990092, "learning_rate": 1.7258718572587188e-05, "loss": 0.2468, "step": 70830 }, { "epoch": 2.0681741872381423, "grad_norm": 0.586541925554334, "learning_rate": 1.725601513922682e-05, "loss": 0.273, "step": 70835 }, { "epoch": 2.0683201705084597, "grad_norm": 0.6024716764464719, "learning_rate": 1.725331170586645e-05, "loss": 0.2636, "step": 70840 }, { "epoch": 2.0684661537787767, "grad_norm": 0.5916932393698328, "learning_rate": 1.7250608272506084e-05, "loss": 0.2604, "step": 70845 }, { "epoch": 2.068612137049094, "grad_norm": 0.558482364174322, "learning_rate": 1.7247904839145714e-05, "loss": 0.2653, "step": 70850 }, { "epoch": 2.068758120319411, "grad_norm": 0.539619549146033, "learning_rate": 1.7245201405785348e-05, "loss": 0.2553, "step": 70855 }, { "epoch": 2.0689041035897286, "grad_norm": 0.5855209903578469, "learning_rate": 1.7242497972424982e-05, "loss": 0.2717, "step": 70860 }, { "epoch": 2.0690500868600457, "grad_norm": 0.5822902571191376, "learning_rate": 1.7239794539064613e-05, "loss": 0.258, "step": 70865 }, { "epoch": 2.069196070130363, "grad_norm": 0.5584759535711908, "learning_rate": 1.7237091105704243e-05, "loss": 0.2562, "step": 70870 }, { "epoch": 2.06934205340068, "grad_norm": 0.5939982495163201, "learning_rate": 1.7234387672343877e-05, "loss": 0.2424, "step": 70875 }, { "epoch": 2.0694880366709976, "grad_norm": 0.5955628401809072, "learning_rate": 1.7231684238983508e-05, "loss": 0.2464, "step": 70880 }, { "epoch": 2.0696340199413146, "grad_norm": 0.5779459237320155, "learning_rate": 1.7228980805623142e-05, "loss": 0.2618, "step": 70885 }, { "epoch": 2.069780003211632, "grad_norm": 0.5514543369549536, "learning_rate": 1.7226277372262776e-05, "loss": 0.2536, "step": 70890 }, { "epoch": 2.069925986481949, "grad_norm": 0.5239220116738851, "learning_rate": 1.7223573938902407e-05, "loss": 0.2297, "step": 70895 }, { "epoch": 2.0700719697522665, "grad_norm": 0.5436429920468437, "learning_rate": 1.7220870505542037e-05, "loss": 0.2521, "step": 70900 }, { "epoch": 2.0702179530225835, "grad_norm": 0.5417681729608476, "learning_rate": 1.721816707218167e-05, "loss": 0.2587, "step": 70905 }, { "epoch": 2.070363936292901, "grad_norm": 0.5333647307975758, "learning_rate": 1.7215463638821302e-05, "loss": 0.2588, "step": 70910 }, { "epoch": 2.070509919563218, "grad_norm": 0.5439461115727617, "learning_rate": 1.7212760205460936e-05, "loss": 0.2557, "step": 70915 }, { "epoch": 2.0706559028335354, "grad_norm": 0.5518277641705068, "learning_rate": 1.721005677210057e-05, "loss": 0.2418, "step": 70920 }, { "epoch": 2.0708018861038524, "grad_norm": 0.5763136244399294, "learning_rate": 1.72073533387402e-05, "loss": 0.2453, "step": 70925 }, { "epoch": 2.07094786937417, "grad_norm": 0.5953525867812228, "learning_rate": 1.720464990537983e-05, "loss": 0.2653, "step": 70930 }, { "epoch": 2.071093852644487, "grad_norm": 0.5355961438763618, "learning_rate": 1.7201946472019465e-05, "loss": 0.259, "step": 70935 }, { "epoch": 2.0712398359148043, "grad_norm": 0.5327168860801411, "learning_rate": 1.71992430386591e-05, "loss": 0.2527, "step": 70940 }, { "epoch": 2.0713858191851213, "grad_norm": 0.5707810335320549, "learning_rate": 1.719653960529873e-05, "loss": 0.2484, "step": 70945 }, { "epoch": 2.0715318024554388, "grad_norm": 0.6033131133321028, "learning_rate": 1.7193836171938364e-05, "loss": 0.2792, "step": 70950 }, { "epoch": 2.071677785725756, "grad_norm": 0.5489515646018206, "learning_rate": 1.7191132738577995e-05, "loss": 0.2494, "step": 70955 }, { "epoch": 2.0718237689960732, "grad_norm": 0.5378821128235531, "learning_rate": 1.7188429305217625e-05, "loss": 0.2315, "step": 70960 }, { "epoch": 2.0719697522663902, "grad_norm": 0.5440724915904165, "learning_rate": 1.718572587185726e-05, "loss": 0.2448, "step": 70965 }, { "epoch": 2.0721157355367077, "grad_norm": 0.5826098227586062, "learning_rate": 1.7183022438496893e-05, "loss": 0.2599, "step": 70970 }, { "epoch": 2.0722617188070247, "grad_norm": 0.5683438541899849, "learning_rate": 1.7180319005136524e-05, "loss": 0.257, "step": 70975 }, { "epoch": 2.072407702077342, "grad_norm": 0.587387706240196, "learning_rate": 1.7177615571776158e-05, "loss": 0.2583, "step": 70980 }, { "epoch": 2.072553685347659, "grad_norm": 0.5560140782828834, "learning_rate": 1.717491213841579e-05, "loss": 0.2567, "step": 70985 }, { "epoch": 2.072699668617976, "grad_norm": 0.624015563713432, "learning_rate": 1.717220870505542e-05, "loss": 0.2672, "step": 70990 }, { "epoch": 2.0728456518882936, "grad_norm": 0.5147473563909193, "learning_rate": 1.7169505271695053e-05, "loss": 0.2471, "step": 70995 }, { "epoch": 2.0729916351586106, "grad_norm": 0.49988858071772474, "learning_rate": 1.7166801838334687e-05, "loss": 0.2444, "step": 71000 }, { "epoch": 2.073137618428928, "grad_norm": 0.5587714299884416, "learning_rate": 1.7164098404974318e-05, "loss": 0.2494, "step": 71005 }, { "epoch": 2.073283601699245, "grad_norm": 0.5501178881751859, "learning_rate": 1.7161394971613952e-05, "loss": 0.2578, "step": 71010 }, { "epoch": 2.0734295849695625, "grad_norm": 0.5744639911899767, "learning_rate": 1.7158691538253582e-05, "loss": 0.2609, "step": 71015 }, { "epoch": 2.0735755682398795, "grad_norm": 0.5920712051078573, "learning_rate": 1.7155988104893213e-05, "loss": 0.2478, "step": 71020 }, { "epoch": 2.073721551510197, "grad_norm": 0.5419410910318372, "learning_rate": 1.715328467153285e-05, "loss": 0.2486, "step": 71025 }, { "epoch": 2.073867534780514, "grad_norm": 0.5586134365107888, "learning_rate": 1.715058123817248e-05, "loss": 0.2527, "step": 71030 }, { "epoch": 2.0740135180508315, "grad_norm": 0.5601528722137874, "learning_rate": 1.714787780481211e-05, "loss": 0.2563, "step": 71035 }, { "epoch": 2.0741595013211485, "grad_norm": 0.5329418508857102, "learning_rate": 1.7145174371451746e-05, "loss": 0.2508, "step": 71040 }, { "epoch": 2.074305484591466, "grad_norm": 0.6061109566054974, "learning_rate": 1.7142470938091376e-05, "loss": 0.2609, "step": 71045 }, { "epoch": 2.074451467861783, "grad_norm": 0.6180576883558854, "learning_rate": 1.7139767504731007e-05, "loss": 0.2548, "step": 71050 }, { "epoch": 2.0745974511321004, "grad_norm": 0.5415060447864772, "learning_rate": 1.7137064071370644e-05, "loss": 0.2463, "step": 71055 }, { "epoch": 2.0747434344024174, "grad_norm": 0.51909277528399, "learning_rate": 1.7134360638010275e-05, "loss": 0.2461, "step": 71060 }, { "epoch": 2.074889417672735, "grad_norm": 0.5942712417932243, "learning_rate": 1.7131657204649906e-05, "loss": 0.2469, "step": 71065 }, { "epoch": 2.075035400943052, "grad_norm": 0.5933636446361115, "learning_rate": 1.712895377128954e-05, "loss": 0.2551, "step": 71070 }, { "epoch": 2.0751813842133693, "grad_norm": 0.5572018819772794, "learning_rate": 1.712625033792917e-05, "loss": 0.2391, "step": 71075 }, { "epoch": 2.0753273674836863, "grad_norm": 0.5749160453151256, "learning_rate": 1.71235469045688e-05, "loss": 0.2415, "step": 71080 }, { "epoch": 2.0754733507540037, "grad_norm": 0.560240806407335, "learning_rate": 1.7120843471208435e-05, "loss": 0.2737, "step": 71085 }, { "epoch": 2.0756193340243208, "grad_norm": 0.5840909889863931, "learning_rate": 1.711814003784807e-05, "loss": 0.2573, "step": 71090 }, { "epoch": 2.075765317294638, "grad_norm": 0.5692738999447847, "learning_rate": 1.71154366044877e-05, "loss": 0.2608, "step": 71095 }, { "epoch": 2.075911300564955, "grad_norm": 0.5255705278410417, "learning_rate": 1.7112733171127333e-05, "loss": 0.2409, "step": 71100 }, { "epoch": 2.0760572838352727, "grad_norm": 0.5631547054300519, "learning_rate": 1.7110029737766964e-05, "loss": 0.2621, "step": 71105 }, { "epoch": 2.0762032671055897, "grad_norm": 0.5654769533477032, "learning_rate": 1.7107326304406598e-05, "loss": 0.2645, "step": 71110 }, { "epoch": 2.076349250375907, "grad_norm": 0.5587338285169074, "learning_rate": 1.710462287104623e-05, "loss": 0.2549, "step": 71115 }, { "epoch": 2.076495233646224, "grad_norm": 0.6067118728477578, "learning_rate": 1.7101919437685863e-05, "loss": 0.2627, "step": 71120 }, { "epoch": 2.0766412169165416, "grad_norm": 0.5531761077491142, "learning_rate": 1.7099216004325493e-05, "loss": 0.265, "step": 71125 }, { "epoch": 2.0767872001868586, "grad_norm": 0.5678481968901692, "learning_rate": 1.7096512570965127e-05, "loss": 0.2438, "step": 71130 }, { "epoch": 2.076933183457176, "grad_norm": 0.5226291481638318, "learning_rate": 1.7093809137604758e-05, "loss": 0.2497, "step": 71135 }, { "epoch": 2.077079166727493, "grad_norm": 0.5634095075316308, "learning_rate": 1.7091105704244392e-05, "loss": 0.2355, "step": 71140 }, { "epoch": 2.07722514999781, "grad_norm": 0.5639861388512293, "learning_rate": 1.7088402270884023e-05, "loss": 0.2434, "step": 71145 }, { "epoch": 2.0773711332681275, "grad_norm": 0.5392836457583097, "learning_rate": 1.7085698837523657e-05, "loss": 0.2642, "step": 71150 }, { "epoch": 2.0775171165384445, "grad_norm": 0.5318935837532945, "learning_rate": 1.7082995404163287e-05, "loss": 0.2508, "step": 71155 }, { "epoch": 2.077663099808762, "grad_norm": 0.5761685316803007, "learning_rate": 1.708029197080292e-05, "loss": 0.2499, "step": 71160 }, { "epoch": 2.077809083079079, "grad_norm": 0.584886455837565, "learning_rate": 1.7077588537442552e-05, "loss": 0.2437, "step": 71165 }, { "epoch": 2.0779550663493964, "grad_norm": 0.5972120395620686, "learning_rate": 1.7074885104082186e-05, "loss": 0.263, "step": 71170 }, { "epoch": 2.0781010496197134, "grad_norm": 0.5760392618342318, "learning_rate": 1.7072181670721816e-05, "loss": 0.2625, "step": 71175 }, { "epoch": 2.078247032890031, "grad_norm": 0.5929935406063507, "learning_rate": 1.706947823736145e-05, "loss": 0.248, "step": 71180 }, { "epoch": 2.078393016160348, "grad_norm": 0.600991931044892, "learning_rate": 1.706677480400108e-05, "loss": 0.2604, "step": 71185 }, { "epoch": 2.0785389994306653, "grad_norm": 0.5518479722830101, "learning_rate": 1.7064071370640715e-05, "loss": 0.2481, "step": 71190 }, { "epoch": 2.0786849827009823, "grad_norm": 0.5229390688547233, "learning_rate": 1.706136793728035e-05, "loss": 0.2413, "step": 71195 }, { "epoch": 2.0788309659713, "grad_norm": 0.5680220846439722, "learning_rate": 1.705866450391998e-05, "loss": 0.2727, "step": 71200 }, { "epoch": 2.078976949241617, "grad_norm": 0.5699302120165746, "learning_rate": 1.705596107055961e-05, "loss": 0.2577, "step": 71205 }, { "epoch": 2.0791229325119343, "grad_norm": 0.5347801511745273, "learning_rate": 1.7053257637199244e-05, "loss": 0.241, "step": 71210 }, { "epoch": 2.0792689157822513, "grad_norm": 0.5699561733904192, "learning_rate": 1.7050554203838875e-05, "loss": 0.256, "step": 71215 }, { "epoch": 2.0794148990525687, "grad_norm": 0.591825446682446, "learning_rate": 1.7047850770478506e-05, "loss": 0.2657, "step": 71220 }, { "epoch": 2.0795608823228857, "grad_norm": 0.5419536691263341, "learning_rate": 1.7045147337118143e-05, "loss": 0.2574, "step": 71225 }, { "epoch": 2.079706865593203, "grad_norm": 0.6187665640019501, "learning_rate": 1.7042443903757774e-05, "loss": 0.2474, "step": 71230 }, { "epoch": 2.07985284886352, "grad_norm": 0.5716349738360909, "learning_rate": 1.7039740470397404e-05, "loss": 0.2387, "step": 71235 }, { "epoch": 2.0799988321338376, "grad_norm": 0.5018921003873402, "learning_rate": 1.7037037037037038e-05, "loss": 0.2373, "step": 71240 }, { "epoch": 2.0801448154041546, "grad_norm": 0.5811489940994072, "learning_rate": 1.703433360367667e-05, "loss": 0.2496, "step": 71245 }, { "epoch": 2.080290798674472, "grad_norm": 0.597662515558682, "learning_rate": 1.70316301703163e-05, "loss": 0.2642, "step": 71250 }, { "epoch": 2.080436781944789, "grad_norm": 0.5649489398979173, "learning_rate": 1.7028926736955937e-05, "loss": 0.2494, "step": 71255 }, { "epoch": 2.0805827652151065, "grad_norm": 0.5528458521866972, "learning_rate": 1.7026223303595568e-05, "loss": 0.2515, "step": 71260 }, { "epoch": 2.0807287484854236, "grad_norm": 0.5534749626706228, "learning_rate": 1.7023519870235198e-05, "loss": 0.2472, "step": 71265 }, { "epoch": 2.080874731755741, "grad_norm": 0.57632218082142, "learning_rate": 1.7020816436874832e-05, "loss": 0.2532, "step": 71270 }, { "epoch": 2.081020715026058, "grad_norm": 0.5066755262079816, "learning_rate": 1.7018113003514463e-05, "loss": 0.2559, "step": 71275 }, { "epoch": 2.081166698296375, "grad_norm": 0.4975832190296274, "learning_rate": 1.7015409570154097e-05, "loss": 0.2471, "step": 71280 }, { "epoch": 2.0813126815666925, "grad_norm": 0.5705328701977496, "learning_rate": 1.701270613679373e-05, "loss": 0.2772, "step": 71285 }, { "epoch": 2.0814586648370095, "grad_norm": 0.5412197167363976, "learning_rate": 1.701000270343336e-05, "loss": 0.2262, "step": 71290 }, { "epoch": 2.081604648107327, "grad_norm": 0.5679741813051699, "learning_rate": 1.7007299270072992e-05, "loss": 0.264, "step": 71295 }, { "epoch": 2.081750631377644, "grad_norm": 0.518783984707804, "learning_rate": 1.7004595836712626e-05, "loss": 0.2543, "step": 71300 }, { "epoch": 2.0818966146479614, "grad_norm": 0.6082318182860291, "learning_rate": 1.7001892403352257e-05, "loss": 0.2499, "step": 71305 }, { "epoch": 2.0820425979182784, "grad_norm": 0.5353896479464064, "learning_rate": 1.699918896999189e-05, "loss": 0.2607, "step": 71310 }, { "epoch": 2.082188581188596, "grad_norm": 0.6049138415261632, "learning_rate": 1.6996485536631525e-05, "loss": 0.2663, "step": 71315 }, { "epoch": 2.082334564458913, "grad_norm": 0.5608382039601852, "learning_rate": 1.6993782103271155e-05, "loss": 0.2562, "step": 71320 }, { "epoch": 2.0824805477292303, "grad_norm": 0.5675232896849436, "learning_rate": 1.6991078669910786e-05, "loss": 0.2497, "step": 71325 }, { "epoch": 2.0826265309995473, "grad_norm": 0.5529833193954771, "learning_rate": 1.698837523655042e-05, "loss": 0.2561, "step": 71330 }, { "epoch": 2.0827725142698648, "grad_norm": 0.5699659209822301, "learning_rate": 1.698567180319005e-05, "loss": 0.2395, "step": 71335 }, { "epoch": 2.0829184975401818, "grad_norm": 0.5407799768715262, "learning_rate": 1.6982968369829685e-05, "loss": 0.2625, "step": 71340 }, { "epoch": 2.083064480810499, "grad_norm": 0.6142521774702949, "learning_rate": 1.698026493646932e-05, "loss": 0.2555, "step": 71345 }, { "epoch": 2.0832104640808162, "grad_norm": 0.5856135365477045, "learning_rate": 1.697756150310895e-05, "loss": 0.2759, "step": 71350 }, { "epoch": 2.0833564473511337, "grad_norm": 0.6116008124892752, "learning_rate": 1.697485806974858e-05, "loss": 0.2476, "step": 71355 }, { "epoch": 2.0835024306214507, "grad_norm": 0.5969684630092169, "learning_rate": 1.6972154636388214e-05, "loss": 0.2531, "step": 71360 }, { "epoch": 2.083648413891768, "grad_norm": 0.5411421175219208, "learning_rate": 1.6969451203027848e-05, "loss": 0.2678, "step": 71365 }, { "epoch": 2.083794397162085, "grad_norm": 0.5376766830714077, "learning_rate": 1.696674776966748e-05, "loss": 0.2427, "step": 71370 }, { "epoch": 2.0839403804324026, "grad_norm": 0.5389133724512506, "learning_rate": 1.6964044336307113e-05, "loss": 0.2473, "step": 71375 }, { "epoch": 2.0840863637027196, "grad_norm": 0.5335001798253667, "learning_rate": 1.6961340902946743e-05, "loss": 0.251, "step": 71380 }, { "epoch": 2.084232346973037, "grad_norm": 0.6038916658990758, "learning_rate": 1.6958637469586374e-05, "loss": 0.2614, "step": 71385 }, { "epoch": 2.084378330243354, "grad_norm": 0.5580380624022335, "learning_rate": 1.6955934036226008e-05, "loss": 0.2353, "step": 71390 }, { "epoch": 2.0845243135136715, "grad_norm": 0.602208888457543, "learning_rate": 1.6953230602865642e-05, "loss": 0.2542, "step": 71395 }, { "epoch": 2.0846702967839885, "grad_norm": 0.5811701143095936, "learning_rate": 1.6950527169505272e-05, "loss": 0.2626, "step": 71400 }, { "epoch": 2.084816280054306, "grad_norm": 0.6238111085834492, "learning_rate": 1.6947823736144906e-05, "loss": 0.2635, "step": 71405 }, { "epoch": 2.084962263324623, "grad_norm": 0.543129150663198, "learning_rate": 1.6945120302784537e-05, "loss": 0.2624, "step": 71410 }, { "epoch": 2.0851082465949404, "grad_norm": 0.5001491487283947, "learning_rate": 1.6942416869424168e-05, "loss": 0.2368, "step": 71415 }, { "epoch": 2.0852542298652574, "grad_norm": 0.515035742177758, "learning_rate": 1.6939713436063802e-05, "loss": 0.2468, "step": 71420 }, { "epoch": 2.085400213135575, "grad_norm": 0.5488014891246412, "learning_rate": 1.6937010002703436e-05, "loss": 0.2466, "step": 71425 }, { "epoch": 2.085546196405892, "grad_norm": 0.5391541569637638, "learning_rate": 1.6934306569343066e-05, "loss": 0.2513, "step": 71430 }, { "epoch": 2.085692179676209, "grad_norm": 0.5926256488910843, "learning_rate": 1.69316031359827e-05, "loss": 0.2548, "step": 71435 }, { "epoch": 2.0858381629465264, "grad_norm": 0.5702925328413301, "learning_rate": 1.692889970262233e-05, "loss": 0.2651, "step": 71440 }, { "epoch": 2.0859841462168434, "grad_norm": 0.594080851039648, "learning_rate": 1.692619626926196e-05, "loss": 0.2527, "step": 71445 }, { "epoch": 2.086130129487161, "grad_norm": 0.555199625645584, "learning_rate": 1.6923492835901596e-05, "loss": 0.258, "step": 71450 }, { "epoch": 2.086276112757478, "grad_norm": 0.5416847714432508, "learning_rate": 1.692078940254123e-05, "loss": 0.2339, "step": 71455 }, { "epoch": 2.0864220960277953, "grad_norm": 0.5581797096433553, "learning_rate": 1.691808596918086e-05, "loss": 0.2532, "step": 71460 }, { "epoch": 2.0865680792981123, "grad_norm": 0.5581519892767665, "learning_rate": 1.6915382535820494e-05, "loss": 0.2497, "step": 71465 }, { "epoch": 2.0867140625684297, "grad_norm": 0.5541925391014734, "learning_rate": 1.6912679102460125e-05, "loss": 0.2434, "step": 71470 }, { "epoch": 2.0868600458387467, "grad_norm": 0.6119745399937698, "learning_rate": 1.6909975669099756e-05, "loss": 0.256, "step": 71475 }, { "epoch": 2.087006029109064, "grad_norm": 0.5800320828650808, "learning_rate": 1.690727223573939e-05, "loss": 0.2302, "step": 71480 }, { "epoch": 2.087152012379381, "grad_norm": 0.5937401368993195, "learning_rate": 1.6904568802379024e-05, "loss": 0.2617, "step": 71485 }, { "epoch": 2.0872979956496986, "grad_norm": 0.5507891787144719, "learning_rate": 1.6901865369018654e-05, "loss": 0.265, "step": 71490 }, { "epoch": 2.0874439789200157, "grad_norm": 0.5587834689964855, "learning_rate": 1.6899161935658285e-05, "loss": 0.2503, "step": 71495 }, { "epoch": 2.087589962190333, "grad_norm": 0.539562586342224, "learning_rate": 1.689645850229792e-05, "loss": 0.2731, "step": 71500 }, { "epoch": 2.08773594546065, "grad_norm": 0.5647693193607621, "learning_rate": 1.689375506893755e-05, "loss": 0.2764, "step": 71505 }, { "epoch": 2.0878819287309676, "grad_norm": 0.5818804505862574, "learning_rate": 1.6891051635577183e-05, "loss": 0.2551, "step": 71510 }, { "epoch": 2.0880279120012846, "grad_norm": 0.5792246469807354, "learning_rate": 1.6888348202216817e-05, "loss": 0.2528, "step": 71515 }, { "epoch": 2.088173895271602, "grad_norm": 0.6174429560813414, "learning_rate": 1.6885644768856448e-05, "loss": 0.2501, "step": 71520 }, { "epoch": 2.088319878541919, "grad_norm": 0.5726658486659825, "learning_rate": 1.688294133549608e-05, "loss": 0.2639, "step": 71525 }, { "epoch": 2.0884658618122365, "grad_norm": 0.5673295567688912, "learning_rate": 1.6880237902135713e-05, "loss": 0.255, "step": 71530 }, { "epoch": 2.0886118450825535, "grad_norm": 0.5793359943710642, "learning_rate": 1.6877534468775347e-05, "loss": 0.2671, "step": 71535 }, { "epoch": 2.088757828352871, "grad_norm": 0.5552013877359515, "learning_rate": 1.6874831035414977e-05, "loss": 0.2505, "step": 71540 }, { "epoch": 2.088903811623188, "grad_norm": 0.5931368398365602, "learning_rate": 1.687212760205461e-05, "loss": 0.246, "step": 71545 }, { "epoch": 2.0890497948935054, "grad_norm": 0.5403027980065936, "learning_rate": 1.6869424168694242e-05, "loss": 0.2592, "step": 71550 }, { "epoch": 2.0891957781638224, "grad_norm": 0.5166095794042806, "learning_rate": 1.6866720735333873e-05, "loss": 0.2408, "step": 71555 }, { "epoch": 2.08934176143414, "grad_norm": 0.6158142222122883, "learning_rate": 1.6864017301973507e-05, "loss": 0.2556, "step": 71560 }, { "epoch": 2.089487744704457, "grad_norm": 0.5449311915174233, "learning_rate": 1.686131386861314e-05, "loss": 0.245, "step": 71565 }, { "epoch": 2.089633727974774, "grad_norm": 0.5810179572157982, "learning_rate": 1.685861043525277e-05, "loss": 0.2806, "step": 71570 }, { "epoch": 2.0897797112450913, "grad_norm": 0.5795883790536867, "learning_rate": 1.6855907001892405e-05, "loss": 0.257, "step": 71575 }, { "epoch": 2.0899256945154083, "grad_norm": 0.5385501472256964, "learning_rate": 1.6853203568532036e-05, "loss": 0.2435, "step": 71580 }, { "epoch": 2.090071677785726, "grad_norm": 0.6065590355158087, "learning_rate": 1.6850500135171667e-05, "loss": 0.2576, "step": 71585 }, { "epoch": 2.090217661056043, "grad_norm": 0.5349000575636979, "learning_rate": 1.6847796701811304e-05, "loss": 0.2506, "step": 71590 }, { "epoch": 2.0903636443263602, "grad_norm": 0.5704682038915776, "learning_rate": 1.6845093268450935e-05, "loss": 0.2422, "step": 71595 }, { "epoch": 2.0905096275966772, "grad_norm": 0.5649568128141397, "learning_rate": 1.6842389835090565e-05, "loss": 0.2285, "step": 71600 }, { "epoch": 2.0906556108669947, "grad_norm": 0.5702682337160412, "learning_rate": 1.68396864017302e-05, "loss": 0.2414, "step": 71605 }, { "epoch": 2.0908015941373117, "grad_norm": 0.5292329636185898, "learning_rate": 1.683698296836983e-05, "loss": 0.2515, "step": 71610 }, { "epoch": 2.090947577407629, "grad_norm": 0.6178493213917159, "learning_rate": 1.683427953500946e-05, "loss": 0.2731, "step": 71615 }, { "epoch": 2.091093560677946, "grad_norm": 0.5277368922168468, "learning_rate": 1.6831576101649098e-05, "loss": 0.2466, "step": 71620 }, { "epoch": 2.0912395439482636, "grad_norm": 0.5830986099725708, "learning_rate": 1.682887266828873e-05, "loss": 0.2633, "step": 71625 }, { "epoch": 2.0913855272185806, "grad_norm": 0.5575806579784801, "learning_rate": 1.682616923492836e-05, "loss": 0.2578, "step": 71630 }, { "epoch": 2.091531510488898, "grad_norm": 0.5776935389429763, "learning_rate": 1.6823465801567993e-05, "loss": 0.2643, "step": 71635 }, { "epoch": 2.091677493759215, "grad_norm": 0.5490495875513416, "learning_rate": 1.6820762368207624e-05, "loss": 0.2612, "step": 71640 }, { "epoch": 2.0918234770295325, "grad_norm": 0.5338371341802449, "learning_rate": 1.6818058934847254e-05, "loss": 0.2371, "step": 71645 }, { "epoch": 2.0919694602998495, "grad_norm": 0.5527136363730464, "learning_rate": 1.6815355501486892e-05, "loss": 0.2391, "step": 71650 }, { "epoch": 2.092115443570167, "grad_norm": 0.5241745421944625, "learning_rate": 1.6812652068126522e-05, "loss": 0.2627, "step": 71655 }, { "epoch": 2.092261426840484, "grad_norm": 0.5131321387196925, "learning_rate": 1.6809948634766153e-05, "loss": 0.25, "step": 71660 }, { "epoch": 2.0924074101108014, "grad_norm": 0.6380903097094979, "learning_rate": 1.6807245201405787e-05, "loss": 0.2677, "step": 71665 }, { "epoch": 2.0925533933811185, "grad_norm": 0.6106007082351275, "learning_rate": 1.6804541768045418e-05, "loss": 0.2502, "step": 71670 }, { "epoch": 2.092699376651436, "grad_norm": 0.5612018762749768, "learning_rate": 1.680183833468505e-05, "loss": 0.2514, "step": 71675 }, { "epoch": 2.092845359921753, "grad_norm": 0.533058457490875, "learning_rate": 1.6799134901324686e-05, "loss": 0.2462, "step": 71680 }, { "epoch": 2.0929913431920704, "grad_norm": 0.5881041311375189, "learning_rate": 1.6796431467964316e-05, "loss": 0.2457, "step": 71685 }, { "epoch": 2.0931373264623874, "grad_norm": 0.5772517684600912, "learning_rate": 1.6793728034603947e-05, "loss": 0.2496, "step": 71690 }, { "epoch": 2.093283309732705, "grad_norm": 0.6034049875952989, "learning_rate": 1.679102460124358e-05, "loss": 0.2492, "step": 71695 }, { "epoch": 2.093429293003022, "grad_norm": 0.567701526760132, "learning_rate": 1.678832116788321e-05, "loss": 0.2499, "step": 71700 }, { "epoch": 2.0935752762733393, "grad_norm": 0.5374112035276192, "learning_rate": 1.6785617734522846e-05, "loss": 0.2368, "step": 71705 }, { "epoch": 2.0937212595436563, "grad_norm": 0.6039143648509565, "learning_rate": 1.678291430116248e-05, "loss": 0.2573, "step": 71710 }, { "epoch": 2.0938672428139737, "grad_norm": 0.5634991249227037, "learning_rate": 1.678021086780211e-05, "loss": 0.241, "step": 71715 }, { "epoch": 2.0940132260842907, "grad_norm": 0.5464681519998723, "learning_rate": 1.677750743444174e-05, "loss": 0.2646, "step": 71720 }, { "epoch": 2.0941592093546078, "grad_norm": 0.5630370284908345, "learning_rate": 1.6774804001081375e-05, "loss": 0.2571, "step": 71725 }, { "epoch": 2.094305192624925, "grad_norm": 0.5659084692906498, "learning_rate": 1.6772100567721005e-05, "loss": 0.2557, "step": 71730 }, { "epoch": 2.094451175895242, "grad_norm": 0.5693158095251609, "learning_rate": 1.676939713436064e-05, "loss": 0.2578, "step": 71735 }, { "epoch": 2.0945971591655597, "grad_norm": 0.5683062349681667, "learning_rate": 1.676669370100027e-05, "loss": 0.2498, "step": 71740 }, { "epoch": 2.0947431424358767, "grad_norm": 0.5330847093281378, "learning_rate": 1.6763990267639904e-05, "loss": 0.2629, "step": 71745 }, { "epoch": 2.094889125706194, "grad_norm": 0.5572473826423898, "learning_rate": 1.6761286834279535e-05, "loss": 0.2485, "step": 71750 }, { "epoch": 2.095035108976511, "grad_norm": 0.5286985065975437, "learning_rate": 1.675858340091917e-05, "loss": 0.2559, "step": 71755 }, { "epoch": 2.0951810922468286, "grad_norm": 0.5638388669354643, "learning_rate": 1.6755879967558803e-05, "loss": 0.2716, "step": 71760 }, { "epoch": 2.0953270755171456, "grad_norm": 0.5834115957111758, "learning_rate": 1.6753176534198433e-05, "loss": 0.2584, "step": 71765 }, { "epoch": 2.095473058787463, "grad_norm": 0.5790528591592244, "learning_rate": 1.6750473100838064e-05, "loss": 0.251, "step": 71770 }, { "epoch": 2.09561904205778, "grad_norm": 0.6087911804754155, "learning_rate": 1.6747769667477698e-05, "loss": 0.27, "step": 71775 }, { "epoch": 2.0957650253280975, "grad_norm": 0.5829571002642009, "learning_rate": 1.674506623411733e-05, "loss": 0.2523, "step": 71780 }, { "epoch": 2.0959110085984145, "grad_norm": 0.5417977509186142, "learning_rate": 1.6742362800756963e-05, "loss": 0.249, "step": 71785 }, { "epoch": 2.096056991868732, "grad_norm": 0.5781609492869078, "learning_rate": 1.6739659367396597e-05, "loss": 0.2548, "step": 71790 }, { "epoch": 2.096202975139049, "grad_norm": 0.5851023690286425, "learning_rate": 1.6736955934036227e-05, "loss": 0.2503, "step": 71795 }, { "epoch": 2.0963489584093664, "grad_norm": 0.5440764633991657, "learning_rate": 1.6734252500675858e-05, "loss": 0.2597, "step": 71800 }, { "epoch": 2.0964949416796834, "grad_norm": 0.6137924618171853, "learning_rate": 1.6731549067315492e-05, "loss": 0.2549, "step": 71805 }, { "epoch": 2.096640924950001, "grad_norm": 0.571922511765216, "learning_rate": 1.6728845633955122e-05, "loss": 0.2507, "step": 71810 }, { "epoch": 2.096786908220318, "grad_norm": 0.5790064380604708, "learning_rate": 1.6726142200594756e-05, "loss": 0.2604, "step": 71815 }, { "epoch": 2.0969328914906353, "grad_norm": 0.5636169935632638, "learning_rate": 1.672343876723439e-05, "loss": 0.243, "step": 71820 }, { "epoch": 2.0970788747609523, "grad_norm": 0.526610062432887, "learning_rate": 1.672073533387402e-05, "loss": 0.2345, "step": 71825 }, { "epoch": 2.09722485803127, "grad_norm": 0.5585810904724317, "learning_rate": 1.6718031900513652e-05, "loss": 0.2571, "step": 71830 }, { "epoch": 2.097370841301587, "grad_norm": 0.5056185796748097, "learning_rate": 1.6715328467153286e-05, "loss": 0.2578, "step": 71835 }, { "epoch": 2.0975168245719042, "grad_norm": 0.5576771882692763, "learning_rate": 1.6712625033792916e-05, "loss": 0.2579, "step": 71840 }, { "epoch": 2.0976628078422213, "grad_norm": 0.5701952925056631, "learning_rate": 1.670992160043255e-05, "loss": 0.2587, "step": 71845 }, { "epoch": 2.0978087911125387, "grad_norm": 0.6165282708169527, "learning_rate": 1.6707218167072184e-05, "loss": 0.2565, "step": 71850 }, { "epoch": 2.0979547743828557, "grad_norm": 0.5776874960379748, "learning_rate": 1.6704514733711815e-05, "loss": 0.2485, "step": 71855 }, { "epoch": 2.0981007576531727, "grad_norm": 0.5443782452724729, "learning_rate": 1.6701811300351446e-05, "loss": 0.2486, "step": 71860 }, { "epoch": 2.09824674092349, "grad_norm": 0.5202043882902191, "learning_rate": 1.669910786699108e-05, "loss": 0.2591, "step": 71865 }, { "epoch": 2.098392724193807, "grad_norm": 0.5735882550085759, "learning_rate": 1.669640443363071e-05, "loss": 0.2545, "step": 71870 }, { "epoch": 2.0985387074641246, "grad_norm": 0.6406131048502761, "learning_rate": 1.6693701000270344e-05, "loss": 0.2531, "step": 71875 }, { "epoch": 2.0986846907344416, "grad_norm": 0.5412164993061805, "learning_rate": 1.6690997566909978e-05, "loss": 0.2545, "step": 71880 }, { "epoch": 2.098830674004759, "grad_norm": 0.5829901550247657, "learning_rate": 1.668829413354961e-05, "loss": 0.2445, "step": 71885 }, { "epoch": 2.098976657275076, "grad_norm": 0.6167292155376027, "learning_rate": 1.668559070018924e-05, "loss": 0.2667, "step": 71890 }, { "epoch": 2.0991226405453935, "grad_norm": 0.5884392559653392, "learning_rate": 1.6682887266828874e-05, "loss": 0.2583, "step": 71895 }, { "epoch": 2.0992686238157106, "grad_norm": 0.6156100822925097, "learning_rate": 1.6680183833468504e-05, "loss": 0.2618, "step": 71900 }, { "epoch": 2.099414607086028, "grad_norm": 0.5930983177884888, "learning_rate": 1.6677480400108138e-05, "loss": 0.2406, "step": 71905 }, { "epoch": 2.099560590356345, "grad_norm": 0.5725425588718657, "learning_rate": 1.6674776966747772e-05, "loss": 0.2561, "step": 71910 }, { "epoch": 2.0997065736266625, "grad_norm": 0.5481853264331736, "learning_rate": 1.6672073533387403e-05, "loss": 0.2454, "step": 71915 }, { "epoch": 2.0998525568969795, "grad_norm": 0.5811954138249877, "learning_rate": 1.6669370100027033e-05, "loss": 0.2708, "step": 71920 }, { "epoch": 2.099998540167297, "grad_norm": 0.5283660493986141, "learning_rate": 1.6666666666666667e-05, "loss": 0.2424, "step": 71925 }, { "epoch": 2.100144523437614, "grad_norm": 0.5405130122188391, "learning_rate": 1.66639632333063e-05, "loss": 0.2445, "step": 71930 }, { "epoch": 2.1002905067079314, "grad_norm": 0.5356709822925482, "learning_rate": 1.6661259799945932e-05, "loss": 0.2387, "step": 71935 }, { "epoch": 2.1004364899782484, "grad_norm": 0.6106900142819467, "learning_rate": 1.6658556366585566e-05, "loss": 0.271, "step": 71940 }, { "epoch": 2.100582473248566, "grad_norm": 0.6060074163859848, "learning_rate": 1.6655852933225197e-05, "loss": 0.2826, "step": 71945 }, { "epoch": 2.100728456518883, "grad_norm": 0.5550393489065613, "learning_rate": 1.6653149499864827e-05, "loss": 0.2654, "step": 71950 }, { "epoch": 2.1008744397892003, "grad_norm": 0.5224390827387526, "learning_rate": 1.665044606650446e-05, "loss": 0.257, "step": 71955 }, { "epoch": 2.1010204230595173, "grad_norm": 0.522351211860701, "learning_rate": 1.6647742633144095e-05, "loss": 0.2406, "step": 71960 }, { "epoch": 2.1011664063298348, "grad_norm": 0.5356124377248477, "learning_rate": 1.6645039199783726e-05, "loss": 0.2381, "step": 71965 }, { "epoch": 2.1013123896001518, "grad_norm": 0.6056950973926406, "learning_rate": 1.664233576642336e-05, "loss": 0.2594, "step": 71970 }, { "epoch": 2.101458372870469, "grad_norm": 0.5635806398331907, "learning_rate": 1.663963233306299e-05, "loss": 0.2575, "step": 71975 }, { "epoch": 2.1016043561407862, "grad_norm": 0.5558899921019201, "learning_rate": 1.663692889970262e-05, "loss": 0.2554, "step": 71980 }, { "epoch": 2.1017503394111037, "grad_norm": 0.5969772049384379, "learning_rate": 1.6634225466342255e-05, "loss": 0.2669, "step": 71985 }, { "epoch": 2.1018963226814207, "grad_norm": 0.576900138994939, "learning_rate": 1.663152203298189e-05, "loss": 0.2429, "step": 71990 }, { "epoch": 2.102042305951738, "grad_norm": 0.5223187233278446, "learning_rate": 1.662881859962152e-05, "loss": 0.2565, "step": 71995 }, { "epoch": 2.102188289222055, "grad_norm": 0.5216524034093477, "learning_rate": 1.6626115166261154e-05, "loss": 0.2308, "step": 72000 }, { "epoch": 2.1023342724923726, "grad_norm": 0.5648679565709973, "learning_rate": 1.6623411732900785e-05, "loss": 0.2403, "step": 72005 }, { "epoch": 2.1024802557626896, "grad_norm": 0.5241625532273876, "learning_rate": 1.6620708299540415e-05, "loss": 0.2371, "step": 72010 }, { "epoch": 2.1026262390330066, "grad_norm": 0.5685467577978617, "learning_rate": 1.661800486618005e-05, "loss": 0.2659, "step": 72015 }, { "epoch": 2.102772222303324, "grad_norm": 0.6103119236982341, "learning_rate": 1.6615301432819683e-05, "loss": 0.2583, "step": 72020 }, { "epoch": 2.102918205573641, "grad_norm": 0.5884577337001772, "learning_rate": 1.6612597999459314e-05, "loss": 0.2368, "step": 72025 }, { "epoch": 2.1030641888439585, "grad_norm": 0.52949200902247, "learning_rate": 1.6609894566098948e-05, "loss": 0.248, "step": 72030 }, { "epoch": 2.1032101721142755, "grad_norm": 0.5839439943560166, "learning_rate": 1.660719113273858e-05, "loss": 0.243, "step": 72035 }, { "epoch": 2.103356155384593, "grad_norm": 0.5154102357355236, "learning_rate": 1.660448769937821e-05, "loss": 0.2434, "step": 72040 }, { "epoch": 2.10350213865491, "grad_norm": 0.5434459208910095, "learning_rate": 1.6601784266017843e-05, "loss": 0.237, "step": 72045 }, { "epoch": 2.1036481219252274, "grad_norm": 0.5547386220488465, "learning_rate": 1.6599080832657477e-05, "loss": 0.2539, "step": 72050 }, { "epoch": 2.1037941051955444, "grad_norm": 0.5533270037274997, "learning_rate": 1.6596377399297108e-05, "loss": 0.2412, "step": 72055 }, { "epoch": 2.103940088465862, "grad_norm": 0.5695268353760358, "learning_rate": 1.6593673965936742e-05, "loss": 0.2482, "step": 72060 }, { "epoch": 2.104086071736179, "grad_norm": 0.5308720511513796, "learning_rate": 1.6590970532576372e-05, "loss": 0.2385, "step": 72065 }, { "epoch": 2.1042320550064963, "grad_norm": 0.5764479004178361, "learning_rate": 1.6588267099216003e-05, "loss": 0.2649, "step": 72070 }, { "epoch": 2.1043780382768134, "grad_norm": 0.5832864933180641, "learning_rate": 1.6585563665855637e-05, "loss": 0.2538, "step": 72075 }, { "epoch": 2.104524021547131, "grad_norm": 0.5793003786589889, "learning_rate": 1.658286023249527e-05, "loss": 0.2532, "step": 72080 }, { "epoch": 2.104670004817448, "grad_norm": 0.5654036382360994, "learning_rate": 1.65801567991349e-05, "loss": 0.2486, "step": 72085 }, { "epoch": 2.1048159880877653, "grad_norm": 0.5859336776866086, "learning_rate": 1.6577453365774536e-05, "loss": 0.2546, "step": 72090 }, { "epoch": 2.1049619713580823, "grad_norm": 0.583572715724921, "learning_rate": 1.6574749932414166e-05, "loss": 0.2468, "step": 72095 }, { "epoch": 2.1051079546283997, "grad_norm": 0.5323541176340454, "learning_rate": 1.65720464990538e-05, "loss": 0.2518, "step": 72100 }, { "epoch": 2.1052539378987167, "grad_norm": 0.5619612713850817, "learning_rate": 1.656934306569343e-05, "loss": 0.2502, "step": 72105 }, { "epoch": 2.105399921169034, "grad_norm": 0.5354290915179266, "learning_rate": 1.6566639632333065e-05, "loss": 0.242, "step": 72110 }, { "epoch": 2.105545904439351, "grad_norm": 0.5689794983421153, "learning_rate": 1.6563936198972696e-05, "loss": 0.2717, "step": 72115 }, { "epoch": 2.1056918877096686, "grad_norm": 0.6169087072070191, "learning_rate": 1.6561232765612326e-05, "loss": 0.2491, "step": 72120 }, { "epoch": 2.1058378709799856, "grad_norm": 0.6315059937746798, "learning_rate": 1.655852933225196e-05, "loss": 0.2677, "step": 72125 }, { "epoch": 2.105983854250303, "grad_norm": 0.5991160457235585, "learning_rate": 1.6555825898891594e-05, "loss": 0.2633, "step": 72130 }, { "epoch": 2.10612983752062, "grad_norm": 0.5408868863872623, "learning_rate": 1.6553122465531225e-05, "loss": 0.2372, "step": 72135 }, { "epoch": 2.1062758207909376, "grad_norm": 0.5264821394110709, "learning_rate": 1.655041903217086e-05, "loss": 0.2386, "step": 72140 }, { "epoch": 2.1064218040612546, "grad_norm": 0.5406783849453767, "learning_rate": 1.654771559881049e-05, "loss": 0.2472, "step": 72145 }, { "epoch": 2.1065677873315716, "grad_norm": 0.6119868359902695, "learning_rate": 1.654501216545012e-05, "loss": 0.2725, "step": 72150 }, { "epoch": 2.106713770601889, "grad_norm": 0.564784164071691, "learning_rate": 1.6542308732089754e-05, "loss": 0.2454, "step": 72155 }, { "epoch": 2.106859753872206, "grad_norm": 0.5407705399050379, "learning_rate": 1.6539605298729388e-05, "loss": 0.2551, "step": 72160 }, { "epoch": 2.1070057371425235, "grad_norm": 0.5976080495611997, "learning_rate": 1.653690186536902e-05, "loss": 0.2668, "step": 72165 }, { "epoch": 2.1071517204128405, "grad_norm": 0.5973809845011672, "learning_rate": 1.6534198432008653e-05, "loss": 0.258, "step": 72170 }, { "epoch": 2.107297703683158, "grad_norm": 0.49095689308538626, "learning_rate": 1.6531494998648283e-05, "loss": 0.234, "step": 72175 }, { "epoch": 2.107443686953475, "grad_norm": 0.5976314188739247, "learning_rate": 1.6528791565287914e-05, "loss": 0.2634, "step": 72180 }, { "epoch": 2.1075896702237924, "grad_norm": 0.608127183528186, "learning_rate": 1.652608813192755e-05, "loss": 0.2482, "step": 72185 }, { "epoch": 2.1077356534941094, "grad_norm": 0.5708171688126225, "learning_rate": 1.6523384698567182e-05, "loss": 0.255, "step": 72190 }, { "epoch": 2.107881636764427, "grad_norm": 0.5354889974816195, "learning_rate": 1.6520681265206813e-05, "loss": 0.2642, "step": 72195 }, { "epoch": 2.108027620034744, "grad_norm": 0.5367453171009611, "learning_rate": 1.6517977831846447e-05, "loss": 0.2624, "step": 72200 }, { "epoch": 2.1081736033050613, "grad_norm": 0.532892522267002, "learning_rate": 1.6515274398486077e-05, "loss": 0.2583, "step": 72205 }, { "epoch": 2.1083195865753783, "grad_norm": 0.5886511129443716, "learning_rate": 1.6512570965125708e-05, "loss": 0.2523, "step": 72210 }, { "epoch": 2.1084655698456958, "grad_norm": 0.559312276403286, "learning_rate": 1.6509867531765345e-05, "loss": 0.259, "step": 72215 }, { "epoch": 2.108611553116013, "grad_norm": 0.6030865979600499, "learning_rate": 1.6507164098404976e-05, "loss": 0.2602, "step": 72220 }, { "epoch": 2.1087575363863302, "grad_norm": 0.5759256534079878, "learning_rate": 1.6504460665044607e-05, "loss": 0.265, "step": 72225 }, { "epoch": 2.1089035196566472, "grad_norm": 0.5498261575365926, "learning_rate": 1.650175723168424e-05, "loss": 0.2586, "step": 72230 }, { "epoch": 2.1090495029269647, "grad_norm": 0.5633976984025355, "learning_rate": 1.649905379832387e-05, "loss": 0.254, "step": 72235 }, { "epoch": 2.1091954861972817, "grad_norm": 0.5767939235991307, "learning_rate": 1.6496350364963502e-05, "loss": 0.2551, "step": 72240 }, { "epoch": 2.109341469467599, "grad_norm": 0.5871128279490125, "learning_rate": 1.649364693160314e-05, "loss": 0.2471, "step": 72245 }, { "epoch": 2.109487452737916, "grad_norm": 0.5453726480240834, "learning_rate": 1.649094349824277e-05, "loss": 0.2521, "step": 72250 }, { "epoch": 2.1096334360082336, "grad_norm": 0.5897331915585151, "learning_rate": 1.64882400648824e-05, "loss": 0.2664, "step": 72255 }, { "epoch": 2.1097794192785506, "grad_norm": 0.5732140459221674, "learning_rate": 1.6485536631522034e-05, "loss": 0.2498, "step": 72260 }, { "epoch": 2.109925402548868, "grad_norm": 0.5805291124581093, "learning_rate": 1.6482833198161665e-05, "loss": 0.2586, "step": 72265 }, { "epoch": 2.110071385819185, "grad_norm": 0.5834554476969627, "learning_rate": 1.64801297648013e-05, "loss": 0.2505, "step": 72270 }, { "epoch": 2.1102173690895025, "grad_norm": 0.527656421498555, "learning_rate": 1.6477426331440933e-05, "loss": 0.2482, "step": 72275 }, { "epoch": 2.1103633523598195, "grad_norm": 0.5735243779720567, "learning_rate": 1.6474722898080564e-05, "loss": 0.2547, "step": 72280 }, { "epoch": 2.110509335630137, "grad_norm": 0.5306334016406395, "learning_rate": 1.6472019464720194e-05, "loss": 0.2521, "step": 72285 }, { "epoch": 2.110655318900454, "grad_norm": 0.5919783169961041, "learning_rate": 1.646931603135983e-05, "loss": 0.258, "step": 72290 }, { "epoch": 2.1108013021707714, "grad_norm": 0.5349173232591835, "learning_rate": 1.646661259799946e-05, "loss": 0.2566, "step": 72295 }, { "epoch": 2.1109472854410885, "grad_norm": 1.1398276982843496, "learning_rate": 1.6463909164639093e-05, "loss": 0.2564, "step": 72300 }, { "epoch": 2.1110932687114055, "grad_norm": 0.5542324238322937, "learning_rate": 1.6461205731278727e-05, "loss": 0.2616, "step": 72305 }, { "epoch": 2.111239251981723, "grad_norm": 0.5348896519181467, "learning_rate": 1.6458502297918358e-05, "loss": 0.2446, "step": 72310 }, { "epoch": 2.11138523525204, "grad_norm": 0.6068500508260377, "learning_rate": 1.6455798864557988e-05, "loss": 0.2752, "step": 72315 }, { "epoch": 2.1115312185223574, "grad_norm": 0.5055233290517583, "learning_rate": 1.6453095431197622e-05, "loss": 0.2515, "step": 72320 }, { "epoch": 2.1116772017926744, "grad_norm": 0.5585834222409658, "learning_rate": 1.6450391997837253e-05, "loss": 0.2492, "step": 72325 }, { "epoch": 2.111823185062992, "grad_norm": 0.5563384637675671, "learning_rate": 1.6447688564476887e-05, "loss": 0.2698, "step": 72330 }, { "epoch": 2.111969168333309, "grad_norm": 0.6062452609143824, "learning_rate": 1.644498513111652e-05, "loss": 0.2684, "step": 72335 }, { "epoch": 2.1121151516036263, "grad_norm": 0.5818095773774744, "learning_rate": 1.644228169775615e-05, "loss": 0.2483, "step": 72340 }, { "epoch": 2.1122611348739433, "grad_norm": 0.6205611782717818, "learning_rate": 1.6439578264395782e-05, "loss": 0.2576, "step": 72345 }, { "epoch": 2.1124071181442607, "grad_norm": 0.5752858897310448, "learning_rate": 1.6436874831035416e-05, "loss": 0.2535, "step": 72350 }, { "epoch": 2.1125531014145777, "grad_norm": 0.5230697799879532, "learning_rate": 1.643417139767505e-05, "loss": 0.242, "step": 72355 }, { "epoch": 2.112699084684895, "grad_norm": 0.5454264489156908, "learning_rate": 1.643146796431468e-05, "loss": 0.2635, "step": 72360 }, { "epoch": 2.112845067955212, "grad_norm": 0.5451659074636653, "learning_rate": 1.6428764530954315e-05, "loss": 0.2507, "step": 72365 }, { "epoch": 2.1129910512255297, "grad_norm": 0.5530573111463354, "learning_rate": 1.6426061097593945e-05, "loss": 0.2559, "step": 72370 }, { "epoch": 2.1131370344958467, "grad_norm": 0.5434755290180607, "learning_rate": 1.6423357664233576e-05, "loss": 0.2627, "step": 72375 }, { "epoch": 2.113283017766164, "grad_norm": 0.5758095745105175, "learning_rate": 1.642065423087321e-05, "loss": 0.25, "step": 72380 }, { "epoch": 2.113429001036481, "grad_norm": 0.5222669500518777, "learning_rate": 1.6417950797512844e-05, "loss": 0.2516, "step": 72385 }, { "epoch": 2.1135749843067986, "grad_norm": 0.5996687151067384, "learning_rate": 1.6415247364152475e-05, "loss": 0.2582, "step": 72390 }, { "epoch": 2.1137209675771156, "grad_norm": 0.571605187009065, "learning_rate": 1.6412543930792105e-05, "loss": 0.2589, "step": 72395 }, { "epoch": 2.113866950847433, "grad_norm": 0.5414697157405881, "learning_rate": 1.640984049743174e-05, "loss": 0.2519, "step": 72400 }, { "epoch": 2.11401293411775, "grad_norm": 0.5595595331898994, "learning_rate": 1.640713706407137e-05, "loss": 0.2468, "step": 72405 }, { "epoch": 2.1141589173880675, "grad_norm": 0.5953292986002853, "learning_rate": 1.6404433630711004e-05, "loss": 0.2533, "step": 72410 }, { "epoch": 2.1143049006583845, "grad_norm": 0.538059762075928, "learning_rate": 1.6401730197350638e-05, "loss": 0.2535, "step": 72415 }, { "epoch": 2.114450883928702, "grad_norm": 0.5599567938634672, "learning_rate": 1.639902676399027e-05, "loss": 0.2691, "step": 72420 }, { "epoch": 2.114596867199019, "grad_norm": 0.5600530910099805, "learning_rate": 1.63963233306299e-05, "loss": 0.2469, "step": 72425 }, { "epoch": 2.1147428504693364, "grad_norm": 0.5486286808378577, "learning_rate": 1.6393619897269533e-05, "loss": 0.2635, "step": 72430 }, { "epoch": 2.1148888337396534, "grad_norm": 0.587189596190544, "learning_rate": 1.6390916463909164e-05, "loss": 0.25, "step": 72435 }, { "epoch": 2.115034817009971, "grad_norm": 0.586457265839623, "learning_rate": 1.6388213030548798e-05, "loss": 0.2705, "step": 72440 }, { "epoch": 2.115180800280288, "grad_norm": 0.5804268488757882, "learning_rate": 1.6385509597188432e-05, "loss": 0.2549, "step": 72445 }, { "epoch": 2.1153267835506053, "grad_norm": 0.5252092298493773, "learning_rate": 1.6382806163828062e-05, "loss": 0.2555, "step": 72450 }, { "epoch": 2.1154727668209223, "grad_norm": 0.5922888725167809, "learning_rate": 1.6380102730467693e-05, "loss": 0.2611, "step": 72455 }, { "epoch": 2.1156187500912393, "grad_norm": 0.5539352383540216, "learning_rate": 1.6377399297107327e-05, "loss": 0.2392, "step": 72460 }, { "epoch": 2.115764733361557, "grad_norm": 0.5305538913607242, "learning_rate": 1.6374695863746958e-05, "loss": 0.2529, "step": 72465 }, { "epoch": 2.115910716631874, "grad_norm": 0.5627775063981691, "learning_rate": 1.6371992430386592e-05, "loss": 0.2494, "step": 72470 }, { "epoch": 2.1160566999021913, "grad_norm": 0.5696811174465892, "learning_rate": 1.6369288997026226e-05, "loss": 0.2475, "step": 72475 }, { "epoch": 2.1162026831725083, "grad_norm": 0.5780740093314319, "learning_rate": 1.6366585563665856e-05, "loss": 0.2674, "step": 72480 }, { "epoch": 2.1163486664428257, "grad_norm": 0.5241282267451772, "learning_rate": 1.6363882130305487e-05, "loss": 0.2447, "step": 72485 }, { "epoch": 2.1164946497131427, "grad_norm": 0.573771978471943, "learning_rate": 1.636117869694512e-05, "loss": 0.2482, "step": 72490 }, { "epoch": 2.11664063298346, "grad_norm": 0.662258903952711, "learning_rate": 1.635847526358475e-05, "loss": 0.2638, "step": 72495 }, { "epoch": 2.116786616253777, "grad_norm": 0.5838263728178872, "learning_rate": 1.6355771830224386e-05, "loss": 0.2592, "step": 72500 }, { "epoch": 2.1169325995240946, "grad_norm": 0.5943815281259381, "learning_rate": 1.635306839686402e-05, "loss": 0.2591, "step": 72505 }, { "epoch": 2.1170785827944116, "grad_norm": 0.5556442989264008, "learning_rate": 1.635036496350365e-05, "loss": 0.2495, "step": 72510 }, { "epoch": 2.117224566064729, "grad_norm": 0.5983303227712766, "learning_rate": 1.634766153014328e-05, "loss": 0.2639, "step": 72515 }, { "epoch": 2.117370549335046, "grad_norm": 0.5852498946557161, "learning_rate": 1.6344958096782915e-05, "loss": 0.2595, "step": 72520 }, { "epoch": 2.1175165326053635, "grad_norm": 0.5371062855861549, "learning_rate": 1.634225466342255e-05, "loss": 0.2581, "step": 72525 }, { "epoch": 2.1176625158756806, "grad_norm": 0.532897798345835, "learning_rate": 1.633955123006218e-05, "loss": 0.2324, "step": 72530 }, { "epoch": 2.117808499145998, "grad_norm": 0.5479016293389541, "learning_rate": 1.6336847796701814e-05, "loss": 0.2613, "step": 72535 }, { "epoch": 2.117954482416315, "grad_norm": 0.6266224689804765, "learning_rate": 1.6334144363341444e-05, "loss": 0.2564, "step": 72540 }, { "epoch": 2.1181004656866325, "grad_norm": 0.6073831708654566, "learning_rate": 1.6331440929981075e-05, "loss": 0.2449, "step": 72545 }, { "epoch": 2.1182464489569495, "grad_norm": 0.5506525841995067, "learning_rate": 1.632873749662071e-05, "loss": 0.2681, "step": 72550 }, { "epoch": 2.118392432227267, "grad_norm": 0.5611679141690215, "learning_rate": 1.6326034063260343e-05, "loss": 0.2361, "step": 72555 }, { "epoch": 2.118538415497584, "grad_norm": 0.5603305014675376, "learning_rate": 1.6323330629899973e-05, "loss": 0.2726, "step": 72560 }, { "epoch": 2.1186843987679014, "grad_norm": 0.5994662319160272, "learning_rate": 1.6320627196539607e-05, "loss": 0.2426, "step": 72565 }, { "epoch": 2.1188303820382184, "grad_norm": 0.625308927574196, "learning_rate": 1.6317923763179238e-05, "loss": 0.2624, "step": 72570 }, { "epoch": 2.118976365308536, "grad_norm": 0.5629060494937728, "learning_rate": 1.631522032981887e-05, "loss": 0.2591, "step": 72575 }, { "epoch": 2.119122348578853, "grad_norm": 0.6085553670948167, "learning_rate": 1.6312516896458503e-05, "loss": 0.2653, "step": 72580 }, { "epoch": 2.1192683318491703, "grad_norm": 0.5417107739154473, "learning_rate": 1.6309813463098137e-05, "loss": 0.2484, "step": 72585 }, { "epoch": 2.1194143151194873, "grad_norm": 0.5612705595004254, "learning_rate": 1.6307110029737767e-05, "loss": 0.2686, "step": 72590 }, { "epoch": 2.1195602983898043, "grad_norm": 0.5421843584967184, "learning_rate": 1.63044065963774e-05, "loss": 0.2507, "step": 72595 }, { "epoch": 2.1197062816601218, "grad_norm": 0.5470813460459919, "learning_rate": 1.6301703163017032e-05, "loss": 0.2648, "step": 72600 }, { "epoch": 2.1198522649304388, "grad_norm": 0.599512448557678, "learning_rate": 1.6298999729656663e-05, "loss": 0.2561, "step": 72605 }, { "epoch": 2.119998248200756, "grad_norm": 0.5380307608899593, "learning_rate": 1.62962962962963e-05, "loss": 0.256, "step": 72610 }, { "epoch": 2.1201442314710732, "grad_norm": 0.5590077521683853, "learning_rate": 1.629359286293593e-05, "loss": 0.2683, "step": 72615 }, { "epoch": 2.1202902147413907, "grad_norm": 0.581866422034997, "learning_rate": 1.629088942957556e-05, "loss": 0.2669, "step": 72620 }, { "epoch": 2.1204361980117077, "grad_norm": 0.5293711546239965, "learning_rate": 1.6288185996215195e-05, "loss": 0.2651, "step": 72625 }, { "epoch": 2.120582181282025, "grad_norm": 0.5166922837970952, "learning_rate": 1.6285482562854826e-05, "loss": 0.2561, "step": 72630 }, { "epoch": 2.120728164552342, "grad_norm": 0.5669171360058369, "learning_rate": 1.6282779129494457e-05, "loss": 0.2579, "step": 72635 }, { "epoch": 2.1208741478226596, "grad_norm": 0.5940659849117221, "learning_rate": 1.6280075696134094e-05, "loss": 0.2582, "step": 72640 }, { "epoch": 2.1210201310929766, "grad_norm": 0.6003465967112577, "learning_rate": 1.6277372262773725e-05, "loss": 0.2517, "step": 72645 }, { "epoch": 2.121166114363294, "grad_norm": 0.5653032956999703, "learning_rate": 1.6274668829413355e-05, "loss": 0.2536, "step": 72650 }, { "epoch": 2.121312097633611, "grad_norm": 0.5812290976887468, "learning_rate": 1.627196539605299e-05, "loss": 0.281, "step": 72655 }, { "epoch": 2.1214580809039285, "grad_norm": 0.5233437101565034, "learning_rate": 1.626926196269262e-05, "loss": 0.2553, "step": 72660 }, { "epoch": 2.1216040641742455, "grad_norm": 0.6197420472440102, "learning_rate": 1.626655852933225e-05, "loss": 0.25, "step": 72665 }, { "epoch": 2.121750047444563, "grad_norm": 0.5239153811193121, "learning_rate": 1.6263855095971884e-05, "loss": 0.2497, "step": 72670 }, { "epoch": 2.12189603071488, "grad_norm": 0.556583521445729, "learning_rate": 1.626115166261152e-05, "loss": 0.2551, "step": 72675 }, { "epoch": 2.1220420139851974, "grad_norm": 0.5374298862175291, "learning_rate": 1.625844822925115e-05, "loss": 0.2433, "step": 72680 }, { "epoch": 2.1221879972555144, "grad_norm": 0.5449018137014788, "learning_rate": 1.6255744795890783e-05, "loss": 0.2653, "step": 72685 }, { "epoch": 2.122333980525832, "grad_norm": 0.5553142822384745, "learning_rate": 1.6253041362530414e-05, "loss": 0.2559, "step": 72690 }, { "epoch": 2.122479963796149, "grad_norm": 0.5336025733158566, "learning_rate": 1.6250337929170048e-05, "loss": 0.251, "step": 72695 }, { "epoch": 2.1226259470664663, "grad_norm": 0.5605089014704482, "learning_rate": 1.624763449580968e-05, "loss": 0.2671, "step": 72700 }, { "epoch": 2.1227719303367834, "grad_norm": 0.5892090286140041, "learning_rate": 1.6244931062449312e-05, "loss": 0.2541, "step": 72705 }, { "epoch": 2.122917913607101, "grad_norm": 0.5336212440036641, "learning_rate": 1.6242227629088943e-05, "loss": 0.265, "step": 72710 }, { "epoch": 2.123063896877418, "grad_norm": 0.5640464043002889, "learning_rate": 1.6239524195728577e-05, "loss": 0.2645, "step": 72715 }, { "epoch": 2.1232098801477353, "grad_norm": 0.5460942246019918, "learning_rate": 1.6236820762368208e-05, "loss": 0.2455, "step": 72720 }, { "epoch": 2.1233558634180523, "grad_norm": 0.5917973826392668, "learning_rate": 1.623411732900784e-05, "loss": 0.2643, "step": 72725 }, { "epoch": 2.1235018466883697, "grad_norm": 0.5933147613288329, "learning_rate": 1.6231413895647472e-05, "loss": 0.2594, "step": 72730 }, { "epoch": 2.1236478299586867, "grad_norm": 0.5872356399856169, "learning_rate": 1.6228710462287106e-05, "loss": 0.2635, "step": 72735 }, { "epoch": 2.123793813229004, "grad_norm": 0.5848931407646184, "learning_rate": 1.6226007028926737e-05, "loss": 0.2417, "step": 72740 }, { "epoch": 2.123939796499321, "grad_norm": 0.5883982057391911, "learning_rate": 1.622330359556637e-05, "loss": 0.2558, "step": 72745 }, { "epoch": 2.124085779769638, "grad_norm": 0.5597008063468377, "learning_rate": 1.6220600162206e-05, "loss": 0.2602, "step": 72750 }, { "epoch": 2.1242317630399556, "grad_norm": 0.5162589609470744, "learning_rate": 1.6217896728845636e-05, "loss": 0.2454, "step": 72755 }, { "epoch": 2.1243777463102727, "grad_norm": 0.6061329841987536, "learning_rate": 1.6215193295485266e-05, "loss": 0.2739, "step": 72760 }, { "epoch": 2.12452372958059, "grad_norm": 0.5765589128063259, "learning_rate": 1.62124898621249e-05, "loss": 0.25, "step": 72765 }, { "epoch": 2.124669712850907, "grad_norm": 0.5548579588771225, "learning_rate": 1.620978642876453e-05, "loss": 0.2657, "step": 72770 }, { "epoch": 2.1248156961212246, "grad_norm": 0.5035894065020945, "learning_rate": 1.620708299540416e-05, "loss": 0.2502, "step": 72775 }, { "epoch": 2.1249616793915416, "grad_norm": 0.553092119183637, "learning_rate": 1.62043795620438e-05, "loss": 0.2415, "step": 72780 }, { "epoch": 2.125107662661859, "grad_norm": 0.5510802123820895, "learning_rate": 1.620167612868343e-05, "loss": 0.2663, "step": 72785 }, { "epoch": 2.125253645932176, "grad_norm": 0.5718222676820857, "learning_rate": 1.619897269532306e-05, "loss": 0.2521, "step": 72790 }, { "epoch": 2.1253996292024935, "grad_norm": 0.559618561332335, "learning_rate": 1.6196269261962694e-05, "loss": 0.2509, "step": 72795 }, { "epoch": 2.1255456124728105, "grad_norm": 0.5776609773415776, "learning_rate": 1.6193565828602325e-05, "loss": 0.2487, "step": 72800 }, { "epoch": 2.125691595743128, "grad_norm": 0.5631542884249913, "learning_rate": 1.6190862395241955e-05, "loss": 0.2435, "step": 72805 }, { "epoch": 2.125837579013445, "grad_norm": 0.5602198635018011, "learning_rate": 1.6188158961881593e-05, "loss": 0.2497, "step": 72810 }, { "epoch": 2.1259835622837624, "grad_norm": 0.5788986026807915, "learning_rate": 1.6185455528521223e-05, "loss": 0.262, "step": 72815 }, { "epoch": 2.1261295455540794, "grad_norm": 0.5665430020170206, "learning_rate": 1.6182752095160854e-05, "loss": 0.2521, "step": 72820 }, { "epoch": 2.126275528824397, "grad_norm": 0.5920487837067598, "learning_rate": 1.6180048661800488e-05, "loss": 0.2615, "step": 72825 }, { "epoch": 2.126421512094714, "grad_norm": 0.5898512129866965, "learning_rate": 1.617734522844012e-05, "loss": 0.2478, "step": 72830 }, { "epoch": 2.1265674953650313, "grad_norm": 0.5718905673292388, "learning_rate": 1.617464179507975e-05, "loss": 0.2497, "step": 72835 }, { "epoch": 2.1267134786353483, "grad_norm": 0.5785496714699804, "learning_rate": 1.6171938361719387e-05, "loss": 0.2432, "step": 72840 }, { "epoch": 2.1268594619056658, "grad_norm": 0.5735999733335995, "learning_rate": 1.6169234928359017e-05, "loss": 0.2685, "step": 72845 }, { "epoch": 2.127005445175983, "grad_norm": 0.5337607129584383, "learning_rate": 1.6166531494998648e-05, "loss": 0.2562, "step": 72850 }, { "epoch": 2.1271514284463002, "grad_norm": 0.5838432867855049, "learning_rate": 1.6163828061638282e-05, "loss": 0.2463, "step": 72855 }, { "epoch": 2.1272974117166172, "grad_norm": 0.6272299862158803, "learning_rate": 1.6161124628277912e-05, "loss": 0.256, "step": 72860 }, { "epoch": 2.1274433949869347, "grad_norm": 0.5626882737373016, "learning_rate": 1.6158421194917547e-05, "loss": 0.26, "step": 72865 }, { "epoch": 2.1275893782572517, "grad_norm": 0.6212793319194593, "learning_rate": 1.615571776155718e-05, "loss": 0.2514, "step": 72870 }, { "epoch": 2.127735361527569, "grad_norm": 0.587276792211298, "learning_rate": 1.615301432819681e-05, "loss": 0.2616, "step": 72875 }, { "epoch": 2.127881344797886, "grad_norm": 0.6116156430143299, "learning_rate": 1.6150310894836442e-05, "loss": 0.267, "step": 72880 }, { "epoch": 2.128027328068203, "grad_norm": 0.628731422413699, "learning_rate": 1.6147607461476076e-05, "loss": 0.2583, "step": 72885 }, { "epoch": 2.1281733113385206, "grad_norm": 0.5371768537684606, "learning_rate": 1.6144904028115706e-05, "loss": 0.2518, "step": 72890 }, { "epoch": 2.1283192946088376, "grad_norm": 0.5478481535676635, "learning_rate": 1.614220059475534e-05, "loss": 0.2573, "step": 72895 }, { "epoch": 2.128465277879155, "grad_norm": 0.5852296732807515, "learning_rate": 1.6139497161394974e-05, "loss": 0.2479, "step": 72900 }, { "epoch": 2.128611261149472, "grad_norm": 0.6188609154899197, "learning_rate": 1.6136793728034605e-05, "loss": 0.2624, "step": 72905 }, { "epoch": 2.1287572444197895, "grad_norm": 0.6125159765595727, "learning_rate": 1.6134090294674236e-05, "loss": 0.272, "step": 72910 }, { "epoch": 2.1289032276901065, "grad_norm": 0.5299555527461417, "learning_rate": 1.613138686131387e-05, "loss": 0.2651, "step": 72915 }, { "epoch": 2.129049210960424, "grad_norm": 0.5763866817480674, "learning_rate": 1.61286834279535e-05, "loss": 0.2626, "step": 72920 }, { "epoch": 2.129195194230741, "grad_norm": 0.5914208510276141, "learning_rate": 1.6125979994593134e-05, "loss": 0.2683, "step": 72925 }, { "epoch": 2.1293411775010584, "grad_norm": 0.5395950791569692, "learning_rate": 1.612327656123277e-05, "loss": 0.2425, "step": 72930 }, { "epoch": 2.1294871607713755, "grad_norm": 0.5257239372524996, "learning_rate": 1.61205731278724e-05, "loss": 0.2606, "step": 72935 }, { "epoch": 2.129633144041693, "grad_norm": 0.5690953360358667, "learning_rate": 1.611786969451203e-05, "loss": 0.239, "step": 72940 }, { "epoch": 2.12977912731201, "grad_norm": 0.5857468977568517, "learning_rate": 1.6115166261151664e-05, "loss": 0.2571, "step": 72945 }, { "epoch": 2.1299251105823274, "grad_norm": 0.5472547410963043, "learning_rate": 1.6112462827791298e-05, "loss": 0.2452, "step": 72950 }, { "epoch": 2.1300710938526444, "grad_norm": 0.5691789106153278, "learning_rate": 1.6109759394430928e-05, "loss": 0.2479, "step": 72955 }, { "epoch": 2.130217077122962, "grad_norm": 0.6314108918377179, "learning_rate": 1.6107055961070562e-05, "loss": 0.2771, "step": 72960 }, { "epoch": 2.130363060393279, "grad_norm": 0.5717101314040647, "learning_rate": 1.6104352527710193e-05, "loss": 0.2491, "step": 72965 }, { "epoch": 2.1305090436635963, "grad_norm": 0.5735977230024542, "learning_rate": 1.6101649094349823e-05, "loss": 0.2499, "step": 72970 }, { "epoch": 2.1306550269339133, "grad_norm": 0.563794138391455, "learning_rate": 1.6098945660989457e-05, "loss": 0.2428, "step": 72975 }, { "epoch": 2.1308010102042307, "grad_norm": 0.6103001207219944, "learning_rate": 1.609624222762909e-05, "loss": 0.2602, "step": 72980 }, { "epoch": 2.1309469934745477, "grad_norm": 0.5683492726524794, "learning_rate": 1.6093538794268722e-05, "loss": 0.2575, "step": 72985 }, { "epoch": 2.131092976744865, "grad_norm": 0.5980052566970415, "learning_rate": 1.6090835360908356e-05, "loss": 0.2453, "step": 72990 }, { "epoch": 2.131238960015182, "grad_norm": 0.5744617948564359, "learning_rate": 1.6088131927547987e-05, "loss": 0.264, "step": 72995 }, { "epoch": 2.1313849432854997, "grad_norm": 0.580894468208248, "learning_rate": 1.6085428494187617e-05, "loss": 0.2437, "step": 73000 }, { "epoch": 2.1315309265558167, "grad_norm": 0.5984985220933055, "learning_rate": 1.608272506082725e-05, "loss": 0.2738, "step": 73005 }, { "epoch": 2.131676909826134, "grad_norm": 0.5645166516514923, "learning_rate": 1.6080021627466885e-05, "loss": 0.2527, "step": 73010 }, { "epoch": 2.131822893096451, "grad_norm": 0.5151809542757162, "learning_rate": 1.6077318194106516e-05, "loss": 0.2531, "step": 73015 }, { "epoch": 2.1319688763667686, "grad_norm": 0.5494756681035873, "learning_rate": 1.607461476074615e-05, "loss": 0.2595, "step": 73020 }, { "epoch": 2.1321148596370856, "grad_norm": 0.5968666685146007, "learning_rate": 1.607191132738578e-05, "loss": 0.2627, "step": 73025 }, { "epoch": 2.132260842907403, "grad_norm": 0.6607294092324406, "learning_rate": 1.606920789402541e-05, "loss": 0.2705, "step": 73030 }, { "epoch": 2.13240682617772, "grad_norm": 0.5583763564084611, "learning_rate": 1.6066504460665045e-05, "loss": 0.2728, "step": 73035 }, { "epoch": 2.132552809448037, "grad_norm": 0.6098586067735906, "learning_rate": 1.606380102730468e-05, "loss": 0.2534, "step": 73040 }, { "epoch": 2.1326987927183545, "grad_norm": 0.5277147364905657, "learning_rate": 1.606109759394431e-05, "loss": 0.2564, "step": 73045 }, { "epoch": 2.1328447759886715, "grad_norm": 0.5453964054724936, "learning_rate": 1.605839416058394e-05, "loss": 0.2562, "step": 73050 }, { "epoch": 2.132990759258989, "grad_norm": 0.6048391183386588, "learning_rate": 1.6055690727223575e-05, "loss": 0.248, "step": 73055 }, { "epoch": 2.133136742529306, "grad_norm": 0.5532290070589444, "learning_rate": 1.6052987293863205e-05, "loss": 0.2632, "step": 73060 }, { "epoch": 2.1332827257996234, "grad_norm": 0.5344300065617363, "learning_rate": 1.605028386050284e-05, "loss": 0.2513, "step": 73065 }, { "epoch": 2.1334287090699404, "grad_norm": 0.5403555600829675, "learning_rate": 1.6047580427142473e-05, "loss": 0.278, "step": 73070 }, { "epoch": 2.133574692340258, "grad_norm": 0.601721781567069, "learning_rate": 1.6044876993782104e-05, "loss": 0.2866, "step": 73075 }, { "epoch": 2.133720675610575, "grad_norm": 0.5743220301295983, "learning_rate": 1.6042173560421734e-05, "loss": 0.2592, "step": 73080 }, { "epoch": 2.1338666588808923, "grad_norm": 0.5874016193732269, "learning_rate": 1.603947012706137e-05, "loss": 0.2642, "step": 73085 }, { "epoch": 2.1340126421512093, "grad_norm": 0.5434697604885511, "learning_rate": 1.6036766693701e-05, "loss": 0.2448, "step": 73090 }, { "epoch": 2.134158625421527, "grad_norm": 0.5770992538627107, "learning_rate": 1.6034063260340633e-05, "loss": 0.247, "step": 73095 }, { "epoch": 2.134304608691844, "grad_norm": 0.5944915586762459, "learning_rate": 1.6031359826980267e-05, "loss": 0.2612, "step": 73100 }, { "epoch": 2.1344505919621612, "grad_norm": 0.5494893185882878, "learning_rate": 1.6028656393619898e-05, "loss": 0.2511, "step": 73105 }, { "epoch": 2.1345965752324783, "grad_norm": 0.5578046959267285, "learning_rate": 1.602595296025953e-05, "loss": 0.2686, "step": 73110 }, { "epoch": 2.1347425585027957, "grad_norm": 0.5777019122269107, "learning_rate": 1.6023249526899162e-05, "loss": 0.2554, "step": 73115 }, { "epoch": 2.1348885417731127, "grad_norm": 0.5710267264340025, "learning_rate": 1.6020546093538796e-05, "loss": 0.2639, "step": 73120 }, { "epoch": 2.13503452504343, "grad_norm": 0.5451648115061424, "learning_rate": 1.6017842660178427e-05, "loss": 0.2597, "step": 73125 }, { "epoch": 2.135180508313747, "grad_norm": 0.6080874114050451, "learning_rate": 1.601513922681806e-05, "loss": 0.2546, "step": 73130 }, { "epoch": 2.1353264915840646, "grad_norm": 0.5607220520815546, "learning_rate": 1.601243579345769e-05, "loss": 0.2648, "step": 73135 }, { "epoch": 2.1354724748543816, "grad_norm": 0.5130548416647593, "learning_rate": 1.6009732360097322e-05, "loss": 0.2498, "step": 73140 }, { "epoch": 2.135618458124699, "grad_norm": 0.5551707665783951, "learning_rate": 1.6007028926736956e-05, "loss": 0.2738, "step": 73145 }, { "epoch": 2.135764441395016, "grad_norm": 0.5345978268258063, "learning_rate": 1.600432549337659e-05, "loss": 0.2442, "step": 73150 }, { "epoch": 2.1359104246653335, "grad_norm": 0.5488387040215922, "learning_rate": 1.600162206001622e-05, "loss": 0.2525, "step": 73155 }, { "epoch": 2.1360564079356505, "grad_norm": 0.5338267017288162, "learning_rate": 1.5998918626655855e-05, "loss": 0.2601, "step": 73160 }, { "epoch": 2.136202391205968, "grad_norm": 0.5977902083628175, "learning_rate": 1.5996215193295486e-05, "loss": 0.2639, "step": 73165 }, { "epoch": 2.136348374476285, "grad_norm": 0.573257436798688, "learning_rate": 1.5993511759935116e-05, "loss": 0.2593, "step": 73170 }, { "epoch": 2.136494357746602, "grad_norm": 0.5693551134414865, "learning_rate": 1.599080832657475e-05, "loss": 0.2773, "step": 73175 }, { "epoch": 2.1366403410169195, "grad_norm": 0.5382978987003915, "learning_rate": 1.5988104893214384e-05, "loss": 0.2627, "step": 73180 }, { "epoch": 2.136786324287237, "grad_norm": 0.5679642405091255, "learning_rate": 1.5985401459854015e-05, "loss": 0.2522, "step": 73185 }, { "epoch": 2.136932307557554, "grad_norm": 0.5754771205968664, "learning_rate": 1.598269802649365e-05, "loss": 0.2492, "step": 73190 }, { "epoch": 2.137078290827871, "grad_norm": 0.5680099420986298, "learning_rate": 1.597999459313328e-05, "loss": 0.2477, "step": 73195 }, { "epoch": 2.1372242740981884, "grad_norm": 0.5461378841283266, "learning_rate": 1.597729115977291e-05, "loss": 0.248, "step": 73200 }, { "epoch": 2.1373702573685054, "grad_norm": 0.5477475882238736, "learning_rate": 1.5974587726412547e-05, "loss": 0.2438, "step": 73205 }, { "epoch": 2.137516240638823, "grad_norm": 0.5385085646473655, "learning_rate": 1.5971884293052178e-05, "loss": 0.2485, "step": 73210 }, { "epoch": 2.13766222390914, "grad_norm": 0.5950872457061409, "learning_rate": 1.596918085969181e-05, "loss": 0.2738, "step": 73215 }, { "epoch": 2.1378082071794573, "grad_norm": 0.5225845924402955, "learning_rate": 1.5966477426331443e-05, "loss": 0.2486, "step": 73220 }, { "epoch": 2.1379541904497743, "grad_norm": 0.6067592749624554, "learning_rate": 1.5963773992971073e-05, "loss": 0.2508, "step": 73225 }, { "epoch": 2.1381001737200918, "grad_norm": 0.5614741156496939, "learning_rate": 1.5961070559610704e-05, "loss": 0.2432, "step": 73230 }, { "epoch": 2.1382461569904088, "grad_norm": 0.5897535071155798, "learning_rate": 1.595836712625034e-05, "loss": 0.2638, "step": 73235 }, { "epoch": 2.138392140260726, "grad_norm": 0.5407030965683545, "learning_rate": 1.5955663692889972e-05, "loss": 0.2473, "step": 73240 }, { "epoch": 2.1385381235310432, "grad_norm": 0.5581327731940787, "learning_rate": 1.5952960259529603e-05, "loss": 0.2469, "step": 73245 }, { "epoch": 2.1386841068013607, "grad_norm": 0.581711310897681, "learning_rate": 1.5950256826169237e-05, "loss": 0.2368, "step": 73250 }, { "epoch": 2.1388300900716777, "grad_norm": 0.5968426776610917, "learning_rate": 1.5947553392808867e-05, "loss": 0.2661, "step": 73255 }, { "epoch": 2.138976073341995, "grad_norm": 0.5791021159918115, "learning_rate": 1.5944849959448498e-05, "loss": 0.2557, "step": 73260 }, { "epoch": 2.139122056612312, "grad_norm": 0.5567182352377791, "learning_rate": 1.5942146526088135e-05, "loss": 0.2396, "step": 73265 }, { "epoch": 2.1392680398826296, "grad_norm": 0.5790198330587744, "learning_rate": 1.5939443092727766e-05, "loss": 0.261, "step": 73270 }, { "epoch": 2.1394140231529466, "grad_norm": 0.5730055215827822, "learning_rate": 1.5936739659367397e-05, "loss": 0.2523, "step": 73275 }, { "epoch": 2.139560006423264, "grad_norm": 0.5904874014039382, "learning_rate": 1.593403622600703e-05, "loss": 0.2582, "step": 73280 }, { "epoch": 2.139705989693581, "grad_norm": 0.5705448952731491, "learning_rate": 1.593133279264666e-05, "loss": 0.2687, "step": 73285 }, { "epoch": 2.1398519729638985, "grad_norm": 0.6107072908142377, "learning_rate": 1.5928629359286295e-05, "loss": 0.2484, "step": 73290 }, { "epoch": 2.1399979562342155, "grad_norm": 0.5463424283534926, "learning_rate": 1.5925925925925926e-05, "loss": 0.2417, "step": 73295 }, { "epoch": 2.140143939504533, "grad_norm": 0.6176221306593893, "learning_rate": 1.592322249256556e-05, "loss": 0.246, "step": 73300 }, { "epoch": 2.14028992277485, "grad_norm": 0.5528728301210345, "learning_rate": 1.592051905920519e-05, "loss": 0.25, "step": 73305 }, { "epoch": 2.1404359060451674, "grad_norm": 0.5437866906107073, "learning_rate": 1.5917815625844824e-05, "loss": 0.2437, "step": 73310 }, { "epoch": 2.1405818893154844, "grad_norm": 0.5831482554151396, "learning_rate": 1.5915112192484455e-05, "loss": 0.2634, "step": 73315 }, { "epoch": 2.140727872585802, "grad_norm": 0.5837225500922879, "learning_rate": 1.591240875912409e-05, "loss": 0.2679, "step": 73320 }, { "epoch": 2.140873855856119, "grad_norm": 0.5342415189109716, "learning_rate": 1.590970532576372e-05, "loss": 0.2296, "step": 73325 }, { "epoch": 2.141019839126436, "grad_norm": 0.5648917715610138, "learning_rate": 1.5907001892403354e-05, "loss": 0.2583, "step": 73330 }, { "epoch": 2.1411658223967533, "grad_norm": 0.6016748711122892, "learning_rate": 1.5904298459042984e-05, "loss": 0.2562, "step": 73335 }, { "epoch": 2.1413118056670704, "grad_norm": 0.5785232138755491, "learning_rate": 1.590159502568262e-05, "loss": 0.2392, "step": 73340 }, { "epoch": 2.141457788937388, "grad_norm": 0.5328386031319353, "learning_rate": 1.589889159232225e-05, "loss": 0.2584, "step": 73345 }, { "epoch": 2.141603772207705, "grad_norm": 0.5795440301903974, "learning_rate": 1.5896188158961883e-05, "loss": 0.2421, "step": 73350 }, { "epoch": 2.1417497554780223, "grad_norm": 0.5940480809081765, "learning_rate": 1.5893484725601514e-05, "loss": 0.2668, "step": 73355 }, { "epoch": 2.1418957387483393, "grad_norm": 0.528721633058191, "learning_rate": 1.5890781292241148e-05, "loss": 0.239, "step": 73360 }, { "epoch": 2.1420417220186567, "grad_norm": 0.5595772915059971, "learning_rate": 1.5888077858880778e-05, "loss": 0.2568, "step": 73365 }, { "epoch": 2.1421877052889737, "grad_norm": 0.5996766195489276, "learning_rate": 1.5885374425520412e-05, "loss": 0.2677, "step": 73370 }, { "epoch": 2.142333688559291, "grad_norm": 0.5153327834625898, "learning_rate": 1.5882670992160046e-05, "loss": 0.2483, "step": 73375 }, { "epoch": 2.142479671829608, "grad_norm": 0.5210581438262938, "learning_rate": 1.5879967558799677e-05, "loss": 0.2459, "step": 73380 }, { "epoch": 2.1426256550999256, "grad_norm": 0.5614329393086823, "learning_rate": 1.5877264125439307e-05, "loss": 0.2611, "step": 73385 }, { "epoch": 2.1427716383702426, "grad_norm": 0.5320814698906, "learning_rate": 1.587456069207894e-05, "loss": 0.251, "step": 73390 }, { "epoch": 2.14291762164056, "grad_norm": 0.564500942974779, "learning_rate": 1.5871857258718572e-05, "loss": 0.2577, "step": 73395 }, { "epoch": 2.143063604910877, "grad_norm": 0.6228301504549283, "learning_rate": 1.5869153825358206e-05, "loss": 0.2555, "step": 73400 }, { "epoch": 2.1432095881811946, "grad_norm": 0.6093354001625332, "learning_rate": 1.586645039199784e-05, "loss": 0.2588, "step": 73405 }, { "epoch": 2.1433555714515116, "grad_norm": 0.544574458805022, "learning_rate": 1.586374695863747e-05, "loss": 0.2432, "step": 73410 }, { "epoch": 2.143501554721829, "grad_norm": 0.5577025579921753, "learning_rate": 1.58610435252771e-05, "loss": 0.2446, "step": 73415 }, { "epoch": 2.143647537992146, "grad_norm": 0.5914851643864494, "learning_rate": 1.5858340091916735e-05, "loss": 0.2554, "step": 73420 }, { "epoch": 2.1437935212624635, "grad_norm": 0.5968733372183148, "learning_rate": 1.5855636658556366e-05, "loss": 0.249, "step": 73425 }, { "epoch": 2.1439395045327805, "grad_norm": 0.5634072596199969, "learning_rate": 1.5852933225195997e-05, "loss": 0.2614, "step": 73430 }, { "epoch": 2.144085487803098, "grad_norm": 0.5662611933027919, "learning_rate": 1.5850229791835634e-05, "loss": 0.2571, "step": 73435 }, { "epoch": 2.144231471073415, "grad_norm": 0.5688011212660011, "learning_rate": 1.5847526358475265e-05, "loss": 0.2441, "step": 73440 }, { "epoch": 2.1443774543437324, "grad_norm": 0.5268716035822905, "learning_rate": 1.5844822925114895e-05, "loss": 0.2514, "step": 73445 }, { "epoch": 2.1445234376140494, "grad_norm": 0.49843783644969997, "learning_rate": 1.584211949175453e-05, "loss": 0.2536, "step": 73450 }, { "epoch": 2.144669420884367, "grad_norm": 0.5883214528349411, "learning_rate": 1.583941605839416e-05, "loss": 0.2561, "step": 73455 }, { "epoch": 2.144815404154684, "grad_norm": 0.5983661415710682, "learning_rate": 1.5836712625033794e-05, "loss": 0.2637, "step": 73460 }, { "epoch": 2.144961387425001, "grad_norm": 0.5484560922084194, "learning_rate": 1.5834009191673428e-05, "loss": 0.2469, "step": 73465 }, { "epoch": 2.1451073706953183, "grad_norm": 0.5912544816637102, "learning_rate": 1.583130575831306e-05, "loss": 0.2435, "step": 73470 }, { "epoch": 2.1452533539656358, "grad_norm": 0.6104974340989501, "learning_rate": 1.582860232495269e-05, "loss": 0.2511, "step": 73475 }, { "epoch": 2.1453993372359528, "grad_norm": 0.6008596738709121, "learning_rate": 1.5825898891592323e-05, "loss": 0.2563, "step": 73480 }, { "epoch": 2.14554532050627, "grad_norm": 0.5796103394301482, "learning_rate": 1.5823195458231954e-05, "loss": 0.245, "step": 73485 }, { "epoch": 2.1456913037765872, "grad_norm": 0.6208660747010645, "learning_rate": 1.5820492024871588e-05, "loss": 0.2494, "step": 73490 }, { "epoch": 2.1458372870469042, "grad_norm": 0.5351197345337102, "learning_rate": 1.5817788591511222e-05, "loss": 0.2553, "step": 73495 }, { "epoch": 2.1459832703172217, "grad_norm": 0.574937299791452, "learning_rate": 1.5815085158150852e-05, "loss": 0.2472, "step": 73500 }, { "epoch": 2.1461292535875387, "grad_norm": 0.5908030520406864, "learning_rate": 1.5812381724790483e-05, "loss": 0.2409, "step": 73505 }, { "epoch": 2.146275236857856, "grad_norm": 0.5656938492361621, "learning_rate": 1.5809678291430117e-05, "loss": 0.2589, "step": 73510 }, { "epoch": 2.146421220128173, "grad_norm": 0.5508658819674185, "learning_rate": 1.5806974858069748e-05, "loss": 0.2598, "step": 73515 }, { "epoch": 2.1465672033984906, "grad_norm": 0.5321591474933383, "learning_rate": 1.5804271424709382e-05, "loss": 0.2351, "step": 73520 }, { "epoch": 2.1467131866688076, "grad_norm": 0.5913075770480706, "learning_rate": 1.5801567991349016e-05, "loss": 0.2624, "step": 73525 }, { "epoch": 2.146859169939125, "grad_norm": 0.566536064239448, "learning_rate": 1.5798864557988646e-05, "loss": 0.2502, "step": 73530 }, { "epoch": 2.147005153209442, "grad_norm": 0.5330049163973094, "learning_rate": 1.5796161124628277e-05, "loss": 0.2718, "step": 73535 }, { "epoch": 2.1471511364797595, "grad_norm": 0.5456183745113595, "learning_rate": 1.579345769126791e-05, "loss": 0.2359, "step": 73540 }, { "epoch": 2.1472971197500765, "grad_norm": 0.5591283894958479, "learning_rate": 1.5790754257907545e-05, "loss": 0.2564, "step": 73545 }, { "epoch": 2.147443103020394, "grad_norm": 0.5387922120410306, "learning_rate": 1.5788050824547176e-05, "loss": 0.2375, "step": 73550 }, { "epoch": 2.147589086290711, "grad_norm": 0.6135527292460222, "learning_rate": 1.578534739118681e-05, "loss": 0.2631, "step": 73555 }, { "epoch": 2.1477350695610284, "grad_norm": 0.5533896998078557, "learning_rate": 1.578264395782644e-05, "loss": 0.258, "step": 73560 }, { "epoch": 2.1478810528313454, "grad_norm": 0.5795234451004591, "learning_rate": 1.577994052446607e-05, "loss": 0.2581, "step": 73565 }, { "epoch": 2.148027036101663, "grad_norm": 0.5400420735487859, "learning_rate": 1.5777237091105705e-05, "loss": 0.2518, "step": 73570 }, { "epoch": 2.14817301937198, "grad_norm": 0.5998530242620842, "learning_rate": 1.577453365774534e-05, "loss": 0.256, "step": 73575 }, { "epoch": 2.1483190026422974, "grad_norm": 0.531531822482514, "learning_rate": 1.577183022438497e-05, "loss": 0.224, "step": 73580 }, { "epoch": 2.1484649859126144, "grad_norm": 0.5661824217178973, "learning_rate": 1.5769126791024604e-05, "loss": 0.2519, "step": 73585 }, { "epoch": 2.148610969182932, "grad_norm": 0.537208437488253, "learning_rate": 1.5766423357664234e-05, "loss": 0.2449, "step": 73590 }, { "epoch": 2.148756952453249, "grad_norm": 0.569696219121613, "learning_rate": 1.5763719924303865e-05, "loss": 0.2525, "step": 73595 }, { "epoch": 2.1489029357235663, "grad_norm": 0.5888229388535687, "learning_rate": 1.57610164909435e-05, "loss": 0.2565, "step": 73600 }, { "epoch": 2.1490489189938833, "grad_norm": 0.5663935965888285, "learning_rate": 1.5758313057583133e-05, "loss": 0.2479, "step": 73605 }, { "epoch": 2.1491949022642007, "grad_norm": 0.5415757105036424, "learning_rate": 1.5755609624222763e-05, "loss": 0.2385, "step": 73610 }, { "epoch": 2.1493408855345177, "grad_norm": 0.5578617259767157, "learning_rate": 1.5752906190862397e-05, "loss": 0.2541, "step": 73615 }, { "epoch": 2.1494868688048347, "grad_norm": 0.5565443453047524, "learning_rate": 1.5750202757502028e-05, "loss": 0.2479, "step": 73620 }, { "epoch": 2.149632852075152, "grad_norm": 0.5863535256806843, "learning_rate": 1.574749932414166e-05, "loss": 0.2661, "step": 73625 }, { "epoch": 2.149778835345469, "grad_norm": 0.6141231859097203, "learning_rate": 1.5744795890781293e-05, "loss": 0.2766, "step": 73630 }, { "epoch": 2.1499248186157867, "grad_norm": 0.6144510174503497, "learning_rate": 1.5742092457420927e-05, "loss": 0.2559, "step": 73635 }, { "epoch": 2.1500708018861037, "grad_norm": 0.5806768215998113, "learning_rate": 1.5739389024060557e-05, "loss": 0.2547, "step": 73640 }, { "epoch": 2.150216785156421, "grad_norm": 0.569085486874186, "learning_rate": 1.573668559070019e-05, "loss": 0.2497, "step": 73645 }, { "epoch": 2.150362768426738, "grad_norm": 0.5323501084990786, "learning_rate": 1.5733982157339822e-05, "loss": 0.2491, "step": 73650 }, { "epoch": 2.1505087516970556, "grad_norm": 0.6237246391287784, "learning_rate": 1.5731278723979453e-05, "loss": 0.2615, "step": 73655 }, { "epoch": 2.1506547349673726, "grad_norm": 0.5833715190132248, "learning_rate": 1.5728575290619087e-05, "loss": 0.2523, "step": 73660 }, { "epoch": 2.15080071823769, "grad_norm": 0.6191802790392502, "learning_rate": 1.572587185725872e-05, "loss": 0.2622, "step": 73665 }, { "epoch": 2.150946701508007, "grad_norm": 0.6179723029434966, "learning_rate": 1.572316842389835e-05, "loss": 0.2611, "step": 73670 }, { "epoch": 2.1510926847783245, "grad_norm": 0.5805552709173046, "learning_rate": 1.5720464990537985e-05, "loss": 0.2628, "step": 73675 }, { "epoch": 2.1512386680486415, "grad_norm": 0.5736380664446274, "learning_rate": 1.5717761557177616e-05, "loss": 0.2566, "step": 73680 }, { "epoch": 2.151384651318959, "grad_norm": 0.6078459325563238, "learning_rate": 1.5715058123817247e-05, "loss": 0.2716, "step": 73685 }, { "epoch": 2.151530634589276, "grad_norm": 0.552607022232919, "learning_rate": 1.571235469045688e-05, "loss": 0.2641, "step": 73690 }, { "epoch": 2.1516766178595934, "grad_norm": 0.5924233511514759, "learning_rate": 1.5709651257096515e-05, "loss": 0.262, "step": 73695 }, { "epoch": 2.1518226011299104, "grad_norm": 0.5169090623746483, "learning_rate": 1.5706947823736145e-05, "loss": 0.2497, "step": 73700 }, { "epoch": 2.151968584400228, "grad_norm": 0.6047910275920718, "learning_rate": 1.5704244390375776e-05, "loss": 0.2469, "step": 73705 }, { "epoch": 2.152114567670545, "grad_norm": 0.5373620334722266, "learning_rate": 1.570154095701541e-05, "loss": 0.2561, "step": 73710 }, { "epoch": 2.1522605509408623, "grad_norm": 0.5487669932403257, "learning_rate": 1.5698837523655044e-05, "loss": 0.2542, "step": 73715 }, { "epoch": 2.1524065342111793, "grad_norm": 0.5321765958352527, "learning_rate": 1.5696134090294674e-05, "loss": 0.2445, "step": 73720 }, { "epoch": 2.152552517481497, "grad_norm": 0.5609051683092561, "learning_rate": 1.569343065693431e-05, "loss": 0.256, "step": 73725 }, { "epoch": 2.152698500751814, "grad_norm": 0.6099383351500456, "learning_rate": 1.569072722357394e-05, "loss": 0.2708, "step": 73730 }, { "epoch": 2.1528444840221312, "grad_norm": 0.5622961850185757, "learning_rate": 1.568802379021357e-05, "loss": 0.241, "step": 73735 }, { "epoch": 2.1529904672924483, "grad_norm": 0.5563040290150016, "learning_rate": 1.5685320356853204e-05, "loss": 0.2444, "step": 73740 }, { "epoch": 2.1531364505627657, "grad_norm": 0.5538807365587242, "learning_rate": 1.5682616923492838e-05, "loss": 0.2422, "step": 73745 }, { "epoch": 2.1532824338330827, "grad_norm": 0.5966356449579571, "learning_rate": 1.567991349013247e-05, "loss": 0.2653, "step": 73750 }, { "epoch": 2.1534284171033997, "grad_norm": 0.6347654435044146, "learning_rate": 1.5677210056772102e-05, "loss": 0.2632, "step": 73755 }, { "epoch": 2.153574400373717, "grad_norm": 0.568210744939192, "learning_rate": 1.5674506623411733e-05, "loss": 0.2568, "step": 73760 }, { "epoch": 2.1537203836440346, "grad_norm": 0.5271586580254205, "learning_rate": 1.5671803190051364e-05, "loss": 0.2624, "step": 73765 }, { "epoch": 2.1538663669143516, "grad_norm": 0.5253373122681438, "learning_rate": 1.5669099756690998e-05, "loss": 0.2412, "step": 73770 }, { "epoch": 2.1540123501846686, "grad_norm": 0.6034848706895352, "learning_rate": 1.566639632333063e-05, "loss": 0.254, "step": 73775 }, { "epoch": 2.154158333454986, "grad_norm": 0.5878752909262968, "learning_rate": 1.5663692889970262e-05, "loss": 0.2615, "step": 73780 }, { "epoch": 2.154304316725303, "grad_norm": 0.5761159525120414, "learning_rate": 1.5660989456609896e-05, "loss": 0.243, "step": 73785 }, { "epoch": 2.1544502999956205, "grad_norm": 0.5720743898818662, "learning_rate": 1.5658286023249527e-05, "loss": 0.2429, "step": 73790 }, { "epoch": 2.1545962832659376, "grad_norm": 0.5546327177823475, "learning_rate": 1.5655582589889158e-05, "loss": 0.2369, "step": 73795 }, { "epoch": 2.154742266536255, "grad_norm": 0.5729547045854644, "learning_rate": 1.5652879156528795e-05, "loss": 0.2465, "step": 73800 }, { "epoch": 2.154888249806572, "grad_norm": 0.5379395863621067, "learning_rate": 1.5650175723168426e-05, "loss": 0.2533, "step": 73805 }, { "epoch": 2.1550342330768895, "grad_norm": 0.5585754765586404, "learning_rate": 1.5647472289808056e-05, "loss": 0.2425, "step": 73810 }, { "epoch": 2.1551802163472065, "grad_norm": 0.5257425644358904, "learning_rate": 1.564476885644769e-05, "loss": 0.2366, "step": 73815 }, { "epoch": 2.155326199617524, "grad_norm": 0.5585791659438892, "learning_rate": 1.564206542308732e-05, "loss": 0.2518, "step": 73820 }, { "epoch": 2.155472182887841, "grad_norm": 0.5731426366676121, "learning_rate": 1.563936198972695e-05, "loss": 0.2528, "step": 73825 }, { "epoch": 2.1556181661581584, "grad_norm": 0.5739810987642239, "learning_rate": 1.563665855636659e-05, "loss": 0.2467, "step": 73830 }, { "epoch": 2.1557641494284754, "grad_norm": 0.591214283193711, "learning_rate": 1.563395512300622e-05, "loss": 0.2587, "step": 73835 }, { "epoch": 2.155910132698793, "grad_norm": 0.5647848962369519, "learning_rate": 1.563125168964585e-05, "loss": 0.2472, "step": 73840 }, { "epoch": 2.15605611596911, "grad_norm": 11.802154602553134, "learning_rate": 1.5628548256285484e-05, "loss": 0.3172, "step": 73845 }, { "epoch": 2.1562020992394273, "grad_norm": 0.5680561442688442, "learning_rate": 1.5625844822925115e-05, "loss": 0.2626, "step": 73850 }, { "epoch": 2.1563480825097443, "grad_norm": 0.530288698165567, "learning_rate": 1.5623141389564745e-05, "loss": 0.2489, "step": 73855 }, { "epoch": 2.1564940657800618, "grad_norm": 0.529256203999088, "learning_rate": 1.5620437956204383e-05, "loss": 0.2457, "step": 73860 }, { "epoch": 2.1566400490503788, "grad_norm": 0.5917844553976502, "learning_rate": 1.5617734522844013e-05, "loss": 0.2657, "step": 73865 }, { "epoch": 2.156786032320696, "grad_norm": 0.5367073039189116, "learning_rate": 1.5615031089483644e-05, "loss": 0.2578, "step": 73870 }, { "epoch": 2.156932015591013, "grad_norm": 0.5386721459683295, "learning_rate": 1.5612327656123278e-05, "loss": 0.26, "step": 73875 }, { "epoch": 2.1570779988613307, "grad_norm": 0.5724142494607811, "learning_rate": 1.560962422276291e-05, "loss": 0.2395, "step": 73880 }, { "epoch": 2.1572239821316477, "grad_norm": 0.5353373867056301, "learning_rate": 1.5606920789402543e-05, "loss": 0.2529, "step": 73885 }, { "epoch": 2.157369965401965, "grad_norm": 0.5316033924684527, "learning_rate": 1.5604217356042177e-05, "loss": 0.2429, "step": 73890 }, { "epoch": 2.157515948672282, "grad_norm": 0.6291391031803236, "learning_rate": 1.5601513922681807e-05, "loss": 0.2767, "step": 73895 }, { "epoch": 2.1576619319425996, "grad_norm": 0.6116051200324383, "learning_rate": 1.5598810489321438e-05, "loss": 0.2575, "step": 73900 }, { "epoch": 2.1578079152129166, "grad_norm": 0.5959425537157357, "learning_rate": 1.5596107055961072e-05, "loss": 0.2673, "step": 73905 }, { "epoch": 2.1579538984832336, "grad_norm": 0.5306329700837464, "learning_rate": 1.5593403622600703e-05, "loss": 0.2623, "step": 73910 }, { "epoch": 2.158099881753551, "grad_norm": 0.577273299469844, "learning_rate": 1.5590700189240337e-05, "loss": 0.2513, "step": 73915 }, { "epoch": 2.158245865023868, "grad_norm": 0.5645467378932435, "learning_rate": 1.558799675587997e-05, "loss": 0.2454, "step": 73920 }, { "epoch": 2.1583918482941855, "grad_norm": 0.5926992902579017, "learning_rate": 1.55852933225196e-05, "loss": 0.2628, "step": 73925 }, { "epoch": 2.1585378315645025, "grad_norm": 0.5807376742659885, "learning_rate": 1.5582589889159232e-05, "loss": 0.252, "step": 73930 }, { "epoch": 2.15868381483482, "grad_norm": 0.6260551158441109, "learning_rate": 1.5579886455798866e-05, "loss": 0.2534, "step": 73935 }, { "epoch": 2.158829798105137, "grad_norm": 0.5677462642918365, "learning_rate": 1.5577183022438496e-05, "loss": 0.2483, "step": 73940 }, { "epoch": 2.1589757813754544, "grad_norm": 0.5680051301645749, "learning_rate": 1.557447958907813e-05, "loss": 0.2564, "step": 73945 }, { "epoch": 2.1591217646457714, "grad_norm": 0.5902265062348363, "learning_rate": 1.557177615571776e-05, "loss": 0.251, "step": 73950 }, { "epoch": 2.159267747916089, "grad_norm": 0.5262824906288446, "learning_rate": 1.5569072722357395e-05, "loss": 0.2619, "step": 73955 }, { "epoch": 2.159413731186406, "grad_norm": 0.5836966727314721, "learning_rate": 1.5566369288997026e-05, "loss": 0.2515, "step": 73960 }, { "epoch": 2.1595597144567233, "grad_norm": 0.5954244335333064, "learning_rate": 1.556366585563666e-05, "loss": 0.2439, "step": 73965 }, { "epoch": 2.1597056977270404, "grad_norm": 0.6096929888181407, "learning_rate": 1.5560962422276294e-05, "loss": 0.2695, "step": 73970 }, { "epoch": 2.159851680997358, "grad_norm": 0.6051577650346937, "learning_rate": 1.5558258988915924e-05, "loss": 0.2597, "step": 73975 }, { "epoch": 2.159997664267675, "grad_norm": 0.5734164104210024, "learning_rate": 1.5555555555555555e-05, "loss": 0.2664, "step": 73980 }, { "epoch": 2.1601436475379923, "grad_norm": 0.5779487937502383, "learning_rate": 1.555285212219519e-05, "loss": 0.2537, "step": 73985 }, { "epoch": 2.1602896308083093, "grad_norm": 0.5979811260430002, "learning_rate": 1.555014868883482e-05, "loss": 0.2476, "step": 73990 }, { "epoch": 2.1604356140786267, "grad_norm": 0.6756569683713993, "learning_rate": 1.5547445255474454e-05, "loss": 0.2665, "step": 73995 }, { "epoch": 2.1605815973489437, "grad_norm": 0.6001068366424248, "learning_rate": 1.5544741822114088e-05, "loss": 0.25, "step": 74000 }, { "epoch": 2.160727580619261, "grad_norm": 0.5871609622714772, "learning_rate": 1.5542038388753718e-05, "loss": 0.2549, "step": 74005 }, { "epoch": 2.160873563889578, "grad_norm": 0.576740512417928, "learning_rate": 1.553933495539335e-05, "loss": 0.2677, "step": 74010 }, { "epoch": 2.1610195471598956, "grad_norm": 0.5717016030344997, "learning_rate": 1.5536631522032983e-05, "loss": 0.2412, "step": 74015 }, { "epoch": 2.1611655304302126, "grad_norm": 0.5215221469217042, "learning_rate": 1.5533928088672613e-05, "loss": 0.2476, "step": 74020 }, { "epoch": 2.16131151370053, "grad_norm": 0.5715375349923856, "learning_rate": 1.5531224655312247e-05, "loss": 0.2451, "step": 74025 }, { "epoch": 2.161457496970847, "grad_norm": 0.5587030334191975, "learning_rate": 1.552852122195188e-05, "loss": 0.2483, "step": 74030 }, { "epoch": 2.1616034802411646, "grad_norm": 0.5152257179144756, "learning_rate": 1.5525817788591512e-05, "loss": 0.2491, "step": 74035 }, { "epoch": 2.1617494635114816, "grad_norm": 0.542814612598504, "learning_rate": 1.5523114355231143e-05, "loss": 0.2439, "step": 74040 }, { "epoch": 2.161895446781799, "grad_norm": 0.5396974881013396, "learning_rate": 1.5520410921870777e-05, "loss": 0.2306, "step": 74045 }, { "epoch": 2.162041430052116, "grad_norm": 0.550986473824294, "learning_rate": 1.5517707488510407e-05, "loss": 0.2469, "step": 74050 }, { "epoch": 2.1621874133224335, "grad_norm": 0.5678156829697153, "learning_rate": 1.551500405515004e-05, "loss": 0.2612, "step": 74055 }, { "epoch": 2.1623333965927505, "grad_norm": 0.5524406500695133, "learning_rate": 1.5512300621789675e-05, "loss": 0.2498, "step": 74060 }, { "epoch": 2.1624793798630675, "grad_norm": 0.5650138148705182, "learning_rate": 1.5509597188429306e-05, "loss": 0.2511, "step": 74065 }, { "epoch": 2.162625363133385, "grad_norm": 0.584957487393551, "learning_rate": 1.5506893755068937e-05, "loss": 0.2546, "step": 74070 }, { "epoch": 2.162771346403702, "grad_norm": 0.5486204887869656, "learning_rate": 1.550419032170857e-05, "loss": 0.244, "step": 74075 }, { "epoch": 2.1629173296740194, "grad_norm": 0.5578918052696715, "learning_rate": 1.55014868883482e-05, "loss": 0.2483, "step": 74080 }, { "epoch": 2.1630633129443364, "grad_norm": 0.6417924261331155, "learning_rate": 1.5498783454987835e-05, "loss": 0.2595, "step": 74085 }, { "epoch": 2.163209296214654, "grad_norm": 0.5978365899012531, "learning_rate": 1.549608002162747e-05, "loss": 0.2714, "step": 74090 }, { "epoch": 2.163355279484971, "grad_norm": 0.5353059916473304, "learning_rate": 1.54933765882671e-05, "loss": 0.2474, "step": 74095 }, { "epoch": 2.1635012627552883, "grad_norm": 0.5353068478575649, "learning_rate": 1.549067315490673e-05, "loss": 0.2372, "step": 74100 }, { "epoch": 2.1636472460256053, "grad_norm": 0.6161130184551347, "learning_rate": 1.5487969721546365e-05, "loss": 0.2724, "step": 74105 }, { "epoch": 2.1637932292959228, "grad_norm": 0.5909667419010574, "learning_rate": 1.5485266288186e-05, "loss": 0.2709, "step": 74110 }, { "epoch": 2.16393921256624, "grad_norm": 0.5955623558900366, "learning_rate": 1.548256285482563e-05, "loss": 0.2506, "step": 74115 }, { "epoch": 2.1640851958365572, "grad_norm": 0.5749078318361175, "learning_rate": 1.5479859421465263e-05, "loss": 0.233, "step": 74120 }, { "epoch": 2.1642311791068742, "grad_norm": 0.5360895738311192, "learning_rate": 1.5477155988104894e-05, "loss": 0.2606, "step": 74125 }, { "epoch": 2.1643771623771917, "grad_norm": 0.5950111042757322, "learning_rate": 1.5474452554744524e-05, "loss": 0.2656, "step": 74130 }, { "epoch": 2.1645231456475087, "grad_norm": 0.5319137055789712, "learning_rate": 1.547174912138416e-05, "loss": 0.2414, "step": 74135 }, { "epoch": 2.164669128917826, "grad_norm": 0.5462364184048563, "learning_rate": 1.5469045688023792e-05, "loss": 0.2379, "step": 74140 }, { "epoch": 2.164815112188143, "grad_norm": 0.5465976851269173, "learning_rate": 1.5466342254663423e-05, "loss": 0.2502, "step": 74145 }, { "epoch": 2.1649610954584606, "grad_norm": 0.5570548959720837, "learning_rate": 1.5463638821303057e-05, "loss": 0.2555, "step": 74150 }, { "epoch": 2.1651070787287776, "grad_norm": 0.5843162382908643, "learning_rate": 1.5460935387942688e-05, "loss": 0.2674, "step": 74155 }, { "epoch": 2.165253061999095, "grad_norm": 0.5730978641344, "learning_rate": 1.545823195458232e-05, "loss": 0.2566, "step": 74160 }, { "epoch": 2.165399045269412, "grad_norm": 0.626499625312598, "learning_rate": 1.5455528521221952e-05, "loss": 0.2591, "step": 74165 }, { "epoch": 2.1655450285397295, "grad_norm": 0.596489066858917, "learning_rate": 1.5452825087861586e-05, "loss": 0.2511, "step": 74170 }, { "epoch": 2.1656910118100465, "grad_norm": 0.6192978442680956, "learning_rate": 1.5450121654501217e-05, "loss": 0.2634, "step": 74175 }, { "epoch": 2.165836995080364, "grad_norm": 0.5366277409621923, "learning_rate": 1.544741822114085e-05, "loss": 0.2555, "step": 74180 }, { "epoch": 2.165982978350681, "grad_norm": 0.5478229884837555, "learning_rate": 1.544471478778048e-05, "loss": 0.2402, "step": 74185 }, { "epoch": 2.1661289616209984, "grad_norm": 0.5817448108622122, "learning_rate": 1.5442011354420112e-05, "loss": 0.2559, "step": 74190 }, { "epoch": 2.1662749448913154, "grad_norm": 0.5421953060280976, "learning_rate": 1.543930792105975e-05, "loss": 0.2608, "step": 74195 }, { "epoch": 2.1664209281616325, "grad_norm": 0.5305926700017821, "learning_rate": 1.543660448769938e-05, "loss": 0.2486, "step": 74200 }, { "epoch": 2.16656691143195, "grad_norm": 0.6011110290415407, "learning_rate": 1.543390105433901e-05, "loss": 0.2485, "step": 74205 }, { "epoch": 2.1667128947022674, "grad_norm": 0.5466665447501058, "learning_rate": 1.5431197620978645e-05, "loss": 0.2692, "step": 74210 }, { "epoch": 2.1668588779725844, "grad_norm": 0.5803209417127481, "learning_rate": 1.5428494187618276e-05, "loss": 0.2696, "step": 74215 }, { "epoch": 2.1670048612429014, "grad_norm": 0.6028629141798547, "learning_rate": 1.5425790754257906e-05, "loss": 0.2594, "step": 74220 }, { "epoch": 2.167150844513219, "grad_norm": 0.6097665498269508, "learning_rate": 1.542308732089754e-05, "loss": 0.255, "step": 74225 }, { "epoch": 2.167296827783536, "grad_norm": 0.5665295343163094, "learning_rate": 1.5420383887537174e-05, "loss": 0.2646, "step": 74230 }, { "epoch": 2.1674428110538533, "grad_norm": 0.5731856531622124, "learning_rate": 1.5417680454176805e-05, "loss": 0.2336, "step": 74235 }, { "epoch": 2.1675887943241703, "grad_norm": 0.5236762750322729, "learning_rate": 1.541497702081644e-05, "loss": 0.2571, "step": 74240 }, { "epoch": 2.1677347775944877, "grad_norm": 0.612820178904116, "learning_rate": 1.541227358745607e-05, "loss": 0.2549, "step": 74245 }, { "epoch": 2.1678807608648047, "grad_norm": 0.5335775080213964, "learning_rate": 1.54095701540957e-05, "loss": 0.2372, "step": 74250 }, { "epoch": 2.168026744135122, "grad_norm": 0.49889588776639837, "learning_rate": 1.5406866720735334e-05, "loss": 0.2291, "step": 74255 }, { "epoch": 2.168172727405439, "grad_norm": 0.5835259191824244, "learning_rate": 1.5404163287374968e-05, "loss": 0.2591, "step": 74260 }, { "epoch": 2.1683187106757567, "grad_norm": 0.5345726068052886, "learning_rate": 1.54014598540146e-05, "loss": 0.2502, "step": 74265 }, { "epoch": 2.1684646939460737, "grad_norm": 0.5688412276275261, "learning_rate": 1.5398756420654233e-05, "loss": 0.2681, "step": 74270 }, { "epoch": 2.168610677216391, "grad_norm": 0.5956492098496134, "learning_rate": 1.5396052987293863e-05, "loss": 0.2558, "step": 74275 }, { "epoch": 2.168756660486708, "grad_norm": 0.5486426220366464, "learning_rate": 1.5393349553933497e-05, "loss": 0.2395, "step": 74280 }, { "epoch": 2.1689026437570256, "grad_norm": 0.5396168736517992, "learning_rate": 1.5390646120573128e-05, "loss": 0.2554, "step": 74285 }, { "epoch": 2.1690486270273426, "grad_norm": 0.6023351514142518, "learning_rate": 1.5387942687212762e-05, "loss": 0.2571, "step": 74290 }, { "epoch": 2.16919461029766, "grad_norm": 0.6097988769620726, "learning_rate": 1.5385239253852393e-05, "loss": 0.2532, "step": 74295 }, { "epoch": 2.169340593567977, "grad_norm": 0.5534169454387292, "learning_rate": 1.5382535820492027e-05, "loss": 0.2445, "step": 74300 }, { "epoch": 2.1694865768382945, "grad_norm": 0.5457935540543196, "learning_rate": 1.5379832387131657e-05, "loss": 0.2561, "step": 74305 }, { "epoch": 2.1696325601086115, "grad_norm": 0.5536076336417636, "learning_rate": 1.537712895377129e-05, "loss": 0.2544, "step": 74310 }, { "epoch": 2.169778543378929, "grad_norm": 0.5833364178358833, "learning_rate": 1.5374425520410922e-05, "loss": 0.2545, "step": 74315 }, { "epoch": 2.169924526649246, "grad_norm": 0.6063449847185238, "learning_rate": 1.5371722087050556e-05, "loss": 0.2595, "step": 74320 }, { "epoch": 2.1700705099195634, "grad_norm": 0.5453594627347371, "learning_rate": 1.5369018653690187e-05, "loss": 0.2307, "step": 74325 }, { "epoch": 2.1702164931898804, "grad_norm": 0.5583246645746266, "learning_rate": 1.536631522032982e-05, "loss": 0.238, "step": 74330 }, { "epoch": 2.170362476460198, "grad_norm": 0.611239319580769, "learning_rate": 1.536361178696945e-05, "loss": 0.2529, "step": 74335 }, { "epoch": 2.170508459730515, "grad_norm": 0.5637818845493732, "learning_rate": 1.5360908353609085e-05, "loss": 0.2644, "step": 74340 }, { "epoch": 2.1706544430008323, "grad_norm": 0.5648694220898958, "learning_rate": 1.5358204920248716e-05, "loss": 0.233, "step": 74345 }, { "epoch": 2.1708004262711493, "grad_norm": 0.6100166261934991, "learning_rate": 1.535550148688835e-05, "loss": 0.2615, "step": 74350 }, { "epoch": 2.1709464095414663, "grad_norm": 0.5856218578185154, "learning_rate": 1.535279805352798e-05, "loss": 0.2643, "step": 74355 }, { "epoch": 2.171092392811784, "grad_norm": 0.5240937944850367, "learning_rate": 1.535009462016761e-05, "loss": 0.2556, "step": 74360 }, { "epoch": 2.171238376082101, "grad_norm": 0.5294089730958326, "learning_rate": 1.534739118680725e-05, "loss": 0.2435, "step": 74365 }, { "epoch": 2.1713843593524182, "grad_norm": 0.6053994317289829, "learning_rate": 1.534468775344688e-05, "loss": 0.251, "step": 74370 }, { "epoch": 2.1715303426227353, "grad_norm": 0.6100273866273241, "learning_rate": 1.534198432008651e-05, "loss": 0.2681, "step": 74375 }, { "epoch": 2.1716763258930527, "grad_norm": 0.574056608884927, "learning_rate": 1.5339280886726144e-05, "loss": 0.2616, "step": 74380 }, { "epoch": 2.1718223091633697, "grad_norm": 0.5807067956371139, "learning_rate": 1.5336577453365774e-05, "loss": 0.2555, "step": 74385 }, { "epoch": 2.171968292433687, "grad_norm": 0.5713217979992092, "learning_rate": 1.5333874020005405e-05, "loss": 0.2474, "step": 74390 }, { "epoch": 2.172114275704004, "grad_norm": 0.5424886311851771, "learning_rate": 1.5331170586645042e-05, "loss": 0.2564, "step": 74395 }, { "epoch": 2.1722602589743216, "grad_norm": 0.5443766932534156, "learning_rate": 1.5328467153284673e-05, "loss": 0.2503, "step": 74400 }, { "epoch": 2.1724062422446386, "grad_norm": 0.5866271319950193, "learning_rate": 1.5325763719924304e-05, "loss": 0.2408, "step": 74405 }, { "epoch": 2.172552225514956, "grad_norm": 0.564372213358048, "learning_rate": 1.5323060286563938e-05, "loss": 0.2545, "step": 74410 }, { "epoch": 2.172698208785273, "grad_norm": 0.5983037574649336, "learning_rate": 1.5320356853203568e-05, "loss": 0.2552, "step": 74415 }, { "epoch": 2.1728441920555905, "grad_norm": 0.567384662728638, "learning_rate": 1.53176534198432e-05, "loss": 0.2381, "step": 74420 }, { "epoch": 2.1729901753259075, "grad_norm": 0.5789269458405198, "learning_rate": 1.5314949986482836e-05, "loss": 0.2378, "step": 74425 }, { "epoch": 2.173136158596225, "grad_norm": 0.5898950321608725, "learning_rate": 1.5312246553122467e-05, "loss": 0.2452, "step": 74430 }, { "epoch": 2.173282141866542, "grad_norm": 0.5995544262434246, "learning_rate": 1.5309543119762098e-05, "loss": 0.2434, "step": 74435 }, { "epoch": 2.1734281251368595, "grad_norm": 0.5215049179163375, "learning_rate": 1.530683968640173e-05, "loss": 0.2456, "step": 74440 }, { "epoch": 2.1735741084071765, "grad_norm": 0.5798406487022931, "learning_rate": 1.5304136253041362e-05, "loss": 0.2575, "step": 74445 }, { "epoch": 2.173720091677494, "grad_norm": 0.5725104500473298, "learning_rate": 1.5301432819680996e-05, "loss": 0.2595, "step": 74450 }, { "epoch": 2.173866074947811, "grad_norm": 0.5929567341065924, "learning_rate": 1.529872938632063e-05, "loss": 0.2495, "step": 74455 }, { "epoch": 2.1740120582181284, "grad_norm": 0.5736625672675201, "learning_rate": 1.529602595296026e-05, "loss": 0.2506, "step": 74460 }, { "epoch": 2.1741580414884454, "grad_norm": 0.5727993926495981, "learning_rate": 1.529332251959989e-05, "loss": 0.2636, "step": 74465 }, { "epoch": 2.174304024758763, "grad_norm": 0.5478252406839778, "learning_rate": 1.5290619086239525e-05, "loss": 0.2517, "step": 74470 }, { "epoch": 2.17445000802908, "grad_norm": 0.58253337355087, "learning_rate": 1.5287915652879156e-05, "loss": 0.249, "step": 74475 }, { "epoch": 2.1745959912993973, "grad_norm": 0.6573049284649539, "learning_rate": 1.528521221951879e-05, "loss": 0.2542, "step": 74480 }, { "epoch": 2.1747419745697143, "grad_norm": 0.5555346467423009, "learning_rate": 1.5282508786158424e-05, "loss": 0.2556, "step": 74485 }, { "epoch": 2.1748879578400313, "grad_norm": 0.5510504775414007, "learning_rate": 1.5279805352798055e-05, "loss": 0.2554, "step": 74490 }, { "epoch": 2.1750339411103488, "grad_norm": 0.5383781832297392, "learning_rate": 1.5277101919437685e-05, "loss": 0.2574, "step": 74495 }, { "epoch": 2.175179924380666, "grad_norm": 0.5938508006344189, "learning_rate": 1.527439848607732e-05, "loss": 0.2518, "step": 74500 }, { "epoch": 2.175325907650983, "grad_norm": 0.5870510156427661, "learning_rate": 1.527169505271695e-05, "loss": 0.2544, "step": 74505 }, { "epoch": 2.1754718909213, "grad_norm": 0.5265045493606295, "learning_rate": 1.5268991619356584e-05, "loss": 0.247, "step": 74510 }, { "epoch": 2.1756178741916177, "grad_norm": 0.6183363729655255, "learning_rate": 1.5266288185996218e-05, "loss": 0.2597, "step": 74515 }, { "epoch": 2.1757638574619347, "grad_norm": 0.5468055607175204, "learning_rate": 1.526358475263585e-05, "loss": 0.2367, "step": 74520 }, { "epoch": 2.175909840732252, "grad_norm": 0.5448154267776596, "learning_rate": 1.526088131927548e-05, "loss": 0.2629, "step": 74525 }, { "epoch": 2.176055824002569, "grad_norm": 0.5378222781242474, "learning_rate": 1.5258177885915112e-05, "loss": 0.2488, "step": 74530 }, { "epoch": 2.1762018072728866, "grad_norm": 0.5593744045600222, "learning_rate": 1.5255474452554747e-05, "loss": 0.2475, "step": 74535 }, { "epoch": 2.1763477905432036, "grad_norm": 0.5842084817585774, "learning_rate": 1.5252771019194378e-05, "loss": 0.2492, "step": 74540 }, { "epoch": 2.176493773813521, "grad_norm": 0.5425545158520982, "learning_rate": 1.525006758583401e-05, "loss": 0.2435, "step": 74545 }, { "epoch": 2.176639757083838, "grad_norm": 0.5973267766304179, "learning_rate": 1.5247364152473643e-05, "loss": 0.2352, "step": 74550 }, { "epoch": 2.1767857403541555, "grad_norm": 0.562986508078223, "learning_rate": 1.5244660719113275e-05, "loss": 0.2628, "step": 74555 }, { "epoch": 2.1769317236244725, "grad_norm": 0.5061734088949965, "learning_rate": 1.5241957285752905e-05, "loss": 0.2541, "step": 74560 }, { "epoch": 2.17707770689479, "grad_norm": 0.5547235322056334, "learning_rate": 1.5239253852392541e-05, "loss": 0.2486, "step": 74565 }, { "epoch": 2.177223690165107, "grad_norm": 0.5634822598312065, "learning_rate": 1.5236550419032172e-05, "loss": 0.2491, "step": 74570 }, { "epoch": 2.1773696734354244, "grad_norm": 0.5376109157741826, "learning_rate": 1.5233846985671804e-05, "loss": 0.2446, "step": 74575 }, { "epoch": 2.1775156567057414, "grad_norm": 0.48708845756947766, "learning_rate": 1.5231143552311436e-05, "loss": 0.2531, "step": 74580 }, { "epoch": 2.177661639976059, "grad_norm": 0.5158213426994167, "learning_rate": 1.5228440118951067e-05, "loss": 0.2561, "step": 74585 }, { "epoch": 2.177807623246376, "grad_norm": 0.6183225242084383, "learning_rate": 1.52257366855907e-05, "loss": 0.2536, "step": 74590 }, { "epoch": 2.1779536065166933, "grad_norm": 0.541435156750712, "learning_rate": 1.5223033252230335e-05, "loss": 0.2397, "step": 74595 }, { "epoch": 2.1780995897870103, "grad_norm": 0.5913290960806407, "learning_rate": 1.5220329818869966e-05, "loss": 0.2422, "step": 74600 }, { "epoch": 2.178245573057328, "grad_norm": 0.6010595456320806, "learning_rate": 1.5217626385509598e-05, "loss": 0.2547, "step": 74605 }, { "epoch": 2.178391556327645, "grad_norm": 0.6231772214707954, "learning_rate": 1.521492295214923e-05, "loss": 0.2586, "step": 74610 }, { "epoch": 2.1785375395979623, "grad_norm": 0.559061323168074, "learning_rate": 1.5212219518788861e-05, "loss": 0.2692, "step": 74615 }, { "epoch": 2.1786835228682793, "grad_norm": 0.6253372861585224, "learning_rate": 1.5209516085428497e-05, "loss": 0.271, "step": 74620 }, { "epoch": 2.1788295061385967, "grad_norm": 0.5621629167960478, "learning_rate": 1.5206812652068127e-05, "loss": 0.2629, "step": 74625 }, { "epoch": 2.1789754894089137, "grad_norm": 0.5257066578299647, "learning_rate": 1.520410921870776e-05, "loss": 0.2457, "step": 74630 }, { "epoch": 2.179121472679231, "grad_norm": 0.5825857539966149, "learning_rate": 1.5201405785347392e-05, "loss": 0.2581, "step": 74635 }, { "epoch": 2.179267455949548, "grad_norm": 0.5469529251472011, "learning_rate": 1.5198702351987024e-05, "loss": 0.2609, "step": 74640 }, { "epoch": 2.179413439219865, "grad_norm": 0.5456503059808544, "learning_rate": 1.5195998918626655e-05, "loss": 0.254, "step": 74645 }, { "epoch": 2.1795594224901826, "grad_norm": 0.5543409738069742, "learning_rate": 1.519329548526629e-05, "loss": 0.2542, "step": 74650 }, { "epoch": 2.1797054057604996, "grad_norm": 0.5650019601938422, "learning_rate": 1.5190592051905921e-05, "loss": 0.2638, "step": 74655 }, { "epoch": 2.179851389030817, "grad_norm": 0.5661596769969397, "learning_rate": 1.5187888618545553e-05, "loss": 0.2449, "step": 74660 }, { "epoch": 2.179997372301134, "grad_norm": 0.5478713768807504, "learning_rate": 1.5185185185185186e-05, "loss": 0.2458, "step": 74665 }, { "epoch": 2.1801433555714516, "grad_norm": 0.5698361766077015, "learning_rate": 1.5182481751824818e-05, "loss": 0.2447, "step": 74670 }, { "epoch": 2.1802893388417686, "grad_norm": 0.5573279139587836, "learning_rate": 1.5179778318464449e-05, "loss": 0.2615, "step": 74675 }, { "epoch": 2.180435322112086, "grad_norm": 0.5864434995825427, "learning_rate": 1.5177074885104084e-05, "loss": 0.2536, "step": 74680 }, { "epoch": 2.180581305382403, "grad_norm": 0.5780045058319163, "learning_rate": 1.5174371451743715e-05, "loss": 0.2576, "step": 74685 }, { "epoch": 2.1807272886527205, "grad_norm": 0.6156851319060765, "learning_rate": 1.5171668018383347e-05, "loss": 0.2461, "step": 74690 }, { "epoch": 2.1808732719230375, "grad_norm": 0.5731040314315224, "learning_rate": 1.516896458502298e-05, "loss": 0.2674, "step": 74695 }, { "epoch": 2.181019255193355, "grad_norm": 0.5902852994592206, "learning_rate": 1.5166261151662612e-05, "loss": 0.251, "step": 74700 }, { "epoch": 2.181165238463672, "grad_norm": 0.5999346033611065, "learning_rate": 1.5163557718302246e-05, "loss": 0.2625, "step": 74705 }, { "epoch": 2.1813112217339894, "grad_norm": 0.5550839849194551, "learning_rate": 1.5160854284941878e-05, "loss": 0.2524, "step": 74710 }, { "epoch": 2.1814572050043064, "grad_norm": 0.5536675536476252, "learning_rate": 1.5158150851581509e-05, "loss": 0.2497, "step": 74715 }, { "epoch": 2.181603188274624, "grad_norm": 0.5631560377865065, "learning_rate": 1.5155447418221141e-05, "loss": 0.2579, "step": 74720 }, { "epoch": 2.181749171544941, "grad_norm": 0.597836612377667, "learning_rate": 1.5152743984860774e-05, "loss": 0.252, "step": 74725 }, { "epoch": 2.1818951548152583, "grad_norm": 0.5318128688331454, "learning_rate": 1.5150040551500406e-05, "loss": 0.2499, "step": 74730 }, { "epoch": 2.1820411380855753, "grad_norm": 0.5688974788828662, "learning_rate": 1.514733711814004e-05, "loss": 0.2593, "step": 74735 }, { "epoch": 2.1821871213558928, "grad_norm": 0.5832044798871362, "learning_rate": 1.5144633684779672e-05, "loss": 0.25, "step": 74740 }, { "epoch": 2.1823331046262098, "grad_norm": 0.5867313514129181, "learning_rate": 1.5141930251419303e-05, "loss": 0.2641, "step": 74745 }, { "epoch": 2.1824790878965272, "grad_norm": 0.5661189540373125, "learning_rate": 1.5139226818058935e-05, "loss": 0.2493, "step": 74750 }, { "epoch": 2.1826250711668442, "grad_norm": 0.5410127088384942, "learning_rate": 1.5136523384698568e-05, "loss": 0.2522, "step": 74755 }, { "epoch": 2.1827710544371617, "grad_norm": 0.6308577818107577, "learning_rate": 1.5133819951338198e-05, "loss": 0.269, "step": 74760 }, { "epoch": 2.1829170377074787, "grad_norm": 0.545628243258827, "learning_rate": 1.5131116517977834e-05, "loss": 0.2481, "step": 74765 }, { "epoch": 2.183063020977796, "grad_norm": 0.5530083113355599, "learning_rate": 1.5128413084617466e-05, "loss": 0.2455, "step": 74770 }, { "epoch": 2.183209004248113, "grad_norm": 0.5819751695806298, "learning_rate": 1.5125709651257097e-05, "loss": 0.2527, "step": 74775 }, { "epoch": 2.18335498751843, "grad_norm": 0.5962122319738306, "learning_rate": 1.5123006217896729e-05, "loss": 0.2565, "step": 74780 }, { "epoch": 2.1835009707887476, "grad_norm": 0.5872364865232301, "learning_rate": 1.5120302784536361e-05, "loss": 0.2609, "step": 74785 }, { "epoch": 2.183646954059065, "grad_norm": 0.5610929969268205, "learning_rate": 1.5117599351175995e-05, "loss": 0.232, "step": 74790 }, { "epoch": 2.183792937329382, "grad_norm": 0.5468006824438005, "learning_rate": 1.5114895917815628e-05, "loss": 0.2397, "step": 74795 }, { "epoch": 2.183938920599699, "grad_norm": 0.5707118404429765, "learning_rate": 1.511219248445526e-05, "loss": 0.2471, "step": 74800 }, { "epoch": 2.1840849038700165, "grad_norm": 0.5872010582258805, "learning_rate": 1.510948905109489e-05, "loss": 0.2424, "step": 74805 }, { "epoch": 2.1842308871403335, "grad_norm": 0.5773865884593633, "learning_rate": 1.5106785617734523e-05, "loss": 0.2399, "step": 74810 }, { "epoch": 2.184376870410651, "grad_norm": 0.5879076867968338, "learning_rate": 1.5104082184374155e-05, "loss": 0.266, "step": 74815 }, { "epoch": 2.184522853680968, "grad_norm": 0.6075870787409812, "learning_rate": 1.510137875101379e-05, "loss": 0.2492, "step": 74820 }, { "epoch": 2.1846688369512854, "grad_norm": 0.5750053555679303, "learning_rate": 1.5098675317653422e-05, "loss": 0.2519, "step": 74825 }, { "epoch": 2.1848148202216024, "grad_norm": 0.5657198953499578, "learning_rate": 1.5095971884293052e-05, "loss": 0.2387, "step": 74830 }, { "epoch": 2.18496080349192, "grad_norm": 0.5297341011022513, "learning_rate": 1.5093268450932685e-05, "loss": 0.2356, "step": 74835 }, { "epoch": 2.185106786762237, "grad_norm": 0.6020512743793864, "learning_rate": 1.5090565017572317e-05, "loss": 0.2686, "step": 74840 }, { "epoch": 2.1852527700325544, "grad_norm": 0.5723186339355182, "learning_rate": 1.508786158421195e-05, "loss": 0.2679, "step": 74845 }, { "epoch": 2.1853987533028714, "grad_norm": 0.6301409001292513, "learning_rate": 1.5085158150851583e-05, "loss": 0.2509, "step": 74850 }, { "epoch": 2.185544736573189, "grad_norm": 0.5686545917951368, "learning_rate": 1.5082454717491216e-05, "loss": 0.2562, "step": 74855 }, { "epoch": 2.185690719843506, "grad_norm": 0.5612255762215453, "learning_rate": 1.5079751284130846e-05, "loss": 0.2531, "step": 74860 }, { "epoch": 2.1858367031138233, "grad_norm": 0.5579287138467828, "learning_rate": 1.5077047850770478e-05, "loss": 0.2468, "step": 74865 }, { "epoch": 2.1859826863841403, "grad_norm": 0.5763395147032602, "learning_rate": 1.507434441741011e-05, "loss": 0.2527, "step": 74870 }, { "epoch": 2.1861286696544577, "grad_norm": 0.5376623895186007, "learning_rate": 1.5071640984049745e-05, "loss": 0.2472, "step": 74875 }, { "epoch": 2.1862746529247747, "grad_norm": 0.5686546078389513, "learning_rate": 1.5068937550689377e-05, "loss": 0.2609, "step": 74880 }, { "epoch": 2.186420636195092, "grad_norm": 0.5803587316955015, "learning_rate": 1.506623411732901e-05, "loss": 0.2524, "step": 74885 }, { "epoch": 2.186566619465409, "grad_norm": 0.5585888240158727, "learning_rate": 1.506353068396864e-05, "loss": 0.2456, "step": 74890 }, { "epoch": 2.1867126027357267, "grad_norm": 0.5724701600550134, "learning_rate": 1.5060827250608272e-05, "loss": 0.2669, "step": 74895 }, { "epoch": 2.1868585860060437, "grad_norm": 0.5765835488646502, "learning_rate": 1.5058123817247905e-05, "loss": 0.249, "step": 74900 }, { "epoch": 2.187004569276361, "grad_norm": 0.5587433867716473, "learning_rate": 1.5055420383887539e-05, "loss": 0.2609, "step": 74905 }, { "epoch": 2.187150552546678, "grad_norm": 0.5902354759691911, "learning_rate": 1.5052716950527171e-05, "loss": 0.2653, "step": 74910 }, { "epoch": 2.1872965358169956, "grad_norm": 0.5745350655356259, "learning_rate": 1.5050013517166803e-05, "loss": 0.2517, "step": 74915 }, { "epoch": 2.1874425190873126, "grad_norm": 0.5669861741694637, "learning_rate": 1.5047310083806434e-05, "loss": 0.2556, "step": 74920 }, { "epoch": 2.18758850235763, "grad_norm": 0.5509371579445228, "learning_rate": 1.5044606650446066e-05, "loss": 0.2562, "step": 74925 }, { "epoch": 2.187734485627947, "grad_norm": 0.5507867222927257, "learning_rate": 1.5041903217085699e-05, "loss": 0.2268, "step": 74930 }, { "epoch": 2.187880468898264, "grad_norm": 0.5768641942855302, "learning_rate": 1.5039199783725333e-05, "loss": 0.2519, "step": 74935 }, { "epoch": 2.1880264521685815, "grad_norm": 0.550494869051971, "learning_rate": 1.5036496350364965e-05, "loss": 0.2456, "step": 74940 }, { "epoch": 2.1881724354388985, "grad_norm": 0.5966244158640203, "learning_rate": 1.5033792917004597e-05, "loss": 0.2562, "step": 74945 }, { "epoch": 2.188318418709216, "grad_norm": 0.5710502511417034, "learning_rate": 1.5031089483644228e-05, "loss": 0.2415, "step": 74950 }, { "epoch": 2.188464401979533, "grad_norm": 0.5596223535401473, "learning_rate": 1.502838605028386e-05, "loss": 0.2506, "step": 74955 }, { "epoch": 2.1886103852498504, "grad_norm": 0.5391051459584483, "learning_rate": 1.5025682616923494e-05, "loss": 0.2608, "step": 74960 }, { "epoch": 2.1887563685201674, "grad_norm": 0.6342028115119007, "learning_rate": 1.5022979183563127e-05, "loss": 0.2488, "step": 74965 }, { "epoch": 2.188902351790485, "grad_norm": 0.6385490772382296, "learning_rate": 1.5020275750202759e-05, "loss": 0.2375, "step": 74970 }, { "epoch": 2.189048335060802, "grad_norm": 0.5533641717317606, "learning_rate": 1.5017572316842391e-05, "loss": 0.2677, "step": 74975 }, { "epoch": 2.1891943183311193, "grad_norm": 0.551828561112284, "learning_rate": 1.5014868883482022e-05, "loss": 0.2457, "step": 74980 }, { "epoch": 2.1893403016014363, "grad_norm": 0.5571395276207849, "learning_rate": 1.5012165450121654e-05, "loss": 0.2456, "step": 74985 }, { "epoch": 2.189486284871754, "grad_norm": 0.6015623670138602, "learning_rate": 1.5009462016761288e-05, "loss": 0.2686, "step": 74990 }, { "epoch": 2.189632268142071, "grad_norm": 0.5851105054141674, "learning_rate": 1.500675858340092e-05, "loss": 0.2485, "step": 74995 }, { "epoch": 2.1897782514123882, "grad_norm": 0.5888209468263712, "learning_rate": 1.5004055150040553e-05, "loss": 0.2609, "step": 75000 }, { "epoch": 2.1899242346827053, "grad_norm": 0.583768417830517, "learning_rate": 1.5001351716680185e-05, "loss": 0.2442, "step": 75005 }, { "epoch": 2.1900702179530227, "grad_norm": 0.5098484224708538, "learning_rate": 1.4998648283319816e-05, "loss": 0.2424, "step": 75010 }, { "epoch": 2.1902162012233397, "grad_norm": 0.5377284847803188, "learning_rate": 1.4995944849959448e-05, "loss": 0.2709, "step": 75015 }, { "epoch": 2.190362184493657, "grad_norm": 0.6009425789315197, "learning_rate": 1.4993241416599082e-05, "loss": 0.2513, "step": 75020 }, { "epoch": 2.190508167763974, "grad_norm": 0.5660814510476646, "learning_rate": 1.4990537983238714e-05, "loss": 0.2498, "step": 75025 }, { "epoch": 2.1906541510342916, "grad_norm": 0.5018590051758617, "learning_rate": 1.4987834549878347e-05, "loss": 0.23, "step": 75030 }, { "epoch": 2.1908001343046086, "grad_norm": 0.5799446808117296, "learning_rate": 1.4985131116517977e-05, "loss": 0.2475, "step": 75035 }, { "epoch": 2.190946117574926, "grad_norm": 0.580826765640956, "learning_rate": 1.498242768315761e-05, "loss": 0.2624, "step": 75040 }, { "epoch": 2.191092100845243, "grad_norm": 0.5878733406668918, "learning_rate": 1.4979724249797245e-05, "loss": 0.2594, "step": 75045 }, { "epoch": 2.1912380841155605, "grad_norm": 0.5560282682710672, "learning_rate": 1.4977020816436876e-05, "loss": 0.2435, "step": 75050 }, { "epoch": 2.1913840673858775, "grad_norm": 0.5931945300281216, "learning_rate": 1.4974317383076508e-05, "loss": 0.2525, "step": 75055 }, { "epoch": 2.191530050656195, "grad_norm": 0.5594808589963173, "learning_rate": 1.497161394971614e-05, "loss": 0.2431, "step": 75060 }, { "epoch": 2.191676033926512, "grad_norm": 0.5743548151534686, "learning_rate": 1.4968910516355771e-05, "loss": 0.2597, "step": 75065 }, { "epoch": 2.191822017196829, "grad_norm": 0.5979244222940634, "learning_rate": 1.4966207082995403e-05, "loss": 0.2486, "step": 75070 }, { "epoch": 2.1919680004671465, "grad_norm": 0.575537323223527, "learning_rate": 1.496350364963504e-05, "loss": 0.2656, "step": 75075 }, { "epoch": 2.192113983737464, "grad_norm": 0.5490344914581432, "learning_rate": 1.496080021627467e-05, "loss": 0.2624, "step": 75080 }, { "epoch": 2.192259967007781, "grad_norm": 0.5877476582844932, "learning_rate": 1.4958096782914302e-05, "loss": 0.247, "step": 75085 }, { "epoch": 2.192405950278098, "grad_norm": 0.6309856132576808, "learning_rate": 1.4955393349553934e-05, "loss": 0.2449, "step": 75090 }, { "epoch": 2.1925519335484154, "grad_norm": 0.5658951629008133, "learning_rate": 1.4952689916193565e-05, "loss": 0.244, "step": 75095 }, { "epoch": 2.1926979168187324, "grad_norm": 0.5967723394250439, "learning_rate": 1.4949986482833197e-05, "loss": 0.2506, "step": 75100 }, { "epoch": 2.19284390008905, "grad_norm": 0.5421189240537752, "learning_rate": 1.4947283049472831e-05, "loss": 0.2625, "step": 75105 }, { "epoch": 2.192989883359367, "grad_norm": 0.6204096015054732, "learning_rate": 1.4944579616112464e-05, "loss": 0.2646, "step": 75110 }, { "epoch": 2.1931358666296843, "grad_norm": 0.5767502113510672, "learning_rate": 1.4941876182752096e-05, "loss": 0.2554, "step": 75115 }, { "epoch": 2.1932818499000013, "grad_norm": 0.5417068696276236, "learning_rate": 1.4939172749391728e-05, "loss": 0.2532, "step": 75120 }, { "epoch": 2.1934278331703188, "grad_norm": 0.564729564923428, "learning_rate": 1.4936469316031359e-05, "loss": 0.2623, "step": 75125 }, { "epoch": 2.1935738164406358, "grad_norm": 0.6253235915820033, "learning_rate": 1.4933765882670995e-05, "loss": 0.2482, "step": 75130 }, { "epoch": 2.193719799710953, "grad_norm": 0.5523582380230367, "learning_rate": 1.4931062449310625e-05, "loss": 0.2447, "step": 75135 }, { "epoch": 2.19386578298127, "grad_norm": 0.5586066094106904, "learning_rate": 1.4928359015950258e-05, "loss": 0.2512, "step": 75140 }, { "epoch": 2.1940117662515877, "grad_norm": 0.5119051211718079, "learning_rate": 1.492565558258989e-05, "loss": 0.2353, "step": 75145 }, { "epoch": 2.1941577495219047, "grad_norm": 0.5834001486782241, "learning_rate": 1.4922952149229522e-05, "loss": 0.2606, "step": 75150 }, { "epoch": 2.194303732792222, "grad_norm": 0.5304365243079389, "learning_rate": 1.4920248715869153e-05, "loss": 0.238, "step": 75155 }, { "epoch": 2.194449716062539, "grad_norm": 0.5543751360177794, "learning_rate": 1.4917545282508789e-05, "loss": 0.2444, "step": 75160 }, { "epoch": 2.1945956993328566, "grad_norm": 0.5985003995618996, "learning_rate": 1.491484184914842e-05, "loss": 0.2708, "step": 75165 }, { "epoch": 2.1947416826031736, "grad_norm": 0.5557298669217448, "learning_rate": 1.4912138415788052e-05, "loss": 0.2581, "step": 75170 }, { "epoch": 2.194887665873491, "grad_norm": 0.54283184750067, "learning_rate": 1.4909434982427684e-05, "loss": 0.2563, "step": 75175 }, { "epoch": 2.195033649143808, "grad_norm": 0.6321521189022249, "learning_rate": 1.4906731549067316e-05, "loss": 0.2734, "step": 75180 }, { "epoch": 2.1951796324141255, "grad_norm": 0.5647806821794503, "learning_rate": 1.4904028115706947e-05, "loss": 0.2526, "step": 75185 }, { "epoch": 2.1953256156844425, "grad_norm": 0.5611561061152796, "learning_rate": 1.4901324682346583e-05, "loss": 0.243, "step": 75190 }, { "epoch": 2.19547159895476, "grad_norm": 0.6187712528470994, "learning_rate": 1.4898621248986213e-05, "loss": 0.2427, "step": 75195 }, { "epoch": 2.195617582225077, "grad_norm": 0.5773371065483344, "learning_rate": 1.4895917815625845e-05, "loss": 0.2763, "step": 75200 }, { "epoch": 2.1957635654953944, "grad_norm": 0.5457026323027673, "learning_rate": 1.4893214382265478e-05, "loss": 0.2571, "step": 75205 }, { "epoch": 2.1959095487657114, "grad_norm": 0.5812740504336206, "learning_rate": 1.4890510948905108e-05, "loss": 0.2507, "step": 75210 }, { "epoch": 2.196055532036029, "grad_norm": 0.5823649621176515, "learning_rate": 1.4887807515544744e-05, "loss": 0.2458, "step": 75215 }, { "epoch": 2.196201515306346, "grad_norm": 0.583977275404151, "learning_rate": 1.4885104082184376e-05, "loss": 0.2663, "step": 75220 }, { "epoch": 2.196347498576663, "grad_norm": 0.6194647420180842, "learning_rate": 1.4882400648824007e-05, "loss": 0.2576, "step": 75225 }, { "epoch": 2.1964934818469803, "grad_norm": 0.5598862498904906, "learning_rate": 1.487969721546364e-05, "loss": 0.2479, "step": 75230 }, { "epoch": 2.1966394651172974, "grad_norm": 0.5462249401490842, "learning_rate": 1.4876993782103272e-05, "loss": 0.2568, "step": 75235 }, { "epoch": 2.196785448387615, "grad_norm": 0.6028160679395511, "learning_rate": 1.4874290348742902e-05, "loss": 0.2499, "step": 75240 }, { "epoch": 2.196931431657932, "grad_norm": 0.560284937895095, "learning_rate": 1.4871586915382538e-05, "loss": 0.2576, "step": 75245 }, { "epoch": 2.1970774149282493, "grad_norm": 0.5479476921023287, "learning_rate": 1.486888348202217e-05, "loss": 0.2301, "step": 75250 }, { "epoch": 2.1972233981985663, "grad_norm": 0.6246667754417727, "learning_rate": 1.4866180048661801e-05, "loss": 0.2674, "step": 75255 }, { "epoch": 2.1973693814688837, "grad_norm": 0.5781637334144876, "learning_rate": 1.4863476615301433e-05, "loss": 0.2495, "step": 75260 }, { "epoch": 2.1975153647392007, "grad_norm": 0.5550721786934438, "learning_rate": 1.4860773181941066e-05, "loss": 0.2374, "step": 75265 }, { "epoch": 2.197661348009518, "grad_norm": 0.5874744425230747, "learning_rate": 1.4858069748580696e-05, "loss": 0.2355, "step": 75270 }, { "epoch": 2.197807331279835, "grad_norm": 0.6077474267850084, "learning_rate": 1.4855366315220332e-05, "loss": 0.2632, "step": 75275 }, { "epoch": 2.1979533145501526, "grad_norm": 0.6047366521115164, "learning_rate": 1.4852662881859963e-05, "loss": 0.28, "step": 75280 }, { "epoch": 2.1980992978204696, "grad_norm": 0.5601987450903184, "learning_rate": 1.4849959448499595e-05, "loss": 0.2436, "step": 75285 }, { "epoch": 2.198245281090787, "grad_norm": 0.5785432778128001, "learning_rate": 1.4847256015139227e-05, "loss": 0.2453, "step": 75290 }, { "epoch": 2.198391264361104, "grad_norm": 0.5688635160047462, "learning_rate": 1.484455258177886e-05, "loss": 0.2592, "step": 75295 }, { "epoch": 2.1985372476314216, "grad_norm": 0.5807436666075746, "learning_rate": 1.4841849148418493e-05, "loss": 0.2674, "step": 75300 }, { "epoch": 2.1986832309017386, "grad_norm": 0.5987779064971097, "learning_rate": 1.4839145715058126e-05, "loss": 0.2634, "step": 75305 }, { "epoch": 2.198829214172056, "grad_norm": 0.5704481550821153, "learning_rate": 1.4836442281697756e-05, "loss": 0.2584, "step": 75310 }, { "epoch": 2.198975197442373, "grad_norm": 0.5731946167607329, "learning_rate": 1.4833738848337389e-05, "loss": 0.2639, "step": 75315 }, { "epoch": 2.1991211807126905, "grad_norm": 0.5911870178587234, "learning_rate": 1.4831035414977021e-05, "loss": 0.2605, "step": 75320 }, { "epoch": 2.1992671639830075, "grad_norm": 0.5883729988960889, "learning_rate": 1.4828331981616653e-05, "loss": 0.2465, "step": 75325 }, { "epoch": 2.199413147253325, "grad_norm": 0.5549039049403741, "learning_rate": 1.4825628548256287e-05, "loss": 0.2694, "step": 75330 }, { "epoch": 2.199559130523642, "grad_norm": 0.5673046411352697, "learning_rate": 1.482292511489592e-05, "loss": 0.2542, "step": 75335 }, { "epoch": 2.1997051137939594, "grad_norm": 0.6057756362815799, "learning_rate": 1.482022168153555e-05, "loss": 0.2593, "step": 75340 }, { "epoch": 2.1998510970642764, "grad_norm": 0.5595837391946002, "learning_rate": 1.4817518248175183e-05, "loss": 0.2745, "step": 75345 }, { "epoch": 2.199997080334594, "grad_norm": 0.5560117058173821, "learning_rate": 1.4814814814814815e-05, "loss": 0.2487, "step": 75350 }, { "epoch": 2.200143063604911, "grad_norm": 0.5610447929519087, "learning_rate": 1.4812111381454447e-05, "loss": 0.2511, "step": 75355 }, { "epoch": 2.2002890468752283, "grad_norm": 0.5859823918588042, "learning_rate": 1.4809407948094081e-05, "loss": 0.2532, "step": 75360 }, { "epoch": 2.2004350301455453, "grad_norm": 0.5796185736845797, "learning_rate": 1.4806704514733714e-05, "loss": 0.2596, "step": 75365 }, { "epoch": 2.2005810134158628, "grad_norm": 0.5491541624909227, "learning_rate": 1.4804001081373344e-05, "loss": 0.2566, "step": 75370 }, { "epoch": 2.2007269966861798, "grad_norm": 0.5464570134640085, "learning_rate": 1.4801297648012977e-05, "loss": 0.2479, "step": 75375 }, { "epoch": 2.2008729799564968, "grad_norm": 0.5903274493166061, "learning_rate": 1.4798594214652609e-05, "loss": 0.2658, "step": 75380 }, { "epoch": 2.2010189632268142, "grad_norm": 0.6432770895942466, "learning_rate": 1.4795890781292243e-05, "loss": 0.2648, "step": 75385 }, { "epoch": 2.2011649464971312, "grad_norm": 0.5439082578847364, "learning_rate": 1.4793187347931875e-05, "loss": 0.2406, "step": 75390 }, { "epoch": 2.2013109297674487, "grad_norm": 0.6028196079863842, "learning_rate": 1.4790483914571508e-05, "loss": 0.2703, "step": 75395 }, { "epoch": 2.2014569130377657, "grad_norm": 0.5182765781666803, "learning_rate": 1.4787780481211138e-05, "loss": 0.2445, "step": 75400 }, { "epoch": 2.201602896308083, "grad_norm": 0.576433110213157, "learning_rate": 1.478507704785077e-05, "loss": 0.2539, "step": 75405 }, { "epoch": 2.2017488795784, "grad_norm": 0.601085865213005, "learning_rate": 1.4782373614490403e-05, "loss": 0.2652, "step": 75410 }, { "epoch": 2.2018948628487176, "grad_norm": 0.5786465819965285, "learning_rate": 1.4779670181130037e-05, "loss": 0.26, "step": 75415 }, { "epoch": 2.2020408461190346, "grad_norm": 0.5426618152516524, "learning_rate": 1.4776966747769669e-05, "loss": 0.2321, "step": 75420 }, { "epoch": 2.202186829389352, "grad_norm": 0.61535441159367, "learning_rate": 1.4774263314409301e-05, "loss": 0.2557, "step": 75425 }, { "epoch": 2.202332812659669, "grad_norm": 0.5899510090846796, "learning_rate": 1.4771559881048932e-05, "loss": 0.2475, "step": 75430 }, { "epoch": 2.2024787959299865, "grad_norm": 0.5978616242716219, "learning_rate": 1.4768856447688564e-05, "loss": 0.2433, "step": 75435 }, { "epoch": 2.2026247792003035, "grad_norm": 0.5695241166323595, "learning_rate": 1.4766153014328197e-05, "loss": 0.2494, "step": 75440 }, { "epoch": 2.202770762470621, "grad_norm": 0.5681125647278685, "learning_rate": 1.476344958096783e-05, "loss": 0.2626, "step": 75445 }, { "epoch": 2.202916745740938, "grad_norm": 0.5329112475868133, "learning_rate": 1.4760746147607463e-05, "loss": 0.2611, "step": 75450 }, { "epoch": 2.2030627290112554, "grad_norm": 0.5538698050586417, "learning_rate": 1.4758042714247095e-05, "loss": 0.2522, "step": 75455 }, { "epoch": 2.2032087122815724, "grad_norm": 0.5713365905307517, "learning_rate": 1.4755339280886726e-05, "loss": 0.2542, "step": 75460 }, { "epoch": 2.20335469555189, "grad_norm": 0.6001854419976358, "learning_rate": 1.4752635847526358e-05, "loss": 0.2436, "step": 75465 }, { "epoch": 2.203500678822207, "grad_norm": 0.5806409355529873, "learning_rate": 1.4749932414165992e-05, "loss": 0.2484, "step": 75470 }, { "epoch": 2.2036466620925244, "grad_norm": 0.5279842236744298, "learning_rate": 1.4747228980805625e-05, "loss": 0.2593, "step": 75475 }, { "epoch": 2.2037926453628414, "grad_norm": 0.6051396275113174, "learning_rate": 1.4744525547445257e-05, "loss": 0.2486, "step": 75480 }, { "epoch": 2.203938628633159, "grad_norm": 0.6052030892215372, "learning_rate": 1.4741822114084888e-05, "loss": 0.2637, "step": 75485 }, { "epoch": 2.204084611903476, "grad_norm": 0.6139146839960739, "learning_rate": 1.473911868072452e-05, "loss": 0.2467, "step": 75490 }, { "epoch": 2.2042305951737933, "grad_norm": 0.6118980765764774, "learning_rate": 1.4736415247364152e-05, "loss": 0.2388, "step": 75495 }, { "epoch": 2.2043765784441103, "grad_norm": 0.5750311961308473, "learning_rate": 1.4733711814003786e-05, "loss": 0.2592, "step": 75500 }, { "epoch": 2.2045225617144277, "grad_norm": 0.5265040593896957, "learning_rate": 1.4731008380643418e-05, "loss": 0.2716, "step": 75505 }, { "epoch": 2.2046685449847447, "grad_norm": 0.5518345095841022, "learning_rate": 1.472830494728305e-05, "loss": 0.255, "step": 75510 }, { "epoch": 2.2048145282550617, "grad_norm": 0.5479226988349051, "learning_rate": 1.4725601513922681e-05, "loss": 0.2347, "step": 75515 }, { "epoch": 2.204960511525379, "grad_norm": 0.612159804830206, "learning_rate": 1.4722898080562314e-05, "loss": 0.2533, "step": 75520 }, { "epoch": 2.2051064947956966, "grad_norm": 0.604600152074422, "learning_rate": 1.4720194647201946e-05, "loss": 0.2433, "step": 75525 }, { "epoch": 2.2052524780660137, "grad_norm": 0.5612575945825772, "learning_rate": 1.471749121384158e-05, "loss": 0.2467, "step": 75530 }, { "epoch": 2.2053984613363307, "grad_norm": 0.5666207088360938, "learning_rate": 1.4714787780481212e-05, "loss": 0.2757, "step": 75535 }, { "epoch": 2.205544444606648, "grad_norm": 0.5669457013237722, "learning_rate": 1.4712084347120845e-05, "loss": 0.269, "step": 75540 }, { "epoch": 2.205690427876965, "grad_norm": 0.5891257378066814, "learning_rate": 1.4709380913760475e-05, "loss": 0.2359, "step": 75545 }, { "epoch": 2.2058364111472826, "grad_norm": 0.5811245157251452, "learning_rate": 1.4706677480400108e-05, "loss": 0.2569, "step": 75550 }, { "epoch": 2.2059823944175996, "grad_norm": 0.5803691498844337, "learning_rate": 1.4703974047039742e-05, "loss": 0.2504, "step": 75555 }, { "epoch": 2.206128377687917, "grad_norm": 0.5660208608510714, "learning_rate": 1.4701270613679374e-05, "loss": 0.2534, "step": 75560 }, { "epoch": 2.206274360958234, "grad_norm": 0.5687522356033089, "learning_rate": 1.4698567180319006e-05, "loss": 0.2668, "step": 75565 }, { "epoch": 2.2064203442285515, "grad_norm": 0.6179378225608559, "learning_rate": 1.4695863746958639e-05, "loss": 0.2574, "step": 75570 }, { "epoch": 2.2065663274988685, "grad_norm": 0.6110433613254319, "learning_rate": 1.469316031359827e-05, "loss": 0.2458, "step": 75575 }, { "epoch": 2.206712310769186, "grad_norm": 0.5506615522096009, "learning_rate": 1.4690456880237902e-05, "loss": 0.2504, "step": 75580 }, { "epoch": 2.206858294039503, "grad_norm": 0.5877233977463794, "learning_rate": 1.4687753446877536e-05, "loss": 0.2574, "step": 75585 }, { "epoch": 2.2070042773098204, "grad_norm": 0.6042575413414961, "learning_rate": 1.4685050013517168e-05, "loss": 0.2571, "step": 75590 }, { "epoch": 2.2071502605801374, "grad_norm": 0.5707912666973306, "learning_rate": 1.46823465801568e-05, "loss": 0.2537, "step": 75595 }, { "epoch": 2.207296243850455, "grad_norm": 0.67578521376126, "learning_rate": 1.4679643146796433e-05, "loss": 0.2512, "step": 75600 }, { "epoch": 2.207442227120772, "grad_norm": 0.6222373907983831, "learning_rate": 1.4676939713436063e-05, "loss": 0.2631, "step": 75605 }, { "epoch": 2.2075882103910893, "grad_norm": 0.5803396471751274, "learning_rate": 1.4674236280075695e-05, "loss": 0.2549, "step": 75610 }, { "epoch": 2.2077341936614063, "grad_norm": 0.5555276303309913, "learning_rate": 1.467153284671533e-05, "loss": 0.2505, "step": 75615 }, { "epoch": 2.207880176931724, "grad_norm": 0.5898951388999704, "learning_rate": 1.4668829413354962e-05, "loss": 0.2509, "step": 75620 }, { "epoch": 2.208026160202041, "grad_norm": 0.5412660775622665, "learning_rate": 1.4666125979994594e-05, "loss": 0.2468, "step": 75625 }, { "epoch": 2.2081721434723582, "grad_norm": 0.6072634181068017, "learning_rate": 1.4663422546634226e-05, "loss": 0.2717, "step": 75630 }, { "epoch": 2.2083181267426752, "grad_norm": 0.5997146722725505, "learning_rate": 1.4660719113273857e-05, "loss": 0.2485, "step": 75635 }, { "epoch": 2.2084641100129927, "grad_norm": 0.5916235269788785, "learning_rate": 1.4658015679913493e-05, "loss": 0.2527, "step": 75640 }, { "epoch": 2.2086100932833097, "grad_norm": 0.5735987142851369, "learning_rate": 1.4655312246553123e-05, "loss": 0.2761, "step": 75645 }, { "epoch": 2.208756076553627, "grad_norm": 0.6165479112966147, "learning_rate": 1.4652608813192756e-05, "loss": 0.272, "step": 75650 }, { "epoch": 2.208902059823944, "grad_norm": 0.5881031572248119, "learning_rate": 1.4649905379832388e-05, "loss": 0.2677, "step": 75655 }, { "epoch": 2.2090480430942616, "grad_norm": 0.5988158930796715, "learning_rate": 1.464720194647202e-05, "loss": 0.2594, "step": 75660 }, { "epoch": 2.2091940263645786, "grad_norm": 0.6207978498879758, "learning_rate": 1.4644498513111651e-05, "loss": 0.2749, "step": 75665 }, { "epoch": 2.2093400096348956, "grad_norm": 0.6201972709209013, "learning_rate": 1.4641795079751287e-05, "loss": 0.269, "step": 75670 }, { "epoch": 2.209485992905213, "grad_norm": 0.6112471484416201, "learning_rate": 1.4639091646390917e-05, "loss": 0.2772, "step": 75675 }, { "epoch": 2.20963197617553, "grad_norm": 0.6388346750892286, "learning_rate": 1.463638821303055e-05, "loss": 0.2538, "step": 75680 }, { "epoch": 2.2097779594458475, "grad_norm": 0.5708642201260592, "learning_rate": 1.4633684779670182e-05, "loss": 0.253, "step": 75685 }, { "epoch": 2.2099239427161645, "grad_norm": 0.5508543254535279, "learning_rate": 1.4630981346309813e-05, "loss": 0.2495, "step": 75690 }, { "epoch": 2.210069925986482, "grad_norm": 0.5555600421535158, "learning_rate": 1.4628277912949445e-05, "loss": 0.2645, "step": 75695 }, { "epoch": 2.210215909256799, "grad_norm": 0.6056720089853136, "learning_rate": 1.462557447958908e-05, "loss": 0.2501, "step": 75700 }, { "epoch": 2.2103618925271165, "grad_norm": 0.5609355339922244, "learning_rate": 1.4622871046228711e-05, "loss": 0.2488, "step": 75705 }, { "epoch": 2.2105078757974335, "grad_norm": 0.6205414463531593, "learning_rate": 1.4620167612868343e-05, "loss": 0.2497, "step": 75710 }, { "epoch": 2.210653859067751, "grad_norm": 0.5548011366613248, "learning_rate": 1.4617464179507976e-05, "loss": 0.2394, "step": 75715 }, { "epoch": 2.210799842338068, "grad_norm": 0.6012230022203366, "learning_rate": 1.4614760746147606e-05, "loss": 0.2592, "step": 75720 }, { "epoch": 2.2109458256083854, "grad_norm": 0.6444802849802475, "learning_rate": 1.4612057312787242e-05, "loss": 0.2623, "step": 75725 }, { "epoch": 2.2110918088787024, "grad_norm": 0.563362957600398, "learning_rate": 1.4609353879426873e-05, "loss": 0.25, "step": 75730 }, { "epoch": 2.21123779214902, "grad_norm": 0.6124429214423689, "learning_rate": 1.4606650446066505e-05, "loss": 0.257, "step": 75735 }, { "epoch": 2.211383775419337, "grad_norm": 0.5417488388231996, "learning_rate": 1.4603947012706137e-05, "loss": 0.246, "step": 75740 }, { "epoch": 2.2115297586896543, "grad_norm": 0.5541673450444099, "learning_rate": 1.460124357934577e-05, "loss": 0.253, "step": 75745 }, { "epoch": 2.2116757419599713, "grad_norm": 0.5310810482033317, "learning_rate": 1.45985401459854e-05, "loss": 0.2543, "step": 75750 }, { "epoch": 2.2118217252302887, "grad_norm": 0.6212254802524573, "learning_rate": 1.4595836712625036e-05, "loss": 0.272, "step": 75755 }, { "epoch": 2.2119677085006058, "grad_norm": 0.5440894779753225, "learning_rate": 1.4593133279264667e-05, "loss": 0.2539, "step": 75760 }, { "epoch": 2.212113691770923, "grad_norm": 0.5522842082257247, "learning_rate": 1.4590429845904299e-05, "loss": 0.2547, "step": 75765 }, { "epoch": 2.21225967504124, "grad_norm": 0.5462947038869729, "learning_rate": 1.4587726412543931e-05, "loss": 0.2402, "step": 75770 }, { "epoch": 2.2124056583115577, "grad_norm": 0.6026442520108263, "learning_rate": 1.4585022979183564e-05, "loss": 0.2523, "step": 75775 }, { "epoch": 2.2125516415818747, "grad_norm": 0.538361940309054, "learning_rate": 1.4582319545823194e-05, "loss": 0.2556, "step": 75780 }, { "epoch": 2.212697624852192, "grad_norm": 0.5667701947544312, "learning_rate": 1.457961611246283e-05, "loss": 0.2466, "step": 75785 }, { "epoch": 2.212843608122509, "grad_norm": 0.6074708662416873, "learning_rate": 1.457691267910246e-05, "loss": 0.2594, "step": 75790 }, { "epoch": 2.2129895913928266, "grad_norm": 0.5982643697692565, "learning_rate": 1.4574209245742093e-05, "loss": 0.2471, "step": 75795 }, { "epoch": 2.2131355746631436, "grad_norm": 0.5679950353733682, "learning_rate": 1.4571505812381725e-05, "loss": 0.2433, "step": 75800 }, { "epoch": 2.2132815579334606, "grad_norm": 0.5846658690603724, "learning_rate": 1.4568802379021358e-05, "loss": 0.2552, "step": 75805 }, { "epoch": 2.213427541203778, "grad_norm": 0.5343683173526073, "learning_rate": 1.4566098945660992e-05, "loss": 0.2447, "step": 75810 }, { "epoch": 2.2135735244740955, "grad_norm": 0.54031559945684, "learning_rate": 1.4563395512300624e-05, "loss": 0.2502, "step": 75815 }, { "epoch": 2.2137195077444125, "grad_norm": 0.5233259422392391, "learning_rate": 1.4560692078940254e-05, "loss": 0.2543, "step": 75820 }, { "epoch": 2.2138654910147295, "grad_norm": 0.6112766308186981, "learning_rate": 1.4557988645579887e-05, "loss": 0.2573, "step": 75825 }, { "epoch": 2.214011474285047, "grad_norm": 0.56310085720929, "learning_rate": 1.4555285212219519e-05, "loss": 0.2654, "step": 75830 }, { "epoch": 2.214157457555364, "grad_norm": 0.5355253502312921, "learning_rate": 1.4552581778859151e-05, "loss": 0.2332, "step": 75835 }, { "epoch": 2.2143034408256814, "grad_norm": 0.5973250235642046, "learning_rate": 1.4549878345498785e-05, "loss": 0.2533, "step": 75840 }, { "epoch": 2.2144494240959984, "grad_norm": 0.5594838418047683, "learning_rate": 1.4547174912138418e-05, "loss": 0.2541, "step": 75845 }, { "epoch": 2.214595407366316, "grad_norm": 0.6023267428132459, "learning_rate": 1.4544471478778048e-05, "loss": 0.2536, "step": 75850 }, { "epoch": 2.214741390636633, "grad_norm": 0.5431506433311873, "learning_rate": 1.454176804541768e-05, "loss": 0.2351, "step": 75855 }, { "epoch": 2.2148873739069503, "grad_norm": 0.5320011829230361, "learning_rate": 1.4539064612057313e-05, "loss": 0.2473, "step": 75860 }, { "epoch": 2.2150333571772673, "grad_norm": 0.5441513426026825, "learning_rate": 1.4536361178696944e-05, "loss": 0.2591, "step": 75865 }, { "epoch": 2.215179340447585, "grad_norm": 0.5672559915243128, "learning_rate": 1.453365774533658e-05, "loss": 0.2575, "step": 75870 }, { "epoch": 2.215325323717902, "grad_norm": 0.5271969902177637, "learning_rate": 1.4530954311976212e-05, "loss": 0.2405, "step": 75875 }, { "epoch": 2.2154713069882193, "grad_norm": 0.6462499933074022, "learning_rate": 1.4528250878615842e-05, "loss": 0.2532, "step": 75880 }, { "epoch": 2.2156172902585363, "grad_norm": 0.5811209401879236, "learning_rate": 1.4525547445255475e-05, "loss": 0.2566, "step": 75885 }, { "epoch": 2.2157632735288537, "grad_norm": 0.5638128284942487, "learning_rate": 1.4522844011895107e-05, "loss": 0.232, "step": 75890 }, { "epoch": 2.2159092567991707, "grad_norm": 0.5940196534468462, "learning_rate": 1.4520140578534741e-05, "loss": 0.264, "step": 75895 }, { "epoch": 2.216055240069488, "grad_norm": 0.595901778263143, "learning_rate": 1.4517437145174373e-05, "loss": 0.2428, "step": 75900 }, { "epoch": 2.216201223339805, "grad_norm": 0.5072331148415142, "learning_rate": 1.4514733711814006e-05, "loss": 0.2372, "step": 75905 }, { "epoch": 2.2163472066101226, "grad_norm": 0.5745168106110466, "learning_rate": 1.4512030278453636e-05, "loss": 0.2595, "step": 75910 }, { "epoch": 2.2164931898804396, "grad_norm": 0.5246085862798201, "learning_rate": 1.4509326845093269e-05, "loss": 0.2604, "step": 75915 }, { "epoch": 2.216639173150757, "grad_norm": 0.6297053617419787, "learning_rate": 1.45066234117329e-05, "loss": 0.2618, "step": 75920 }, { "epoch": 2.216785156421074, "grad_norm": 0.6027563168740505, "learning_rate": 1.4503919978372535e-05, "loss": 0.2516, "step": 75925 }, { "epoch": 2.2169311396913916, "grad_norm": 0.5444846285832716, "learning_rate": 1.4501216545012167e-05, "loss": 0.2534, "step": 75930 }, { "epoch": 2.2170771229617086, "grad_norm": 0.6032822580643906, "learning_rate": 1.4498513111651798e-05, "loss": 0.2563, "step": 75935 }, { "epoch": 2.217223106232026, "grad_norm": 0.5735185109817439, "learning_rate": 1.449580967829143e-05, "loss": 0.2619, "step": 75940 }, { "epoch": 2.217369089502343, "grad_norm": 0.5780993937350557, "learning_rate": 1.4493106244931062e-05, "loss": 0.2774, "step": 75945 }, { "epoch": 2.2175150727726605, "grad_norm": 0.5566991889840357, "learning_rate": 1.4490402811570695e-05, "loss": 0.2498, "step": 75950 }, { "epoch": 2.2176610560429775, "grad_norm": 0.6257926494258851, "learning_rate": 1.4487699378210329e-05, "loss": 0.2518, "step": 75955 }, { "epoch": 2.2178070393132945, "grad_norm": 0.5712961645556841, "learning_rate": 1.4484995944849961e-05, "loss": 0.2692, "step": 75960 }, { "epoch": 2.217953022583612, "grad_norm": 0.5539664849780975, "learning_rate": 1.4482292511489592e-05, "loss": 0.2624, "step": 75965 }, { "epoch": 2.218099005853929, "grad_norm": 0.6716041322336812, "learning_rate": 1.4479589078129224e-05, "loss": 0.2728, "step": 75970 }, { "epoch": 2.2182449891242464, "grad_norm": 0.570559442655077, "learning_rate": 1.4476885644768856e-05, "loss": 0.2631, "step": 75975 }, { "epoch": 2.2183909723945634, "grad_norm": 0.5770049440242825, "learning_rate": 1.447418221140849e-05, "loss": 0.2495, "step": 75980 }, { "epoch": 2.218536955664881, "grad_norm": 0.5816461994902232, "learning_rate": 1.4471478778048123e-05, "loss": 0.2532, "step": 75985 }, { "epoch": 2.218682938935198, "grad_norm": 0.5605304145589717, "learning_rate": 1.4468775344687755e-05, "loss": 0.2535, "step": 75990 }, { "epoch": 2.2188289222055153, "grad_norm": 0.5602298748899535, "learning_rate": 1.4466071911327386e-05, "loss": 0.2476, "step": 75995 }, { "epoch": 2.2189749054758323, "grad_norm": 0.5813763760535257, "learning_rate": 1.4463368477967018e-05, "loss": 0.2642, "step": 76000 }, { "epoch": 2.2191208887461498, "grad_norm": 0.5593778256245587, "learning_rate": 1.446066504460665e-05, "loss": 0.259, "step": 76005 }, { "epoch": 2.2192668720164668, "grad_norm": 0.5611529669154633, "learning_rate": 1.4457961611246284e-05, "loss": 0.2528, "step": 76010 }, { "epoch": 2.2194128552867842, "grad_norm": 0.624840369778911, "learning_rate": 1.4455258177885917e-05, "loss": 0.2701, "step": 76015 }, { "epoch": 2.2195588385571012, "grad_norm": 0.5666295346697623, "learning_rate": 1.4452554744525549e-05, "loss": 0.256, "step": 76020 }, { "epoch": 2.2197048218274187, "grad_norm": 0.5098367630950111, "learning_rate": 1.444985131116518e-05, "loss": 0.2641, "step": 76025 }, { "epoch": 2.2198508050977357, "grad_norm": 0.5815864393133685, "learning_rate": 1.4447147877804812e-05, "loss": 0.2442, "step": 76030 }, { "epoch": 2.219996788368053, "grad_norm": 0.5783172640570711, "learning_rate": 1.4444444444444444e-05, "loss": 0.264, "step": 76035 }, { "epoch": 2.22014277163837, "grad_norm": 0.5651745073887137, "learning_rate": 1.4441741011084078e-05, "loss": 0.2506, "step": 76040 }, { "epoch": 2.2202887549086876, "grad_norm": 0.6102984689936624, "learning_rate": 1.443903757772371e-05, "loss": 0.2526, "step": 76045 }, { "epoch": 2.2204347381790046, "grad_norm": 0.5802039625339187, "learning_rate": 1.4436334144363343e-05, "loss": 0.2622, "step": 76050 }, { "epoch": 2.220580721449322, "grad_norm": 0.5585208031802469, "learning_rate": 1.4433630711002973e-05, "loss": 0.2495, "step": 76055 }, { "epoch": 2.220726704719639, "grad_norm": 0.5776817091447642, "learning_rate": 1.4430927277642606e-05, "loss": 0.2504, "step": 76060 }, { "epoch": 2.2208726879899565, "grad_norm": 0.5989137570550161, "learning_rate": 1.442822384428224e-05, "loss": 0.2407, "step": 76065 }, { "epoch": 2.2210186712602735, "grad_norm": 0.5470200248112326, "learning_rate": 1.4425520410921872e-05, "loss": 0.2531, "step": 76070 }, { "epoch": 2.221164654530591, "grad_norm": 0.5397912741292019, "learning_rate": 1.4422816977561504e-05, "loss": 0.2524, "step": 76075 }, { "epoch": 2.221310637800908, "grad_norm": 0.5452737524397212, "learning_rate": 1.4420113544201137e-05, "loss": 0.2543, "step": 76080 }, { "epoch": 2.2214566210712254, "grad_norm": 0.5420041670193361, "learning_rate": 1.4417410110840767e-05, "loss": 0.2615, "step": 76085 }, { "epoch": 2.2216026043415424, "grad_norm": 0.5711873940564842, "learning_rate": 1.44147066774804e-05, "loss": 0.2567, "step": 76090 }, { "epoch": 2.2217485876118594, "grad_norm": 0.5607769268076561, "learning_rate": 1.4412003244120034e-05, "loss": 0.2706, "step": 76095 }, { "epoch": 2.221894570882177, "grad_norm": 0.5498552921881282, "learning_rate": 1.4409299810759666e-05, "loss": 0.2433, "step": 76100 }, { "epoch": 2.2220405541524944, "grad_norm": 0.5414384748984886, "learning_rate": 1.4406596377399298e-05, "loss": 0.2432, "step": 76105 }, { "epoch": 2.2221865374228114, "grad_norm": 0.5740994479973864, "learning_rate": 1.440389294403893e-05, "loss": 0.2564, "step": 76110 }, { "epoch": 2.2223325206931284, "grad_norm": 0.6042981228138992, "learning_rate": 1.4401189510678561e-05, "loss": 0.267, "step": 76115 }, { "epoch": 2.222478503963446, "grad_norm": 0.555479837459434, "learning_rate": 1.4398486077318194e-05, "loss": 0.2564, "step": 76120 }, { "epoch": 2.222624487233763, "grad_norm": 0.5627044675745826, "learning_rate": 1.4395782643957828e-05, "loss": 0.2678, "step": 76125 }, { "epoch": 2.2227704705040803, "grad_norm": 0.5674314752967905, "learning_rate": 1.439307921059746e-05, "loss": 0.2585, "step": 76130 }, { "epoch": 2.2229164537743973, "grad_norm": 0.5433501475620751, "learning_rate": 1.4390375777237092e-05, "loss": 0.2494, "step": 76135 }, { "epoch": 2.2230624370447147, "grad_norm": 0.5802130189421565, "learning_rate": 1.4387672343876723e-05, "loss": 0.2593, "step": 76140 }, { "epoch": 2.2232084203150317, "grad_norm": 0.5398704540830147, "learning_rate": 1.4384968910516355e-05, "loss": 0.2505, "step": 76145 }, { "epoch": 2.223354403585349, "grad_norm": 0.5647593312026971, "learning_rate": 1.438226547715599e-05, "loss": 0.2489, "step": 76150 }, { "epoch": 2.223500386855666, "grad_norm": 0.5165149286544374, "learning_rate": 1.4379562043795621e-05, "loss": 0.2408, "step": 76155 }, { "epoch": 2.2236463701259837, "grad_norm": 0.5605485882062174, "learning_rate": 1.4376858610435254e-05, "loss": 0.2529, "step": 76160 }, { "epoch": 2.2237923533963007, "grad_norm": 0.5477466380051678, "learning_rate": 1.4374155177074886e-05, "loss": 0.2591, "step": 76165 }, { "epoch": 2.223938336666618, "grad_norm": 0.584117340277431, "learning_rate": 1.4371451743714517e-05, "loss": 0.2557, "step": 76170 }, { "epoch": 2.224084319936935, "grad_norm": 0.5541150605013184, "learning_rate": 1.4368748310354149e-05, "loss": 0.2673, "step": 76175 }, { "epoch": 2.2242303032072526, "grad_norm": 0.5812714330665993, "learning_rate": 1.4366044876993785e-05, "loss": 0.2693, "step": 76180 }, { "epoch": 2.2243762864775696, "grad_norm": 0.5121266038291619, "learning_rate": 1.4363341443633415e-05, "loss": 0.2557, "step": 76185 }, { "epoch": 2.224522269747887, "grad_norm": 0.5412076181496384, "learning_rate": 1.4360638010273048e-05, "loss": 0.2393, "step": 76190 }, { "epoch": 2.224668253018204, "grad_norm": 0.5599872952399098, "learning_rate": 1.435793457691268e-05, "loss": 0.2563, "step": 76195 }, { "epoch": 2.2248142362885215, "grad_norm": 0.5704814989399478, "learning_rate": 1.435523114355231e-05, "loss": 0.2399, "step": 76200 }, { "epoch": 2.2249602195588385, "grad_norm": 0.5421077207311905, "learning_rate": 1.4352527710191943e-05, "loss": 0.2404, "step": 76205 }, { "epoch": 2.225106202829156, "grad_norm": 0.5203983262602048, "learning_rate": 1.4349824276831577e-05, "loss": 0.2574, "step": 76210 }, { "epoch": 2.225252186099473, "grad_norm": 0.628705722066979, "learning_rate": 1.434712084347121e-05, "loss": 0.2539, "step": 76215 }, { "epoch": 2.2253981693697904, "grad_norm": 0.5609586492415434, "learning_rate": 1.4344417410110842e-05, "loss": 0.262, "step": 76220 }, { "epoch": 2.2255441526401074, "grad_norm": 0.5627888701452703, "learning_rate": 1.4341713976750474e-05, "loss": 0.2482, "step": 76225 }, { "epoch": 2.225690135910425, "grad_norm": 0.6189362299618739, "learning_rate": 1.4339010543390104e-05, "loss": 0.2615, "step": 76230 }, { "epoch": 2.225836119180742, "grad_norm": 0.518444701368774, "learning_rate": 1.433630711002974e-05, "loss": 0.2367, "step": 76235 }, { "epoch": 2.2259821024510593, "grad_norm": 0.5944893984482063, "learning_rate": 1.433360367666937e-05, "loss": 0.252, "step": 76240 }, { "epoch": 2.2261280857213763, "grad_norm": 0.5598541348417885, "learning_rate": 1.4330900243309003e-05, "loss": 0.2426, "step": 76245 }, { "epoch": 2.2262740689916933, "grad_norm": 0.5897760269722719, "learning_rate": 1.4328196809948635e-05, "loss": 0.2571, "step": 76250 }, { "epoch": 2.226420052262011, "grad_norm": 0.5738946649543186, "learning_rate": 1.4325493376588268e-05, "loss": 0.2639, "step": 76255 }, { "epoch": 2.226566035532328, "grad_norm": 0.5964797104876435, "learning_rate": 1.4322789943227898e-05, "loss": 0.2594, "step": 76260 }, { "epoch": 2.2267120188026452, "grad_norm": 0.5394560931841831, "learning_rate": 1.4320086509867534e-05, "loss": 0.2489, "step": 76265 }, { "epoch": 2.2268580020729623, "grad_norm": 0.5840177540036752, "learning_rate": 1.4317383076507165e-05, "loss": 0.2468, "step": 76270 }, { "epoch": 2.2270039853432797, "grad_norm": 0.6181756215550492, "learning_rate": 1.4314679643146797e-05, "loss": 0.258, "step": 76275 }, { "epoch": 2.2271499686135967, "grad_norm": 0.6127165314703054, "learning_rate": 1.431197620978643e-05, "loss": 0.248, "step": 76280 }, { "epoch": 2.227295951883914, "grad_norm": 0.5538171897641102, "learning_rate": 1.4309272776426062e-05, "loss": 0.2519, "step": 76285 }, { "epoch": 2.227441935154231, "grad_norm": 0.6088174955486872, "learning_rate": 1.4306569343065692e-05, "loss": 0.2698, "step": 76290 }, { "epoch": 2.2275879184245486, "grad_norm": 0.5886059563996194, "learning_rate": 1.4303865909705328e-05, "loss": 0.2493, "step": 76295 }, { "epoch": 2.2277339016948656, "grad_norm": 0.6050663778133898, "learning_rate": 1.4301162476344959e-05, "loss": 0.2522, "step": 76300 }, { "epoch": 2.227879884965183, "grad_norm": 0.5601358038542443, "learning_rate": 1.4298459042984591e-05, "loss": 0.2498, "step": 76305 }, { "epoch": 2.2280258682355, "grad_norm": 0.5373486802642361, "learning_rate": 1.4295755609624223e-05, "loss": 0.25, "step": 76310 }, { "epoch": 2.2281718515058175, "grad_norm": 0.5772902452168124, "learning_rate": 1.4293052176263854e-05, "loss": 0.2498, "step": 76315 }, { "epoch": 2.2283178347761345, "grad_norm": 0.5815132070559257, "learning_rate": 1.429034874290349e-05, "loss": 0.2402, "step": 76320 }, { "epoch": 2.228463818046452, "grad_norm": 0.5955106855436554, "learning_rate": 1.4287645309543122e-05, "loss": 0.259, "step": 76325 }, { "epoch": 2.228609801316769, "grad_norm": 0.5728480466317295, "learning_rate": 1.4284941876182753e-05, "loss": 0.2614, "step": 76330 }, { "epoch": 2.2287557845870865, "grad_norm": 0.6440443707399374, "learning_rate": 1.4282238442822385e-05, "loss": 0.2569, "step": 76335 }, { "epoch": 2.2289017678574035, "grad_norm": 0.5850269565039667, "learning_rate": 1.4279535009462017e-05, "loss": 0.257, "step": 76340 }, { "epoch": 2.229047751127721, "grad_norm": 0.5404183047958896, "learning_rate": 1.4276831576101648e-05, "loss": 0.2578, "step": 76345 }, { "epoch": 2.229193734398038, "grad_norm": 0.5843299816816308, "learning_rate": 1.4274128142741283e-05, "loss": 0.2633, "step": 76350 }, { "epoch": 2.2293397176683554, "grad_norm": 0.536649790151426, "learning_rate": 1.4271424709380916e-05, "loss": 0.2458, "step": 76355 }, { "epoch": 2.2294857009386724, "grad_norm": 0.6472149852880448, "learning_rate": 1.4268721276020546e-05, "loss": 0.2775, "step": 76360 }, { "epoch": 2.22963168420899, "grad_norm": 0.5733327110098846, "learning_rate": 1.4266017842660179e-05, "loss": 0.2437, "step": 76365 }, { "epoch": 2.229777667479307, "grad_norm": 0.5212544256409375, "learning_rate": 1.4263314409299811e-05, "loss": 0.2404, "step": 76370 }, { "epoch": 2.2299236507496243, "grad_norm": 0.5623876506939565, "learning_rate": 1.4260610975939442e-05, "loss": 0.2646, "step": 76375 }, { "epoch": 2.2300696340199413, "grad_norm": 0.637379866069679, "learning_rate": 1.4257907542579077e-05, "loss": 0.2447, "step": 76380 }, { "epoch": 2.2302156172902583, "grad_norm": 0.585163385805477, "learning_rate": 1.4255204109218708e-05, "loss": 0.2432, "step": 76385 }, { "epoch": 2.2303616005605758, "grad_norm": 0.5477210709899439, "learning_rate": 1.425250067585834e-05, "loss": 0.2552, "step": 76390 }, { "epoch": 2.230507583830893, "grad_norm": 0.6001872913065931, "learning_rate": 1.4249797242497973e-05, "loss": 0.2579, "step": 76395 }, { "epoch": 2.23065356710121, "grad_norm": 0.5919849419946781, "learning_rate": 1.4247093809137605e-05, "loss": 0.2819, "step": 76400 }, { "epoch": 2.230799550371527, "grad_norm": 0.5904877413476922, "learning_rate": 1.4244390375777239e-05, "loss": 0.2575, "step": 76405 }, { "epoch": 2.2309455336418447, "grad_norm": 0.511040322694773, "learning_rate": 1.4241686942416871e-05, "loss": 0.2387, "step": 76410 }, { "epoch": 2.2310915169121617, "grad_norm": 0.5468979547064348, "learning_rate": 1.4238983509056502e-05, "loss": 0.2347, "step": 76415 }, { "epoch": 2.231237500182479, "grad_norm": 0.5796526854724078, "learning_rate": 1.4236280075696134e-05, "loss": 0.2463, "step": 76420 }, { "epoch": 2.231383483452796, "grad_norm": 0.5931188015083044, "learning_rate": 1.4233576642335767e-05, "loss": 0.2523, "step": 76425 }, { "epoch": 2.2315294667231136, "grad_norm": 0.5604347756426827, "learning_rate": 1.4230873208975399e-05, "loss": 0.2532, "step": 76430 }, { "epoch": 2.2316754499934306, "grad_norm": 0.5967430453178226, "learning_rate": 1.4228169775615033e-05, "loss": 0.2434, "step": 76435 }, { "epoch": 2.231821433263748, "grad_norm": 0.5468464185757014, "learning_rate": 1.4225466342254665e-05, "loss": 0.2623, "step": 76440 }, { "epoch": 2.231967416534065, "grad_norm": 0.6049651928013281, "learning_rate": 1.4222762908894296e-05, "loss": 0.2503, "step": 76445 }, { "epoch": 2.2321133998043825, "grad_norm": 0.5931716074208616, "learning_rate": 1.4220059475533928e-05, "loss": 0.2562, "step": 76450 }, { "epoch": 2.2322593830746995, "grad_norm": 0.5289584939674808, "learning_rate": 1.421735604217356e-05, "loss": 0.2428, "step": 76455 }, { "epoch": 2.232405366345017, "grad_norm": 0.6233555829213493, "learning_rate": 1.4214652608813193e-05, "loss": 0.2454, "step": 76460 }, { "epoch": 2.232551349615334, "grad_norm": 0.5283906072034459, "learning_rate": 1.4211949175452827e-05, "loss": 0.2648, "step": 76465 }, { "epoch": 2.2326973328856514, "grad_norm": 0.5560826663725505, "learning_rate": 1.4209245742092459e-05, "loss": 0.2471, "step": 76470 }, { "epoch": 2.2328433161559684, "grad_norm": 0.5809424815938314, "learning_rate": 1.420654230873209e-05, "loss": 0.2522, "step": 76475 }, { "epoch": 2.232989299426286, "grad_norm": 0.5170671628014792, "learning_rate": 1.4203838875371722e-05, "loss": 0.2427, "step": 76480 }, { "epoch": 2.233135282696603, "grad_norm": 0.5883940854555488, "learning_rate": 1.4201135442011354e-05, "loss": 0.2509, "step": 76485 }, { "epoch": 2.2332812659669203, "grad_norm": 0.554900674310261, "learning_rate": 1.4198432008650988e-05, "loss": 0.2468, "step": 76490 }, { "epoch": 2.2334272492372373, "grad_norm": 0.5686250207734653, "learning_rate": 1.419572857529062e-05, "loss": 0.2628, "step": 76495 }, { "epoch": 2.233573232507555, "grad_norm": 0.5533022598527179, "learning_rate": 1.4193025141930253e-05, "loss": 0.2386, "step": 76500 }, { "epoch": 2.233719215777872, "grad_norm": 0.597858325740605, "learning_rate": 1.4190321708569884e-05, "loss": 0.2625, "step": 76505 }, { "epoch": 2.2338651990481893, "grad_norm": 0.5808910919610083, "learning_rate": 1.4187618275209516e-05, "loss": 0.2526, "step": 76510 }, { "epoch": 2.2340111823185063, "grad_norm": 0.523251111808483, "learning_rate": 1.4184914841849148e-05, "loss": 0.253, "step": 76515 }, { "epoch": 2.2341571655888237, "grad_norm": 0.5857952259453122, "learning_rate": 1.4182211408488782e-05, "loss": 0.2821, "step": 76520 }, { "epoch": 2.2343031488591407, "grad_norm": 0.5449092593585924, "learning_rate": 1.4179507975128415e-05, "loss": 0.2678, "step": 76525 }, { "epoch": 2.234449132129458, "grad_norm": 0.5591912282773945, "learning_rate": 1.4176804541768047e-05, "loss": 0.2572, "step": 76530 }, { "epoch": 2.234595115399775, "grad_norm": 0.6161080460690834, "learning_rate": 1.4174101108407678e-05, "loss": 0.2484, "step": 76535 }, { "epoch": 2.234741098670092, "grad_norm": 0.5598101810987631, "learning_rate": 1.417139767504731e-05, "loss": 0.2496, "step": 76540 }, { "epoch": 2.2348870819404096, "grad_norm": 0.5400277965099617, "learning_rate": 1.4168694241686944e-05, "loss": 0.25, "step": 76545 }, { "epoch": 2.2350330652107266, "grad_norm": 0.5482931830408215, "learning_rate": 1.4165990808326576e-05, "loss": 0.2296, "step": 76550 }, { "epoch": 2.235179048481044, "grad_norm": 0.5979174344796155, "learning_rate": 1.4163287374966209e-05, "loss": 0.2545, "step": 76555 }, { "epoch": 2.235325031751361, "grad_norm": 0.567343851602717, "learning_rate": 1.416058394160584e-05, "loss": 0.2466, "step": 76560 }, { "epoch": 2.2354710150216786, "grad_norm": 0.610947989708004, "learning_rate": 1.4157880508245471e-05, "loss": 0.2577, "step": 76565 }, { "epoch": 2.2356169982919956, "grad_norm": 0.607378076974566, "learning_rate": 1.4155177074885104e-05, "loss": 0.2622, "step": 76570 }, { "epoch": 2.235762981562313, "grad_norm": 0.5666313875267095, "learning_rate": 1.4152473641524738e-05, "loss": 0.2414, "step": 76575 }, { "epoch": 2.23590896483263, "grad_norm": 0.5455425383606571, "learning_rate": 1.414977020816437e-05, "loss": 0.2585, "step": 76580 }, { "epoch": 2.2360549481029475, "grad_norm": 0.6232351466876689, "learning_rate": 1.4147066774804002e-05, "loss": 0.2441, "step": 76585 }, { "epoch": 2.2362009313732645, "grad_norm": 0.6042515273937558, "learning_rate": 1.4144363341443633e-05, "loss": 0.2678, "step": 76590 }, { "epoch": 2.236346914643582, "grad_norm": 0.5943895345771153, "learning_rate": 1.4141659908083265e-05, "loss": 0.2558, "step": 76595 }, { "epoch": 2.236492897913899, "grad_norm": 0.5662630648339139, "learning_rate": 1.4138956474722898e-05, "loss": 0.2398, "step": 76600 }, { "epoch": 2.2366388811842164, "grad_norm": 0.6025765478288588, "learning_rate": 1.4136253041362532e-05, "loss": 0.2538, "step": 76605 }, { "epoch": 2.2367848644545334, "grad_norm": 0.600679264768227, "learning_rate": 1.4133549608002164e-05, "loss": 0.2495, "step": 76610 }, { "epoch": 2.236930847724851, "grad_norm": 0.6143352928214566, "learning_rate": 1.4130846174641796e-05, "loss": 0.2634, "step": 76615 }, { "epoch": 2.237076830995168, "grad_norm": 0.5901831582560201, "learning_rate": 1.4128142741281427e-05, "loss": 0.2516, "step": 76620 }, { "epoch": 2.2372228142654853, "grad_norm": 0.5795820749124108, "learning_rate": 1.412543930792106e-05, "loss": 0.2619, "step": 76625 }, { "epoch": 2.2373687975358023, "grad_norm": 0.5875388988410774, "learning_rate": 1.4122735874560695e-05, "loss": 0.2343, "step": 76630 }, { "epoch": 2.2375147808061198, "grad_norm": 0.6012762988503686, "learning_rate": 1.4120032441200326e-05, "loss": 0.2447, "step": 76635 }, { "epoch": 2.2376607640764368, "grad_norm": 0.5346037936569583, "learning_rate": 1.4117329007839958e-05, "loss": 0.2359, "step": 76640 }, { "epoch": 2.237806747346754, "grad_norm": 0.592860799393369, "learning_rate": 1.411462557447959e-05, "loss": 0.2601, "step": 76645 }, { "epoch": 2.2379527306170712, "grad_norm": 0.6105900277823053, "learning_rate": 1.411192214111922e-05, "loss": 0.2502, "step": 76650 }, { "epoch": 2.2380987138873887, "grad_norm": 0.6201097507820297, "learning_rate": 1.4109218707758853e-05, "loss": 0.2471, "step": 76655 }, { "epoch": 2.2382446971577057, "grad_norm": 0.5732122748825199, "learning_rate": 1.4106515274398487e-05, "loss": 0.2562, "step": 76660 }, { "epoch": 2.238390680428023, "grad_norm": 0.5817136823980751, "learning_rate": 1.410381184103812e-05, "loss": 0.2632, "step": 76665 }, { "epoch": 2.23853666369834, "grad_norm": 0.6265491199213369, "learning_rate": 1.4101108407677752e-05, "loss": 0.2514, "step": 76670 }, { "epoch": 2.2386826469686576, "grad_norm": 0.5844084148829216, "learning_rate": 1.4098404974317384e-05, "loss": 0.2436, "step": 76675 }, { "epoch": 2.2388286302389746, "grad_norm": 0.5561641763557339, "learning_rate": 1.4095701540957015e-05, "loss": 0.2617, "step": 76680 }, { "epoch": 2.238974613509292, "grad_norm": 0.5052128116739104, "learning_rate": 1.4092998107596647e-05, "loss": 0.2297, "step": 76685 }, { "epoch": 2.239120596779609, "grad_norm": 0.5952505604643672, "learning_rate": 1.4090294674236281e-05, "loss": 0.2681, "step": 76690 }, { "epoch": 2.239266580049926, "grad_norm": 0.5858234772812587, "learning_rate": 1.4087591240875913e-05, "loss": 0.2305, "step": 76695 }, { "epoch": 2.2394125633202435, "grad_norm": 0.5938018626605341, "learning_rate": 1.4084887807515546e-05, "loss": 0.2503, "step": 76700 }, { "epoch": 2.2395585465905605, "grad_norm": 0.5565921224831089, "learning_rate": 1.4082184374155178e-05, "loss": 0.2331, "step": 76705 }, { "epoch": 2.239704529860878, "grad_norm": 0.552129964702604, "learning_rate": 1.4079480940794809e-05, "loss": 0.254, "step": 76710 }, { "epoch": 2.239850513131195, "grad_norm": 0.5641082796180291, "learning_rate": 1.4076777507434444e-05, "loss": 0.2462, "step": 76715 }, { "epoch": 2.2399964964015124, "grad_norm": 0.5363731687397589, "learning_rate": 1.4074074074074075e-05, "loss": 0.2461, "step": 76720 }, { "epoch": 2.2401424796718294, "grad_norm": 0.6375361555704737, "learning_rate": 1.4071370640713707e-05, "loss": 0.2567, "step": 76725 }, { "epoch": 2.240288462942147, "grad_norm": 0.5803308443472173, "learning_rate": 1.406866720735334e-05, "loss": 0.2616, "step": 76730 }, { "epoch": 2.240434446212464, "grad_norm": 0.49760952916202533, "learning_rate": 1.4065963773992972e-05, "loss": 0.2421, "step": 76735 }, { "epoch": 2.2405804294827814, "grad_norm": 0.602153085365277, "learning_rate": 1.4063260340632603e-05, "loss": 0.2439, "step": 76740 }, { "epoch": 2.2407264127530984, "grad_norm": 0.5567316839835416, "learning_rate": 1.4060556907272238e-05, "loss": 0.2511, "step": 76745 }, { "epoch": 2.240872396023416, "grad_norm": 0.5684150346333402, "learning_rate": 1.4057853473911869e-05, "loss": 0.2615, "step": 76750 }, { "epoch": 2.241018379293733, "grad_norm": 0.5407048073343792, "learning_rate": 1.4055150040551501e-05, "loss": 0.2609, "step": 76755 }, { "epoch": 2.2411643625640503, "grad_norm": 0.6007505630394415, "learning_rate": 1.4052446607191134e-05, "loss": 0.2728, "step": 76760 }, { "epoch": 2.2413103458343673, "grad_norm": 0.5928481624777612, "learning_rate": 1.4049743173830766e-05, "loss": 0.2388, "step": 76765 }, { "epoch": 2.2414563291046847, "grad_norm": 0.55982141335418, "learning_rate": 1.4047039740470396e-05, "loss": 0.2354, "step": 76770 }, { "epoch": 2.2416023123750017, "grad_norm": 0.5763176230597975, "learning_rate": 1.4044336307110032e-05, "loss": 0.263, "step": 76775 }, { "epoch": 2.241748295645319, "grad_norm": 0.5638097002006724, "learning_rate": 1.4041632873749663e-05, "loss": 0.2665, "step": 76780 }, { "epoch": 2.241894278915636, "grad_norm": 0.5662207434373255, "learning_rate": 1.4038929440389295e-05, "loss": 0.2438, "step": 76785 }, { "epoch": 2.2420402621859536, "grad_norm": 0.5473318702191233, "learning_rate": 1.4036226007028927e-05, "loss": 0.241, "step": 76790 }, { "epoch": 2.2421862454562707, "grad_norm": 0.543375216595721, "learning_rate": 1.4033522573668558e-05, "loss": 0.2497, "step": 76795 }, { "epoch": 2.242332228726588, "grad_norm": 0.6166406928206353, "learning_rate": 1.4030819140308194e-05, "loss": 0.2525, "step": 76800 }, { "epoch": 2.242478211996905, "grad_norm": 0.6016247053031332, "learning_rate": 1.4028115706947826e-05, "loss": 0.2567, "step": 76805 }, { "epoch": 2.2426241952672226, "grad_norm": 0.5490277084682653, "learning_rate": 1.4025412273587457e-05, "loss": 0.241, "step": 76810 }, { "epoch": 2.2427701785375396, "grad_norm": 0.5648793757026405, "learning_rate": 1.4022708840227089e-05, "loss": 0.2414, "step": 76815 }, { "epoch": 2.242916161807857, "grad_norm": 0.5515575872843244, "learning_rate": 1.4020005406866721e-05, "loss": 0.2698, "step": 76820 }, { "epoch": 2.243062145078174, "grad_norm": 0.5477716809455401, "learning_rate": 1.4017301973506352e-05, "loss": 0.2588, "step": 76825 }, { "epoch": 2.243208128348491, "grad_norm": 0.540405165088997, "learning_rate": 1.4014598540145988e-05, "loss": 0.247, "step": 76830 }, { "epoch": 2.2433541116188085, "grad_norm": 0.6001290675220683, "learning_rate": 1.401189510678562e-05, "loss": 0.2529, "step": 76835 }, { "epoch": 2.243500094889126, "grad_norm": 0.57183083082296, "learning_rate": 1.400919167342525e-05, "loss": 0.2575, "step": 76840 }, { "epoch": 2.243646078159443, "grad_norm": 0.6131474490960236, "learning_rate": 1.4006488240064883e-05, "loss": 0.2579, "step": 76845 }, { "epoch": 2.24379206142976, "grad_norm": 0.5792683852909648, "learning_rate": 1.4003784806704515e-05, "loss": 0.2539, "step": 76850 }, { "epoch": 2.2439380447000774, "grad_norm": 0.5210675709263001, "learning_rate": 1.4001081373344146e-05, "loss": 0.2555, "step": 76855 }, { "epoch": 2.2440840279703944, "grad_norm": 0.579680102736161, "learning_rate": 1.3998377939983782e-05, "loss": 0.2538, "step": 76860 }, { "epoch": 2.244230011240712, "grad_norm": 0.5558556397422333, "learning_rate": 1.3995674506623412e-05, "loss": 0.2418, "step": 76865 }, { "epoch": 2.244375994511029, "grad_norm": 0.5592728184546444, "learning_rate": 1.3992971073263044e-05, "loss": 0.2513, "step": 76870 }, { "epoch": 2.2445219777813463, "grad_norm": 0.5350266167073787, "learning_rate": 1.3990267639902677e-05, "loss": 0.2286, "step": 76875 }, { "epoch": 2.2446679610516633, "grad_norm": 0.6275285069636515, "learning_rate": 1.3987564206542309e-05, "loss": 0.2529, "step": 76880 }, { "epoch": 2.244813944321981, "grad_norm": 0.6188262410670919, "learning_rate": 1.3984860773181943e-05, "loss": 0.2661, "step": 76885 }, { "epoch": 2.244959927592298, "grad_norm": 0.5796369611955439, "learning_rate": 1.3982157339821575e-05, "loss": 0.2622, "step": 76890 }, { "epoch": 2.2451059108626152, "grad_norm": 0.603003292076569, "learning_rate": 1.3979453906461206e-05, "loss": 0.26, "step": 76895 }, { "epoch": 2.2452518941329322, "grad_norm": 0.5856432954347995, "learning_rate": 1.3976750473100838e-05, "loss": 0.2356, "step": 76900 }, { "epoch": 2.2453978774032497, "grad_norm": 0.5326987102525159, "learning_rate": 1.397404703974047e-05, "loss": 0.2356, "step": 76905 }, { "epoch": 2.2455438606735667, "grad_norm": 0.5592919273304819, "learning_rate": 1.3971343606380103e-05, "loss": 0.2452, "step": 76910 }, { "epoch": 2.245689843943884, "grad_norm": 0.5575615087821401, "learning_rate": 1.3968640173019737e-05, "loss": 0.2365, "step": 76915 }, { "epoch": 2.245835827214201, "grad_norm": 0.588809110718611, "learning_rate": 1.396593673965937e-05, "loss": 0.2542, "step": 76920 }, { "epoch": 2.2459818104845186, "grad_norm": 0.5945539782138937, "learning_rate": 1.3963233306299e-05, "loss": 0.2573, "step": 76925 }, { "epoch": 2.2461277937548356, "grad_norm": 0.5673321042362539, "learning_rate": 1.3960529872938632e-05, "loss": 0.2404, "step": 76930 }, { "epoch": 2.246273777025153, "grad_norm": 0.649900362782707, "learning_rate": 1.3957826439578265e-05, "loss": 0.25, "step": 76935 }, { "epoch": 2.24641976029547, "grad_norm": 0.5891749753164406, "learning_rate": 1.3955123006217897e-05, "loss": 0.2428, "step": 76940 }, { "epoch": 2.2465657435657875, "grad_norm": 0.5991617281763327, "learning_rate": 1.3952419572857531e-05, "loss": 0.2522, "step": 76945 }, { "epoch": 2.2467117268361045, "grad_norm": 0.5823501257306863, "learning_rate": 1.3949716139497163e-05, "loss": 0.2474, "step": 76950 }, { "epoch": 2.246857710106422, "grad_norm": 0.5725732561985871, "learning_rate": 1.3947012706136794e-05, "loss": 0.2592, "step": 76955 }, { "epoch": 2.247003693376739, "grad_norm": 0.5977884004372939, "learning_rate": 1.3944309272776426e-05, "loss": 0.2693, "step": 76960 }, { "epoch": 2.2471496766470564, "grad_norm": 0.6057990375175942, "learning_rate": 1.3941605839416059e-05, "loss": 0.2585, "step": 76965 }, { "epoch": 2.2472956599173735, "grad_norm": 0.5839742777980926, "learning_rate": 1.3938902406055693e-05, "loss": 0.2615, "step": 76970 }, { "epoch": 2.247441643187691, "grad_norm": 0.5564623080544777, "learning_rate": 1.3936198972695325e-05, "loss": 0.2542, "step": 76975 }, { "epoch": 2.247587626458008, "grad_norm": 0.604330559505372, "learning_rate": 1.3933495539334957e-05, "loss": 0.2638, "step": 76980 }, { "epoch": 2.247733609728325, "grad_norm": 0.5752969821854174, "learning_rate": 1.3930792105974588e-05, "loss": 0.2607, "step": 76985 }, { "epoch": 2.2478795929986424, "grad_norm": 0.512822775900612, "learning_rate": 1.392808867261422e-05, "loss": 0.2476, "step": 76990 }, { "epoch": 2.2480255762689594, "grad_norm": 0.5709027169998403, "learning_rate": 1.3925385239253852e-05, "loss": 0.2394, "step": 76995 }, { "epoch": 2.248171559539277, "grad_norm": 0.6068301391304516, "learning_rate": 1.3922681805893486e-05, "loss": 0.2426, "step": 77000 }, { "epoch": 2.248317542809594, "grad_norm": 0.5932314369998651, "learning_rate": 1.3919978372533119e-05, "loss": 0.2516, "step": 77005 }, { "epoch": 2.2484635260799113, "grad_norm": 0.5950529611162119, "learning_rate": 1.3917274939172751e-05, "loss": 0.2593, "step": 77010 }, { "epoch": 2.2486095093502283, "grad_norm": 0.619763203565309, "learning_rate": 1.3914571505812382e-05, "loss": 0.2674, "step": 77015 }, { "epoch": 2.2487554926205457, "grad_norm": 0.5700923683013563, "learning_rate": 1.3911868072452014e-05, "loss": 0.2421, "step": 77020 }, { "epoch": 2.2489014758908628, "grad_norm": 0.5250587925988791, "learning_rate": 1.3909164639091646e-05, "loss": 0.2477, "step": 77025 }, { "epoch": 2.24904745916118, "grad_norm": 0.5381540222457848, "learning_rate": 1.390646120573128e-05, "loss": 0.2459, "step": 77030 }, { "epoch": 2.249193442431497, "grad_norm": 0.5993333483947946, "learning_rate": 1.3903757772370913e-05, "loss": 0.2465, "step": 77035 }, { "epoch": 2.2493394257018147, "grad_norm": 0.6113921979633373, "learning_rate": 1.3901054339010543e-05, "loss": 0.2403, "step": 77040 }, { "epoch": 2.2494854089721317, "grad_norm": 0.6128978640766556, "learning_rate": 1.3898350905650176e-05, "loss": 0.257, "step": 77045 }, { "epoch": 2.249631392242449, "grad_norm": 0.5825867043751245, "learning_rate": 1.3895647472289808e-05, "loss": 0.259, "step": 77050 }, { "epoch": 2.249777375512766, "grad_norm": 0.6019681409068885, "learning_rate": 1.3892944038929442e-05, "loss": 0.2363, "step": 77055 }, { "epoch": 2.2499233587830836, "grad_norm": 0.5480935415175854, "learning_rate": 1.3890240605569074e-05, "loss": 0.2528, "step": 77060 }, { "epoch": 2.2500693420534006, "grad_norm": 0.5872552094315614, "learning_rate": 1.3887537172208707e-05, "loss": 0.2556, "step": 77065 }, { "epoch": 2.250215325323718, "grad_norm": 0.5758831186075295, "learning_rate": 1.3884833738848337e-05, "loss": 0.2638, "step": 77070 }, { "epoch": 2.250361308594035, "grad_norm": 0.5488976038991216, "learning_rate": 1.388213030548797e-05, "loss": 0.2463, "step": 77075 }, { "epoch": 2.2505072918643525, "grad_norm": 0.5624265981636697, "learning_rate": 1.3879426872127602e-05, "loss": 0.2494, "step": 77080 }, { "epoch": 2.2506532751346695, "grad_norm": 0.6127478527203943, "learning_rate": 1.3876723438767236e-05, "loss": 0.2537, "step": 77085 }, { "epoch": 2.250799258404987, "grad_norm": 0.5790590963873667, "learning_rate": 1.3874020005406868e-05, "loss": 0.2531, "step": 77090 }, { "epoch": 2.250945241675304, "grad_norm": 0.6129495488913091, "learning_rate": 1.38713165720465e-05, "loss": 0.2635, "step": 77095 }, { "epoch": 2.2510912249456214, "grad_norm": 0.5868429668842373, "learning_rate": 1.3868613138686131e-05, "loss": 0.2505, "step": 77100 }, { "epoch": 2.2512372082159384, "grad_norm": 0.5531521626801151, "learning_rate": 1.3865909705325763e-05, "loss": 0.2541, "step": 77105 }, { "epoch": 2.251383191486256, "grad_norm": 0.6139837292993248, "learning_rate": 1.3863206271965396e-05, "loss": 0.2522, "step": 77110 }, { "epoch": 2.251529174756573, "grad_norm": 0.8485158819824562, "learning_rate": 1.386050283860503e-05, "loss": 0.2535, "step": 77115 }, { "epoch": 2.25167515802689, "grad_norm": 0.5781492051438917, "learning_rate": 1.3857799405244662e-05, "loss": 0.2674, "step": 77120 }, { "epoch": 2.2518211412972073, "grad_norm": 0.5160137859539784, "learning_rate": 1.3855095971884294e-05, "loss": 0.2487, "step": 77125 }, { "epoch": 2.251967124567525, "grad_norm": 0.5817013974956018, "learning_rate": 1.3852392538523925e-05, "loss": 0.2289, "step": 77130 }, { "epoch": 2.252113107837842, "grad_norm": 0.5470926195121527, "learning_rate": 1.3849689105163557e-05, "loss": 0.2341, "step": 77135 }, { "epoch": 2.252259091108159, "grad_norm": 0.6223833351319701, "learning_rate": 1.3846985671803191e-05, "loss": 0.2611, "step": 77140 }, { "epoch": 2.2524050743784763, "grad_norm": 0.619604964043005, "learning_rate": 1.3844282238442824e-05, "loss": 0.2618, "step": 77145 }, { "epoch": 2.2525510576487933, "grad_norm": 0.5468847325584423, "learning_rate": 1.3841578805082456e-05, "loss": 0.2461, "step": 77150 }, { "epoch": 2.2526970409191107, "grad_norm": 0.570876796410895, "learning_rate": 1.3838875371722088e-05, "loss": 0.2634, "step": 77155 }, { "epoch": 2.2528430241894277, "grad_norm": 0.587286580899775, "learning_rate": 1.3836171938361719e-05, "loss": 0.2652, "step": 77160 }, { "epoch": 2.252989007459745, "grad_norm": 0.5739240581965254, "learning_rate": 1.3833468505001351e-05, "loss": 0.2461, "step": 77165 }, { "epoch": 2.253134990730062, "grad_norm": 0.5806366379601469, "learning_rate": 1.3830765071640985e-05, "loss": 0.2435, "step": 77170 }, { "epoch": 2.2532809740003796, "grad_norm": 0.5441420659443025, "learning_rate": 1.3828061638280618e-05, "loss": 0.2481, "step": 77175 }, { "epoch": 2.2534269572706966, "grad_norm": 0.5418713949939015, "learning_rate": 1.382535820492025e-05, "loss": 0.2504, "step": 77180 }, { "epoch": 2.253572940541014, "grad_norm": 0.604955922286953, "learning_rate": 1.3822654771559882e-05, "loss": 0.2566, "step": 77185 }, { "epoch": 2.253718923811331, "grad_norm": 0.5724675055664953, "learning_rate": 1.3819951338199513e-05, "loss": 0.2632, "step": 77190 }, { "epoch": 2.2538649070816485, "grad_norm": 0.6076057485028922, "learning_rate": 1.3817247904839145e-05, "loss": 0.2544, "step": 77195 }, { "epoch": 2.2540108903519656, "grad_norm": 0.5661829019883701, "learning_rate": 1.3814544471478779e-05, "loss": 0.2593, "step": 77200 }, { "epoch": 2.254156873622283, "grad_norm": 0.554751918616213, "learning_rate": 1.3811841038118411e-05, "loss": 0.2651, "step": 77205 }, { "epoch": 2.2543028568926, "grad_norm": 0.6014954129903405, "learning_rate": 1.3809137604758044e-05, "loss": 0.2566, "step": 77210 }, { "epoch": 2.2544488401629175, "grad_norm": 0.533135140327164, "learning_rate": 1.3806434171397676e-05, "loss": 0.2559, "step": 77215 }, { "epoch": 2.2545948234332345, "grad_norm": 0.6044525674693567, "learning_rate": 1.3803730738037307e-05, "loss": 0.2566, "step": 77220 }, { "epoch": 2.254740806703552, "grad_norm": 0.5466984784254783, "learning_rate": 1.3801027304676942e-05, "loss": 0.2458, "step": 77225 }, { "epoch": 2.254886789973869, "grad_norm": 0.5741415903174215, "learning_rate": 1.3798323871316573e-05, "loss": 0.2539, "step": 77230 }, { "epoch": 2.2550327732441864, "grad_norm": 0.6361290637286743, "learning_rate": 1.3795620437956205e-05, "loss": 0.2572, "step": 77235 }, { "epoch": 2.2551787565145034, "grad_norm": 0.595403166612946, "learning_rate": 1.3792917004595838e-05, "loss": 0.2745, "step": 77240 }, { "epoch": 2.255324739784821, "grad_norm": 0.5552194595631971, "learning_rate": 1.3790213571235468e-05, "loss": 0.2474, "step": 77245 }, { "epoch": 2.255470723055138, "grad_norm": 0.5376395358612845, "learning_rate": 1.37875101378751e-05, "loss": 0.2611, "step": 77250 }, { "epoch": 2.255616706325455, "grad_norm": 0.5194406954118703, "learning_rate": 1.3784806704514736e-05, "loss": 0.2495, "step": 77255 }, { "epoch": 2.2557626895957723, "grad_norm": 0.5725080452887669, "learning_rate": 1.3782103271154367e-05, "loss": 0.2421, "step": 77260 }, { "epoch": 2.2559086728660898, "grad_norm": 0.601796338113821, "learning_rate": 1.3779399837794e-05, "loss": 0.262, "step": 77265 }, { "epoch": 2.2560546561364068, "grad_norm": 0.5847018259978634, "learning_rate": 1.3776696404433632e-05, "loss": 0.2567, "step": 77270 }, { "epoch": 2.2562006394067238, "grad_norm": 0.6049336838426965, "learning_rate": 1.3773992971073262e-05, "loss": 0.2641, "step": 77275 }, { "epoch": 2.2563466226770412, "grad_norm": 0.5557974726945656, "learning_rate": 1.3771289537712895e-05, "loss": 0.2621, "step": 77280 }, { "epoch": 2.2564926059473587, "grad_norm": 0.566936842514153, "learning_rate": 1.376858610435253e-05, "loss": 0.2476, "step": 77285 }, { "epoch": 2.2566385892176757, "grad_norm": 0.5683271152353141, "learning_rate": 1.376588267099216e-05, "loss": 0.2624, "step": 77290 }, { "epoch": 2.2567845724879927, "grad_norm": 0.535105279821455, "learning_rate": 1.3763179237631793e-05, "loss": 0.2526, "step": 77295 }, { "epoch": 2.25693055575831, "grad_norm": 0.640345225909499, "learning_rate": 1.3760475804271425e-05, "loss": 0.2619, "step": 77300 }, { "epoch": 2.257076539028627, "grad_norm": 0.5804795755209954, "learning_rate": 1.3757772370911056e-05, "loss": 0.2531, "step": 77305 }, { "epoch": 2.2572225222989446, "grad_norm": 0.5659990402836204, "learning_rate": 1.3755068937550692e-05, "loss": 0.2606, "step": 77310 }, { "epoch": 2.2573685055692616, "grad_norm": 0.5881364813761547, "learning_rate": 1.3752365504190322e-05, "loss": 0.258, "step": 77315 }, { "epoch": 2.257514488839579, "grad_norm": 0.6048256197225071, "learning_rate": 1.3749662070829955e-05, "loss": 0.2504, "step": 77320 }, { "epoch": 2.257660472109896, "grad_norm": 0.5614851842080991, "learning_rate": 1.3746958637469587e-05, "loss": 0.2514, "step": 77325 }, { "epoch": 2.2578064553802135, "grad_norm": 0.6161003883136634, "learning_rate": 1.374425520410922e-05, "loss": 0.2635, "step": 77330 }, { "epoch": 2.2579524386505305, "grad_norm": 0.6036373382104707, "learning_rate": 1.374155177074885e-05, "loss": 0.2556, "step": 77335 }, { "epoch": 2.258098421920848, "grad_norm": 0.5833266615812802, "learning_rate": 1.3738848337388486e-05, "loss": 0.2579, "step": 77340 }, { "epoch": 2.258244405191165, "grad_norm": 0.5497084811275857, "learning_rate": 1.3736144904028116e-05, "loss": 0.2422, "step": 77345 }, { "epoch": 2.2583903884614824, "grad_norm": 0.5720110566692974, "learning_rate": 1.3733441470667749e-05, "loss": 0.246, "step": 77350 }, { "epoch": 2.2585363717317994, "grad_norm": 0.6258293906413427, "learning_rate": 1.3730738037307381e-05, "loss": 0.2537, "step": 77355 }, { "epoch": 2.258682355002117, "grad_norm": 0.5309408397312447, "learning_rate": 1.3728034603947013e-05, "loss": 0.2542, "step": 77360 }, { "epoch": 2.258828338272434, "grad_norm": 0.5951089406938946, "learning_rate": 1.3725331170586644e-05, "loss": 0.2613, "step": 77365 }, { "epoch": 2.2589743215427514, "grad_norm": 0.6091275388964246, "learning_rate": 1.372262773722628e-05, "loss": 0.2577, "step": 77370 }, { "epoch": 2.2591203048130684, "grad_norm": 0.5858233966312276, "learning_rate": 1.371992430386591e-05, "loss": 0.2675, "step": 77375 }, { "epoch": 2.259266288083386, "grad_norm": 0.5600995091648562, "learning_rate": 1.3717220870505543e-05, "loss": 0.2608, "step": 77380 }, { "epoch": 2.259412271353703, "grad_norm": 0.5593247962419173, "learning_rate": 1.3714517437145175e-05, "loss": 0.2395, "step": 77385 }, { "epoch": 2.2595582546240203, "grad_norm": 0.5512508701581902, "learning_rate": 1.3711814003784807e-05, "loss": 0.2432, "step": 77390 }, { "epoch": 2.2597042378943373, "grad_norm": 0.5168443313321041, "learning_rate": 1.3709110570424441e-05, "loss": 0.2553, "step": 77395 }, { "epoch": 2.2598502211646547, "grad_norm": 0.5360203487768912, "learning_rate": 1.3706407137064074e-05, "loss": 0.2452, "step": 77400 }, { "epoch": 2.2599962044349717, "grad_norm": 0.569716003809331, "learning_rate": 1.3703703703703704e-05, "loss": 0.2598, "step": 77405 }, { "epoch": 2.2601421877052887, "grad_norm": 0.6054766766285801, "learning_rate": 1.3701000270343336e-05, "loss": 0.2528, "step": 77410 }, { "epoch": 2.260288170975606, "grad_norm": 0.6060552007223678, "learning_rate": 1.3698296836982969e-05, "loss": 0.2665, "step": 77415 }, { "epoch": 2.2604341542459236, "grad_norm": 0.596868100057369, "learning_rate": 1.36955934036226e-05, "loss": 0.2517, "step": 77420 }, { "epoch": 2.2605801375162407, "grad_norm": 0.5988440034584671, "learning_rate": 1.3692889970262235e-05, "loss": 0.2481, "step": 77425 }, { "epoch": 2.2607261207865577, "grad_norm": 0.5696799612222905, "learning_rate": 1.3690186536901867e-05, "loss": 0.2514, "step": 77430 }, { "epoch": 2.260872104056875, "grad_norm": 0.6115302174034476, "learning_rate": 1.3687483103541498e-05, "loss": 0.2575, "step": 77435 }, { "epoch": 2.261018087327192, "grad_norm": 0.5328887347547022, "learning_rate": 1.368477967018113e-05, "loss": 0.2458, "step": 77440 }, { "epoch": 2.2611640705975096, "grad_norm": 0.5686778356208984, "learning_rate": 1.3682076236820763e-05, "loss": 0.2374, "step": 77445 }, { "epoch": 2.2613100538678266, "grad_norm": 0.5396703037075737, "learning_rate": 1.3679372803460393e-05, "loss": 0.2537, "step": 77450 }, { "epoch": 2.261456037138144, "grad_norm": 0.5820162869625874, "learning_rate": 1.3676669370100029e-05, "loss": 0.2581, "step": 77455 }, { "epoch": 2.261602020408461, "grad_norm": 0.5104092960522587, "learning_rate": 1.3673965936739661e-05, "loss": 0.2626, "step": 77460 }, { "epoch": 2.2617480036787785, "grad_norm": 0.5588536643694239, "learning_rate": 1.3671262503379292e-05, "loss": 0.2542, "step": 77465 }, { "epoch": 2.2618939869490955, "grad_norm": 0.5787534379352675, "learning_rate": 1.3668559070018924e-05, "loss": 0.2615, "step": 77470 }, { "epoch": 2.262039970219413, "grad_norm": 0.607051068942745, "learning_rate": 1.3665855636658557e-05, "loss": 0.2571, "step": 77475 }, { "epoch": 2.26218595348973, "grad_norm": 0.6109156586940561, "learning_rate": 1.366315220329819e-05, "loss": 0.2612, "step": 77480 }, { "epoch": 2.2623319367600474, "grad_norm": 0.5775141322844506, "learning_rate": 1.3660448769937823e-05, "loss": 0.257, "step": 77485 }, { "epoch": 2.2624779200303644, "grad_norm": 0.5775386242420624, "learning_rate": 1.3657745336577454e-05, "loss": 0.2526, "step": 77490 }, { "epoch": 2.262623903300682, "grad_norm": 0.49361461338334195, "learning_rate": 1.3655041903217086e-05, "loss": 0.2348, "step": 77495 }, { "epoch": 2.262769886570999, "grad_norm": 0.579026166179153, "learning_rate": 1.3652338469856718e-05, "loss": 0.2517, "step": 77500 }, { "epoch": 2.2629158698413163, "grad_norm": 0.5740550642359481, "learning_rate": 1.364963503649635e-05, "loss": 0.2497, "step": 77505 }, { "epoch": 2.2630618531116333, "grad_norm": 0.6476715603340324, "learning_rate": 1.3646931603135984e-05, "loss": 0.2504, "step": 77510 }, { "epoch": 2.2632078363819508, "grad_norm": 0.4971463519222438, "learning_rate": 1.3644228169775617e-05, "loss": 0.2273, "step": 77515 }, { "epoch": 2.263353819652268, "grad_norm": 0.5823758392858053, "learning_rate": 1.3641524736415247e-05, "loss": 0.2602, "step": 77520 }, { "epoch": 2.2634998029225852, "grad_norm": 0.6083613909842202, "learning_rate": 1.363882130305488e-05, "loss": 0.269, "step": 77525 }, { "epoch": 2.2636457861929022, "grad_norm": 0.626497538299424, "learning_rate": 1.3636117869694512e-05, "loss": 0.2561, "step": 77530 }, { "epoch": 2.2637917694632197, "grad_norm": 0.5587998069483189, "learning_rate": 1.3633414436334144e-05, "loss": 0.2493, "step": 77535 }, { "epoch": 2.2639377527335367, "grad_norm": 0.5355181162376892, "learning_rate": 1.3630711002973778e-05, "loss": 0.249, "step": 77540 }, { "epoch": 2.264083736003854, "grad_norm": 0.5797359963701201, "learning_rate": 1.362800756961341e-05, "loss": 0.263, "step": 77545 }, { "epoch": 2.264229719274171, "grad_norm": 0.552974131304707, "learning_rate": 1.3625304136253041e-05, "loss": 0.2615, "step": 77550 }, { "epoch": 2.2643757025444886, "grad_norm": 0.5959650657360147, "learning_rate": 1.3622600702892674e-05, "loss": 0.25, "step": 77555 }, { "epoch": 2.2645216858148056, "grad_norm": 0.5693291490383128, "learning_rate": 1.3619897269532306e-05, "loss": 0.2451, "step": 77560 }, { "epoch": 2.2646676690851226, "grad_norm": 0.5801185594726931, "learning_rate": 1.361719383617194e-05, "loss": 0.253, "step": 77565 }, { "epoch": 2.26481365235544, "grad_norm": 0.5849113709977164, "learning_rate": 1.3614490402811572e-05, "loss": 0.2667, "step": 77570 }, { "epoch": 2.2649596356257575, "grad_norm": 0.6117787479958149, "learning_rate": 1.3611786969451205e-05, "loss": 0.2669, "step": 77575 }, { "epoch": 2.2651056188960745, "grad_norm": 0.6036210839220395, "learning_rate": 1.3609083536090835e-05, "loss": 0.259, "step": 77580 }, { "epoch": 2.2652516021663915, "grad_norm": 0.6371100533807721, "learning_rate": 1.3606380102730468e-05, "loss": 0.2609, "step": 77585 }, { "epoch": 2.265397585436709, "grad_norm": 0.5541480080943215, "learning_rate": 1.36036766693701e-05, "loss": 0.2618, "step": 77590 }, { "epoch": 2.265543568707026, "grad_norm": 0.5842554979333217, "learning_rate": 1.3600973236009734e-05, "loss": 0.2406, "step": 77595 }, { "epoch": 2.2656895519773435, "grad_norm": 0.5502314552173679, "learning_rate": 1.3598269802649366e-05, "loss": 0.2497, "step": 77600 }, { "epoch": 2.2658355352476605, "grad_norm": 0.6012356189807979, "learning_rate": 1.3595566369288999e-05, "loss": 0.2465, "step": 77605 }, { "epoch": 2.265981518517978, "grad_norm": 0.5983237731550184, "learning_rate": 1.3592862935928629e-05, "loss": 0.2584, "step": 77610 }, { "epoch": 2.266127501788295, "grad_norm": 0.6027713425598366, "learning_rate": 1.3590159502568261e-05, "loss": 0.2459, "step": 77615 }, { "epoch": 2.2662734850586124, "grad_norm": 0.5761778726321145, "learning_rate": 1.3587456069207894e-05, "loss": 0.2494, "step": 77620 }, { "epoch": 2.2664194683289294, "grad_norm": 0.5654491993389991, "learning_rate": 1.3584752635847528e-05, "loss": 0.2544, "step": 77625 }, { "epoch": 2.266565451599247, "grad_norm": 0.6051889509449334, "learning_rate": 1.358204920248716e-05, "loss": 0.248, "step": 77630 }, { "epoch": 2.266711434869564, "grad_norm": 0.58138735778563, "learning_rate": 1.3579345769126792e-05, "loss": 0.2498, "step": 77635 }, { "epoch": 2.2668574181398813, "grad_norm": 0.5143991578676012, "learning_rate": 1.3576642335766423e-05, "loss": 0.2636, "step": 77640 }, { "epoch": 2.2670034014101983, "grad_norm": 0.5676078014396678, "learning_rate": 1.3573938902406055e-05, "loss": 0.2497, "step": 77645 }, { "epoch": 2.2671493846805157, "grad_norm": 0.5915373420066121, "learning_rate": 1.357123546904569e-05, "loss": 0.2461, "step": 77650 }, { "epoch": 2.2672953679508328, "grad_norm": 0.6106449639456095, "learning_rate": 1.3568532035685322e-05, "loss": 0.2616, "step": 77655 }, { "epoch": 2.26744135122115, "grad_norm": 0.6568082263153945, "learning_rate": 1.3565828602324954e-05, "loss": 0.2663, "step": 77660 }, { "epoch": 2.267587334491467, "grad_norm": 0.5672726292813399, "learning_rate": 1.3563125168964586e-05, "loss": 0.2517, "step": 77665 }, { "epoch": 2.2677333177617847, "grad_norm": 0.5884644866049669, "learning_rate": 1.3560421735604217e-05, "loss": 0.2608, "step": 77670 }, { "epoch": 2.2678793010321017, "grad_norm": 0.5919052781089429, "learning_rate": 1.355771830224385e-05, "loss": 0.2605, "step": 77675 }, { "epoch": 2.268025284302419, "grad_norm": 0.5342885112001811, "learning_rate": 1.3555014868883483e-05, "loss": 0.2456, "step": 77680 }, { "epoch": 2.268171267572736, "grad_norm": 0.5679065357431462, "learning_rate": 1.3552311435523116e-05, "loss": 0.2589, "step": 77685 }, { "epoch": 2.2683172508430536, "grad_norm": 0.5991457826857212, "learning_rate": 1.3549608002162748e-05, "loss": 0.2697, "step": 77690 }, { "epoch": 2.2684632341133706, "grad_norm": 0.5363475086939894, "learning_rate": 1.3546904568802379e-05, "loss": 0.2586, "step": 77695 }, { "epoch": 2.2686092173836876, "grad_norm": 0.5805722455607941, "learning_rate": 1.3544201135442011e-05, "loss": 0.259, "step": 77700 }, { "epoch": 2.268755200654005, "grad_norm": 0.6177444310604567, "learning_rate": 1.3541497702081643e-05, "loss": 0.2562, "step": 77705 }, { "epoch": 2.2689011839243225, "grad_norm": 0.5604113685742782, "learning_rate": 1.3538794268721277e-05, "loss": 0.2638, "step": 77710 }, { "epoch": 2.2690471671946395, "grad_norm": 0.5904201806604957, "learning_rate": 1.353609083536091e-05, "loss": 0.241, "step": 77715 }, { "epoch": 2.2691931504649565, "grad_norm": 0.6155433927671574, "learning_rate": 1.3533387402000542e-05, "loss": 0.273, "step": 77720 }, { "epoch": 2.269339133735274, "grad_norm": 0.5700641432651538, "learning_rate": 1.3530683968640172e-05, "loss": 0.2425, "step": 77725 }, { "epoch": 2.269485117005591, "grad_norm": 0.6126688710048546, "learning_rate": 1.3527980535279805e-05, "loss": 0.2614, "step": 77730 }, { "epoch": 2.2696311002759084, "grad_norm": 0.6148777797147241, "learning_rate": 1.352527710191944e-05, "loss": 0.2432, "step": 77735 }, { "epoch": 2.2697770835462254, "grad_norm": 0.5985109270871761, "learning_rate": 1.3522573668559071e-05, "loss": 0.2552, "step": 77740 }, { "epoch": 2.269923066816543, "grad_norm": 0.5311565444685383, "learning_rate": 1.3519870235198703e-05, "loss": 0.2254, "step": 77745 }, { "epoch": 2.27006905008686, "grad_norm": 0.5531977547755004, "learning_rate": 1.3517166801838336e-05, "loss": 0.2629, "step": 77750 }, { "epoch": 2.2702150333571773, "grad_norm": 0.6144628680013308, "learning_rate": 1.3514463368477966e-05, "loss": 0.2508, "step": 77755 }, { "epoch": 2.2703610166274943, "grad_norm": 0.5601078306771151, "learning_rate": 1.3511759935117599e-05, "loss": 0.2438, "step": 77760 }, { "epoch": 2.270506999897812, "grad_norm": 0.5574362028725116, "learning_rate": 1.3509056501757233e-05, "loss": 0.2427, "step": 77765 }, { "epoch": 2.270652983168129, "grad_norm": 0.571050038246955, "learning_rate": 1.3506353068396865e-05, "loss": 0.2566, "step": 77770 }, { "epoch": 2.2707989664384463, "grad_norm": 0.5740883550618061, "learning_rate": 1.3503649635036497e-05, "loss": 0.2498, "step": 77775 }, { "epoch": 2.2709449497087633, "grad_norm": 0.6228431523264516, "learning_rate": 1.350094620167613e-05, "loss": 0.2744, "step": 77780 }, { "epoch": 2.2710909329790807, "grad_norm": 0.5626657649689545, "learning_rate": 1.349824276831576e-05, "loss": 0.2557, "step": 77785 }, { "epoch": 2.2712369162493977, "grad_norm": 0.5869715398522193, "learning_rate": 1.3495539334955393e-05, "loss": 0.2439, "step": 77790 }, { "epoch": 2.271382899519715, "grad_norm": 0.5742682580654355, "learning_rate": 1.3492835901595027e-05, "loss": 0.25, "step": 77795 }, { "epoch": 2.271528882790032, "grad_norm": 0.562279970672161, "learning_rate": 1.3490132468234659e-05, "loss": 0.2514, "step": 77800 }, { "epoch": 2.2716748660603496, "grad_norm": 0.5059740690133246, "learning_rate": 1.3487429034874291e-05, "loss": 0.2526, "step": 77805 }, { "epoch": 2.2718208493306666, "grad_norm": 0.5442129705018269, "learning_rate": 1.3484725601513924e-05, "loss": 0.2453, "step": 77810 }, { "epoch": 2.271966832600984, "grad_norm": 0.5173707226427341, "learning_rate": 1.3482022168153554e-05, "loss": 0.251, "step": 77815 }, { "epoch": 2.272112815871301, "grad_norm": 0.5980921624221203, "learning_rate": 1.347931873479319e-05, "loss": 0.2445, "step": 77820 }, { "epoch": 2.2722587991416185, "grad_norm": 0.5956087787577017, "learning_rate": 1.347661530143282e-05, "loss": 0.2637, "step": 77825 }, { "epoch": 2.2724047824119356, "grad_norm": 0.5915983477198684, "learning_rate": 1.3473911868072453e-05, "loss": 0.2587, "step": 77830 }, { "epoch": 2.272550765682253, "grad_norm": 0.5318697500815934, "learning_rate": 1.3471208434712085e-05, "loss": 0.2517, "step": 77835 }, { "epoch": 2.27269674895257, "grad_norm": 0.5627666457560961, "learning_rate": 1.3468505001351717e-05, "loss": 0.2595, "step": 77840 }, { "epoch": 2.2728427322228875, "grad_norm": 0.5571732710062389, "learning_rate": 1.3465801567991348e-05, "loss": 0.2488, "step": 77845 }, { "epoch": 2.2729887154932045, "grad_norm": 0.5318847743060904, "learning_rate": 1.3463098134630984e-05, "loss": 0.2546, "step": 77850 }, { "epoch": 2.2731346987635215, "grad_norm": 0.58493265739348, "learning_rate": 1.3460394701270614e-05, "loss": 0.2446, "step": 77855 }, { "epoch": 2.273280682033839, "grad_norm": 0.5665852340992074, "learning_rate": 1.3457691267910247e-05, "loss": 0.2375, "step": 77860 }, { "epoch": 2.2734266653041564, "grad_norm": 0.5635121942530603, "learning_rate": 1.3454987834549879e-05, "loss": 0.2457, "step": 77865 }, { "epoch": 2.2735726485744734, "grad_norm": 0.6117162654477667, "learning_rate": 1.3452284401189511e-05, "loss": 0.2544, "step": 77870 }, { "epoch": 2.2737186318447904, "grad_norm": 0.5558800462903465, "learning_rate": 1.3449580967829142e-05, "loss": 0.2478, "step": 77875 }, { "epoch": 2.273864615115108, "grad_norm": 0.5667243283637949, "learning_rate": 1.3446877534468778e-05, "loss": 0.2753, "step": 77880 }, { "epoch": 2.274010598385425, "grad_norm": 0.5934591671955616, "learning_rate": 1.3444174101108408e-05, "loss": 0.2527, "step": 77885 }, { "epoch": 2.2741565816557423, "grad_norm": 0.5596658123914839, "learning_rate": 1.344147066774804e-05, "loss": 0.2508, "step": 77890 }, { "epoch": 2.2743025649260593, "grad_norm": 0.5117490204665988, "learning_rate": 1.3438767234387673e-05, "loss": 0.2418, "step": 77895 }, { "epoch": 2.2744485481963768, "grad_norm": 0.5710906847347396, "learning_rate": 1.3436063801027304e-05, "loss": 0.251, "step": 77900 }, { "epoch": 2.2745945314666938, "grad_norm": 0.5664548320655713, "learning_rate": 1.343336036766694e-05, "loss": 0.2584, "step": 77905 }, { "epoch": 2.274740514737011, "grad_norm": 0.5654245867813237, "learning_rate": 1.3430656934306572e-05, "loss": 0.26, "step": 77910 }, { "epoch": 2.2748864980073282, "grad_norm": 0.6106317385756175, "learning_rate": 1.3427953500946202e-05, "loss": 0.2553, "step": 77915 }, { "epoch": 2.2750324812776457, "grad_norm": 0.5645413278207243, "learning_rate": 1.3425250067585835e-05, "loss": 0.2491, "step": 77920 }, { "epoch": 2.2751784645479627, "grad_norm": 0.5481812388829906, "learning_rate": 1.3422546634225467e-05, "loss": 0.254, "step": 77925 }, { "epoch": 2.27532444781828, "grad_norm": 0.5084455927554591, "learning_rate": 1.3419843200865097e-05, "loss": 0.237, "step": 77930 }, { "epoch": 2.275470431088597, "grad_norm": 0.5425102441932035, "learning_rate": 1.3417139767504733e-05, "loss": 0.247, "step": 77935 }, { "epoch": 2.2756164143589146, "grad_norm": 0.574266237642241, "learning_rate": 1.3414436334144365e-05, "loss": 0.2427, "step": 77940 }, { "epoch": 2.2757623976292316, "grad_norm": 0.6150409599506274, "learning_rate": 1.3411732900783996e-05, "loss": 0.255, "step": 77945 }, { "epoch": 2.275908380899549, "grad_norm": 0.6284567551397344, "learning_rate": 1.3409029467423628e-05, "loss": 0.2678, "step": 77950 }, { "epoch": 2.276054364169866, "grad_norm": 0.5542450905754754, "learning_rate": 1.340632603406326e-05, "loss": 0.2488, "step": 77955 }, { "epoch": 2.2762003474401835, "grad_norm": 0.5682608190216552, "learning_rate": 1.3403622600702891e-05, "loss": 0.2618, "step": 77960 }, { "epoch": 2.2763463307105005, "grad_norm": 0.6063686635074158, "learning_rate": 1.3400919167342527e-05, "loss": 0.2479, "step": 77965 }, { "epoch": 2.276492313980818, "grad_norm": 0.599572594668023, "learning_rate": 1.3398215733982158e-05, "loss": 0.2379, "step": 77970 }, { "epoch": 2.276638297251135, "grad_norm": 0.5637491225815735, "learning_rate": 1.339551230062179e-05, "loss": 0.271, "step": 77975 }, { "epoch": 2.2767842805214524, "grad_norm": 0.6147631797054071, "learning_rate": 1.3392808867261422e-05, "loss": 0.2555, "step": 77980 }, { "epoch": 2.2769302637917694, "grad_norm": 0.5509706671174821, "learning_rate": 1.3390105433901055e-05, "loss": 0.2425, "step": 77985 }, { "epoch": 2.2770762470620864, "grad_norm": 0.5722286934346601, "learning_rate": 1.3387402000540689e-05, "loss": 0.2381, "step": 77990 }, { "epoch": 2.277222230332404, "grad_norm": 0.5765687273073938, "learning_rate": 1.3384698567180321e-05, "loss": 0.2576, "step": 77995 }, { "epoch": 2.2773682136027213, "grad_norm": 0.596785214971596, "learning_rate": 1.3381995133819952e-05, "loss": 0.2508, "step": 78000 }, { "epoch": 2.2775141968730384, "grad_norm": 0.5647618977532155, "learning_rate": 1.3379291700459584e-05, "loss": 0.2584, "step": 78005 }, { "epoch": 2.2776601801433554, "grad_norm": 0.5597990344124115, "learning_rate": 1.3376588267099216e-05, "loss": 0.2485, "step": 78010 }, { "epoch": 2.277806163413673, "grad_norm": 0.5374345783842032, "learning_rate": 1.3373884833738849e-05, "loss": 0.2471, "step": 78015 }, { "epoch": 2.2779521466839903, "grad_norm": 0.5435737153151414, "learning_rate": 1.3371181400378483e-05, "loss": 0.243, "step": 78020 }, { "epoch": 2.2780981299543073, "grad_norm": 0.5872398258793248, "learning_rate": 1.3368477967018115e-05, "loss": 0.2499, "step": 78025 }, { "epoch": 2.2782441132246243, "grad_norm": 0.6006210319341273, "learning_rate": 1.3365774533657745e-05, "loss": 0.2568, "step": 78030 }, { "epoch": 2.2783900964949417, "grad_norm": 0.5749028953098022, "learning_rate": 1.3363071100297378e-05, "loss": 0.2445, "step": 78035 }, { "epoch": 2.2785360797652587, "grad_norm": 0.5619861785543891, "learning_rate": 1.336036766693701e-05, "loss": 0.2602, "step": 78040 }, { "epoch": 2.278682063035576, "grad_norm": 0.5434606402927171, "learning_rate": 1.3357664233576642e-05, "loss": 0.2468, "step": 78045 }, { "epoch": 2.278828046305893, "grad_norm": 0.5530276475751915, "learning_rate": 1.3354960800216276e-05, "loss": 0.2561, "step": 78050 }, { "epoch": 2.2789740295762106, "grad_norm": 0.564929672469251, "learning_rate": 1.3352257366855909e-05, "loss": 0.2479, "step": 78055 }, { "epoch": 2.2791200128465277, "grad_norm": 0.5525978648477797, "learning_rate": 1.334955393349554e-05, "loss": 0.2493, "step": 78060 }, { "epoch": 2.279265996116845, "grad_norm": 0.5837091147348547, "learning_rate": 1.3346850500135172e-05, "loss": 0.2496, "step": 78065 }, { "epoch": 2.279411979387162, "grad_norm": 0.6106110757266358, "learning_rate": 1.3344147066774804e-05, "loss": 0.2523, "step": 78070 }, { "epoch": 2.2795579626574796, "grad_norm": 0.5666066550724319, "learning_rate": 1.3341443633414438e-05, "loss": 0.2708, "step": 78075 }, { "epoch": 2.2797039459277966, "grad_norm": 0.558075110997468, "learning_rate": 1.333874020005407e-05, "loss": 0.2496, "step": 78080 }, { "epoch": 2.279849929198114, "grad_norm": 0.6092667571771737, "learning_rate": 1.3336036766693703e-05, "loss": 0.252, "step": 78085 }, { "epoch": 2.279995912468431, "grad_norm": 0.5778969986967196, "learning_rate": 1.3333333333333333e-05, "loss": 0.2397, "step": 78090 }, { "epoch": 2.2801418957387485, "grad_norm": 0.6078297706578574, "learning_rate": 1.3330629899972966e-05, "loss": 0.2512, "step": 78095 }, { "epoch": 2.2802878790090655, "grad_norm": 0.5775121876782875, "learning_rate": 1.3327926466612598e-05, "loss": 0.2398, "step": 78100 }, { "epoch": 2.280433862279383, "grad_norm": 0.5857939551856675, "learning_rate": 1.3325223033252232e-05, "loss": 0.2525, "step": 78105 }, { "epoch": 2.2805798455497, "grad_norm": 0.5153277815494463, "learning_rate": 1.3322519599891864e-05, "loss": 0.2417, "step": 78110 }, { "epoch": 2.2807258288200174, "grad_norm": 0.5454752025811188, "learning_rate": 1.3319816166531497e-05, "loss": 0.2398, "step": 78115 }, { "epoch": 2.2808718120903344, "grad_norm": 0.5270016633920446, "learning_rate": 1.3317112733171127e-05, "loss": 0.2384, "step": 78120 }, { "epoch": 2.281017795360652, "grad_norm": 0.5359863153149681, "learning_rate": 1.331440929981076e-05, "loss": 0.2315, "step": 78125 }, { "epoch": 2.281163778630969, "grad_norm": 0.5909255309289344, "learning_rate": 1.3311705866450392e-05, "loss": 0.2376, "step": 78130 }, { "epoch": 2.2813097619012863, "grad_norm": 0.589282311951519, "learning_rate": 1.3309002433090026e-05, "loss": 0.2561, "step": 78135 }, { "epoch": 2.2814557451716033, "grad_norm": 0.5904630860556105, "learning_rate": 1.3306298999729658e-05, "loss": 0.2384, "step": 78140 }, { "epoch": 2.2816017284419203, "grad_norm": 0.5382073951377156, "learning_rate": 1.3303595566369289e-05, "loss": 0.2423, "step": 78145 }, { "epoch": 2.281747711712238, "grad_norm": 0.5543903300936963, "learning_rate": 1.3300892133008921e-05, "loss": 0.2575, "step": 78150 }, { "epoch": 2.2818936949825552, "grad_norm": 0.5671800433689819, "learning_rate": 1.3298188699648553e-05, "loss": 0.2507, "step": 78155 }, { "epoch": 2.2820396782528722, "grad_norm": 0.6111844587085822, "learning_rate": 1.3295485266288187e-05, "loss": 0.2426, "step": 78160 }, { "epoch": 2.2821856615231892, "grad_norm": 0.555100578590276, "learning_rate": 1.329278183292782e-05, "loss": 0.2593, "step": 78165 }, { "epoch": 2.2823316447935067, "grad_norm": 0.5157364563741385, "learning_rate": 1.3290078399567452e-05, "loss": 0.2446, "step": 78170 }, { "epoch": 2.2824776280638237, "grad_norm": 0.6113363887414008, "learning_rate": 1.3287374966207083e-05, "loss": 0.2275, "step": 78175 }, { "epoch": 2.282623611334141, "grad_norm": 0.6305132734520094, "learning_rate": 1.3284671532846715e-05, "loss": 0.2524, "step": 78180 }, { "epoch": 2.282769594604458, "grad_norm": 0.5605179135420089, "learning_rate": 1.3281968099486347e-05, "loss": 0.2522, "step": 78185 }, { "epoch": 2.2829155778747756, "grad_norm": 0.526656591772522, "learning_rate": 1.3279264666125981e-05, "loss": 0.2459, "step": 78190 }, { "epoch": 2.2830615611450926, "grad_norm": 0.6062331636918857, "learning_rate": 1.3276561232765614e-05, "loss": 0.2475, "step": 78195 }, { "epoch": 2.28320754441541, "grad_norm": 0.5965241130374913, "learning_rate": 1.3273857799405246e-05, "loss": 0.2454, "step": 78200 }, { "epoch": 2.283353527685727, "grad_norm": 0.6145276612779551, "learning_rate": 1.3271154366044877e-05, "loss": 0.2534, "step": 78205 }, { "epoch": 2.2834995109560445, "grad_norm": 0.5663887465240364, "learning_rate": 1.3268450932684509e-05, "loss": 0.2586, "step": 78210 }, { "epoch": 2.2836454942263615, "grad_norm": 0.5589280089496977, "learning_rate": 1.3265747499324141e-05, "loss": 0.2437, "step": 78215 }, { "epoch": 2.283791477496679, "grad_norm": 0.5298721510443508, "learning_rate": 1.3263044065963775e-05, "loss": 0.246, "step": 78220 }, { "epoch": 2.283937460766996, "grad_norm": 0.5616567185529802, "learning_rate": 1.3260340632603408e-05, "loss": 0.2435, "step": 78225 }, { "epoch": 2.2840834440373134, "grad_norm": 0.6283043115333472, "learning_rate": 1.325763719924304e-05, "loss": 0.2558, "step": 78230 }, { "epoch": 2.2842294273076305, "grad_norm": 0.601043621907786, "learning_rate": 1.325493376588267e-05, "loss": 0.2725, "step": 78235 }, { "epoch": 2.284375410577948, "grad_norm": 0.571191336708045, "learning_rate": 1.3252230332522303e-05, "loss": 0.2474, "step": 78240 }, { "epoch": 2.284521393848265, "grad_norm": 0.6048449410808394, "learning_rate": 1.3249526899161937e-05, "loss": 0.2688, "step": 78245 }, { "epoch": 2.2846673771185824, "grad_norm": 0.5981437321494675, "learning_rate": 1.3246823465801569e-05, "loss": 0.2623, "step": 78250 }, { "epoch": 2.2848133603888994, "grad_norm": 0.5820829999279318, "learning_rate": 1.3244120032441201e-05, "loss": 0.2534, "step": 78255 }, { "epoch": 2.284959343659217, "grad_norm": 0.5684372911081285, "learning_rate": 1.3241416599080834e-05, "loss": 0.2447, "step": 78260 }, { "epoch": 2.285105326929534, "grad_norm": 0.5638671094706439, "learning_rate": 1.3238713165720464e-05, "loss": 0.238, "step": 78265 }, { "epoch": 2.2852513101998513, "grad_norm": 0.6062461770472968, "learning_rate": 1.3236009732360097e-05, "loss": 0.2391, "step": 78270 }, { "epoch": 2.2853972934701683, "grad_norm": 0.5898604487489455, "learning_rate": 1.323330629899973e-05, "loss": 0.2635, "step": 78275 }, { "epoch": 2.2855432767404853, "grad_norm": 0.5329442454774038, "learning_rate": 1.3230602865639363e-05, "loss": 0.2666, "step": 78280 }, { "epoch": 2.2856892600108027, "grad_norm": 0.5808779176956781, "learning_rate": 1.3227899432278995e-05, "loss": 0.2395, "step": 78285 }, { "epoch": 2.28583524328112, "grad_norm": 0.6128823758356522, "learning_rate": 1.3225195998918628e-05, "loss": 0.2447, "step": 78290 }, { "epoch": 2.285981226551437, "grad_norm": 0.6111302330522955, "learning_rate": 1.3222492565558258e-05, "loss": 0.2533, "step": 78295 }, { "epoch": 2.286127209821754, "grad_norm": 0.5730695165671291, "learning_rate": 1.321978913219789e-05, "loss": 0.2363, "step": 78300 }, { "epoch": 2.2862731930920717, "grad_norm": 0.518321378113642, "learning_rate": 1.3217085698837525e-05, "loss": 0.2488, "step": 78305 }, { "epoch": 2.286419176362389, "grad_norm": 0.5230609884922262, "learning_rate": 1.3214382265477157e-05, "loss": 0.2438, "step": 78310 }, { "epoch": 2.286565159632706, "grad_norm": 0.5833194075225748, "learning_rate": 1.321167883211679e-05, "loss": 0.2618, "step": 78315 }, { "epoch": 2.286711142903023, "grad_norm": 0.6353938650270984, "learning_rate": 1.3208975398756422e-05, "loss": 0.2642, "step": 78320 }, { "epoch": 2.2868571261733406, "grad_norm": 0.5614732975834857, "learning_rate": 1.3206271965396052e-05, "loss": 0.2541, "step": 78325 }, { "epoch": 2.2870031094436576, "grad_norm": 0.5557871460231781, "learning_rate": 1.3203568532035688e-05, "loss": 0.2562, "step": 78330 }, { "epoch": 2.287149092713975, "grad_norm": 0.5910828927509023, "learning_rate": 1.3200865098675319e-05, "loss": 0.2555, "step": 78335 }, { "epoch": 2.287295075984292, "grad_norm": 0.5457488939949433, "learning_rate": 1.3198161665314951e-05, "loss": 0.2514, "step": 78340 }, { "epoch": 2.2874410592546095, "grad_norm": 0.5233526743049496, "learning_rate": 1.3195458231954583e-05, "loss": 0.2319, "step": 78345 }, { "epoch": 2.2875870425249265, "grad_norm": 0.6230746689395532, "learning_rate": 1.3192754798594214e-05, "loss": 0.2449, "step": 78350 }, { "epoch": 2.287733025795244, "grad_norm": 0.5605639225930937, "learning_rate": 1.3190051365233846e-05, "loss": 0.2531, "step": 78355 }, { "epoch": 2.287879009065561, "grad_norm": 0.5751738768043427, "learning_rate": 1.3187347931873482e-05, "loss": 0.2503, "step": 78360 }, { "epoch": 2.2880249923358784, "grad_norm": 0.5829911103944011, "learning_rate": 1.3184644498513112e-05, "loss": 0.2535, "step": 78365 }, { "epoch": 2.2881709756061954, "grad_norm": 0.5423810668695808, "learning_rate": 1.3181941065152745e-05, "loss": 0.2608, "step": 78370 }, { "epoch": 2.288316958876513, "grad_norm": 0.5219448296607808, "learning_rate": 1.3179237631792377e-05, "loss": 0.2498, "step": 78375 }, { "epoch": 2.28846294214683, "grad_norm": 0.5429410016828766, "learning_rate": 1.3176534198432008e-05, "loss": 0.2417, "step": 78380 }, { "epoch": 2.2886089254171473, "grad_norm": 0.5486759229584056, "learning_rate": 1.317383076507164e-05, "loss": 0.2558, "step": 78385 }, { "epoch": 2.2887549086874643, "grad_norm": 0.5258156599137062, "learning_rate": 1.3171127331711276e-05, "loss": 0.2398, "step": 78390 }, { "epoch": 2.288900891957782, "grad_norm": 0.5790483751047267, "learning_rate": 1.3168423898350906e-05, "loss": 0.2391, "step": 78395 }, { "epoch": 2.289046875228099, "grad_norm": 0.5409795907385478, "learning_rate": 1.3165720464990539e-05, "loss": 0.2558, "step": 78400 }, { "epoch": 2.2891928584984162, "grad_norm": 0.5668631810587492, "learning_rate": 1.3163017031630171e-05, "loss": 0.2597, "step": 78405 }, { "epoch": 2.2893388417687333, "grad_norm": 0.5530641629328249, "learning_rate": 1.3160313598269802e-05, "loss": 0.2597, "step": 78410 }, { "epoch": 2.2894848250390507, "grad_norm": 0.6219596189406745, "learning_rate": 1.3157610164909437e-05, "loss": 0.2568, "step": 78415 }, { "epoch": 2.2896308083093677, "grad_norm": 0.6144320455248726, "learning_rate": 1.3154906731549068e-05, "loss": 0.2495, "step": 78420 }, { "epoch": 2.289776791579685, "grad_norm": 0.4974908722408422, "learning_rate": 1.31522032981887e-05, "loss": 0.2421, "step": 78425 }, { "epoch": 2.289922774850002, "grad_norm": 0.6043905452608122, "learning_rate": 1.3149499864828333e-05, "loss": 0.2376, "step": 78430 }, { "epoch": 2.290068758120319, "grad_norm": 0.6486633296973802, "learning_rate": 1.3146796431467965e-05, "loss": 0.2555, "step": 78435 }, { "epoch": 2.2902147413906366, "grad_norm": 0.5796586290646071, "learning_rate": 1.3144092998107596e-05, "loss": 0.24, "step": 78440 }, { "epoch": 2.290360724660954, "grad_norm": 0.5776426259935847, "learning_rate": 1.3141389564747231e-05, "loss": 0.2372, "step": 78445 }, { "epoch": 2.290506707931271, "grad_norm": 0.5686965476760429, "learning_rate": 1.3138686131386862e-05, "loss": 0.2548, "step": 78450 }, { "epoch": 2.290652691201588, "grad_norm": 0.580919421112786, "learning_rate": 1.3135982698026494e-05, "loss": 0.2539, "step": 78455 }, { "epoch": 2.2907986744719055, "grad_norm": 0.6096569774619325, "learning_rate": 1.3133279264666126e-05, "loss": 0.2503, "step": 78460 }, { "epoch": 2.2909446577422226, "grad_norm": 0.5656048194505405, "learning_rate": 1.3130575831305759e-05, "loss": 0.2425, "step": 78465 }, { "epoch": 2.29109064101254, "grad_norm": 0.5379787111728463, "learning_rate": 1.312787239794539e-05, "loss": 0.2522, "step": 78470 }, { "epoch": 2.291236624282857, "grad_norm": 0.5359556522580117, "learning_rate": 1.3125168964585025e-05, "loss": 0.2581, "step": 78475 }, { "epoch": 2.2913826075531745, "grad_norm": 0.6050539088747015, "learning_rate": 1.3122465531224656e-05, "loss": 0.2507, "step": 78480 }, { "epoch": 2.2915285908234915, "grad_norm": 0.6111174642352936, "learning_rate": 1.3119762097864288e-05, "loss": 0.2712, "step": 78485 }, { "epoch": 2.291674574093809, "grad_norm": 0.5738310677584079, "learning_rate": 1.311705866450392e-05, "loss": 0.2532, "step": 78490 }, { "epoch": 2.291820557364126, "grad_norm": 0.6153828921179187, "learning_rate": 1.3114355231143553e-05, "loss": 0.2612, "step": 78495 }, { "epoch": 2.2919665406344434, "grad_norm": 0.5740052563357239, "learning_rate": 1.3111651797783187e-05, "loss": 0.2505, "step": 78500 }, { "epoch": 2.2921125239047604, "grad_norm": 0.5796946167209607, "learning_rate": 1.3108948364422819e-05, "loss": 0.2624, "step": 78505 }, { "epoch": 2.292258507175078, "grad_norm": 0.5893145935046922, "learning_rate": 1.310624493106245e-05, "loss": 0.2419, "step": 78510 }, { "epoch": 2.292404490445395, "grad_norm": 0.5482698342219763, "learning_rate": 1.3103541497702082e-05, "loss": 0.2623, "step": 78515 }, { "epoch": 2.2925504737157123, "grad_norm": 0.5439878455252086, "learning_rate": 1.3100838064341714e-05, "loss": 0.25, "step": 78520 }, { "epoch": 2.2926964569860293, "grad_norm": 0.5729319145639078, "learning_rate": 1.3098134630981347e-05, "loss": 0.2481, "step": 78525 }, { "epoch": 2.2928424402563468, "grad_norm": 0.5476401930274202, "learning_rate": 1.309543119762098e-05, "loss": 0.2484, "step": 78530 }, { "epoch": 2.2929884235266638, "grad_norm": 0.5503736349546317, "learning_rate": 1.3092727764260613e-05, "loss": 0.2363, "step": 78535 }, { "epoch": 2.293134406796981, "grad_norm": 0.575985023986599, "learning_rate": 1.3090024330900244e-05, "loss": 0.2303, "step": 78540 }, { "epoch": 2.2932803900672982, "grad_norm": 0.5601162309194413, "learning_rate": 1.3087320897539876e-05, "loss": 0.2548, "step": 78545 }, { "epoch": 2.2934263733376157, "grad_norm": 0.5850503024082931, "learning_rate": 1.3084617464179508e-05, "loss": 0.2686, "step": 78550 }, { "epoch": 2.2935723566079327, "grad_norm": 0.5778147187496517, "learning_rate": 1.3081914030819139e-05, "loss": 0.2661, "step": 78555 }, { "epoch": 2.29371833987825, "grad_norm": 0.6207383856069575, "learning_rate": 1.3079210597458775e-05, "loss": 0.2618, "step": 78560 }, { "epoch": 2.293864323148567, "grad_norm": 0.6200066305967052, "learning_rate": 1.3076507164098407e-05, "loss": 0.2567, "step": 78565 }, { "epoch": 2.294010306418884, "grad_norm": 0.6113724322310677, "learning_rate": 1.3073803730738037e-05, "loss": 0.2559, "step": 78570 }, { "epoch": 2.2941562896892016, "grad_norm": 0.5634315485817868, "learning_rate": 1.307110029737767e-05, "loss": 0.2538, "step": 78575 }, { "epoch": 2.294302272959519, "grad_norm": 0.5615148908603635, "learning_rate": 1.3068396864017302e-05, "loss": 0.2563, "step": 78580 }, { "epoch": 2.294448256229836, "grad_norm": 0.6121710873917635, "learning_rate": 1.3065693430656936e-05, "loss": 0.2803, "step": 78585 }, { "epoch": 2.294594239500153, "grad_norm": 0.5671267111019996, "learning_rate": 1.3062989997296568e-05, "loss": 0.2481, "step": 78590 }, { "epoch": 2.2947402227704705, "grad_norm": 0.5808015332069384, "learning_rate": 1.3060286563936199e-05, "loss": 0.2503, "step": 78595 }, { "epoch": 2.294886206040788, "grad_norm": 0.5871468871924744, "learning_rate": 1.3057583130575831e-05, "loss": 0.2574, "step": 78600 }, { "epoch": 2.295032189311105, "grad_norm": 0.6432505993334539, "learning_rate": 1.3054879697215464e-05, "loss": 0.2536, "step": 78605 }, { "epoch": 2.295178172581422, "grad_norm": 0.5719866330313836, "learning_rate": 1.3052176263855096e-05, "loss": 0.2643, "step": 78610 }, { "epoch": 2.2953241558517394, "grad_norm": 0.588543275747039, "learning_rate": 1.304947283049473e-05, "loss": 0.2598, "step": 78615 }, { "epoch": 2.2954701391220564, "grad_norm": 0.557713936753049, "learning_rate": 1.3046769397134362e-05, "loss": 0.2568, "step": 78620 }, { "epoch": 2.295616122392374, "grad_norm": 0.5949992481670248, "learning_rate": 1.3044065963773993e-05, "loss": 0.2551, "step": 78625 }, { "epoch": 2.295762105662691, "grad_norm": 0.5429539058896132, "learning_rate": 1.3041362530413625e-05, "loss": 0.2386, "step": 78630 }, { "epoch": 2.2959080889330084, "grad_norm": 0.5266022560596397, "learning_rate": 1.3038659097053258e-05, "loss": 0.2549, "step": 78635 }, { "epoch": 2.2960540722033254, "grad_norm": 0.6465664403846979, "learning_rate": 1.303595566369289e-05, "loss": 0.2569, "step": 78640 }, { "epoch": 2.296200055473643, "grad_norm": 0.5634156777482731, "learning_rate": 1.3033252230332524e-05, "loss": 0.2577, "step": 78645 }, { "epoch": 2.29634603874396, "grad_norm": 0.5853785772030531, "learning_rate": 1.3030548796972156e-05, "loss": 0.2329, "step": 78650 }, { "epoch": 2.2964920220142773, "grad_norm": 0.5885548405212148, "learning_rate": 1.3027845363611787e-05, "loss": 0.2559, "step": 78655 }, { "epoch": 2.2966380052845943, "grad_norm": 0.5712483279106767, "learning_rate": 1.302514193025142e-05, "loss": 0.2595, "step": 78660 }, { "epoch": 2.2967839885549117, "grad_norm": 0.589026217819919, "learning_rate": 1.3022438496891051e-05, "loss": 0.2664, "step": 78665 }, { "epoch": 2.2969299718252287, "grad_norm": 0.5874112491659684, "learning_rate": 1.3019735063530685e-05, "loss": 0.2463, "step": 78670 }, { "epoch": 2.297075955095546, "grad_norm": 0.5476812819129109, "learning_rate": 1.3017031630170318e-05, "loss": 0.2537, "step": 78675 }, { "epoch": 2.297221938365863, "grad_norm": 0.5659391501802015, "learning_rate": 1.301432819680995e-05, "loss": 0.2489, "step": 78680 }, { "epoch": 2.2973679216361806, "grad_norm": 0.6083871215875221, "learning_rate": 1.301162476344958e-05, "loss": 0.262, "step": 78685 }, { "epoch": 2.2975139049064977, "grad_norm": 0.6053400151476752, "learning_rate": 1.3008921330089213e-05, "loss": 0.2416, "step": 78690 }, { "epoch": 2.297659888176815, "grad_norm": 0.5484352573624574, "learning_rate": 1.3006217896728845e-05, "loss": 0.2509, "step": 78695 }, { "epoch": 2.297805871447132, "grad_norm": 0.5407856411957112, "learning_rate": 1.300351446336848e-05, "loss": 0.2549, "step": 78700 }, { "epoch": 2.2979518547174496, "grad_norm": 0.649176574159914, "learning_rate": 1.3000811030008112e-05, "loss": 0.2692, "step": 78705 }, { "epoch": 2.2980978379877666, "grad_norm": 0.6195309474939741, "learning_rate": 1.2998107596647744e-05, "loss": 0.2615, "step": 78710 }, { "epoch": 2.298243821258084, "grad_norm": 0.5540053335448036, "learning_rate": 1.2995404163287375e-05, "loss": 0.2532, "step": 78715 }, { "epoch": 2.298389804528401, "grad_norm": 0.5597495814404908, "learning_rate": 1.2992700729927007e-05, "loss": 0.2459, "step": 78720 }, { "epoch": 2.298535787798718, "grad_norm": 0.6134221314746171, "learning_rate": 1.298999729656664e-05, "loss": 0.2493, "step": 78725 }, { "epoch": 2.2986817710690355, "grad_norm": 0.5902270765339205, "learning_rate": 1.2987293863206273e-05, "loss": 0.258, "step": 78730 }, { "epoch": 2.298827754339353, "grad_norm": 0.58055762366702, "learning_rate": 1.2984590429845906e-05, "loss": 0.2401, "step": 78735 }, { "epoch": 2.29897373760967, "grad_norm": 0.5714164386047181, "learning_rate": 1.2981886996485538e-05, "loss": 0.2503, "step": 78740 }, { "epoch": 2.299119720879987, "grad_norm": 0.6156659168509229, "learning_rate": 1.2979183563125169e-05, "loss": 0.255, "step": 78745 }, { "epoch": 2.2992657041503044, "grad_norm": 0.5885388790740619, "learning_rate": 1.2976480129764801e-05, "loss": 0.2657, "step": 78750 }, { "epoch": 2.2994116874206214, "grad_norm": 0.5588139690224448, "learning_rate": 1.2973776696404435e-05, "loss": 0.2424, "step": 78755 }, { "epoch": 2.299557670690939, "grad_norm": 0.5622946224525319, "learning_rate": 1.2971073263044067e-05, "loss": 0.2528, "step": 78760 }, { "epoch": 2.299703653961256, "grad_norm": 0.6014097955985672, "learning_rate": 1.29683698296837e-05, "loss": 0.2487, "step": 78765 }, { "epoch": 2.2998496372315733, "grad_norm": 0.5656052317357433, "learning_rate": 1.2965666396323332e-05, "loss": 0.2556, "step": 78770 }, { "epoch": 2.2999956205018903, "grad_norm": 0.5741098015923681, "learning_rate": 1.2962962962962962e-05, "loss": 0.2568, "step": 78775 }, { "epoch": 2.3001416037722078, "grad_norm": 0.5500633170717952, "learning_rate": 1.2960259529602595e-05, "loss": 0.2421, "step": 78780 }, { "epoch": 2.300287587042525, "grad_norm": 0.5652564909058079, "learning_rate": 1.2957556096242229e-05, "loss": 0.2439, "step": 78785 }, { "epoch": 2.3004335703128422, "grad_norm": 0.5553293582651605, "learning_rate": 1.2954852662881861e-05, "loss": 0.2345, "step": 78790 }, { "epoch": 2.3005795535831592, "grad_norm": 0.607247703763188, "learning_rate": 1.2952149229521493e-05, "loss": 0.2411, "step": 78795 }, { "epoch": 2.3007255368534767, "grad_norm": 0.6118529602681914, "learning_rate": 1.2949445796161124e-05, "loss": 0.2529, "step": 78800 }, { "epoch": 2.3008715201237937, "grad_norm": 0.5494985593445249, "learning_rate": 1.2946742362800756e-05, "loss": 0.2336, "step": 78805 }, { "epoch": 2.301017503394111, "grad_norm": 0.5564047947965304, "learning_rate": 1.2944038929440389e-05, "loss": 0.2553, "step": 78810 }, { "epoch": 2.301163486664428, "grad_norm": 0.6144788792506123, "learning_rate": 1.2941335496080023e-05, "loss": 0.2488, "step": 78815 }, { "epoch": 2.3013094699347456, "grad_norm": 0.5692580253341838, "learning_rate": 1.2938632062719655e-05, "loss": 0.2453, "step": 78820 }, { "epoch": 2.3014554532050626, "grad_norm": 0.5834204031666056, "learning_rate": 1.2935928629359287e-05, "loss": 0.2436, "step": 78825 }, { "epoch": 2.30160143647538, "grad_norm": 0.5038566656876914, "learning_rate": 1.2933225195998918e-05, "loss": 0.2445, "step": 78830 }, { "epoch": 2.301747419745697, "grad_norm": 0.5603548948201214, "learning_rate": 1.293052176263855e-05, "loss": 0.245, "step": 78835 }, { "epoch": 2.3018934030160145, "grad_norm": 0.6010179472539435, "learning_rate": 1.2927818329278186e-05, "loss": 0.274, "step": 78840 }, { "epoch": 2.3020393862863315, "grad_norm": 0.5371696210707899, "learning_rate": 1.2925114895917817e-05, "loss": 0.2328, "step": 78845 }, { "epoch": 2.302185369556649, "grad_norm": 0.5864955369908473, "learning_rate": 1.2922411462557449e-05, "loss": 0.2626, "step": 78850 }, { "epoch": 2.302331352826966, "grad_norm": 0.6602423018207658, "learning_rate": 1.2919708029197081e-05, "loss": 0.2691, "step": 78855 }, { "epoch": 2.3024773360972834, "grad_norm": 0.5654835816690168, "learning_rate": 1.2917004595836712e-05, "loss": 0.2674, "step": 78860 }, { "epoch": 2.3026233193676005, "grad_norm": 0.5770287956872452, "learning_rate": 1.2914301162476344e-05, "loss": 0.2573, "step": 78865 }, { "epoch": 2.302769302637918, "grad_norm": 0.5182482829749944, "learning_rate": 1.2911597729115978e-05, "loss": 0.2659, "step": 78870 }, { "epoch": 2.302915285908235, "grad_norm": 0.5424263450957835, "learning_rate": 1.290889429575561e-05, "loss": 0.2647, "step": 78875 }, { "epoch": 2.303061269178552, "grad_norm": 0.566842385933236, "learning_rate": 1.2906190862395243e-05, "loss": 0.2511, "step": 78880 }, { "epoch": 2.3032072524488694, "grad_norm": 0.584879066168192, "learning_rate": 1.2903487429034875e-05, "loss": 0.247, "step": 78885 }, { "epoch": 2.303353235719187, "grad_norm": 0.5738293614842144, "learning_rate": 1.2900783995674506e-05, "loss": 0.2544, "step": 78890 }, { "epoch": 2.303499218989504, "grad_norm": 0.6581511213608933, "learning_rate": 1.2898080562314138e-05, "loss": 0.2449, "step": 78895 }, { "epoch": 2.303645202259821, "grad_norm": 0.5464883574293627, "learning_rate": 1.2895377128953772e-05, "loss": 0.2388, "step": 78900 }, { "epoch": 2.3037911855301383, "grad_norm": 0.5435389908410109, "learning_rate": 1.2892673695593404e-05, "loss": 0.2393, "step": 78905 }, { "epoch": 2.3039371688004553, "grad_norm": 0.5727722881921506, "learning_rate": 1.2889970262233037e-05, "loss": 0.2542, "step": 78910 }, { "epoch": 2.3040831520707727, "grad_norm": 0.5818541836287178, "learning_rate": 1.2887266828872669e-05, "loss": 0.2661, "step": 78915 }, { "epoch": 2.3042291353410898, "grad_norm": 0.5431037861984053, "learning_rate": 1.28845633955123e-05, "loss": 0.2464, "step": 78920 }, { "epoch": 2.304375118611407, "grad_norm": 0.5694437646681418, "learning_rate": 1.2881859962151935e-05, "loss": 0.2499, "step": 78925 }, { "epoch": 2.304521101881724, "grad_norm": 0.549831233988862, "learning_rate": 1.2879156528791566e-05, "loss": 0.2461, "step": 78930 }, { "epoch": 2.3046670851520417, "grad_norm": 0.5585927461427537, "learning_rate": 1.2876453095431198e-05, "loss": 0.2496, "step": 78935 }, { "epoch": 2.3048130684223587, "grad_norm": 0.5249156777643402, "learning_rate": 1.287374966207083e-05, "loss": 0.2526, "step": 78940 }, { "epoch": 2.304959051692676, "grad_norm": 0.5419341296510484, "learning_rate": 1.2871046228710463e-05, "loss": 0.2291, "step": 78945 }, { "epoch": 2.305105034962993, "grad_norm": 0.5980573552875488, "learning_rate": 1.2868342795350094e-05, "loss": 0.2613, "step": 78950 }, { "epoch": 2.3052510182333106, "grad_norm": 0.5834208216018967, "learning_rate": 1.286563936198973e-05, "loss": 0.2603, "step": 78955 }, { "epoch": 2.3053970015036276, "grad_norm": 0.5585356707782082, "learning_rate": 1.286293592862936e-05, "loss": 0.255, "step": 78960 }, { "epoch": 2.305542984773945, "grad_norm": 0.5118236606617182, "learning_rate": 1.2860232495268992e-05, "loss": 0.231, "step": 78965 }, { "epoch": 2.305688968044262, "grad_norm": 0.5632451892847072, "learning_rate": 1.2857529061908625e-05, "loss": 0.2618, "step": 78970 }, { "epoch": 2.3058349513145795, "grad_norm": 0.5802008276982932, "learning_rate": 1.2854825628548257e-05, "loss": 0.2603, "step": 78975 }, { "epoch": 2.3059809345848965, "grad_norm": 0.5874021262120092, "learning_rate": 1.2852122195187887e-05, "loss": 0.2748, "step": 78980 }, { "epoch": 2.306126917855214, "grad_norm": 0.5628909915190056, "learning_rate": 1.2849418761827523e-05, "loss": 0.2303, "step": 78985 }, { "epoch": 2.306272901125531, "grad_norm": 0.6352662662809287, "learning_rate": 1.2846715328467154e-05, "loss": 0.2621, "step": 78990 }, { "epoch": 2.3064188843958484, "grad_norm": 0.5851481553347673, "learning_rate": 1.2844011895106786e-05, "loss": 0.2704, "step": 78995 }, { "epoch": 2.3065648676661654, "grad_norm": 0.5786336370551376, "learning_rate": 1.2841308461746418e-05, "loss": 0.2469, "step": 79000 }, { "epoch": 2.306710850936483, "grad_norm": 0.5878689401101173, "learning_rate": 1.2838605028386049e-05, "loss": 0.2486, "step": 79005 }, { "epoch": 2.3068568342068, "grad_norm": 0.5569564717402729, "learning_rate": 1.2835901595025685e-05, "loss": 0.2356, "step": 79010 }, { "epoch": 2.307002817477117, "grad_norm": 0.5755685626491234, "learning_rate": 1.2833198161665317e-05, "loss": 0.2608, "step": 79015 }, { "epoch": 2.3071488007474343, "grad_norm": 0.558540997683677, "learning_rate": 1.2830494728304948e-05, "loss": 0.2484, "step": 79020 }, { "epoch": 2.307294784017752, "grad_norm": 0.5202510900346461, "learning_rate": 1.282779129494458e-05, "loss": 0.2358, "step": 79025 }, { "epoch": 2.307440767288069, "grad_norm": 0.6049470012004466, "learning_rate": 1.2825087861584212e-05, "loss": 0.262, "step": 79030 }, { "epoch": 2.307586750558386, "grad_norm": 0.5290632374203681, "learning_rate": 1.2822384428223843e-05, "loss": 0.2591, "step": 79035 }, { "epoch": 2.3077327338287033, "grad_norm": 0.5531733672169618, "learning_rate": 1.2819680994863479e-05, "loss": 0.2427, "step": 79040 }, { "epoch": 2.3078787170990203, "grad_norm": 0.57349910091574, "learning_rate": 1.2816977561503111e-05, "loss": 0.2466, "step": 79045 }, { "epoch": 2.3080247003693377, "grad_norm": 0.6140499221884138, "learning_rate": 1.2814274128142742e-05, "loss": 0.2591, "step": 79050 }, { "epoch": 2.3081706836396547, "grad_norm": 0.5389148181367505, "learning_rate": 1.2811570694782374e-05, "loss": 0.2581, "step": 79055 }, { "epoch": 2.308316666909972, "grad_norm": 0.5754677010046875, "learning_rate": 1.2808867261422006e-05, "loss": 0.2492, "step": 79060 }, { "epoch": 2.308462650180289, "grad_norm": 0.5697769354536719, "learning_rate": 1.280616382806164e-05, "loss": 0.2409, "step": 79065 }, { "epoch": 2.3086086334506066, "grad_norm": 0.5772025129620963, "learning_rate": 1.2803460394701273e-05, "loss": 0.2471, "step": 79070 }, { "epoch": 2.3087546167209236, "grad_norm": 0.6219474241517877, "learning_rate": 1.2800756961340903e-05, "loss": 0.2744, "step": 79075 }, { "epoch": 2.308900599991241, "grad_norm": 0.5844045279375253, "learning_rate": 1.2798053527980536e-05, "loss": 0.2466, "step": 79080 }, { "epoch": 2.309046583261558, "grad_norm": 0.5840164116148069, "learning_rate": 1.2795350094620168e-05, "loss": 0.2455, "step": 79085 }, { "epoch": 2.3091925665318755, "grad_norm": 0.568248042744542, "learning_rate": 1.27926466612598e-05, "loss": 0.2553, "step": 79090 }, { "epoch": 2.3093385498021926, "grad_norm": 0.5792007893711231, "learning_rate": 1.2789943227899434e-05, "loss": 0.2463, "step": 79095 }, { "epoch": 2.30948453307251, "grad_norm": 0.5872642361519205, "learning_rate": 1.2787239794539066e-05, "loss": 0.263, "step": 79100 }, { "epoch": 2.309630516342827, "grad_norm": 0.5953443901316464, "learning_rate": 1.2784536361178697e-05, "loss": 0.2443, "step": 79105 }, { "epoch": 2.3097764996131445, "grad_norm": 0.6100640648436784, "learning_rate": 1.278183292781833e-05, "loss": 0.2525, "step": 79110 }, { "epoch": 2.3099224828834615, "grad_norm": 0.5671501465154533, "learning_rate": 1.2779129494457962e-05, "loss": 0.2602, "step": 79115 }, { "epoch": 2.310068466153779, "grad_norm": 0.5796592567296701, "learning_rate": 1.2776426061097594e-05, "loss": 0.2486, "step": 79120 }, { "epoch": 2.310214449424096, "grad_norm": 0.5712778390320417, "learning_rate": 1.2773722627737228e-05, "loss": 0.255, "step": 79125 }, { "epoch": 2.3103604326944134, "grad_norm": 0.5991194330675922, "learning_rate": 1.277101919437686e-05, "loss": 0.2598, "step": 79130 }, { "epoch": 2.3105064159647304, "grad_norm": 0.5308697622546771, "learning_rate": 1.2768315761016491e-05, "loss": 0.2365, "step": 79135 }, { "epoch": 2.310652399235048, "grad_norm": 0.6050469511226606, "learning_rate": 1.2765612327656123e-05, "loss": 0.2607, "step": 79140 }, { "epoch": 2.310798382505365, "grad_norm": 0.5309368377270081, "learning_rate": 1.2762908894295756e-05, "loss": 0.2533, "step": 79145 }, { "epoch": 2.3109443657756823, "grad_norm": 0.5768806670736301, "learning_rate": 1.276020546093539e-05, "loss": 0.2493, "step": 79150 }, { "epoch": 2.3110903490459993, "grad_norm": 0.5825041799237516, "learning_rate": 1.2757502027575022e-05, "loss": 0.2317, "step": 79155 }, { "epoch": 2.3112363323163168, "grad_norm": 0.5754370508807679, "learning_rate": 1.2754798594214654e-05, "loss": 0.2415, "step": 79160 }, { "epoch": 2.3113823155866338, "grad_norm": 0.5153245490931753, "learning_rate": 1.2752095160854285e-05, "loss": 0.2602, "step": 79165 }, { "epoch": 2.3115282988569508, "grad_norm": 0.5712196826092434, "learning_rate": 1.2749391727493917e-05, "loss": 0.2873, "step": 79170 }, { "epoch": 2.311674282127268, "grad_norm": 0.5715614715191917, "learning_rate": 1.274668829413355e-05, "loss": 0.2499, "step": 79175 }, { "epoch": 2.3118202653975857, "grad_norm": 0.5758767563646041, "learning_rate": 1.2743984860773184e-05, "loss": 0.2695, "step": 79180 }, { "epoch": 2.3119662486679027, "grad_norm": 0.5845542358326828, "learning_rate": 1.2741281427412816e-05, "loss": 0.2447, "step": 79185 }, { "epoch": 2.3121122319382197, "grad_norm": 0.5544887081257032, "learning_rate": 1.2738577994052448e-05, "loss": 0.2674, "step": 79190 }, { "epoch": 2.312258215208537, "grad_norm": 0.554218264899044, "learning_rate": 1.2735874560692079e-05, "loss": 0.2391, "step": 79195 }, { "epoch": 2.312404198478854, "grad_norm": 0.5526319956439648, "learning_rate": 1.2733171127331711e-05, "loss": 0.2628, "step": 79200 }, { "epoch": 2.3125501817491716, "grad_norm": 0.5717456460576286, "learning_rate": 1.2730467693971343e-05, "loss": 0.2418, "step": 79205 }, { "epoch": 2.3126961650194886, "grad_norm": 0.609852680016446, "learning_rate": 1.2727764260610977e-05, "loss": 0.257, "step": 79210 }, { "epoch": 2.312842148289806, "grad_norm": 0.5172409594721055, "learning_rate": 1.272506082725061e-05, "loss": 0.2406, "step": 79215 }, { "epoch": 2.312988131560123, "grad_norm": 0.6050827172257406, "learning_rate": 1.2722357393890242e-05, "loss": 0.2435, "step": 79220 }, { "epoch": 2.3131341148304405, "grad_norm": 0.57580477582505, "learning_rate": 1.2719653960529873e-05, "loss": 0.2579, "step": 79225 }, { "epoch": 2.3132800981007575, "grad_norm": 0.5802923226449105, "learning_rate": 1.2716950527169505e-05, "loss": 0.2586, "step": 79230 }, { "epoch": 2.313426081371075, "grad_norm": 0.6529497140575634, "learning_rate": 1.2714247093809139e-05, "loss": 0.2543, "step": 79235 }, { "epoch": 2.313572064641392, "grad_norm": 0.6260950484229759, "learning_rate": 1.2711543660448771e-05, "loss": 0.2581, "step": 79240 }, { "epoch": 2.3137180479117094, "grad_norm": 0.6289647846266181, "learning_rate": 1.2708840227088404e-05, "loss": 0.2622, "step": 79245 }, { "epoch": 2.3138640311820264, "grad_norm": 0.564652635318853, "learning_rate": 1.2706136793728034e-05, "loss": 0.247, "step": 79250 }, { "epoch": 2.314010014452344, "grad_norm": 0.5401680760524311, "learning_rate": 1.2703433360367667e-05, "loss": 0.2464, "step": 79255 }, { "epoch": 2.314155997722661, "grad_norm": 0.5543883686202511, "learning_rate": 1.2700729927007299e-05, "loss": 0.2425, "step": 79260 }, { "epoch": 2.3143019809929783, "grad_norm": 0.5745833707277994, "learning_rate": 1.2698026493646933e-05, "loss": 0.2448, "step": 79265 }, { "epoch": 2.3144479642632954, "grad_norm": 0.5889050040297696, "learning_rate": 1.2695323060286565e-05, "loss": 0.2521, "step": 79270 }, { "epoch": 2.314593947533613, "grad_norm": 0.5760484007522377, "learning_rate": 1.2692619626926198e-05, "loss": 0.2366, "step": 79275 }, { "epoch": 2.31473993080393, "grad_norm": 0.6245306688740894, "learning_rate": 1.2689916193565828e-05, "loss": 0.2505, "step": 79280 }, { "epoch": 2.3148859140742473, "grad_norm": 0.6031611931967625, "learning_rate": 1.268721276020546e-05, "loss": 0.2615, "step": 79285 }, { "epoch": 2.3150318973445643, "grad_norm": 0.5715833417687126, "learning_rate": 1.2684509326845093e-05, "loss": 0.2518, "step": 79290 }, { "epoch": 2.3151778806148817, "grad_norm": 0.6364489760388043, "learning_rate": 1.2681805893484727e-05, "loss": 0.2502, "step": 79295 }, { "epoch": 2.3153238638851987, "grad_norm": 0.5920797385708569, "learning_rate": 1.267910246012436e-05, "loss": 0.2372, "step": 79300 }, { "epoch": 2.3154698471555157, "grad_norm": 0.5531330952420159, "learning_rate": 1.2676399026763991e-05, "loss": 0.239, "step": 79305 }, { "epoch": 2.315615830425833, "grad_norm": 0.620935193540927, "learning_rate": 1.2673695593403622e-05, "loss": 0.2563, "step": 79310 }, { "epoch": 2.3157618136961506, "grad_norm": 0.5687839012583656, "learning_rate": 1.2670992160043254e-05, "loss": 0.251, "step": 79315 }, { "epoch": 2.3159077969664676, "grad_norm": 0.5817134121836152, "learning_rate": 1.2668288726682888e-05, "loss": 0.268, "step": 79320 }, { "epoch": 2.3160537802367847, "grad_norm": 0.5739656172073454, "learning_rate": 1.266558529332252e-05, "loss": 0.2492, "step": 79325 }, { "epoch": 2.316199763507102, "grad_norm": 0.5719703223384242, "learning_rate": 1.2662881859962153e-05, "loss": 0.244, "step": 79330 }, { "epoch": 2.3163457467774196, "grad_norm": 0.5981318334065922, "learning_rate": 1.2660178426601785e-05, "loss": 0.253, "step": 79335 }, { "epoch": 2.3164917300477366, "grad_norm": 0.5789465560057533, "learning_rate": 1.2657474993241416e-05, "loss": 0.2516, "step": 79340 }, { "epoch": 2.3166377133180536, "grad_norm": 0.5605206807159847, "learning_rate": 1.2654771559881048e-05, "loss": 0.245, "step": 79345 }, { "epoch": 2.316783696588371, "grad_norm": 0.570623385481231, "learning_rate": 1.2652068126520682e-05, "loss": 0.2411, "step": 79350 }, { "epoch": 2.316929679858688, "grad_norm": 0.6135099911719344, "learning_rate": 1.2649364693160315e-05, "loss": 0.2547, "step": 79355 }, { "epoch": 2.3170756631290055, "grad_norm": 0.5538378680815074, "learning_rate": 1.2646661259799947e-05, "loss": 0.2493, "step": 79360 }, { "epoch": 2.3172216463993225, "grad_norm": 0.6156839315752001, "learning_rate": 1.264395782643958e-05, "loss": 0.2603, "step": 79365 }, { "epoch": 2.31736762966964, "grad_norm": 0.5588586275391644, "learning_rate": 1.264125439307921e-05, "loss": 0.2498, "step": 79370 }, { "epoch": 2.317513612939957, "grad_norm": 0.5735925324701238, "learning_rate": 1.2638550959718842e-05, "loss": 0.2374, "step": 79375 }, { "epoch": 2.3176595962102744, "grad_norm": 0.5718352728686441, "learning_rate": 1.2635847526358476e-05, "loss": 0.2622, "step": 79380 }, { "epoch": 2.3178055794805914, "grad_norm": 0.5491742758739577, "learning_rate": 1.2633144092998109e-05, "loss": 0.2352, "step": 79385 }, { "epoch": 2.317951562750909, "grad_norm": 0.564660235788091, "learning_rate": 1.2630440659637741e-05, "loss": 0.2335, "step": 79390 }, { "epoch": 2.318097546021226, "grad_norm": 0.5660108998894444, "learning_rate": 1.2627737226277373e-05, "loss": 0.2587, "step": 79395 }, { "epoch": 2.3182435292915433, "grad_norm": 0.5400132616682078, "learning_rate": 1.2625033792917004e-05, "loss": 0.257, "step": 79400 }, { "epoch": 2.3183895125618603, "grad_norm": 0.5666196409708107, "learning_rate": 1.262233035955664e-05, "loss": 0.2489, "step": 79405 }, { "epoch": 2.3185354958321778, "grad_norm": 0.5607219173623804, "learning_rate": 1.261962692619627e-05, "loss": 0.2429, "step": 79410 }, { "epoch": 2.318681479102495, "grad_norm": 0.598970051309545, "learning_rate": 1.2616923492835902e-05, "loss": 0.2477, "step": 79415 }, { "epoch": 2.3188274623728122, "grad_norm": 0.5137440739732414, "learning_rate": 1.2614220059475535e-05, "loss": 0.2529, "step": 79420 }, { "epoch": 2.3189734456431292, "grad_norm": 0.6363774039693968, "learning_rate": 1.2611516626115167e-05, "loss": 0.246, "step": 79425 }, { "epoch": 2.3191194289134467, "grad_norm": 0.6493096771669389, "learning_rate": 1.2608813192754798e-05, "loss": 0.2575, "step": 79430 }, { "epoch": 2.3192654121837637, "grad_norm": 0.5613639729308564, "learning_rate": 1.2606109759394433e-05, "loss": 0.2501, "step": 79435 }, { "epoch": 2.319411395454081, "grad_norm": 0.5562305787591352, "learning_rate": 1.2603406326034064e-05, "loss": 0.2333, "step": 79440 }, { "epoch": 2.319557378724398, "grad_norm": 0.5589271856871857, "learning_rate": 1.2600702892673696e-05, "loss": 0.23, "step": 79445 }, { "epoch": 2.3197033619947156, "grad_norm": 0.5602496024311152, "learning_rate": 1.2597999459313329e-05, "loss": 0.2657, "step": 79450 }, { "epoch": 2.3198493452650326, "grad_norm": 0.5280611343019359, "learning_rate": 1.259529602595296e-05, "loss": 0.2386, "step": 79455 }, { "epoch": 2.3199953285353496, "grad_norm": 0.5730361963027181, "learning_rate": 1.2592592592592592e-05, "loss": 0.2481, "step": 79460 }, { "epoch": 2.320141311805667, "grad_norm": 0.5632187960890709, "learning_rate": 1.2589889159232227e-05, "loss": 0.2356, "step": 79465 }, { "epoch": 2.3202872950759845, "grad_norm": 0.5597788962031538, "learning_rate": 1.2587185725871858e-05, "loss": 0.26, "step": 79470 }, { "epoch": 2.3204332783463015, "grad_norm": 0.5504990689032365, "learning_rate": 1.258448229251149e-05, "loss": 0.2487, "step": 79475 }, { "epoch": 2.3205792616166185, "grad_norm": 0.5595516808489657, "learning_rate": 1.2581778859151123e-05, "loss": 0.2423, "step": 79480 }, { "epoch": 2.320725244886936, "grad_norm": 0.49417899732117987, "learning_rate": 1.2579075425790753e-05, "loss": 0.2471, "step": 79485 }, { "epoch": 2.320871228157253, "grad_norm": 0.536882421470046, "learning_rate": 1.2576371992430389e-05, "loss": 0.2486, "step": 79490 }, { "epoch": 2.3210172114275704, "grad_norm": 0.6069562245846378, "learning_rate": 1.2573668559070021e-05, "loss": 0.2754, "step": 79495 }, { "epoch": 2.3211631946978875, "grad_norm": 0.6006380821592362, "learning_rate": 1.2570965125709652e-05, "loss": 0.2449, "step": 79500 }, { "epoch": 2.321309177968205, "grad_norm": 0.5922008214146469, "learning_rate": 1.2568261692349284e-05, "loss": 0.2462, "step": 79505 }, { "epoch": 2.321455161238522, "grad_norm": 0.5264780066438816, "learning_rate": 1.2565558258988916e-05, "loss": 0.2428, "step": 79510 }, { "epoch": 2.3216011445088394, "grad_norm": 0.5685114424519991, "learning_rate": 1.2562854825628547e-05, "loss": 0.2531, "step": 79515 }, { "epoch": 2.3217471277791564, "grad_norm": 0.5569565931948307, "learning_rate": 1.2560151392268183e-05, "loss": 0.2541, "step": 79520 }, { "epoch": 2.321893111049474, "grad_norm": 0.5750484526188244, "learning_rate": 1.2557447958907813e-05, "loss": 0.2498, "step": 79525 }, { "epoch": 2.322039094319791, "grad_norm": 0.5253014989831456, "learning_rate": 1.2554744525547446e-05, "loss": 0.2386, "step": 79530 }, { "epoch": 2.3221850775901083, "grad_norm": 0.5951742360527643, "learning_rate": 1.2552041092187078e-05, "loss": 0.2649, "step": 79535 }, { "epoch": 2.3223310608604253, "grad_norm": 0.6128824616357482, "learning_rate": 1.254933765882671e-05, "loss": 0.2589, "step": 79540 }, { "epoch": 2.3224770441307427, "grad_norm": 0.6037104166390262, "learning_rate": 1.2546634225466341e-05, "loss": 0.2558, "step": 79545 }, { "epoch": 2.3226230274010597, "grad_norm": 0.5874651757312094, "learning_rate": 1.2543930792105977e-05, "loss": 0.2655, "step": 79550 }, { "epoch": 2.322769010671377, "grad_norm": 0.6278438918200978, "learning_rate": 1.2541227358745607e-05, "loss": 0.2615, "step": 79555 }, { "epoch": 2.322914993941694, "grad_norm": 0.5800227286559344, "learning_rate": 1.253852392538524e-05, "loss": 0.2467, "step": 79560 }, { "epoch": 2.3230609772120117, "grad_norm": 0.8417056869104076, "learning_rate": 1.2535820492024872e-05, "loss": 0.2528, "step": 79565 }, { "epoch": 2.3232069604823287, "grad_norm": 0.5787019093477848, "learning_rate": 1.2533117058664504e-05, "loss": 0.2586, "step": 79570 }, { "epoch": 2.323352943752646, "grad_norm": 0.5859831545670953, "learning_rate": 1.2530413625304138e-05, "loss": 0.2648, "step": 79575 }, { "epoch": 2.323498927022963, "grad_norm": 0.5910927207779966, "learning_rate": 1.252771019194377e-05, "loss": 0.2638, "step": 79580 }, { "epoch": 2.3236449102932806, "grad_norm": 0.5650199963328274, "learning_rate": 1.2525006758583401e-05, "loss": 0.244, "step": 79585 }, { "epoch": 2.3237908935635976, "grad_norm": 0.5356903512143989, "learning_rate": 1.2522303325223034e-05, "loss": 0.2672, "step": 79590 }, { "epoch": 2.3239368768339146, "grad_norm": 0.5716006588881346, "learning_rate": 1.2519599891862666e-05, "loss": 0.2434, "step": 79595 }, { "epoch": 2.324082860104232, "grad_norm": 0.6347054800990666, "learning_rate": 1.2516896458502298e-05, "loss": 0.2425, "step": 79600 }, { "epoch": 2.3242288433745495, "grad_norm": 0.607760150896006, "learning_rate": 1.2514193025141932e-05, "loss": 0.271, "step": 79605 }, { "epoch": 2.3243748266448665, "grad_norm": 0.5988342471732686, "learning_rate": 1.2511489591781565e-05, "loss": 0.2592, "step": 79610 }, { "epoch": 2.3245208099151835, "grad_norm": 0.6067000244489656, "learning_rate": 1.2508786158421195e-05, "loss": 0.2526, "step": 79615 }, { "epoch": 2.324666793185501, "grad_norm": 0.5762478556711348, "learning_rate": 1.2506082725060827e-05, "loss": 0.2325, "step": 79620 }, { "epoch": 2.3248127764558184, "grad_norm": 0.6031061906150947, "learning_rate": 1.250337929170046e-05, "loss": 0.2586, "step": 79625 }, { "epoch": 2.3249587597261354, "grad_norm": 0.6049065961279136, "learning_rate": 1.2500675858340092e-05, "loss": 0.2428, "step": 79630 }, { "epoch": 2.3251047429964524, "grad_norm": 0.5878092546934479, "learning_rate": 1.2497972424979724e-05, "loss": 0.2537, "step": 79635 }, { "epoch": 2.32525072626677, "grad_norm": 0.6050201814898004, "learning_rate": 1.2495268991619358e-05, "loss": 0.2399, "step": 79640 }, { "epoch": 2.325396709537087, "grad_norm": 0.6106069634639285, "learning_rate": 1.2492565558258989e-05, "loss": 0.2514, "step": 79645 }, { "epoch": 2.3255426928074043, "grad_norm": 0.5771181968330131, "learning_rate": 1.2489862124898621e-05, "loss": 0.255, "step": 79650 }, { "epoch": 2.3256886760777213, "grad_norm": 0.5943680870650484, "learning_rate": 1.2487158691538255e-05, "loss": 0.2634, "step": 79655 }, { "epoch": 2.325834659348039, "grad_norm": 0.5454927535416275, "learning_rate": 1.2484455258177886e-05, "loss": 0.2614, "step": 79660 }, { "epoch": 2.325980642618356, "grad_norm": 0.5649564989446743, "learning_rate": 1.2481751824817518e-05, "loss": 0.2584, "step": 79665 }, { "epoch": 2.3261266258886732, "grad_norm": 0.5909828483662434, "learning_rate": 1.2479048391457152e-05, "loss": 0.242, "step": 79670 }, { "epoch": 2.3262726091589903, "grad_norm": 0.5779376195995382, "learning_rate": 1.2476344958096783e-05, "loss": 0.2501, "step": 79675 }, { "epoch": 2.3264185924293077, "grad_norm": 0.5533664158664511, "learning_rate": 1.2473641524736415e-05, "loss": 0.2492, "step": 79680 }, { "epoch": 2.3265645756996247, "grad_norm": 0.6180249522927134, "learning_rate": 1.247093809137605e-05, "loss": 0.2633, "step": 79685 }, { "epoch": 2.326710558969942, "grad_norm": 0.5861121041227106, "learning_rate": 1.246823465801568e-05, "loss": 0.2566, "step": 79690 }, { "epoch": 2.326856542240259, "grad_norm": 0.5838817284821239, "learning_rate": 1.2465531224655314e-05, "loss": 0.2507, "step": 79695 }, { "epoch": 2.3270025255105766, "grad_norm": 0.5097926733547218, "learning_rate": 1.2462827791294945e-05, "loss": 0.2356, "step": 79700 }, { "epoch": 2.3271485087808936, "grad_norm": 0.602677518145418, "learning_rate": 1.2460124357934577e-05, "loss": 0.2592, "step": 79705 }, { "epoch": 2.327294492051211, "grad_norm": 0.5744839387373788, "learning_rate": 1.2457420924574211e-05, "loss": 0.2469, "step": 79710 }, { "epoch": 2.327440475321528, "grad_norm": 0.5468163464093724, "learning_rate": 1.2454717491213841e-05, "loss": 0.2408, "step": 79715 }, { "epoch": 2.3275864585918455, "grad_norm": 0.576432592447908, "learning_rate": 1.2452014057853474e-05, "loss": 0.2494, "step": 79720 }, { "epoch": 2.3277324418621625, "grad_norm": 0.523503321492516, "learning_rate": 1.2449310624493108e-05, "loss": 0.243, "step": 79725 }, { "epoch": 2.32787842513248, "grad_norm": 0.6029738624970313, "learning_rate": 1.2446607191132738e-05, "loss": 0.252, "step": 79730 }, { "epoch": 2.328024408402797, "grad_norm": 0.5639672491128088, "learning_rate": 1.244390375777237e-05, "loss": 0.2616, "step": 79735 }, { "epoch": 2.3281703916731145, "grad_norm": 0.5519088960865123, "learning_rate": 1.2441200324412005e-05, "loss": 0.2465, "step": 79740 }, { "epoch": 2.3283163749434315, "grad_norm": 0.5796454169808805, "learning_rate": 1.2438496891051635e-05, "loss": 0.2561, "step": 79745 }, { "epoch": 2.3284623582137485, "grad_norm": 0.6113798006121134, "learning_rate": 1.2435793457691268e-05, "loss": 0.2475, "step": 79750 }, { "epoch": 2.328608341484066, "grad_norm": 0.5327620297214273, "learning_rate": 1.2433090024330902e-05, "loss": 0.2377, "step": 79755 }, { "epoch": 2.3287543247543834, "grad_norm": 0.6727514535607241, "learning_rate": 1.2430386590970532e-05, "loss": 0.2696, "step": 79760 }, { "epoch": 2.3289003080247004, "grad_norm": 0.5890707411606209, "learning_rate": 1.2427683157610165e-05, "loss": 0.2554, "step": 79765 }, { "epoch": 2.3290462912950174, "grad_norm": 0.5796935049422511, "learning_rate": 1.2424979724249799e-05, "loss": 0.2447, "step": 79770 }, { "epoch": 2.329192274565335, "grad_norm": 0.5383465322552683, "learning_rate": 1.242227629088943e-05, "loss": 0.267, "step": 79775 }, { "epoch": 2.329338257835652, "grad_norm": 0.5937262179961706, "learning_rate": 1.2419572857529063e-05, "loss": 0.2587, "step": 79780 }, { "epoch": 2.3294842411059693, "grad_norm": 0.5727232710263479, "learning_rate": 1.2416869424168696e-05, "loss": 0.2599, "step": 79785 }, { "epoch": 2.3296302243762863, "grad_norm": 0.6103257152268068, "learning_rate": 1.2414165990808326e-05, "loss": 0.2501, "step": 79790 }, { "epoch": 2.3297762076466038, "grad_norm": 0.561113149760379, "learning_rate": 1.241146255744796e-05, "loss": 0.2509, "step": 79795 }, { "epoch": 2.3299221909169208, "grad_norm": 0.6089493182525079, "learning_rate": 1.2408759124087593e-05, "loss": 0.2407, "step": 79800 }, { "epoch": 2.330068174187238, "grad_norm": 0.5645545174251815, "learning_rate": 1.2406055690727223e-05, "loss": 0.2498, "step": 79805 }, { "epoch": 2.3302141574575552, "grad_norm": 0.5488867268235624, "learning_rate": 1.2403352257366857e-05, "loss": 0.2494, "step": 79810 }, { "epoch": 2.3303601407278727, "grad_norm": 0.6008779374448263, "learning_rate": 1.240064882400649e-05, "loss": 0.2603, "step": 79815 }, { "epoch": 2.3305061239981897, "grad_norm": 0.5902070523664936, "learning_rate": 1.239794539064612e-05, "loss": 0.2555, "step": 79820 }, { "epoch": 2.330652107268507, "grad_norm": 0.6316303887910667, "learning_rate": 1.2395241957285754e-05, "loss": 0.2403, "step": 79825 }, { "epoch": 2.330798090538824, "grad_norm": 0.6047467490469425, "learning_rate": 1.2392538523925386e-05, "loss": 0.2522, "step": 79830 }, { "epoch": 2.3309440738091416, "grad_norm": 0.614998748777477, "learning_rate": 1.2389835090565017e-05, "loss": 0.262, "step": 79835 }, { "epoch": 2.3310900570794586, "grad_norm": 0.568771638854467, "learning_rate": 1.2387131657204651e-05, "loss": 0.2351, "step": 79840 }, { "epoch": 2.331236040349776, "grad_norm": 0.5854428508018892, "learning_rate": 1.2384428223844283e-05, "loss": 0.2609, "step": 79845 }, { "epoch": 2.331382023620093, "grad_norm": 0.6048766181114766, "learning_rate": 1.2381724790483914e-05, "loss": 0.2638, "step": 79850 }, { "epoch": 2.3315280068904105, "grad_norm": 0.5507271241179406, "learning_rate": 1.2379021357123548e-05, "loss": 0.2523, "step": 79855 }, { "epoch": 2.3316739901607275, "grad_norm": 0.5621349090311959, "learning_rate": 1.237631792376318e-05, "loss": 0.2536, "step": 79860 }, { "epoch": 2.331819973431045, "grad_norm": 0.6016902623154649, "learning_rate": 1.2373614490402813e-05, "loss": 0.2436, "step": 79865 }, { "epoch": 2.331965956701362, "grad_norm": 0.5648584395503458, "learning_rate": 1.2370911057042445e-05, "loss": 0.2589, "step": 79870 }, { "epoch": 2.3321119399716794, "grad_norm": 0.6044517237074565, "learning_rate": 1.2368207623682077e-05, "loss": 0.2472, "step": 79875 }, { "epoch": 2.3322579232419964, "grad_norm": 0.524816219918166, "learning_rate": 1.236550419032171e-05, "loss": 0.2391, "step": 79880 }, { "epoch": 2.3324039065123134, "grad_norm": 0.5870445185558457, "learning_rate": 1.2362800756961342e-05, "loss": 0.2623, "step": 79885 }, { "epoch": 2.332549889782631, "grad_norm": 0.5250200632810722, "learning_rate": 1.2360097323600974e-05, "loss": 0.2497, "step": 79890 }, { "epoch": 2.3326958730529483, "grad_norm": 0.5910500120964325, "learning_rate": 1.2357393890240607e-05, "loss": 0.2483, "step": 79895 }, { "epoch": 2.3328418563232654, "grad_norm": 0.6064755470508644, "learning_rate": 1.2354690456880239e-05, "loss": 0.2523, "step": 79900 }, { "epoch": 2.3329878395935824, "grad_norm": 0.6218792536593646, "learning_rate": 1.235198702351987e-05, "loss": 0.2473, "step": 79905 }, { "epoch": 2.3331338228639, "grad_norm": 0.5969039068043871, "learning_rate": 1.2349283590159504e-05, "loss": 0.2605, "step": 79910 }, { "epoch": 2.3332798061342173, "grad_norm": 0.5528128867621467, "learning_rate": 1.2346580156799136e-05, "loss": 0.2572, "step": 79915 }, { "epoch": 2.3334257894045343, "grad_norm": 0.5560360930789454, "learning_rate": 1.2343876723438766e-05, "loss": 0.2496, "step": 79920 }, { "epoch": 2.3335717726748513, "grad_norm": 0.5595123774537387, "learning_rate": 1.23411732900784e-05, "loss": 0.2552, "step": 79925 }, { "epoch": 2.3337177559451687, "grad_norm": 0.5894957357852604, "learning_rate": 1.2338469856718033e-05, "loss": 0.2511, "step": 79930 }, { "epoch": 2.3338637392154857, "grad_norm": 0.6019520228377656, "learning_rate": 1.2335766423357663e-05, "loss": 0.2509, "step": 79935 }, { "epoch": 2.334009722485803, "grad_norm": 0.5620176509120388, "learning_rate": 1.2333062989997297e-05, "loss": 0.2472, "step": 79940 }, { "epoch": 2.33415570575612, "grad_norm": 0.5676692366977475, "learning_rate": 1.233035955663693e-05, "loss": 0.2589, "step": 79945 }, { "epoch": 2.3343016890264376, "grad_norm": 0.5868843062866669, "learning_rate": 1.2327656123276562e-05, "loss": 0.2529, "step": 79950 }, { "epoch": 2.3344476722967547, "grad_norm": 0.5254988460421914, "learning_rate": 1.2324952689916194e-05, "loss": 0.245, "step": 79955 }, { "epoch": 2.334593655567072, "grad_norm": 0.5719569281098533, "learning_rate": 1.2322249256555827e-05, "loss": 0.2578, "step": 79960 }, { "epoch": 2.334739638837389, "grad_norm": 0.5719138712118318, "learning_rate": 1.2319545823195459e-05, "loss": 0.2576, "step": 79965 }, { "epoch": 2.3348856221077066, "grad_norm": 0.596562617448991, "learning_rate": 1.2316842389835091e-05, "loss": 0.2566, "step": 79970 }, { "epoch": 2.3350316053780236, "grad_norm": 0.5552107611353408, "learning_rate": 1.2314138956474724e-05, "loss": 0.2533, "step": 79975 }, { "epoch": 2.335177588648341, "grad_norm": 0.5743032998934327, "learning_rate": 1.2311435523114356e-05, "loss": 0.2426, "step": 79980 }, { "epoch": 2.335323571918658, "grad_norm": 0.6471301194557726, "learning_rate": 1.2308732089753988e-05, "loss": 0.2586, "step": 79985 }, { "epoch": 2.3354695551889755, "grad_norm": 0.5704195270841744, "learning_rate": 1.230602865639362e-05, "loss": 0.2604, "step": 79990 }, { "epoch": 2.3356155384592925, "grad_norm": 0.6221429943230499, "learning_rate": 1.2303325223033253e-05, "loss": 0.26, "step": 79995 }, { "epoch": 2.33576152172961, "grad_norm": 0.5798782401332704, "learning_rate": 1.2300621789672885e-05, "loss": 0.2613, "step": 80000 }, { "epoch": 2.335907504999927, "grad_norm": 0.5457240560387764, "learning_rate": 1.2297918356312518e-05, "loss": 0.2474, "step": 80005 }, { "epoch": 2.3360534882702444, "grad_norm": 0.6264893521682512, "learning_rate": 1.229521492295215e-05, "loss": 0.254, "step": 80010 }, { "epoch": 2.3361994715405614, "grad_norm": 0.586667030617272, "learning_rate": 1.2292511489591782e-05, "loss": 0.2425, "step": 80015 }, { "epoch": 2.336345454810879, "grad_norm": 0.5802253362263343, "learning_rate": 1.2289808056231415e-05, "loss": 0.2551, "step": 80020 }, { "epoch": 2.336491438081196, "grad_norm": 0.6252845089754977, "learning_rate": 1.2287104622871047e-05, "loss": 0.2482, "step": 80025 }, { "epoch": 2.3366374213515133, "grad_norm": 0.602684855092031, "learning_rate": 1.228440118951068e-05, "loss": 0.2491, "step": 80030 }, { "epoch": 2.3367834046218303, "grad_norm": 0.5556896187012291, "learning_rate": 1.2281697756150311e-05, "loss": 0.2236, "step": 80035 }, { "epoch": 2.3369293878921473, "grad_norm": 0.591929217881339, "learning_rate": 1.2278994322789944e-05, "loss": 0.2469, "step": 80040 }, { "epoch": 2.3370753711624648, "grad_norm": 0.5275670171973428, "learning_rate": 1.2276290889429576e-05, "loss": 0.2349, "step": 80045 }, { "epoch": 2.3372213544327822, "grad_norm": 0.5829893464644988, "learning_rate": 1.2273587456069208e-05, "loss": 0.2431, "step": 80050 }, { "epoch": 2.3373673377030992, "grad_norm": 0.5928998903752337, "learning_rate": 1.227088402270884e-05, "loss": 0.2422, "step": 80055 }, { "epoch": 2.3375133209734162, "grad_norm": 0.5846657051137483, "learning_rate": 1.2268180589348473e-05, "loss": 0.2502, "step": 80060 }, { "epoch": 2.3376593042437337, "grad_norm": 0.5664146129975894, "learning_rate": 1.2265477155988105e-05, "loss": 0.2744, "step": 80065 }, { "epoch": 2.3378052875140507, "grad_norm": 0.5785963462741001, "learning_rate": 1.2262773722627738e-05, "loss": 0.2467, "step": 80070 }, { "epoch": 2.337951270784368, "grad_norm": 0.5550312362288752, "learning_rate": 1.226007028926737e-05, "loss": 0.2514, "step": 80075 }, { "epoch": 2.338097254054685, "grad_norm": 0.5963748331532608, "learning_rate": 1.2257366855907002e-05, "loss": 0.2672, "step": 80080 }, { "epoch": 2.3382432373250026, "grad_norm": 0.5856377502054538, "learning_rate": 1.2254663422546635e-05, "loss": 0.2494, "step": 80085 }, { "epoch": 2.3383892205953196, "grad_norm": 0.5836278581357239, "learning_rate": 1.2251959989186267e-05, "loss": 0.2649, "step": 80090 }, { "epoch": 2.338535203865637, "grad_norm": 0.5561081734784916, "learning_rate": 1.22492565558259e-05, "loss": 0.2652, "step": 80095 }, { "epoch": 2.338681187135954, "grad_norm": 0.5918343408799, "learning_rate": 1.2246553122465532e-05, "loss": 0.2564, "step": 80100 }, { "epoch": 2.3388271704062715, "grad_norm": 0.5592482170822515, "learning_rate": 1.2243849689105164e-05, "loss": 0.2413, "step": 80105 }, { "epoch": 2.3389731536765885, "grad_norm": 0.561029187437517, "learning_rate": 1.2241146255744796e-05, "loss": 0.2484, "step": 80110 }, { "epoch": 2.339119136946906, "grad_norm": 0.6121571104764564, "learning_rate": 1.2238442822384429e-05, "loss": 0.2702, "step": 80115 }, { "epoch": 2.339265120217223, "grad_norm": 0.5567077501502401, "learning_rate": 1.2235739389024063e-05, "loss": 0.2445, "step": 80120 }, { "epoch": 2.3394111034875404, "grad_norm": 0.5741368126533483, "learning_rate": 1.2233035955663693e-05, "loss": 0.2618, "step": 80125 }, { "epoch": 2.3395570867578575, "grad_norm": 0.5537310404605102, "learning_rate": 1.2230332522303326e-05, "loss": 0.2613, "step": 80130 }, { "epoch": 2.339703070028175, "grad_norm": 0.6188443650032083, "learning_rate": 1.222762908894296e-05, "loss": 0.2421, "step": 80135 }, { "epoch": 2.339849053298492, "grad_norm": 0.5412526122626318, "learning_rate": 1.222492565558259e-05, "loss": 0.2658, "step": 80140 }, { "epoch": 2.3399950365688094, "grad_norm": 0.631197101993963, "learning_rate": 1.2222222222222222e-05, "loss": 0.2585, "step": 80145 }, { "epoch": 2.3401410198391264, "grad_norm": 0.5717875745470805, "learning_rate": 1.2219518788861856e-05, "loss": 0.2344, "step": 80150 }, { "epoch": 2.340287003109444, "grad_norm": 0.5572945533150957, "learning_rate": 1.2216815355501487e-05, "loss": 0.2547, "step": 80155 }, { "epoch": 2.340432986379761, "grad_norm": 0.5844819351255691, "learning_rate": 1.221411192214112e-05, "loss": 0.2578, "step": 80160 }, { "epoch": 2.3405789696500783, "grad_norm": 0.6142893407268603, "learning_rate": 1.2211408488780752e-05, "loss": 0.2572, "step": 80165 }, { "epoch": 2.3407249529203953, "grad_norm": 0.5772633760238803, "learning_rate": 1.2208705055420384e-05, "loss": 0.2385, "step": 80170 }, { "epoch": 2.3408709361907127, "grad_norm": 0.6092112095933682, "learning_rate": 1.2206001622060016e-05, "loss": 0.2518, "step": 80175 }, { "epoch": 2.3410169194610297, "grad_norm": 0.5761408182261567, "learning_rate": 1.2203298188699649e-05, "loss": 0.2581, "step": 80180 }, { "epoch": 2.341162902731347, "grad_norm": 0.5714648146104944, "learning_rate": 1.2200594755339281e-05, "loss": 0.2556, "step": 80185 }, { "epoch": 2.341308886001664, "grad_norm": 0.602689073890807, "learning_rate": 1.2197891321978913e-05, "loss": 0.2504, "step": 80190 }, { "epoch": 2.341454869271981, "grad_norm": 0.5873036661351513, "learning_rate": 1.2195187888618546e-05, "loss": 0.2545, "step": 80195 }, { "epoch": 2.3416008525422987, "grad_norm": 0.5596016129752935, "learning_rate": 1.2192484455258178e-05, "loss": 0.2554, "step": 80200 }, { "epoch": 2.341746835812616, "grad_norm": 0.5347891691356369, "learning_rate": 1.2189781021897812e-05, "loss": 0.2349, "step": 80205 }, { "epoch": 2.341892819082933, "grad_norm": 0.5546727495925677, "learning_rate": 1.2187077588537443e-05, "loss": 0.2503, "step": 80210 }, { "epoch": 2.34203880235325, "grad_norm": 0.6102011864280056, "learning_rate": 1.2184374155177075e-05, "loss": 0.2535, "step": 80215 }, { "epoch": 2.3421847856235676, "grad_norm": 0.5998108094580397, "learning_rate": 1.2181670721816709e-05, "loss": 0.2577, "step": 80220 }, { "epoch": 2.3423307688938846, "grad_norm": 0.5325608466618846, "learning_rate": 1.217896728845634e-05, "loss": 0.2474, "step": 80225 }, { "epoch": 2.342476752164202, "grad_norm": 0.5412415055603064, "learning_rate": 1.2176263855095972e-05, "loss": 0.257, "step": 80230 }, { "epoch": 2.342622735434519, "grad_norm": 0.5745221754754077, "learning_rate": 1.2173560421735606e-05, "loss": 0.2397, "step": 80235 }, { "epoch": 2.3427687187048365, "grad_norm": 0.5678739570661895, "learning_rate": 1.2170856988375236e-05, "loss": 0.2529, "step": 80240 }, { "epoch": 2.3429147019751535, "grad_norm": 0.5641979339540234, "learning_rate": 1.2168153555014869e-05, "loss": 0.2588, "step": 80245 }, { "epoch": 2.343060685245471, "grad_norm": 0.5392731162741455, "learning_rate": 1.2165450121654503e-05, "loss": 0.25, "step": 80250 }, { "epoch": 2.343206668515788, "grad_norm": 0.5742905123026849, "learning_rate": 1.2162746688294133e-05, "loss": 0.2514, "step": 80255 }, { "epoch": 2.3433526517861054, "grad_norm": 0.5659046963161088, "learning_rate": 1.2160043254933766e-05, "loss": 0.2448, "step": 80260 }, { "epoch": 2.3434986350564224, "grad_norm": 0.5758074273259628, "learning_rate": 1.21573398215734e-05, "loss": 0.2578, "step": 80265 }, { "epoch": 2.34364461832674, "grad_norm": 0.5602169790721637, "learning_rate": 1.215463638821303e-05, "loss": 0.2513, "step": 80270 }, { "epoch": 2.343790601597057, "grad_norm": 0.57249983013399, "learning_rate": 1.2151932954852663e-05, "loss": 0.2581, "step": 80275 }, { "epoch": 2.3439365848673743, "grad_norm": 0.5537016160069801, "learning_rate": 1.2149229521492297e-05, "loss": 0.2408, "step": 80280 }, { "epoch": 2.3440825681376913, "grad_norm": 0.6026003055048128, "learning_rate": 1.2146526088131927e-05, "loss": 0.2423, "step": 80285 }, { "epoch": 2.344228551408009, "grad_norm": 0.6005735613304232, "learning_rate": 1.2143822654771561e-05, "loss": 0.2548, "step": 80290 }, { "epoch": 2.344374534678326, "grad_norm": 0.5212628444763459, "learning_rate": 1.2141119221411194e-05, "loss": 0.2464, "step": 80295 }, { "epoch": 2.3445205179486432, "grad_norm": 0.5687828017997518, "learning_rate": 1.2138415788050824e-05, "loss": 0.2497, "step": 80300 }, { "epoch": 2.3446665012189603, "grad_norm": 0.5703913031695306, "learning_rate": 1.2135712354690458e-05, "loss": 0.2419, "step": 80305 }, { "epoch": 2.3448124844892777, "grad_norm": 0.5537133396510032, "learning_rate": 1.213300892133009e-05, "loss": 0.2545, "step": 80310 }, { "epoch": 2.3449584677595947, "grad_norm": 0.6065933660076188, "learning_rate": 1.2130305487969721e-05, "loss": 0.2423, "step": 80315 }, { "epoch": 2.345104451029912, "grad_norm": 0.5679550019153229, "learning_rate": 1.2127602054609355e-05, "loss": 0.238, "step": 80320 }, { "epoch": 2.345250434300229, "grad_norm": 0.5625639328767593, "learning_rate": 1.2124898621248988e-05, "loss": 0.2459, "step": 80325 }, { "epoch": 2.345396417570546, "grad_norm": 0.5878071171502587, "learning_rate": 1.2122195187888618e-05, "loss": 0.2628, "step": 80330 }, { "epoch": 2.3455424008408636, "grad_norm": 0.5860425623862732, "learning_rate": 1.2119491754528252e-05, "loss": 0.2459, "step": 80335 }, { "epoch": 2.345688384111181, "grad_norm": 0.5653731030546377, "learning_rate": 1.2116788321167885e-05, "loss": 0.2485, "step": 80340 }, { "epoch": 2.345834367381498, "grad_norm": 0.5759772115241822, "learning_rate": 1.2114084887807515e-05, "loss": 0.246, "step": 80345 }, { "epoch": 2.345980350651815, "grad_norm": 0.6265533668094976, "learning_rate": 1.211138145444715e-05, "loss": 0.2719, "step": 80350 }, { "epoch": 2.3461263339221325, "grad_norm": 0.5650559016820634, "learning_rate": 1.210867802108678e-05, "loss": 0.2553, "step": 80355 }, { "epoch": 2.3462723171924496, "grad_norm": 0.5963110907460144, "learning_rate": 1.2105974587726412e-05, "loss": 0.2557, "step": 80360 }, { "epoch": 2.346418300462767, "grad_norm": 0.5430963036042041, "learning_rate": 1.2103271154366046e-05, "loss": 0.2295, "step": 80365 }, { "epoch": 2.346564283733084, "grad_norm": 0.598475557832037, "learning_rate": 1.2100567721005677e-05, "loss": 0.2529, "step": 80370 }, { "epoch": 2.3467102670034015, "grad_norm": 0.580248974766825, "learning_rate": 1.209786428764531e-05, "loss": 0.2533, "step": 80375 }, { "epoch": 2.3468562502737185, "grad_norm": 0.510135456429933, "learning_rate": 1.2095160854284943e-05, "loss": 0.2415, "step": 80380 }, { "epoch": 2.347002233544036, "grad_norm": 0.5599205488510702, "learning_rate": 1.2092457420924574e-05, "loss": 0.2512, "step": 80385 }, { "epoch": 2.347148216814353, "grad_norm": 0.5531656216451343, "learning_rate": 1.2089753987564208e-05, "loss": 0.2541, "step": 80390 }, { "epoch": 2.3472942000846704, "grad_norm": 0.5348290789719005, "learning_rate": 1.208705055420384e-05, "loss": 0.25, "step": 80395 }, { "epoch": 2.3474401833549874, "grad_norm": 0.54700438788876, "learning_rate": 1.208434712084347e-05, "loss": 0.2603, "step": 80400 }, { "epoch": 2.347586166625305, "grad_norm": 0.5915768458020568, "learning_rate": 1.2081643687483105e-05, "loss": 0.2407, "step": 80405 }, { "epoch": 2.347732149895622, "grad_norm": 0.572377928360568, "learning_rate": 1.2078940254122737e-05, "loss": 0.2603, "step": 80410 }, { "epoch": 2.3478781331659393, "grad_norm": 0.5914497156146324, "learning_rate": 1.2076236820762368e-05, "loss": 0.2598, "step": 80415 }, { "epoch": 2.3480241164362563, "grad_norm": 0.6169668246362918, "learning_rate": 1.2073533387402002e-05, "loss": 0.2703, "step": 80420 }, { "epoch": 2.3481700997065738, "grad_norm": 0.5356751148671948, "learning_rate": 1.2070829954041634e-05, "loss": 0.261, "step": 80425 }, { "epoch": 2.3483160829768908, "grad_norm": 1.7797442000609789, "learning_rate": 1.2068126520681265e-05, "loss": 0.2577, "step": 80430 }, { "epoch": 2.348462066247208, "grad_norm": 0.6429533933912877, "learning_rate": 1.2065423087320899e-05, "loss": 0.2626, "step": 80435 }, { "epoch": 2.348608049517525, "grad_norm": 0.6017171793240157, "learning_rate": 1.2062719653960531e-05, "loss": 0.2653, "step": 80440 }, { "epoch": 2.3487540327878427, "grad_norm": 0.5998011487885817, "learning_rate": 1.2060016220600162e-05, "loss": 0.2483, "step": 80445 }, { "epoch": 2.3489000160581597, "grad_norm": 0.6119577078409053, "learning_rate": 1.2057312787239796e-05, "loss": 0.2665, "step": 80450 }, { "epoch": 2.349045999328477, "grad_norm": 0.6113884050752603, "learning_rate": 1.2054609353879428e-05, "loss": 0.2608, "step": 80455 }, { "epoch": 2.349191982598794, "grad_norm": 0.5711294386208988, "learning_rate": 1.205190592051906e-05, "loss": 0.2476, "step": 80460 }, { "epoch": 2.3493379658691116, "grad_norm": 0.6054429991404255, "learning_rate": 1.2049202487158692e-05, "loss": 0.2498, "step": 80465 }, { "epoch": 2.3494839491394286, "grad_norm": 0.5994334186284542, "learning_rate": 1.2046499053798325e-05, "loss": 0.2352, "step": 80470 }, { "epoch": 2.349629932409746, "grad_norm": 0.5498189471032345, "learning_rate": 1.2043795620437957e-05, "loss": 0.2504, "step": 80475 }, { "epoch": 2.349775915680063, "grad_norm": 0.5768416491461674, "learning_rate": 1.204109218707759e-05, "loss": 0.2467, "step": 80480 }, { "epoch": 2.34992189895038, "grad_norm": 0.6566661665085101, "learning_rate": 1.2038388753717222e-05, "loss": 0.2438, "step": 80485 }, { "epoch": 2.3500678822206975, "grad_norm": 0.6001753372752081, "learning_rate": 1.2035685320356854e-05, "loss": 0.2464, "step": 80490 }, { "epoch": 2.350213865491015, "grad_norm": 0.5728291008045694, "learning_rate": 1.2032981886996486e-05, "loss": 0.2579, "step": 80495 }, { "epoch": 2.350359848761332, "grad_norm": 0.563816819802323, "learning_rate": 1.2030278453636119e-05, "loss": 0.2404, "step": 80500 }, { "epoch": 2.350505832031649, "grad_norm": 0.5890553780629562, "learning_rate": 1.2027575020275751e-05, "loss": 0.2492, "step": 80505 }, { "epoch": 2.3506518153019664, "grad_norm": 0.600563987817566, "learning_rate": 1.2024871586915383e-05, "loss": 0.2557, "step": 80510 }, { "epoch": 2.3507977985722834, "grad_norm": 0.5802050803673907, "learning_rate": 1.2022168153555016e-05, "loss": 0.2538, "step": 80515 }, { "epoch": 2.350943781842601, "grad_norm": 0.5116592939167971, "learning_rate": 1.2019464720194648e-05, "loss": 0.2445, "step": 80520 }, { "epoch": 2.351089765112918, "grad_norm": 0.5252386800230491, "learning_rate": 1.201676128683428e-05, "loss": 0.2405, "step": 80525 }, { "epoch": 2.3512357483832353, "grad_norm": 0.5850012025014385, "learning_rate": 1.2014057853473913e-05, "loss": 0.2508, "step": 80530 }, { "epoch": 2.3513817316535524, "grad_norm": 0.573840051762071, "learning_rate": 1.2011354420113545e-05, "loss": 0.2433, "step": 80535 }, { "epoch": 2.35152771492387, "grad_norm": 0.5935148216898882, "learning_rate": 1.2008650986753177e-05, "loss": 0.2537, "step": 80540 }, { "epoch": 2.351673698194187, "grad_norm": 0.5711275688196559, "learning_rate": 1.200594755339281e-05, "loss": 0.2612, "step": 80545 }, { "epoch": 2.3518196814645043, "grad_norm": 0.5553275220481783, "learning_rate": 1.2003244120032442e-05, "loss": 0.2598, "step": 80550 }, { "epoch": 2.3519656647348213, "grad_norm": 0.6194034752685965, "learning_rate": 1.2000540686672074e-05, "loss": 0.2581, "step": 80555 }, { "epoch": 2.3521116480051387, "grad_norm": 0.5627650520218396, "learning_rate": 1.1997837253311706e-05, "loss": 0.2397, "step": 80560 }, { "epoch": 2.3522576312754557, "grad_norm": 0.5661841949551835, "learning_rate": 1.1995133819951339e-05, "loss": 0.2368, "step": 80565 }, { "epoch": 2.352403614545773, "grad_norm": 0.6140661533488116, "learning_rate": 1.1992430386590971e-05, "loss": 0.2529, "step": 80570 }, { "epoch": 2.35254959781609, "grad_norm": 0.5548394097656807, "learning_rate": 1.1989726953230603e-05, "loss": 0.2391, "step": 80575 }, { "epoch": 2.3526955810864076, "grad_norm": 0.5801440460667286, "learning_rate": 1.1987023519870236e-05, "loss": 0.2654, "step": 80580 }, { "epoch": 2.3528415643567246, "grad_norm": 0.585830980479862, "learning_rate": 1.1984320086509868e-05, "loss": 0.2531, "step": 80585 }, { "epoch": 2.352987547627042, "grad_norm": 0.6075678263207783, "learning_rate": 1.19816166531495e-05, "loss": 0.2551, "step": 80590 }, { "epoch": 2.353133530897359, "grad_norm": 0.644083675889834, "learning_rate": 1.1978913219789133e-05, "loss": 0.2377, "step": 80595 }, { "epoch": 2.3532795141676766, "grad_norm": 0.5692581328847328, "learning_rate": 1.1976209786428765e-05, "loss": 0.2505, "step": 80600 }, { "epoch": 2.3534254974379936, "grad_norm": 0.5937564696322097, "learning_rate": 1.1973506353068397e-05, "loss": 0.2566, "step": 80605 }, { "epoch": 2.353571480708311, "grad_norm": 0.6125708922533125, "learning_rate": 1.197080291970803e-05, "loss": 0.2825, "step": 80610 }, { "epoch": 2.353717463978628, "grad_norm": 0.5981996202352416, "learning_rate": 1.1968099486347662e-05, "loss": 0.2628, "step": 80615 }, { "epoch": 2.353863447248945, "grad_norm": 0.5724431210138907, "learning_rate": 1.1965396052987294e-05, "loss": 0.2471, "step": 80620 }, { "epoch": 2.3540094305192625, "grad_norm": 0.5709649968792438, "learning_rate": 1.1962692619626927e-05, "loss": 0.2548, "step": 80625 }, { "epoch": 2.35415541378958, "grad_norm": 0.6010095833577918, "learning_rate": 1.1959989186266559e-05, "loss": 0.2499, "step": 80630 }, { "epoch": 2.354301397059897, "grad_norm": 0.5852924288768179, "learning_rate": 1.1957285752906191e-05, "loss": 0.2507, "step": 80635 }, { "epoch": 2.354447380330214, "grad_norm": 0.5529238842192511, "learning_rate": 1.1954582319545824e-05, "loss": 0.237, "step": 80640 }, { "epoch": 2.3545933636005314, "grad_norm": 0.5538624490408275, "learning_rate": 1.1951878886185456e-05, "loss": 0.2428, "step": 80645 }, { "epoch": 2.354739346870849, "grad_norm": 0.5685840995994826, "learning_rate": 1.1949175452825088e-05, "loss": 0.2707, "step": 80650 }, { "epoch": 2.354885330141166, "grad_norm": 0.5823420760466108, "learning_rate": 1.194647201946472e-05, "loss": 0.2735, "step": 80655 }, { "epoch": 2.355031313411483, "grad_norm": 0.5786619208833741, "learning_rate": 1.1943768586104353e-05, "loss": 0.2628, "step": 80660 }, { "epoch": 2.3551772966818003, "grad_norm": 0.5319089961395678, "learning_rate": 1.1941065152743985e-05, "loss": 0.2529, "step": 80665 }, { "epoch": 2.3553232799521173, "grad_norm": 0.5114994747554117, "learning_rate": 1.1938361719383617e-05, "loss": 0.2444, "step": 80670 }, { "epoch": 2.3554692632224348, "grad_norm": 0.6352480144866524, "learning_rate": 1.193565828602325e-05, "loss": 0.2474, "step": 80675 }, { "epoch": 2.355615246492752, "grad_norm": 0.557890982818538, "learning_rate": 1.1932954852662882e-05, "loss": 0.2373, "step": 80680 }, { "epoch": 2.3557612297630692, "grad_norm": 0.5311251235743649, "learning_rate": 1.1930251419302514e-05, "loss": 0.2305, "step": 80685 }, { "epoch": 2.3559072130333862, "grad_norm": 0.5565006202798631, "learning_rate": 1.1927547985942147e-05, "loss": 0.2343, "step": 80690 }, { "epoch": 2.3560531963037037, "grad_norm": 0.5570432212023686, "learning_rate": 1.1924844552581779e-05, "loss": 0.2443, "step": 80695 }, { "epoch": 2.3561991795740207, "grad_norm": 0.5787633104437566, "learning_rate": 1.1922141119221411e-05, "loss": 0.2409, "step": 80700 }, { "epoch": 2.356345162844338, "grad_norm": 0.5865715540791043, "learning_rate": 1.1919437685861044e-05, "loss": 0.2443, "step": 80705 }, { "epoch": 2.356491146114655, "grad_norm": 0.5981795658665351, "learning_rate": 1.1916734252500676e-05, "loss": 0.2355, "step": 80710 }, { "epoch": 2.3566371293849726, "grad_norm": 0.5861785242916725, "learning_rate": 1.191403081914031e-05, "loss": 0.248, "step": 80715 }, { "epoch": 2.3567831126552896, "grad_norm": 0.5900969517018083, "learning_rate": 1.191132738577994e-05, "loss": 0.247, "step": 80720 }, { "epoch": 2.356929095925607, "grad_norm": 0.604497555536689, "learning_rate": 1.1908623952419573e-05, "loss": 0.2524, "step": 80725 }, { "epoch": 2.357075079195924, "grad_norm": 0.561416059813945, "learning_rate": 1.1905920519059207e-05, "loss": 0.2589, "step": 80730 }, { "epoch": 2.3572210624662415, "grad_norm": 0.539207227233442, "learning_rate": 1.1903217085698838e-05, "loss": 0.2397, "step": 80735 }, { "epoch": 2.3573670457365585, "grad_norm": 0.5955437175664346, "learning_rate": 1.190051365233847e-05, "loss": 0.253, "step": 80740 }, { "epoch": 2.357513029006876, "grad_norm": 0.6001103451947587, "learning_rate": 1.1897810218978104e-05, "loss": 0.2508, "step": 80745 }, { "epoch": 2.357659012277193, "grad_norm": 0.5702800737499062, "learning_rate": 1.1895106785617735e-05, "loss": 0.2475, "step": 80750 }, { "epoch": 2.3578049955475104, "grad_norm": 0.6235277706067575, "learning_rate": 1.1892403352257367e-05, "loss": 0.258, "step": 80755 }, { "epoch": 2.3579509788178274, "grad_norm": 0.5715407804336112, "learning_rate": 1.1889699918897001e-05, "loss": 0.2544, "step": 80760 }, { "epoch": 2.358096962088145, "grad_norm": 0.6316436233052276, "learning_rate": 1.1886996485536632e-05, "loss": 0.2571, "step": 80765 }, { "epoch": 2.358242945358462, "grad_norm": 0.5783736792881062, "learning_rate": 1.1884293052176264e-05, "loss": 0.2522, "step": 80770 }, { "epoch": 2.358388928628779, "grad_norm": 0.5833040103755291, "learning_rate": 1.1881589618815898e-05, "loss": 0.2456, "step": 80775 }, { "epoch": 2.3585349118990964, "grad_norm": 0.5500946135913698, "learning_rate": 1.1878886185455528e-05, "loss": 0.2381, "step": 80780 }, { "epoch": 2.358680895169414, "grad_norm": 0.6107015069135213, "learning_rate": 1.187618275209516e-05, "loss": 0.2564, "step": 80785 }, { "epoch": 2.358826878439731, "grad_norm": 0.5781635758337682, "learning_rate": 1.1873479318734795e-05, "loss": 0.2344, "step": 80790 }, { "epoch": 2.358972861710048, "grad_norm": 0.5879254477491541, "learning_rate": 1.1870775885374425e-05, "loss": 0.2669, "step": 80795 }, { "epoch": 2.3591188449803653, "grad_norm": 0.6039368815551568, "learning_rate": 1.186807245201406e-05, "loss": 0.2578, "step": 80800 }, { "epoch": 2.3592648282506823, "grad_norm": 0.5389841752993169, "learning_rate": 1.186536901865369e-05, "loss": 0.2505, "step": 80805 }, { "epoch": 2.3594108115209997, "grad_norm": 0.5863692648836072, "learning_rate": 1.1862665585293322e-05, "loss": 0.255, "step": 80810 }, { "epoch": 2.3595567947913167, "grad_norm": 0.5314651103929768, "learning_rate": 1.1859962151932956e-05, "loss": 0.2528, "step": 80815 }, { "epoch": 2.359702778061634, "grad_norm": 0.5115766566279684, "learning_rate": 1.1857258718572587e-05, "loss": 0.2462, "step": 80820 }, { "epoch": 2.359848761331951, "grad_norm": 0.6033525500392652, "learning_rate": 1.185455528521222e-05, "loss": 0.242, "step": 80825 }, { "epoch": 2.3599947446022687, "grad_norm": 0.5840759320391762, "learning_rate": 1.1851851851851853e-05, "loss": 0.2376, "step": 80830 }, { "epoch": 2.3601407278725857, "grad_norm": 0.6130938395096818, "learning_rate": 1.1849148418491484e-05, "loss": 0.2553, "step": 80835 }, { "epoch": 2.360286711142903, "grad_norm": 0.6237150658671207, "learning_rate": 1.1846444985131116e-05, "loss": 0.2621, "step": 80840 }, { "epoch": 2.36043269441322, "grad_norm": 0.5686411086359455, "learning_rate": 1.184374155177075e-05, "loss": 0.2545, "step": 80845 }, { "epoch": 2.3605786776835376, "grad_norm": 0.5718076960671317, "learning_rate": 1.1841038118410381e-05, "loss": 0.2518, "step": 80850 }, { "epoch": 2.3607246609538546, "grad_norm": 0.589511537804099, "learning_rate": 1.1838334685050013e-05, "loss": 0.2551, "step": 80855 }, { "epoch": 2.360870644224172, "grad_norm": 0.5617708802254786, "learning_rate": 1.1835631251689647e-05, "loss": 0.2402, "step": 80860 }, { "epoch": 2.361016627494489, "grad_norm": 0.5352334899388914, "learning_rate": 1.1832927818329278e-05, "loss": 0.2443, "step": 80865 }, { "epoch": 2.3611626107648065, "grad_norm": 0.5720185086196874, "learning_rate": 1.183022438496891e-05, "loss": 0.2545, "step": 80870 }, { "epoch": 2.3613085940351235, "grad_norm": 0.6000079369636226, "learning_rate": 1.1827520951608544e-05, "loss": 0.2578, "step": 80875 }, { "epoch": 2.361454577305441, "grad_norm": 0.5511437640520868, "learning_rate": 1.1824817518248175e-05, "loss": 0.252, "step": 80880 }, { "epoch": 2.361600560575758, "grad_norm": 0.5904551545864655, "learning_rate": 1.1822114084887809e-05, "loss": 0.2553, "step": 80885 }, { "epoch": 2.3617465438460754, "grad_norm": 0.5647130488364126, "learning_rate": 1.1819410651527441e-05, "loss": 0.2727, "step": 80890 }, { "epoch": 2.3618925271163924, "grad_norm": 0.5263805175253253, "learning_rate": 1.1816707218167072e-05, "loss": 0.2371, "step": 80895 }, { "epoch": 2.36203851038671, "grad_norm": 0.6242699146893084, "learning_rate": 1.1814003784806706e-05, "loss": 0.2444, "step": 80900 }, { "epoch": 2.362184493657027, "grad_norm": 0.5298838721189627, "learning_rate": 1.1811300351446338e-05, "loss": 0.2327, "step": 80905 }, { "epoch": 2.362330476927344, "grad_norm": 0.4976101371586311, "learning_rate": 1.1808596918085969e-05, "loss": 0.249, "step": 80910 }, { "epoch": 2.3624764601976613, "grad_norm": 0.5968275325425182, "learning_rate": 1.1805893484725603e-05, "loss": 0.2513, "step": 80915 }, { "epoch": 2.362622443467979, "grad_norm": 0.5243131274962656, "learning_rate": 1.1803190051365235e-05, "loss": 0.2397, "step": 80920 }, { "epoch": 2.362768426738296, "grad_norm": 0.6026779412202693, "learning_rate": 1.1800486618004866e-05, "loss": 0.241, "step": 80925 }, { "epoch": 2.362914410008613, "grad_norm": 0.5933566982523379, "learning_rate": 1.17977831846445e-05, "loss": 0.2475, "step": 80930 }, { "epoch": 2.3630603932789302, "grad_norm": 0.7171041209637268, "learning_rate": 1.1795079751284132e-05, "loss": 0.2524, "step": 80935 }, { "epoch": 2.3632063765492477, "grad_norm": 0.6006577952588265, "learning_rate": 1.1792376317923763e-05, "loss": 0.2571, "step": 80940 }, { "epoch": 2.3633523598195647, "grad_norm": 0.5839688098511755, "learning_rate": 1.1789672884563397e-05, "loss": 0.2574, "step": 80945 }, { "epoch": 2.3634983430898817, "grad_norm": 0.6045199353944721, "learning_rate": 1.1786969451203029e-05, "loss": 0.2456, "step": 80950 }, { "epoch": 2.363644326360199, "grad_norm": 0.5864179313194186, "learning_rate": 1.1784266017842661e-05, "loss": 0.2669, "step": 80955 }, { "epoch": 2.363790309630516, "grad_norm": 0.5866193097389594, "learning_rate": 1.1781562584482294e-05, "loss": 0.2554, "step": 80960 }, { "epoch": 2.3639362929008336, "grad_norm": 0.6299113582149817, "learning_rate": 1.1778859151121926e-05, "loss": 0.2733, "step": 80965 }, { "epoch": 2.3640822761711506, "grad_norm": 0.5578270825125422, "learning_rate": 1.1776155717761558e-05, "loss": 0.2628, "step": 80970 }, { "epoch": 2.364228259441468, "grad_norm": 0.5584298877135646, "learning_rate": 1.177345228440119e-05, "loss": 0.2439, "step": 80975 }, { "epoch": 2.364374242711785, "grad_norm": 0.5714425987660544, "learning_rate": 1.1770748851040823e-05, "loss": 0.2475, "step": 80980 }, { "epoch": 2.3645202259821025, "grad_norm": 0.574481646672595, "learning_rate": 1.1768045417680455e-05, "loss": 0.2417, "step": 80985 }, { "epoch": 2.3646662092524195, "grad_norm": 0.5029163365813407, "learning_rate": 1.1765341984320087e-05, "loss": 0.2496, "step": 80990 }, { "epoch": 2.364812192522737, "grad_norm": 0.6024817605341558, "learning_rate": 1.176263855095972e-05, "loss": 0.2418, "step": 80995 }, { "epoch": 2.364958175793054, "grad_norm": 0.5844751177721013, "learning_rate": 1.1759935117599352e-05, "loss": 0.2632, "step": 81000 }, { "epoch": 2.3651041590633715, "grad_norm": 0.541468570639595, "learning_rate": 1.1757231684238984e-05, "loss": 0.2591, "step": 81005 }, { "epoch": 2.3652501423336885, "grad_norm": 0.5556350825361394, "learning_rate": 1.1754528250878615e-05, "loss": 0.2512, "step": 81010 }, { "epoch": 2.365396125604006, "grad_norm": 0.6071861511279473, "learning_rate": 1.1751824817518249e-05, "loss": 0.245, "step": 81015 }, { "epoch": 2.365542108874323, "grad_norm": 0.5995255118992197, "learning_rate": 1.1749121384157881e-05, "loss": 0.2518, "step": 81020 }, { "epoch": 2.3656880921446404, "grad_norm": 0.564570881777866, "learning_rate": 1.1746417950797512e-05, "loss": 0.2528, "step": 81025 }, { "epoch": 2.3658340754149574, "grad_norm": 0.5943639419795446, "learning_rate": 1.1743714517437146e-05, "loss": 0.2662, "step": 81030 }, { "epoch": 2.365980058685275, "grad_norm": 0.6512286667358712, "learning_rate": 1.1741011084076778e-05, "loss": 0.2622, "step": 81035 }, { "epoch": 2.366126041955592, "grad_norm": 0.5716126150038545, "learning_rate": 1.173830765071641e-05, "loss": 0.2492, "step": 81040 }, { "epoch": 2.3662720252259093, "grad_norm": 0.5766551463473907, "learning_rate": 1.1735604217356043e-05, "loss": 0.2613, "step": 81045 }, { "epoch": 2.3664180084962263, "grad_norm": 0.5777407290424226, "learning_rate": 1.1732900783995675e-05, "loss": 0.2519, "step": 81050 }, { "epoch": 2.3665639917665438, "grad_norm": 0.5618035809274928, "learning_rate": 1.1730197350635308e-05, "loss": 0.2655, "step": 81055 }, { "epoch": 2.3667099750368608, "grad_norm": 0.5625303098752364, "learning_rate": 1.172749391727494e-05, "loss": 0.2441, "step": 81060 }, { "epoch": 2.3668559583071778, "grad_norm": 0.5998382082504975, "learning_rate": 1.1724790483914572e-05, "loss": 0.2505, "step": 81065 }, { "epoch": 2.367001941577495, "grad_norm": 0.5386614753620114, "learning_rate": 1.1722087050554205e-05, "loss": 0.2415, "step": 81070 }, { "epoch": 2.3671479248478127, "grad_norm": 0.5759930588945475, "learning_rate": 1.1719383617193837e-05, "loss": 0.2384, "step": 81075 }, { "epoch": 2.3672939081181297, "grad_norm": 0.6160282074084361, "learning_rate": 1.171668018383347e-05, "loss": 0.2589, "step": 81080 }, { "epoch": 2.3674398913884467, "grad_norm": 0.6161328552849713, "learning_rate": 1.1713976750473102e-05, "loss": 0.2509, "step": 81085 }, { "epoch": 2.367585874658764, "grad_norm": 0.5496277258628149, "learning_rate": 1.1711273317112734e-05, "loss": 0.2479, "step": 81090 }, { "epoch": 2.367731857929081, "grad_norm": 0.5579260772011381, "learning_rate": 1.1708569883752366e-05, "loss": 0.2436, "step": 81095 }, { "epoch": 2.3678778411993986, "grad_norm": 0.5665964388160841, "learning_rate": 1.1705866450391998e-05, "loss": 0.2415, "step": 81100 }, { "epoch": 2.3680238244697156, "grad_norm": 0.5868453087453465, "learning_rate": 1.170316301703163e-05, "loss": 0.2529, "step": 81105 }, { "epoch": 2.368169807740033, "grad_norm": 0.5908174390199528, "learning_rate": 1.1700459583671263e-05, "loss": 0.2534, "step": 81110 }, { "epoch": 2.36831579101035, "grad_norm": 0.540400204771903, "learning_rate": 1.1697756150310895e-05, "loss": 0.2661, "step": 81115 }, { "epoch": 2.3684617742806675, "grad_norm": 0.5757625679084636, "learning_rate": 1.1695052716950528e-05, "loss": 0.2516, "step": 81120 }, { "epoch": 2.3686077575509845, "grad_norm": 0.5264177582406288, "learning_rate": 1.169234928359016e-05, "loss": 0.2411, "step": 81125 }, { "epoch": 2.368753740821302, "grad_norm": 0.6517011056964739, "learning_rate": 1.1689645850229792e-05, "loss": 0.2593, "step": 81130 }, { "epoch": 2.368899724091619, "grad_norm": 0.5955335451859735, "learning_rate": 1.1686942416869425e-05, "loss": 0.2504, "step": 81135 }, { "epoch": 2.3690457073619364, "grad_norm": 0.5392751591188732, "learning_rate": 1.1684238983509057e-05, "loss": 0.2525, "step": 81140 }, { "epoch": 2.3691916906322534, "grad_norm": 0.5012903302796119, "learning_rate": 1.168153555014869e-05, "loss": 0.2465, "step": 81145 }, { "epoch": 2.369337673902571, "grad_norm": 0.5754073913805507, "learning_rate": 1.1678832116788322e-05, "loss": 0.2602, "step": 81150 }, { "epoch": 2.369483657172888, "grad_norm": 0.6022388398618909, "learning_rate": 1.1676128683427954e-05, "loss": 0.2566, "step": 81155 }, { "epoch": 2.3696296404432053, "grad_norm": 0.5220616105730103, "learning_rate": 1.1673425250067586e-05, "loss": 0.2473, "step": 81160 }, { "epoch": 2.3697756237135224, "grad_norm": 0.5989605063200126, "learning_rate": 1.1670721816707219e-05, "loss": 0.2613, "step": 81165 }, { "epoch": 2.36992160698384, "grad_norm": 0.5570945728474633, "learning_rate": 1.1668018383346851e-05, "loss": 0.2454, "step": 81170 }, { "epoch": 2.370067590254157, "grad_norm": 0.6011659596969461, "learning_rate": 1.1665314949986483e-05, "loss": 0.2706, "step": 81175 }, { "epoch": 2.3702135735244743, "grad_norm": 0.6407016635249447, "learning_rate": 1.1662611516626116e-05, "loss": 0.2527, "step": 81180 }, { "epoch": 2.3703595567947913, "grad_norm": 0.5895751644219821, "learning_rate": 1.1659908083265748e-05, "loss": 0.2534, "step": 81185 }, { "epoch": 2.3705055400651087, "grad_norm": 0.5699755595259316, "learning_rate": 1.165720464990538e-05, "loss": 0.2472, "step": 81190 }, { "epoch": 2.3706515233354257, "grad_norm": 0.596094827565202, "learning_rate": 1.1654501216545012e-05, "loss": 0.241, "step": 81195 }, { "epoch": 2.3707975066057427, "grad_norm": 0.5838869596368593, "learning_rate": 1.1651797783184645e-05, "loss": 0.2628, "step": 81200 }, { "epoch": 2.37094348987606, "grad_norm": 0.587838625826814, "learning_rate": 1.1649094349824277e-05, "loss": 0.2588, "step": 81205 }, { "epoch": 2.3710894731463776, "grad_norm": 0.5785397664328391, "learning_rate": 1.1646390916463911e-05, "loss": 0.2458, "step": 81210 }, { "epoch": 2.3712354564166946, "grad_norm": 0.6135991332776514, "learning_rate": 1.1643687483103542e-05, "loss": 0.2414, "step": 81215 }, { "epoch": 2.3713814396870116, "grad_norm": 0.5376804634933867, "learning_rate": 1.1640984049743174e-05, "loss": 0.2373, "step": 81220 }, { "epoch": 2.371527422957329, "grad_norm": 0.618193238156591, "learning_rate": 1.1638280616382808e-05, "loss": 0.2547, "step": 81225 }, { "epoch": 2.3716734062276466, "grad_norm": 0.6100031209115895, "learning_rate": 1.1635577183022439e-05, "loss": 0.26, "step": 81230 }, { "epoch": 2.3718193894979636, "grad_norm": 0.5738261734930205, "learning_rate": 1.1632873749662071e-05, "loss": 0.2595, "step": 81235 }, { "epoch": 2.3719653727682806, "grad_norm": 0.5823799274740009, "learning_rate": 1.1630170316301705e-05, "loss": 0.2551, "step": 81240 }, { "epoch": 2.372111356038598, "grad_norm": 0.5592597252121754, "learning_rate": 1.1627466882941336e-05, "loss": 0.2435, "step": 81245 }, { "epoch": 2.372257339308915, "grad_norm": 0.6053555551487898, "learning_rate": 1.1624763449580968e-05, "loss": 0.2572, "step": 81250 }, { "epoch": 2.3724033225792325, "grad_norm": 0.6209195951277036, "learning_rate": 1.1622060016220602e-05, "loss": 0.25, "step": 81255 }, { "epoch": 2.3725493058495495, "grad_norm": 0.5456147521481574, "learning_rate": 1.1619356582860233e-05, "loss": 0.2446, "step": 81260 }, { "epoch": 2.372695289119867, "grad_norm": 0.5654476522288937, "learning_rate": 1.1616653149499865e-05, "loss": 0.2389, "step": 81265 }, { "epoch": 2.372841272390184, "grad_norm": 0.5504958557988111, "learning_rate": 1.1613949716139497e-05, "loss": 0.2504, "step": 81270 }, { "epoch": 2.3729872556605014, "grad_norm": 0.5843752927820898, "learning_rate": 1.161124628277913e-05, "loss": 0.2484, "step": 81275 }, { "epoch": 2.3731332389308184, "grad_norm": 0.5764916521456745, "learning_rate": 1.1608542849418762e-05, "loss": 0.2559, "step": 81280 }, { "epoch": 2.373279222201136, "grad_norm": 0.5800492989800238, "learning_rate": 1.1605839416058394e-05, "loss": 0.2586, "step": 81285 }, { "epoch": 2.373425205471453, "grad_norm": 0.5833470739158702, "learning_rate": 1.1603135982698027e-05, "loss": 0.2472, "step": 81290 }, { "epoch": 2.3735711887417703, "grad_norm": 0.5988782533090882, "learning_rate": 1.160043254933766e-05, "loss": 0.2528, "step": 81295 }, { "epoch": 2.3737171720120873, "grad_norm": 0.5517880242213916, "learning_rate": 1.1597729115977291e-05, "loss": 0.2425, "step": 81300 }, { "epoch": 2.3738631552824048, "grad_norm": 0.5783213008780339, "learning_rate": 1.1595025682616923e-05, "loss": 0.2627, "step": 81305 }, { "epoch": 2.3740091385527218, "grad_norm": 0.5744509463384628, "learning_rate": 1.1592322249256557e-05, "loss": 0.2584, "step": 81310 }, { "epoch": 2.3741551218230392, "grad_norm": 0.6159272607158967, "learning_rate": 1.1589618815896188e-05, "loss": 0.248, "step": 81315 }, { "epoch": 2.3743011050933562, "grad_norm": 0.6064163347089115, "learning_rate": 1.158691538253582e-05, "loss": 0.2545, "step": 81320 }, { "epoch": 2.3744470883636737, "grad_norm": 0.5748345109795088, "learning_rate": 1.1584211949175454e-05, "loss": 0.2367, "step": 81325 }, { "epoch": 2.3745930716339907, "grad_norm": 0.6238310288626941, "learning_rate": 1.1581508515815085e-05, "loss": 0.2604, "step": 81330 }, { "epoch": 2.374739054904308, "grad_norm": 0.5889812214625832, "learning_rate": 1.1578805082454717e-05, "loss": 0.2521, "step": 81335 }, { "epoch": 2.374885038174625, "grad_norm": 0.5752356872320115, "learning_rate": 1.1576101649094351e-05, "loss": 0.2479, "step": 81340 }, { "epoch": 2.3750310214449426, "grad_norm": 0.5633001873047214, "learning_rate": 1.1573398215733982e-05, "loss": 0.2581, "step": 81345 }, { "epoch": 2.3751770047152596, "grad_norm": 0.542855653550745, "learning_rate": 1.1570694782373614e-05, "loss": 0.2508, "step": 81350 }, { "epoch": 2.3753229879855766, "grad_norm": 0.5412834729580672, "learning_rate": 1.1567991349013248e-05, "loss": 0.2509, "step": 81355 }, { "epoch": 2.375468971255894, "grad_norm": 0.5485763451819016, "learning_rate": 1.1565287915652879e-05, "loss": 0.2661, "step": 81360 }, { "epoch": 2.3756149545262115, "grad_norm": 0.525192187057786, "learning_rate": 1.1562584482292511e-05, "loss": 0.2364, "step": 81365 }, { "epoch": 2.3757609377965285, "grad_norm": 0.6175434650769339, "learning_rate": 1.1559881048932145e-05, "loss": 0.2584, "step": 81370 }, { "epoch": 2.3759069210668455, "grad_norm": 0.6776518093258524, "learning_rate": 1.1557177615571776e-05, "loss": 0.2708, "step": 81375 }, { "epoch": 2.376052904337163, "grad_norm": 0.5818537138916847, "learning_rate": 1.155447418221141e-05, "loss": 0.2367, "step": 81380 }, { "epoch": 2.37619888760748, "grad_norm": 0.5706721541828024, "learning_rate": 1.1551770748851042e-05, "loss": 0.2382, "step": 81385 }, { "epoch": 2.3763448708777974, "grad_norm": 0.6310585456761789, "learning_rate": 1.1549067315490673e-05, "loss": 0.2568, "step": 81390 }, { "epoch": 2.3764908541481145, "grad_norm": 0.5768914869239034, "learning_rate": 1.1546363882130307e-05, "loss": 0.2595, "step": 81395 }, { "epoch": 2.376636837418432, "grad_norm": 0.5742373564289323, "learning_rate": 1.154366044876994e-05, "loss": 0.2562, "step": 81400 }, { "epoch": 2.376782820688749, "grad_norm": 0.583234887606277, "learning_rate": 1.154095701540957e-05, "loss": 0.2425, "step": 81405 }, { "epoch": 2.3769288039590664, "grad_norm": 0.64300618661637, "learning_rate": 1.1538253582049204e-05, "loss": 0.261, "step": 81410 }, { "epoch": 2.3770747872293834, "grad_norm": 0.5802170239930339, "learning_rate": 1.1535550148688836e-05, "loss": 0.2595, "step": 81415 }, { "epoch": 2.377220770499701, "grad_norm": 0.5444677951500199, "learning_rate": 1.1532846715328467e-05, "loss": 0.2382, "step": 81420 }, { "epoch": 2.377366753770018, "grad_norm": 0.5936696014072201, "learning_rate": 1.15301432819681e-05, "loss": 0.2485, "step": 81425 }, { "epoch": 2.3775127370403353, "grad_norm": 0.593890160171988, "learning_rate": 1.1527439848607733e-05, "loss": 0.2485, "step": 81430 }, { "epoch": 2.3776587203106523, "grad_norm": 0.6070954187582992, "learning_rate": 1.1524736415247364e-05, "loss": 0.2612, "step": 81435 }, { "epoch": 2.3778047035809697, "grad_norm": 0.5846863494422623, "learning_rate": 1.1522032981886998e-05, "loss": 0.2509, "step": 81440 }, { "epoch": 2.3779506868512867, "grad_norm": 0.5991575561419817, "learning_rate": 1.151932954852663e-05, "loss": 0.2384, "step": 81445 }, { "epoch": 2.378096670121604, "grad_norm": 0.5915169296264489, "learning_rate": 1.151662611516626e-05, "loss": 0.2518, "step": 81450 }, { "epoch": 2.378242653391921, "grad_norm": 0.6158586195625381, "learning_rate": 1.1513922681805895e-05, "loss": 0.2564, "step": 81455 }, { "epoch": 2.3783886366622387, "grad_norm": 0.573073975844086, "learning_rate": 1.1511219248445525e-05, "loss": 0.2417, "step": 81460 }, { "epoch": 2.3785346199325557, "grad_norm": 0.6234964532738363, "learning_rate": 1.150851581508516e-05, "loss": 0.2518, "step": 81465 }, { "epoch": 2.378680603202873, "grad_norm": 0.5783110433316418, "learning_rate": 1.1505812381724792e-05, "loss": 0.2459, "step": 81470 }, { "epoch": 2.37882658647319, "grad_norm": 0.5780389885454815, "learning_rate": 1.1503108948364422e-05, "loss": 0.2529, "step": 81475 }, { "epoch": 2.3789725697435076, "grad_norm": 0.5601643820082766, "learning_rate": 1.1500405515004056e-05, "loss": 0.2503, "step": 81480 }, { "epoch": 2.3791185530138246, "grad_norm": 0.5770828597540155, "learning_rate": 1.1497702081643689e-05, "loss": 0.2491, "step": 81485 }, { "epoch": 2.379264536284142, "grad_norm": 0.5731046304446239, "learning_rate": 1.149499864828332e-05, "loss": 0.2411, "step": 81490 }, { "epoch": 2.379410519554459, "grad_norm": 0.5934349886141337, "learning_rate": 1.1492295214922953e-05, "loss": 0.2647, "step": 81495 }, { "epoch": 2.3795565028247765, "grad_norm": 0.5340138252443757, "learning_rate": 1.1489591781562586e-05, "loss": 0.247, "step": 81500 }, { "epoch": 2.3797024860950935, "grad_norm": 0.604377725949386, "learning_rate": 1.1486888348202216e-05, "loss": 0.2487, "step": 81505 }, { "epoch": 2.3798484693654105, "grad_norm": 0.5629333202098278, "learning_rate": 1.148418491484185e-05, "loss": 0.2346, "step": 81510 }, { "epoch": 2.379994452635728, "grad_norm": 0.5752567457295295, "learning_rate": 1.1481481481481482e-05, "loss": 0.2309, "step": 81515 }, { "epoch": 2.3801404359060454, "grad_norm": 0.5678468365999947, "learning_rate": 1.1478778048121113e-05, "loss": 0.259, "step": 81520 }, { "epoch": 2.3802864191763624, "grad_norm": 0.6095243902673532, "learning_rate": 1.1476074614760747e-05, "loss": 0.2611, "step": 81525 }, { "epoch": 2.3804324024466794, "grad_norm": 0.6160548424319726, "learning_rate": 1.147337118140038e-05, "loss": 0.245, "step": 81530 }, { "epoch": 2.380578385716997, "grad_norm": 0.6043430431323389, "learning_rate": 1.147066774804001e-05, "loss": 0.2519, "step": 81535 }, { "epoch": 2.380724368987314, "grad_norm": 0.5769447073116879, "learning_rate": 1.1467964314679644e-05, "loss": 0.2496, "step": 81540 }, { "epoch": 2.3808703522576313, "grad_norm": 0.5896342905386818, "learning_rate": 1.1465260881319276e-05, "loss": 0.2556, "step": 81545 }, { "epoch": 2.3810163355279483, "grad_norm": 0.5669204747367151, "learning_rate": 1.1462557447958909e-05, "loss": 0.2499, "step": 81550 }, { "epoch": 2.381162318798266, "grad_norm": 0.571560489305661, "learning_rate": 1.1459854014598541e-05, "loss": 0.2499, "step": 81555 }, { "epoch": 2.381308302068583, "grad_norm": 0.626467404636462, "learning_rate": 1.1457150581238173e-05, "loss": 0.25, "step": 81560 }, { "epoch": 2.3814542853389002, "grad_norm": 0.6358502594141727, "learning_rate": 1.1454447147877806e-05, "loss": 0.2532, "step": 81565 }, { "epoch": 2.3816002686092173, "grad_norm": 0.5508403961613573, "learning_rate": 1.1451743714517438e-05, "loss": 0.239, "step": 81570 }, { "epoch": 2.3817462518795347, "grad_norm": 0.5859216117342996, "learning_rate": 1.144904028115707e-05, "loss": 0.2428, "step": 81575 }, { "epoch": 2.3818922351498517, "grad_norm": 0.5584421617664168, "learning_rate": 1.1446336847796703e-05, "loss": 0.2313, "step": 81580 }, { "epoch": 2.382038218420169, "grad_norm": 0.618828722455776, "learning_rate": 1.1443633414436335e-05, "loss": 0.2478, "step": 81585 }, { "epoch": 2.382184201690486, "grad_norm": 0.5975614014095243, "learning_rate": 1.1440929981075967e-05, "loss": 0.2557, "step": 81590 }, { "epoch": 2.3823301849608036, "grad_norm": 0.5606022647991327, "learning_rate": 1.14382265477156e-05, "loss": 0.2529, "step": 81595 }, { "epoch": 2.3824761682311206, "grad_norm": 0.6070508084978964, "learning_rate": 1.1435523114355232e-05, "loss": 0.2506, "step": 81600 }, { "epoch": 2.382622151501438, "grad_norm": 0.5278663773702456, "learning_rate": 1.1432819680994864e-05, "loss": 0.263, "step": 81605 }, { "epoch": 2.382768134771755, "grad_norm": 0.529979059044541, "learning_rate": 1.1430116247634497e-05, "loss": 0.2372, "step": 81610 }, { "epoch": 2.3829141180420725, "grad_norm": 0.6183360289783197, "learning_rate": 1.1427412814274129e-05, "loss": 0.2455, "step": 81615 }, { "epoch": 2.3830601013123895, "grad_norm": 0.5893432013319622, "learning_rate": 1.1424709380913761e-05, "loss": 0.2552, "step": 81620 }, { "epoch": 2.383206084582707, "grad_norm": 0.5697074906158531, "learning_rate": 1.1422005947553393e-05, "loss": 0.258, "step": 81625 }, { "epoch": 2.383352067853024, "grad_norm": 0.5971309238525572, "learning_rate": 1.1419302514193026e-05, "loss": 0.2567, "step": 81630 }, { "epoch": 2.3834980511233415, "grad_norm": 0.587009246074093, "learning_rate": 1.1416599080832658e-05, "loss": 0.2534, "step": 81635 }, { "epoch": 2.3836440343936585, "grad_norm": 0.586610443490603, "learning_rate": 1.141389564747229e-05, "loss": 0.2676, "step": 81640 }, { "epoch": 2.3837900176639755, "grad_norm": 0.5387201204939708, "learning_rate": 1.1411192214111923e-05, "loss": 0.2374, "step": 81645 }, { "epoch": 2.383936000934293, "grad_norm": 0.5810108527102494, "learning_rate": 1.1408488780751555e-05, "loss": 0.2569, "step": 81650 }, { "epoch": 2.3840819842046104, "grad_norm": 0.6154469707281118, "learning_rate": 1.1405785347391187e-05, "loss": 0.2548, "step": 81655 }, { "epoch": 2.3842279674749274, "grad_norm": 0.574186550630718, "learning_rate": 1.140308191403082e-05, "loss": 0.2419, "step": 81660 }, { "epoch": 2.3843739507452444, "grad_norm": 0.6121621633961004, "learning_rate": 1.1400378480670452e-05, "loss": 0.2646, "step": 81665 }, { "epoch": 2.384519934015562, "grad_norm": 0.5828297921037107, "learning_rate": 1.1397675047310084e-05, "loss": 0.2632, "step": 81670 }, { "epoch": 2.384665917285879, "grad_norm": 0.5776413649491671, "learning_rate": 1.1394971613949717e-05, "loss": 0.2338, "step": 81675 }, { "epoch": 2.3848119005561963, "grad_norm": 0.5745555453259598, "learning_rate": 1.1392268180589349e-05, "loss": 0.2469, "step": 81680 }, { "epoch": 2.3849578838265133, "grad_norm": 0.5436458331549651, "learning_rate": 1.1389564747228981e-05, "loss": 0.2275, "step": 81685 }, { "epoch": 2.3851038670968308, "grad_norm": 0.5539653354543697, "learning_rate": 1.1386861313868614e-05, "loss": 0.2494, "step": 81690 }, { "epoch": 2.3852498503671478, "grad_norm": 0.5574531394076125, "learning_rate": 1.1384157880508246e-05, "loss": 0.2398, "step": 81695 }, { "epoch": 2.385395833637465, "grad_norm": 0.5934630077398482, "learning_rate": 1.1381454447147878e-05, "loss": 0.2574, "step": 81700 }, { "epoch": 2.385541816907782, "grad_norm": 0.5524671540239312, "learning_rate": 1.137875101378751e-05, "loss": 0.2341, "step": 81705 }, { "epoch": 2.3856878001780997, "grad_norm": 0.5636357127534167, "learning_rate": 1.1376047580427143e-05, "loss": 0.2548, "step": 81710 }, { "epoch": 2.3858337834484167, "grad_norm": 0.5749110806926637, "learning_rate": 1.1373344147066775e-05, "loss": 0.2447, "step": 81715 }, { "epoch": 2.385979766718734, "grad_norm": 0.5937857234702721, "learning_rate": 1.1370640713706407e-05, "loss": 0.2294, "step": 81720 }, { "epoch": 2.386125749989051, "grad_norm": 0.606544524389834, "learning_rate": 1.136793728034604e-05, "loss": 0.254, "step": 81725 }, { "epoch": 2.3862717332593686, "grad_norm": 0.5938327204398388, "learning_rate": 1.1365233846985672e-05, "loss": 0.2537, "step": 81730 }, { "epoch": 2.3864177165296856, "grad_norm": 0.6116934992147111, "learning_rate": 1.1362530413625304e-05, "loss": 0.2567, "step": 81735 }, { "epoch": 2.386563699800003, "grad_norm": 0.5893853962005695, "learning_rate": 1.1359826980264937e-05, "loss": 0.2617, "step": 81740 }, { "epoch": 2.38670968307032, "grad_norm": 0.577169859360162, "learning_rate": 1.1357123546904569e-05, "loss": 0.2456, "step": 81745 }, { "epoch": 2.3868556663406375, "grad_norm": 0.564911885618482, "learning_rate": 1.1354420113544201e-05, "loss": 0.25, "step": 81750 }, { "epoch": 2.3870016496109545, "grad_norm": 0.5863837594835851, "learning_rate": 1.1351716680183834e-05, "loss": 0.2484, "step": 81755 }, { "epoch": 2.387147632881272, "grad_norm": 0.5370321840191238, "learning_rate": 1.1349013246823466e-05, "loss": 0.2461, "step": 81760 }, { "epoch": 2.387293616151589, "grad_norm": 0.5885433476070268, "learning_rate": 1.1346309813463098e-05, "loss": 0.2563, "step": 81765 }, { "epoch": 2.3874395994219064, "grad_norm": 0.5542130080851853, "learning_rate": 1.134360638010273e-05, "loss": 0.256, "step": 81770 }, { "epoch": 2.3875855826922234, "grad_norm": 0.6051703118978438, "learning_rate": 1.1340902946742363e-05, "loss": 0.2546, "step": 81775 }, { "epoch": 2.387731565962541, "grad_norm": 0.5420822826164121, "learning_rate": 1.1338199513381995e-05, "loss": 0.2448, "step": 81780 }, { "epoch": 2.387877549232858, "grad_norm": 0.56456526991204, "learning_rate": 1.1335496080021628e-05, "loss": 0.2518, "step": 81785 }, { "epoch": 2.3880235325031753, "grad_norm": 0.5572701735570164, "learning_rate": 1.133279264666126e-05, "loss": 0.2493, "step": 81790 }, { "epoch": 2.3881695157734923, "grad_norm": 0.5374335346014029, "learning_rate": 1.1330089213300892e-05, "loss": 0.248, "step": 81795 }, { "epoch": 2.3883154990438094, "grad_norm": 0.5447528441396143, "learning_rate": 1.1327385779940525e-05, "loss": 0.2383, "step": 81800 }, { "epoch": 2.388461482314127, "grad_norm": 0.570479912842913, "learning_rate": 1.1324682346580159e-05, "loss": 0.2373, "step": 81805 }, { "epoch": 2.3886074655844443, "grad_norm": 0.5056597090667653, "learning_rate": 1.132197891321979e-05, "loss": 0.2505, "step": 81810 }, { "epoch": 2.3887534488547613, "grad_norm": 0.5773549621267738, "learning_rate": 1.1319275479859422e-05, "loss": 0.2547, "step": 81815 }, { "epoch": 2.3888994321250783, "grad_norm": 0.5764890110296053, "learning_rate": 1.1316572046499056e-05, "loss": 0.2478, "step": 81820 }, { "epoch": 2.3890454153953957, "grad_norm": 0.5909448468656631, "learning_rate": 1.1313868613138686e-05, "loss": 0.2397, "step": 81825 }, { "epoch": 2.3891913986657127, "grad_norm": 0.5616548037582255, "learning_rate": 1.1311165179778318e-05, "loss": 0.255, "step": 81830 }, { "epoch": 2.38933738193603, "grad_norm": 0.5917987703626026, "learning_rate": 1.1308461746417952e-05, "loss": 0.2394, "step": 81835 }, { "epoch": 2.389483365206347, "grad_norm": 0.5717308069217867, "learning_rate": 1.1305758313057583e-05, "loss": 0.2467, "step": 81840 }, { "epoch": 2.3896293484766646, "grad_norm": 0.5498760500447858, "learning_rate": 1.1303054879697215e-05, "loss": 0.2482, "step": 81845 }, { "epoch": 2.3897753317469816, "grad_norm": 0.5476007848683213, "learning_rate": 1.130035144633685e-05, "loss": 0.2494, "step": 81850 }, { "epoch": 2.389921315017299, "grad_norm": 0.5930521872559132, "learning_rate": 1.129764801297648e-05, "loss": 0.2556, "step": 81855 }, { "epoch": 2.390067298287616, "grad_norm": 0.5703378139541908, "learning_rate": 1.1294944579616112e-05, "loss": 0.2593, "step": 81860 }, { "epoch": 2.3902132815579336, "grad_norm": 0.5978329897055015, "learning_rate": 1.1292241146255746e-05, "loss": 0.2474, "step": 81865 }, { "epoch": 2.3903592648282506, "grad_norm": 0.5840263778140798, "learning_rate": 1.1289537712895377e-05, "loss": 0.2423, "step": 81870 }, { "epoch": 2.390505248098568, "grad_norm": 0.5735637330987383, "learning_rate": 1.128683427953501e-05, "loss": 0.2493, "step": 81875 }, { "epoch": 2.390651231368885, "grad_norm": 0.5567468457605639, "learning_rate": 1.1284130846174643e-05, "loss": 0.2429, "step": 81880 }, { "epoch": 2.3907972146392025, "grad_norm": 0.6412235379678609, "learning_rate": 1.1281427412814274e-05, "loss": 0.2658, "step": 81885 }, { "epoch": 2.3909431979095195, "grad_norm": 0.5457107390176259, "learning_rate": 1.1278723979453908e-05, "loss": 0.2421, "step": 81890 }, { "epoch": 2.391089181179837, "grad_norm": 0.58430047880256, "learning_rate": 1.127602054609354e-05, "loss": 0.2475, "step": 81895 }, { "epoch": 2.391235164450154, "grad_norm": 0.5478343062450958, "learning_rate": 1.1273317112733171e-05, "loss": 0.254, "step": 81900 }, { "epoch": 2.3913811477204714, "grad_norm": 0.5629539482486056, "learning_rate": 1.1270613679372805e-05, "loss": 0.2569, "step": 81905 }, { "epoch": 2.3915271309907884, "grad_norm": 0.5945621377189897, "learning_rate": 1.1267910246012436e-05, "loss": 0.2669, "step": 81910 }, { "epoch": 2.391673114261106, "grad_norm": 0.6089381276279107, "learning_rate": 1.1265206812652068e-05, "loss": 0.264, "step": 81915 }, { "epoch": 2.391819097531423, "grad_norm": 0.5889743994290865, "learning_rate": 1.1262503379291702e-05, "loss": 0.2566, "step": 81920 }, { "epoch": 2.3919650808017403, "grad_norm": 0.5725766310428645, "learning_rate": 1.1259799945931332e-05, "loss": 0.2363, "step": 81925 }, { "epoch": 2.3921110640720573, "grad_norm": 0.6564895420573095, "learning_rate": 1.1257096512570965e-05, "loss": 0.2611, "step": 81930 }, { "epoch": 2.3922570473423743, "grad_norm": 0.549246305006409, "learning_rate": 1.1254393079210599e-05, "loss": 0.2459, "step": 81935 }, { "epoch": 2.3924030306126918, "grad_norm": 0.5801284534915139, "learning_rate": 1.125168964585023e-05, "loss": 0.2557, "step": 81940 }, { "epoch": 2.3925490138830092, "grad_norm": 0.6220629052264637, "learning_rate": 1.1248986212489862e-05, "loss": 0.2709, "step": 81945 }, { "epoch": 2.3926949971533262, "grad_norm": 0.5430751919691015, "learning_rate": 1.1246282779129496e-05, "loss": 0.2678, "step": 81950 }, { "epoch": 2.3928409804236432, "grad_norm": 0.5785111363308743, "learning_rate": 1.1243579345769126e-05, "loss": 0.2442, "step": 81955 }, { "epoch": 2.3929869636939607, "grad_norm": 0.5717145916630411, "learning_rate": 1.1240875912408759e-05, "loss": 0.2544, "step": 81960 }, { "epoch": 2.393132946964278, "grad_norm": 0.5687919991983027, "learning_rate": 1.1238172479048393e-05, "loss": 0.2604, "step": 81965 }, { "epoch": 2.393278930234595, "grad_norm": 0.5999593393715775, "learning_rate": 1.1235469045688023e-05, "loss": 0.2631, "step": 81970 }, { "epoch": 2.393424913504912, "grad_norm": 0.5933893216486426, "learning_rate": 1.1232765612327657e-05, "loss": 0.251, "step": 81975 }, { "epoch": 2.3935708967752296, "grad_norm": 0.5951432191594538, "learning_rate": 1.123006217896729e-05, "loss": 0.2581, "step": 81980 }, { "epoch": 2.3937168800455466, "grad_norm": 0.5762801609867815, "learning_rate": 1.122735874560692e-05, "loss": 0.2465, "step": 81985 }, { "epoch": 2.393862863315864, "grad_norm": 0.6106408973441236, "learning_rate": 1.1224655312246554e-05, "loss": 0.256, "step": 81990 }, { "epoch": 2.394008846586181, "grad_norm": 0.5752690845612604, "learning_rate": 1.1221951878886187e-05, "loss": 0.2638, "step": 81995 }, { "epoch": 2.3941548298564985, "grad_norm": 0.6254184082664715, "learning_rate": 1.1219248445525817e-05, "loss": 0.2585, "step": 82000 }, { "epoch": 2.3943008131268155, "grad_norm": 0.5373248228939197, "learning_rate": 1.1216545012165451e-05, "loss": 0.2485, "step": 82005 }, { "epoch": 2.394446796397133, "grad_norm": 0.598769364016753, "learning_rate": 1.1213841578805084e-05, "loss": 0.253, "step": 82010 }, { "epoch": 2.39459277966745, "grad_norm": 0.5819102745983751, "learning_rate": 1.1211138145444714e-05, "loss": 0.236, "step": 82015 }, { "epoch": 2.3947387629377674, "grad_norm": 0.5917022199125272, "learning_rate": 1.1208434712084348e-05, "loss": 0.2578, "step": 82020 }, { "epoch": 2.3948847462080844, "grad_norm": 0.5787258008981204, "learning_rate": 1.120573127872398e-05, "loss": 0.2459, "step": 82025 }, { "epoch": 2.395030729478402, "grad_norm": 0.5870039712735408, "learning_rate": 1.1203027845363611e-05, "loss": 0.2626, "step": 82030 }, { "epoch": 2.395176712748719, "grad_norm": 0.6016176662792633, "learning_rate": 1.1200324412003245e-05, "loss": 0.2553, "step": 82035 }, { "epoch": 2.3953226960190364, "grad_norm": 0.5830942730845449, "learning_rate": 1.1197620978642877e-05, "loss": 0.2522, "step": 82040 }, { "epoch": 2.3954686792893534, "grad_norm": 0.5939437275213382, "learning_rate": 1.1194917545282508e-05, "loss": 0.2425, "step": 82045 }, { "epoch": 2.395614662559671, "grad_norm": 0.5965234589927854, "learning_rate": 1.1192214111922142e-05, "loss": 0.2596, "step": 82050 }, { "epoch": 2.395760645829988, "grad_norm": 0.5659795079664447, "learning_rate": 1.1189510678561774e-05, "loss": 0.2446, "step": 82055 }, { "epoch": 2.3959066291003053, "grad_norm": 0.5547698317762587, "learning_rate": 1.1186807245201407e-05, "loss": 0.261, "step": 82060 }, { "epoch": 2.3960526123706223, "grad_norm": 0.6348941550906836, "learning_rate": 1.1184103811841039e-05, "loss": 0.2566, "step": 82065 }, { "epoch": 2.3961985956409397, "grad_norm": 0.6094607366253082, "learning_rate": 1.1181400378480671e-05, "loss": 0.2555, "step": 82070 }, { "epoch": 2.3963445789112567, "grad_norm": 0.5269111892511803, "learning_rate": 1.1178696945120304e-05, "loss": 0.2463, "step": 82075 }, { "epoch": 2.396490562181574, "grad_norm": 0.5759679697863934, "learning_rate": 1.1175993511759936e-05, "loss": 0.2477, "step": 82080 }, { "epoch": 2.396636545451891, "grad_norm": 0.6171626485033566, "learning_rate": 1.1173290078399568e-05, "loss": 0.2493, "step": 82085 }, { "epoch": 2.396782528722208, "grad_norm": 0.5710814047596715, "learning_rate": 1.11705866450392e-05, "loss": 0.2466, "step": 82090 }, { "epoch": 2.3969285119925257, "grad_norm": 0.6088913161201445, "learning_rate": 1.1167883211678833e-05, "loss": 0.2553, "step": 82095 }, { "epoch": 2.397074495262843, "grad_norm": 0.5506886437791824, "learning_rate": 1.1165179778318465e-05, "loss": 0.2457, "step": 82100 }, { "epoch": 2.39722047853316, "grad_norm": 0.543115230634833, "learning_rate": 1.1162476344958098e-05, "loss": 0.2618, "step": 82105 }, { "epoch": 2.397366461803477, "grad_norm": 0.6208389649648617, "learning_rate": 1.115977291159773e-05, "loss": 0.2497, "step": 82110 }, { "epoch": 2.3975124450737946, "grad_norm": 0.5652863279407049, "learning_rate": 1.115706947823736e-05, "loss": 0.243, "step": 82115 }, { "epoch": 2.3976584283441116, "grad_norm": 0.5790670664284079, "learning_rate": 1.1154366044876995e-05, "loss": 0.2415, "step": 82120 }, { "epoch": 2.397804411614429, "grad_norm": 0.5526045904313223, "learning_rate": 1.1151662611516627e-05, "loss": 0.2477, "step": 82125 }, { "epoch": 2.397950394884746, "grad_norm": 0.5826313878660437, "learning_rate": 1.1148959178156258e-05, "loss": 0.2472, "step": 82130 }, { "epoch": 2.3980963781550635, "grad_norm": 0.5766551032258297, "learning_rate": 1.1146255744795892e-05, "loss": 0.2406, "step": 82135 }, { "epoch": 2.3982423614253805, "grad_norm": 0.5903684117416063, "learning_rate": 1.1143552311435524e-05, "loss": 0.2419, "step": 82140 }, { "epoch": 2.398388344695698, "grad_norm": 0.5793235810595664, "learning_rate": 1.1140848878075156e-05, "loss": 0.256, "step": 82145 }, { "epoch": 2.398534327966015, "grad_norm": 0.5449971650545501, "learning_rate": 1.1138145444714788e-05, "loss": 0.2296, "step": 82150 }, { "epoch": 2.3986803112363324, "grad_norm": 0.6342096109022751, "learning_rate": 1.113544201135442e-05, "loss": 0.2604, "step": 82155 }, { "epoch": 2.3988262945066494, "grad_norm": 0.5214413503625245, "learning_rate": 1.1132738577994053e-05, "loss": 0.2291, "step": 82160 }, { "epoch": 2.398972277776967, "grad_norm": 0.628477194603238, "learning_rate": 1.1130035144633685e-05, "loss": 0.2702, "step": 82165 }, { "epoch": 2.399118261047284, "grad_norm": 0.5981185449710577, "learning_rate": 1.1127331711273318e-05, "loss": 0.2385, "step": 82170 }, { "epoch": 2.3992642443176013, "grad_norm": 0.5370996549105905, "learning_rate": 1.112462827791295e-05, "loss": 0.2508, "step": 82175 }, { "epoch": 2.3994102275879183, "grad_norm": 0.5748405462198326, "learning_rate": 1.1121924844552582e-05, "loss": 0.2491, "step": 82180 }, { "epoch": 2.399556210858236, "grad_norm": 0.6220481533160985, "learning_rate": 1.1119221411192215e-05, "loss": 0.2485, "step": 82185 }, { "epoch": 2.399702194128553, "grad_norm": 0.5664937031892586, "learning_rate": 1.1116517977831847e-05, "loss": 0.2346, "step": 82190 }, { "epoch": 2.3998481773988702, "grad_norm": 0.6086550034802738, "learning_rate": 1.111381454447148e-05, "loss": 0.2519, "step": 82195 }, { "epoch": 2.3999941606691872, "grad_norm": 0.640042566596332, "learning_rate": 1.1111111111111112e-05, "loss": 0.2608, "step": 82200 }, { "epoch": 2.4001401439395047, "grad_norm": 0.5843873782386482, "learning_rate": 1.1108407677750744e-05, "loss": 0.2477, "step": 82205 }, { "epoch": 2.4002861272098217, "grad_norm": 1.5739621970915196, "learning_rate": 1.1105704244390376e-05, "loss": 0.2558, "step": 82210 }, { "epoch": 2.400432110480139, "grad_norm": 0.604211310593574, "learning_rate": 1.1103000811030009e-05, "loss": 0.2544, "step": 82215 }, { "epoch": 2.400578093750456, "grad_norm": 0.6483408353086989, "learning_rate": 1.1100297377669641e-05, "loss": 0.2596, "step": 82220 }, { "epoch": 2.400724077020773, "grad_norm": 0.5793420636015726, "learning_rate": 1.1097593944309273e-05, "loss": 0.251, "step": 82225 }, { "epoch": 2.4008700602910906, "grad_norm": 0.5598188497118913, "learning_rate": 1.1094890510948906e-05, "loss": 0.2563, "step": 82230 }, { "epoch": 2.401016043561408, "grad_norm": 0.5658017053959916, "learning_rate": 1.1092187077588538e-05, "loss": 0.2619, "step": 82235 }, { "epoch": 2.401162026831725, "grad_norm": 0.5689894183666085, "learning_rate": 1.108948364422817e-05, "loss": 0.2509, "step": 82240 }, { "epoch": 2.401308010102042, "grad_norm": 0.6053922272592174, "learning_rate": 1.1086780210867802e-05, "loss": 0.2632, "step": 82245 }, { "epoch": 2.4014539933723595, "grad_norm": 0.6059244860104056, "learning_rate": 1.1084076777507435e-05, "loss": 0.2463, "step": 82250 }, { "epoch": 2.401599976642677, "grad_norm": 0.5787011030148677, "learning_rate": 1.1081373344147067e-05, "loss": 0.2642, "step": 82255 }, { "epoch": 2.401745959912994, "grad_norm": 0.6099751606946073, "learning_rate": 1.10786699107867e-05, "loss": 0.2616, "step": 82260 }, { "epoch": 2.401891943183311, "grad_norm": 0.5643657385518956, "learning_rate": 1.1075966477426332e-05, "loss": 0.2437, "step": 82265 }, { "epoch": 2.4020379264536285, "grad_norm": 0.6204722650898253, "learning_rate": 1.1073263044065964e-05, "loss": 0.2464, "step": 82270 }, { "epoch": 2.4021839097239455, "grad_norm": 0.5888657996454857, "learning_rate": 1.1070559610705596e-05, "loss": 0.2503, "step": 82275 }, { "epoch": 2.402329892994263, "grad_norm": 0.6011832188430125, "learning_rate": 1.1067856177345229e-05, "loss": 0.2644, "step": 82280 }, { "epoch": 2.40247587626458, "grad_norm": 0.5598874119359837, "learning_rate": 1.1065152743984861e-05, "loss": 0.2521, "step": 82285 }, { "epoch": 2.4026218595348974, "grad_norm": 0.5556709562539138, "learning_rate": 1.1062449310624493e-05, "loss": 0.2509, "step": 82290 }, { "epoch": 2.4027678428052144, "grad_norm": 0.5492631271164894, "learning_rate": 1.1059745877264126e-05, "loss": 0.2574, "step": 82295 }, { "epoch": 2.402913826075532, "grad_norm": 0.5779147738554752, "learning_rate": 1.105704244390376e-05, "loss": 0.2622, "step": 82300 }, { "epoch": 2.403059809345849, "grad_norm": 0.5759714353564029, "learning_rate": 1.105433901054339e-05, "loss": 0.2502, "step": 82305 }, { "epoch": 2.4032057926161663, "grad_norm": 0.5856109613229638, "learning_rate": 1.1051635577183023e-05, "loss": 0.2376, "step": 82310 }, { "epoch": 2.4033517758864833, "grad_norm": 0.5616827621771459, "learning_rate": 1.1048932143822657e-05, "loss": 0.251, "step": 82315 }, { "epoch": 2.4034977591568008, "grad_norm": 0.5919552231758198, "learning_rate": 1.1046228710462287e-05, "loss": 0.2582, "step": 82320 }, { "epoch": 2.4036437424271178, "grad_norm": 0.5961212577040222, "learning_rate": 1.104352527710192e-05, "loss": 0.2507, "step": 82325 }, { "epoch": 2.403789725697435, "grad_norm": 0.624941221773549, "learning_rate": 1.1040821843741554e-05, "loss": 0.2553, "step": 82330 }, { "epoch": 2.403935708967752, "grad_norm": 0.5821519326594785, "learning_rate": 1.1038118410381184e-05, "loss": 0.2512, "step": 82335 }, { "epoch": 2.4040816922380697, "grad_norm": 0.5746672958974735, "learning_rate": 1.1035414977020817e-05, "loss": 0.2453, "step": 82340 }, { "epoch": 2.4042276755083867, "grad_norm": 0.605192703067236, "learning_rate": 1.103271154366045e-05, "loss": 0.2568, "step": 82345 }, { "epoch": 2.404373658778704, "grad_norm": 0.5418120318362268, "learning_rate": 1.1030008110300081e-05, "loss": 0.2469, "step": 82350 }, { "epoch": 2.404519642049021, "grad_norm": 0.5745356298595325, "learning_rate": 1.1027304676939713e-05, "loss": 0.2571, "step": 82355 }, { "epoch": 2.4046656253193386, "grad_norm": 0.5844848816125155, "learning_rate": 1.1024601243579347e-05, "loss": 0.2453, "step": 82360 }, { "epoch": 2.4048116085896556, "grad_norm": 0.5947043978344424, "learning_rate": 1.1021897810218978e-05, "loss": 0.2494, "step": 82365 }, { "epoch": 2.404957591859973, "grad_norm": 0.5810849132002297, "learning_rate": 1.101919437685861e-05, "loss": 0.2476, "step": 82370 }, { "epoch": 2.40510357513029, "grad_norm": 0.5843414890055523, "learning_rate": 1.1016490943498243e-05, "loss": 0.2527, "step": 82375 }, { "epoch": 2.405249558400607, "grad_norm": 0.6042532730778981, "learning_rate": 1.1013787510137875e-05, "loss": 0.2608, "step": 82380 }, { "epoch": 2.4053955416709245, "grad_norm": 0.5171532993387522, "learning_rate": 1.1011084076777509e-05, "loss": 0.2567, "step": 82385 }, { "epoch": 2.405541524941242, "grad_norm": 0.5488342293556888, "learning_rate": 1.100838064341714e-05, "loss": 0.2437, "step": 82390 }, { "epoch": 2.405687508211559, "grad_norm": 0.6112093162946101, "learning_rate": 1.1005677210056772e-05, "loss": 0.252, "step": 82395 }, { "epoch": 2.405833491481876, "grad_norm": 0.5971712994388652, "learning_rate": 1.1002973776696406e-05, "loss": 0.2513, "step": 82400 }, { "epoch": 2.4059794747521934, "grad_norm": 0.583386922698466, "learning_rate": 1.1000270343336037e-05, "loss": 0.2401, "step": 82405 }, { "epoch": 2.4061254580225104, "grad_norm": 0.591450436896421, "learning_rate": 1.0997566909975669e-05, "loss": 0.2559, "step": 82410 }, { "epoch": 2.406271441292828, "grad_norm": 0.6077196424144541, "learning_rate": 1.0994863476615303e-05, "loss": 0.2654, "step": 82415 }, { "epoch": 2.406417424563145, "grad_norm": 0.5862080714748348, "learning_rate": 1.0992160043254934e-05, "loss": 0.2562, "step": 82420 }, { "epoch": 2.4065634078334623, "grad_norm": 0.5695029273579512, "learning_rate": 1.0989456609894566e-05, "loss": 0.2533, "step": 82425 }, { "epoch": 2.4067093911037793, "grad_norm": 0.5708128143496202, "learning_rate": 1.09867531765342e-05, "loss": 0.2672, "step": 82430 }, { "epoch": 2.406855374374097, "grad_norm": 0.5870856095125054, "learning_rate": 1.098404974317383e-05, "loss": 0.2397, "step": 82435 }, { "epoch": 2.407001357644414, "grad_norm": 0.588130612674363, "learning_rate": 1.0981346309813463e-05, "loss": 0.2587, "step": 82440 }, { "epoch": 2.4071473409147313, "grad_norm": 0.6184080372430449, "learning_rate": 1.0978642876453097e-05, "loss": 0.2531, "step": 82445 }, { "epoch": 2.4072933241850483, "grad_norm": 0.5831024803287169, "learning_rate": 1.0975939443092728e-05, "loss": 0.2588, "step": 82450 }, { "epoch": 2.4074393074553657, "grad_norm": 0.524065275249287, "learning_rate": 1.097323600973236e-05, "loss": 0.244, "step": 82455 }, { "epoch": 2.4075852907256827, "grad_norm": 0.6214525351760622, "learning_rate": 1.0970532576371994e-05, "loss": 0.2382, "step": 82460 }, { "epoch": 2.407731273996, "grad_norm": 0.5727416505065859, "learning_rate": 1.0967829143011624e-05, "loss": 0.2486, "step": 82465 }, { "epoch": 2.407877257266317, "grad_norm": 0.6081564995856864, "learning_rate": 1.0965125709651258e-05, "loss": 0.2352, "step": 82470 }, { "epoch": 2.4080232405366346, "grad_norm": 0.6338338403421989, "learning_rate": 1.096242227629089e-05, "loss": 0.2499, "step": 82475 }, { "epoch": 2.4081692238069516, "grad_norm": 0.5779130292866967, "learning_rate": 1.0959718842930521e-05, "loss": 0.2399, "step": 82480 }, { "epoch": 2.408315207077269, "grad_norm": 0.6098294076076896, "learning_rate": 1.0957015409570155e-05, "loss": 0.257, "step": 82485 }, { "epoch": 2.408461190347586, "grad_norm": 0.6170421781277587, "learning_rate": 1.0954311976209788e-05, "loss": 0.2442, "step": 82490 }, { "epoch": 2.4086071736179036, "grad_norm": 0.5753988425788796, "learning_rate": 1.0951608542849418e-05, "loss": 0.2468, "step": 82495 }, { "epoch": 2.4087531568882206, "grad_norm": 0.5311928579353306, "learning_rate": 1.0948905109489052e-05, "loss": 0.2395, "step": 82500 }, { "epoch": 2.408899140158538, "grad_norm": 0.5451077339777785, "learning_rate": 1.0946201676128685e-05, "loss": 0.2328, "step": 82505 }, { "epoch": 2.409045123428855, "grad_norm": 0.5532027013672245, "learning_rate": 1.0943498242768315e-05, "loss": 0.2426, "step": 82510 }, { "epoch": 2.4091911066991725, "grad_norm": 0.5753836229519881, "learning_rate": 1.094079480940795e-05, "loss": 0.2396, "step": 82515 }, { "epoch": 2.4093370899694895, "grad_norm": 0.636165020492172, "learning_rate": 1.0938091376047582e-05, "loss": 0.257, "step": 82520 }, { "epoch": 2.409483073239807, "grad_norm": 0.5641472688681165, "learning_rate": 1.0935387942687212e-05, "loss": 0.2449, "step": 82525 }, { "epoch": 2.409629056510124, "grad_norm": 0.602391294595832, "learning_rate": 1.0932684509326846e-05, "loss": 0.2543, "step": 82530 }, { "epoch": 2.409775039780441, "grad_norm": 0.5617621313829692, "learning_rate": 1.0929981075966479e-05, "loss": 0.2433, "step": 82535 }, { "epoch": 2.4099210230507584, "grad_norm": 0.6031281849480041, "learning_rate": 1.092727764260611e-05, "loss": 0.26, "step": 82540 }, { "epoch": 2.410067006321076, "grad_norm": 0.5580874155858866, "learning_rate": 1.0924574209245743e-05, "loss": 0.2468, "step": 82545 }, { "epoch": 2.410212989591393, "grad_norm": 0.5715012658826628, "learning_rate": 1.0921870775885376e-05, "loss": 0.2509, "step": 82550 }, { "epoch": 2.41035897286171, "grad_norm": 0.6189342615771979, "learning_rate": 1.0919167342525008e-05, "loss": 0.2462, "step": 82555 }, { "epoch": 2.4105049561320273, "grad_norm": 0.5997073845531299, "learning_rate": 1.091646390916464e-05, "loss": 0.2361, "step": 82560 }, { "epoch": 2.4106509394023443, "grad_norm": 0.5835124593329915, "learning_rate": 1.091376047580427e-05, "loss": 0.2667, "step": 82565 }, { "epoch": 2.4107969226726618, "grad_norm": 0.5496007132879711, "learning_rate": 1.0911057042443905e-05, "loss": 0.2555, "step": 82570 }, { "epoch": 2.4109429059429788, "grad_norm": 0.5306534460151832, "learning_rate": 1.0908353609083537e-05, "loss": 0.2588, "step": 82575 }, { "epoch": 2.4110888892132962, "grad_norm": 0.5868327690393035, "learning_rate": 1.0905650175723168e-05, "loss": 0.2506, "step": 82580 }, { "epoch": 2.4112348724836132, "grad_norm": 0.5797876632662429, "learning_rate": 1.0902946742362802e-05, "loss": 0.2576, "step": 82585 }, { "epoch": 2.4113808557539307, "grad_norm": 0.5475299883810175, "learning_rate": 1.0900243309002434e-05, "loss": 0.2536, "step": 82590 }, { "epoch": 2.4115268390242477, "grad_norm": 0.5723888249592314, "learning_rate": 1.0897539875642065e-05, "loss": 0.261, "step": 82595 }, { "epoch": 2.411672822294565, "grad_norm": 0.6091112605181876, "learning_rate": 1.0894836442281699e-05, "loss": 0.2509, "step": 82600 }, { "epoch": 2.411818805564882, "grad_norm": 0.5449645321298813, "learning_rate": 1.0892133008921331e-05, "loss": 0.241, "step": 82605 }, { "epoch": 2.4119647888351996, "grad_norm": 0.5605435531715439, "learning_rate": 1.0889429575560962e-05, "loss": 0.2581, "step": 82610 }, { "epoch": 2.4121107721055166, "grad_norm": 0.60208044637214, "learning_rate": 1.0886726142200596e-05, "loss": 0.2382, "step": 82615 }, { "epoch": 2.412256755375834, "grad_norm": 0.5366075531070651, "learning_rate": 1.0884022708840228e-05, "loss": 0.2504, "step": 82620 }, { "epoch": 2.412402738646151, "grad_norm": 0.5619236757873691, "learning_rate": 1.0881319275479859e-05, "loss": 0.2505, "step": 82625 }, { "epoch": 2.4125487219164685, "grad_norm": 0.6014755380503647, "learning_rate": 1.0878615842119493e-05, "loss": 0.2424, "step": 82630 }, { "epoch": 2.4126947051867855, "grad_norm": 0.643064422487197, "learning_rate": 1.0875912408759125e-05, "loss": 0.2506, "step": 82635 }, { "epoch": 2.412840688457103, "grad_norm": 0.588311507803204, "learning_rate": 1.0873208975398757e-05, "loss": 0.2392, "step": 82640 }, { "epoch": 2.41298667172742, "grad_norm": 0.5876048279065396, "learning_rate": 1.087050554203839e-05, "loss": 0.2525, "step": 82645 }, { "epoch": 2.4131326549977374, "grad_norm": 0.5795946363904246, "learning_rate": 1.0867802108678022e-05, "loss": 0.2426, "step": 82650 }, { "epoch": 2.4132786382680544, "grad_norm": 0.5585507320890375, "learning_rate": 1.0865098675317654e-05, "loss": 0.2392, "step": 82655 }, { "epoch": 2.413424621538372, "grad_norm": 0.5948568614842354, "learning_rate": 1.0862395241957287e-05, "loss": 0.2397, "step": 82660 }, { "epoch": 2.413570604808689, "grad_norm": 0.6043509575671013, "learning_rate": 1.0859691808596919e-05, "loss": 0.2462, "step": 82665 }, { "epoch": 2.413716588079006, "grad_norm": 0.5565341772616514, "learning_rate": 1.0856988375236551e-05, "loss": 0.2462, "step": 82670 }, { "epoch": 2.4138625713493234, "grad_norm": 0.5543608211247023, "learning_rate": 1.0854284941876183e-05, "loss": 0.2434, "step": 82675 }, { "epoch": 2.414008554619641, "grad_norm": 0.5578718841465915, "learning_rate": 1.0851581508515816e-05, "loss": 0.2475, "step": 82680 }, { "epoch": 2.414154537889958, "grad_norm": 0.5745722102040858, "learning_rate": 1.0848878075155448e-05, "loss": 0.2586, "step": 82685 }, { "epoch": 2.414300521160275, "grad_norm": 0.5619290629475452, "learning_rate": 1.084617464179508e-05, "loss": 0.2286, "step": 82690 }, { "epoch": 2.4144465044305923, "grad_norm": 0.6085618353374892, "learning_rate": 1.0843471208434713e-05, "loss": 0.266, "step": 82695 }, { "epoch": 2.4145924877009093, "grad_norm": 0.6161797720026017, "learning_rate": 1.0840767775074345e-05, "loss": 0.2579, "step": 82700 }, { "epoch": 2.4147384709712267, "grad_norm": 0.5609526568627814, "learning_rate": 1.0838064341713977e-05, "loss": 0.239, "step": 82705 }, { "epoch": 2.4148844542415437, "grad_norm": 0.6166956408917886, "learning_rate": 1.083536090835361e-05, "loss": 0.2499, "step": 82710 }, { "epoch": 2.415030437511861, "grad_norm": 0.5671406864477954, "learning_rate": 1.0832657474993242e-05, "loss": 0.2572, "step": 82715 }, { "epoch": 2.415176420782178, "grad_norm": 0.5850332632303276, "learning_rate": 1.0829954041632874e-05, "loss": 0.2518, "step": 82720 }, { "epoch": 2.4153224040524957, "grad_norm": 0.6124965157651702, "learning_rate": 1.0827250608272507e-05, "loss": 0.2542, "step": 82725 }, { "epoch": 2.4154683873228127, "grad_norm": 0.5942421444331921, "learning_rate": 1.0824547174912139e-05, "loss": 0.2619, "step": 82730 }, { "epoch": 2.41561437059313, "grad_norm": 0.5495459576704111, "learning_rate": 1.0821843741551771e-05, "loss": 0.2365, "step": 82735 }, { "epoch": 2.415760353863447, "grad_norm": 0.5488582952149097, "learning_rate": 1.0819140308191404e-05, "loss": 0.2421, "step": 82740 }, { "epoch": 2.4159063371337646, "grad_norm": 0.5679794524429863, "learning_rate": 1.0816436874831036e-05, "loss": 0.2564, "step": 82745 }, { "epoch": 2.4160523204040816, "grad_norm": 0.6058023420579768, "learning_rate": 1.0813733441470668e-05, "loss": 0.241, "step": 82750 }, { "epoch": 2.416198303674399, "grad_norm": 0.5861886584436138, "learning_rate": 1.08110300081103e-05, "loss": 0.2558, "step": 82755 }, { "epoch": 2.416344286944716, "grad_norm": 0.6042208951783672, "learning_rate": 1.0808326574749933e-05, "loss": 0.251, "step": 82760 }, { "epoch": 2.4164902702150335, "grad_norm": 0.6174232697896239, "learning_rate": 1.0805623141389565e-05, "loss": 0.2568, "step": 82765 }, { "epoch": 2.4166362534853505, "grad_norm": 0.57625585699091, "learning_rate": 1.0802919708029198e-05, "loss": 0.2449, "step": 82770 }, { "epoch": 2.416782236755668, "grad_norm": 0.5715680493417198, "learning_rate": 1.080021627466883e-05, "loss": 0.2483, "step": 82775 }, { "epoch": 2.416928220025985, "grad_norm": 0.5726501040533241, "learning_rate": 1.0797512841308462e-05, "loss": 0.2472, "step": 82780 }, { "epoch": 2.4170742032963024, "grad_norm": 0.6095930585537107, "learning_rate": 1.0794809407948094e-05, "loss": 0.2502, "step": 82785 }, { "epoch": 2.4172201865666194, "grad_norm": 0.548250372800318, "learning_rate": 1.0792105974587727e-05, "loss": 0.2426, "step": 82790 }, { "epoch": 2.417366169836937, "grad_norm": 0.6198371986872825, "learning_rate": 1.0789402541227359e-05, "loss": 0.2531, "step": 82795 }, { "epoch": 2.417512153107254, "grad_norm": 0.5698398285664756, "learning_rate": 1.0786699107866991e-05, "loss": 0.2475, "step": 82800 }, { "epoch": 2.4176581363775713, "grad_norm": 0.5808558072961979, "learning_rate": 1.0783995674506624e-05, "loss": 0.2591, "step": 82805 }, { "epoch": 2.4178041196478883, "grad_norm": 0.5849623864572332, "learning_rate": 1.0781292241146258e-05, "loss": 0.2622, "step": 82810 }, { "epoch": 2.417950102918206, "grad_norm": 0.5655692765885366, "learning_rate": 1.0778588807785888e-05, "loss": 0.2454, "step": 82815 }, { "epoch": 2.418096086188523, "grad_norm": 0.624932707959607, "learning_rate": 1.077588537442552e-05, "loss": 0.2613, "step": 82820 }, { "epoch": 2.41824206945884, "grad_norm": 0.5630627359947132, "learning_rate": 1.0773181941065153e-05, "loss": 0.2421, "step": 82825 }, { "epoch": 2.4183880527291572, "grad_norm": 0.5369978143294903, "learning_rate": 1.0770478507704785e-05, "loss": 0.2529, "step": 82830 }, { "epoch": 2.4185340359994747, "grad_norm": 0.5485100058928332, "learning_rate": 1.0767775074344418e-05, "loss": 0.2538, "step": 82835 }, { "epoch": 2.4186800192697917, "grad_norm": 0.5813625421312842, "learning_rate": 1.076507164098405e-05, "loss": 0.2417, "step": 82840 }, { "epoch": 2.4188260025401087, "grad_norm": 0.5877772233826184, "learning_rate": 1.0762368207623682e-05, "loss": 0.2628, "step": 82845 }, { "epoch": 2.418971985810426, "grad_norm": 0.5767782463530111, "learning_rate": 1.0759664774263315e-05, "loss": 0.2443, "step": 82850 }, { "epoch": 2.419117969080743, "grad_norm": 0.5892031111143419, "learning_rate": 1.0756961340902947e-05, "loss": 0.2545, "step": 82855 }, { "epoch": 2.4192639523510606, "grad_norm": 0.5908708007932528, "learning_rate": 1.075425790754258e-05, "loss": 0.2413, "step": 82860 }, { "epoch": 2.4194099356213776, "grad_norm": 0.5757061261863204, "learning_rate": 1.0751554474182212e-05, "loss": 0.2546, "step": 82865 }, { "epoch": 2.419555918891695, "grad_norm": 0.5501128477079117, "learning_rate": 1.0748851040821844e-05, "loss": 0.2666, "step": 82870 }, { "epoch": 2.419701902162012, "grad_norm": 0.5695246686436704, "learning_rate": 1.0746147607461476e-05, "loss": 0.2544, "step": 82875 }, { "epoch": 2.4198478854323295, "grad_norm": 0.5599568087066054, "learning_rate": 1.0743444174101108e-05, "loss": 0.2516, "step": 82880 }, { "epoch": 2.4199938687026465, "grad_norm": 0.5503691856520904, "learning_rate": 1.074074074074074e-05, "loss": 0.2274, "step": 82885 }, { "epoch": 2.420139851972964, "grad_norm": 0.5764756686071555, "learning_rate": 1.0738037307380373e-05, "loss": 0.2549, "step": 82890 }, { "epoch": 2.420285835243281, "grad_norm": 0.5768680336217443, "learning_rate": 1.0735333874020007e-05, "loss": 0.2561, "step": 82895 }, { "epoch": 2.4204318185135985, "grad_norm": 0.5324166204913663, "learning_rate": 1.0732630440659638e-05, "loss": 0.2509, "step": 82900 }, { "epoch": 2.4205778017839155, "grad_norm": 0.5491085803676095, "learning_rate": 1.072992700729927e-05, "loss": 0.2584, "step": 82905 }, { "epoch": 2.420723785054233, "grad_norm": 0.5463327844718271, "learning_rate": 1.0727223573938904e-05, "loss": 0.2417, "step": 82910 }, { "epoch": 2.42086976832455, "grad_norm": 0.5665733052275453, "learning_rate": 1.0724520140578535e-05, "loss": 0.2309, "step": 82915 }, { "epoch": 2.4210157515948674, "grad_norm": 0.5112812022712686, "learning_rate": 1.0721816707218167e-05, "loss": 0.2214, "step": 82920 }, { "epoch": 2.4211617348651844, "grad_norm": 0.5762562948097536, "learning_rate": 1.0719113273857801e-05, "loss": 0.2546, "step": 82925 }, { "epoch": 2.421307718135502, "grad_norm": 0.5629420287336796, "learning_rate": 1.0716409840497432e-05, "loss": 0.2507, "step": 82930 }, { "epoch": 2.421453701405819, "grad_norm": 0.5664525365499841, "learning_rate": 1.0713706407137064e-05, "loss": 0.2543, "step": 82935 }, { "epoch": 2.4215996846761363, "grad_norm": 0.5602648457924042, "learning_rate": 1.0711002973776698e-05, "loss": 0.2549, "step": 82940 }, { "epoch": 2.4217456679464533, "grad_norm": 0.5283024618272962, "learning_rate": 1.0708299540416329e-05, "loss": 0.2337, "step": 82945 }, { "epoch": 2.4218916512167707, "grad_norm": 0.595211983603507, "learning_rate": 1.0705596107055961e-05, "loss": 0.2517, "step": 82950 }, { "epoch": 2.4220376344870878, "grad_norm": 0.5840688003118647, "learning_rate": 1.0702892673695595e-05, "loss": 0.2561, "step": 82955 }, { "epoch": 2.4221836177574048, "grad_norm": 0.6016941655200638, "learning_rate": 1.0700189240335226e-05, "loss": 0.2487, "step": 82960 }, { "epoch": 2.422329601027722, "grad_norm": 0.5951181165557816, "learning_rate": 1.0697485806974858e-05, "loss": 0.2552, "step": 82965 }, { "epoch": 2.4224755842980397, "grad_norm": 0.5612702443983668, "learning_rate": 1.0694782373614492e-05, "loss": 0.2446, "step": 82970 }, { "epoch": 2.4226215675683567, "grad_norm": 0.5727741835601116, "learning_rate": 1.0692078940254123e-05, "loss": 0.2555, "step": 82975 }, { "epoch": 2.4227675508386737, "grad_norm": 0.5534957566792029, "learning_rate": 1.0689375506893757e-05, "loss": 0.2512, "step": 82980 }, { "epoch": 2.422913534108991, "grad_norm": 0.5755197322729574, "learning_rate": 1.0686672073533389e-05, "loss": 0.2508, "step": 82985 }, { "epoch": 2.4230595173793086, "grad_norm": 0.577763280798947, "learning_rate": 1.068396864017302e-05, "loss": 0.2492, "step": 82990 }, { "epoch": 2.4232055006496256, "grad_norm": 0.5984801996121002, "learning_rate": 1.0681265206812653e-05, "loss": 0.2497, "step": 82995 }, { "epoch": 2.4233514839199426, "grad_norm": 0.619673893988449, "learning_rate": 1.0678561773452286e-05, "loss": 0.2506, "step": 83000 }, { "epoch": 2.42349746719026, "grad_norm": 0.5853622320511845, "learning_rate": 1.0675858340091916e-05, "loss": 0.2393, "step": 83005 }, { "epoch": 2.423643450460577, "grad_norm": 0.5986302062014763, "learning_rate": 1.067315490673155e-05, "loss": 0.2551, "step": 83010 }, { "epoch": 2.4237894337308945, "grad_norm": 0.5694157521086771, "learning_rate": 1.0670451473371181e-05, "loss": 0.2564, "step": 83015 }, { "epoch": 2.4239354170012115, "grad_norm": 0.6030843857498427, "learning_rate": 1.0667748040010813e-05, "loss": 0.2543, "step": 83020 }, { "epoch": 2.424081400271529, "grad_norm": 0.5305626475461492, "learning_rate": 1.0665044606650447e-05, "loss": 0.242, "step": 83025 }, { "epoch": 2.424227383541846, "grad_norm": 0.5323956454212835, "learning_rate": 1.0662341173290078e-05, "loss": 0.236, "step": 83030 }, { "epoch": 2.4243733668121634, "grad_norm": 0.5650859857768152, "learning_rate": 1.065963773992971e-05, "loss": 0.2485, "step": 83035 }, { "epoch": 2.4245193500824804, "grad_norm": 0.5511477061322855, "learning_rate": 1.0656934306569344e-05, "loss": 0.2455, "step": 83040 }, { "epoch": 2.424665333352798, "grad_norm": 0.5766840075309726, "learning_rate": 1.0654230873208975e-05, "loss": 0.2543, "step": 83045 }, { "epoch": 2.424811316623115, "grad_norm": 0.5650498307300728, "learning_rate": 1.0651527439848607e-05, "loss": 0.2538, "step": 83050 }, { "epoch": 2.4249572998934323, "grad_norm": 0.5820402606046945, "learning_rate": 1.0648824006488241e-05, "loss": 0.2604, "step": 83055 }, { "epoch": 2.4251032831637493, "grad_norm": 0.5672887192881353, "learning_rate": 1.0646120573127872e-05, "loss": 0.2507, "step": 83060 }, { "epoch": 2.425249266434067, "grad_norm": 0.5448240106641993, "learning_rate": 1.0643417139767506e-05, "loss": 0.2458, "step": 83065 }, { "epoch": 2.425395249704384, "grad_norm": 0.5737822647064702, "learning_rate": 1.0640713706407138e-05, "loss": 0.2547, "step": 83070 }, { "epoch": 2.4255412329747013, "grad_norm": 0.541891592556101, "learning_rate": 1.0638010273046769e-05, "loss": 0.247, "step": 83075 }, { "epoch": 2.4256872162450183, "grad_norm": 0.6019989839104831, "learning_rate": 1.0635306839686403e-05, "loss": 0.2546, "step": 83080 }, { "epoch": 2.4258331995153357, "grad_norm": 0.5759369859797896, "learning_rate": 1.0632603406326035e-05, "loss": 0.2425, "step": 83085 }, { "epoch": 2.4259791827856527, "grad_norm": 0.6072971277094072, "learning_rate": 1.0629899972965666e-05, "loss": 0.2578, "step": 83090 }, { "epoch": 2.42612516605597, "grad_norm": 0.6193198376856188, "learning_rate": 1.06271965396053e-05, "loss": 0.2522, "step": 83095 }, { "epoch": 2.426271149326287, "grad_norm": 0.5357586655154224, "learning_rate": 1.0624493106244932e-05, "loss": 0.2517, "step": 83100 }, { "epoch": 2.4264171325966046, "grad_norm": 0.5720519906474651, "learning_rate": 1.0621789672884563e-05, "loss": 0.2508, "step": 83105 }, { "epoch": 2.4265631158669216, "grad_norm": 0.5368772796431767, "learning_rate": 1.0619086239524197e-05, "loss": 0.267, "step": 83110 }, { "epoch": 2.4267090991372386, "grad_norm": 0.5701032200323366, "learning_rate": 1.0616382806163829e-05, "loss": 0.2551, "step": 83115 }, { "epoch": 2.426855082407556, "grad_norm": 0.5561021274630277, "learning_rate": 1.061367937280346e-05, "loss": 0.2663, "step": 83120 }, { "epoch": 2.4270010656778735, "grad_norm": 0.5593952455474845, "learning_rate": 1.0610975939443094e-05, "loss": 0.2367, "step": 83125 }, { "epoch": 2.4271470489481906, "grad_norm": 0.6042010933718548, "learning_rate": 1.0608272506082726e-05, "loss": 0.2468, "step": 83130 }, { "epoch": 2.4272930322185076, "grad_norm": 0.5702842867690383, "learning_rate": 1.0605569072722357e-05, "loss": 0.2435, "step": 83135 }, { "epoch": 2.427439015488825, "grad_norm": 0.6296042945950717, "learning_rate": 1.060286563936199e-05, "loss": 0.2745, "step": 83140 }, { "epoch": 2.427584998759142, "grad_norm": 0.5679118057229402, "learning_rate": 1.0600162206001623e-05, "loss": 0.2334, "step": 83145 }, { "epoch": 2.4277309820294595, "grad_norm": 0.5470042258102397, "learning_rate": 1.0597458772641255e-05, "loss": 0.2428, "step": 83150 }, { "epoch": 2.4278769652997765, "grad_norm": 0.6018982964883073, "learning_rate": 1.0594755339280888e-05, "loss": 0.2425, "step": 83155 }, { "epoch": 2.428022948570094, "grad_norm": 0.5630616044895347, "learning_rate": 1.059205190592052e-05, "loss": 0.254, "step": 83160 }, { "epoch": 2.428168931840411, "grad_norm": 0.6356620420007862, "learning_rate": 1.0589348472560152e-05, "loss": 0.272, "step": 83165 }, { "epoch": 2.4283149151107284, "grad_norm": 0.6100955479035026, "learning_rate": 1.0586645039199785e-05, "loss": 0.2518, "step": 83170 }, { "epoch": 2.4284608983810454, "grad_norm": 0.5931249632472538, "learning_rate": 1.0583941605839417e-05, "loss": 0.2525, "step": 83175 }, { "epoch": 2.428606881651363, "grad_norm": 0.6206206705300329, "learning_rate": 1.058123817247905e-05, "loss": 0.2391, "step": 83180 }, { "epoch": 2.42875286492168, "grad_norm": 0.6393265814581233, "learning_rate": 1.0578534739118682e-05, "loss": 0.2509, "step": 83185 }, { "epoch": 2.4288988481919973, "grad_norm": 0.5978690965443046, "learning_rate": 1.0575831305758314e-05, "loss": 0.2495, "step": 83190 }, { "epoch": 2.4290448314623143, "grad_norm": 0.614514451918608, "learning_rate": 1.0573127872397946e-05, "loss": 0.2415, "step": 83195 }, { "epoch": 2.4291908147326318, "grad_norm": 0.5586018412937588, "learning_rate": 1.0570424439037578e-05, "loss": 0.2458, "step": 83200 }, { "epoch": 2.4293367980029488, "grad_norm": 0.5364989868215136, "learning_rate": 1.056772100567721e-05, "loss": 0.2432, "step": 83205 }, { "epoch": 2.4294827812732662, "grad_norm": 0.6040015495623823, "learning_rate": 1.0565017572316843e-05, "loss": 0.262, "step": 83210 }, { "epoch": 2.4296287645435832, "grad_norm": 0.6286438794408773, "learning_rate": 1.0562314138956475e-05, "loss": 0.2533, "step": 83215 }, { "epoch": 2.4297747478139007, "grad_norm": 0.5565272090126152, "learning_rate": 1.0559610705596106e-05, "loss": 0.2638, "step": 83220 }, { "epoch": 2.4299207310842177, "grad_norm": 0.5617342449315009, "learning_rate": 1.055690727223574e-05, "loss": 0.2469, "step": 83225 }, { "epoch": 2.430066714354535, "grad_norm": 0.5732856314610001, "learning_rate": 1.0554203838875372e-05, "loss": 0.2556, "step": 83230 }, { "epoch": 2.430212697624852, "grad_norm": 0.5773699204951417, "learning_rate": 1.0551500405515005e-05, "loss": 0.2515, "step": 83235 }, { "epoch": 2.4303586808951696, "grad_norm": 0.6092192898462944, "learning_rate": 1.0548796972154637e-05, "loss": 0.2543, "step": 83240 }, { "epoch": 2.4305046641654866, "grad_norm": 0.5429341613826184, "learning_rate": 1.054609353879427e-05, "loss": 0.2366, "step": 83245 }, { "epoch": 2.4306506474358036, "grad_norm": 0.6005008846279224, "learning_rate": 1.0543390105433902e-05, "loss": 0.2597, "step": 83250 }, { "epoch": 2.430796630706121, "grad_norm": 0.5715400649792984, "learning_rate": 1.0540686672073534e-05, "loss": 0.2428, "step": 83255 }, { "epoch": 2.4309426139764385, "grad_norm": 0.6046345610753066, "learning_rate": 1.0537983238713166e-05, "loss": 0.2364, "step": 83260 }, { "epoch": 2.4310885972467555, "grad_norm": 0.5832426444833837, "learning_rate": 1.0535279805352799e-05, "loss": 0.2524, "step": 83265 }, { "epoch": 2.4312345805170725, "grad_norm": 0.5826623962407018, "learning_rate": 1.0532576371992431e-05, "loss": 0.2465, "step": 83270 }, { "epoch": 2.43138056378739, "grad_norm": 0.6620177064657232, "learning_rate": 1.0529872938632063e-05, "loss": 0.2521, "step": 83275 }, { "epoch": 2.4315265470577074, "grad_norm": 0.6189581116818579, "learning_rate": 1.0527169505271696e-05, "loss": 0.2594, "step": 83280 }, { "epoch": 2.4316725303280244, "grad_norm": 0.5680354504929492, "learning_rate": 1.0524466071911328e-05, "loss": 0.2543, "step": 83285 }, { "epoch": 2.4318185135983414, "grad_norm": 0.6247670617125007, "learning_rate": 1.052176263855096e-05, "loss": 0.2642, "step": 83290 }, { "epoch": 2.431964496868659, "grad_norm": 0.6039779704481807, "learning_rate": 1.0519059205190593e-05, "loss": 0.2654, "step": 83295 }, { "epoch": 2.432110480138976, "grad_norm": 0.5664712445833825, "learning_rate": 1.0516355771830225e-05, "loss": 0.2685, "step": 83300 }, { "epoch": 2.4322564634092934, "grad_norm": 0.5133888405816904, "learning_rate": 1.0513652338469857e-05, "loss": 0.2391, "step": 83305 }, { "epoch": 2.4324024466796104, "grad_norm": 0.6057717090199566, "learning_rate": 1.051094890510949e-05, "loss": 0.2684, "step": 83310 }, { "epoch": 2.432548429949928, "grad_norm": 0.5817586427360882, "learning_rate": 1.0508245471749122e-05, "loss": 0.248, "step": 83315 }, { "epoch": 2.432694413220245, "grad_norm": 0.55093908362818, "learning_rate": 1.0505542038388754e-05, "loss": 0.2483, "step": 83320 }, { "epoch": 2.4328403964905623, "grad_norm": 0.5366689784340161, "learning_rate": 1.0502838605028386e-05, "loss": 0.26, "step": 83325 }, { "epoch": 2.4329863797608793, "grad_norm": 0.5885993957547261, "learning_rate": 1.0500135171668019e-05, "loss": 0.2387, "step": 83330 }, { "epoch": 2.4331323630311967, "grad_norm": 0.5835092756450415, "learning_rate": 1.0497431738307651e-05, "loss": 0.2473, "step": 83335 }, { "epoch": 2.4332783463015137, "grad_norm": 0.5633141635301182, "learning_rate": 1.0494728304947283e-05, "loss": 0.2469, "step": 83340 }, { "epoch": 2.433424329571831, "grad_norm": 0.5433755223419385, "learning_rate": 1.0492024871586916e-05, "loss": 0.2503, "step": 83345 }, { "epoch": 2.433570312842148, "grad_norm": 0.6188401207912759, "learning_rate": 1.0489321438226548e-05, "loss": 0.2521, "step": 83350 }, { "epoch": 2.4337162961124656, "grad_norm": 0.5582619566584807, "learning_rate": 1.048661800486618e-05, "loss": 0.2472, "step": 83355 }, { "epoch": 2.4338622793827827, "grad_norm": 0.5776264768132932, "learning_rate": 1.0483914571505813e-05, "loss": 0.2432, "step": 83360 }, { "epoch": 2.4340082626531, "grad_norm": 0.610182109956325, "learning_rate": 1.0481211138145445e-05, "loss": 0.2461, "step": 83365 }, { "epoch": 2.434154245923417, "grad_norm": 0.5965580023996087, "learning_rate": 1.0478507704785077e-05, "loss": 0.2354, "step": 83370 }, { "epoch": 2.4343002291937346, "grad_norm": 0.6180030789601402, "learning_rate": 1.047580427142471e-05, "loss": 0.2435, "step": 83375 }, { "epoch": 2.4344462124640516, "grad_norm": 0.6275465497642585, "learning_rate": 1.0473100838064342e-05, "loss": 0.2485, "step": 83380 }, { "epoch": 2.434592195734369, "grad_norm": 0.5850177705769068, "learning_rate": 1.0470397404703974e-05, "loss": 0.259, "step": 83385 }, { "epoch": 2.434738179004686, "grad_norm": 0.5701697501135055, "learning_rate": 1.0467693971343608e-05, "loss": 0.2564, "step": 83390 }, { "epoch": 2.4348841622750035, "grad_norm": 0.6053317320483756, "learning_rate": 1.0464990537983239e-05, "loss": 0.2583, "step": 83395 }, { "epoch": 2.4350301455453205, "grad_norm": 0.5916121956767911, "learning_rate": 1.0462287104622871e-05, "loss": 0.2363, "step": 83400 }, { "epoch": 2.4351761288156375, "grad_norm": 0.6113286016121964, "learning_rate": 1.0459583671262505e-05, "loss": 0.2514, "step": 83405 }, { "epoch": 2.435322112085955, "grad_norm": 0.6122346009295716, "learning_rate": 1.0456880237902136e-05, "loss": 0.2553, "step": 83410 }, { "epoch": 2.4354680953562724, "grad_norm": 0.560849182960542, "learning_rate": 1.0454176804541768e-05, "loss": 0.2417, "step": 83415 }, { "epoch": 2.4356140786265894, "grad_norm": 0.6258415334179664, "learning_rate": 1.0451473371181402e-05, "loss": 0.2795, "step": 83420 }, { "epoch": 2.4357600618969064, "grad_norm": 0.5973595594670412, "learning_rate": 1.0448769937821033e-05, "loss": 0.2504, "step": 83425 }, { "epoch": 2.435906045167224, "grad_norm": 0.5625987968826536, "learning_rate": 1.0446066504460665e-05, "loss": 0.2499, "step": 83430 }, { "epoch": 2.436052028437541, "grad_norm": 0.6445310796700998, "learning_rate": 1.0443363071100299e-05, "loss": 0.2581, "step": 83435 }, { "epoch": 2.4361980117078583, "grad_norm": 0.5981250674840407, "learning_rate": 1.044065963773993e-05, "loss": 0.2623, "step": 83440 }, { "epoch": 2.4363439949781753, "grad_norm": 0.5845291079724199, "learning_rate": 1.0437956204379562e-05, "loss": 0.2575, "step": 83445 }, { "epoch": 2.436489978248493, "grad_norm": 0.5806213639903285, "learning_rate": 1.0435252771019196e-05, "loss": 0.2513, "step": 83450 }, { "epoch": 2.43663596151881, "grad_norm": 0.5419227775938812, "learning_rate": 1.0432549337658827e-05, "loss": 0.2517, "step": 83455 }, { "epoch": 2.4367819447891272, "grad_norm": 0.6286801039484824, "learning_rate": 1.0429845904298459e-05, "loss": 0.2632, "step": 83460 }, { "epoch": 2.4369279280594442, "grad_norm": 0.6372107053479564, "learning_rate": 1.0427142470938093e-05, "loss": 0.2489, "step": 83465 }, { "epoch": 2.4370739113297617, "grad_norm": 0.6159703039530459, "learning_rate": 1.0424439037577724e-05, "loss": 0.2586, "step": 83470 }, { "epoch": 2.4372198946000787, "grad_norm": 0.5762538752531978, "learning_rate": 1.0421735604217358e-05, "loss": 0.2439, "step": 83475 }, { "epoch": 2.437365877870396, "grad_norm": 0.6079750211875519, "learning_rate": 1.0419032170856988e-05, "loss": 0.2646, "step": 83480 }, { "epoch": 2.437511861140713, "grad_norm": 0.5720531904777195, "learning_rate": 1.041632873749662e-05, "loss": 0.2342, "step": 83485 }, { "epoch": 2.4376578444110306, "grad_norm": 0.5917511141725474, "learning_rate": 1.0413625304136255e-05, "loss": 0.2514, "step": 83490 }, { "epoch": 2.4378038276813476, "grad_norm": 0.5844760382169009, "learning_rate": 1.0410921870775885e-05, "loss": 0.2646, "step": 83495 }, { "epoch": 2.437949810951665, "grad_norm": 0.564528007563103, "learning_rate": 1.0408218437415518e-05, "loss": 0.243, "step": 83500 }, { "epoch": 2.438095794221982, "grad_norm": 0.5860817331839868, "learning_rate": 1.0405515004055152e-05, "loss": 0.2672, "step": 83505 }, { "epoch": 2.4382417774922995, "grad_norm": 0.5934011969670453, "learning_rate": 1.0402811570694782e-05, "loss": 0.255, "step": 83510 }, { "epoch": 2.4383877607626165, "grad_norm": 0.5655941559461021, "learning_rate": 1.0400108137334414e-05, "loss": 0.2426, "step": 83515 }, { "epoch": 2.438533744032934, "grad_norm": 0.609778273825321, "learning_rate": 1.0397404703974048e-05, "loss": 0.2484, "step": 83520 }, { "epoch": 2.438679727303251, "grad_norm": 0.5624227065362455, "learning_rate": 1.0394701270613679e-05, "loss": 0.2352, "step": 83525 }, { "epoch": 2.4388257105735685, "grad_norm": 0.6575034452837276, "learning_rate": 1.0391997837253311e-05, "loss": 0.2654, "step": 83530 }, { "epoch": 2.4389716938438855, "grad_norm": 0.6107199875381196, "learning_rate": 1.0389294403892945e-05, "loss": 0.2702, "step": 83535 }, { "epoch": 2.4391176771142025, "grad_norm": 0.5703069431893989, "learning_rate": 1.0386590970532576e-05, "loss": 0.2488, "step": 83540 }, { "epoch": 2.43926366038452, "grad_norm": 0.5899254748431596, "learning_rate": 1.0383887537172208e-05, "loss": 0.2438, "step": 83545 }, { "epoch": 2.4394096436548374, "grad_norm": 0.560770110302133, "learning_rate": 1.0381184103811842e-05, "loss": 0.2528, "step": 83550 }, { "epoch": 2.4395556269251544, "grad_norm": 0.629296322572276, "learning_rate": 1.0378480670451473e-05, "loss": 0.2559, "step": 83555 }, { "epoch": 2.4397016101954714, "grad_norm": 0.6180894669053598, "learning_rate": 1.0375777237091107e-05, "loss": 0.2453, "step": 83560 }, { "epoch": 2.439847593465789, "grad_norm": 0.6151350340753581, "learning_rate": 1.037307380373074e-05, "loss": 0.2556, "step": 83565 }, { "epoch": 2.4399935767361063, "grad_norm": 0.6012523038805532, "learning_rate": 1.037037037037037e-05, "loss": 0.2448, "step": 83570 }, { "epoch": 2.4401395600064233, "grad_norm": 0.5529794892006981, "learning_rate": 1.0367666937010004e-05, "loss": 0.2483, "step": 83575 }, { "epoch": 2.4402855432767403, "grad_norm": 0.6380678723618457, "learning_rate": 1.0364963503649636e-05, "loss": 0.249, "step": 83580 }, { "epoch": 2.4404315265470578, "grad_norm": 0.5983278957820004, "learning_rate": 1.0362260070289267e-05, "loss": 0.2592, "step": 83585 }, { "epoch": 2.4405775098173748, "grad_norm": 0.5188231196384083, "learning_rate": 1.0359556636928901e-05, "loss": 0.2489, "step": 83590 }, { "epoch": 2.440723493087692, "grad_norm": 0.5521878469103034, "learning_rate": 1.0356853203568533e-05, "loss": 0.2493, "step": 83595 }, { "epoch": 2.440869476358009, "grad_norm": 0.5385981707102143, "learning_rate": 1.0354149770208164e-05, "loss": 0.2473, "step": 83600 }, { "epoch": 2.4410154596283267, "grad_norm": 0.6262580000033142, "learning_rate": 1.0351446336847798e-05, "loss": 0.2592, "step": 83605 }, { "epoch": 2.4411614428986437, "grad_norm": 0.5862647335294048, "learning_rate": 1.034874290348743e-05, "loss": 0.24, "step": 83610 }, { "epoch": 2.441307426168961, "grad_norm": 0.5834814383504003, "learning_rate": 1.034603947012706e-05, "loss": 0.2494, "step": 83615 }, { "epoch": 2.441453409439278, "grad_norm": 0.5821025132638195, "learning_rate": 1.0343336036766695e-05, "loss": 0.2439, "step": 83620 }, { "epoch": 2.4415993927095956, "grad_norm": 0.6079349490312439, "learning_rate": 1.0340632603406327e-05, "loss": 0.2708, "step": 83625 }, { "epoch": 2.4417453759799126, "grad_norm": 0.5791980786672162, "learning_rate": 1.0337929170045958e-05, "loss": 0.2535, "step": 83630 }, { "epoch": 2.44189135925023, "grad_norm": 0.6125261796968076, "learning_rate": 1.0335225736685592e-05, "loss": 0.2464, "step": 83635 }, { "epoch": 2.442037342520547, "grad_norm": 0.6356239905118607, "learning_rate": 1.0332522303325224e-05, "loss": 0.2544, "step": 83640 }, { "epoch": 2.4421833257908645, "grad_norm": 0.5485291291161581, "learning_rate": 1.0329818869964856e-05, "loss": 0.2505, "step": 83645 }, { "epoch": 2.4423293090611815, "grad_norm": 0.5544518171576298, "learning_rate": 1.0327115436604489e-05, "loss": 0.2541, "step": 83650 }, { "epoch": 2.442475292331499, "grad_norm": 0.6346919117663293, "learning_rate": 1.0324412003244121e-05, "loss": 0.2533, "step": 83655 }, { "epoch": 2.442621275601816, "grad_norm": 0.5703345902012752, "learning_rate": 1.0321708569883753e-05, "loss": 0.2386, "step": 83660 }, { "epoch": 2.4427672588721334, "grad_norm": 0.5711105168527854, "learning_rate": 1.0319005136523386e-05, "loss": 0.2638, "step": 83665 }, { "epoch": 2.4429132421424504, "grad_norm": 0.6132505197613971, "learning_rate": 1.0316301703163016e-05, "loss": 0.248, "step": 83670 }, { "epoch": 2.443059225412768, "grad_norm": 0.5862720635802285, "learning_rate": 1.031359826980265e-05, "loss": 0.2473, "step": 83675 }, { "epoch": 2.443205208683085, "grad_norm": 0.5494593471365551, "learning_rate": 1.0310894836442283e-05, "loss": 0.2565, "step": 83680 }, { "epoch": 2.4433511919534023, "grad_norm": 0.5664595923835049, "learning_rate": 1.0308191403081913e-05, "loss": 0.2517, "step": 83685 }, { "epoch": 2.4434971752237193, "grad_norm": 0.5791028701884189, "learning_rate": 1.0305487969721547e-05, "loss": 0.2579, "step": 83690 }, { "epoch": 2.4436431584940363, "grad_norm": 0.565765926353986, "learning_rate": 1.030278453636118e-05, "loss": 0.2475, "step": 83695 }, { "epoch": 2.443789141764354, "grad_norm": 0.5793665021481872, "learning_rate": 1.030008110300081e-05, "loss": 0.264, "step": 83700 }, { "epoch": 2.4439351250346713, "grad_norm": 0.6076944391324227, "learning_rate": 1.0297377669640444e-05, "loss": 0.2578, "step": 83705 }, { "epoch": 2.4440811083049883, "grad_norm": 0.5313903729172721, "learning_rate": 1.0294674236280077e-05, "loss": 0.2361, "step": 83710 }, { "epoch": 2.4442270915753053, "grad_norm": 0.5512712755141507, "learning_rate": 1.0291970802919707e-05, "loss": 0.232, "step": 83715 }, { "epoch": 2.4443730748456227, "grad_norm": 0.5605932569530582, "learning_rate": 1.0289267369559341e-05, "loss": 0.2589, "step": 83720 }, { "epoch": 2.4445190581159397, "grad_norm": 0.6039652375235953, "learning_rate": 1.0286563936198973e-05, "loss": 0.2498, "step": 83725 }, { "epoch": 2.444665041386257, "grad_norm": 0.596370988667233, "learning_rate": 1.0283860502838606e-05, "loss": 0.2439, "step": 83730 }, { "epoch": 2.444811024656574, "grad_norm": 0.6473780869175352, "learning_rate": 1.0281157069478238e-05, "loss": 0.26, "step": 83735 }, { "epoch": 2.4449570079268916, "grad_norm": 0.6016124309415061, "learning_rate": 1.027845363611787e-05, "loss": 0.2388, "step": 83740 }, { "epoch": 2.4451029911972086, "grad_norm": 0.605642720056627, "learning_rate": 1.0275750202757503e-05, "loss": 0.2509, "step": 83745 }, { "epoch": 2.445248974467526, "grad_norm": 0.5836516685312857, "learning_rate": 1.0273046769397135e-05, "loss": 0.2562, "step": 83750 }, { "epoch": 2.445394957737843, "grad_norm": 0.6203511217570628, "learning_rate": 1.0270343336036767e-05, "loss": 0.2495, "step": 83755 }, { "epoch": 2.4455409410081606, "grad_norm": 0.5923023888252503, "learning_rate": 1.02676399026764e-05, "loss": 0.2451, "step": 83760 }, { "epoch": 2.4456869242784776, "grad_norm": 0.5725692438409432, "learning_rate": 1.0264936469316032e-05, "loss": 0.2542, "step": 83765 }, { "epoch": 2.445832907548795, "grad_norm": 0.5779565455462354, "learning_rate": 1.0262233035955664e-05, "loss": 0.2379, "step": 83770 }, { "epoch": 2.445978890819112, "grad_norm": 0.6344334835797951, "learning_rate": 1.0259529602595297e-05, "loss": 0.2396, "step": 83775 }, { "epoch": 2.4461248740894295, "grad_norm": 0.5749471262926764, "learning_rate": 1.0256826169234929e-05, "loss": 0.2414, "step": 83780 }, { "epoch": 2.4462708573597465, "grad_norm": 0.561116418788489, "learning_rate": 1.0254122735874561e-05, "loss": 0.2471, "step": 83785 }, { "epoch": 2.446416840630064, "grad_norm": 0.6167551744138483, "learning_rate": 1.0251419302514194e-05, "loss": 0.2486, "step": 83790 }, { "epoch": 2.446562823900381, "grad_norm": 0.5920582393362176, "learning_rate": 1.0248715869153826e-05, "loss": 0.236, "step": 83795 }, { "epoch": 2.4467088071706984, "grad_norm": 0.5358055957005511, "learning_rate": 1.0246012435793458e-05, "loss": 0.2355, "step": 83800 }, { "epoch": 2.4468547904410154, "grad_norm": 0.5914954144880818, "learning_rate": 1.024330900243309e-05, "loss": 0.2501, "step": 83805 }, { "epoch": 2.447000773711333, "grad_norm": 0.5641964898222949, "learning_rate": 1.0240605569072723e-05, "loss": 0.2457, "step": 83810 }, { "epoch": 2.44714675698165, "grad_norm": 0.5624281270558563, "learning_rate": 1.0237902135712355e-05, "loss": 0.2425, "step": 83815 }, { "epoch": 2.4472927402519673, "grad_norm": 0.6206365826312551, "learning_rate": 1.0235198702351988e-05, "loss": 0.2548, "step": 83820 }, { "epoch": 2.4474387235222843, "grad_norm": 0.5286746689119877, "learning_rate": 1.023249526899162e-05, "loss": 0.2323, "step": 83825 }, { "epoch": 2.4475847067926018, "grad_norm": 0.5613138488713828, "learning_rate": 1.0229791835631252e-05, "loss": 0.254, "step": 83830 }, { "epoch": 2.4477306900629188, "grad_norm": 0.569486635334893, "learning_rate": 1.0227088402270884e-05, "loss": 0.2372, "step": 83835 }, { "epoch": 2.447876673333236, "grad_norm": 0.5593077615066014, "learning_rate": 1.0224384968910517e-05, "loss": 0.2351, "step": 83840 }, { "epoch": 2.4480226566035532, "grad_norm": 0.5827667046504588, "learning_rate": 1.0221681535550149e-05, "loss": 0.2476, "step": 83845 }, { "epoch": 2.4481686398738702, "grad_norm": 0.5941561701215745, "learning_rate": 1.0218978102189781e-05, "loss": 0.2434, "step": 83850 }, { "epoch": 2.4483146231441877, "grad_norm": 0.5767429029723837, "learning_rate": 1.0216274668829414e-05, "loss": 0.2488, "step": 83855 }, { "epoch": 2.448460606414505, "grad_norm": 0.583667152390715, "learning_rate": 1.0213571235469046e-05, "loss": 0.2459, "step": 83860 }, { "epoch": 2.448606589684822, "grad_norm": 0.5622691695975983, "learning_rate": 1.0210867802108678e-05, "loss": 0.2519, "step": 83865 }, { "epoch": 2.448752572955139, "grad_norm": 0.6535431026968601, "learning_rate": 1.020816436874831e-05, "loss": 0.2611, "step": 83870 }, { "epoch": 2.4488985562254566, "grad_norm": 0.6075567755234558, "learning_rate": 1.0205460935387943e-05, "loss": 0.2559, "step": 83875 }, { "epoch": 2.4490445394957736, "grad_norm": 0.6085199539760672, "learning_rate": 1.0202757502027575e-05, "loss": 0.2602, "step": 83880 }, { "epoch": 2.449190522766091, "grad_norm": 0.6011496001386043, "learning_rate": 1.0200054068667208e-05, "loss": 0.2548, "step": 83885 }, { "epoch": 2.449336506036408, "grad_norm": 0.6149608560170103, "learning_rate": 1.019735063530684e-05, "loss": 0.256, "step": 83890 }, { "epoch": 2.4494824893067255, "grad_norm": 0.5638926790453384, "learning_rate": 1.0194647201946472e-05, "loss": 0.2471, "step": 83895 }, { "epoch": 2.4496284725770425, "grad_norm": 0.5785822213009855, "learning_rate": 1.0191943768586106e-05, "loss": 0.2451, "step": 83900 }, { "epoch": 2.44977445584736, "grad_norm": 0.6142943901234419, "learning_rate": 1.0189240335225737e-05, "loss": 0.2681, "step": 83905 }, { "epoch": 2.449920439117677, "grad_norm": 0.5885825025082432, "learning_rate": 1.018653690186537e-05, "loss": 0.2498, "step": 83910 }, { "epoch": 2.4500664223879944, "grad_norm": 0.5514074519402827, "learning_rate": 1.0183833468505003e-05, "loss": 0.2409, "step": 83915 }, { "epoch": 2.4502124056583114, "grad_norm": 0.5927129141231673, "learning_rate": 1.0181130035144634e-05, "loss": 0.2505, "step": 83920 }, { "epoch": 2.450358388928629, "grad_norm": 0.5808509650064007, "learning_rate": 1.0178426601784266e-05, "loss": 0.2581, "step": 83925 }, { "epoch": 2.450504372198946, "grad_norm": 0.588232096310618, "learning_rate": 1.0175723168423899e-05, "loss": 0.245, "step": 83930 }, { "epoch": 2.4506503554692634, "grad_norm": 0.6412304385602108, "learning_rate": 1.017301973506353e-05, "loss": 0.2539, "step": 83935 }, { "epoch": 2.4507963387395804, "grad_norm": 0.554762672800691, "learning_rate": 1.0170316301703163e-05, "loss": 0.2446, "step": 83940 }, { "epoch": 2.450942322009898, "grad_norm": 0.642018096929703, "learning_rate": 1.0167612868342795e-05, "loss": 0.2643, "step": 83945 }, { "epoch": 2.451088305280215, "grad_norm": 0.5746926011403661, "learning_rate": 1.0164909434982428e-05, "loss": 0.25, "step": 83950 }, { "epoch": 2.4512342885505323, "grad_norm": 0.5821667451153948, "learning_rate": 1.016220600162206e-05, "loss": 0.248, "step": 83955 }, { "epoch": 2.4513802718208493, "grad_norm": 0.5864305139158569, "learning_rate": 1.0159502568261692e-05, "loss": 0.2621, "step": 83960 }, { "epoch": 2.4515262550911667, "grad_norm": 0.6414823945828593, "learning_rate": 1.0156799134901325e-05, "loss": 0.251, "step": 83965 }, { "epoch": 2.4516722383614837, "grad_norm": 0.5954432837567049, "learning_rate": 1.0154095701540957e-05, "loss": 0.257, "step": 83970 }, { "epoch": 2.451818221631801, "grad_norm": 0.5891548608574254, "learning_rate": 1.015139226818059e-05, "loss": 0.2567, "step": 83975 }, { "epoch": 2.451964204902118, "grad_norm": 0.5622401340938082, "learning_rate": 1.0148688834820222e-05, "loss": 0.2425, "step": 83980 }, { "epoch": 2.452110188172435, "grad_norm": 0.609323830246829, "learning_rate": 1.0145985401459856e-05, "loss": 0.2525, "step": 83985 }, { "epoch": 2.4522561714427527, "grad_norm": 0.6002141159750326, "learning_rate": 1.0143281968099486e-05, "loss": 0.2478, "step": 83990 }, { "epoch": 2.45240215471307, "grad_norm": 0.5592109489648047, "learning_rate": 1.0140578534739119e-05, "loss": 0.2375, "step": 83995 }, { "epoch": 2.452548137983387, "grad_norm": 0.6153366790013833, "learning_rate": 1.0137875101378753e-05, "loss": 0.2677, "step": 84000 }, { "epoch": 2.452694121253704, "grad_norm": 0.5603772193939983, "learning_rate": 1.0135171668018383e-05, "loss": 0.2453, "step": 84005 }, { "epoch": 2.4528401045240216, "grad_norm": 0.5570470185570414, "learning_rate": 1.0132468234658016e-05, "loss": 0.2434, "step": 84010 }, { "epoch": 2.4529860877943386, "grad_norm": 0.5419234945183172, "learning_rate": 1.012976480129765e-05, "loss": 0.238, "step": 84015 }, { "epoch": 2.453132071064656, "grad_norm": 0.5332393460323841, "learning_rate": 1.012706136793728e-05, "loss": 0.2615, "step": 84020 }, { "epoch": 2.453278054334973, "grad_norm": 0.5747075280351048, "learning_rate": 1.0124357934576913e-05, "loss": 0.2609, "step": 84025 }, { "epoch": 2.4534240376052905, "grad_norm": 0.5589673602684508, "learning_rate": 1.0121654501216547e-05, "loss": 0.2573, "step": 84030 }, { "epoch": 2.4535700208756075, "grad_norm": 0.5926446063337416, "learning_rate": 1.0118951067856177e-05, "loss": 0.2596, "step": 84035 }, { "epoch": 2.453716004145925, "grad_norm": 0.598777560974229, "learning_rate": 1.011624763449581e-05, "loss": 0.2498, "step": 84040 }, { "epoch": 2.453861987416242, "grad_norm": 0.5769521804432534, "learning_rate": 1.0113544201135443e-05, "loss": 0.2414, "step": 84045 }, { "epoch": 2.4540079706865594, "grad_norm": 0.632124516391089, "learning_rate": 1.0110840767775074e-05, "loss": 0.2446, "step": 84050 }, { "epoch": 2.4541539539568764, "grad_norm": 0.5367423188228102, "learning_rate": 1.0108137334414706e-05, "loss": 0.2474, "step": 84055 }, { "epoch": 2.454299937227194, "grad_norm": 0.6077788201085863, "learning_rate": 1.010543390105434e-05, "loss": 0.2529, "step": 84060 }, { "epoch": 2.454445920497511, "grad_norm": 0.5391111808563025, "learning_rate": 1.0102730467693971e-05, "loss": 0.2525, "step": 84065 }, { "epoch": 2.4545919037678283, "grad_norm": 0.609792296911237, "learning_rate": 1.0100027034333605e-05, "loss": 0.2586, "step": 84070 }, { "epoch": 2.4547378870381453, "grad_norm": 0.565249012212194, "learning_rate": 1.0097323600973237e-05, "loss": 0.2507, "step": 84075 }, { "epoch": 2.454883870308463, "grad_norm": 0.5483728280871373, "learning_rate": 1.0094620167612868e-05, "loss": 0.2253, "step": 84080 }, { "epoch": 2.45502985357878, "grad_norm": 0.6369800815697922, "learning_rate": 1.0091916734252502e-05, "loss": 0.2414, "step": 84085 }, { "epoch": 2.4551758368490972, "grad_norm": 0.6136539173595064, "learning_rate": 1.0089213300892134e-05, "loss": 0.2533, "step": 84090 }, { "epoch": 2.4553218201194142, "grad_norm": 0.5906632752464644, "learning_rate": 1.0086509867531765e-05, "loss": 0.2502, "step": 84095 }, { "epoch": 2.4554678033897317, "grad_norm": 0.5870822295328082, "learning_rate": 1.0083806434171399e-05, "loss": 0.284, "step": 84100 }, { "epoch": 2.4556137866600487, "grad_norm": 0.5921983373761442, "learning_rate": 1.0081103000811031e-05, "loss": 0.2548, "step": 84105 }, { "epoch": 2.455759769930366, "grad_norm": 0.5508905027997852, "learning_rate": 1.0078399567450662e-05, "loss": 0.2371, "step": 84110 }, { "epoch": 2.455905753200683, "grad_norm": 0.5516523493097085, "learning_rate": 1.0075696134090296e-05, "loss": 0.249, "step": 84115 }, { "epoch": 2.4560517364710006, "grad_norm": 0.5851671898015737, "learning_rate": 1.0072992700729927e-05, "loss": 0.2388, "step": 84120 }, { "epoch": 2.4561977197413176, "grad_norm": 0.639922146039645, "learning_rate": 1.0070289267369559e-05, "loss": 0.2427, "step": 84125 }, { "epoch": 2.456343703011635, "grad_norm": 0.5655020868970796, "learning_rate": 1.0067585834009193e-05, "loss": 0.2572, "step": 84130 }, { "epoch": 2.456489686281952, "grad_norm": 0.5662085741171663, "learning_rate": 1.0064882400648824e-05, "loss": 0.2626, "step": 84135 }, { "epoch": 2.456635669552269, "grad_norm": 0.6249160750809404, "learning_rate": 1.0062178967288456e-05, "loss": 0.2615, "step": 84140 }, { "epoch": 2.4567816528225865, "grad_norm": 0.591864081797554, "learning_rate": 1.005947553392809e-05, "loss": 0.2523, "step": 84145 }, { "epoch": 2.456927636092904, "grad_norm": 0.5889067986462818, "learning_rate": 1.005677210056772e-05, "loss": 0.253, "step": 84150 }, { "epoch": 2.457073619363221, "grad_norm": 0.5937468211931171, "learning_rate": 1.0054068667207354e-05, "loss": 0.2534, "step": 84155 }, { "epoch": 2.457219602633538, "grad_norm": 0.5431847438587354, "learning_rate": 1.0051365233846987e-05, "loss": 0.2269, "step": 84160 }, { "epoch": 2.4573655859038555, "grad_norm": 0.6061904591894275, "learning_rate": 1.0048661800486617e-05, "loss": 0.2565, "step": 84165 }, { "epoch": 2.4575115691741725, "grad_norm": 0.5914690250484217, "learning_rate": 1.0045958367126251e-05, "loss": 0.2481, "step": 84170 }, { "epoch": 2.45765755244449, "grad_norm": 0.5845171722044852, "learning_rate": 1.0043254933765884e-05, "loss": 0.258, "step": 84175 }, { "epoch": 2.457803535714807, "grad_norm": 0.5689642314470387, "learning_rate": 1.0040551500405514e-05, "loss": 0.2509, "step": 84180 }, { "epoch": 2.4579495189851244, "grad_norm": 0.5576720871610943, "learning_rate": 1.0037848067045148e-05, "loss": 0.2473, "step": 84185 }, { "epoch": 2.4580955022554414, "grad_norm": 0.6264599853441949, "learning_rate": 1.003514463368478e-05, "loss": 0.2473, "step": 84190 }, { "epoch": 2.458241485525759, "grad_norm": 0.6018563955413591, "learning_rate": 1.0032441200324411e-05, "loss": 0.2671, "step": 84195 }, { "epoch": 2.458387468796076, "grad_norm": 0.5616459769065809, "learning_rate": 1.0029737766964045e-05, "loss": 0.2512, "step": 84200 }, { "epoch": 2.4585334520663933, "grad_norm": 0.5797716269486904, "learning_rate": 1.0027034333603678e-05, "loss": 0.2527, "step": 84205 }, { "epoch": 2.4586794353367103, "grad_norm": 0.6031958065539399, "learning_rate": 1.0024330900243308e-05, "loss": 0.2594, "step": 84210 }, { "epoch": 2.4588254186070277, "grad_norm": 0.535591217665518, "learning_rate": 1.0021627466882942e-05, "loss": 0.2595, "step": 84215 }, { "epoch": 2.4589714018773448, "grad_norm": 0.5425893958273464, "learning_rate": 1.0018924033522575e-05, "loss": 0.242, "step": 84220 }, { "epoch": 2.459117385147662, "grad_norm": 0.5691027826971823, "learning_rate": 1.0016220600162205e-05, "loss": 0.2548, "step": 84225 }, { "epoch": 2.459263368417979, "grad_norm": 0.5886793047014383, "learning_rate": 1.001351716680184e-05, "loss": 0.2609, "step": 84230 }, { "epoch": 2.4594093516882967, "grad_norm": 0.5900931452379584, "learning_rate": 1.0010813733441472e-05, "loss": 0.2491, "step": 84235 }, { "epoch": 2.4595553349586137, "grad_norm": 0.5356293186534915, "learning_rate": 1.0008110300081104e-05, "loss": 0.2418, "step": 84240 }, { "epoch": 2.459701318228931, "grad_norm": 0.571814834320596, "learning_rate": 1.0005406866720736e-05, "loss": 0.2536, "step": 84245 }, { "epoch": 2.459847301499248, "grad_norm": 0.5762281766559634, "learning_rate": 1.0002703433360369e-05, "loss": 0.2369, "step": 84250 }, { "epoch": 2.4599932847695656, "grad_norm": 0.5600431411098578, "learning_rate": 1e-05, "loss": 0.2498, "step": 84255 }, { "epoch": 2.4601392680398826, "grad_norm": 0.5849664361627559, "learning_rate": 9.997296566639633e-06, "loss": 0.2565, "step": 84260 }, { "epoch": 2.4602852513102, "grad_norm": 0.5850198464983039, "learning_rate": 9.994593133279265e-06, "loss": 0.2325, "step": 84265 }, { "epoch": 2.460431234580517, "grad_norm": 0.5881643362300969, "learning_rate": 9.991889699918898e-06, "loss": 0.2576, "step": 84270 }, { "epoch": 2.460577217850834, "grad_norm": 0.599117241340051, "learning_rate": 9.98918626655853e-06, "loss": 0.2416, "step": 84275 }, { "epoch": 2.4607232011211515, "grad_norm": 0.5884997815085482, "learning_rate": 9.986482833198162e-06, "loss": 0.2431, "step": 84280 }, { "epoch": 2.460869184391469, "grad_norm": 0.5417980288980617, "learning_rate": 9.983779399837795e-06, "loss": 0.2535, "step": 84285 }, { "epoch": 2.461015167661786, "grad_norm": 0.5377816887573933, "learning_rate": 9.981075966477427e-06, "loss": 0.2598, "step": 84290 }, { "epoch": 2.461161150932103, "grad_norm": 0.570156251899009, "learning_rate": 9.97837253311706e-06, "loss": 0.2464, "step": 84295 }, { "epoch": 2.4613071342024204, "grad_norm": 0.6264490704861423, "learning_rate": 9.975669099756692e-06, "loss": 0.2758, "step": 84300 }, { "epoch": 2.461453117472738, "grad_norm": 0.5606555141729215, "learning_rate": 9.972965666396324e-06, "loss": 0.2397, "step": 84305 }, { "epoch": 2.461599100743055, "grad_norm": 0.5751173783030298, "learning_rate": 9.970262233035956e-06, "loss": 0.2459, "step": 84310 }, { "epoch": 2.461745084013372, "grad_norm": 0.644330530008777, "learning_rate": 9.967558799675589e-06, "loss": 0.255, "step": 84315 }, { "epoch": 2.4618910672836893, "grad_norm": 0.6342610686109414, "learning_rate": 9.964855366315221e-06, "loss": 0.2668, "step": 84320 }, { "epoch": 2.4620370505540063, "grad_norm": 0.5719322117634935, "learning_rate": 9.962151932954853e-06, "loss": 0.2348, "step": 84325 }, { "epoch": 2.462183033824324, "grad_norm": 0.5725526736037122, "learning_rate": 9.959448499594486e-06, "loss": 0.2483, "step": 84330 }, { "epoch": 2.462329017094641, "grad_norm": 0.6048553739269605, "learning_rate": 9.956745066234118e-06, "loss": 0.2618, "step": 84335 }, { "epoch": 2.4624750003649583, "grad_norm": 0.590608324212584, "learning_rate": 9.95404163287375e-06, "loss": 0.2527, "step": 84340 }, { "epoch": 2.4626209836352753, "grad_norm": 0.5810239642605799, "learning_rate": 9.951338199513383e-06, "loss": 0.25, "step": 84345 }, { "epoch": 2.4627669669055927, "grad_norm": 0.574587123876116, "learning_rate": 9.948634766153015e-06, "loss": 0.2362, "step": 84350 }, { "epoch": 2.4629129501759097, "grad_norm": 0.5650440942210545, "learning_rate": 9.945931332792647e-06, "loss": 0.2472, "step": 84355 }, { "epoch": 2.463058933446227, "grad_norm": 0.6285091632486571, "learning_rate": 9.94322789943228e-06, "loss": 0.2492, "step": 84360 }, { "epoch": 2.463204916716544, "grad_norm": 0.5504912897236306, "learning_rate": 9.940524466071912e-06, "loss": 0.2458, "step": 84365 }, { "epoch": 2.4633508999868616, "grad_norm": 0.571901878648428, "learning_rate": 9.937821032711544e-06, "loss": 0.2431, "step": 84370 }, { "epoch": 2.4634968832571786, "grad_norm": 0.5651318133360203, "learning_rate": 9.935117599351176e-06, "loss": 0.2492, "step": 84375 }, { "epoch": 2.463642866527496, "grad_norm": 0.5826134954330723, "learning_rate": 9.932414165990809e-06, "loss": 0.2548, "step": 84380 }, { "epoch": 2.463788849797813, "grad_norm": 0.5446070807788317, "learning_rate": 9.929710732630441e-06, "loss": 0.2394, "step": 84385 }, { "epoch": 2.4639348330681305, "grad_norm": 0.5074668776296083, "learning_rate": 9.927007299270073e-06, "loss": 0.2361, "step": 84390 }, { "epoch": 2.4640808163384476, "grad_norm": 0.5887932467339585, "learning_rate": 9.924303865909706e-06, "loss": 0.253, "step": 84395 }, { "epoch": 2.464226799608765, "grad_norm": 0.5822145025247201, "learning_rate": 9.921600432549338e-06, "loss": 0.2543, "step": 84400 }, { "epoch": 2.464372782879082, "grad_norm": 0.6317309879968834, "learning_rate": 9.91889699918897e-06, "loss": 0.2734, "step": 84405 }, { "epoch": 2.4645187661493995, "grad_norm": 0.5794414278819072, "learning_rate": 9.916193565828603e-06, "loss": 0.2599, "step": 84410 }, { "epoch": 2.4646647494197165, "grad_norm": 0.5508101267856583, "learning_rate": 9.913490132468235e-06, "loss": 0.2492, "step": 84415 }, { "epoch": 2.464810732690034, "grad_norm": 0.5453862555985144, "learning_rate": 9.910786699107867e-06, "loss": 0.2399, "step": 84420 }, { "epoch": 2.464956715960351, "grad_norm": 0.6002318646637448, "learning_rate": 9.9080832657475e-06, "loss": 0.2495, "step": 84425 }, { "epoch": 2.465102699230668, "grad_norm": 0.5602610927791031, "learning_rate": 9.905379832387132e-06, "loss": 0.2489, "step": 84430 }, { "epoch": 2.4652486825009854, "grad_norm": 0.5653063220750533, "learning_rate": 9.902676399026764e-06, "loss": 0.2586, "step": 84435 }, { "epoch": 2.465394665771303, "grad_norm": 0.5729451596326802, "learning_rate": 9.899972965666397e-06, "loss": 0.2471, "step": 84440 }, { "epoch": 2.46554064904162, "grad_norm": 0.6258681423649389, "learning_rate": 9.897269532306029e-06, "loss": 0.2497, "step": 84445 }, { "epoch": 2.465686632311937, "grad_norm": 0.5794254838683821, "learning_rate": 9.894566098945661e-06, "loss": 0.2476, "step": 84450 }, { "epoch": 2.4658326155822543, "grad_norm": 0.6015331232661861, "learning_rate": 9.891862665585294e-06, "loss": 0.254, "step": 84455 }, { "epoch": 2.4659785988525713, "grad_norm": 0.5844553731346736, "learning_rate": 9.889159232224926e-06, "loss": 0.249, "step": 84460 }, { "epoch": 2.4661245821228888, "grad_norm": 0.5183206215408089, "learning_rate": 9.886455798864558e-06, "loss": 0.2393, "step": 84465 }, { "epoch": 2.4662705653932058, "grad_norm": 0.5240706074300278, "learning_rate": 9.88375236550419e-06, "loss": 0.25, "step": 84470 }, { "epoch": 2.4664165486635232, "grad_norm": 0.6045911584555346, "learning_rate": 9.881048932143823e-06, "loss": 0.2518, "step": 84475 }, { "epoch": 2.4665625319338402, "grad_norm": 0.5749679945050274, "learning_rate": 9.878345498783455e-06, "loss": 0.2641, "step": 84480 }, { "epoch": 2.4667085152041577, "grad_norm": 0.5993134204611986, "learning_rate": 9.875642065423087e-06, "loss": 0.2631, "step": 84485 }, { "epoch": 2.4668544984744747, "grad_norm": 0.5642197087327713, "learning_rate": 9.87293863206272e-06, "loss": 0.2268, "step": 84490 }, { "epoch": 2.467000481744792, "grad_norm": 0.615938880345317, "learning_rate": 9.870235198702354e-06, "loss": 0.2561, "step": 84495 }, { "epoch": 2.467146465015109, "grad_norm": 0.572885404598514, "learning_rate": 9.867531765341984e-06, "loss": 0.247, "step": 84500 }, { "epoch": 2.4672924482854266, "grad_norm": 0.5746236870374576, "learning_rate": 9.864828331981617e-06, "loss": 0.2569, "step": 84505 }, { "epoch": 2.4674384315557436, "grad_norm": 0.5952014904432608, "learning_rate": 9.86212489862125e-06, "loss": 0.2571, "step": 84510 }, { "epoch": 2.467584414826061, "grad_norm": 0.6207271209800359, "learning_rate": 9.859421465260881e-06, "loss": 0.2504, "step": 84515 }, { "epoch": 2.467730398096378, "grad_norm": 0.5821474211683559, "learning_rate": 9.856718031900514e-06, "loss": 0.2529, "step": 84520 }, { "epoch": 2.4678763813666955, "grad_norm": 0.5555101537534926, "learning_rate": 9.854014598540148e-06, "loss": 0.2456, "step": 84525 }, { "epoch": 2.4680223646370125, "grad_norm": 0.6401163656587043, "learning_rate": 9.851311165179778e-06, "loss": 0.263, "step": 84530 }, { "epoch": 2.46816834790733, "grad_norm": 0.6056329165102404, "learning_rate": 9.84860773181941e-06, "loss": 0.2524, "step": 84535 }, { "epoch": 2.468314331177647, "grad_norm": 0.5493018012666862, "learning_rate": 9.845904298459045e-06, "loss": 0.245, "step": 84540 }, { "epoch": 2.4684603144479644, "grad_norm": 0.539830053861364, "learning_rate": 9.843200865098675e-06, "loss": 0.2489, "step": 84545 }, { "epoch": 2.4686062977182814, "grad_norm": 0.609727969315246, "learning_rate": 9.840497431738308e-06, "loss": 0.2434, "step": 84550 }, { "epoch": 2.468752280988599, "grad_norm": 0.5810413492711193, "learning_rate": 9.837793998377942e-06, "loss": 0.2459, "step": 84555 }, { "epoch": 2.468898264258916, "grad_norm": 0.6173887510667577, "learning_rate": 9.835090565017572e-06, "loss": 0.2574, "step": 84560 }, { "epoch": 2.469044247529233, "grad_norm": 0.6172906363767537, "learning_rate": 9.832387131657204e-06, "loss": 0.2456, "step": 84565 }, { "epoch": 2.4691902307995504, "grad_norm": 0.5398664547644059, "learning_rate": 9.829683698296839e-06, "loss": 0.2386, "step": 84570 }, { "epoch": 2.469336214069868, "grad_norm": 0.5768473071394534, "learning_rate": 9.826980264936469e-06, "loss": 0.2374, "step": 84575 }, { "epoch": 2.469482197340185, "grad_norm": 0.5473445607036914, "learning_rate": 9.824276831576103e-06, "loss": 0.251, "step": 84580 }, { "epoch": 2.469628180610502, "grad_norm": 0.5502024358033218, "learning_rate": 9.821573398215734e-06, "loss": 0.2363, "step": 84585 }, { "epoch": 2.4697741638808193, "grad_norm": 0.5735361358635124, "learning_rate": 9.818869964855366e-06, "loss": 0.252, "step": 84590 }, { "epoch": 2.4699201471511367, "grad_norm": 0.5241306369941079, "learning_rate": 9.816166531495e-06, "loss": 0.2301, "step": 84595 }, { "epoch": 2.4700661304214537, "grad_norm": 0.6057262243550974, "learning_rate": 9.81346309813463e-06, "loss": 0.2743, "step": 84600 }, { "epoch": 2.4702121136917707, "grad_norm": 0.5673012421081064, "learning_rate": 9.810759664774263e-06, "loss": 0.2437, "step": 84605 }, { "epoch": 2.470358096962088, "grad_norm": 0.5714414856044969, "learning_rate": 9.808056231413897e-06, "loss": 0.2434, "step": 84610 }, { "epoch": 2.470504080232405, "grad_norm": 0.573675022316931, "learning_rate": 9.805352798053528e-06, "loss": 0.2636, "step": 84615 }, { "epoch": 2.4706500635027226, "grad_norm": 0.6101716532235675, "learning_rate": 9.80264936469316e-06, "loss": 0.2389, "step": 84620 }, { "epoch": 2.4707960467730397, "grad_norm": 0.5511720340689539, "learning_rate": 9.799945931332794e-06, "loss": 0.2399, "step": 84625 }, { "epoch": 2.470942030043357, "grad_norm": 0.6115558550693689, "learning_rate": 9.797242497972425e-06, "loss": 0.2559, "step": 84630 }, { "epoch": 2.471088013313674, "grad_norm": 0.5421890273758774, "learning_rate": 9.794539064612057e-06, "loss": 0.2478, "step": 84635 }, { "epoch": 2.4712339965839916, "grad_norm": 0.5735464131012084, "learning_rate": 9.791835631251691e-06, "loss": 0.2707, "step": 84640 }, { "epoch": 2.4713799798543086, "grad_norm": 0.548233887204223, "learning_rate": 9.789132197891322e-06, "loss": 0.2388, "step": 84645 }, { "epoch": 2.471525963124626, "grad_norm": 0.5902911798042427, "learning_rate": 9.786428764530956e-06, "loss": 0.255, "step": 84650 }, { "epoch": 2.471671946394943, "grad_norm": 0.6006567452733159, "learning_rate": 9.783725331170588e-06, "loss": 0.2579, "step": 84655 }, { "epoch": 2.4718179296652605, "grad_norm": 0.6463296674761054, "learning_rate": 9.781021897810219e-06, "loss": 0.255, "step": 84660 }, { "epoch": 2.4719639129355775, "grad_norm": 0.6056949204144773, "learning_rate": 9.778318464449853e-06, "loss": 0.2592, "step": 84665 }, { "epoch": 2.472109896205895, "grad_norm": 0.6186957720146765, "learning_rate": 9.775615031089485e-06, "loss": 0.2399, "step": 84670 }, { "epoch": 2.472255879476212, "grad_norm": 0.6310438753157207, "learning_rate": 9.772911597729115e-06, "loss": 0.2443, "step": 84675 }, { "epoch": 2.4724018627465294, "grad_norm": 0.5891000194720767, "learning_rate": 9.77020816436875e-06, "loss": 0.2441, "step": 84680 }, { "epoch": 2.4725478460168464, "grad_norm": 0.5812613136494317, "learning_rate": 9.767504731008382e-06, "loss": 0.2534, "step": 84685 }, { "epoch": 2.472693829287164, "grad_norm": 0.5753717713212184, "learning_rate": 9.764801297648012e-06, "loss": 0.2443, "step": 84690 }, { "epoch": 2.472839812557481, "grad_norm": 0.5483611672544599, "learning_rate": 9.762097864287646e-06, "loss": 0.2532, "step": 84695 }, { "epoch": 2.4729857958277983, "grad_norm": 0.6210811519104771, "learning_rate": 9.759394430927279e-06, "loss": 0.2603, "step": 84700 }, { "epoch": 2.4731317790981153, "grad_norm": 0.5988784652341985, "learning_rate": 9.75669099756691e-06, "loss": 0.2467, "step": 84705 }, { "epoch": 2.4732777623684328, "grad_norm": 0.5818818841608696, "learning_rate": 9.753987564206543e-06, "loss": 0.2644, "step": 84710 }, { "epoch": 2.47342374563875, "grad_norm": 0.576711322597246, "learning_rate": 9.751284130846176e-06, "loss": 0.2588, "step": 84715 }, { "epoch": 2.473569728909067, "grad_norm": 0.592150812090461, "learning_rate": 9.748580697485806e-06, "loss": 0.2673, "step": 84720 }, { "epoch": 2.4737157121793842, "grad_norm": 0.5889793331310387, "learning_rate": 9.74587726412544e-06, "loss": 0.2554, "step": 84725 }, { "epoch": 2.4738616954497017, "grad_norm": 0.5992811740270054, "learning_rate": 9.743173830765073e-06, "loss": 0.2406, "step": 84730 }, { "epoch": 2.4740076787200187, "grad_norm": 0.542436047466807, "learning_rate": 9.740470397404705e-06, "loss": 0.2426, "step": 84735 }, { "epoch": 2.4741536619903357, "grad_norm": 0.5870823766846803, "learning_rate": 9.737766964044337e-06, "loss": 0.2578, "step": 84740 }, { "epoch": 2.474299645260653, "grad_norm": 0.5934471464027987, "learning_rate": 9.73506353068397e-06, "loss": 0.2457, "step": 84745 }, { "epoch": 2.47444562853097, "grad_norm": 0.5449935869694035, "learning_rate": 9.732360097323602e-06, "loss": 0.2511, "step": 84750 }, { "epoch": 2.4745916118012876, "grad_norm": 0.6048613240982639, "learning_rate": 9.729656663963234e-06, "loss": 0.2549, "step": 84755 }, { "epoch": 2.4747375950716046, "grad_norm": 0.5723346443885022, "learning_rate": 9.726953230602867e-06, "loss": 0.2509, "step": 84760 }, { "epoch": 2.474883578341922, "grad_norm": 0.622329578370841, "learning_rate": 9.724249797242499e-06, "loss": 0.2402, "step": 84765 }, { "epoch": 2.475029561612239, "grad_norm": 0.5463563182480197, "learning_rate": 9.721546363882131e-06, "loss": 0.2441, "step": 84770 }, { "epoch": 2.4751755448825565, "grad_norm": 0.5560176815130072, "learning_rate": 9.718842930521762e-06, "loss": 0.2288, "step": 84775 }, { "epoch": 2.4753215281528735, "grad_norm": 0.6070285777780019, "learning_rate": 9.716139497161396e-06, "loss": 0.2669, "step": 84780 }, { "epoch": 2.475467511423191, "grad_norm": 0.5682255426063714, "learning_rate": 9.713436063801028e-06, "loss": 0.2455, "step": 84785 }, { "epoch": 2.475613494693508, "grad_norm": 0.5564510856026259, "learning_rate": 9.710732630440659e-06, "loss": 0.2528, "step": 84790 }, { "epoch": 2.4757594779638255, "grad_norm": 0.5854972088600133, "learning_rate": 9.708029197080293e-06, "loss": 0.2283, "step": 84795 }, { "epoch": 2.4759054612341425, "grad_norm": 0.5434382918571385, "learning_rate": 9.705325763719925e-06, "loss": 0.2562, "step": 84800 }, { "epoch": 2.47605144450446, "grad_norm": 0.5852667546934646, "learning_rate": 9.702622330359556e-06, "loss": 0.2439, "step": 84805 }, { "epoch": 2.476197427774777, "grad_norm": 0.5682596810202188, "learning_rate": 9.69991889699919e-06, "loss": 0.2442, "step": 84810 }, { "epoch": 2.4763434110450944, "grad_norm": 0.6007675098668432, "learning_rate": 9.697215463638822e-06, "loss": 0.2469, "step": 84815 }, { "epoch": 2.4764893943154114, "grad_norm": 0.5628728756298955, "learning_rate": 9.694512030278454e-06, "loss": 0.2534, "step": 84820 }, { "epoch": 2.476635377585729, "grad_norm": 0.5724763872857078, "learning_rate": 9.691808596918087e-06, "loss": 0.2341, "step": 84825 }, { "epoch": 2.476781360856046, "grad_norm": 0.6105534267865552, "learning_rate": 9.689105163557719e-06, "loss": 0.2531, "step": 84830 }, { "epoch": 2.4769273441263633, "grad_norm": 0.5844609148270631, "learning_rate": 9.686401730197351e-06, "loss": 0.2448, "step": 84835 }, { "epoch": 2.4770733273966803, "grad_norm": 0.6036993551881881, "learning_rate": 9.683698296836984e-06, "loss": 0.2535, "step": 84840 }, { "epoch": 2.4772193106669977, "grad_norm": 0.5779642553406787, "learning_rate": 9.680994863476616e-06, "loss": 0.2499, "step": 84845 }, { "epoch": 2.4773652939373147, "grad_norm": 0.5756050026954442, "learning_rate": 9.678291430116248e-06, "loss": 0.2587, "step": 84850 }, { "epoch": 2.4775112772076318, "grad_norm": 0.5881837720898079, "learning_rate": 9.67558799675588e-06, "loss": 0.2549, "step": 84855 }, { "epoch": 2.477657260477949, "grad_norm": 0.6238916291342882, "learning_rate": 9.672884563395513e-06, "loss": 0.2614, "step": 84860 }, { "epoch": 2.4778032437482667, "grad_norm": 0.627375913481892, "learning_rate": 9.670181130035145e-06, "loss": 0.249, "step": 84865 }, { "epoch": 2.4779492270185837, "grad_norm": 0.5519383417221037, "learning_rate": 9.667477696674778e-06, "loss": 0.2401, "step": 84870 }, { "epoch": 2.4780952102889007, "grad_norm": 0.5476932884553797, "learning_rate": 9.66477426331441e-06, "loss": 0.2389, "step": 84875 }, { "epoch": 2.478241193559218, "grad_norm": 0.5360444757309688, "learning_rate": 9.662070829954042e-06, "loss": 0.2445, "step": 84880 }, { "epoch": 2.4783871768295356, "grad_norm": 0.5894111185759112, "learning_rate": 9.659367396593674e-06, "loss": 0.2516, "step": 84885 }, { "epoch": 2.4785331600998526, "grad_norm": 0.6331222037792079, "learning_rate": 9.656663963233307e-06, "loss": 0.2525, "step": 84890 }, { "epoch": 2.4786791433701696, "grad_norm": 0.5696460194005671, "learning_rate": 9.653960529872939e-06, "loss": 0.249, "step": 84895 }, { "epoch": 2.478825126640487, "grad_norm": 0.6004465344070609, "learning_rate": 9.651257096512571e-06, "loss": 0.2738, "step": 84900 }, { "epoch": 2.478971109910804, "grad_norm": 0.5825426590284616, "learning_rate": 9.648553663152204e-06, "loss": 0.2364, "step": 84905 }, { "epoch": 2.4791170931811215, "grad_norm": 0.5995071014668804, "learning_rate": 9.645850229791836e-06, "loss": 0.2476, "step": 84910 }, { "epoch": 2.4792630764514385, "grad_norm": 0.6346052596908114, "learning_rate": 9.643146796431468e-06, "loss": 0.2541, "step": 84915 }, { "epoch": 2.479409059721756, "grad_norm": 0.5951368227397535, "learning_rate": 9.6404433630711e-06, "loss": 0.2369, "step": 84920 }, { "epoch": 2.479555042992073, "grad_norm": 0.5817891841799552, "learning_rate": 9.637739929710733e-06, "loss": 0.2509, "step": 84925 }, { "epoch": 2.4797010262623904, "grad_norm": 0.6454442650066792, "learning_rate": 9.635036496350365e-06, "loss": 0.2343, "step": 84930 }, { "epoch": 2.4798470095327074, "grad_norm": 0.5949247304570207, "learning_rate": 9.632333062989998e-06, "loss": 0.2579, "step": 84935 }, { "epoch": 2.479992992803025, "grad_norm": 0.6013292407124833, "learning_rate": 9.62962962962963e-06, "loss": 0.2537, "step": 84940 }, { "epoch": 2.480138976073342, "grad_norm": 0.5866631466555509, "learning_rate": 9.626926196269262e-06, "loss": 0.2451, "step": 84945 }, { "epoch": 2.4802849593436593, "grad_norm": 0.523055081589701, "learning_rate": 9.624222762908895e-06, "loss": 0.2534, "step": 84950 }, { "epoch": 2.4804309426139763, "grad_norm": 0.6138282241417896, "learning_rate": 9.621519329548527e-06, "loss": 0.2693, "step": 84955 }, { "epoch": 2.480576925884294, "grad_norm": 0.5896540865314818, "learning_rate": 9.61881589618816e-06, "loss": 0.2402, "step": 84960 }, { "epoch": 2.480722909154611, "grad_norm": 0.5262468856646866, "learning_rate": 9.616112462827792e-06, "loss": 0.2529, "step": 84965 }, { "epoch": 2.4808688924249283, "grad_norm": 0.5982672814597241, "learning_rate": 9.613409029467424e-06, "loss": 0.2579, "step": 84970 }, { "epoch": 2.4810148756952453, "grad_norm": 0.5912753711227423, "learning_rate": 9.610705596107056e-06, "loss": 0.2509, "step": 84975 }, { "epoch": 2.4811608589655627, "grad_norm": 0.5588857046504082, "learning_rate": 9.608002162746689e-06, "loss": 0.2446, "step": 84980 }, { "epoch": 2.4813068422358797, "grad_norm": 0.6091718760366794, "learning_rate": 9.60529872938632e-06, "loss": 0.2428, "step": 84985 }, { "epoch": 2.481452825506197, "grad_norm": 0.5927273431351856, "learning_rate": 9.602595296025955e-06, "loss": 0.2481, "step": 84990 }, { "epoch": 2.481598808776514, "grad_norm": 0.5474756498074101, "learning_rate": 9.599891862665585e-06, "loss": 0.2553, "step": 84995 }, { "epoch": 2.4817447920468316, "grad_norm": 0.6225196240193442, "learning_rate": 9.597188429305218e-06, "loss": 0.2578, "step": 85000 }, { "epoch": 2.4818907753171486, "grad_norm": 0.5641930401585372, "learning_rate": 9.594484995944852e-06, "loss": 0.2419, "step": 85005 }, { "epoch": 2.4820367585874656, "grad_norm": 0.5701184965400143, "learning_rate": 9.591781562584482e-06, "loss": 0.2441, "step": 85010 }, { "epoch": 2.482182741857783, "grad_norm": 0.5928144080201359, "learning_rate": 9.589078129224115e-06, "loss": 0.2525, "step": 85015 }, { "epoch": 2.4823287251281005, "grad_norm": 0.5556762686835647, "learning_rate": 9.586374695863749e-06, "loss": 0.2612, "step": 85020 }, { "epoch": 2.4824747083984176, "grad_norm": 0.5955222731046704, "learning_rate": 9.58367126250338e-06, "loss": 0.2485, "step": 85025 }, { "epoch": 2.4826206916687346, "grad_norm": 0.5924600672535838, "learning_rate": 9.580967829143012e-06, "loss": 0.2541, "step": 85030 }, { "epoch": 2.482766674939052, "grad_norm": 0.5835829849179341, "learning_rate": 9.578264395782644e-06, "loss": 0.2454, "step": 85035 }, { "epoch": 2.482912658209369, "grad_norm": 0.5919243707615224, "learning_rate": 9.575560962422276e-06, "loss": 0.2454, "step": 85040 }, { "epoch": 2.4830586414796865, "grad_norm": 0.5494395004014491, "learning_rate": 9.572857529061909e-06, "loss": 0.2265, "step": 85045 }, { "epoch": 2.4832046247500035, "grad_norm": 0.6235353188559359, "learning_rate": 9.570154095701541e-06, "loss": 0.2616, "step": 85050 }, { "epoch": 2.483350608020321, "grad_norm": 0.6148167878171907, "learning_rate": 9.567450662341173e-06, "loss": 0.2573, "step": 85055 }, { "epoch": 2.483496591290638, "grad_norm": 0.5818701683615235, "learning_rate": 9.564747228980806e-06, "loss": 0.2551, "step": 85060 }, { "epoch": 2.4836425745609554, "grad_norm": 0.5523736967419024, "learning_rate": 9.562043795620438e-06, "loss": 0.2394, "step": 85065 }, { "epoch": 2.4837885578312724, "grad_norm": 0.5625195002289126, "learning_rate": 9.55934036226007e-06, "loss": 0.2334, "step": 85070 }, { "epoch": 2.48393454110159, "grad_norm": 0.5941071297467215, "learning_rate": 9.556636928899704e-06, "loss": 0.2539, "step": 85075 }, { "epoch": 2.484080524371907, "grad_norm": 0.6392550906054483, "learning_rate": 9.553933495539335e-06, "loss": 0.2568, "step": 85080 }, { "epoch": 2.4842265076422243, "grad_norm": 0.5622489426736929, "learning_rate": 9.551230062178967e-06, "loss": 0.251, "step": 85085 }, { "epoch": 2.4843724909125413, "grad_norm": 0.5527826470449728, "learning_rate": 9.548526628818601e-06, "loss": 0.2361, "step": 85090 }, { "epoch": 2.4845184741828588, "grad_norm": 0.560916394786308, "learning_rate": 9.545823195458232e-06, "loss": 0.2309, "step": 85095 }, { "epoch": 2.4846644574531758, "grad_norm": 0.5285108884121346, "learning_rate": 9.543119762097864e-06, "loss": 0.2523, "step": 85100 }, { "epoch": 2.484810440723493, "grad_norm": 0.5551991329237284, "learning_rate": 9.540416328737498e-06, "loss": 0.2474, "step": 85105 }, { "epoch": 2.4849564239938102, "grad_norm": 0.5927403786230094, "learning_rate": 9.537712895377129e-06, "loss": 0.2472, "step": 85110 }, { "epoch": 2.4851024072641277, "grad_norm": 0.5870595789320309, "learning_rate": 9.535009462016761e-06, "loss": 0.2527, "step": 85115 }, { "epoch": 2.4852483905344447, "grad_norm": 0.6359349481781725, "learning_rate": 9.532306028656395e-06, "loss": 0.2577, "step": 85120 }, { "epoch": 2.485394373804762, "grad_norm": 0.5880376272982065, "learning_rate": 9.529602595296026e-06, "loss": 0.253, "step": 85125 }, { "epoch": 2.485540357075079, "grad_norm": 0.5854666666804859, "learning_rate": 9.526899161935658e-06, "loss": 0.2533, "step": 85130 }, { "epoch": 2.4856863403453966, "grad_norm": 0.5358264134852609, "learning_rate": 9.524195728575292e-06, "loss": 0.2447, "step": 85135 }, { "epoch": 2.4858323236157136, "grad_norm": 0.5850088955451147, "learning_rate": 9.521492295214923e-06, "loss": 0.2562, "step": 85140 }, { "epoch": 2.485978306886031, "grad_norm": 0.6215948418736629, "learning_rate": 9.518788861854555e-06, "loss": 0.2581, "step": 85145 }, { "epoch": 2.486124290156348, "grad_norm": 0.5131829341295014, "learning_rate": 9.516085428494189e-06, "loss": 0.2446, "step": 85150 }, { "epoch": 2.4862702734266655, "grad_norm": 0.610249612038933, "learning_rate": 9.51338199513382e-06, "loss": 0.258, "step": 85155 }, { "epoch": 2.4864162566969825, "grad_norm": 0.6051339223480401, "learning_rate": 9.510678561773454e-06, "loss": 0.2495, "step": 85160 }, { "epoch": 2.4865622399672995, "grad_norm": 0.6245924024424904, "learning_rate": 9.507975128413086e-06, "loss": 0.2467, "step": 85165 }, { "epoch": 2.486708223237617, "grad_norm": 0.5839607714478465, "learning_rate": 9.505271695052717e-06, "loss": 0.2463, "step": 85170 }, { "epoch": 2.4868542065079344, "grad_norm": 0.5418568766175194, "learning_rate": 9.50256826169235e-06, "loss": 0.2554, "step": 85175 }, { "epoch": 2.4870001897782514, "grad_norm": 0.6084991919113559, "learning_rate": 9.499864828331983e-06, "loss": 0.2583, "step": 85180 }, { "epoch": 2.4871461730485684, "grad_norm": 0.5533413364290114, "learning_rate": 9.497161394971614e-06, "loss": 0.2451, "step": 85185 }, { "epoch": 2.487292156318886, "grad_norm": 0.5814902352543417, "learning_rate": 9.494457961611248e-06, "loss": 0.2507, "step": 85190 }, { "epoch": 2.487438139589203, "grad_norm": 0.572301148956977, "learning_rate": 9.49175452825088e-06, "loss": 0.2428, "step": 85195 }, { "epoch": 2.4875841228595204, "grad_norm": 0.6021985208279335, "learning_rate": 9.48905109489051e-06, "loss": 0.2598, "step": 85200 }, { "epoch": 2.4877301061298374, "grad_norm": 0.5563394824339851, "learning_rate": 9.486347661530144e-06, "loss": 0.2543, "step": 85205 }, { "epoch": 2.487876089400155, "grad_norm": 0.5401611247442749, "learning_rate": 9.483644228169777e-06, "loss": 0.2318, "step": 85210 }, { "epoch": 2.488022072670472, "grad_norm": 0.5897471325768575, "learning_rate": 9.480940794809407e-06, "loss": 0.2482, "step": 85215 }, { "epoch": 2.4881680559407893, "grad_norm": 0.6050019971826965, "learning_rate": 9.478237361449041e-06, "loss": 0.2489, "step": 85220 }, { "epoch": 2.4883140392111063, "grad_norm": 0.624220002143721, "learning_rate": 9.475533928088672e-06, "loss": 0.2678, "step": 85225 }, { "epoch": 2.4884600224814237, "grad_norm": 0.5789033643769568, "learning_rate": 9.472830494728304e-06, "loss": 0.2374, "step": 85230 }, { "epoch": 2.4886060057517407, "grad_norm": 0.6251684576059355, "learning_rate": 9.470127061367938e-06, "loss": 0.2636, "step": 85235 }, { "epoch": 2.488751989022058, "grad_norm": 0.5394554731557988, "learning_rate": 9.467423628007569e-06, "loss": 0.25, "step": 85240 }, { "epoch": 2.488897972292375, "grad_norm": 0.549961724736832, "learning_rate": 9.464720194647203e-06, "loss": 0.2499, "step": 85245 }, { "epoch": 2.4890439555626926, "grad_norm": 0.5986360444680576, "learning_rate": 9.462016761286835e-06, "loss": 0.2557, "step": 85250 }, { "epoch": 2.4891899388330097, "grad_norm": 0.580941804360723, "learning_rate": 9.459313327926466e-06, "loss": 0.248, "step": 85255 }, { "epoch": 2.489335922103327, "grad_norm": 0.6247547690707355, "learning_rate": 9.4566098945661e-06, "loss": 0.26, "step": 85260 }, { "epoch": 2.489481905373644, "grad_norm": 0.5503839300996428, "learning_rate": 9.453906461205732e-06, "loss": 0.2344, "step": 85265 }, { "epoch": 2.4896278886439616, "grad_norm": 0.5864029795340936, "learning_rate": 9.451203027845363e-06, "loss": 0.2411, "step": 85270 }, { "epoch": 2.4897738719142786, "grad_norm": 0.5490286743104917, "learning_rate": 9.448499594484997e-06, "loss": 0.2449, "step": 85275 }, { "epoch": 2.489919855184596, "grad_norm": 0.529051681689844, "learning_rate": 9.44579616112463e-06, "loss": 0.2393, "step": 85280 }, { "epoch": 2.490065838454913, "grad_norm": 0.624121620262649, "learning_rate": 9.44309272776426e-06, "loss": 0.2512, "step": 85285 }, { "epoch": 2.4902118217252305, "grad_norm": 0.5881210343067983, "learning_rate": 9.440389294403894e-06, "loss": 0.2491, "step": 85290 }, { "epoch": 2.4903578049955475, "grad_norm": 0.5613549640890082, "learning_rate": 9.437685861043526e-06, "loss": 0.244, "step": 85295 }, { "epoch": 2.4905037882658645, "grad_norm": 0.5611524482157947, "learning_rate": 9.434982427683157e-06, "loss": 0.2369, "step": 85300 }, { "epoch": 2.490649771536182, "grad_norm": 0.6120813515204974, "learning_rate": 9.43227899432279e-06, "loss": 0.2557, "step": 85305 }, { "epoch": 2.4907957548064994, "grad_norm": 0.5932334175682515, "learning_rate": 9.429575560962423e-06, "loss": 0.2422, "step": 85310 }, { "epoch": 2.4909417380768164, "grad_norm": 0.5992797474789591, "learning_rate": 9.426872127602054e-06, "loss": 0.2535, "step": 85315 }, { "epoch": 2.4910877213471334, "grad_norm": 0.6080293959287721, "learning_rate": 9.424168694241688e-06, "loss": 0.2403, "step": 85320 }, { "epoch": 2.491233704617451, "grad_norm": 0.5801991367223482, "learning_rate": 9.42146526088132e-06, "loss": 0.2435, "step": 85325 }, { "epoch": 2.491379687887768, "grad_norm": 0.5879164325751262, "learning_rate": 9.418761827520952e-06, "loss": 0.2427, "step": 85330 }, { "epoch": 2.4915256711580853, "grad_norm": 0.6243995077672929, "learning_rate": 9.416058394160585e-06, "loss": 0.236, "step": 85335 }, { "epoch": 2.4916716544284023, "grad_norm": 0.5431494773810999, "learning_rate": 9.413354960800217e-06, "loss": 0.2337, "step": 85340 }, { "epoch": 2.49181763769872, "grad_norm": 0.579063003431724, "learning_rate": 9.41065152743985e-06, "loss": 0.2577, "step": 85345 }, { "epoch": 2.491963620969037, "grad_norm": 0.5652730593479662, "learning_rate": 9.407948094079482e-06, "loss": 0.2429, "step": 85350 }, { "epoch": 2.4921096042393542, "grad_norm": 0.5523284808164152, "learning_rate": 9.405244660719114e-06, "loss": 0.2324, "step": 85355 }, { "epoch": 2.4922555875096712, "grad_norm": 0.591978963972107, "learning_rate": 9.402541227358746e-06, "loss": 0.2476, "step": 85360 }, { "epoch": 2.4924015707799887, "grad_norm": 0.5681870565190665, "learning_rate": 9.399837793998379e-06, "loss": 0.24, "step": 85365 }, { "epoch": 2.4925475540503057, "grad_norm": 0.5877699723501465, "learning_rate": 9.397134360638011e-06, "loss": 0.2458, "step": 85370 }, { "epoch": 2.492693537320623, "grad_norm": 0.5458294716444057, "learning_rate": 9.394430927277643e-06, "loss": 0.2367, "step": 85375 }, { "epoch": 2.49283952059094, "grad_norm": 0.6003945717488659, "learning_rate": 9.391727493917276e-06, "loss": 0.2596, "step": 85380 }, { "epoch": 2.4929855038612576, "grad_norm": 0.580152508754779, "learning_rate": 9.389024060556908e-06, "loss": 0.2309, "step": 85385 }, { "epoch": 2.4931314871315746, "grad_norm": 0.598195506005596, "learning_rate": 9.38632062719654e-06, "loss": 0.2408, "step": 85390 }, { "epoch": 2.493277470401892, "grad_norm": 0.6185332018170142, "learning_rate": 9.383617193836173e-06, "loss": 0.2508, "step": 85395 }, { "epoch": 2.493423453672209, "grad_norm": 0.5567645698744595, "learning_rate": 9.380913760475805e-06, "loss": 0.2455, "step": 85400 }, { "epoch": 2.4935694369425265, "grad_norm": 0.5796426237798672, "learning_rate": 9.378210327115437e-06, "loss": 0.2405, "step": 85405 }, { "epoch": 2.4937154202128435, "grad_norm": 0.6115849612012595, "learning_rate": 9.37550689375507e-06, "loss": 0.2492, "step": 85410 }, { "epoch": 2.493861403483161, "grad_norm": 0.6156612637709246, "learning_rate": 9.372803460394702e-06, "loss": 0.2785, "step": 85415 }, { "epoch": 2.494007386753478, "grad_norm": 0.5601034188404116, "learning_rate": 9.370100027034334e-06, "loss": 0.2284, "step": 85420 }, { "epoch": 2.4941533700237954, "grad_norm": 0.5274608694464743, "learning_rate": 9.367396593673966e-06, "loss": 0.2416, "step": 85425 }, { "epoch": 2.4942993532941125, "grad_norm": 0.6139628516144329, "learning_rate": 9.364693160313599e-06, "loss": 0.2558, "step": 85430 }, { "epoch": 2.49444533656443, "grad_norm": 0.5534882646498919, "learning_rate": 9.361989726953231e-06, "loss": 0.2543, "step": 85435 }, { "epoch": 2.494591319834747, "grad_norm": 0.5602586151921943, "learning_rate": 9.359286293592863e-06, "loss": 0.2383, "step": 85440 }, { "epoch": 2.4947373031050644, "grad_norm": 0.5985281517470286, "learning_rate": 9.356582860232496e-06, "loss": 0.2515, "step": 85445 }, { "epoch": 2.4948832863753814, "grad_norm": 0.6180274552160917, "learning_rate": 9.353879426872128e-06, "loss": 0.2571, "step": 85450 }, { "epoch": 2.4950292696456984, "grad_norm": 0.5896630985840929, "learning_rate": 9.35117599351176e-06, "loss": 0.2533, "step": 85455 }, { "epoch": 2.495175252916016, "grad_norm": 0.5615560746399079, "learning_rate": 9.348472560151393e-06, "loss": 0.2522, "step": 85460 }, { "epoch": 2.4953212361863333, "grad_norm": 0.5884326361681398, "learning_rate": 9.345769126791025e-06, "loss": 0.2507, "step": 85465 }, { "epoch": 2.4954672194566503, "grad_norm": 0.5849448202534923, "learning_rate": 9.343065693430657e-06, "loss": 0.244, "step": 85470 }, { "epoch": 2.4956132027269673, "grad_norm": 0.5681286241660648, "learning_rate": 9.34036226007029e-06, "loss": 0.2402, "step": 85475 }, { "epoch": 2.4957591859972847, "grad_norm": 0.5834875397372741, "learning_rate": 9.337658826709922e-06, "loss": 0.2475, "step": 85480 }, { "epoch": 2.4959051692676018, "grad_norm": 0.6108523553438568, "learning_rate": 9.334955393349554e-06, "loss": 0.2531, "step": 85485 }, { "epoch": 2.496051152537919, "grad_norm": 0.5784717167186646, "learning_rate": 9.332251959989187e-06, "loss": 0.237, "step": 85490 }, { "epoch": 2.496197135808236, "grad_norm": 0.549075929835045, "learning_rate": 9.329548526628819e-06, "loss": 0.2369, "step": 85495 }, { "epoch": 2.4963431190785537, "grad_norm": 0.6085024209758205, "learning_rate": 9.326845093268451e-06, "loss": 0.248, "step": 85500 }, { "epoch": 2.4964891023488707, "grad_norm": 0.5775871068755383, "learning_rate": 9.324141659908084e-06, "loss": 0.2495, "step": 85505 }, { "epoch": 2.496635085619188, "grad_norm": 0.5863226582672901, "learning_rate": 9.321438226547716e-06, "loss": 0.2537, "step": 85510 }, { "epoch": 2.496781068889505, "grad_norm": 0.5877719350353483, "learning_rate": 9.318734793187348e-06, "loss": 0.2509, "step": 85515 }, { "epoch": 2.4969270521598226, "grad_norm": 0.5622563499360623, "learning_rate": 9.31603135982698e-06, "loss": 0.2461, "step": 85520 }, { "epoch": 2.4970730354301396, "grad_norm": 0.5809729911286906, "learning_rate": 9.313327926466613e-06, "loss": 0.2566, "step": 85525 }, { "epoch": 2.497219018700457, "grad_norm": 0.584040498733987, "learning_rate": 9.310624493106245e-06, "loss": 0.2408, "step": 85530 }, { "epoch": 2.497365001970774, "grad_norm": 0.536214868114231, "learning_rate": 9.307921059745877e-06, "loss": 0.241, "step": 85535 }, { "epoch": 2.4975109852410915, "grad_norm": 0.5413265665368814, "learning_rate": 9.30521762638551e-06, "loss": 0.2511, "step": 85540 }, { "epoch": 2.4976569685114085, "grad_norm": 0.5792858507400062, "learning_rate": 9.302514193025142e-06, "loss": 0.2456, "step": 85545 }, { "epoch": 2.497802951781726, "grad_norm": 0.5411562834869833, "learning_rate": 9.299810759664774e-06, "loss": 0.2541, "step": 85550 }, { "epoch": 2.497948935052043, "grad_norm": 0.5879566455345352, "learning_rate": 9.297107326304407e-06, "loss": 0.2516, "step": 85555 }, { "epoch": 2.4980949183223604, "grad_norm": 0.5733132009812092, "learning_rate": 9.294403892944039e-06, "loss": 0.2383, "step": 85560 }, { "epoch": 2.4982409015926774, "grad_norm": 0.5800359814654638, "learning_rate": 9.291700459583671e-06, "loss": 0.233, "step": 85565 }, { "epoch": 2.498386884862995, "grad_norm": 0.5686940825815616, "learning_rate": 9.288997026223304e-06, "loss": 0.2312, "step": 85570 }, { "epoch": 2.498532868133312, "grad_norm": 0.550520011215352, "learning_rate": 9.286293592862936e-06, "loss": 0.2541, "step": 85575 }, { "epoch": 2.4986788514036293, "grad_norm": 0.6036048337918483, "learning_rate": 9.283590159502568e-06, "loss": 0.2549, "step": 85580 }, { "epoch": 2.4988248346739463, "grad_norm": 0.5874745473702752, "learning_rate": 9.280886726142202e-06, "loss": 0.2614, "step": 85585 }, { "epoch": 2.4989708179442633, "grad_norm": 0.5952189452852648, "learning_rate": 9.278183292781833e-06, "loss": 0.2453, "step": 85590 }, { "epoch": 2.499116801214581, "grad_norm": 0.5515031560200978, "learning_rate": 9.275479859421465e-06, "loss": 0.2365, "step": 85595 }, { "epoch": 2.4992627844848982, "grad_norm": 0.6049507344859434, "learning_rate": 9.2727764260611e-06, "loss": 0.2624, "step": 85600 }, { "epoch": 2.4994087677552153, "grad_norm": 0.5709080374720084, "learning_rate": 9.27007299270073e-06, "loss": 0.2499, "step": 85605 }, { "epoch": 2.4995547510255323, "grad_norm": 0.5478496309274699, "learning_rate": 9.267369559340362e-06, "loss": 0.2306, "step": 85610 }, { "epoch": 2.4997007342958497, "grad_norm": 0.6408771196885149, "learning_rate": 9.264666125979996e-06, "loss": 0.2625, "step": 85615 }, { "epoch": 2.499846717566167, "grad_norm": 0.6469730151199102, "learning_rate": 9.261962692619627e-06, "loss": 0.2627, "step": 85620 }, { "epoch": 2.499992700836484, "grad_norm": 0.5922439494085581, "learning_rate": 9.259259259259259e-06, "loss": 0.2655, "step": 85625 }, { "epoch": 2.500138684106801, "grad_norm": 0.6285613280428479, "learning_rate": 9.256555825898893e-06, "loss": 0.2612, "step": 85630 }, { "epoch": 2.5002846673771186, "grad_norm": 0.5653549227362539, "learning_rate": 9.253852392538524e-06, "loss": 0.2423, "step": 85635 }, { "epoch": 2.5004306506474356, "grad_norm": 0.57263144373957, "learning_rate": 9.251148959178156e-06, "loss": 0.2467, "step": 85640 }, { "epoch": 2.500576633917753, "grad_norm": 0.6204685728737812, "learning_rate": 9.24844552581779e-06, "loss": 0.2559, "step": 85645 }, { "epoch": 2.50072261718807, "grad_norm": 0.5317646667324387, "learning_rate": 9.24574209245742e-06, "loss": 0.2591, "step": 85650 }, { "epoch": 2.5008686004583875, "grad_norm": 0.57629241530215, "learning_rate": 9.243038659097053e-06, "loss": 0.2478, "step": 85655 }, { "epoch": 2.5010145837287046, "grad_norm": 0.540721007733005, "learning_rate": 9.240335225736687e-06, "loss": 0.2477, "step": 85660 }, { "epoch": 2.501160566999022, "grad_norm": 0.5864693337479647, "learning_rate": 9.237631792376318e-06, "loss": 0.2664, "step": 85665 }, { "epoch": 2.501306550269339, "grad_norm": 0.5583234410699807, "learning_rate": 9.234928359015952e-06, "loss": 0.2462, "step": 85670 }, { "epoch": 2.5014525335396565, "grad_norm": 0.5524676741726493, "learning_rate": 9.232224925655584e-06, "loss": 0.2519, "step": 85675 }, { "epoch": 2.5015985168099735, "grad_norm": 0.5755675348013849, "learning_rate": 9.229521492295215e-06, "loss": 0.2469, "step": 85680 }, { "epoch": 2.501744500080291, "grad_norm": 0.5922065376366843, "learning_rate": 9.226818058934849e-06, "loss": 0.257, "step": 85685 }, { "epoch": 2.501890483350608, "grad_norm": 0.5734366277781806, "learning_rate": 9.22411462557448e-06, "loss": 0.2554, "step": 85690 }, { "epoch": 2.5020364666209254, "grad_norm": 0.5777486676244159, "learning_rate": 9.221411192214112e-06, "loss": 0.2375, "step": 85695 }, { "epoch": 2.5021824498912424, "grad_norm": 0.5492139503693149, "learning_rate": 9.218707758853746e-06, "loss": 0.2434, "step": 85700 }, { "epoch": 2.50232843316156, "grad_norm": 0.5560699835069052, "learning_rate": 9.216004325493376e-06, "loss": 0.2352, "step": 85705 }, { "epoch": 2.502474416431877, "grad_norm": 0.580376366157834, "learning_rate": 9.213300892133009e-06, "loss": 0.2555, "step": 85710 }, { "epoch": 2.5026203997021943, "grad_norm": 0.5542246417734024, "learning_rate": 9.210597458772643e-06, "loss": 0.2486, "step": 85715 }, { "epoch": 2.5027663829725113, "grad_norm": 0.5697679284018453, "learning_rate": 9.207894025412273e-06, "loss": 0.2651, "step": 85720 }, { "epoch": 2.5029123662428283, "grad_norm": 0.592851997364461, "learning_rate": 9.205190592051905e-06, "loss": 0.2312, "step": 85725 }, { "epoch": 2.5030583495131458, "grad_norm": 0.6050500503826083, "learning_rate": 9.20248715869154e-06, "loss": 0.2465, "step": 85730 }, { "epoch": 2.503204332783463, "grad_norm": 0.5733145190791573, "learning_rate": 9.19978372533117e-06, "loss": 0.2538, "step": 85735 }, { "epoch": 2.5033503160537802, "grad_norm": 0.6120438381989306, "learning_rate": 9.197080291970802e-06, "loss": 0.2648, "step": 85740 }, { "epoch": 2.5034962993240972, "grad_norm": 0.5635573499241509, "learning_rate": 9.194376858610436e-06, "loss": 0.248, "step": 85745 }, { "epoch": 2.5036422825944147, "grad_norm": 0.5393719984314849, "learning_rate": 9.191673425250067e-06, "loss": 0.2434, "step": 85750 }, { "epoch": 2.503788265864732, "grad_norm": 0.585699561166867, "learning_rate": 9.188969991889701e-06, "loss": 0.2572, "step": 85755 }, { "epoch": 2.503934249135049, "grad_norm": 0.5919224866888071, "learning_rate": 9.186266558529333e-06, "loss": 0.2425, "step": 85760 }, { "epoch": 2.504080232405366, "grad_norm": 0.621888770898081, "learning_rate": 9.183563125168964e-06, "loss": 0.2493, "step": 85765 }, { "epoch": 2.5042262156756836, "grad_norm": 0.524490605830917, "learning_rate": 9.180859691808598e-06, "loss": 0.2344, "step": 85770 }, { "epoch": 2.504372198946001, "grad_norm": 0.5587366696488946, "learning_rate": 9.17815625844823e-06, "loss": 0.2496, "step": 85775 }, { "epoch": 2.504518182216318, "grad_norm": 0.5759097636466598, "learning_rate": 9.175452825087861e-06, "loss": 0.2593, "step": 85780 }, { "epoch": 2.504664165486635, "grad_norm": 0.5664622863254204, "learning_rate": 9.172749391727495e-06, "loss": 0.2428, "step": 85785 }, { "epoch": 2.5048101487569525, "grad_norm": 0.5830563506483498, "learning_rate": 9.170045958367127e-06, "loss": 0.2566, "step": 85790 }, { "epoch": 2.5049561320272695, "grad_norm": 0.6065548124431905, "learning_rate": 9.167342525006758e-06, "loss": 0.2519, "step": 85795 }, { "epoch": 2.505102115297587, "grad_norm": 0.549381666731188, "learning_rate": 9.164639091646392e-06, "loss": 0.2605, "step": 85800 }, { "epoch": 2.505248098567904, "grad_norm": 0.5470179064035863, "learning_rate": 9.161935658286024e-06, "loss": 0.2506, "step": 85805 }, { "epoch": 2.5053940818382214, "grad_norm": 0.5511444481604968, "learning_rate": 9.159232224925655e-06, "loss": 0.2664, "step": 85810 }, { "epoch": 2.5055400651085384, "grad_norm": 0.5444029363784663, "learning_rate": 9.156528791565289e-06, "loss": 0.244, "step": 85815 }, { "epoch": 2.505686048378856, "grad_norm": 0.5657813715490537, "learning_rate": 9.153825358204921e-06, "loss": 0.2339, "step": 85820 }, { "epoch": 2.505832031649173, "grad_norm": 0.5839041707852975, "learning_rate": 9.151121924844552e-06, "loss": 0.2528, "step": 85825 }, { "epoch": 2.5059780149194903, "grad_norm": 0.5554352573682656, "learning_rate": 9.148418491484186e-06, "loss": 0.2561, "step": 85830 }, { "epoch": 2.5061239981898074, "grad_norm": 0.5755482708534971, "learning_rate": 9.145715058123818e-06, "loss": 0.233, "step": 85835 }, { "epoch": 2.506269981460125, "grad_norm": 0.49624411162881055, "learning_rate": 9.14301162476345e-06, "loss": 0.2246, "step": 85840 }, { "epoch": 2.506415964730442, "grad_norm": 0.5617062851633352, "learning_rate": 9.140308191403083e-06, "loss": 0.2427, "step": 85845 }, { "epoch": 2.5065619480007593, "grad_norm": 0.5620713366516673, "learning_rate": 9.137604758042715e-06, "loss": 0.2419, "step": 85850 }, { "epoch": 2.5067079312710763, "grad_norm": 0.5569604059703047, "learning_rate": 9.134901324682347e-06, "loss": 0.2407, "step": 85855 }, { "epoch": 2.5068539145413933, "grad_norm": 0.6092583330525877, "learning_rate": 9.13219789132198e-06, "loss": 0.2416, "step": 85860 }, { "epoch": 2.5069998978117107, "grad_norm": 0.6108217285528468, "learning_rate": 9.129494457961612e-06, "loss": 0.2565, "step": 85865 }, { "epoch": 2.507145881082028, "grad_norm": 0.6214551901719871, "learning_rate": 9.126791024601244e-06, "loss": 0.2367, "step": 85870 }, { "epoch": 2.507291864352345, "grad_norm": 0.5418208890003481, "learning_rate": 9.124087591240877e-06, "loss": 0.2454, "step": 85875 }, { "epoch": 2.507437847622662, "grad_norm": 0.5794957675084348, "learning_rate": 9.121384157880507e-06, "loss": 0.265, "step": 85880 }, { "epoch": 2.5075838308929796, "grad_norm": 0.6269384615866607, "learning_rate": 9.118680724520141e-06, "loss": 0.259, "step": 85885 }, { "epoch": 2.507729814163297, "grad_norm": 0.5712153407684294, "learning_rate": 9.115977291159774e-06, "loss": 0.2398, "step": 85890 }, { "epoch": 2.507875797433614, "grad_norm": 0.5779581147737043, "learning_rate": 9.113273857799404e-06, "loss": 0.2691, "step": 85895 }, { "epoch": 2.508021780703931, "grad_norm": 0.6220995373229242, "learning_rate": 9.110570424439038e-06, "loss": 0.2618, "step": 85900 }, { "epoch": 2.5081677639742486, "grad_norm": 0.5310944418197833, "learning_rate": 9.10786699107867e-06, "loss": 0.2427, "step": 85905 }, { "epoch": 2.508313747244566, "grad_norm": 0.5977022532274766, "learning_rate": 9.105163557718303e-06, "loss": 0.2559, "step": 85910 }, { "epoch": 2.508459730514883, "grad_norm": 0.5931503705936311, "learning_rate": 9.102460124357935e-06, "loss": 0.2577, "step": 85915 }, { "epoch": 2.5086057137852, "grad_norm": 0.5957211848664062, "learning_rate": 9.099756690997568e-06, "loss": 0.2337, "step": 85920 }, { "epoch": 2.5087516970555175, "grad_norm": 0.566207800948442, "learning_rate": 9.0970532576372e-06, "loss": 0.2301, "step": 85925 }, { "epoch": 2.508897680325835, "grad_norm": 0.5693660126260981, "learning_rate": 9.094349824276832e-06, "loss": 0.236, "step": 85930 }, { "epoch": 2.509043663596152, "grad_norm": 0.5858955603548461, "learning_rate": 9.091646390916465e-06, "loss": 0.2364, "step": 85935 }, { "epoch": 2.509189646866469, "grad_norm": 0.5838669679567615, "learning_rate": 9.088942957556097e-06, "loss": 0.2436, "step": 85940 }, { "epoch": 2.5093356301367864, "grad_norm": 0.6063661760299914, "learning_rate": 9.086239524195729e-06, "loss": 0.2571, "step": 85945 }, { "epoch": 2.5094816134071034, "grad_norm": 0.6278828158985569, "learning_rate": 9.083536090835361e-06, "loss": 0.2582, "step": 85950 }, { "epoch": 2.509627596677421, "grad_norm": 0.5492118247899098, "learning_rate": 9.080832657474994e-06, "loss": 0.2519, "step": 85955 }, { "epoch": 2.509773579947738, "grad_norm": 0.5450520626689391, "learning_rate": 9.078129224114626e-06, "loss": 0.241, "step": 85960 }, { "epoch": 2.5099195632180553, "grad_norm": 0.5417194548438289, "learning_rate": 9.075425790754258e-06, "loss": 0.2475, "step": 85965 }, { "epoch": 2.5100655464883723, "grad_norm": 0.5771731306669629, "learning_rate": 9.07272235739389e-06, "loss": 0.2504, "step": 85970 }, { "epoch": 2.5102115297586898, "grad_norm": 0.5754122282181346, "learning_rate": 9.070018924033523e-06, "loss": 0.2405, "step": 85975 }, { "epoch": 2.510357513029007, "grad_norm": 0.5502831906924325, "learning_rate": 9.067315490673155e-06, "loss": 0.2446, "step": 85980 }, { "epoch": 2.5105034962993242, "grad_norm": 0.5806123869663637, "learning_rate": 9.064612057312788e-06, "loss": 0.2552, "step": 85985 }, { "epoch": 2.5106494795696412, "grad_norm": 0.5719688773561848, "learning_rate": 9.06190862395242e-06, "loss": 0.2361, "step": 85990 }, { "epoch": 2.5107954628399587, "grad_norm": 0.5778067289086403, "learning_rate": 9.059205190592052e-06, "loss": 0.2555, "step": 85995 }, { "epoch": 2.5109414461102757, "grad_norm": 0.5847358209018422, "learning_rate": 9.056501757231685e-06, "loss": 0.249, "step": 86000 }, { "epoch": 2.511087429380593, "grad_norm": 0.5817060732415537, "learning_rate": 9.053798323871317e-06, "loss": 0.2436, "step": 86005 }, { "epoch": 2.51123341265091, "grad_norm": 0.5753633912775844, "learning_rate": 9.05109489051095e-06, "loss": 0.2516, "step": 86010 }, { "epoch": 2.511379395921227, "grad_norm": 0.5748561881545382, "learning_rate": 9.048391457150582e-06, "loss": 0.248, "step": 86015 }, { "epoch": 2.5115253791915446, "grad_norm": 0.6202208902109069, "learning_rate": 9.045688023790214e-06, "loss": 0.261, "step": 86020 }, { "epoch": 2.511671362461862, "grad_norm": 0.5737699177436718, "learning_rate": 9.042984590429846e-06, "loss": 0.2558, "step": 86025 }, { "epoch": 2.511817345732179, "grad_norm": 0.5959573217734624, "learning_rate": 9.040281157069479e-06, "loss": 0.2449, "step": 86030 }, { "epoch": 2.511963329002496, "grad_norm": 0.5537467608948045, "learning_rate": 9.037577723709111e-06, "loss": 0.2409, "step": 86035 }, { "epoch": 2.5121093122728135, "grad_norm": 0.5474733975016055, "learning_rate": 9.034874290348743e-06, "loss": 0.2452, "step": 86040 }, { "epoch": 2.512255295543131, "grad_norm": 0.6175307007468561, "learning_rate": 9.032170856988375e-06, "loss": 0.2615, "step": 86045 }, { "epoch": 2.512401278813448, "grad_norm": 0.6465109993933208, "learning_rate": 9.029467423628008e-06, "loss": 0.2595, "step": 86050 }, { "epoch": 2.512547262083765, "grad_norm": 0.5595122981042502, "learning_rate": 9.02676399026764e-06, "loss": 0.2397, "step": 86055 }, { "epoch": 2.5126932453540824, "grad_norm": 0.6221650212913707, "learning_rate": 9.024060556907272e-06, "loss": 0.2551, "step": 86060 }, { "epoch": 2.5128392286244, "grad_norm": 0.545302870083665, "learning_rate": 9.021357123546905e-06, "loss": 0.2569, "step": 86065 }, { "epoch": 2.512985211894717, "grad_norm": 0.6479503179659557, "learning_rate": 9.018653690186537e-06, "loss": 0.2621, "step": 86070 }, { "epoch": 2.513131195165034, "grad_norm": 0.5513997696731501, "learning_rate": 9.01595025682617e-06, "loss": 0.2428, "step": 86075 }, { "epoch": 2.5132771784353514, "grad_norm": 0.5600480608997854, "learning_rate": 9.013246823465803e-06, "loss": 0.2448, "step": 86080 }, { "epoch": 2.5134231617056684, "grad_norm": 0.5744415129614642, "learning_rate": 9.010543390105434e-06, "loss": 0.2327, "step": 86085 }, { "epoch": 2.513569144975986, "grad_norm": 0.5344159897434714, "learning_rate": 9.007839956745066e-06, "loss": 0.2544, "step": 86090 }, { "epoch": 2.513715128246303, "grad_norm": 0.6140192895335533, "learning_rate": 9.0051365233847e-06, "loss": 0.2533, "step": 86095 }, { "epoch": 2.5138611115166203, "grad_norm": 0.5810354892278409, "learning_rate": 9.002433090024331e-06, "loss": 0.2406, "step": 86100 }, { "epoch": 2.5140070947869373, "grad_norm": 0.5746143649118565, "learning_rate": 8.999729656663963e-06, "loss": 0.2579, "step": 86105 }, { "epoch": 2.5141530780572547, "grad_norm": 0.6746879340098833, "learning_rate": 8.997026223303597e-06, "loss": 0.275, "step": 86110 }, { "epoch": 2.5142990613275717, "grad_norm": 0.5811994645355499, "learning_rate": 8.994322789943228e-06, "loss": 0.2608, "step": 86115 }, { "epoch": 2.514445044597889, "grad_norm": 0.5527556431928808, "learning_rate": 8.99161935658286e-06, "loss": 0.2416, "step": 86120 }, { "epoch": 2.514591027868206, "grad_norm": 0.5680288944845906, "learning_rate": 8.988915923222494e-06, "loss": 0.2418, "step": 86125 }, { "epoch": 2.5147370111385237, "grad_norm": 0.6334598948674888, "learning_rate": 8.986212489862125e-06, "loss": 0.258, "step": 86130 }, { "epoch": 2.5148829944088407, "grad_norm": 0.5763211395756813, "learning_rate": 8.983509056501757e-06, "loss": 0.2389, "step": 86135 }, { "epoch": 2.515028977679158, "grad_norm": 0.6134630925658177, "learning_rate": 8.98080562314139e-06, "loss": 0.2538, "step": 86140 }, { "epoch": 2.515174960949475, "grad_norm": 0.5557325943481781, "learning_rate": 8.978102189781022e-06, "loss": 0.246, "step": 86145 }, { "epoch": 2.5153209442197926, "grad_norm": 0.5702114024250171, "learning_rate": 8.975398756420654e-06, "loss": 0.2451, "step": 86150 }, { "epoch": 2.5154669274901096, "grad_norm": 0.5820214703714442, "learning_rate": 8.972695323060286e-06, "loss": 0.245, "step": 86155 }, { "epoch": 2.515612910760427, "grad_norm": 0.57433710771716, "learning_rate": 8.969991889699919e-06, "loss": 0.2544, "step": 86160 }, { "epoch": 2.515758894030744, "grad_norm": 0.6054848380387469, "learning_rate": 8.967288456339553e-06, "loss": 0.2378, "step": 86165 }, { "epoch": 2.515904877301061, "grad_norm": 0.5650437500671701, "learning_rate": 8.964585022979183e-06, "loss": 0.2557, "step": 86170 }, { "epoch": 2.5160508605713785, "grad_norm": 0.5816744689754543, "learning_rate": 8.961881589618816e-06, "loss": 0.2399, "step": 86175 }, { "epoch": 2.516196843841696, "grad_norm": 0.5763301297368526, "learning_rate": 8.95917815625845e-06, "loss": 0.2497, "step": 86180 }, { "epoch": 2.516342827112013, "grad_norm": 0.5911891245018092, "learning_rate": 8.95647472289808e-06, "loss": 0.2442, "step": 86185 }, { "epoch": 2.51648881038233, "grad_norm": 0.613264505812746, "learning_rate": 8.953771289537713e-06, "loss": 0.248, "step": 86190 }, { "epoch": 2.5166347936526474, "grad_norm": 0.5692445143350924, "learning_rate": 8.951067856177347e-06, "loss": 0.2487, "step": 86195 }, { "epoch": 2.516780776922965, "grad_norm": 0.5918953602675098, "learning_rate": 8.948364422816977e-06, "loss": 0.2617, "step": 86200 }, { "epoch": 2.516926760193282, "grad_norm": 0.6404929500033796, "learning_rate": 8.94566098945661e-06, "loss": 0.2516, "step": 86205 }, { "epoch": 2.517072743463599, "grad_norm": 0.5785797805418657, "learning_rate": 8.942957556096244e-06, "loss": 0.2498, "step": 86210 }, { "epoch": 2.5172187267339163, "grad_norm": 0.6129204519277383, "learning_rate": 8.940254122735874e-06, "loss": 0.2461, "step": 86215 }, { "epoch": 2.517364710004234, "grad_norm": 0.5753531899524015, "learning_rate": 8.937550689375507e-06, "loss": 0.2349, "step": 86220 }, { "epoch": 2.517510693274551, "grad_norm": 0.5687912416523792, "learning_rate": 8.93484725601514e-06, "loss": 0.2367, "step": 86225 }, { "epoch": 2.517656676544868, "grad_norm": 0.5875463768424488, "learning_rate": 8.932143822654771e-06, "loss": 0.2417, "step": 86230 }, { "epoch": 2.5178026598151853, "grad_norm": 0.5633128222980734, "learning_rate": 8.929440389294404e-06, "loss": 0.2285, "step": 86235 }, { "epoch": 2.5179486430855023, "grad_norm": 0.5758560837753763, "learning_rate": 8.926736955934038e-06, "loss": 0.2638, "step": 86240 }, { "epoch": 2.5180946263558197, "grad_norm": 0.5837438930911584, "learning_rate": 8.924033522573668e-06, "loss": 0.2423, "step": 86245 }, { "epoch": 2.5182406096261367, "grad_norm": 0.5777438711196289, "learning_rate": 8.921330089213302e-06, "loss": 0.2437, "step": 86250 }, { "epoch": 2.518386592896454, "grad_norm": 0.6245350909938392, "learning_rate": 8.918626655852935e-06, "loss": 0.2294, "step": 86255 }, { "epoch": 2.518532576166771, "grad_norm": 0.5926007483138445, "learning_rate": 8.915923222492565e-06, "loss": 0.2425, "step": 86260 }, { "epoch": 2.5186785594370886, "grad_norm": 0.5962145258234589, "learning_rate": 8.913219789132199e-06, "loss": 0.2557, "step": 86265 }, { "epoch": 2.5188245427074056, "grad_norm": 0.6256572875015499, "learning_rate": 8.910516355771831e-06, "loss": 0.2614, "step": 86270 }, { "epoch": 2.518970525977723, "grad_norm": 0.5751052859106011, "learning_rate": 8.907812922411462e-06, "loss": 0.2338, "step": 86275 }, { "epoch": 2.51911650924804, "grad_norm": 0.5904256622832461, "learning_rate": 8.905109489051096e-06, "loss": 0.2462, "step": 86280 }, { "epoch": 2.5192624925183575, "grad_norm": 0.6037740531565176, "learning_rate": 8.902406055690728e-06, "loss": 0.2701, "step": 86285 }, { "epoch": 2.5194084757886746, "grad_norm": 0.630379710568715, "learning_rate": 8.899702622330359e-06, "loss": 0.2602, "step": 86290 }, { "epoch": 2.519554459058992, "grad_norm": 0.5757552790569327, "learning_rate": 8.896999188969993e-06, "loss": 0.252, "step": 86295 }, { "epoch": 2.519700442329309, "grad_norm": 0.6022732720849749, "learning_rate": 8.894295755609625e-06, "loss": 0.2586, "step": 86300 }, { "epoch": 2.519846425599626, "grad_norm": 0.6072217228003044, "learning_rate": 8.891592322249256e-06, "loss": 0.2572, "step": 86305 }, { "epoch": 2.5199924088699435, "grad_norm": 0.5441081101298118, "learning_rate": 8.88888888888889e-06, "loss": 0.2471, "step": 86310 }, { "epoch": 2.520138392140261, "grad_norm": 0.581252536296847, "learning_rate": 8.886185455528522e-06, "loss": 0.2404, "step": 86315 }, { "epoch": 2.520284375410578, "grad_norm": 0.6122018733902355, "learning_rate": 8.883482022168153e-06, "loss": 0.2496, "step": 86320 }, { "epoch": 2.520430358680895, "grad_norm": 0.5988675127361723, "learning_rate": 8.880778588807787e-06, "loss": 0.2548, "step": 86325 }, { "epoch": 2.5205763419512124, "grad_norm": 0.5553388782729113, "learning_rate": 8.878075155447418e-06, "loss": 0.247, "step": 86330 }, { "epoch": 2.52072232522153, "grad_norm": 0.5695124081535857, "learning_rate": 8.875371722087052e-06, "loss": 0.2351, "step": 86335 }, { "epoch": 2.520868308491847, "grad_norm": 0.5938235267566514, "learning_rate": 8.872668288726684e-06, "loss": 0.2517, "step": 86340 }, { "epoch": 2.521014291762164, "grad_norm": 0.5542576503541978, "learning_rate": 8.869964855366315e-06, "loss": 0.2385, "step": 86345 }, { "epoch": 2.5211602750324813, "grad_norm": 0.6214885970256381, "learning_rate": 8.867261422005949e-06, "loss": 0.2484, "step": 86350 }, { "epoch": 2.5213062583027988, "grad_norm": 0.5967695822259743, "learning_rate": 8.864557988645581e-06, "loss": 0.24, "step": 86355 }, { "epoch": 2.5214522415731158, "grad_norm": 0.5722874147742331, "learning_rate": 8.861854555285211e-06, "loss": 0.248, "step": 86360 }, { "epoch": 2.5215982248434328, "grad_norm": 0.5906608587529497, "learning_rate": 8.859151121924845e-06, "loss": 0.2564, "step": 86365 }, { "epoch": 2.52174420811375, "grad_norm": 0.6440619875402877, "learning_rate": 8.856447688564478e-06, "loss": 0.2473, "step": 86370 }, { "epoch": 2.5218901913840672, "grad_norm": 0.5332421893799477, "learning_rate": 8.853744255204108e-06, "loss": 0.2379, "step": 86375 }, { "epoch": 2.5220361746543847, "grad_norm": 0.5585123007705908, "learning_rate": 8.851040821843742e-06, "loss": 0.2472, "step": 86380 }, { "epoch": 2.5221821579247017, "grad_norm": 0.5904465862958165, "learning_rate": 8.848337388483375e-06, "loss": 0.2493, "step": 86385 }, { "epoch": 2.522328141195019, "grad_norm": 0.5924059669693578, "learning_rate": 8.845633955123005e-06, "loss": 0.262, "step": 86390 }, { "epoch": 2.522474124465336, "grad_norm": 0.592149796584846, "learning_rate": 8.84293052176264e-06, "loss": 0.2513, "step": 86395 }, { "epoch": 2.5226201077356536, "grad_norm": 0.5460107300260392, "learning_rate": 8.840227088402272e-06, "loss": 0.2417, "step": 86400 }, { "epoch": 2.5227660910059706, "grad_norm": 0.5797098530443724, "learning_rate": 8.837523655041902e-06, "loss": 0.2424, "step": 86405 }, { "epoch": 2.522912074276288, "grad_norm": 0.5296759144433604, "learning_rate": 8.834820221681536e-06, "loss": 0.24, "step": 86410 }, { "epoch": 2.523058057546605, "grad_norm": 0.5777926198485501, "learning_rate": 8.832116788321169e-06, "loss": 0.2452, "step": 86415 }, { "epoch": 2.5232040408169225, "grad_norm": 0.553851514866953, "learning_rate": 8.829413354960801e-06, "loss": 0.2485, "step": 86420 }, { "epoch": 2.5233500240872395, "grad_norm": 0.58168257315009, "learning_rate": 8.826709921600433e-06, "loss": 0.2609, "step": 86425 }, { "epoch": 2.523496007357557, "grad_norm": 0.5527290167304235, "learning_rate": 8.824006488240066e-06, "loss": 0.2608, "step": 86430 }, { "epoch": 2.523641990627874, "grad_norm": 0.5616479348019354, "learning_rate": 8.821303054879698e-06, "loss": 0.2431, "step": 86435 }, { "epoch": 2.5237879738981914, "grad_norm": 0.5588740810460707, "learning_rate": 8.81859962151933e-06, "loss": 0.2455, "step": 86440 }, { "epoch": 2.5239339571685084, "grad_norm": 0.6254112378205859, "learning_rate": 8.815896188158963e-06, "loss": 0.2471, "step": 86445 }, { "epoch": 2.524079940438826, "grad_norm": 0.5601019342507406, "learning_rate": 8.813192754798595e-06, "loss": 0.2553, "step": 86450 }, { "epoch": 2.524225923709143, "grad_norm": 0.5727696934206628, "learning_rate": 8.810489321438227e-06, "loss": 0.2405, "step": 86455 }, { "epoch": 2.52437190697946, "grad_norm": 0.6830185755704598, "learning_rate": 8.80778588807786e-06, "loss": 0.2467, "step": 86460 }, { "epoch": 2.5245178902497774, "grad_norm": 0.5334732636548761, "learning_rate": 8.805082454717492e-06, "loss": 0.2398, "step": 86465 }, { "epoch": 2.524663873520095, "grad_norm": 0.6161548134192946, "learning_rate": 8.802379021357124e-06, "loss": 0.2621, "step": 86470 }, { "epoch": 2.524809856790412, "grad_norm": 0.6008461795962058, "learning_rate": 8.799675587996756e-06, "loss": 0.2526, "step": 86475 }, { "epoch": 2.524955840060729, "grad_norm": 0.586156471890663, "learning_rate": 8.796972154636389e-06, "loss": 0.2502, "step": 86480 }, { "epoch": 2.5251018233310463, "grad_norm": 0.5565508351308279, "learning_rate": 8.794268721276021e-06, "loss": 0.2394, "step": 86485 }, { "epoch": 2.5252478066013637, "grad_norm": 0.6368435780405872, "learning_rate": 8.791565287915653e-06, "loss": 0.2567, "step": 86490 }, { "epoch": 2.5253937898716807, "grad_norm": 0.6232632786133967, "learning_rate": 8.788861854555286e-06, "loss": 0.2488, "step": 86495 }, { "epoch": 2.5255397731419977, "grad_norm": 0.5800646581159179, "learning_rate": 8.786158421194918e-06, "loss": 0.2535, "step": 86500 }, { "epoch": 2.525685756412315, "grad_norm": 0.5661120745839874, "learning_rate": 8.78345498783455e-06, "loss": 0.245, "step": 86505 }, { "epoch": 2.5258317396826326, "grad_norm": 0.5669163908147267, "learning_rate": 8.780751554474183e-06, "loss": 0.2443, "step": 86510 }, { "epoch": 2.5259777229529496, "grad_norm": 0.5423529427956002, "learning_rate": 8.778048121113815e-06, "loss": 0.2325, "step": 86515 }, { "epoch": 2.5261237062232667, "grad_norm": 0.6173983598394794, "learning_rate": 8.775344687753447e-06, "loss": 0.2572, "step": 86520 }, { "epoch": 2.526269689493584, "grad_norm": 0.5796057761572623, "learning_rate": 8.77264125439308e-06, "loss": 0.2484, "step": 86525 }, { "epoch": 2.526415672763901, "grad_norm": 0.5126571793507219, "learning_rate": 8.769937821032712e-06, "loss": 0.2412, "step": 86530 }, { "epoch": 2.5265616560342186, "grad_norm": 0.5786774527587815, "learning_rate": 8.767234387672344e-06, "loss": 0.2665, "step": 86535 }, { "epoch": 2.5267076393045356, "grad_norm": 0.5354461088524527, "learning_rate": 8.764530954311977e-06, "loss": 0.2404, "step": 86540 }, { "epoch": 2.526853622574853, "grad_norm": 0.601127354197125, "learning_rate": 8.761827520951609e-06, "loss": 0.2586, "step": 86545 }, { "epoch": 2.52699960584517, "grad_norm": 0.6031662249819092, "learning_rate": 8.759124087591241e-06, "loss": 0.2584, "step": 86550 }, { "epoch": 2.5271455891154875, "grad_norm": 0.5426700876115333, "learning_rate": 8.756420654230874e-06, "loss": 0.2503, "step": 86555 }, { "epoch": 2.5272915723858045, "grad_norm": 0.5607792289391463, "learning_rate": 8.753717220870506e-06, "loss": 0.2421, "step": 86560 }, { "epoch": 2.527437555656122, "grad_norm": 0.5392612229733045, "learning_rate": 8.751013787510138e-06, "loss": 0.2402, "step": 86565 }, { "epoch": 2.527583538926439, "grad_norm": 0.5641439667760689, "learning_rate": 8.74831035414977e-06, "loss": 0.2502, "step": 86570 }, { "epoch": 2.5277295221967564, "grad_norm": 0.6185372362101271, "learning_rate": 8.745606920789403e-06, "loss": 0.2379, "step": 86575 }, { "epoch": 2.5278755054670734, "grad_norm": 0.5743942699251333, "learning_rate": 8.742903487429035e-06, "loss": 0.2506, "step": 86580 }, { "epoch": 2.528021488737391, "grad_norm": 0.6315168341458894, "learning_rate": 8.740200054068667e-06, "loss": 0.2421, "step": 86585 }, { "epoch": 2.528167472007708, "grad_norm": 0.6039938822595597, "learning_rate": 8.7374966207083e-06, "loss": 0.2558, "step": 86590 }, { "epoch": 2.528313455278025, "grad_norm": 0.586688583266225, "learning_rate": 8.734793187347932e-06, "loss": 0.2572, "step": 86595 }, { "epoch": 2.5284594385483423, "grad_norm": 0.6084485221240791, "learning_rate": 8.732089753987564e-06, "loss": 0.2504, "step": 86600 }, { "epoch": 2.5286054218186598, "grad_norm": 0.5619233900801766, "learning_rate": 8.729386320627197e-06, "loss": 0.2587, "step": 86605 }, { "epoch": 2.528751405088977, "grad_norm": 0.5902729264736963, "learning_rate": 8.726682887266829e-06, "loss": 0.2435, "step": 86610 }, { "epoch": 2.528897388359294, "grad_norm": 0.6052413251476599, "learning_rate": 8.723979453906461e-06, "loss": 0.2529, "step": 86615 }, { "epoch": 2.5290433716296112, "grad_norm": 0.5709350547717427, "learning_rate": 8.721276020546094e-06, "loss": 0.228, "step": 86620 }, { "epoch": 2.5291893548999287, "grad_norm": 0.657433155507196, "learning_rate": 8.718572587185726e-06, "loss": 0.2359, "step": 86625 }, { "epoch": 2.5293353381702457, "grad_norm": 0.5796280133300732, "learning_rate": 8.715869153825358e-06, "loss": 0.2336, "step": 86630 }, { "epoch": 2.5294813214405627, "grad_norm": 0.6124665168787402, "learning_rate": 8.71316572046499e-06, "loss": 0.2459, "step": 86635 }, { "epoch": 2.52962730471088, "grad_norm": 0.5987739584078892, "learning_rate": 8.710462287104623e-06, "loss": 0.2578, "step": 86640 }, { "epoch": 2.5297732879811976, "grad_norm": 0.5724964608958742, "learning_rate": 8.707758853744255e-06, "loss": 0.2497, "step": 86645 }, { "epoch": 2.5299192712515146, "grad_norm": 0.5873875147029585, "learning_rate": 8.705055420383888e-06, "loss": 0.2582, "step": 86650 }, { "epoch": 2.5300652545218316, "grad_norm": 0.5570636250096375, "learning_rate": 8.70235198702352e-06, "loss": 0.2521, "step": 86655 }, { "epoch": 2.530211237792149, "grad_norm": 0.5515927779152588, "learning_rate": 8.699648553663152e-06, "loss": 0.2396, "step": 86660 }, { "epoch": 2.530357221062466, "grad_norm": 0.5554142097789124, "learning_rate": 8.696945120302785e-06, "loss": 0.2534, "step": 86665 }, { "epoch": 2.5305032043327835, "grad_norm": 0.6060617080210129, "learning_rate": 8.694241686942417e-06, "loss": 0.2353, "step": 86670 }, { "epoch": 2.5306491876031005, "grad_norm": 0.5392714118919142, "learning_rate": 8.691538253582051e-06, "loss": 0.2508, "step": 86675 }, { "epoch": 2.530795170873418, "grad_norm": 0.560083661324521, "learning_rate": 8.688834820221681e-06, "loss": 0.2437, "step": 86680 }, { "epoch": 2.530941154143735, "grad_norm": 0.5605319408370734, "learning_rate": 8.686131386861314e-06, "loss": 0.2424, "step": 86685 }, { "epoch": 2.5310871374140524, "grad_norm": 0.5589654855459262, "learning_rate": 8.683427953500948e-06, "loss": 0.2511, "step": 86690 }, { "epoch": 2.5312331206843695, "grad_norm": 0.5628216623699062, "learning_rate": 8.680724520140578e-06, "loss": 0.2481, "step": 86695 }, { "epoch": 2.531379103954687, "grad_norm": 0.6384044964731216, "learning_rate": 8.67802108678021e-06, "loss": 0.2617, "step": 86700 }, { "epoch": 2.531525087225004, "grad_norm": 0.5933278963567827, "learning_rate": 8.675317653419845e-06, "loss": 0.2423, "step": 86705 }, { "epoch": 2.5316710704953214, "grad_norm": 0.6199062940008879, "learning_rate": 8.672614220059475e-06, "loss": 0.2413, "step": 86710 }, { "epoch": 2.5318170537656384, "grad_norm": 0.5542157628528159, "learning_rate": 8.669910786699108e-06, "loss": 0.2442, "step": 86715 }, { "epoch": 2.531963037035956, "grad_norm": 0.5725525568988143, "learning_rate": 8.667207353338742e-06, "loss": 0.2462, "step": 86720 }, { "epoch": 2.532109020306273, "grad_norm": 0.5999674625505784, "learning_rate": 8.664503919978372e-06, "loss": 0.2499, "step": 86725 }, { "epoch": 2.5322550035765903, "grad_norm": 0.5856257933796775, "learning_rate": 8.661800486618005e-06, "loss": 0.2503, "step": 86730 }, { "epoch": 2.5324009868469073, "grad_norm": 0.5632982328568672, "learning_rate": 8.659097053257639e-06, "loss": 0.2543, "step": 86735 }, { "epoch": 2.5325469701172247, "grad_norm": 0.6121799342286252, "learning_rate": 8.65639361989727e-06, "loss": 0.249, "step": 86740 }, { "epoch": 2.5326929533875417, "grad_norm": 0.6115613505296916, "learning_rate": 8.653690186536902e-06, "loss": 0.2503, "step": 86745 }, { "epoch": 2.5328389366578588, "grad_norm": 0.5857893093502898, "learning_rate": 8.650986753176536e-06, "loss": 0.2431, "step": 86750 }, { "epoch": 2.532984919928176, "grad_norm": 0.558288031780159, "learning_rate": 8.648283319816166e-06, "loss": 0.2465, "step": 86755 }, { "epoch": 2.5331309031984937, "grad_norm": 0.6188536557147968, "learning_rate": 8.6455798864558e-06, "loss": 0.2335, "step": 86760 }, { "epoch": 2.5332768864688107, "grad_norm": 0.550484760872053, "learning_rate": 8.642876453095433e-06, "loss": 0.2589, "step": 86765 }, { "epoch": 2.5334228697391277, "grad_norm": 0.6024291961205173, "learning_rate": 8.640173019735063e-06, "loss": 0.253, "step": 86770 }, { "epoch": 2.533568853009445, "grad_norm": 0.5839884222530565, "learning_rate": 8.637469586374697e-06, "loss": 0.2546, "step": 86775 }, { "epoch": 2.5337148362797626, "grad_norm": 0.5898761545737768, "learning_rate": 8.63476615301433e-06, "loss": 0.2468, "step": 86780 }, { "epoch": 2.5338608195500796, "grad_norm": 0.6315995423821618, "learning_rate": 8.63206271965396e-06, "loss": 0.2544, "step": 86785 }, { "epoch": 2.5340068028203966, "grad_norm": 0.5418931175379088, "learning_rate": 8.629359286293594e-06, "loss": 0.2485, "step": 86790 }, { "epoch": 2.534152786090714, "grad_norm": 0.5787276344626509, "learning_rate": 8.626655852933225e-06, "loss": 0.2487, "step": 86795 }, { "epoch": 2.5342987693610315, "grad_norm": 0.5908495093612116, "learning_rate": 8.623952419572857e-06, "loss": 0.2448, "step": 86800 }, { "epoch": 2.5344447526313485, "grad_norm": 0.5586406457789639, "learning_rate": 8.621248986212491e-06, "loss": 0.2497, "step": 86805 }, { "epoch": 2.5345907359016655, "grad_norm": 0.5799036731443419, "learning_rate": 8.618545552852122e-06, "loss": 0.2475, "step": 86810 }, { "epoch": 2.534736719171983, "grad_norm": 0.585901458907937, "learning_rate": 8.615842119491754e-06, "loss": 0.2312, "step": 86815 }, { "epoch": 2.5348827024423, "grad_norm": 0.5751146503216816, "learning_rate": 8.613138686131388e-06, "loss": 0.2409, "step": 86820 }, { "epoch": 2.5350286857126174, "grad_norm": 0.5760883140294017, "learning_rate": 8.610435252771019e-06, "loss": 0.2427, "step": 86825 }, { "epoch": 2.5351746689829344, "grad_norm": 0.6442202933454008, "learning_rate": 8.607731819410651e-06, "loss": 0.2499, "step": 86830 }, { "epoch": 2.535320652253252, "grad_norm": 0.5468544502892055, "learning_rate": 8.605028386050285e-06, "loss": 0.2362, "step": 86835 }, { "epoch": 2.535466635523569, "grad_norm": 0.587010634296836, "learning_rate": 8.602324952689916e-06, "loss": 0.2406, "step": 86840 }, { "epoch": 2.5356126187938863, "grad_norm": 0.5364113048413758, "learning_rate": 8.59962151932955e-06, "loss": 0.2451, "step": 86845 }, { "epoch": 2.5357586020642033, "grad_norm": 0.5970613843674435, "learning_rate": 8.596918085969182e-06, "loss": 0.2269, "step": 86850 }, { "epoch": 2.535904585334521, "grad_norm": 0.5762030962831798, "learning_rate": 8.594214652608813e-06, "loss": 0.2405, "step": 86855 }, { "epoch": 2.536050568604838, "grad_norm": 0.579819236959443, "learning_rate": 8.591511219248447e-06, "loss": 0.2506, "step": 86860 }, { "epoch": 2.5361965518751552, "grad_norm": 0.6110314611877767, "learning_rate": 8.588807785888079e-06, "loss": 0.2651, "step": 86865 }, { "epoch": 2.5363425351454723, "grad_norm": 0.5899626293415028, "learning_rate": 8.58610435252771e-06, "loss": 0.262, "step": 86870 }, { "epoch": 2.5364885184157897, "grad_norm": 0.5932990166165223, "learning_rate": 8.583400919167344e-06, "loss": 0.2406, "step": 86875 }, { "epoch": 2.5366345016861067, "grad_norm": 0.5780183976263692, "learning_rate": 8.580697485806976e-06, "loss": 0.256, "step": 86880 }, { "epoch": 2.5367804849564237, "grad_norm": 0.6136098495644424, "learning_rate": 8.577994052446606e-06, "loss": 0.2558, "step": 86885 }, { "epoch": 2.536926468226741, "grad_norm": 0.577934395823002, "learning_rate": 8.57529061908624e-06, "loss": 0.2377, "step": 86890 }, { "epoch": 2.5370724514970586, "grad_norm": 0.5814839099730212, "learning_rate": 8.572587185725873e-06, "loss": 0.2443, "step": 86895 }, { "epoch": 2.5372184347673756, "grad_norm": 0.5926973023450073, "learning_rate": 8.569883752365503e-06, "loss": 0.247, "step": 86900 }, { "epoch": 2.5373644180376926, "grad_norm": 0.5722816446285695, "learning_rate": 8.567180319005137e-06, "loss": 0.236, "step": 86905 }, { "epoch": 2.53751040130801, "grad_norm": 0.6119282478459117, "learning_rate": 8.56447688564477e-06, "loss": 0.2435, "step": 86910 }, { "epoch": 2.5376563845783275, "grad_norm": 0.568696291087647, "learning_rate": 8.5617734522844e-06, "loss": 0.236, "step": 86915 }, { "epoch": 2.5378023678486445, "grad_norm": 0.5554266891869805, "learning_rate": 8.559070018924034e-06, "loss": 0.2526, "step": 86920 }, { "epoch": 2.5379483511189616, "grad_norm": 0.5862809168905905, "learning_rate": 8.556366585563667e-06, "loss": 0.2299, "step": 86925 }, { "epoch": 2.538094334389279, "grad_norm": 0.6193415591535884, "learning_rate": 8.553663152203299e-06, "loss": 0.2515, "step": 86930 }, { "epoch": 2.5382403176595965, "grad_norm": 0.6113284192893644, "learning_rate": 8.550959718842931e-06, "loss": 0.2481, "step": 86935 }, { "epoch": 2.5383863009299135, "grad_norm": 0.5626326511349188, "learning_rate": 8.548256285482564e-06, "loss": 0.2535, "step": 86940 }, { "epoch": 2.5385322842002305, "grad_norm": 0.6297061287314747, "learning_rate": 8.545552852122196e-06, "loss": 0.2644, "step": 86945 }, { "epoch": 2.538678267470548, "grad_norm": 0.5775832079154655, "learning_rate": 8.542849418761828e-06, "loss": 0.2489, "step": 86950 }, { "epoch": 2.5388242507408654, "grad_norm": 0.5889539310006423, "learning_rate": 8.54014598540146e-06, "loss": 0.2541, "step": 86955 }, { "epoch": 2.5389702340111824, "grad_norm": 0.6237536238637261, "learning_rate": 8.537442552041093e-06, "loss": 0.2645, "step": 86960 }, { "epoch": 2.5391162172814994, "grad_norm": 0.5864474597361453, "learning_rate": 8.534739118680725e-06, "loss": 0.249, "step": 86965 }, { "epoch": 2.539262200551817, "grad_norm": 0.5850934554818294, "learning_rate": 8.532035685320358e-06, "loss": 0.2441, "step": 86970 }, { "epoch": 2.539408183822134, "grad_norm": 0.6441381756275231, "learning_rate": 8.52933225195999e-06, "loss": 0.2566, "step": 86975 }, { "epoch": 2.5395541670924513, "grad_norm": 0.5611986340128303, "learning_rate": 8.526628818599622e-06, "loss": 0.2535, "step": 86980 }, { "epoch": 2.5397001503627683, "grad_norm": 0.551507170669307, "learning_rate": 8.523925385239253e-06, "loss": 0.2447, "step": 86985 }, { "epoch": 2.5398461336330858, "grad_norm": 0.56100306115777, "learning_rate": 8.521221951878887e-06, "loss": 0.2403, "step": 86990 }, { "epoch": 2.5399921169034028, "grad_norm": 0.5514172763294974, "learning_rate": 8.518518518518519e-06, "loss": 0.2631, "step": 86995 }, { "epoch": 2.54013810017372, "grad_norm": 0.6358807321764097, "learning_rate": 8.51581508515815e-06, "loss": 0.2507, "step": 87000 }, { "epoch": 2.540284083444037, "grad_norm": 0.5414871851970072, "learning_rate": 8.513111651797784e-06, "loss": 0.2435, "step": 87005 }, { "epoch": 2.5404300667143547, "grad_norm": 0.5300933154365898, "learning_rate": 8.510408218437416e-06, "loss": 0.256, "step": 87010 }, { "epoch": 2.5405760499846717, "grad_norm": 0.6006729546154427, "learning_rate": 8.507704785077048e-06, "loss": 0.2457, "step": 87015 }, { "epoch": 2.540722033254989, "grad_norm": 0.5803541963230792, "learning_rate": 8.50500135171668e-06, "loss": 0.2358, "step": 87020 }, { "epoch": 2.540868016525306, "grad_norm": 0.5995617873858727, "learning_rate": 8.502297918356313e-06, "loss": 0.2475, "step": 87025 }, { "epoch": 2.5410139997956236, "grad_norm": 0.5618319254194356, "learning_rate": 8.499594484995945e-06, "loss": 0.2491, "step": 87030 }, { "epoch": 2.5411599830659406, "grad_norm": 0.6042497209901025, "learning_rate": 8.496891051635578e-06, "loss": 0.2517, "step": 87035 }, { "epoch": 2.5413059663362576, "grad_norm": 0.5716236180081565, "learning_rate": 8.49418761827521e-06, "loss": 0.2732, "step": 87040 }, { "epoch": 2.541451949606575, "grad_norm": 0.5352782080663184, "learning_rate": 8.491484184914842e-06, "loss": 0.2379, "step": 87045 }, { "epoch": 2.5415979328768925, "grad_norm": 0.5671096076214084, "learning_rate": 8.488780751554475e-06, "loss": 0.2522, "step": 87050 }, { "epoch": 2.5417439161472095, "grad_norm": 0.5374334174475845, "learning_rate": 8.486077318194107e-06, "loss": 0.2244, "step": 87055 }, { "epoch": 2.5418898994175265, "grad_norm": 0.5721000452696362, "learning_rate": 8.48337388483374e-06, "loss": 0.2377, "step": 87060 }, { "epoch": 2.542035882687844, "grad_norm": 0.5695268109211856, "learning_rate": 8.480670451473372e-06, "loss": 0.2495, "step": 87065 }, { "epoch": 2.5421818659581614, "grad_norm": 0.6144275570238404, "learning_rate": 8.477967018113004e-06, "loss": 0.2632, "step": 87070 }, { "epoch": 2.5423278492284784, "grad_norm": 0.5937448006547729, "learning_rate": 8.475263584752636e-06, "loss": 0.2435, "step": 87075 }, { "epoch": 2.5424738324987954, "grad_norm": 0.638407623653489, "learning_rate": 8.472560151392269e-06, "loss": 0.2558, "step": 87080 }, { "epoch": 2.542619815769113, "grad_norm": 0.5698878444530957, "learning_rate": 8.469856718031901e-06, "loss": 0.2492, "step": 87085 }, { "epoch": 2.5427657990394303, "grad_norm": 0.593930414272736, "learning_rate": 8.467153284671533e-06, "loss": 0.2711, "step": 87090 }, { "epoch": 2.5429117823097473, "grad_norm": 0.546119879389147, "learning_rate": 8.464449851311165e-06, "loss": 0.2485, "step": 87095 }, { "epoch": 2.5430577655800644, "grad_norm": 0.6063041729689596, "learning_rate": 8.461746417950798e-06, "loss": 0.2678, "step": 87100 }, { "epoch": 2.543203748850382, "grad_norm": 0.6158480384638656, "learning_rate": 8.45904298459043e-06, "loss": 0.2443, "step": 87105 }, { "epoch": 2.543349732120699, "grad_norm": 0.5670041006018022, "learning_rate": 8.456339551230062e-06, "loss": 0.2488, "step": 87110 }, { "epoch": 2.5434957153910163, "grad_norm": 0.5913395660244355, "learning_rate": 8.453636117869695e-06, "loss": 0.2628, "step": 87115 }, { "epoch": 2.5436416986613333, "grad_norm": 0.623221584854046, "learning_rate": 8.450932684509327e-06, "loss": 0.2583, "step": 87120 }, { "epoch": 2.5437876819316507, "grad_norm": 0.6000122023090396, "learning_rate": 8.44822925114896e-06, "loss": 0.2602, "step": 87125 }, { "epoch": 2.5439336652019677, "grad_norm": 0.5912179219602417, "learning_rate": 8.445525817788592e-06, "loss": 0.2485, "step": 87130 }, { "epoch": 2.544079648472285, "grad_norm": 0.6062436443329253, "learning_rate": 8.442822384428224e-06, "loss": 0.2424, "step": 87135 }, { "epoch": 2.544225631742602, "grad_norm": 0.5882623413589163, "learning_rate": 8.440118951067856e-06, "loss": 0.2542, "step": 87140 }, { "epoch": 2.5443716150129196, "grad_norm": 0.6168553152223107, "learning_rate": 8.437415517707489e-06, "loss": 0.2453, "step": 87145 }, { "epoch": 2.5445175982832366, "grad_norm": 0.6002836607160659, "learning_rate": 8.434712084347121e-06, "loss": 0.2702, "step": 87150 }, { "epoch": 2.544663581553554, "grad_norm": 0.5991624571368783, "learning_rate": 8.432008650986753e-06, "loss": 0.2533, "step": 87155 }, { "epoch": 2.544809564823871, "grad_norm": 0.5638942659128969, "learning_rate": 8.429305217626386e-06, "loss": 0.2468, "step": 87160 }, { "epoch": 2.5449555480941886, "grad_norm": 0.5938665343845969, "learning_rate": 8.426601784266018e-06, "loss": 0.2411, "step": 87165 }, { "epoch": 2.5451015313645056, "grad_norm": 0.5761687672149022, "learning_rate": 8.423898350905652e-06, "loss": 0.2468, "step": 87170 }, { "epoch": 2.5452475146348226, "grad_norm": 0.6220892250982903, "learning_rate": 8.421194917545283e-06, "loss": 0.2509, "step": 87175 }, { "epoch": 2.54539349790514, "grad_norm": 0.5834779429593182, "learning_rate": 8.418491484184915e-06, "loss": 0.2327, "step": 87180 }, { "epoch": 2.5455394811754575, "grad_norm": 0.5904495026215033, "learning_rate": 8.415788050824549e-06, "loss": 0.2492, "step": 87185 }, { "epoch": 2.5456854644457745, "grad_norm": 0.5960092977526086, "learning_rate": 8.41308461746418e-06, "loss": 0.2392, "step": 87190 }, { "epoch": 2.5458314477160915, "grad_norm": 0.6128226284038271, "learning_rate": 8.410381184103812e-06, "loss": 0.2463, "step": 87195 }, { "epoch": 2.545977430986409, "grad_norm": 0.5917888698902194, "learning_rate": 8.407677750743446e-06, "loss": 0.2434, "step": 87200 }, { "epoch": 2.5461234142567264, "grad_norm": 0.5818564251290053, "learning_rate": 8.404974317383076e-06, "loss": 0.2723, "step": 87205 }, { "epoch": 2.5462693975270434, "grad_norm": 0.575296436944066, "learning_rate": 8.402270884022709e-06, "loss": 0.2429, "step": 87210 }, { "epoch": 2.5464153807973604, "grad_norm": 0.6120158597652671, "learning_rate": 8.399567450662343e-06, "loss": 0.242, "step": 87215 }, { "epoch": 2.546561364067678, "grad_norm": 0.5544389533354037, "learning_rate": 8.396864017301973e-06, "loss": 0.2568, "step": 87220 }, { "epoch": 2.5467073473379953, "grad_norm": 0.5329054909575134, "learning_rate": 8.394160583941606e-06, "loss": 0.2386, "step": 87225 }, { "epoch": 2.5468533306083123, "grad_norm": 0.5975761340791405, "learning_rate": 8.39145715058124e-06, "loss": 0.2509, "step": 87230 }, { "epoch": 2.5469993138786293, "grad_norm": 0.5768211804174407, "learning_rate": 8.38875371722087e-06, "loss": 0.2598, "step": 87235 }, { "epoch": 2.5471452971489468, "grad_norm": 0.5478473714758733, "learning_rate": 8.386050283860503e-06, "loss": 0.2436, "step": 87240 }, { "epoch": 2.5472912804192642, "grad_norm": 0.5420088257532218, "learning_rate": 8.383346850500135e-06, "loss": 0.2437, "step": 87245 }, { "epoch": 2.5474372636895812, "grad_norm": 0.5866120962398206, "learning_rate": 8.380643417139767e-06, "loss": 0.2593, "step": 87250 }, { "epoch": 2.5475832469598982, "grad_norm": 0.5614701884062695, "learning_rate": 8.377939983779401e-06, "loss": 0.2436, "step": 87255 }, { "epoch": 2.5477292302302157, "grad_norm": 0.5742781568635861, "learning_rate": 8.375236550419032e-06, "loss": 0.2324, "step": 87260 }, { "epoch": 2.5478752135005327, "grad_norm": 0.5879305101969993, "learning_rate": 8.372533117058664e-06, "loss": 0.2512, "step": 87265 }, { "epoch": 2.54802119677085, "grad_norm": 0.6121416281321957, "learning_rate": 8.369829683698298e-06, "loss": 0.2465, "step": 87270 }, { "epoch": 2.548167180041167, "grad_norm": 0.5702626208935011, "learning_rate": 8.367126250337929e-06, "loss": 0.2299, "step": 87275 }, { "epoch": 2.5483131633114846, "grad_norm": 0.5557989935987289, "learning_rate": 8.364422816977561e-06, "loss": 0.2511, "step": 87280 }, { "epoch": 2.5484591465818016, "grad_norm": 0.5523121425663653, "learning_rate": 8.361719383617195e-06, "loss": 0.2495, "step": 87285 }, { "epoch": 2.548605129852119, "grad_norm": 0.5588928342967323, "learning_rate": 8.359015950256826e-06, "loss": 0.2409, "step": 87290 }, { "epoch": 2.548751113122436, "grad_norm": 0.54504316719438, "learning_rate": 8.356312516896458e-06, "loss": 0.2465, "step": 87295 }, { "epoch": 2.5488970963927535, "grad_norm": 0.5885896085037035, "learning_rate": 8.353609083536092e-06, "loss": 0.2395, "step": 87300 }, { "epoch": 2.5490430796630705, "grad_norm": 0.5818734998900055, "learning_rate": 8.350905650175723e-06, "loss": 0.2336, "step": 87305 }, { "epoch": 2.549189062933388, "grad_norm": 0.6010653067387339, "learning_rate": 8.348202216815355e-06, "loss": 0.2624, "step": 87310 }, { "epoch": 2.549335046203705, "grad_norm": 0.5968145545435684, "learning_rate": 8.345498783454989e-06, "loss": 0.2373, "step": 87315 }, { "epoch": 2.5494810294740224, "grad_norm": 0.5935469609864918, "learning_rate": 8.34279535009462e-06, "loss": 0.2496, "step": 87320 }, { "epoch": 2.5496270127443394, "grad_norm": 0.5402260364132474, "learning_rate": 8.340091916734252e-06, "loss": 0.2458, "step": 87325 }, { "epoch": 2.5497729960146565, "grad_norm": 0.5869353708976119, "learning_rate": 8.337388483373886e-06, "loss": 0.2714, "step": 87330 }, { "epoch": 2.549918979284974, "grad_norm": 0.5558621188767338, "learning_rate": 8.334685050013517e-06, "loss": 0.2424, "step": 87335 }, { "epoch": 2.5500649625552914, "grad_norm": 0.5931915773367512, "learning_rate": 8.33198161665315e-06, "loss": 0.2513, "step": 87340 }, { "epoch": 2.5502109458256084, "grad_norm": 0.5385067164613867, "learning_rate": 8.329278183292783e-06, "loss": 0.2409, "step": 87345 }, { "epoch": 2.5503569290959254, "grad_norm": 0.5694230636181494, "learning_rate": 8.326574749932414e-06, "loss": 0.2502, "step": 87350 }, { "epoch": 2.550502912366243, "grad_norm": 0.5633121617092275, "learning_rate": 8.323871316572048e-06, "loss": 0.2424, "step": 87355 }, { "epoch": 2.5506488956365603, "grad_norm": 0.618358974515769, "learning_rate": 8.32116788321168e-06, "loss": 0.2638, "step": 87360 }, { "epoch": 2.5507948789068773, "grad_norm": 0.548113830609199, "learning_rate": 8.31846444985131e-06, "loss": 0.2288, "step": 87365 }, { "epoch": 2.5509408621771943, "grad_norm": 0.6245362180083915, "learning_rate": 8.315761016490945e-06, "loss": 0.2362, "step": 87370 }, { "epoch": 2.5510868454475117, "grad_norm": 0.5797182022756472, "learning_rate": 8.313057583130577e-06, "loss": 0.2561, "step": 87375 }, { "epoch": 2.551232828717829, "grad_norm": 0.6064469489218588, "learning_rate": 8.310354149770208e-06, "loss": 0.2422, "step": 87380 }, { "epoch": 2.551378811988146, "grad_norm": 0.5576597205747106, "learning_rate": 8.307650716409842e-06, "loss": 0.2439, "step": 87385 }, { "epoch": 2.551524795258463, "grad_norm": 0.5577272562705196, "learning_rate": 8.304947283049474e-06, "loss": 0.2406, "step": 87390 }, { "epoch": 2.5516707785287807, "grad_norm": 0.5693317136047541, "learning_rate": 8.302243849689105e-06, "loss": 0.239, "step": 87395 }, { "epoch": 2.5518167617990977, "grad_norm": 0.6435936719184356, "learning_rate": 8.299540416328739e-06, "loss": 0.2548, "step": 87400 }, { "epoch": 2.551962745069415, "grad_norm": 0.5810511160870039, "learning_rate": 8.296836982968371e-06, "loss": 0.249, "step": 87405 }, { "epoch": 2.552108728339732, "grad_norm": 0.6327461066708622, "learning_rate": 8.294133549608001e-06, "loss": 0.2563, "step": 87410 }, { "epoch": 2.5522547116100496, "grad_norm": 0.5595786893287668, "learning_rate": 8.291430116247635e-06, "loss": 0.2341, "step": 87415 }, { "epoch": 2.5524006948803666, "grad_norm": 0.5561830537809026, "learning_rate": 8.288726682887268e-06, "loss": 0.2313, "step": 87420 }, { "epoch": 2.552546678150684, "grad_norm": 0.5761329582264455, "learning_rate": 8.2860232495269e-06, "loss": 0.2503, "step": 87425 }, { "epoch": 2.552692661421001, "grad_norm": 0.5947218492057609, "learning_rate": 8.283319816166532e-06, "loss": 0.253, "step": 87430 }, { "epoch": 2.5528386446913185, "grad_norm": 0.5773092360809488, "learning_rate": 8.280616382806163e-06, "loss": 0.2348, "step": 87435 }, { "epoch": 2.5529846279616355, "grad_norm": 0.6110203553867146, "learning_rate": 8.277912949445797e-06, "loss": 0.2501, "step": 87440 }, { "epoch": 2.553130611231953, "grad_norm": 0.5819097719232044, "learning_rate": 8.27520951608543e-06, "loss": 0.2612, "step": 87445 }, { "epoch": 2.55327659450227, "grad_norm": 0.5677980667303942, "learning_rate": 8.27250608272506e-06, "loss": 0.2563, "step": 87450 }, { "epoch": 2.5534225777725874, "grad_norm": 0.5365694128891967, "learning_rate": 8.269802649364694e-06, "loss": 0.2488, "step": 87455 }, { "epoch": 2.5535685610429044, "grad_norm": 0.5325568023022804, "learning_rate": 8.267099216004326e-06, "loss": 0.2503, "step": 87460 }, { "epoch": 2.553714544313222, "grad_norm": 0.5814517535840323, "learning_rate": 8.264395782643957e-06, "loss": 0.2361, "step": 87465 }, { "epoch": 2.553860527583539, "grad_norm": 0.6054925875218392, "learning_rate": 8.261692349283591e-06, "loss": 0.2505, "step": 87470 }, { "epoch": 2.5540065108538563, "grad_norm": 0.558275993732063, "learning_rate": 8.258988915923223e-06, "loss": 0.2477, "step": 87475 }, { "epoch": 2.5541524941241733, "grad_norm": 0.5621737508594469, "learning_rate": 8.256285482562854e-06, "loss": 0.2595, "step": 87480 }, { "epoch": 2.5542984773944903, "grad_norm": 0.5668371736637031, "learning_rate": 8.253582049202488e-06, "loss": 0.2644, "step": 87485 }, { "epoch": 2.554444460664808, "grad_norm": 0.5658443090568397, "learning_rate": 8.25087861584212e-06, "loss": 0.2427, "step": 87490 }, { "epoch": 2.5545904439351252, "grad_norm": 0.6160171165559705, "learning_rate": 8.248175182481751e-06, "loss": 0.2373, "step": 87495 }, { "epoch": 2.5547364272054423, "grad_norm": 0.5751674158385793, "learning_rate": 8.245471749121385e-06, "loss": 0.2311, "step": 87500 }, { "epoch": 2.5548824104757593, "grad_norm": 0.5600613340347043, "learning_rate": 8.242768315761017e-06, "loss": 0.2441, "step": 87505 }, { "epoch": 2.5550283937460767, "grad_norm": 0.616646870995407, "learning_rate": 8.24006488240065e-06, "loss": 0.2323, "step": 87510 }, { "epoch": 2.555174377016394, "grad_norm": 0.5790040905164403, "learning_rate": 8.237361449040282e-06, "loss": 0.2572, "step": 87515 }, { "epoch": 2.555320360286711, "grad_norm": 0.6065942134759783, "learning_rate": 8.234658015679914e-06, "loss": 0.2545, "step": 87520 }, { "epoch": 2.555466343557028, "grad_norm": 0.6275627169220517, "learning_rate": 8.231954582319546e-06, "loss": 0.252, "step": 87525 }, { "epoch": 2.5556123268273456, "grad_norm": 0.6061982329317046, "learning_rate": 8.229251148959179e-06, "loss": 0.2349, "step": 87530 }, { "epoch": 2.555758310097663, "grad_norm": 0.584585686800202, "learning_rate": 8.226547715598811e-06, "loss": 0.2504, "step": 87535 }, { "epoch": 2.55590429336798, "grad_norm": 0.6134825548608724, "learning_rate": 8.223844282238443e-06, "loss": 0.2378, "step": 87540 }, { "epoch": 2.556050276638297, "grad_norm": 0.5958980132926494, "learning_rate": 8.221140848878076e-06, "loss": 0.2446, "step": 87545 }, { "epoch": 2.5561962599086145, "grad_norm": 0.578386107394197, "learning_rate": 8.218437415517708e-06, "loss": 0.2459, "step": 87550 }, { "epoch": 2.5563422431789316, "grad_norm": 0.6185530502001966, "learning_rate": 8.21573398215734e-06, "loss": 0.246, "step": 87555 }, { "epoch": 2.556488226449249, "grad_norm": 0.5471454764103929, "learning_rate": 8.213030548796973e-06, "loss": 0.2389, "step": 87560 }, { "epoch": 2.556634209719566, "grad_norm": 0.5938713348814901, "learning_rate": 8.210327115436605e-06, "loss": 0.2518, "step": 87565 }, { "epoch": 2.5567801929898835, "grad_norm": 0.5577816465247879, "learning_rate": 8.207623682076237e-06, "loss": 0.2478, "step": 87570 }, { "epoch": 2.5569261762602005, "grad_norm": 0.5782093405210823, "learning_rate": 8.20492024871587e-06, "loss": 0.2476, "step": 87575 }, { "epoch": 2.557072159530518, "grad_norm": 0.6042348917805149, "learning_rate": 8.202216815355502e-06, "loss": 0.2373, "step": 87580 }, { "epoch": 2.557218142800835, "grad_norm": 0.5804730965950653, "learning_rate": 8.199513381995134e-06, "loss": 0.2572, "step": 87585 }, { "epoch": 2.5573641260711524, "grad_norm": 0.5626832331248895, "learning_rate": 8.196809948634767e-06, "loss": 0.2384, "step": 87590 }, { "epoch": 2.5575101093414694, "grad_norm": 0.5480012519432709, "learning_rate": 8.194106515274399e-06, "loss": 0.2422, "step": 87595 }, { "epoch": 2.557656092611787, "grad_norm": 0.541298556855255, "learning_rate": 8.191403081914031e-06, "loss": 0.2214, "step": 87600 }, { "epoch": 2.557802075882104, "grad_norm": 0.6313596888283193, "learning_rate": 8.188699648553664e-06, "loss": 0.2446, "step": 87605 }, { "epoch": 2.5579480591524213, "grad_norm": 0.604896475110479, "learning_rate": 8.185996215193296e-06, "loss": 0.2741, "step": 87610 }, { "epoch": 2.5580940424227383, "grad_norm": 0.5461024681369443, "learning_rate": 8.183292781832928e-06, "loss": 0.2418, "step": 87615 }, { "epoch": 2.5582400256930553, "grad_norm": 0.5658812583131553, "learning_rate": 8.18058934847256e-06, "loss": 0.2398, "step": 87620 }, { "epoch": 2.5583860089633728, "grad_norm": 0.5511808573234599, "learning_rate": 8.177885915112193e-06, "loss": 0.2437, "step": 87625 }, { "epoch": 2.55853199223369, "grad_norm": 0.5875740355699334, "learning_rate": 8.175182481751825e-06, "loss": 0.2614, "step": 87630 }, { "epoch": 2.558677975504007, "grad_norm": 0.598691945611985, "learning_rate": 8.172479048391457e-06, "loss": 0.2443, "step": 87635 }, { "epoch": 2.5588239587743242, "grad_norm": 0.6012999732583582, "learning_rate": 8.16977561503109e-06, "loss": 0.2418, "step": 87640 }, { "epoch": 2.5589699420446417, "grad_norm": 0.5713205782376428, "learning_rate": 8.167072181670722e-06, "loss": 0.2618, "step": 87645 }, { "epoch": 2.559115925314959, "grad_norm": 0.5506157959895852, "learning_rate": 8.164368748310354e-06, "loss": 0.2558, "step": 87650 }, { "epoch": 2.559261908585276, "grad_norm": 0.6266430265103807, "learning_rate": 8.161665314949987e-06, "loss": 0.2513, "step": 87655 }, { "epoch": 2.559407891855593, "grad_norm": 0.5804985060988541, "learning_rate": 8.158961881589619e-06, "loss": 0.2557, "step": 87660 }, { "epoch": 2.5595538751259106, "grad_norm": 0.592849184602162, "learning_rate": 8.156258448229251e-06, "loss": 0.2573, "step": 87665 }, { "epoch": 2.559699858396228, "grad_norm": 0.5624281316487153, "learning_rate": 8.153555014868884e-06, "loss": 0.2423, "step": 87670 }, { "epoch": 2.559845841666545, "grad_norm": 0.5872577973040118, "learning_rate": 8.150851581508516e-06, "loss": 0.2646, "step": 87675 }, { "epoch": 2.559991824936862, "grad_norm": 0.5655490433785368, "learning_rate": 8.14814814814815e-06, "loss": 0.2345, "step": 87680 }, { "epoch": 2.5601378082071795, "grad_norm": 0.6111489492709005, "learning_rate": 8.14544471478778e-06, "loss": 0.2519, "step": 87685 }, { "epoch": 2.5602837914774965, "grad_norm": 0.639998498006304, "learning_rate": 8.142741281427413e-06, "loss": 0.2662, "step": 87690 }, { "epoch": 2.560429774747814, "grad_norm": 0.5350114098686966, "learning_rate": 8.140037848067047e-06, "loss": 0.2455, "step": 87695 }, { "epoch": 2.560575758018131, "grad_norm": 0.594536077632483, "learning_rate": 8.137334414706678e-06, "loss": 0.2558, "step": 87700 }, { "epoch": 2.5607217412884484, "grad_norm": 0.5770731580068235, "learning_rate": 8.13463098134631e-06, "loss": 0.2397, "step": 87705 }, { "epoch": 2.5608677245587654, "grad_norm": 0.6566551872248538, "learning_rate": 8.131927547985942e-06, "loss": 0.2524, "step": 87710 }, { "epoch": 2.561013707829083, "grad_norm": 0.5840520111886636, "learning_rate": 8.129224114625575e-06, "loss": 0.2472, "step": 87715 }, { "epoch": 2.5611596910994, "grad_norm": 0.5718440393316525, "learning_rate": 8.126520681265207e-06, "loss": 0.248, "step": 87720 }, { "epoch": 2.5613056743697173, "grad_norm": 0.598836315319764, "learning_rate": 8.12381724790484e-06, "loss": 0.2355, "step": 87725 }, { "epoch": 2.5614516576400344, "grad_norm": 0.5469722211144549, "learning_rate": 8.121113814544471e-06, "loss": 0.2652, "step": 87730 }, { "epoch": 2.561597640910352, "grad_norm": 0.5572904280891617, "learning_rate": 8.118410381184104e-06, "loss": 0.2255, "step": 87735 }, { "epoch": 2.561743624180669, "grad_norm": 0.6205613684469881, "learning_rate": 8.115706947823736e-06, "loss": 0.2499, "step": 87740 }, { "epoch": 2.5618896074509863, "grad_norm": 0.6326412526092949, "learning_rate": 8.113003514463368e-06, "loss": 0.2452, "step": 87745 }, { "epoch": 2.5620355907213033, "grad_norm": 0.5463624406299207, "learning_rate": 8.110300081103e-06, "loss": 0.2613, "step": 87750 }, { "epoch": 2.5621815739916207, "grad_norm": 0.5836329849424836, "learning_rate": 8.107596647742633e-06, "loss": 0.2305, "step": 87755 }, { "epoch": 2.5623275572619377, "grad_norm": 0.5907611144939797, "learning_rate": 8.104893214382265e-06, "loss": 0.2458, "step": 87760 }, { "epoch": 2.562473540532255, "grad_norm": 0.5851924678370848, "learning_rate": 8.1021897810219e-06, "loss": 0.2547, "step": 87765 }, { "epoch": 2.562619523802572, "grad_norm": 0.5583752265254958, "learning_rate": 8.09948634766153e-06, "loss": 0.2362, "step": 87770 }, { "epoch": 2.562765507072889, "grad_norm": 0.5767787952075407, "learning_rate": 8.096782914301162e-06, "loss": 0.2314, "step": 87775 }, { "epoch": 2.5629114903432066, "grad_norm": 0.5637766405717105, "learning_rate": 8.094079480940796e-06, "loss": 0.2593, "step": 87780 }, { "epoch": 2.563057473613524, "grad_norm": 0.5700925266730537, "learning_rate": 8.091376047580427e-06, "loss": 0.2545, "step": 87785 }, { "epoch": 2.563203456883841, "grad_norm": 0.6069045205922652, "learning_rate": 8.08867261422006e-06, "loss": 0.2493, "step": 87790 }, { "epoch": 2.563349440154158, "grad_norm": 0.6496757665410328, "learning_rate": 8.085969180859693e-06, "loss": 0.2516, "step": 87795 }, { "epoch": 2.5634954234244756, "grad_norm": 0.5575531489801994, "learning_rate": 8.083265747499324e-06, "loss": 0.2442, "step": 87800 }, { "epoch": 2.563641406694793, "grad_norm": 0.5925529508232377, "learning_rate": 8.080562314138956e-06, "loss": 0.253, "step": 87805 }, { "epoch": 2.56378738996511, "grad_norm": 0.571770417449846, "learning_rate": 8.07785888077859e-06, "loss": 0.2453, "step": 87810 }, { "epoch": 2.563933373235427, "grad_norm": 0.6129469089542081, "learning_rate": 8.075155447418221e-06, "loss": 0.2564, "step": 87815 }, { "epoch": 2.5640793565057445, "grad_norm": 0.5647672268939015, "learning_rate": 8.072452014057853e-06, "loss": 0.2465, "step": 87820 }, { "epoch": 2.564225339776062, "grad_norm": 0.6389474616980949, "learning_rate": 8.069748580697487e-06, "loss": 0.2478, "step": 87825 }, { "epoch": 2.564371323046379, "grad_norm": 0.574794337316816, "learning_rate": 8.067045147337118e-06, "loss": 0.2452, "step": 87830 }, { "epoch": 2.564517306316696, "grad_norm": 0.5910666542375115, "learning_rate": 8.06434171397675e-06, "loss": 0.2431, "step": 87835 }, { "epoch": 2.5646632895870134, "grad_norm": 0.5762413047097428, "learning_rate": 8.061638280616384e-06, "loss": 0.2475, "step": 87840 }, { "epoch": 2.5648092728573304, "grad_norm": 0.5958500420143165, "learning_rate": 8.058934847256015e-06, "loss": 0.2732, "step": 87845 }, { "epoch": 2.564955256127648, "grad_norm": 0.5960267647950851, "learning_rate": 8.056231413895649e-06, "loss": 0.2545, "step": 87850 }, { "epoch": 2.565101239397965, "grad_norm": 0.5941301211433955, "learning_rate": 8.053527980535281e-06, "loss": 0.2471, "step": 87855 }, { "epoch": 2.5652472226682823, "grad_norm": 0.5727167620841451, "learning_rate": 8.050824547174912e-06, "loss": 0.2505, "step": 87860 }, { "epoch": 2.5653932059385993, "grad_norm": 0.5906941421326296, "learning_rate": 8.048121113814546e-06, "loss": 0.25, "step": 87865 }, { "epoch": 2.5655391892089168, "grad_norm": 0.5709555295809678, "learning_rate": 8.045417680454178e-06, "loss": 0.2431, "step": 87870 }, { "epoch": 2.5656851724792338, "grad_norm": 0.5840411718242865, "learning_rate": 8.042714247093809e-06, "loss": 0.253, "step": 87875 }, { "epoch": 2.5658311557495512, "grad_norm": 0.5526910143254238, "learning_rate": 8.040010813733443e-06, "loss": 0.2313, "step": 87880 }, { "epoch": 2.5659771390198682, "grad_norm": 0.6286841612147323, "learning_rate": 8.037307380373075e-06, "loss": 0.2488, "step": 87885 }, { "epoch": 2.5661231222901857, "grad_norm": 0.5598655177516325, "learning_rate": 8.034603947012706e-06, "loss": 0.2496, "step": 87890 }, { "epoch": 2.5662691055605027, "grad_norm": 0.5656811811989813, "learning_rate": 8.03190051365234e-06, "loss": 0.2449, "step": 87895 }, { "epoch": 2.56641508883082, "grad_norm": 0.5825620794053519, "learning_rate": 8.02919708029197e-06, "loss": 0.2545, "step": 87900 }, { "epoch": 2.566561072101137, "grad_norm": 0.5387346492629336, "learning_rate": 8.026493646931603e-06, "loss": 0.2407, "step": 87905 }, { "epoch": 2.566707055371454, "grad_norm": 0.5625582797740858, "learning_rate": 8.023790213571237e-06, "loss": 0.2355, "step": 87910 }, { "epoch": 2.5668530386417716, "grad_norm": 0.6119762637133976, "learning_rate": 8.021086780210867e-06, "loss": 0.2585, "step": 87915 }, { "epoch": 2.566999021912089, "grad_norm": 0.5709440237790349, "learning_rate": 8.0183833468505e-06, "loss": 0.2439, "step": 87920 }, { "epoch": 2.567145005182406, "grad_norm": 0.5576496983049867, "learning_rate": 8.015679913490134e-06, "loss": 0.2253, "step": 87925 }, { "epoch": 2.567290988452723, "grad_norm": 0.5856715931158767, "learning_rate": 8.012976480129764e-06, "loss": 0.2326, "step": 87930 }, { "epoch": 2.5674369717230405, "grad_norm": 0.5889470380383219, "learning_rate": 8.010273046769398e-06, "loss": 0.2505, "step": 87935 }, { "epoch": 2.567582954993358, "grad_norm": 0.6232489037679205, "learning_rate": 8.00756961340903e-06, "loss": 0.2495, "step": 87940 }, { "epoch": 2.567728938263675, "grad_norm": 0.5932726599053841, "learning_rate": 8.004866180048661e-06, "loss": 0.26, "step": 87945 }, { "epoch": 2.567874921533992, "grad_norm": 0.582588624160032, "learning_rate": 8.002162746688295e-06, "loss": 0.2589, "step": 87950 }, { "epoch": 2.5680209048043094, "grad_norm": 0.6191339445791687, "learning_rate": 7.999459313327927e-06, "loss": 0.2532, "step": 87955 }, { "epoch": 2.568166888074627, "grad_norm": 0.6049986714243549, "learning_rate": 7.996755879967558e-06, "loss": 0.25, "step": 87960 }, { "epoch": 2.568312871344944, "grad_norm": 0.607994363405846, "learning_rate": 7.994052446607192e-06, "loss": 0.245, "step": 87965 }, { "epoch": 2.568458854615261, "grad_norm": 0.6155548697580321, "learning_rate": 7.991349013246824e-06, "loss": 0.2476, "step": 87970 }, { "epoch": 2.5686048378855784, "grad_norm": 0.5426270873280843, "learning_rate": 7.988645579886455e-06, "loss": 0.2376, "step": 87975 }, { "epoch": 2.5687508211558954, "grad_norm": 0.6045262914428329, "learning_rate": 7.985942146526089e-06, "loss": 0.2234, "step": 87980 }, { "epoch": 2.568896804426213, "grad_norm": 0.6025147454800441, "learning_rate": 7.983238713165721e-06, "loss": 0.2508, "step": 87985 }, { "epoch": 2.56904278769653, "grad_norm": 0.5720188028997569, "learning_rate": 7.980535279805352e-06, "loss": 0.2376, "step": 87990 }, { "epoch": 2.5691887709668473, "grad_norm": 0.6083476409714839, "learning_rate": 7.977831846444986e-06, "loss": 0.255, "step": 87995 }, { "epoch": 2.5693347542371643, "grad_norm": 0.5848972076501311, "learning_rate": 7.975128413084618e-06, "loss": 0.2317, "step": 88000 }, { "epoch": 2.5694807375074817, "grad_norm": 0.6070390739314142, "learning_rate": 7.972424979724249e-06, "loss": 0.2447, "step": 88005 }, { "epoch": 2.5696267207777987, "grad_norm": 0.6086070229569323, "learning_rate": 7.969721546363883e-06, "loss": 0.2256, "step": 88010 }, { "epoch": 2.569772704048116, "grad_norm": 0.5705612578922693, "learning_rate": 7.967018113003515e-06, "loss": 0.2416, "step": 88015 }, { "epoch": 2.569918687318433, "grad_norm": 0.5835418915247667, "learning_rate": 7.964314679643148e-06, "loss": 0.2481, "step": 88020 }, { "epoch": 2.5700646705887507, "grad_norm": 0.5473921114066437, "learning_rate": 7.96161124628278e-06, "loss": 0.2516, "step": 88025 }, { "epoch": 2.5702106538590677, "grad_norm": 0.6115678603239364, "learning_rate": 7.958907812922412e-06, "loss": 0.26, "step": 88030 }, { "epoch": 2.570356637129385, "grad_norm": 0.5754184586073882, "learning_rate": 7.956204379562045e-06, "loss": 0.2408, "step": 88035 }, { "epoch": 2.570502620399702, "grad_norm": 0.5662803525761569, "learning_rate": 7.953500946201677e-06, "loss": 0.2326, "step": 88040 }, { "epoch": 2.5706486036700196, "grad_norm": 0.5730406527778551, "learning_rate": 7.95079751284131e-06, "loss": 0.2563, "step": 88045 }, { "epoch": 2.5707945869403366, "grad_norm": 0.590513391924413, "learning_rate": 7.948094079480941e-06, "loss": 0.2299, "step": 88050 }, { "epoch": 2.570940570210654, "grad_norm": 0.5402652093772597, "learning_rate": 7.945390646120574e-06, "loss": 0.2358, "step": 88055 }, { "epoch": 2.571086553480971, "grad_norm": 0.5526589391888191, "learning_rate": 7.942687212760206e-06, "loss": 0.2282, "step": 88060 }, { "epoch": 2.571232536751288, "grad_norm": 0.5726604157414629, "learning_rate": 7.939983779399838e-06, "loss": 0.2502, "step": 88065 }, { "epoch": 2.5713785200216055, "grad_norm": 0.5997421495903207, "learning_rate": 7.93728034603947e-06, "loss": 0.2611, "step": 88070 }, { "epoch": 2.571524503291923, "grad_norm": 0.5752736365651153, "learning_rate": 7.934576912679103e-06, "loss": 0.2296, "step": 88075 }, { "epoch": 2.57167048656224, "grad_norm": 0.5459227441001236, "learning_rate": 7.931873479318735e-06, "loss": 0.2413, "step": 88080 }, { "epoch": 2.571816469832557, "grad_norm": 0.6160578544150521, "learning_rate": 7.929170045958368e-06, "loss": 0.2453, "step": 88085 }, { "epoch": 2.5719624531028744, "grad_norm": 0.6421527921205906, "learning_rate": 7.926466612597998e-06, "loss": 0.2692, "step": 88090 }, { "epoch": 2.572108436373192, "grad_norm": 0.5856411098493188, "learning_rate": 7.923763179237632e-06, "loss": 0.2287, "step": 88095 }, { "epoch": 2.572254419643509, "grad_norm": 0.588882002138383, "learning_rate": 7.921059745877265e-06, "loss": 0.2478, "step": 88100 }, { "epoch": 2.572400402913826, "grad_norm": 0.6243193977440779, "learning_rate": 7.918356312516897e-06, "loss": 0.2415, "step": 88105 }, { "epoch": 2.5725463861841433, "grad_norm": 0.5794463633237618, "learning_rate": 7.91565287915653e-06, "loss": 0.242, "step": 88110 }, { "epoch": 2.572692369454461, "grad_norm": 0.5647931143888214, "learning_rate": 7.912949445796162e-06, "loss": 0.2443, "step": 88115 }, { "epoch": 2.572838352724778, "grad_norm": 0.557603883844667, "learning_rate": 7.910246012435794e-06, "loss": 0.2344, "step": 88120 }, { "epoch": 2.572984335995095, "grad_norm": 0.593751012017465, "learning_rate": 7.907542579075426e-06, "loss": 0.2454, "step": 88125 }, { "epoch": 2.5731303192654122, "grad_norm": 0.5963877424444095, "learning_rate": 7.904839145715059e-06, "loss": 0.254, "step": 88130 }, { "epoch": 2.5732763025357293, "grad_norm": 0.5853203878924749, "learning_rate": 7.902135712354691e-06, "loss": 0.2473, "step": 88135 }, { "epoch": 2.5734222858060467, "grad_norm": 0.5924757771368792, "learning_rate": 7.899432278994323e-06, "loss": 0.2509, "step": 88140 }, { "epoch": 2.5735682690763637, "grad_norm": 0.5934392374485477, "learning_rate": 7.896728845633956e-06, "loss": 0.2294, "step": 88145 }, { "epoch": 2.573714252346681, "grad_norm": 0.602427902057771, "learning_rate": 7.894025412273588e-06, "loss": 0.2506, "step": 88150 }, { "epoch": 2.573860235616998, "grad_norm": 0.552926647895381, "learning_rate": 7.89132197891322e-06, "loss": 0.2534, "step": 88155 }, { "epoch": 2.5740062188873156, "grad_norm": 0.5848245235912348, "learning_rate": 7.888618545552852e-06, "loss": 0.2495, "step": 88160 }, { "epoch": 2.5741522021576326, "grad_norm": 0.5678847867884564, "learning_rate": 7.885915112192485e-06, "loss": 0.2456, "step": 88165 }, { "epoch": 2.57429818542795, "grad_norm": 0.5629592637899727, "learning_rate": 7.883211678832117e-06, "loss": 0.2531, "step": 88170 }, { "epoch": 2.574444168698267, "grad_norm": 0.5550090970735193, "learning_rate": 7.88050824547175e-06, "loss": 0.2502, "step": 88175 }, { "epoch": 2.5745901519685845, "grad_norm": 0.5627907534244324, "learning_rate": 7.877804812111382e-06, "loss": 0.241, "step": 88180 }, { "epoch": 2.5747361352389015, "grad_norm": 0.638447708742002, "learning_rate": 7.875101378751014e-06, "loss": 0.2605, "step": 88185 }, { "epoch": 2.574882118509219, "grad_norm": 0.5199987979542992, "learning_rate": 7.872397945390646e-06, "loss": 0.2404, "step": 88190 }, { "epoch": 2.575028101779536, "grad_norm": 0.5798328466525773, "learning_rate": 7.869694512030279e-06, "loss": 0.2502, "step": 88195 }, { "epoch": 2.575174085049853, "grad_norm": 0.6210476395158542, "learning_rate": 7.866991078669911e-06, "loss": 0.2494, "step": 88200 }, { "epoch": 2.5753200683201705, "grad_norm": 0.5947159202402282, "learning_rate": 7.864287645309543e-06, "loss": 0.2529, "step": 88205 }, { "epoch": 2.575466051590488, "grad_norm": 0.585225500502915, "learning_rate": 7.861584211949176e-06, "loss": 0.2473, "step": 88210 }, { "epoch": 2.575612034860805, "grad_norm": 0.5947992436031184, "learning_rate": 7.858880778588808e-06, "loss": 0.2564, "step": 88215 }, { "epoch": 2.575758018131122, "grad_norm": 0.5686140761958415, "learning_rate": 7.85617734522844e-06, "loss": 0.2549, "step": 88220 }, { "epoch": 2.5759040014014394, "grad_norm": 0.6197009245913135, "learning_rate": 7.853473911868073e-06, "loss": 0.2641, "step": 88225 }, { "epoch": 2.576049984671757, "grad_norm": 0.5846109251007444, "learning_rate": 7.850770478507705e-06, "loss": 0.2383, "step": 88230 }, { "epoch": 2.576195967942074, "grad_norm": 0.6056147054856936, "learning_rate": 7.848067045147337e-06, "loss": 0.2448, "step": 88235 }, { "epoch": 2.576341951212391, "grad_norm": 0.5476742307219509, "learning_rate": 7.84536361178697e-06, "loss": 0.2248, "step": 88240 }, { "epoch": 2.5764879344827083, "grad_norm": 0.5588399990338269, "learning_rate": 7.842660178426602e-06, "loss": 0.2433, "step": 88245 }, { "epoch": 2.5766339177530257, "grad_norm": 0.6092044500908975, "learning_rate": 7.839956745066234e-06, "loss": 0.256, "step": 88250 }, { "epoch": 2.5767799010233428, "grad_norm": 0.602933828438008, "learning_rate": 7.837253311705866e-06, "loss": 0.2552, "step": 88255 }, { "epoch": 2.5769258842936598, "grad_norm": 0.5697305276202601, "learning_rate": 7.834549878345499e-06, "loss": 0.2473, "step": 88260 }, { "epoch": 2.577071867563977, "grad_norm": 0.6073520662528852, "learning_rate": 7.831846444985131e-06, "loss": 0.2551, "step": 88265 }, { "epoch": 2.5772178508342947, "grad_norm": 0.5894244020372074, "learning_rate": 7.829143011624763e-06, "loss": 0.2589, "step": 88270 }, { "epoch": 2.5773638341046117, "grad_norm": 0.5831513560374981, "learning_rate": 7.826439578264397e-06, "loss": 0.2649, "step": 88275 }, { "epoch": 2.5775098173749287, "grad_norm": 0.5894635900039531, "learning_rate": 7.823736144904028e-06, "loss": 0.2352, "step": 88280 }, { "epoch": 2.577655800645246, "grad_norm": 0.6117810963750457, "learning_rate": 7.82103271154366e-06, "loss": 0.2414, "step": 88285 }, { "epoch": 2.577801783915563, "grad_norm": 0.5470600918384414, "learning_rate": 7.818329278183294e-06, "loss": 0.2456, "step": 88290 }, { "epoch": 2.5779477671858806, "grad_norm": 0.6361961913093407, "learning_rate": 7.815625844822925e-06, "loss": 0.2515, "step": 88295 }, { "epoch": 2.5780937504561976, "grad_norm": 0.5560076378121162, "learning_rate": 7.812922411462557e-06, "loss": 0.2366, "step": 88300 }, { "epoch": 2.578239733726515, "grad_norm": 0.5999943367985119, "learning_rate": 7.810218978102191e-06, "loss": 0.2492, "step": 88305 }, { "epoch": 2.578385716996832, "grad_norm": 0.6426170265605379, "learning_rate": 7.807515544741822e-06, "loss": 0.2516, "step": 88310 }, { "epoch": 2.5785317002671495, "grad_norm": 0.5733690455226454, "learning_rate": 7.804812111381454e-06, "loss": 0.2331, "step": 88315 }, { "epoch": 2.5786776835374665, "grad_norm": 0.627712619912629, "learning_rate": 7.802108678021088e-06, "loss": 0.2501, "step": 88320 }, { "epoch": 2.578823666807784, "grad_norm": 0.5721114736611794, "learning_rate": 7.799405244660719e-06, "loss": 0.2582, "step": 88325 }, { "epoch": 2.578969650078101, "grad_norm": 0.5581505499566254, "learning_rate": 7.796701811300351e-06, "loss": 0.2365, "step": 88330 }, { "epoch": 2.5791156333484184, "grad_norm": 0.5841112375646577, "learning_rate": 7.793998377939985e-06, "loss": 0.2406, "step": 88335 }, { "epoch": 2.5792616166187354, "grad_norm": 0.5808038180872529, "learning_rate": 7.791294944579616e-06, "loss": 0.2451, "step": 88340 }, { "epoch": 2.579407599889053, "grad_norm": 0.5612157599430112, "learning_rate": 7.788591511219248e-06, "loss": 0.2401, "step": 88345 }, { "epoch": 2.57955358315937, "grad_norm": 0.6017139553823653, "learning_rate": 7.78588807785888e-06, "loss": 0.2479, "step": 88350 }, { "epoch": 2.579699566429687, "grad_norm": 0.5466181768219932, "learning_rate": 7.783184644498513e-06, "loss": 0.241, "step": 88355 }, { "epoch": 2.5798455497000043, "grad_norm": 0.6207472690911519, "learning_rate": 7.780481211138147e-06, "loss": 0.2468, "step": 88360 }, { "epoch": 2.579991532970322, "grad_norm": 0.555595719109991, "learning_rate": 7.777777777777777e-06, "loss": 0.2342, "step": 88365 }, { "epoch": 2.580137516240639, "grad_norm": 0.6397921367553152, "learning_rate": 7.77507434441741e-06, "loss": 0.2528, "step": 88370 }, { "epoch": 2.580283499510956, "grad_norm": 0.5443424463969724, "learning_rate": 7.772370911057044e-06, "loss": 0.2344, "step": 88375 }, { "epoch": 2.5804294827812733, "grad_norm": 0.5691148066678567, "learning_rate": 7.769667477696674e-06, "loss": 0.2442, "step": 88380 }, { "epoch": 2.5805754660515907, "grad_norm": 0.5711606773898081, "learning_rate": 7.766964044336307e-06, "loss": 0.2447, "step": 88385 }, { "epoch": 2.5807214493219077, "grad_norm": 0.5996336161085941, "learning_rate": 7.76426061097594e-06, "loss": 0.2458, "step": 88390 }, { "epoch": 2.5808674325922247, "grad_norm": 0.5899396777823568, "learning_rate": 7.761557177615571e-06, "loss": 0.2416, "step": 88395 }, { "epoch": 2.581013415862542, "grad_norm": 0.5660814009750067, "learning_rate": 7.758853744255204e-06, "loss": 0.2593, "step": 88400 }, { "epoch": 2.5811593991328596, "grad_norm": 0.592386527825684, "learning_rate": 7.756150310894838e-06, "loss": 0.2425, "step": 88405 }, { "epoch": 2.5813053824031766, "grad_norm": 0.5682478691809727, "learning_rate": 7.753446877534468e-06, "loss": 0.2418, "step": 88410 }, { "epoch": 2.5814513656734936, "grad_norm": 0.5315736735729559, "learning_rate": 7.7507434441741e-06, "loss": 0.235, "step": 88415 }, { "epoch": 2.581597348943811, "grad_norm": 0.571242987855075, "learning_rate": 7.748040010813735e-06, "loss": 0.2287, "step": 88420 }, { "epoch": 2.581743332214128, "grad_norm": 0.6356632087001657, "learning_rate": 7.745336577453365e-06, "loss": 0.2661, "step": 88425 }, { "epoch": 2.5818893154844456, "grad_norm": 0.5759593243256711, "learning_rate": 7.742633144093e-06, "loss": 0.2425, "step": 88430 }, { "epoch": 2.5820352987547626, "grad_norm": 0.616366244728355, "learning_rate": 7.739929710732632e-06, "loss": 0.2618, "step": 88435 }, { "epoch": 2.58218128202508, "grad_norm": 0.5558421489255443, "learning_rate": 7.737226277372262e-06, "loss": 0.2393, "step": 88440 }, { "epoch": 2.582327265295397, "grad_norm": 0.5581164778855051, "learning_rate": 7.734522844011896e-06, "loss": 0.2309, "step": 88445 }, { "epoch": 2.5824732485657145, "grad_norm": 0.5838884501108114, "learning_rate": 7.731819410651529e-06, "loss": 0.2486, "step": 88450 }, { "epoch": 2.5826192318360315, "grad_norm": 0.6399473634960451, "learning_rate": 7.72911597729116e-06, "loss": 0.2562, "step": 88455 }, { "epoch": 2.582765215106349, "grad_norm": 0.5925999182546289, "learning_rate": 7.726412543930793e-06, "loss": 0.2548, "step": 88460 }, { "epoch": 2.582911198376666, "grad_norm": 0.5748853237863478, "learning_rate": 7.723709110570426e-06, "loss": 0.2543, "step": 88465 }, { "epoch": 2.5830571816469834, "grad_norm": 0.627820922357001, "learning_rate": 7.721005677210056e-06, "loss": 0.2349, "step": 88470 }, { "epoch": 2.5832031649173004, "grad_norm": 0.6214918482317384, "learning_rate": 7.71830224384969e-06, "loss": 0.2478, "step": 88475 }, { "epoch": 2.583349148187618, "grad_norm": 0.624268144966071, "learning_rate": 7.715598810489322e-06, "loss": 0.249, "step": 88480 }, { "epoch": 2.583495131457935, "grad_norm": 0.6241015837366116, "learning_rate": 7.712895377128953e-06, "loss": 0.2574, "step": 88485 }, { "epoch": 2.5836411147282523, "grad_norm": 0.6114980282110872, "learning_rate": 7.710191943768587e-06, "loss": 0.2594, "step": 88490 }, { "epoch": 2.5837870979985693, "grad_norm": 0.5908739919678231, "learning_rate": 7.70748851040822e-06, "loss": 0.2601, "step": 88495 }, { "epoch": 2.5839330812688868, "grad_norm": 0.5800736844624513, "learning_rate": 7.70478507704785e-06, "loss": 0.2356, "step": 88500 }, { "epoch": 2.5840790645392038, "grad_norm": 0.6388528085833381, "learning_rate": 7.702081643687484e-06, "loss": 0.2498, "step": 88505 }, { "epoch": 2.584225047809521, "grad_norm": 0.5855081153432509, "learning_rate": 7.699378210327116e-06, "loss": 0.2289, "step": 88510 }, { "epoch": 2.5843710310798382, "grad_norm": 0.6440597929659385, "learning_rate": 7.696674776966749e-06, "loss": 0.26, "step": 88515 }, { "epoch": 2.5845170143501557, "grad_norm": 0.6076527174733138, "learning_rate": 7.693971343606381e-06, "loss": 0.2608, "step": 88520 }, { "epoch": 2.5846629976204727, "grad_norm": 0.5965979610925254, "learning_rate": 7.691267910246013e-06, "loss": 0.255, "step": 88525 }, { "epoch": 2.5848089808907897, "grad_norm": 0.6190248239682835, "learning_rate": 7.688564476885646e-06, "loss": 0.2515, "step": 88530 }, { "epoch": 2.584954964161107, "grad_norm": 0.6075475210949921, "learning_rate": 7.685861043525278e-06, "loss": 0.2397, "step": 88535 }, { "epoch": 2.5851009474314246, "grad_norm": 0.5306231228581766, "learning_rate": 7.68315761016491e-06, "loss": 0.2503, "step": 88540 }, { "epoch": 2.5852469307017416, "grad_norm": 0.6096860072430634, "learning_rate": 7.680454176804543e-06, "loss": 0.2493, "step": 88545 }, { "epoch": 2.5853929139720586, "grad_norm": 0.5890746330517808, "learning_rate": 7.677750743444175e-06, "loss": 0.2398, "step": 88550 }, { "epoch": 2.585538897242376, "grad_norm": 0.5606624953495859, "learning_rate": 7.675047310083806e-06, "loss": 0.2434, "step": 88555 }, { "epoch": 2.5856848805126935, "grad_norm": 0.5477020886462597, "learning_rate": 7.67234387672344e-06, "loss": 0.2358, "step": 88560 }, { "epoch": 2.5858308637830105, "grad_norm": 0.5558370700419093, "learning_rate": 7.669640443363072e-06, "loss": 0.2384, "step": 88565 }, { "epoch": 2.5859768470533275, "grad_norm": 0.5701262057185454, "learning_rate": 7.666937010002702e-06, "loss": 0.2598, "step": 88570 }, { "epoch": 2.586122830323645, "grad_norm": 0.562473960158092, "learning_rate": 7.664233576642336e-06, "loss": 0.243, "step": 88575 }, { "epoch": 2.586268813593962, "grad_norm": 0.576674158424771, "learning_rate": 7.661530143281969e-06, "loss": 0.2364, "step": 88580 }, { "epoch": 2.5864147968642794, "grad_norm": 0.546333525465865, "learning_rate": 7.6588267099216e-06, "loss": 0.2367, "step": 88585 }, { "epoch": 2.5865607801345964, "grad_norm": 0.5621685686167884, "learning_rate": 7.656123276561233e-06, "loss": 0.2512, "step": 88590 }, { "epoch": 2.586706763404914, "grad_norm": 0.5381259292340121, "learning_rate": 7.653419843200866e-06, "loss": 0.2275, "step": 88595 }, { "epoch": 2.586852746675231, "grad_norm": 0.550983677053925, "learning_rate": 7.650716409840498e-06, "loss": 0.2478, "step": 88600 }, { "epoch": 2.5869987299455484, "grad_norm": 0.5220731881274412, "learning_rate": 7.64801297648013e-06, "loss": 0.2269, "step": 88605 }, { "epoch": 2.5871447132158654, "grad_norm": 0.6078322879260907, "learning_rate": 7.645309543119763e-06, "loss": 0.2611, "step": 88610 }, { "epoch": 2.587290696486183, "grad_norm": 0.6500133008906057, "learning_rate": 7.642606109759395e-06, "loss": 0.2481, "step": 88615 }, { "epoch": 2.5874366797565, "grad_norm": 0.6521014939271345, "learning_rate": 7.639902676399027e-06, "loss": 0.2406, "step": 88620 }, { "epoch": 2.5875826630268173, "grad_norm": 0.597757636042666, "learning_rate": 7.63719924303866e-06, "loss": 0.2536, "step": 88625 }, { "epoch": 2.5877286462971343, "grad_norm": 0.6178235049518492, "learning_rate": 7.634495809678292e-06, "loss": 0.2537, "step": 88630 }, { "epoch": 2.5878746295674517, "grad_norm": 0.5874839519345173, "learning_rate": 7.631792376317924e-06, "loss": 0.2524, "step": 88635 }, { "epoch": 2.5880206128377687, "grad_norm": 0.5951257073253771, "learning_rate": 7.629088942957556e-06, "loss": 0.2549, "step": 88640 }, { "epoch": 2.5881665961080857, "grad_norm": 0.6021411971145486, "learning_rate": 7.626385509597189e-06, "loss": 0.2424, "step": 88645 }, { "epoch": 2.588312579378403, "grad_norm": 0.5939760116973196, "learning_rate": 7.623682076236821e-06, "loss": 0.2433, "step": 88650 }, { "epoch": 2.5884585626487207, "grad_norm": 0.579475727814905, "learning_rate": 7.620978642876453e-06, "loss": 0.2426, "step": 88655 }, { "epoch": 2.5886045459190377, "grad_norm": 0.5858936136564011, "learning_rate": 7.618275209516086e-06, "loss": 0.249, "step": 88660 }, { "epoch": 2.5887505291893547, "grad_norm": 0.5810949136899914, "learning_rate": 7.615571776155718e-06, "loss": 0.2502, "step": 88665 }, { "epoch": 2.588896512459672, "grad_norm": 0.5911362788344845, "learning_rate": 7.61286834279535e-06, "loss": 0.2493, "step": 88670 }, { "epoch": 2.5890424957299896, "grad_norm": 0.6094669003312155, "learning_rate": 7.610164909434983e-06, "loss": 0.2614, "step": 88675 }, { "epoch": 2.5891884790003066, "grad_norm": 0.5872297483335885, "learning_rate": 7.607461476074615e-06, "loss": 0.2562, "step": 88680 }, { "epoch": 2.5893344622706236, "grad_norm": 0.6003795433614431, "learning_rate": 7.604758042714248e-06, "loss": 0.2508, "step": 88685 }, { "epoch": 2.589480445540941, "grad_norm": 0.6094749641298614, "learning_rate": 7.60205460935388e-06, "loss": 0.2572, "step": 88690 }, { "epoch": 2.5896264288112585, "grad_norm": 0.5863297228359328, "learning_rate": 7.599351175993512e-06, "loss": 0.2414, "step": 88695 }, { "epoch": 2.5897724120815755, "grad_norm": 0.5941945893444615, "learning_rate": 7.596647742633145e-06, "loss": 0.2505, "step": 88700 }, { "epoch": 2.5899183953518925, "grad_norm": 0.5798037240156743, "learning_rate": 7.593944309272777e-06, "loss": 0.2438, "step": 88705 }, { "epoch": 2.59006437862221, "grad_norm": 0.607750755583431, "learning_rate": 7.591240875912409e-06, "loss": 0.2448, "step": 88710 }, { "epoch": 2.590210361892527, "grad_norm": 0.5949515573658531, "learning_rate": 7.588537442552042e-06, "loss": 0.2525, "step": 88715 }, { "epoch": 2.5903563451628444, "grad_norm": 0.5584367158713862, "learning_rate": 7.585834009191674e-06, "loss": 0.2362, "step": 88720 }, { "epoch": 2.5905023284331614, "grad_norm": 0.6243964715106811, "learning_rate": 7.583130575831306e-06, "loss": 0.2579, "step": 88725 }, { "epoch": 2.590648311703479, "grad_norm": 0.6096392834577155, "learning_rate": 7.580427142470939e-06, "loss": 0.244, "step": 88730 }, { "epoch": 2.590794294973796, "grad_norm": 0.5851672142966331, "learning_rate": 7.577723709110571e-06, "loss": 0.2399, "step": 88735 }, { "epoch": 2.5909402782441133, "grad_norm": 0.6687084619812602, "learning_rate": 7.575020275750203e-06, "loss": 0.2594, "step": 88740 }, { "epoch": 2.5910862615144303, "grad_norm": 0.5613530917960926, "learning_rate": 7.572316842389836e-06, "loss": 0.2526, "step": 88745 }, { "epoch": 2.591232244784748, "grad_norm": 0.5664906615655023, "learning_rate": 7.569613409029468e-06, "loss": 0.2522, "step": 88750 }, { "epoch": 2.591378228055065, "grad_norm": 0.5716702599476853, "learning_rate": 7.566909975669099e-06, "loss": 0.2503, "step": 88755 }, { "epoch": 2.5915242113253822, "grad_norm": 0.5739517732635815, "learning_rate": 7.564206542308733e-06, "loss": 0.2322, "step": 88760 }, { "epoch": 2.5916701945956993, "grad_norm": 0.6090609479792781, "learning_rate": 7.5615031089483645e-06, "loss": 0.2603, "step": 88765 }, { "epoch": 2.5918161778660167, "grad_norm": 0.6020102321838794, "learning_rate": 7.558799675587998e-06, "loss": 0.241, "step": 88770 }, { "epoch": 2.5919621611363337, "grad_norm": 0.6482670617253745, "learning_rate": 7.55609624222763e-06, "loss": 0.2505, "step": 88775 }, { "epoch": 2.592108144406651, "grad_norm": 0.5593948264167086, "learning_rate": 7.5533928088672615e-06, "loss": 0.2448, "step": 88780 }, { "epoch": 2.592254127676968, "grad_norm": 0.5476867832996372, "learning_rate": 7.550689375506895e-06, "loss": 0.2398, "step": 88785 }, { "epoch": 2.5924001109472856, "grad_norm": 0.5241593085755659, "learning_rate": 7.547985942146526e-06, "loss": 0.2457, "step": 88790 }, { "epoch": 2.5925460942176026, "grad_norm": 0.5639550813469292, "learning_rate": 7.5452825087861585e-06, "loss": 0.2404, "step": 88795 }, { "epoch": 2.5926920774879196, "grad_norm": 0.5688301570865265, "learning_rate": 7.542579075425792e-06, "loss": 0.2274, "step": 88800 }, { "epoch": 2.592838060758237, "grad_norm": 0.5600218356625396, "learning_rate": 7.539875642065423e-06, "loss": 0.2442, "step": 88805 }, { "epoch": 2.5929840440285545, "grad_norm": 0.5682778140295439, "learning_rate": 7.537172208705055e-06, "loss": 0.2419, "step": 88810 }, { "epoch": 2.5931300272988715, "grad_norm": 0.5768773540754701, "learning_rate": 7.5344687753446886e-06, "loss": 0.2386, "step": 88815 }, { "epoch": 2.5932760105691885, "grad_norm": 0.5663284246542918, "learning_rate": 7.53176534198432e-06, "loss": 0.2502, "step": 88820 }, { "epoch": 2.593421993839506, "grad_norm": 0.5754137861878258, "learning_rate": 7.529061908623952e-06, "loss": 0.2519, "step": 88825 }, { "epoch": 2.5935679771098235, "grad_norm": 0.6258453088869492, "learning_rate": 7.5263584752635855e-06, "loss": 0.2629, "step": 88830 }, { "epoch": 2.5937139603801405, "grad_norm": 0.6056709609089302, "learning_rate": 7.523655041903217e-06, "loss": 0.2562, "step": 88835 }, { "epoch": 2.5938599436504575, "grad_norm": 0.6028556744152972, "learning_rate": 7.520951608542849e-06, "loss": 0.2605, "step": 88840 }, { "epoch": 2.594005926920775, "grad_norm": 0.5172373902810333, "learning_rate": 7.5182481751824825e-06, "loss": 0.2374, "step": 88845 }, { "epoch": 2.5941519101910924, "grad_norm": 0.5779952514690375, "learning_rate": 7.515544741822114e-06, "loss": 0.2503, "step": 88850 }, { "epoch": 2.5942978934614094, "grad_norm": 0.5655611252979851, "learning_rate": 7.512841308461747e-06, "loss": 0.2383, "step": 88855 }, { "epoch": 2.5944438767317264, "grad_norm": 0.5753187781217671, "learning_rate": 7.5101378751013794e-06, "loss": 0.2435, "step": 88860 }, { "epoch": 2.594589860002044, "grad_norm": 0.5954916666370957, "learning_rate": 7.507434441741011e-06, "loss": 0.2551, "step": 88865 }, { "epoch": 2.594735843272361, "grad_norm": 0.6182880346133517, "learning_rate": 7.504731008380644e-06, "loss": 0.2437, "step": 88870 }, { "epoch": 2.5948818265426783, "grad_norm": 0.5686081228271158, "learning_rate": 7.502027575020276e-06, "loss": 0.2527, "step": 88875 }, { "epoch": 2.5950278098129953, "grad_norm": 0.5829999140778962, "learning_rate": 7.499324141659908e-06, "loss": 0.2613, "step": 88880 }, { "epoch": 2.5951737930833128, "grad_norm": 0.6102462376816937, "learning_rate": 7.496620708299541e-06, "loss": 0.2499, "step": 88885 }, { "epoch": 2.5953197763536298, "grad_norm": 0.5903094454149215, "learning_rate": 7.493917274939173e-06, "loss": 0.2487, "step": 88890 }, { "epoch": 2.595465759623947, "grad_norm": 0.6008903454742245, "learning_rate": 7.491213841578805e-06, "loss": 0.2461, "step": 88895 }, { "epoch": 2.595611742894264, "grad_norm": 0.6517892422989516, "learning_rate": 7.488510408218438e-06, "loss": 0.2493, "step": 88900 }, { "epoch": 2.5957577261645817, "grad_norm": 0.6059634598664264, "learning_rate": 7.48580697485807e-06, "loss": 0.2391, "step": 88905 }, { "epoch": 2.5959037094348987, "grad_norm": 0.5747108055915053, "learning_rate": 7.483103541497702e-06, "loss": 0.2423, "step": 88910 }, { "epoch": 2.596049692705216, "grad_norm": 0.5969504122070802, "learning_rate": 7.480400108137335e-06, "loss": 0.2488, "step": 88915 }, { "epoch": 2.596195675975533, "grad_norm": 0.5639272632950348, "learning_rate": 7.477696674776967e-06, "loss": 0.2511, "step": 88920 }, { "epoch": 2.5963416592458506, "grad_norm": 0.5497584359022188, "learning_rate": 7.474993241416599e-06, "loss": 0.2522, "step": 88925 }, { "epoch": 2.5964876425161676, "grad_norm": 0.616700758935614, "learning_rate": 7.472289808056232e-06, "loss": 0.2452, "step": 88930 }, { "epoch": 2.5966336257864846, "grad_norm": 0.6042691311284618, "learning_rate": 7.469586374695864e-06, "loss": 0.2364, "step": 88935 }, { "epoch": 2.596779609056802, "grad_norm": 0.5726193313722062, "learning_rate": 7.466882941335497e-06, "loss": 0.2462, "step": 88940 }, { "epoch": 2.5969255923271195, "grad_norm": 0.5750366948752305, "learning_rate": 7.464179507975129e-06, "loss": 0.2512, "step": 88945 }, { "epoch": 2.5970715755974365, "grad_norm": 0.5667528208013887, "learning_rate": 7.461476074614761e-06, "loss": 0.2581, "step": 88950 }, { "epoch": 2.5972175588677535, "grad_norm": 0.5733581809882123, "learning_rate": 7.458772641254394e-06, "loss": 0.248, "step": 88955 }, { "epoch": 2.597363542138071, "grad_norm": 0.5923721988513845, "learning_rate": 7.456069207894026e-06, "loss": 0.2538, "step": 88960 }, { "epoch": 2.5975095254083884, "grad_norm": 0.6344304221920589, "learning_rate": 7.453365774533658e-06, "loss": 0.2455, "step": 88965 }, { "epoch": 2.5976555086787054, "grad_norm": 0.5622876273066743, "learning_rate": 7.450662341173291e-06, "loss": 0.2377, "step": 88970 }, { "epoch": 2.5978014919490224, "grad_norm": 0.6284396246639211, "learning_rate": 7.447958907812923e-06, "loss": 0.2573, "step": 88975 }, { "epoch": 2.59794747521934, "grad_norm": 0.566424147263293, "learning_rate": 7.445255474452554e-06, "loss": 0.2463, "step": 88980 }, { "epoch": 2.5980934584896573, "grad_norm": 0.5400158207112918, "learning_rate": 7.442552041092188e-06, "loss": 0.2321, "step": 88985 }, { "epoch": 2.5982394417599743, "grad_norm": 0.554764600512691, "learning_rate": 7.43984860773182e-06, "loss": 0.2401, "step": 88990 }, { "epoch": 2.5983854250302914, "grad_norm": 0.617436232022913, "learning_rate": 7.437145174371451e-06, "loss": 0.2575, "step": 88995 }, { "epoch": 2.598531408300609, "grad_norm": 0.5824516143793966, "learning_rate": 7.434441741011085e-06, "loss": 0.2389, "step": 89000 }, { "epoch": 2.598677391570926, "grad_norm": 0.5717738438916681, "learning_rate": 7.431738307650717e-06, "loss": 0.2341, "step": 89005 }, { "epoch": 2.5988233748412433, "grad_norm": 0.6023050258295671, "learning_rate": 7.429034874290348e-06, "loss": 0.2595, "step": 89010 }, { "epoch": 2.5989693581115603, "grad_norm": 0.5914573460410874, "learning_rate": 7.426331440929981e-06, "loss": 0.2497, "step": 89015 }, { "epoch": 2.5991153413818777, "grad_norm": 0.550412060769298, "learning_rate": 7.4236280075696136e-06, "loss": 0.2481, "step": 89020 }, { "epoch": 2.5992613246521947, "grad_norm": 0.5410478583675034, "learning_rate": 7.420924574209247e-06, "loss": 0.2379, "step": 89025 }, { "epoch": 2.599407307922512, "grad_norm": 0.6436727998200873, "learning_rate": 7.418221140848878e-06, "loss": 0.2524, "step": 89030 }, { "epoch": 2.599553291192829, "grad_norm": 0.6240227728394904, "learning_rate": 7.4155177074885105e-06, "loss": 0.2449, "step": 89035 }, { "epoch": 2.5996992744631466, "grad_norm": 0.5225638053505616, "learning_rate": 7.412814274128144e-06, "loss": 0.2445, "step": 89040 }, { "epoch": 2.5998452577334636, "grad_norm": 0.6028702491823654, "learning_rate": 7.410110840767775e-06, "loss": 0.2529, "step": 89045 }, { "epoch": 2.599991241003781, "grad_norm": 0.6488590980709721, "learning_rate": 7.4074074074074075e-06, "loss": 0.2488, "step": 89050 }, { "epoch": 2.600137224274098, "grad_norm": 0.5319560031780273, "learning_rate": 7.404703974047041e-06, "loss": 0.2332, "step": 89055 }, { "epoch": 2.6002832075444156, "grad_norm": 0.6138081666211248, "learning_rate": 7.402000540686672e-06, "loss": 0.2624, "step": 89060 }, { "epoch": 2.6004291908147326, "grad_norm": 0.6321048808013678, "learning_rate": 7.3992971073263044e-06, "loss": 0.2552, "step": 89065 }, { "epoch": 2.60057517408505, "grad_norm": 0.5897665743740749, "learning_rate": 7.396593673965938e-06, "loss": 0.2633, "step": 89070 }, { "epoch": 2.600721157355367, "grad_norm": 0.594612093185832, "learning_rate": 7.393890240605569e-06, "loss": 0.2638, "step": 89075 }, { "epoch": 2.6008671406256845, "grad_norm": 0.5951051135855875, "learning_rate": 7.391186807245201e-06, "loss": 0.2444, "step": 89080 }, { "epoch": 2.6010131238960015, "grad_norm": 0.578410949160956, "learning_rate": 7.3884833738848345e-06, "loss": 0.2497, "step": 89085 }, { "epoch": 2.6011591071663185, "grad_norm": 0.5871260001535836, "learning_rate": 7.385779940524466e-06, "loss": 0.2511, "step": 89090 }, { "epoch": 2.601305090436636, "grad_norm": 0.5829584907671465, "learning_rate": 7.383076507164098e-06, "loss": 0.254, "step": 89095 }, { "epoch": 2.6014510737069534, "grad_norm": 0.6178456663969107, "learning_rate": 7.3803730738037315e-06, "loss": 0.2501, "step": 89100 }, { "epoch": 2.6015970569772704, "grad_norm": 0.5836727662662086, "learning_rate": 7.377669640443363e-06, "loss": 0.2469, "step": 89105 }, { "epoch": 2.6017430402475874, "grad_norm": 0.5742016257062745, "learning_rate": 7.374966207082996e-06, "loss": 0.2398, "step": 89110 }, { "epoch": 2.601889023517905, "grad_norm": 0.5711353614780541, "learning_rate": 7.3722627737226285e-06, "loss": 0.2442, "step": 89115 }, { "epoch": 2.6020350067882223, "grad_norm": 0.630417265029803, "learning_rate": 7.36955934036226e-06, "loss": 0.253, "step": 89120 }, { "epoch": 2.6021809900585393, "grad_norm": 0.58289032915349, "learning_rate": 7.366855907001893e-06, "loss": 0.2367, "step": 89125 }, { "epoch": 2.6023269733288563, "grad_norm": 0.5750682107032968, "learning_rate": 7.364152473641525e-06, "loss": 0.2421, "step": 89130 }, { "epoch": 2.6024729565991738, "grad_norm": 0.619690992816082, "learning_rate": 7.361449040281157e-06, "loss": 0.2312, "step": 89135 }, { "epoch": 2.602618939869491, "grad_norm": 0.6117828362343916, "learning_rate": 7.35874560692079e-06, "loss": 0.2384, "step": 89140 }, { "epoch": 2.6027649231398082, "grad_norm": 0.6528365677550666, "learning_rate": 7.356042173560422e-06, "loss": 0.2508, "step": 89145 }, { "epoch": 2.6029109064101252, "grad_norm": 0.5931641630572649, "learning_rate": 7.353338740200054e-06, "loss": 0.2484, "step": 89150 }, { "epoch": 2.6030568896804427, "grad_norm": 0.6006631408196855, "learning_rate": 7.350635306839687e-06, "loss": 0.2362, "step": 89155 }, { "epoch": 2.6032028729507597, "grad_norm": 0.5796164631331852, "learning_rate": 7.347931873479319e-06, "loss": 0.2392, "step": 89160 }, { "epoch": 2.603348856221077, "grad_norm": 0.6344282575657667, "learning_rate": 7.345228440118951e-06, "loss": 0.2674, "step": 89165 }, { "epoch": 2.603494839491394, "grad_norm": 0.6387058679257104, "learning_rate": 7.342525006758584e-06, "loss": 0.2531, "step": 89170 }, { "epoch": 2.6036408227617116, "grad_norm": 0.5938729083171175, "learning_rate": 7.339821573398216e-06, "loss": 0.2521, "step": 89175 }, { "epoch": 2.6037868060320286, "grad_norm": 0.5834143139162585, "learning_rate": 7.337118140037848e-06, "loss": 0.2492, "step": 89180 }, { "epoch": 2.603932789302346, "grad_norm": 0.5589339096372185, "learning_rate": 7.334414706677481e-06, "loss": 0.2279, "step": 89185 }, { "epoch": 2.604078772572663, "grad_norm": 0.5725448241427832, "learning_rate": 7.331711273317113e-06, "loss": 0.2391, "step": 89190 }, { "epoch": 2.6042247558429805, "grad_norm": 0.6033364265035972, "learning_rate": 7.329007839956746e-06, "loss": 0.2478, "step": 89195 }, { "epoch": 2.6043707391132975, "grad_norm": 0.6745582853707034, "learning_rate": 7.326304406596378e-06, "loss": 0.2419, "step": 89200 }, { "epoch": 2.604516722383615, "grad_norm": 0.6185028820332562, "learning_rate": 7.32360097323601e-06, "loss": 0.2519, "step": 89205 }, { "epoch": 2.604662705653932, "grad_norm": 0.5286231055618207, "learning_rate": 7.320897539875643e-06, "loss": 0.2445, "step": 89210 }, { "epoch": 2.6048086889242494, "grad_norm": 0.5787358594877855, "learning_rate": 7.318194106515275e-06, "loss": 0.2487, "step": 89215 }, { "epoch": 2.6049546721945664, "grad_norm": 0.5794142208664432, "learning_rate": 7.315490673154906e-06, "loss": 0.259, "step": 89220 }, { "epoch": 2.6051006554648835, "grad_norm": 0.6077669445489325, "learning_rate": 7.31278723979454e-06, "loss": 0.2524, "step": 89225 }, { "epoch": 2.605246638735201, "grad_norm": 0.570030047425903, "learning_rate": 7.310083806434172e-06, "loss": 0.2392, "step": 89230 }, { "epoch": 2.6053926220055184, "grad_norm": 0.5739513199728382, "learning_rate": 7.307380373073803e-06, "loss": 0.2545, "step": 89235 }, { "epoch": 2.6055386052758354, "grad_norm": 0.5945397754923077, "learning_rate": 7.304676939713436e-06, "loss": 0.2526, "step": 89240 }, { "epoch": 2.6056845885461524, "grad_norm": 0.5909133308387698, "learning_rate": 7.301973506353069e-06, "loss": 0.2504, "step": 89245 }, { "epoch": 2.60583057181647, "grad_norm": 0.6129995861431103, "learning_rate": 7.2992700729927e-06, "loss": 0.2435, "step": 89250 }, { "epoch": 2.6059765550867873, "grad_norm": 0.5830526963946688, "learning_rate": 7.296566639632333e-06, "loss": 0.2502, "step": 89255 }, { "epoch": 2.6061225383571043, "grad_norm": 0.5897810491485241, "learning_rate": 7.293863206271966e-06, "loss": 0.2576, "step": 89260 }, { "epoch": 2.6062685216274213, "grad_norm": 0.579855177124141, "learning_rate": 7.291159772911597e-06, "loss": 0.2488, "step": 89265 }, { "epoch": 2.6064145048977387, "grad_norm": 0.5389110933701073, "learning_rate": 7.28845633955123e-06, "loss": 0.2385, "step": 89270 }, { "epoch": 2.606560488168056, "grad_norm": 0.5293857838609333, "learning_rate": 7.285752906190863e-06, "loss": 0.2354, "step": 89275 }, { "epoch": 2.606706471438373, "grad_norm": 0.5723180379311915, "learning_rate": 7.283049472830496e-06, "loss": 0.2602, "step": 89280 }, { "epoch": 2.60685245470869, "grad_norm": 0.6081330056302, "learning_rate": 7.280346039470127e-06, "loss": 0.2533, "step": 89285 }, { "epoch": 2.6069984379790077, "grad_norm": 0.5931260956556423, "learning_rate": 7.2776426061097596e-06, "loss": 0.2637, "step": 89290 }, { "epoch": 2.6071444212493247, "grad_norm": 0.5379371048707189, "learning_rate": 7.274939172749393e-06, "loss": 0.2467, "step": 89295 }, { "epoch": 2.607290404519642, "grad_norm": 0.5844151985915611, "learning_rate": 7.272235739389024e-06, "loss": 0.2482, "step": 89300 }, { "epoch": 2.607436387789959, "grad_norm": 0.5797620563189454, "learning_rate": 7.2695323060286565e-06, "loss": 0.2583, "step": 89305 }, { "epoch": 2.6075823710602766, "grad_norm": 0.5993230756886618, "learning_rate": 7.26682887266829e-06, "loss": 0.2584, "step": 89310 }, { "epoch": 2.6077283543305936, "grad_norm": 0.5926740184366743, "learning_rate": 7.264125439307921e-06, "loss": 0.2468, "step": 89315 }, { "epoch": 2.607874337600911, "grad_norm": 0.580118457570519, "learning_rate": 7.2614220059475535e-06, "loss": 0.2421, "step": 89320 }, { "epoch": 2.608020320871228, "grad_norm": 0.5974109201235301, "learning_rate": 7.258718572587187e-06, "loss": 0.2464, "step": 89325 }, { "epoch": 2.6081663041415455, "grad_norm": 0.5463563190267968, "learning_rate": 7.256015139226818e-06, "loss": 0.2427, "step": 89330 }, { "epoch": 2.6083122874118625, "grad_norm": 0.6200023811928261, "learning_rate": 7.25331170586645e-06, "loss": 0.256, "step": 89335 }, { "epoch": 2.60845827068218, "grad_norm": 0.5634348262506242, "learning_rate": 7.2506082725060836e-06, "loss": 0.2403, "step": 89340 }, { "epoch": 2.608604253952497, "grad_norm": 0.5858087682346301, "learning_rate": 7.247904839145715e-06, "loss": 0.2365, "step": 89345 }, { "epoch": 2.6087502372228144, "grad_norm": 0.6091971806314507, "learning_rate": 7.245201405785347e-06, "loss": 0.2497, "step": 89350 }, { "epoch": 2.6088962204931314, "grad_norm": 0.5749327951234745, "learning_rate": 7.2424979724249805e-06, "loss": 0.2503, "step": 89355 }, { "epoch": 2.609042203763449, "grad_norm": 0.5815319251726474, "learning_rate": 7.239794539064612e-06, "loss": 0.2543, "step": 89360 }, { "epoch": 2.609188187033766, "grad_norm": 0.6042156347543768, "learning_rate": 7.237091105704245e-06, "loss": 0.2485, "step": 89365 }, { "epoch": 2.6093341703040833, "grad_norm": 0.5782893855072649, "learning_rate": 7.2343876723438775e-06, "loss": 0.2421, "step": 89370 }, { "epoch": 2.6094801535744003, "grad_norm": 0.6132823862354211, "learning_rate": 7.231684238983509e-06, "loss": 0.2305, "step": 89375 }, { "epoch": 2.6096261368447173, "grad_norm": 0.5826790326453888, "learning_rate": 7.228980805623142e-06, "loss": 0.2511, "step": 89380 }, { "epoch": 2.609772120115035, "grad_norm": 0.6522024709516695, "learning_rate": 7.2262773722627744e-06, "loss": 0.2509, "step": 89385 }, { "epoch": 2.6099181033853522, "grad_norm": 0.6270502125067086, "learning_rate": 7.223573938902406e-06, "loss": 0.2654, "step": 89390 }, { "epoch": 2.6100640866556692, "grad_norm": 0.536394470835293, "learning_rate": 7.220870505542039e-06, "loss": 0.2248, "step": 89395 }, { "epoch": 2.6102100699259863, "grad_norm": 0.6042483157233011, "learning_rate": 7.218167072181671e-06, "loss": 0.2542, "step": 89400 }, { "epoch": 2.6103560531963037, "grad_norm": 0.6122805289629687, "learning_rate": 7.215463638821303e-06, "loss": 0.2497, "step": 89405 }, { "epoch": 2.610502036466621, "grad_norm": 0.5867370951683238, "learning_rate": 7.212760205460936e-06, "loss": 0.2396, "step": 89410 }, { "epoch": 2.610648019736938, "grad_norm": 0.5772621435175591, "learning_rate": 7.210056772100568e-06, "loss": 0.2384, "step": 89415 }, { "epoch": 2.610794003007255, "grad_norm": 0.6113452847196597, "learning_rate": 7.2073533387402e-06, "loss": 0.263, "step": 89420 }, { "epoch": 2.6109399862775726, "grad_norm": 0.6137907704981771, "learning_rate": 7.204649905379833e-06, "loss": 0.2534, "step": 89425 }, { "epoch": 2.61108596954789, "grad_norm": 0.5863411752040787, "learning_rate": 7.201946472019465e-06, "loss": 0.2383, "step": 89430 }, { "epoch": 2.611231952818207, "grad_norm": 0.5710174646825562, "learning_rate": 7.199243038659097e-06, "loss": 0.258, "step": 89435 }, { "epoch": 2.611377936088524, "grad_norm": 0.5906792478392162, "learning_rate": 7.19653960529873e-06, "loss": 0.2327, "step": 89440 }, { "epoch": 2.6115239193588415, "grad_norm": 0.5667313444415364, "learning_rate": 7.193836171938361e-06, "loss": 0.2662, "step": 89445 }, { "epoch": 2.6116699026291585, "grad_norm": 0.5873922287121449, "learning_rate": 7.191132738577995e-06, "loss": 0.2406, "step": 89450 }, { "epoch": 2.611815885899476, "grad_norm": 0.6148917280977283, "learning_rate": 7.188429305217627e-06, "loss": 0.2485, "step": 89455 }, { "epoch": 2.611961869169793, "grad_norm": 0.6316058945095114, "learning_rate": 7.185725871857258e-06, "loss": 0.2486, "step": 89460 }, { "epoch": 2.6121078524401105, "grad_norm": 0.6020836434863928, "learning_rate": 7.183022438496892e-06, "loss": 0.2422, "step": 89465 }, { "epoch": 2.6122538357104275, "grad_norm": 0.6017648538374837, "learning_rate": 7.180319005136524e-06, "loss": 0.2503, "step": 89470 }, { "epoch": 2.612399818980745, "grad_norm": 0.6423724720732819, "learning_rate": 7.177615571776155e-06, "loss": 0.2381, "step": 89475 }, { "epoch": 2.612545802251062, "grad_norm": 0.6111719196810734, "learning_rate": 7.1749121384157885e-06, "loss": 0.2351, "step": 89480 }, { "epoch": 2.6126917855213794, "grad_norm": 0.5964719174620507, "learning_rate": 7.172208705055421e-06, "loss": 0.2497, "step": 89485 }, { "epoch": 2.6128377687916964, "grad_norm": 0.5493943669594926, "learning_rate": 7.169505271695052e-06, "loss": 0.2471, "step": 89490 }, { "epoch": 2.612983752062014, "grad_norm": 0.5554169105340437, "learning_rate": 7.166801838334685e-06, "loss": 0.2427, "step": 89495 }, { "epoch": 2.613129735332331, "grad_norm": 0.5603976147069714, "learning_rate": 7.164098404974318e-06, "loss": 0.2316, "step": 89500 }, { "epoch": 2.6132757186026483, "grad_norm": 0.617693591964429, "learning_rate": 7.161394971613949e-06, "loss": 0.2481, "step": 89505 }, { "epoch": 2.6134217018729653, "grad_norm": 0.6118081182862901, "learning_rate": 7.158691538253582e-06, "loss": 0.2354, "step": 89510 }, { "epoch": 2.6135676851432823, "grad_norm": 0.5862213104665256, "learning_rate": 7.155988104893215e-06, "loss": 0.2445, "step": 89515 }, { "epoch": 2.6137136684135998, "grad_norm": 0.5663881243847392, "learning_rate": 7.153284671532846e-06, "loss": 0.2327, "step": 89520 }, { "epoch": 2.613859651683917, "grad_norm": 0.6133149631674456, "learning_rate": 7.150581238172479e-06, "loss": 0.2683, "step": 89525 }, { "epoch": 2.614005634954234, "grad_norm": 0.5842504540607237, "learning_rate": 7.147877804812112e-06, "loss": 0.2437, "step": 89530 }, { "epoch": 2.614151618224551, "grad_norm": 0.5321479477792065, "learning_rate": 7.145174371451745e-06, "loss": 0.2512, "step": 89535 }, { "epoch": 2.6142976014948687, "grad_norm": 0.5521674677867221, "learning_rate": 7.142470938091376e-06, "loss": 0.2445, "step": 89540 }, { "epoch": 2.614443584765186, "grad_norm": 0.5735873061062101, "learning_rate": 7.139767504731009e-06, "loss": 0.2501, "step": 89545 }, { "epoch": 2.614589568035503, "grad_norm": 0.5633784362136751, "learning_rate": 7.137064071370642e-06, "loss": 0.2362, "step": 89550 }, { "epoch": 2.61473555130582, "grad_norm": 0.5746896121891767, "learning_rate": 7.134360638010273e-06, "loss": 0.233, "step": 89555 }, { "epoch": 2.6148815345761376, "grad_norm": 0.6041403800064228, "learning_rate": 7.1316572046499055e-06, "loss": 0.2501, "step": 89560 }, { "epoch": 2.615027517846455, "grad_norm": 0.6064672618019215, "learning_rate": 7.128953771289539e-06, "loss": 0.2503, "step": 89565 }, { "epoch": 2.615173501116772, "grad_norm": 0.6342981809698917, "learning_rate": 7.12625033792917e-06, "loss": 0.2519, "step": 89570 }, { "epoch": 2.615319484387089, "grad_norm": 0.6025158039064499, "learning_rate": 7.1235469045688025e-06, "loss": 0.248, "step": 89575 }, { "epoch": 2.6154654676574065, "grad_norm": 0.5869783424622326, "learning_rate": 7.120843471208436e-06, "loss": 0.2363, "step": 89580 }, { "epoch": 2.615611450927724, "grad_norm": 0.6059170829507853, "learning_rate": 7.118140037848067e-06, "loss": 0.256, "step": 89585 }, { "epoch": 2.615757434198041, "grad_norm": 0.6115221826905877, "learning_rate": 7.1154366044876994e-06, "loss": 0.2377, "step": 89590 }, { "epoch": 2.615903417468358, "grad_norm": 0.6093111619491071, "learning_rate": 7.112733171127333e-06, "loss": 0.2381, "step": 89595 }, { "epoch": 2.6160494007386754, "grad_norm": 0.5583503309910689, "learning_rate": 7.110029737766964e-06, "loss": 0.2466, "step": 89600 }, { "epoch": 2.6161953840089924, "grad_norm": 0.5341970349141014, "learning_rate": 7.107326304406596e-06, "loss": 0.2389, "step": 89605 }, { "epoch": 2.61634136727931, "grad_norm": 0.5364831287044223, "learning_rate": 7.1046228710462296e-06, "loss": 0.2319, "step": 89610 }, { "epoch": 2.616487350549627, "grad_norm": 0.5178830343377399, "learning_rate": 7.101919437685861e-06, "loss": 0.2351, "step": 89615 }, { "epoch": 2.6166333338199443, "grad_norm": 0.6279170557906544, "learning_rate": 7.099216004325494e-06, "loss": 0.259, "step": 89620 }, { "epoch": 2.6167793170902613, "grad_norm": 0.584016807296053, "learning_rate": 7.0965125709651265e-06, "loss": 0.2363, "step": 89625 }, { "epoch": 2.616925300360579, "grad_norm": 0.6252144475379454, "learning_rate": 7.093809137604758e-06, "loss": 0.2416, "step": 89630 }, { "epoch": 2.617071283630896, "grad_norm": 0.5475387201688904, "learning_rate": 7.091105704244391e-06, "loss": 0.2485, "step": 89635 }, { "epoch": 2.6172172669012133, "grad_norm": 0.605732643496871, "learning_rate": 7.0884022708840235e-06, "loss": 0.2481, "step": 89640 }, { "epoch": 2.6173632501715303, "grad_norm": 0.5965038915256264, "learning_rate": 7.085698837523655e-06, "loss": 0.2473, "step": 89645 }, { "epoch": 2.6175092334418477, "grad_norm": 0.6187014794293454, "learning_rate": 7.082995404163288e-06, "loss": 0.2513, "step": 89650 }, { "epoch": 2.6176552167121647, "grad_norm": 0.6383697289518445, "learning_rate": 7.08029197080292e-06, "loss": 0.2496, "step": 89655 }, { "epoch": 2.617801199982482, "grad_norm": 0.5375638328668141, "learning_rate": 7.077588537442552e-06, "loss": 0.234, "step": 89660 }, { "epoch": 2.617947183252799, "grad_norm": 0.5762807117606406, "learning_rate": 7.074885104082185e-06, "loss": 0.2411, "step": 89665 }, { "epoch": 2.618093166523116, "grad_norm": 0.6004851071100735, "learning_rate": 7.0721816707218165e-06, "loss": 0.2444, "step": 89670 }, { "epoch": 2.6182391497934336, "grad_norm": 0.5654468450608428, "learning_rate": 7.069478237361449e-06, "loss": 0.2489, "step": 89675 }, { "epoch": 2.618385133063751, "grad_norm": 0.5713837095264683, "learning_rate": 7.066774804001082e-06, "loss": 0.2602, "step": 89680 }, { "epoch": 2.618531116334068, "grad_norm": 0.6278640598913869, "learning_rate": 7.0640713706407135e-06, "loss": 0.2628, "step": 89685 }, { "epoch": 2.618677099604385, "grad_norm": 0.542483327136994, "learning_rate": 7.0613679372803475e-06, "loss": 0.2424, "step": 89690 }, { "epoch": 2.6188230828747026, "grad_norm": 0.5705795973658404, "learning_rate": 7.058664503919979e-06, "loss": 0.2544, "step": 89695 }, { "epoch": 2.61896906614502, "grad_norm": 0.6600114396443757, "learning_rate": 7.05596107055961e-06, "loss": 0.2459, "step": 89700 }, { "epoch": 2.619115049415337, "grad_norm": 0.5516461402653019, "learning_rate": 7.053257637199244e-06, "loss": 0.2351, "step": 89705 }, { "epoch": 2.619261032685654, "grad_norm": 0.6281807336632128, "learning_rate": 7.050554203838876e-06, "loss": 0.2559, "step": 89710 }, { "epoch": 2.6194070159559715, "grad_norm": 0.5738518835205908, "learning_rate": 7.047850770478507e-06, "loss": 0.2527, "step": 89715 }, { "epoch": 2.619552999226289, "grad_norm": 0.6088666166258817, "learning_rate": 7.0451473371181405e-06, "loss": 0.2422, "step": 89720 }, { "epoch": 2.619698982496606, "grad_norm": 0.5540464453626934, "learning_rate": 7.042443903757773e-06, "loss": 0.2492, "step": 89725 }, { "epoch": 2.619844965766923, "grad_norm": 0.5747767759456228, "learning_rate": 7.039740470397404e-06, "loss": 0.2314, "step": 89730 }, { "epoch": 2.6199909490372404, "grad_norm": 0.6246860616851382, "learning_rate": 7.0370370370370375e-06, "loss": 0.25, "step": 89735 }, { "epoch": 2.6201369323075574, "grad_norm": 0.5919429948128154, "learning_rate": 7.03433360367667e-06, "loss": 0.2398, "step": 89740 }, { "epoch": 2.620282915577875, "grad_norm": 0.5809555853700082, "learning_rate": 7.031630170316301e-06, "loss": 0.2307, "step": 89745 }, { "epoch": 2.620428898848192, "grad_norm": 0.5422389500214032, "learning_rate": 7.0289267369559344e-06, "loss": 0.2478, "step": 89750 }, { "epoch": 2.6205748821185093, "grad_norm": 0.5806420466236347, "learning_rate": 7.026223303595567e-06, "loss": 0.2533, "step": 89755 }, { "epoch": 2.6207208653888263, "grad_norm": 0.6221918136066781, "learning_rate": 7.023519870235198e-06, "loss": 0.2477, "step": 89760 }, { "epoch": 2.6208668486591438, "grad_norm": 0.5710700773656308, "learning_rate": 7.020816436874831e-06, "loss": 0.2498, "step": 89765 }, { "epoch": 2.6210128319294608, "grad_norm": 0.5312697250843279, "learning_rate": 7.018113003514464e-06, "loss": 0.2408, "step": 89770 }, { "epoch": 2.6211588151997782, "grad_norm": 0.6348053491652511, "learning_rate": 7.015409570154097e-06, "loss": 0.2492, "step": 89775 }, { "epoch": 2.6213047984700952, "grad_norm": 0.5714599063091432, "learning_rate": 7.012706136793728e-06, "loss": 0.242, "step": 89780 }, { "epoch": 2.6214507817404127, "grad_norm": 0.5977391070933743, "learning_rate": 7.010002703433361e-06, "loss": 0.2428, "step": 89785 }, { "epoch": 2.6215967650107297, "grad_norm": 0.5627300446615217, "learning_rate": 7.007299270072994e-06, "loss": 0.2472, "step": 89790 }, { "epoch": 2.621742748281047, "grad_norm": 0.5932210936624055, "learning_rate": 7.004595836712625e-06, "loss": 0.2488, "step": 89795 }, { "epoch": 2.621888731551364, "grad_norm": 0.5300578348346212, "learning_rate": 7.001892403352258e-06, "loss": 0.2415, "step": 89800 }, { "epoch": 2.6220347148216816, "grad_norm": 0.5411591025348852, "learning_rate": 6.999188969991891e-06, "loss": 0.2471, "step": 89805 }, { "epoch": 2.6221806980919986, "grad_norm": 0.6357103012506947, "learning_rate": 6.996485536631522e-06, "loss": 0.2369, "step": 89810 }, { "epoch": 2.622326681362316, "grad_norm": 0.5664956711998694, "learning_rate": 6.9937821032711546e-06, "loss": 0.2538, "step": 89815 }, { "epoch": 2.622472664632633, "grad_norm": 0.564877220805952, "learning_rate": 6.991078669910788e-06, "loss": 0.2448, "step": 89820 }, { "epoch": 2.62261864790295, "grad_norm": 0.5704068165169399, "learning_rate": 6.988375236550419e-06, "loss": 0.2389, "step": 89825 }, { "epoch": 2.6227646311732675, "grad_norm": 0.6369358147892422, "learning_rate": 6.9856718031900515e-06, "loss": 0.2637, "step": 89830 }, { "epoch": 2.622910614443585, "grad_norm": 0.5777613629027113, "learning_rate": 6.982968369829685e-06, "loss": 0.2645, "step": 89835 }, { "epoch": 2.623056597713902, "grad_norm": 0.5861661345091909, "learning_rate": 6.980264936469316e-06, "loss": 0.2476, "step": 89840 }, { "epoch": 2.623202580984219, "grad_norm": 0.5463829029181543, "learning_rate": 6.9775615031089485e-06, "loss": 0.2419, "step": 89845 }, { "epoch": 2.6233485642545364, "grad_norm": 0.5739691160383011, "learning_rate": 6.974858069748582e-06, "loss": 0.2484, "step": 89850 }, { "epoch": 2.623494547524854, "grad_norm": 0.5946849477875323, "learning_rate": 6.972154636388213e-06, "loss": 0.2566, "step": 89855 }, { "epoch": 2.623640530795171, "grad_norm": 0.5418742840088617, "learning_rate": 6.969451203027846e-06, "loss": 0.2486, "step": 89860 }, { "epoch": 2.623786514065488, "grad_norm": 0.5778751639484453, "learning_rate": 6.966747769667479e-06, "loss": 0.2408, "step": 89865 }, { "epoch": 2.6239324973358054, "grad_norm": 0.584817688483613, "learning_rate": 6.96404433630711e-06, "loss": 0.2389, "step": 89870 }, { "epoch": 2.624078480606123, "grad_norm": 0.5965260314741728, "learning_rate": 6.961340902946743e-06, "loss": 0.2572, "step": 89875 }, { "epoch": 2.62422446387644, "grad_norm": 0.6065797374223577, "learning_rate": 6.9586374695863755e-06, "loss": 0.2567, "step": 89880 }, { "epoch": 2.624370447146757, "grad_norm": 0.5425626389724918, "learning_rate": 6.955934036226007e-06, "loss": 0.2481, "step": 89885 }, { "epoch": 2.6245164304170743, "grad_norm": 0.5848345598319242, "learning_rate": 6.95323060286564e-06, "loss": 0.2746, "step": 89890 }, { "epoch": 2.6246624136873913, "grad_norm": 0.5702617609594648, "learning_rate": 6.950527169505272e-06, "loss": 0.2411, "step": 89895 }, { "epoch": 2.6248083969577087, "grad_norm": 0.5895442164776723, "learning_rate": 6.947823736144904e-06, "loss": 0.2644, "step": 89900 }, { "epoch": 2.6249543802280257, "grad_norm": 0.6433831893681317, "learning_rate": 6.945120302784537e-06, "loss": 0.2474, "step": 89905 }, { "epoch": 2.625100363498343, "grad_norm": 0.5575397611194236, "learning_rate": 6.942416869424169e-06, "loss": 0.2353, "step": 89910 }, { "epoch": 2.62524634676866, "grad_norm": 0.5152088792679715, "learning_rate": 6.939713436063801e-06, "loss": 0.2262, "step": 89915 }, { "epoch": 2.6253923300389777, "grad_norm": 0.6309323249334619, "learning_rate": 6.937010002703434e-06, "loss": 0.2633, "step": 89920 }, { "epoch": 2.6255383133092947, "grad_norm": 0.5859302426832111, "learning_rate": 6.9343065693430655e-06, "loss": 0.2526, "step": 89925 }, { "epoch": 2.625684296579612, "grad_norm": 0.6620178935446019, "learning_rate": 6.931603135982698e-06, "loss": 0.2528, "step": 89930 }, { "epoch": 2.625830279849929, "grad_norm": 0.54862917390135, "learning_rate": 6.928899702622331e-06, "loss": 0.2381, "step": 89935 }, { "epoch": 2.6259762631202466, "grad_norm": 0.5972855036559656, "learning_rate": 6.9261962692619625e-06, "loss": 0.2507, "step": 89940 }, { "epoch": 2.6261222463905636, "grad_norm": 0.5490199758646749, "learning_rate": 6.923492835901596e-06, "loss": 0.2478, "step": 89945 }, { "epoch": 2.626268229660881, "grad_norm": 0.6001546386286956, "learning_rate": 6.920789402541228e-06, "loss": 0.2534, "step": 89950 }, { "epoch": 2.626414212931198, "grad_norm": 0.6123899770540132, "learning_rate": 6.9180859691808594e-06, "loss": 0.2518, "step": 89955 }, { "epoch": 2.626560196201515, "grad_norm": 0.5830549527626935, "learning_rate": 6.915382535820493e-06, "loss": 0.2609, "step": 89960 }, { "epoch": 2.6267061794718325, "grad_norm": 0.612386351323226, "learning_rate": 6.912679102460125e-06, "loss": 0.2568, "step": 89965 }, { "epoch": 2.62685216274215, "grad_norm": 0.5398515227370547, "learning_rate": 6.909975669099756e-06, "loss": 0.228, "step": 89970 }, { "epoch": 2.626998146012467, "grad_norm": 0.5839402256847945, "learning_rate": 6.9072722357393896e-06, "loss": 0.248, "step": 89975 }, { "epoch": 2.627144129282784, "grad_norm": 0.5386055913088641, "learning_rate": 6.904568802379022e-06, "loss": 0.2529, "step": 89980 }, { "epoch": 2.6272901125531014, "grad_norm": 0.6631893354616615, "learning_rate": 6.901865369018653e-06, "loss": 0.2475, "step": 89985 }, { "epoch": 2.627436095823419, "grad_norm": 0.5975368893974898, "learning_rate": 6.8991619356582865e-06, "loss": 0.2592, "step": 89990 }, { "epoch": 2.627582079093736, "grad_norm": 0.6004887778052805, "learning_rate": 6.896458502297919e-06, "loss": 0.2372, "step": 89995 }, { "epoch": 2.627728062364053, "grad_norm": 0.5664106461134872, "learning_rate": 6.89375506893755e-06, "loss": 0.247, "step": 90000 }, { "epoch": 2.6278740456343703, "grad_norm": 0.5663050833082228, "learning_rate": 6.8910516355771835e-06, "loss": 0.2359, "step": 90005 }, { "epoch": 2.6280200289046878, "grad_norm": 0.5668927062285035, "learning_rate": 6.888348202216816e-06, "loss": 0.2489, "step": 90010 }, { "epoch": 2.628166012175005, "grad_norm": 0.5575742329998921, "learning_rate": 6.885644768856447e-06, "loss": 0.232, "step": 90015 }, { "epoch": 2.628311995445322, "grad_norm": 0.6103471713243627, "learning_rate": 6.88294133549608e-06, "loss": 0.2708, "step": 90020 }, { "epoch": 2.6284579787156392, "grad_norm": 0.5565774075184349, "learning_rate": 6.880237902135713e-06, "loss": 0.2435, "step": 90025 }, { "epoch": 2.6286039619859562, "grad_norm": 0.5577049850137388, "learning_rate": 6.877534468775346e-06, "loss": 0.2302, "step": 90030 }, { "epoch": 2.6287499452562737, "grad_norm": 0.6053065564565094, "learning_rate": 6.874831035414977e-06, "loss": 0.2378, "step": 90035 }, { "epoch": 2.6288959285265907, "grad_norm": 0.5697157503462341, "learning_rate": 6.87212760205461e-06, "loss": 0.2332, "step": 90040 }, { "epoch": 2.629041911796908, "grad_norm": 0.600294703733134, "learning_rate": 6.869424168694243e-06, "loss": 0.2289, "step": 90045 }, { "epoch": 2.629187895067225, "grad_norm": 0.5884686293614104, "learning_rate": 6.866720735333874e-06, "loss": 0.2367, "step": 90050 }, { "epoch": 2.6293338783375426, "grad_norm": 0.546264475547011, "learning_rate": 6.864017301973507e-06, "loss": 0.2578, "step": 90055 }, { "epoch": 2.6294798616078596, "grad_norm": 0.563940868343774, "learning_rate": 6.86131386861314e-06, "loss": 0.2402, "step": 90060 }, { "epoch": 2.629625844878177, "grad_norm": 0.58682106459169, "learning_rate": 6.858610435252771e-06, "loss": 0.237, "step": 90065 }, { "epoch": 2.629771828148494, "grad_norm": 0.5785744416623726, "learning_rate": 6.855907001892404e-06, "loss": 0.2298, "step": 90070 }, { "epoch": 2.6299178114188115, "grad_norm": 0.5576867357062767, "learning_rate": 6.853203568532037e-06, "loss": 0.2356, "step": 90075 }, { "epoch": 2.6300637946891285, "grad_norm": 0.5407111139110391, "learning_rate": 6.850500135171668e-06, "loss": 0.2495, "step": 90080 }, { "epoch": 2.630209777959446, "grad_norm": 0.5856570047909235, "learning_rate": 6.8477967018113e-06, "loss": 0.2587, "step": 90085 }, { "epoch": 2.630355761229763, "grad_norm": 0.5823261061747922, "learning_rate": 6.845093268450934e-06, "loss": 0.2359, "step": 90090 }, { "epoch": 2.6305017445000805, "grad_norm": 0.5794877314322329, "learning_rate": 6.842389835090565e-06, "loss": 0.2545, "step": 90095 }, { "epoch": 2.6306477277703975, "grad_norm": 0.5945105059393152, "learning_rate": 6.839686401730197e-06, "loss": 0.2336, "step": 90100 }, { "epoch": 2.630793711040715, "grad_norm": 0.6116788749531161, "learning_rate": 6.836982968369831e-06, "loss": 0.2449, "step": 90105 }, { "epoch": 2.630939694311032, "grad_norm": 0.5779729027574096, "learning_rate": 6.834279535009462e-06, "loss": 0.2463, "step": 90110 }, { "epoch": 2.631085677581349, "grad_norm": 0.6289407604543464, "learning_rate": 6.831576101649095e-06, "loss": 0.2466, "step": 90115 }, { "epoch": 2.6312316608516664, "grad_norm": 0.5937921101711654, "learning_rate": 6.828872668288727e-06, "loss": 0.245, "step": 90120 }, { "epoch": 2.631377644121984, "grad_norm": 0.5782940648612427, "learning_rate": 6.826169234928359e-06, "loss": 0.2322, "step": 90125 }, { "epoch": 2.631523627392301, "grad_norm": 0.6353565000891229, "learning_rate": 6.823465801567992e-06, "loss": 0.2518, "step": 90130 }, { "epoch": 2.631669610662618, "grad_norm": 0.6129342196982777, "learning_rate": 6.820762368207624e-06, "loss": 0.2408, "step": 90135 }, { "epoch": 2.6318155939329353, "grad_norm": 0.642495530325018, "learning_rate": 6.818058934847256e-06, "loss": 0.2558, "step": 90140 }, { "epoch": 2.6319615772032527, "grad_norm": 0.5780861638625595, "learning_rate": 6.815355501486889e-06, "loss": 0.2428, "step": 90145 }, { "epoch": 2.6321075604735698, "grad_norm": 0.5633129544852612, "learning_rate": 6.812652068126521e-06, "loss": 0.236, "step": 90150 }, { "epoch": 2.6322535437438868, "grad_norm": 0.6024498788948981, "learning_rate": 6.809948634766153e-06, "loss": 0.2584, "step": 90155 }, { "epoch": 2.632399527014204, "grad_norm": 0.5572573901140883, "learning_rate": 6.807245201405786e-06, "loss": 0.2437, "step": 90160 }, { "epoch": 2.6325455102845217, "grad_norm": 0.5981791732872981, "learning_rate": 6.804541768045418e-06, "loss": 0.2323, "step": 90165 }, { "epoch": 2.6326914935548387, "grad_norm": 0.5638179045241959, "learning_rate": 6.80183833468505e-06, "loss": 0.2409, "step": 90170 }, { "epoch": 2.6328374768251557, "grad_norm": 0.6085138536332049, "learning_rate": 6.799134901324683e-06, "loss": 0.2328, "step": 90175 }, { "epoch": 2.632983460095473, "grad_norm": 0.5185466087249508, "learning_rate": 6.7964314679643146e-06, "loss": 0.2396, "step": 90180 }, { "epoch": 2.63312944336579, "grad_norm": 0.631701734755979, "learning_rate": 6.793728034603947e-06, "loss": 0.2612, "step": 90185 }, { "epoch": 2.6332754266361076, "grad_norm": 0.6196943734867797, "learning_rate": 6.79102460124358e-06, "loss": 0.2423, "step": 90190 }, { "epoch": 2.6334214099064246, "grad_norm": 0.6199841883642803, "learning_rate": 6.7883211678832115e-06, "loss": 0.2499, "step": 90195 }, { "epoch": 2.633567393176742, "grad_norm": 0.6058373273497548, "learning_rate": 6.785617734522845e-06, "loss": 0.2339, "step": 90200 }, { "epoch": 2.633713376447059, "grad_norm": 0.568224570799442, "learning_rate": 6.782914301162477e-06, "loss": 0.2429, "step": 90205 }, { "epoch": 2.6338593597173765, "grad_norm": 0.5731115468275195, "learning_rate": 6.7802108678021085e-06, "loss": 0.2431, "step": 90210 }, { "epoch": 2.6340053429876935, "grad_norm": 0.5747153707024154, "learning_rate": 6.777507434441742e-06, "loss": 0.2524, "step": 90215 }, { "epoch": 2.634151326258011, "grad_norm": 0.5797074071941056, "learning_rate": 6.774804001081374e-06, "loss": 0.2509, "step": 90220 }, { "epoch": 2.634297309528328, "grad_norm": 0.5848168627256525, "learning_rate": 6.7721005677210054e-06, "loss": 0.2288, "step": 90225 }, { "epoch": 2.6344432927986454, "grad_norm": 0.599278929949295, "learning_rate": 6.769397134360639e-06, "loss": 0.2453, "step": 90230 }, { "epoch": 2.6345892760689624, "grad_norm": 0.5970952116345034, "learning_rate": 6.766693701000271e-06, "loss": 0.2407, "step": 90235 }, { "epoch": 2.63473525933928, "grad_norm": 0.5670998070217435, "learning_rate": 6.763990267639902e-06, "loss": 0.25, "step": 90240 }, { "epoch": 2.634881242609597, "grad_norm": 0.5677191708449915, "learning_rate": 6.7612868342795355e-06, "loss": 0.2445, "step": 90245 }, { "epoch": 2.635027225879914, "grad_norm": 0.5760311835336416, "learning_rate": 6.758583400919168e-06, "loss": 0.2531, "step": 90250 }, { "epoch": 2.6351732091502313, "grad_norm": 0.5277420149430158, "learning_rate": 6.755879967558799e-06, "loss": 0.2305, "step": 90255 }, { "epoch": 2.635319192420549, "grad_norm": 0.5907234824549932, "learning_rate": 6.7531765341984325e-06, "loss": 0.2635, "step": 90260 }, { "epoch": 2.635465175690866, "grad_norm": 0.6161906404799543, "learning_rate": 6.750473100838065e-06, "loss": 0.2508, "step": 90265 }, { "epoch": 2.635611158961183, "grad_norm": 0.6145278191164244, "learning_rate": 6.747769667477696e-06, "loss": 0.256, "step": 90270 }, { "epoch": 2.6357571422315003, "grad_norm": 0.6254414155158787, "learning_rate": 6.7450662341173294e-06, "loss": 0.2476, "step": 90275 }, { "epoch": 2.6359031255018177, "grad_norm": 0.5841505848948586, "learning_rate": 6.742362800756962e-06, "loss": 0.2418, "step": 90280 }, { "epoch": 2.6360491087721347, "grad_norm": 0.5933036442467582, "learning_rate": 6.739659367396595e-06, "loss": 0.2548, "step": 90285 }, { "epoch": 2.6361950920424517, "grad_norm": 0.5754958748985297, "learning_rate": 6.736955934036226e-06, "loss": 0.2438, "step": 90290 }, { "epoch": 2.636341075312769, "grad_norm": 0.5775229099653969, "learning_rate": 6.734252500675859e-06, "loss": 0.2457, "step": 90295 }, { "epoch": 2.6364870585830866, "grad_norm": 0.590103609750121, "learning_rate": 6.731549067315492e-06, "loss": 0.2531, "step": 90300 }, { "epoch": 2.6366330418534036, "grad_norm": 0.5546990528763567, "learning_rate": 6.728845633955123e-06, "loss": 0.2263, "step": 90305 }, { "epoch": 2.6367790251237206, "grad_norm": 0.5933338654774668, "learning_rate": 6.726142200594756e-06, "loss": 0.2429, "step": 90310 }, { "epoch": 2.636925008394038, "grad_norm": 0.5508590552142341, "learning_rate": 6.723438767234389e-06, "loss": 0.2453, "step": 90315 }, { "epoch": 2.637070991664355, "grad_norm": 0.6081544662421744, "learning_rate": 6.72073533387402e-06, "loss": 0.248, "step": 90320 }, { "epoch": 2.6372169749346726, "grad_norm": 0.5778818129146327, "learning_rate": 6.718031900513652e-06, "loss": 0.2471, "step": 90325 }, { "epoch": 2.6373629582049896, "grad_norm": 0.5498423052626903, "learning_rate": 6.715328467153286e-06, "loss": 0.2437, "step": 90330 }, { "epoch": 2.637508941475307, "grad_norm": 0.560657100697663, "learning_rate": 6.712625033792917e-06, "loss": 0.2343, "step": 90335 }, { "epoch": 2.637654924745624, "grad_norm": 0.5431419533883943, "learning_rate": 6.709921600432549e-06, "loss": 0.2495, "step": 90340 }, { "epoch": 2.6378009080159415, "grad_norm": 0.5289964664272666, "learning_rate": 6.707218167072183e-06, "loss": 0.2421, "step": 90345 }, { "epoch": 2.6379468912862585, "grad_norm": 0.5130255315072081, "learning_rate": 6.704514733711814e-06, "loss": 0.2385, "step": 90350 }, { "epoch": 2.638092874556576, "grad_norm": 0.5750378899164773, "learning_rate": 6.701811300351446e-06, "loss": 0.2501, "step": 90355 }, { "epoch": 2.638238857826893, "grad_norm": 0.5637180330418184, "learning_rate": 6.699107866991079e-06, "loss": 0.2543, "step": 90360 }, { "epoch": 2.6383848410972104, "grad_norm": 0.5559797692756173, "learning_rate": 6.696404433630711e-06, "loss": 0.227, "step": 90365 }, { "epoch": 2.6385308243675274, "grad_norm": 0.6338390950785241, "learning_rate": 6.693701000270344e-06, "loss": 0.2515, "step": 90370 }, { "epoch": 2.638676807637845, "grad_norm": 0.5828032273615896, "learning_rate": 6.690997566909976e-06, "loss": 0.2493, "step": 90375 }, { "epoch": 2.638822790908162, "grad_norm": 0.5413697688773417, "learning_rate": 6.688294133549608e-06, "loss": 0.2248, "step": 90380 }, { "epoch": 2.6389687741784793, "grad_norm": 0.6118063252049457, "learning_rate": 6.685590700189241e-06, "loss": 0.2728, "step": 90385 }, { "epoch": 2.6391147574487963, "grad_norm": 0.6404847327969828, "learning_rate": 6.682887266828873e-06, "loss": 0.2594, "step": 90390 }, { "epoch": 2.6392607407191138, "grad_norm": 0.5994207411815181, "learning_rate": 6.680183833468505e-06, "loss": 0.2654, "step": 90395 }, { "epoch": 2.6394067239894308, "grad_norm": 0.594469135892397, "learning_rate": 6.677480400108138e-06, "loss": 0.2394, "step": 90400 }, { "epoch": 2.6395527072597478, "grad_norm": 0.624721940023828, "learning_rate": 6.67477696674777e-06, "loss": 0.2492, "step": 90405 }, { "epoch": 2.6396986905300652, "grad_norm": 0.5739757084181595, "learning_rate": 6.672073533387402e-06, "loss": 0.2519, "step": 90410 }, { "epoch": 2.6398446738003827, "grad_norm": 0.6026577195433724, "learning_rate": 6.669370100027035e-06, "loss": 0.2534, "step": 90415 }, { "epoch": 2.6399906570706997, "grad_norm": 0.5708852703455543, "learning_rate": 6.666666666666667e-06, "loss": 0.2278, "step": 90420 }, { "epoch": 2.6401366403410167, "grad_norm": 0.5262058343249932, "learning_rate": 6.663963233306299e-06, "loss": 0.22, "step": 90425 }, { "epoch": 2.640282623611334, "grad_norm": 0.614959088810789, "learning_rate": 6.661259799945932e-06, "loss": 0.258, "step": 90430 }, { "epoch": 2.6404286068816516, "grad_norm": 0.5902167782918701, "learning_rate": 6.658556366585564e-06, "loss": 0.2536, "step": 90435 }, { "epoch": 2.6405745901519686, "grad_norm": 0.5771505490741586, "learning_rate": 6.655852933225196e-06, "loss": 0.2245, "step": 90440 }, { "epoch": 2.6407205734222856, "grad_norm": 0.5479695682581032, "learning_rate": 6.653149499864829e-06, "loss": 0.2394, "step": 90445 }, { "epoch": 2.640866556692603, "grad_norm": 0.5772323484057653, "learning_rate": 6.6504460665044606e-06, "loss": 0.2503, "step": 90450 }, { "epoch": 2.6410125399629205, "grad_norm": 0.6472702362676143, "learning_rate": 6.647742633144094e-06, "loss": 0.2561, "step": 90455 }, { "epoch": 2.6411585232332375, "grad_norm": 0.5716239982631758, "learning_rate": 6.645039199783726e-06, "loss": 0.2584, "step": 90460 }, { "epoch": 2.6413045065035545, "grad_norm": 0.5474098416243567, "learning_rate": 6.6423357664233575e-06, "loss": 0.2494, "step": 90465 }, { "epoch": 2.641450489773872, "grad_norm": 0.5532563309075018, "learning_rate": 6.639632333062991e-06, "loss": 0.2498, "step": 90470 }, { "epoch": 2.641596473044189, "grad_norm": 0.6240511608524412, "learning_rate": 6.636928899702623e-06, "loss": 0.2523, "step": 90475 }, { "epoch": 2.6417424563145064, "grad_norm": 0.6131758503125831, "learning_rate": 6.6342254663422545e-06, "loss": 0.2543, "step": 90480 }, { "epoch": 2.6418884395848234, "grad_norm": 0.6136813453154445, "learning_rate": 6.631522032981888e-06, "loss": 0.2475, "step": 90485 }, { "epoch": 2.642034422855141, "grad_norm": 0.5749656953657352, "learning_rate": 6.62881859962152e-06, "loss": 0.244, "step": 90490 }, { "epoch": 2.642180406125458, "grad_norm": 0.5898643891728647, "learning_rate": 6.626115166261151e-06, "loss": 0.2512, "step": 90495 }, { "epoch": 2.6423263893957754, "grad_norm": 0.5272413038719741, "learning_rate": 6.6234117329007846e-06, "loss": 0.2548, "step": 90500 }, { "epoch": 2.6424723726660924, "grad_norm": 0.5831293907549108, "learning_rate": 6.620708299540417e-06, "loss": 0.2447, "step": 90505 }, { "epoch": 2.64261835593641, "grad_norm": 0.6011490690555267, "learning_rate": 6.618004866180048e-06, "loss": 0.2602, "step": 90510 }, { "epoch": 2.642764339206727, "grad_norm": 0.5439527221173291, "learning_rate": 6.6153014328196815e-06, "loss": 0.242, "step": 90515 }, { "epoch": 2.6429103224770443, "grad_norm": 0.5992741033623672, "learning_rate": 6.612597999459314e-06, "loss": 0.2586, "step": 90520 }, { "epoch": 2.6430563057473613, "grad_norm": 0.6164769009795878, "learning_rate": 6.609894566098945e-06, "loss": 0.2467, "step": 90525 }, { "epoch": 2.6432022890176787, "grad_norm": 0.5666059787655618, "learning_rate": 6.6071911327385785e-06, "loss": 0.2391, "step": 90530 }, { "epoch": 2.6433482722879957, "grad_norm": 0.5643821737177656, "learning_rate": 6.604487699378211e-06, "loss": 0.2546, "step": 90535 }, { "epoch": 2.6434942555583127, "grad_norm": 0.6083348412003041, "learning_rate": 6.601784266017844e-06, "loss": 0.2528, "step": 90540 }, { "epoch": 2.64364023882863, "grad_norm": 0.5748054031273213, "learning_rate": 6.5990808326574754e-06, "loss": 0.2354, "step": 90545 }, { "epoch": 2.6437862220989476, "grad_norm": 0.5824221512980944, "learning_rate": 6.596377399297107e-06, "loss": 0.2463, "step": 90550 }, { "epoch": 2.6439322053692647, "grad_norm": 0.5460624581058621, "learning_rate": 6.593673965936741e-06, "loss": 0.2253, "step": 90555 }, { "epoch": 2.6440781886395817, "grad_norm": 0.604704058326158, "learning_rate": 6.590970532576372e-06, "loss": 0.2647, "step": 90560 }, { "epoch": 2.644224171909899, "grad_norm": 0.56915688824317, "learning_rate": 6.588267099216004e-06, "loss": 0.2469, "step": 90565 }, { "epoch": 2.6443701551802166, "grad_norm": 0.5601171714291115, "learning_rate": 6.585563665855638e-06, "loss": 0.2384, "step": 90570 }, { "epoch": 2.6445161384505336, "grad_norm": 0.6129466426308627, "learning_rate": 6.582860232495269e-06, "loss": 0.2496, "step": 90575 }, { "epoch": 2.6446621217208506, "grad_norm": 0.5743741923741261, "learning_rate": 6.580156799134901e-06, "loss": 0.2315, "step": 90580 }, { "epoch": 2.644808104991168, "grad_norm": 0.5291361179167814, "learning_rate": 6.577453365774534e-06, "loss": 0.2419, "step": 90585 }, { "epoch": 2.6449540882614855, "grad_norm": 0.5732173919376291, "learning_rate": 6.574749932414166e-06, "loss": 0.2507, "step": 90590 }, { "epoch": 2.6451000715318025, "grad_norm": 0.5786483011442664, "learning_rate": 6.572046499053798e-06, "loss": 0.2438, "step": 90595 }, { "epoch": 2.6452460548021195, "grad_norm": 0.6018731725787879, "learning_rate": 6.569343065693431e-06, "loss": 0.2338, "step": 90600 }, { "epoch": 2.645392038072437, "grad_norm": 0.568583116385594, "learning_rate": 6.566639632333063e-06, "loss": 0.2435, "step": 90605 }, { "epoch": 2.6455380213427544, "grad_norm": 0.6167950235550272, "learning_rate": 6.563936198972695e-06, "loss": 0.2329, "step": 90610 }, { "epoch": 2.6456840046130714, "grad_norm": 0.542643394944657, "learning_rate": 6.561232765612328e-06, "loss": 0.2466, "step": 90615 }, { "epoch": 2.6458299878833884, "grad_norm": 0.6189678231411423, "learning_rate": 6.55852933225196e-06, "loss": 0.2601, "step": 90620 }, { "epoch": 2.645975971153706, "grad_norm": 0.6016286937661781, "learning_rate": 6.555825898891593e-06, "loss": 0.2505, "step": 90625 }, { "epoch": 2.646121954424023, "grad_norm": 0.5644020328349946, "learning_rate": 6.553122465531225e-06, "loss": 0.2486, "step": 90630 }, { "epoch": 2.6462679376943403, "grad_norm": 0.5658715595601839, "learning_rate": 6.550419032170857e-06, "loss": 0.2341, "step": 90635 }, { "epoch": 2.6464139209646573, "grad_norm": 0.6187172579743498, "learning_rate": 6.54771559881049e-06, "loss": 0.2492, "step": 90640 }, { "epoch": 2.646559904234975, "grad_norm": 0.5821715979625723, "learning_rate": 6.545012165450122e-06, "loss": 0.2505, "step": 90645 }, { "epoch": 2.646705887505292, "grad_norm": 0.5668444584262267, "learning_rate": 6.542308732089754e-06, "loss": 0.2504, "step": 90650 }, { "epoch": 2.6468518707756092, "grad_norm": 0.5853742468985929, "learning_rate": 6.539605298729387e-06, "loss": 0.2335, "step": 90655 }, { "epoch": 2.6469978540459262, "grad_norm": 0.5736281999956839, "learning_rate": 6.536901865369019e-06, "loss": 0.2387, "step": 90660 }, { "epoch": 2.6471438373162437, "grad_norm": 0.6036309185694824, "learning_rate": 6.534198432008651e-06, "loss": 0.2499, "step": 90665 }, { "epoch": 2.6472898205865607, "grad_norm": 0.6494895734833461, "learning_rate": 6.531494998648284e-06, "loss": 0.2567, "step": 90670 }, { "epoch": 2.647435803856878, "grad_norm": 0.5861836351193046, "learning_rate": 6.528791565287916e-06, "loss": 0.2634, "step": 90675 }, { "epoch": 2.647581787127195, "grad_norm": 0.5839585402470325, "learning_rate": 6.526088131927548e-06, "loss": 0.2382, "step": 90680 }, { "epoch": 2.6477277703975126, "grad_norm": 0.6018897855219152, "learning_rate": 6.523384698567181e-06, "loss": 0.2613, "step": 90685 }, { "epoch": 2.6478737536678296, "grad_norm": 0.5930770189516962, "learning_rate": 6.520681265206813e-06, "loss": 0.2393, "step": 90690 }, { "epoch": 2.6480197369381466, "grad_norm": 0.6152224625512133, "learning_rate": 6.517977831846445e-06, "loss": 0.2617, "step": 90695 }, { "epoch": 2.648165720208464, "grad_norm": 0.5651775801388007, "learning_rate": 6.515274398486078e-06, "loss": 0.2342, "step": 90700 }, { "epoch": 2.6483117034787815, "grad_norm": 0.6216981591972174, "learning_rate": 6.51257096512571e-06, "loss": 0.2613, "step": 90705 }, { "epoch": 2.6484576867490985, "grad_norm": 0.5760690104319618, "learning_rate": 6.509867531765343e-06, "loss": 0.234, "step": 90710 }, { "epoch": 2.6486036700194155, "grad_norm": 0.5258848203210947, "learning_rate": 6.507164098404975e-06, "loss": 0.2408, "step": 90715 }, { "epoch": 2.648749653289733, "grad_norm": 0.5750221560147706, "learning_rate": 6.5044606650446065e-06, "loss": 0.2351, "step": 90720 }, { "epoch": 2.6488956365600504, "grad_norm": 0.6095093794502617, "learning_rate": 6.50175723168424e-06, "loss": 0.2517, "step": 90725 }, { "epoch": 2.6490416198303675, "grad_norm": 0.6134598719790886, "learning_rate": 6.499053798323872e-06, "loss": 0.2487, "step": 90730 }, { "epoch": 2.6491876031006845, "grad_norm": 0.5699664592904337, "learning_rate": 6.4963503649635035e-06, "loss": 0.2511, "step": 90735 }, { "epoch": 2.649333586371002, "grad_norm": 0.627287607191896, "learning_rate": 6.493646931603137e-06, "loss": 0.2552, "step": 90740 }, { "epoch": 2.6494795696413194, "grad_norm": 0.5731611008937605, "learning_rate": 6.490943498242769e-06, "loss": 0.2384, "step": 90745 }, { "epoch": 2.6496255529116364, "grad_norm": 0.5515479855043748, "learning_rate": 6.4882400648824004e-06, "loss": 0.2275, "step": 90750 }, { "epoch": 2.6497715361819534, "grad_norm": 0.5699622782409843, "learning_rate": 6.485536631522034e-06, "loss": 0.2425, "step": 90755 }, { "epoch": 2.649917519452271, "grad_norm": 0.6191024312337986, "learning_rate": 6.482833198161666e-06, "loss": 0.2588, "step": 90760 }, { "epoch": 2.650063502722588, "grad_norm": 0.5788359553917639, "learning_rate": 6.480129764801297e-06, "loss": 0.2473, "step": 90765 }, { "epoch": 2.6502094859929053, "grad_norm": 0.5957155785781135, "learning_rate": 6.4774263314409306e-06, "loss": 0.234, "step": 90770 }, { "epoch": 2.6503554692632223, "grad_norm": 0.5761355603946632, "learning_rate": 6.474722898080562e-06, "loss": 0.2397, "step": 90775 }, { "epoch": 2.6505014525335397, "grad_norm": 0.5797973184324963, "learning_rate": 6.472019464720194e-06, "loss": 0.2365, "step": 90780 }, { "epoch": 2.6506474358038568, "grad_norm": 0.5508487709757199, "learning_rate": 6.4693160313598275e-06, "loss": 0.23, "step": 90785 }, { "epoch": 2.650793419074174, "grad_norm": 0.579987990235189, "learning_rate": 6.466612597999459e-06, "loss": 0.2329, "step": 90790 }, { "epoch": 2.650939402344491, "grad_norm": 0.5655892033673049, "learning_rate": 6.463909164639093e-06, "loss": 0.243, "step": 90795 }, { "epoch": 2.6510853856148087, "grad_norm": 0.627896142809355, "learning_rate": 6.4612057312787245e-06, "loss": 0.2436, "step": 90800 }, { "epoch": 2.6512313688851257, "grad_norm": 0.5780467501169636, "learning_rate": 6.458502297918356e-06, "loss": 0.2431, "step": 90805 }, { "epoch": 2.651377352155443, "grad_norm": 0.5744549960080406, "learning_rate": 6.455798864557989e-06, "loss": 0.2421, "step": 90810 }, { "epoch": 2.65152333542576, "grad_norm": 0.6247853633119429, "learning_rate": 6.453095431197621e-06, "loss": 0.2481, "step": 90815 }, { "epoch": 2.6516693186960776, "grad_norm": 0.5689621005816411, "learning_rate": 6.450391997837253e-06, "loss": 0.2311, "step": 90820 }, { "epoch": 2.6518153019663946, "grad_norm": 0.5966094211274545, "learning_rate": 6.447688564476886e-06, "loss": 0.2537, "step": 90825 }, { "epoch": 2.6519612852367116, "grad_norm": 0.5865624459741912, "learning_rate": 6.444985131116518e-06, "loss": 0.2435, "step": 90830 }, { "epoch": 2.652107268507029, "grad_norm": 0.6010791614227154, "learning_rate": 6.44228169775615e-06, "loss": 0.261, "step": 90835 }, { "epoch": 2.6522532517773465, "grad_norm": 0.6072526702790833, "learning_rate": 6.439578264395783e-06, "loss": 0.2572, "step": 90840 }, { "epoch": 2.6523992350476635, "grad_norm": 0.5551914584253597, "learning_rate": 6.436874831035415e-06, "loss": 0.2476, "step": 90845 }, { "epoch": 2.6525452183179805, "grad_norm": 0.5417389118078069, "learning_rate": 6.434171397675047e-06, "loss": 0.2288, "step": 90850 }, { "epoch": 2.652691201588298, "grad_norm": 0.5778451857226066, "learning_rate": 6.43146796431468e-06, "loss": 0.2487, "step": 90855 }, { "epoch": 2.6528371848586154, "grad_norm": 0.5764018248294411, "learning_rate": 6.428764530954312e-06, "loss": 0.2437, "step": 90860 }, { "epoch": 2.6529831681289324, "grad_norm": 0.6065163283141733, "learning_rate": 6.426061097593944e-06, "loss": 0.244, "step": 90865 }, { "epoch": 2.6531291513992494, "grad_norm": 0.6359413461615153, "learning_rate": 6.423357664233577e-06, "loss": 0.2431, "step": 90870 }, { "epoch": 2.653275134669567, "grad_norm": 0.5969044462673077, "learning_rate": 6.420654230873209e-06, "loss": 0.2521, "step": 90875 }, { "epoch": 2.6534211179398843, "grad_norm": 0.6485566116951008, "learning_rate": 6.417950797512842e-06, "loss": 0.2545, "step": 90880 }, { "epoch": 2.6535671012102013, "grad_norm": 0.536279289137434, "learning_rate": 6.415247364152474e-06, "loss": 0.2443, "step": 90885 }, { "epoch": 2.6537130844805183, "grad_norm": 0.6380301297259412, "learning_rate": 6.412543930792106e-06, "loss": 0.2465, "step": 90890 }, { "epoch": 2.653859067750836, "grad_norm": 0.616574205531583, "learning_rate": 6.409840497431739e-06, "loss": 0.2526, "step": 90895 }, { "epoch": 2.6540050510211532, "grad_norm": 0.5400341548235875, "learning_rate": 6.407137064071371e-06, "loss": 0.237, "step": 90900 }, { "epoch": 2.6541510342914703, "grad_norm": 0.6091743949439028, "learning_rate": 6.404433630711003e-06, "loss": 0.2471, "step": 90905 }, { "epoch": 2.6542970175617873, "grad_norm": 0.6126245851871691, "learning_rate": 6.401730197350636e-06, "loss": 0.2396, "step": 90910 }, { "epoch": 2.6544430008321047, "grad_norm": 0.5886313065517617, "learning_rate": 6.399026763990268e-06, "loss": 0.238, "step": 90915 }, { "epoch": 2.6545889841024217, "grad_norm": 0.5810793584510054, "learning_rate": 6.3963233306299e-06, "loss": 0.2344, "step": 90920 }, { "epoch": 2.654734967372739, "grad_norm": 0.5563902049525019, "learning_rate": 6.393619897269533e-06, "loss": 0.2362, "step": 90925 }, { "epoch": 2.654880950643056, "grad_norm": 0.5416828825504003, "learning_rate": 6.390916463909165e-06, "loss": 0.2283, "step": 90930 }, { "epoch": 2.6550269339133736, "grad_norm": 0.6003768488743992, "learning_rate": 6.388213030548797e-06, "loss": 0.2451, "step": 90935 }, { "epoch": 2.6551729171836906, "grad_norm": 0.6028094492314309, "learning_rate": 6.38550959718843e-06, "loss": 0.2475, "step": 90940 }, { "epoch": 2.655318900454008, "grad_norm": 0.6059835118224823, "learning_rate": 6.382806163828062e-06, "loss": 0.2592, "step": 90945 }, { "epoch": 2.655464883724325, "grad_norm": 0.5970763362311972, "learning_rate": 6.380102730467695e-06, "loss": 0.2345, "step": 90950 }, { "epoch": 2.6556108669946425, "grad_norm": 0.6152772249497683, "learning_rate": 6.377399297107327e-06, "loss": 0.2287, "step": 90955 }, { "epoch": 2.6557568502649596, "grad_norm": 0.5880962429755037, "learning_rate": 6.374695863746959e-06, "loss": 0.2512, "step": 90960 }, { "epoch": 2.655902833535277, "grad_norm": 0.5900855882604462, "learning_rate": 6.371992430386592e-06, "loss": 0.251, "step": 90965 }, { "epoch": 2.656048816805594, "grad_norm": 0.5763417571212409, "learning_rate": 6.369288997026224e-06, "loss": 0.2594, "step": 90970 }, { "epoch": 2.6561948000759115, "grad_norm": 0.5436232763463593, "learning_rate": 6.3665855636658556e-06, "loss": 0.244, "step": 90975 }, { "epoch": 2.6563407833462285, "grad_norm": 0.5925923194922826, "learning_rate": 6.363882130305489e-06, "loss": 0.2496, "step": 90980 }, { "epoch": 2.6564867666165455, "grad_norm": 0.5550559246503826, "learning_rate": 6.361178696945121e-06, "loss": 0.2343, "step": 90985 }, { "epoch": 2.656632749886863, "grad_norm": 0.6362457385663248, "learning_rate": 6.3584752635847525e-06, "loss": 0.2435, "step": 90990 }, { "epoch": 2.6567787331571804, "grad_norm": 0.5641435705551272, "learning_rate": 6.355771830224386e-06, "loss": 0.2391, "step": 90995 }, { "epoch": 2.6569247164274974, "grad_norm": 0.5985528600169006, "learning_rate": 6.353068396864017e-06, "loss": 0.2619, "step": 91000 }, { "epoch": 2.6570706996978144, "grad_norm": 0.5987527579670125, "learning_rate": 6.3503649635036495e-06, "loss": 0.2729, "step": 91005 }, { "epoch": 2.657216682968132, "grad_norm": 0.5665791457439626, "learning_rate": 6.347661530143283e-06, "loss": 0.2483, "step": 91010 }, { "epoch": 2.6573626662384493, "grad_norm": 0.5622363264807541, "learning_rate": 6.344958096782914e-06, "loss": 0.2368, "step": 91015 }, { "epoch": 2.6575086495087663, "grad_norm": 0.5370808547461416, "learning_rate": 6.342254663422546e-06, "loss": 0.2506, "step": 91020 }, { "epoch": 2.6576546327790833, "grad_norm": 0.5711878873464465, "learning_rate": 6.33955123006218e-06, "loss": 0.2561, "step": 91025 }, { "epoch": 2.6578006160494008, "grad_norm": 0.5703225971035571, "learning_rate": 6.336847796701811e-06, "loss": 0.2446, "step": 91030 }, { "epoch": 2.657946599319718, "grad_norm": 0.5800718027228703, "learning_rate": 6.334144363341444e-06, "loss": 0.2405, "step": 91035 }, { "epoch": 2.6580925825900352, "grad_norm": 0.5873456771736162, "learning_rate": 6.3314409299810765e-06, "loss": 0.2396, "step": 91040 }, { "epoch": 2.6582385658603522, "grad_norm": 0.5669206510642111, "learning_rate": 6.328737496620708e-06, "loss": 0.2396, "step": 91045 }, { "epoch": 2.6583845491306697, "grad_norm": 0.5790147513381032, "learning_rate": 6.326034063260341e-06, "loss": 0.2449, "step": 91050 }, { "epoch": 2.6585305324009867, "grad_norm": 0.5654511912778126, "learning_rate": 6.3233306298999735e-06, "loss": 0.2421, "step": 91055 }, { "epoch": 2.658676515671304, "grad_norm": 0.6036294307628796, "learning_rate": 6.320627196539605e-06, "loss": 0.2394, "step": 91060 }, { "epoch": 2.658822498941621, "grad_norm": 0.6057959290567742, "learning_rate": 6.317923763179238e-06, "loss": 0.2476, "step": 91065 }, { "epoch": 2.6589684822119386, "grad_norm": 0.6065356342964855, "learning_rate": 6.3152203298188704e-06, "loss": 0.2378, "step": 91070 }, { "epoch": 2.6591144654822556, "grad_norm": 0.6078903648484305, "learning_rate": 6.312516896458502e-06, "loss": 0.2399, "step": 91075 }, { "epoch": 2.659260448752573, "grad_norm": 0.5967557908892166, "learning_rate": 6.309813463098135e-06, "loss": 0.2656, "step": 91080 }, { "epoch": 2.65940643202289, "grad_norm": 0.6155466700786733, "learning_rate": 6.307110029737767e-06, "loss": 0.2522, "step": 91085 }, { "epoch": 2.6595524152932075, "grad_norm": 0.5820276348708459, "learning_rate": 6.304406596377399e-06, "loss": 0.2667, "step": 91090 }, { "epoch": 2.6596983985635245, "grad_norm": 0.5512861873727235, "learning_rate": 6.301703163017032e-06, "loss": 0.2271, "step": 91095 }, { "epoch": 2.659844381833842, "grad_norm": 0.5619890141486027, "learning_rate": 6.298999729656664e-06, "loss": 0.2462, "step": 91100 }, { "epoch": 2.659990365104159, "grad_norm": 0.5880221010375563, "learning_rate": 6.296296296296296e-06, "loss": 0.2442, "step": 91105 }, { "epoch": 2.6601363483744764, "grad_norm": 0.5631883087597275, "learning_rate": 6.293592862935929e-06, "loss": 0.2368, "step": 91110 }, { "epoch": 2.6602823316447934, "grad_norm": 0.5795979087964915, "learning_rate": 6.290889429575561e-06, "loss": 0.2343, "step": 91115 }, { "epoch": 2.660428314915111, "grad_norm": 0.5588586169285575, "learning_rate": 6.2881859962151945e-06, "loss": 0.227, "step": 91120 }, { "epoch": 2.660574298185428, "grad_norm": 0.5486925565595271, "learning_rate": 6.285482562854826e-06, "loss": 0.2464, "step": 91125 }, { "epoch": 2.6607202814557454, "grad_norm": 0.5961547627475067, "learning_rate": 6.282779129494458e-06, "loss": 0.2381, "step": 91130 }, { "epoch": 2.6608662647260624, "grad_norm": 0.5923745110401639, "learning_rate": 6.280075696134091e-06, "loss": 0.2598, "step": 91135 }, { "epoch": 2.6610122479963794, "grad_norm": 0.6223400742550116, "learning_rate": 6.277372262773723e-06, "loss": 0.2628, "step": 91140 }, { "epoch": 2.661158231266697, "grad_norm": 0.5906118613978508, "learning_rate": 6.274668829413355e-06, "loss": 0.2459, "step": 91145 }, { "epoch": 2.6613042145370143, "grad_norm": 0.5905006767059643, "learning_rate": 6.271965396052988e-06, "loss": 0.2418, "step": 91150 }, { "epoch": 2.6614501978073313, "grad_norm": 0.5983103884865497, "learning_rate": 6.26926196269262e-06, "loss": 0.2457, "step": 91155 }, { "epoch": 2.6615961810776483, "grad_norm": 0.5528240418252147, "learning_rate": 6.266558529332252e-06, "loss": 0.2339, "step": 91160 }, { "epoch": 2.6617421643479657, "grad_norm": 0.5708790796440419, "learning_rate": 6.263855095971885e-06, "loss": 0.2284, "step": 91165 }, { "epoch": 2.661888147618283, "grad_norm": 0.5482120826376008, "learning_rate": 6.261151662611517e-06, "loss": 0.2473, "step": 91170 }, { "epoch": 2.6620341308886, "grad_norm": 0.5909278716441421, "learning_rate": 6.258448229251149e-06, "loss": 0.257, "step": 91175 }, { "epoch": 2.662180114158917, "grad_norm": 0.6045844549224171, "learning_rate": 6.255744795890782e-06, "loss": 0.2397, "step": 91180 }, { "epoch": 2.6623260974292347, "grad_norm": 0.5798276058571693, "learning_rate": 6.253041362530414e-06, "loss": 0.2478, "step": 91185 }, { "epoch": 2.662472080699552, "grad_norm": 0.628104865866328, "learning_rate": 6.250337929170046e-06, "loss": 0.2433, "step": 91190 }, { "epoch": 2.662618063969869, "grad_norm": 0.5915248262246028, "learning_rate": 6.247634495809679e-06, "loss": 0.2442, "step": 91195 }, { "epoch": 2.662764047240186, "grad_norm": 0.6178212023382242, "learning_rate": 6.244931062449311e-06, "loss": 0.2455, "step": 91200 }, { "epoch": 2.6629100305105036, "grad_norm": 0.5851086757117054, "learning_rate": 6.242227629088943e-06, "loss": 0.2509, "step": 91205 }, { "epoch": 2.6630560137808206, "grad_norm": 0.5937236594378545, "learning_rate": 6.239524195728576e-06, "loss": 0.2453, "step": 91210 }, { "epoch": 2.663201997051138, "grad_norm": 0.5911861736390365, "learning_rate": 6.236820762368208e-06, "loss": 0.2441, "step": 91215 }, { "epoch": 2.663347980321455, "grad_norm": 0.5834697508724043, "learning_rate": 6.23411732900784e-06, "loss": 0.2578, "step": 91220 }, { "epoch": 2.6634939635917725, "grad_norm": 0.6092347831303335, "learning_rate": 6.231413895647472e-06, "loss": 0.2474, "step": 91225 }, { "epoch": 2.6636399468620895, "grad_norm": 0.5338547621131172, "learning_rate": 6.2287104622871054e-06, "loss": 0.2445, "step": 91230 }, { "epoch": 2.663785930132407, "grad_norm": 0.6298564573400061, "learning_rate": 6.226007028926737e-06, "loss": 0.2634, "step": 91235 }, { "epoch": 2.663931913402724, "grad_norm": 0.5703259691284728, "learning_rate": 6.223303595566369e-06, "loss": 0.2538, "step": 91240 }, { "epoch": 2.6640778966730414, "grad_norm": 0.5910432617195992, "learning_rate": 6.220600162206002e-06, "loss": 0.2353, "step": 91245 }, { "epoch": 2.6642238799433584, "grad_norm": 0.5823356709101448, "learning_rate": 6.217896728845634e-06, "loss": 0.2378, "step": 91250 }, { "epoch": 2.664369863213676, "grad_norm": 0.6240192506010614, "learning_rate": 6.215193295485266e-06, "loss": 0.2511, "step": 91255 }, { "epoch": 2.664515846483993, "grad_norm": 0.6062161574488293, "learning_rate": 6.212489862124899e-06, "loss": 0.2541, "step": 91260 }, { "epoch": 2.6646618297543103, "grad_norm": 0.5716215479390613, "learning_rate": 6.209786428764532e-06, "loss": 0.2536, "step": 91265 }, { "epoch": 2.6648078130246273, "grad_norm": 0.6373660743717048, "learning_rate": 6.207082995404163e-06, "loss": 0.2467, "step": 91270 }, { "epoch": 2.6649537962949443, "grad_norm": 0.5413109552928688, "learning_rate": 6.204379562043796e-06, "loss": 0.2495, "step": 91275 }, { "epoch": 2.665099779565262, "grad_norm": 0.583034658258688, "learning_rate": 6.201676128683429e-06, "loss": 0.2346, "step": 91280 }, { "epoch": 2.6652457628355792, "grad_norm": 0.6055442488294446, "learning_rate": 6.19897269532306e-06, "loss": 0.2419, "step": 91285 }, { "epoch": 2.6653917461058962, "grad_norm": 0.6172777441456595, "learning_rate": 6.196269261962693e-06, "loss": 0.2532, "step": 91290 }, { "epoch": 2.6655377293762132, "grad_norm": 0.5990608226016129, "learning_rate": 6.1935658286023256e-06, "loss": 0.2421, "step": 91295 }, { "epoch": 2.6656837126465307, "grad_norm": 0.5880677898615663, "learning_rate": 6.190862395241957e-06, "loss": 0.2261, "step": 91300 }, { "epoch": 2.665829695916848, "grad_norm": 0.5887947717307532, "learning_rate": 6.18815896188159e-06, "loss": 0.2273, "step": 91305 }, { "epoch": 2.665975679187165, "grad_norm": 0.6061712105053627, "learning_rate": 6.1854555285212225e-06, "loss": 0.2315, "step": 91310 }, { "epoch": 2.666121662457482, "grad_norm": 0.6022644652990129, "learning_rate": 6.182752095160855e-06, "loss": 0.2583, "step": 91315 }, { "epoch": 2.6662676457277996, "grad_norm": 0.6759530622190912, "learning_rate": 6.180048661800487e-06, "loss": 0.2359, "step": 91320 }, { "epoch": 2.666413628998117, "grad_norm": 0.60087946330701, "learning_rate": 6.1773452284401195e-06, "loss": 0.2318, "step": 91325 }, { "epoch": 2.666559612268434, "grad_norm": 0.5710663619404305, "learning_rate": 6.174641795079752e-06, "loss": 0.2357, "step": 91330 }, { "epoch": 2.666705595538751, "grad_norm": 0.5747617612721392, "learning_rate": 6.171938361719383e-06, "loss": 0.2355, "step": 91335 }, { "epoch": 2.6668515788090685, "grad_norm": 0.585551791635535, "learning_rate": 6.169234928359016e-06, "loss": 0.2506, "step": 91340 }, { "epoch": 2.6669975620793855, "grad_norm": 0.6418275631202172, "learning_rate": 6.166531494998649e-06, "loss": 0.2422, "step": 91345 }, { "epoch": 2.667143545349703, "grad_norm": 0.5692163575920023, "learning_rate": 6.163828061638281e-06, "loss": 0.2331, "step": 91350 }, { "epoch": 2.66728952862002, "grad_norm": 0.5937725167872491, "learning_rate": 6.161124628277913e-06, "loss": 0.2396, "step": 91355 }, { "epoch": 2.6674355118903375, "grad_norm": 0.5960204341305394, "learning_rate": 6.158421194917546e-06, "loss": 0.2487, "step": 91360 }, { "epoch": 2.6675814951606545, "grad_norm": 0.6009225174479681, "learning_rate": 6.155717761557178e-06, "loss": 0.2423, "step": 91365 }, { "epoch": 2.667727478430972, "grad_norm": 0.6034193083370774, "learning_rate": 6.15301432819681e-06, "loss": 0.2389, "step": 91370 }, { "epoch": 2.667873461701289, "grad_norm": 0.600901524335828, "learning_rate": 6.150310894836443e-06, "loss": 0.2408, "step": 91375 }, { "epoch": 2.6680194449716064, "grad_norm": 0.6174863396672212, "learning_rate": 6.147607461476075e-06, "loss": 0.2464, "step": 91380 }, { "epoch": 2.6681654282419234, "grad_norm": 0.5857430751731857, "learning_rate": 6.144904028115707e-06, "loss": 0.2404, "step": 91385 }, { "epoch": 2.668311411512241, "grad_norm": 0.592888019662816, "learning_rate": 6.14220059475534e-06, "loss": 0.2424, "step": 91390 }, { "epoch": 2.668457394782558, "grad_norm": 0.5901744132572472, "learning_rate": 6.139497161394972e-06, "loss": 0.2333, "step": 91395 }, { "epoch": 2.6686033780528753, "grad_norm": 0.5724988567228344, "learning_rate": 6.136793728034604e-06, "loss": 0.2394, "step": 91400 }, { "epoch": 2.6687493613231923, "grad_norm": 0.5960839523322559, "learning_rate": 6.1340902946742365e-06, "loss": 0.2439, "step": 91405 }, { "epoch": 2.6688953445935097, "grad_norm": 0.5683355138014803, "learning_rate": 6.131386861313869e-06, "loss": 0.2375, "step": 91410 }, { "epoch": 2.6690413278638268, "grad_norm": 0.5858758149124412, "learning_rate": 6.128683427953501e-06, "loss": 0.2443, "step": 91415 }, { "epoch": 2.669187311134144, "grad_norm": 0.6064284128992038, "learning_rate": 6.1259799945931335e-06, "loss": 0.2409, "step": 91420 }, { "epoch": 2.669333294404461, "grad_norm": 0.577216972203113, "learning_rate": 6.123276561232766e-06, "loss": 0.2303, "step": 91425 }, { "epoch": 2.669479277674778, "grad_norm": 0.6007631710425363, "learning_rate": 6.120573127872398e-06, "loss": 0.2402, "step": 91430 }, { "epoch": 2.6696252609450957, "grad_norm": 0.5817676165646257, "learning_rate": 6.117869694512031e-06, "loss": 0.233, "step": 91435 }, { "epoch": 2.669771244215413, "grad_norm": 0.5922417131841238, "learning_rate": 6.115166261151663e-06, "loss": 0.2459, "step": 91440 }, { "epoch": 2.66991722748573, "grad_norm": 0.5968941893542291, "learning_rate": 6.112462827791295e-06, "loss": 0.237, "step": 91445 }, { "epoch": 2.670063210756047, "grad_norm": 0.5974168678674499, "learning_rate": 6.109759394430928e-06, "loss": 0.2415, "step": 91450 }, { "epoch": 2.6702091940263646, "grad_norm": 0.609722449495671, "learning_rate": 6.10705596107056e-06, "loss": 0.2449, "step": 91455 }, { "epoch": 2.670355177296682, "grad_norm": 0.5734051311079926, "learning_rate": 6.104352527710192e-06, "loss": 0.245, "step": 91460 }, { "epoch": 2.670501160566999, "grad_norm": 0.5944003124948843, "learning_rate": 6.101649094349824e-06, "loss": 0.2496, "step": 91465 }, { "epoch": 2.670647143837316, "grad_norm": 0.6395628756451048, "learning_rate": 6.098945660989457e-06, "loss": 0.2369, "step": 91470 }, { "epoch": 2.6707931271076335, "grad_norm": 0.5775799228062724, "learning_rate": 6.096242227629089e-06, "loss": 0.2497, "step": 91475 }, { "epoch": 2.670939110377951, "grad_norm": 0.5810091693231061, "learning_rate": 6.093538794268721e-06, "loss": 0.2439, "step": 91480 }, { "epoch": 2.671085093648268, "grad_norm": 0.5590540025297569, "learning_rate": 6.0908353609083545e-06, "loss": 0.24, "step": 91485 }, { "epoch": 2.671231076918585, "grad_norm": 0.620956752381888, "learning_rate": 6.088131927547986e-06, "loss": 0.2578, "step": 91490 }, { "epoch": 2.6713770601889024, "grad_norm": 0.5534785524034613, "learning_rate": 6.085428494187618e-06, "loss": 0.2471, "step": 91495 }, { "epoch": 2.6715230434592194, "grad_norm": 0.6166471631347812, "learning_rate": 6.082725060827251e-06, "loss": 0.2362, "step": 91500 }, { "epoch": 2.671669026729537, "grad_norm": 0.5506484604513883, "learning_rate": 6.080021627466883e-06, "loss": 0.2304, "step": 91505 }, { "epoch": 2.671815009999854, "grad_norm": 0.5889741418448113, "learning_rate": 6.077318194106515e-06, "loss": 0.2366, "step": 91510 }, { "epoch": 2.6719609932701713, "grad_norm": 0.547769937845194, "learning_rate": 6.074614760746148e-06, "loss": 0.2507, "step": 91515 }, { "epoch": 2.6721069765404883, "grad_norm": 0.5878708778473801, "learning_rate": 6.071911327385781e-06, "loss": 0.2645, "step": 91520 }, { "epoch": 2.672252959810806, "grad_norm": 0.5623665009351475, "learning_rate": 6.069207894025412e-06, "loss": 0.2476, "step": 91525 }, { "epoch": 2.672398943081123, "grad_norm": 0.6300042595676034, "learning_rate": 6.066504460665045e-06, "loss": 0.2602, "step": 91530 }, { "epoch": 2.6725449263514403, "grad_norm": 0.5655494578575532, "learning_rate": 6.063801027304678e-06, "loss": 0.2456, "step": 91535 }, { "epoch": 2.6726909096217573, "grad_norm": 0.6136699698175434, "learning_rate": 6.061097593944309e-06, "loss": 0.2461, "step": 91540 }, { "epoch": 2.6728368928920747, "grad_norm": 0.5926230910556255, "learning_rate": 6.058394160583942e-06, "loss": 0.2499, "step": 91545 }, { "epoch": 2.6729828761623917, "grad_norm": 0.565977871712363, "learning_rate": 6.055690727223575e-06, "loss": 0.2473, "step": 91550 }, { "epoch": 2.673128859432709, "grad_norm": 0.5956232523797, "learning_rate": 6.052987293863206e-06, "loss": 0.2437, "step": 91555 }, { "epoch": 2.673274842703026, "grad_norm": 0.5943187721798797, "learning_rate": 6.050283860502838e-06, "loss": 0.2497, "step": 91560 }, { "epoch": 2.673420825973343, "grad_norm": 0.5632670342296582, "learning_rate": 6.0475804271424715e-06, "loss": 0.261, "step": 91565 }, { "epoch": 2.6735668092436606, "grad_norm": 0.5759135168697169, "learning_rate": 6.044876993782104e-06, "loss": 0.2513, "step": 91570 }, { "epoch": 2.673712792513978, "grad_norm": 0.5678930095014858, "learning_rate": 6.042173560421735e-06, "loss": 0.2286, "step": 91575 }, { "epoch": 2.673858775784295, "grad_norm": 0.5659551935536117, "learning_rate": 6.0394701270613685e-06, "loss": 0.2474, "step": 91580 }, { "epoch": 2.674004759054612, "grad_norm": 0.5974710545015666, "learning_rate": 6.036766693701001e-06, "loss": 0.2336, "step": 91585 }, { "epoch": 2.6741507423249296, "grad_norm": 0.5792764814497952, "learning_rate": 6.034063260340632e-06, "loss": 0.2358, "step": 91590 }, { "epoch": 2.674296725595247, "grad_norm": 0.6180709013810035, "learning_rate": 6.0313598269802654e-06, "loss": 0.2473, "step": 91595 }, { "epoch": 2.674442708865564, "grad_norm": 0.5734481504783636, "learning_rate": 6.028656393619898e-06, "loss": 0.2418, "step": 91600 }, { "epoch": 2.674588692135881, "grad_norm": 0.6128833047011016, "learning_rate": 6.02595296025953e-06, "loss": 0.256, "step": 91605 }, { "epoch": 2.6747346754061985, "grad_norm": 0.5773263533663868, "learning_rate": 6.023249526899162e-06, "loss": 0.2515, "step": 91610 }, { "epoch": 2.674880658676516, "grad_norm": 0.5883512321281315, "learning_rate": 6.020546093538795e-06, "loss": 0.2413, "step": 91615 }, { "epoch": 2.675026641946833, "grad_norm": 0.5473224747268529, "learning_rate": 6.017842660178427e-06, "loss": 0.2438, "step": 91620 }, { "epoch": 2.67517262521715, "grad_norm": 0.6102235400831126, "learning_rate": 6.015139226818059e-06, "loss": 0.2489, "step": 91625 }, { "epoch": 2.6753186084874674, "grad_norm": 0.5976353033741094, "learning_rate": 6.012435793457692e-06, "loss": 0.2448, "step": 91630 }, { "epoch": 2.6754645917577844, "grad_norm": 0.5539845625006274, "learning_rate": 6.009732360097324e-06, "loss": 0.223, "step": 91635 }, { "epoch": 2.675610575028102, "grad_norm": 0.5685415422739267, "learning_rate": 6.007028926736956e-06, "loss": 0.2406, "step": 91640 }, { "epoch": 2.675756558298419, "grad_norm": 0.5561667348895598, "learning_rate": 6.004325493376589e-06, "loss": 0.2353, "step": 91645 }, { "epoch": 2.6759025415687363, "grad_norm": 0.6218522163670237, "learning_rate": 6.001622060016221e-06, "loss": 0.2574, "step": 91650 }, { "epoch": 2.6760485248390533, "grad_norm": 0.5952612465548276, "learning_rate": 5.998918626655853e-06, "loss": 0.2394, "step": 91655 }, { "epoch": 2.6761945081093708, "grad_norm": 0.6417501900799732, "learning_rate": 5.9962151932954856e-06, "loss": 0.2467, "step": 91660 }, { "epoch": 2.6763404913796878, "grad_norm": 0.6345594242283974, "learning_rate": 5.993511759935118e-06, "loss": 0.2359, "step": 91665 }, { "epoch": 2.676486474650005, "grad_norm": 0.584914630712271, "learning_rate": 5.99080832657475e-06, "loss": 0.2435, "step": 91670 }, { "epoch": 2.6766324579203222, "grad_norm": 0.5818862355736647, "learning_rate": 5.9881048932143825e-06, "loss": 0.2358, "step": 91675 }, { "epoch": 2.6767784411906397, "grad_norm": 0.6025241962590321, "learning_rate": 5.985401459854015e-06, "loss": 0.2466, "step": 91680 }, { "epoch": 2.6769244244609567, "grad_norm": 0.5716017457715458, "learning_rate": 5.982698026493647e-06, "loss": 0.2291, "step": 91685 }, { "epoch": 2.677070407731274, "grad_norm": 0.6271792910991865, "learning_rate": 5.9799945931332795e-06, "loss": 0.2463, "step": 91690 }, { "epoch": 2.677216391001591, "grad_norm": 0.5991799405969384, "learning_rate": 5.977291159772912e-06, "loss": 0.2484, "step": 91695 }, { "epoch": 2.6773623742719086, "grad_norm": 0.6232158172367301, "learning_rate": 5.974587726412544e-06, "loss": 0.2566, "step": 91700 }, { "epoch": 2.6775083575422256, "grad_norm": 0.6072055479615387, "learning_rate": 5.971884293052176e-06, "loss": 0.2557, "step": 91705 }, { "epoch": 2.677654340812543, "grad_norm": 0.5822946673512367, "learning_rate": 5.969180859691809e-06, "loss": 0.2545, "step": 91710 }, { "epoch": 2.67780032408286, "grad_norm": 0.6165174119458671, "learning_rate": 5.966477426331441e-06, "loss": 0.237, "step": 91715 }, { "epoch": 2.677946307353177, "grad_norm": 0.5832960638337531, "learning_rate": 5.963773992971073e-06, "loss": 0.236, "step": 91720 }, { "epoch": 2.6780922906234945, "grad_norm": 0.6044020820842204, "learning_rate": 5.961070559610706e-06, "loss": 0.248, "step": 91725 }, { "epoch": 2.678238273893812, "grad_norm": 0.5742079310057921, "learning_rate": 5.958367126250338e-06, "loss": 0.2415, "step": 91730 }, { "epoch": 2.678384257164129, "grad_norm": 0.5835965484133129, "learning_rate": 5.95566369288997e-06, "loss": 0.2209, "step": 91735 }, { "epoch": 2.678530240434446, "grad_norm": 0.5573975080861928, "learning_rate": 5.9529602595296035e-06, "loss": 0.2409, "step": 91740 }, { "epoch": 2.6786762237047634, "grad_norm": 0.5080113506377341, "learning_rate": 5.950256826169235e-06, "loss": 0.2378, "step": 91745 }, { "epoch": 2.678822206975081, "grad_norm": 0.5976921063945871, "learning_rate": 5.947553392808867e-06, "loss": 0.2484, "step": 91750 }, { "epoch": 2.678968190245398, "grad_norm": 0.5889754422465557, "learning_rate": 5.9448499594485004e-06, "loss": 0.2534, "step": 91755 }, { "epoch": 2.679114173515715, "grad_norm": 0.6425113357431381, "learning_rate": 5.942146526088132e-06, "loss": 0.2595, "step": 91760 }, { "epoch": 2.6792601567860324, "grad_norm": 0.5905722501598784, "learning_rate": 5.939443092727764e-06, "loss": 0.2546, "step": 91765 }, { "epoch": 2.67940614005635, "grad_norm": 0.5949126534871851, "learning_rate": 5.936739659367397e-06, "loss": 0.2486, "step": 91770 }, { "epoch": 2.679552123326667, "grad_norm": 0.58070352775107, "learning_rate": 5.93403622600703e-06, "loss": 0.2428, "step": 91775 }, { "epoch": 2.679698106596984, "grad_norm": 0.6180539711674256, "learning_rate": 5.931332792646661e-06, "loss": 0.2446, "step": 91780 }, { "epoch": 2.6798440898673013, "grad_norm": 0.5887368281685798, "learning_rate": 5.9286293592862935e-06, "loss": 0.2464, "step": 91785 }, { "epoch": 2.6799900731376183, "grad_norm": 0.5948812282599724, "learning_rate": 5.925925925925927e-06, "loss": 0.2389, "step": 91790 }, { "epoch": 2.6801360564079357, "grad_norm": 0.5952503464034186, "learning_rate": 5.923222492565558e-06, "loss": 0.2373, "step": 91795 }, { "epoch": 2.6802820396782527, "grad_norm": 0.5781367856723133, "learning_rate": 5.9205190592051904e-06, "loss": 0.2453, "step": 91800 }, { "epoch": 2.68042802294857, "grad_norm": 0.5579399223727763, "learning_rate": 5.917815625844824e-06, "loss": 0.2413, "step": 91805 }, { "epoch": 2.680574006218887, "grad_norm": 0.5370700553404139, "learning_rate": 5.915112192484455e-06, "loss": 0.2283, "step": 91810 }, { "epoch": 2.6807199894892046, "grad_norm": 0.5256594249881419, "learning_rate": 5.912408759124087e-06, "loss": 0.2378, "step": 91815 }, { "epoch": 2.6808659727595217, "grad_norm": 0.5895015587265792, "learning_rate": 5.9097053257637206e-06, "loss": 0.2443, "step": 91820 }, { "epoch": 2.681011956029839, "grad_norm": 0.6288489832785981, "learning_rate": 5.907001892403353e-06, "loss": 0.2519, "step": 91825 }, { "epoch": 2.681157939300156, "grad_norm": 0.6353960193970973, "learning_rate": 5.904298459042984e-06, "loss": 0.2371, "step": 91830 }, { "epoch": 2.6813039225704736, "grad_norm": 0.565145675896656, "learning_rate": 5.9015950256826175e-06, "loss": 0.2428, "step": 91835 }, { "epoch": 2.6814499058407906, "grad_norm": 0.581146118152085, "learning_rate": 5.89889159232225e-06, "loss": 0.2362, "step": 91840 }, { "epoch": 2.681595889111108, "grad_norm": 0.5702401038508503, "learning_rate": 5.896188158961881e-06, "loss": 0.2432, "step": 91845 }, { "epoch": 2.681741872381425, "grad_norm": 0.5852881670297545, "learning_rate": 5.8934847256015145e-06, "loss": 0.2469, "step": 91850 }, { "epoch": 2.681887855651742, "grad_norm": 0.5924128252786156, "learning_rate": 5.890781292241147e-06, "loss": 0.2414, "step": 91855 }, { "epoch": 2.6820338389220595, "grad_norm": 0.6067730958972538, "learning_rate": 5.888077858880779e-06, "loss": 0.2432, "step": 91860 }, { "epoch": 2.682179822192377, "grad_norm": 0.5650061749647881, "learning_rate": 5.885374425520411e-06, "loss": 0.253, "step": 91865 }, { "epoch": 2.682325805462694, "grad_norm": 0.58755919114246, "learning_rate": 5.882670992160044e-06, "loss": 0.2558, "step": 91870 }, { "epoch": 2.682471788733011, "grad_norm": 0.5770763435725689, "learning_rate": 5.879967558799676e-06, "loss": 0.2375, "step": 91875 }, { "epoch": 2.6826177720033284, "grad_norm": 0.6706338166477849, "learning_rate": 5.8772641254393075e-06, "loss": 0.261, "step": 91880 }, { "epoch": 2.682763755273646, "grad_norm": 0.5911825825928704, "learning_rate": 5.874560692078941e-06, "loss": 0.2362, "step": 91885 }, { "epoch": 2.682909738543963, "grad_norm": 0.601100104804676, "learning_rate": 5.871857258718573e-06, "loss": 0.2278, "step": 91890 }, { "epoch": 2.68305572181428, "grad_norm": 0.5721188477918067, "learning_rate": 5.869153825358205e-06, "loss": 0.2565, "step": 91895 }, { "epoch": 2.6832017050845973, "grad_norm": 0.5760374978975966, "learning_rate": 5.866450391997838e-06, "loss": 0.2357, "step": 91900 }, { "epoch": 2.6833476883549148, "grad_norm": 0.601941704496472, "learning_rate": 5.86374695863747e-06, "loss": 0.24, "step": 91905 }, { "epoch": 2.683493671625232, "grad_norm": 0.6083413067553316, "learning_rate": 5.861043525277102e-06, "loss": 0.2476, "step": 91910 }, { "epoch": 2.683639654895549, "grad_norm": 0.58589532544315, "learning_rate": 5.858340091916735e-06, "loss": 0.2412, "step": 91915 }, { "epoch": 2.6837856381658662, "grad_norm": 0.5689661399970392, "learning_rate": 5.855636658556367e-06, "loss": 0.2467, "step": 91920 }, { "epoch": 2.6839316214361837, "grad_norm": 0.5932761296036481, "learning_rate": 5.852933225195999e-06, "loss": 0.2306, "step": 91925 }, { "epoch": 2.6840776047065007, "grad_norm": 0.6305506489804841, "learning_rate": 5.8502297918356315e-06, "loss": 0.2525, "step": 91930 }, { "epoch": 2.6842235879768177, "grad_norm": 0.5662698553248234, "learning_rate": 5.847526358475264e-06, "loss": 0.2313, "step": 91935 }, { "epoch": 2.684369571247135, "grad_norm": 0.5481154783652717, "learning_rate": 5.844822925114896e-06, "loss": 0.2392, "step": 91940 }, { "epoch": 2.684515554517452, "grad_norm": 0.6425907970637031, "learning_rate": 5.8421194917545285e-06, "loss": 0.2502, "step": 91945 }, { "epoch": 2.6846615377877696, "grad_norm": 0.6404420390354864, "learning_rate": 5.839416058394161e-06, "loss": 0.2442, "step": 91950 }, { "epoch": 2.6848075210580866, "grad_norm": 0.584381914858788, "learning_rate": 5.836712625033793e-06, "loss": 0.2492, "step": 91955 }, { "epoch": 2.684953504328404, "grad_norm": 0.5615842473033249, "learning_rate": 5.8340091916734254e-06, "loss": 0.2406, "step": 91960 }, { "epoch": 2.685099487598721, "grad_norm": 0.5781119854932348, "learning_rate": 5.831305758313058e-06, "loss": 0.239, "step": 91965 }, { "epoch": 2.6852454708690385, "grad_norm": 0.6327412969370485, "learning_rate": 5.82860232495269e-06, "loss": 0.2514, "step": 91970 }, { "epoch": 2.6853914541393555, "grad_norm": 0.6475581207275443, "learning_rate": 5.825898891592322e-06, "loss": 0.2386, "step": 91975 }, { "epoch": 2.685537437409673, "grad_norm": 0.6060543218574773, "learning_rate": 5.8231954582319556e-06, "loss": 0.2339, "step": 91980 }, { "epoch": 2.68568342067999, "grad_norm": 0.5826090774953185, "learning_rate": 5.820492024871587e-06, "loss": 0.25, "step": 91985 }, { "epoch": 2.6858294039503074, "grad_norm": 0.6064909208491556, "learning_rate": 5.817788591511219e-06, "loss": 0.2434, "step": 91990 }, { "epoch": 2.6859753872206245, "grad_norm": 0.5750458282310731, "learning_rate": 5.8150851581508525e-06, "loss": 0.2428, "step": 91995 }, { "epoch": 2.686121370490942, "grad_norm": 0.6322854198923367, "learning_rate": 5.812381724790484e-06, "loss": 0.2588, "step": 92000 }, { "epoch": 2.686267353761259, "grad_norm": 0.5935356311497406, "learning_rate": 5.809678291430116e-06, "loss": 0.2434, "step": 92005 }, { "epoch": 2.686413337031576, "grad_norm": 0.5806807356117604, "learning_rate": 5.806974858069749e-06, "loss": 0.2441, "step": 92010 }, { "epoch": 2.6865593203018934, "grad_norm": 0.6140529263646232, "learning_rate": 5.804271424709381e-06, "loss": 0.2503, "step": 92015 }, { "epoch": 2.686705303572211, "grad_norm": 0.5937774193384252, "learning_rate": 5.801567991349013e-06, "loss": 0.2436, "step": 92020 }, { "epoch": 2.686851286842528, "grad_norm": 0.52576908389824, "learning_rate": 5.7988645579886456e-06, "loss": 0.2259, "step": 92025 }, { "epoch": 2.686997270112845, "grad_norm": 0.5286279371451023, "learning_rate": 5.796161124628279e-06, "loss": 0.2224, "step": 92030 }, { "epoch": 2.6871432533831623, "grad_norm": 0.5702479224936383, "learning_rate": 5.79345769126791e-06, "loss": 0.256, "step": 92035 }, { "epoch": 2.6872892366534797, "grad_norm": 0.6108908513371495, "learning_rate": 5.7907542579075425e-06, "loss": 0.2518, "step": 92040 }, { "epoch": 2.6874352199237967, "grad_norm": 0.5684138257013547, "learning_rate": 5.788050824547176e-06, "loss": 0.2464, "step": 92045 }, { "epoch": 2.6875812031941138, "grad_norm": 0.5439905220359498, "learning_rate": 5.785347391186807e-06, "loss": 0.2509, "step": 92050 }, { "epoch": 2.687727186464431, "grad_norm": 0.5957700824741479, "learning_rate": 5.7826439578264395e-06, "loss": 0.2645, "step": 92055 }, { "epoch": 2.6878731697347487, "grad_norm": 0.5842381224274075, "learning_rate": 5.779940524466073e-06, "loss": 0.2391, "step": 92060 }, { "epoch": 2.6880191530050657, "grad_norm": 0.5958212975192952, "learning_rate": 5.777237091105705e-06, "loss": 0.2407, "step": 92065 }, { "epoch": 2.6881651362753827, "grad_norm": 0.6204907607064195, "learning_rate": 5.7745336577453364e-06, "loss": 0.2417, "step": 92070 }, { "epoch": 2.6883111195457, "grad_norm": 0.5574605900807248, "learning_rate": 5.77183022438497e-06, "loss": 0.2299, "step": 92075 }, { "epoch": 2.688457102816017, "grad_norm": 0.579377640562979, "learning_rate": 5.769126791024602e-06, "loss": 0.2345, "step": 92080 }, { "epoch": 2.6886030860863346, "grad_norm": 0.5667665503361097, "learning_rate": 5.766423357664233e-06, "loss": 0.2278, "step": 92085 }, { "epoch": 2.6887490693566516, "grad_norm": 0.6517709299649149, "learning_rate": 5.7637199243038665e-06, "loss": 0.2407, "step": 92090 }, { "epoch": 2.688895052626969, "grad_norm": 0.5973248777804367, "learning_rate": 5.761016490943499e-06, "loss": 0.2487, "step": 92095 }, { "epoch": 2.689041035897286, "grad_norm": 0.6031663569773047, "learning_rate": 5.75831305758313e-06, "loss": 0.2477, "step": 92100 }, { "epoch": 2.6891870191676035, "grad_norm": 0.5488221210894311, "learning_rate": 5.755609624222763e-06, "loss": 0.2433, "step": 92105 }, { "epoch": 2.6893330024379205, "grad_norm": 0.547060329437372, "learning_rate": 5.752906190862396e-06, "loss": 0.2372, "step": 92110 }, { "epoch": 2.689478985708238, "grad_norm": 0.611621478922858, "learning_rate": 5.750202757502028e-06, "loss": 0.2439, "step": 92115 }, { "epoch": 2.689624968978555, "grad_norm": 0.5803544201047036, "learning_rate": 5.74749932414166e-06, "loss": 0.2446, "step": 92120 }, { "epoch": 2.6897709522488724, "grad_norm": 0.601081171413135, "learning_rate": 5.744795890781293e-06, "loss": 0.2388, "step": 92125 }, { "epoch": 2.6899169355191894, "grad_norm": 0.598797327271165, "learning_rate": 5.742092457420925e-06, "loss": 0.2562, "step": 92130 }, { "epoch": 2.690062918789507, "grad_norm": 0.5797843126442795, "learning_rate": 5.7393890240605566e-06, "loss": 0.2527, "step": 92135 }, { "epoch": 2.690208902059824, "grad_norm": 0.6441676047708802, "learning_rate": 5.73668559070019e-06, "loss": 0.2522, "step": 92140 }, { "epoch": 2.690354885330141, "grad_norm": 0.6158162597514747, "learning_rate": 5.733982157339822e-06, "loss": 0.2211, "step": 92145 }, { "epoch": 2.6905008686004583, "grad_norm": 0.6098254558632858, "learning_rate": 5.731278723979454e-06, "loss": 0.2518, "step": 92150 }, { "epoch": 2.690646851870776, "grad_norm": 0.5942050137303171, "learning_rate": 5.728575290619087e-06, "loss": 0.2441, "step": 92155 }, { "epoch": 2.690792835141093, "grad_norm": 0.5956555751648126, "learning_rate": 5.725871857258719e-06, "loss": 0.2422, "step": 92160 }, { "epoch": 2.69093881841141, "grad_norm": 0.6061465087197272, "learning_rate": 5.723168423898351e-06, "loss": 0.2514, "step": 92165 }, { "epoch": 2.6910848016817273, "grad_norm": 0.6036417217283098, "learning_rate": 5.720464990537984e-06, "loss": 0.2366, "step": 92170 }, { "epoch": 2.6912307849520447, "grad_norm": 0.5673101933801219, "learning_rate": 5.717761557177616e-06, "loss": 0.2378, "step": 92175 }, { "epoch": 2.6913767682223617, "grad_norm": 0.6031303429700647, "learning_rate": 5.715058123817248e-06, "loss": 0.2302, "step": 92180 }, { "epoch": 2.6915227514926787, "grad_norm": 0.5512806026501555, "learning_rate": 5.7123546904568806e-06, "loss": 0.2429, "step": 92185 }, { "epoch": 2.691668734762996, "grad_norm": 0.5766235363486196, "learning_rate": 5.709651257096513e-06, "loss": 0.2388, "step": 92190 }, { "epoch": 2.6918147180333136, "grad_norm": 0.5540344813048855, "learning_rate": 5.706947823736145e-06, "loss": 0.2374, "step": 92195 }, { "epoch": 2.6919607013036306, "grad_norm": 0.5721443053253801, "learning_rate": 5.7042443903757775e-06, "loss": 0.2279, "step": 92200 }, { "epoch": 2.6921066845739476, "grad_norm": 0.5933595211014715, "learning_rate": 5.70154095701541e-06, "loss": 0.2476, "step": 92205 }, { "epoch": 2.692252667844265, "grad_norm": 0.6082149720925959, "learning_rate": 5.698837523655042e-06, "loss": 0.246, "step": 92210 }, { "epoch": 2.6923986511145825, "grad_norm": 0.5936556312583654, "learning_rate": 5.6961340902946745e-06, "loss": 0.2453, "step": 92215 }, { "epoch": 2.6925446343848995, "grad_norm": 0.5914795640279816, "learning_rate": 5.693430656934307e-06, "loss": 0.2384, "step": 92220 }, { "epoch": 2.6926906176552166, "grad_norm": 0.6178651333972897, "learning_rate": 5.690727223573939e-06, "loss": 0.2745, "step": 92225 }, { "epoch": 2.692836600925534, "grad_norm": 0.5759624504180664, "learning_rate": 5.6880237902135714e-06, "loss": 0.228, "step": 92230 }, { "epoch": 2.692982584195851, "grad_norm": 0.5936519912542142, "learning_rate": 5.685320356853204e-06, "loss": 0.2471, "step": 92235 }, { "epoch": 2.6931285674661685, "grad_norm": 0.5746472140878442, "learning_rate": 5.682616923492836e-06, "loss": 0.2443, "step": 92240 }, { "epoch": 2.6932745507364855, "grad_norm": 0.6160280012772535, "learning_rate": 5.679913490132468e-06, "loss": 0.2605, "step": 92245 }, { "epoch": 2.693420534006803, "grad_norm": 0.5727836479531644, "learning_rate": 5.677210056772101e-06, "loss": 0.2388, "step": 92250 }, { "epoch": 2.69356651727712, "grad_norm": 0.5412965669248346, "learning_rate": 5.674506623411733e-06, "loss": 0.2528, "step": 92255 }, { "epoch": 2.6937125005474374, "grad_norm": 0.5982549676836237, "learning_rate": 5.671803190051365e-06, "loss": 0.2645, "step": 92260 }, { "epoch": 2.6938584838177544, "grad_norm": 0.5630384813229448, "learning_rate": 5.669099756690998e-06, "loss": 0.236, "step": 92265 }, { "epoch": 2.694004467088072, "grad_norm": 0.5435626496857487, "learning_rate": 5.66639632333063e-06, "loss": 0.2403, "step": 92270 }, { "epoch": 2.694150450358389, "grad_norm": 0.609386214268874, "learning_rate": 5.663692889970262e-06, "loss": 0.2481, "step": 92275 }, { "epoch": 2.6942964336287063, "grad_norm": 0.582028757137529, "learning_rate": 5.660989456609895e-06, "loss": 0.2446, "step": 92280 }, { "epoch": 2.6944424168990233, "grad_norm": 0.586893792317451, "learning_rate": 5.658286023249528e-06, "loss": 0.229, "step": 92285 }, { "epoch": 2.6945884001693408, "grad_norm": 0.6120559004955474, "learning_rate": 5.655582589889159e-06, "loss": 0.2391, "step": 92290 }, { "epoch": 2.6947343834396578, "grad_norm": 0.6225452793995571, "learning_rate": 5.6528791565287916e-06, "loss": 0.2472, "step": 92295 }, { "epoch": 2.6948803667099748, "grad_norm": 0.5677403640593898, "learning_rate": 5.650175723168425e-06, "loss": 0.2586, "step": 92300 }, { "epoch": 2.6950263499802922, "grad_norm": 0.5311705199673703, "learning_rate": 5.647472289808056e-06, "loss": 0.2432, "step": 92305 }, { "epoch": 2.6951723332506097, "grad_norm": 0.5659347638401933, "learning_rate": 5.6447688564476885e-06, "loss": 0.2366, "step": 92310 }, { "epoch": 2.6953183165209267, "grad_norm": 0.5754249212544075, "learning_rate": 5.642065423087322e-06, "loss": 0.2359, "step": 92315 }, { "epoch": 2.6954642997912437, "grad_norm": 0.5806725786411434, "learning_rate": 5.639361989726954e-06, "loss": 0.2385, "step": 92320 }, { "epoch": 2.695610283061561, "grad_norm": 0.6124670260318369, "learning_rate": 5.6366585563665855e-06, "loss": 0.2435, "step": 92325 }, { "epoch": 2.6957562663318786, "grad_norm": 0.6258312296326556, "learning_rate": 5.633955123006218e-06, "loss": 0.237, "step": 92330 }, { "epoch": 2.6959022496021956, "grad_norm": 0.5698975073574016, "learning_rate": 5.631251689645851e-06, "loss": 0.2464, "step": 92335 }, { "epoch": 2.6960482328725126, "grad_norm": 0.5995570595639835, "learning_rate": 5.628548256285482e-06, "loss": 0.2478, "step": 92340 }, { "epoch": 2.69619421614283, "grad_norm": 0.5926924311525125, "learning_rate": 5.625844822925115e-06, "loss": 0.2439, "step": 92345 }, { "epoch": 2.6963401994131475, "grad_norm": 0.5610235989812794, "learning_rate": 5.623141389564748e-06, "loss": 0.2334, "step": 92350 }, { "epoch": 2.6964861826834645, "grad_norm": 0.5891761274813438, "learning_rate": 5.620437956204379e-06, "loss": 0.2382, "step": 92355 }, { "epoch": 2.6966321659537815, "grad_norm": 0.5737820152313868, "learning_rate": 5.617734522844012e-06, "loss": 0.2464, "step": 92360 }, { "epoch": 2.696778149224099, "grad_norm": 0.5781104931698281, "learning_rate": 5.615031089483645e-06, "loss": 0.2362, "step": 92365 }, { "epoch": 2.696924132494416, "grad_norm": 0.5467896728832253, "learning_rate": 5.612327656123277e-06, "loss": 0.2348, "step": 92370 }, { "epoch": 2.6970701157647334, "grad_norm": 0.5958647651445051, "learning_rate": 5.609624222762909e-06, "loss": 0.2482, "step": 92375 }, { "epoch": 2.6972160990350504, "grad_norm": 0.5639615895756008, "learning_rate": 5.606920789402542e-06, "loss": 0.2512, "step": 92380 }, { "epoch": 2.697362082305368, "grad_norm": 0.5904073650479862, "learning_rate": 5.604217356042174e-06, "loss": 0.2455, "step": 92385 }, { "epoch": 2.697508065575685, "grad_norm": 0.5967743403777165, "learning_rate": 5.601513922681806e-06, "loss": 0.2332, "step": 92390 }, { "epoch": 2.6976540488460024, "grad_norm": 0.6236531799476583, "learning_rate": 5.598810489321439e-06, "loss": 0.2563, "step": 92395 }, { "epoch": 2.6978000321163194, "grad_norm": 0.604428478487486, "learning_rate": 5.596107055961071e-06, "loss": 0.2506, "step": 92400 }, { "epoch": 2.697946015386637, "grad_norm": 0.6010992459581089, "learning_rate": 5.593403622600703e-06, "loss": 0.2608, "step": 92405 }, { "epoch": 2.698091998656954, "grad_norm": 0.6237163058025065, "learning_rate": 5.590700189240336e-06, "loss": 0.2573, "step": 92410 }, { "epoch": 2.6982379819272713, "grad_norm": 0.5846690821051125, "learning_rate": 5.587996755879968e-06, "loss": 0.2429, "step": 92415 }, { "epoch": 2.6983839651975883, "grad_norm": 0.5772443207808197, "learning_rate": 5.5852933225196e-06, "loss": 0.2437, "step": 92420 }, { "epoch": 2.6985299484679057, "grad_norm": 0.5581648422523795, "learning_rate": 5.582589889159233e-06, "loss": 0.235, "step": 92425 }, { "epoch": 2.6986759317382227, "grad_norm": 0.566623929860141, "learning_rate": 5.579886455798865e-06, "loss": 0.2355, "step": 92430 }, { "epoch": 2.69882191500854, "grad_norm": 0.5563163646817502, "learning_rate": 5.577183022438497e-06, "loss": 0.2409, "step": 92435 }, { "epoch": 2.698967898278857, "grad_norm": 0.5865245863668045, "learning_rate": 5.574479589078129e-06, "loss": 0.243, "step": 92440 }, { "epoch": 2.6991138815491746, "grad_norm": 0.5988166785278972, "learning_rate": 5.571776155717762e-06, "loss": 0.2544, "step": 92445 }, { "epoch": 2.6992598648194916, "grad_norm": 0.5981701977664468, "learning_rate": 5.569072722357394e-06, "loss": 0.2226, "step": 92450 }, { "epoch": 2.6994058480898087, "grad_norm": 0.5887409225452128, "learning_rate": 5.5663692889970266e-06, "loss": 0.2495, "step": 92455 }, { "epoch": 2.699551831360126, "grad_norm": 0.5925495256898528, "learning_rate": 5.563665855636659e-06, "loss": 0.2584, "step": 92460 }, { "epoch": 2.6996978146304436, "grad_norm": 0.5979728358363057, "learning_rate": 5.560962422276291e-06, "loss": 0.2496, "step": 92465 }, { "epoch": 2.6998437979007606, "grad_norm": 0.5694679070800365, "learning_rate": 5.5582589889159235e-06, "loss": 0.2267, "step": 92470 }, { "epoch": 2.6999897811710776, "grad_norm": 0.5740354375502995, "learning_rate": 5.555555555555556e-06, "loss": 0.2396, "step": 92475 }, { "epoch": 2.700135764441395, "grad_norm": 0.6189914268005714, "learning_rate": 5.552852122195188e-06, "loss": 0.2545, "step": 92480 }, { "epoch": 2.7002817477117125, "grad_norm": 0.5939560028921469, "learning_rate": 5.5501486888348205e-06, "loss": 0.2237, "step": 92485 }, { "epoch": 2.7004277309820295, "grad_norm": 0.599677436430931, "learning_rate": 5.547445255474453e-06, "loss": 0.2478, "step": 92490 }, { "epoch": 2.7005737142523465, "grad_norm": 0.5699254265784478, "learning_rate": 5.544741822114085e-06, "loss": 0.2495, "step": 92495 }, { "epoch": 2.700719697522664, "grad_norm": 0.5623341698203405, "learning_rate": 5.542038388753717e-06, "loss": 0.2385, "step": 92500 }, { "epoch": 2.7008656807929814, "grad_norm": 0.5997232955848163, "learning_rate": 5.53933495539335e-06, "loss": 0.2368, "step": 92505 }, { "epoch": 2.7010116640632984, "grad_norm": 0.6091330438052586, "learning_rate": 5.536631522032982e-06, "loss": 0.232, "step": 92510 }, { "epoch": 2.7011576473336154, "grad_norm": 0.6036414137807504, "learning_rate": 5.533928088672614e-06, "loss": 0.2432, "step": 92515 }, { "epoch": 2.701303630603933, "grad_norm": 0.5974298667982734, "learning_rate": 5.531224655312247e-06, "loss": 0.2463, "step": 92520 }, { "epoch": 2.70144961387425, "grad_norm": 0.5715590546827648, "learning_rate": 5.52852122195188e-06, "loss": 0.2528, "step": 92525 }, { "epoch": 2.7015955971445673, "grad_norm": 0.611331247324467, "learning_rate": 5.525817788591511e-06, "loss": 0.2434, "step": 92530 }, { "epoch": 2.7017415804148843, "grad_norm": 0.595813396969246, "learning_rate": 5.523114355231144e-06, "loss": 0.2429, "step": 92535 }, { "epoch": 2.7018875636852018, "grad_norm": 0.5422959921026365, "learning_rate": 5.520410921870777e-06, "loss": 0.2284, "step": 92540 }, { "epoch": 2.702033546955519, "grad_norm": 0.5762358044420569, "learning_rate": 5.517707488510408e-06, "loss": 0.2579, "step": 92545 }, { "epoch": 2.7021795302258362, "grad_norm": 0.5753600573648743, "learning_rate": 5.515004055150041e-06, "loss": 0.2429, "step": 92550 }, { "epoch": 2.7023255134961532, "grad_norm": 0.6156686776579141, "learning_rate": 5.512300621789674e-06, "loss": 0.2468, "step": 92555 }, { "epoch": 2.7024714967664707, "grad_norm": 0.5762894359322568, "learning_rate": 5.509597188429305e-06, "loss": 0.2336, "step": 92560 }, { "epoch": 2.7026174800367877, "grad_norm": 0.56497340567418, "learning_rate": 5.5068937550689375e-06, "loss": 0.248, "step": 92565 }, { "epoch": 2.702763463307105, "grad_norm": 0.6041067561337595, "learning_rate": 5.50419032170857e-06, "loss": 0.2423, "step": 92570 }, { "epoch": 2.702909446577422, "grad_norm": 0.6017597417956575, "learning_rate": 5.501486888348203e-06, "loss": 0.2367, "step": 92575 }, { "epoch": 2.7030554298477396, "grad_norm": 0.6525048909451208, "learning_rate": 5.4987834549878345e-06, "loss": 0.2473, "step": 92580 }, { "epoch": 2.7032014131180566, "grad_norm": 0.5584057019112584, "learning_rate": 5.496080021627467e-06, "loss": 0.2376, "step": 92585 }, { "epoch": 2.7033473963883736, "grad_norm": 0.612441917894781, "learning_rate": 5.4933765882671e-06, "loss": 0.2549, "step": 92590 }, { "epoch": 2.703493379658691, "grad_norm": 0.5726701922449592, "learning_rate": 5.4906731549067314e-06, "loss": 0.2423, "step": 92595 }, { "epoch": 2.7036393629290085, "grad_norm": 0.553256213923043, "learning_rate": 5.487969721546364e-06, "loss": 0.224, "step": 92600 }, { "epoch": 2.7037853461993255, "grad_norm": 0.569667080979938, "learning_rate": 5.485266288185997e-06, "loss": 0.2549, "step": 92605 }, { "epoch": 2.7039313294696425, "grad_norm": 0.6012439860266178, "learning_rate": 5.482562854825629e-06, "loss": 0.2407, "step": 92610 }, { "epoch": 2.70407731273996, "grad_norm": 0.5941242989091631, "learning_rate": 5.479859421465261e-06, "loss": 0.2323, "step": 92615 }, { "epoch": 2.7042232960102774, "grad_norm": 0.5623607821346537, "learning_rate": 5.477155988104894e-06, "loss": 0.2376, "step": 92620 }, { "epoch": 2.7043692792805945, "grad_norm": 0.5484078480055274, "learning_rate": 5.474452554744526e-06, "loss": 0.248, "step": 92625 }, { "epoch": 2.7045152625509115, "grad_norm": 0.5880025810615495, "learning_rate": 5.471749121384158e-06, "loss": 0.2306, "step": 92630 }, { "epoch": 2.704661245821229, "grad_norm": 0.5435026584312371, "learning_rate": 5.469045688023791e-06, "loss": 0.2411, "step": 92635 }, { "epoch": 2.7048072290915464, "grad_norm": 0.6428413529030025, "learning_rate": 5.466342254663423e-06, "loss": 0.2504, "step": 92640 }, { "epoch": 2.7049532123618634, "grad_norm": 0.5870654831384591, "learning_rate": 5.463638821303055e-06, "loss": 0.25, "step": 92645 }, { "epoch": 2.7050991956321804, "grad_norm": 0.6423713040260967, "learning_rate": 5.460935387942688e-06, "loss": 0.2489, "step": 92650 }, { "epoch": 2.705245178902498, "grad_norm": 0.6088841167146337, "learning_rate": 5.45823195458232e-06, "loss": 0.2327, "step": 92655 }, { "epoch": 2.705391162172815, "grad_norm": 0.6051550698581978, "learning_rate": 5.455528521221952e-06, "loss": 0.234, "step": 92660 }, { "epoch": 2.7055371454431323, "grad_norm": 0.6061809505193024, "learning_rate": 5.452825087861584e-06, "loss": 0.2386, "step": 92665 }, { "epoch": 2.7056831287134493, "grad_norm": 0.6284769116363311, "learning_rate": 5.450121654501217e-06, "loss": 0.249, "step": 92670 }, { "epoch": 2.7058291119837667, "grad_norm": 0.610474953291439, "learning_rate": 5.447418221140849e-06, "loss": 0.249, "step": 92675 }, { "epoch": 2.7059750952540838, "grad_norm": 0.5535272819033937, "learning_rate": 5.444714787780481e-06, "loss": 0.2384, "step": 92680 }, { "epoch": 2.706121078524401, "grad_norm": 0.601180659757751, "learning_rate": 5.442011354420114e-06, "loss": 0.2333, "step": 92685 }, { "epoch": 2.706267061794718, "grad_norm": 0.5783997147321636, "learning_rate": 5.439307921059746e-06, "loss": 0.2432, "step": 92690 }, { "epoch": 2.7064130450650357, "grad_norm": 0.5523028696877827, "learning_rate": 5.436604487699379e-06, "loss": 0.246, "step": 92695 }, { "epoch": 2.7065590283353527, "grad_norm": 0.5629993501848952, "learning_rate": 5.433901054339011e-06, "loss": 0.2322, "step": 92700 }, { "epoch": 2.70670501160567, "grad_norm": 0.5894279663620794, "learning_rate": 5.431197620978643e-06, "loss": 0.2471, "step": 92705 }, { "epoch": 2.706850994875987, "grad_norm": 0.568768073261667, "learning_rate": 5.428494187618276e-06, "loss": 0.2413, "step": 92710 }, { "epoch": 2.7069969781463046, "grad_norm": 0.6036330440522291, "learning_rate": 5.425790754257908e-06, "loss": 0.2522, "step": 92715 }, { "epoch": 2.7071429614166216, "grad_norm": 0.5534915578031756, "learning_rate": 5.42308732089754e-06, "loss": 0.2258, "step": 92720 }, { "epoch": 2.707288944686939, "grad_norm": 0.5587188739060941, "learning_rate": 5.4203838875371725e-06, "loss": 0.2429, "step": 92725 }, { "epoch": 2.707434927957256, "grad_norm": 0.6194842407070695, "learning_rate": 5.417680454176805e-06, "loss": 0.2343, "step": 92730 }, { "epoch": 2.7075809112275735, "grad_norm": 0.5714874325821488, "learning_rate": 5.414977020816437e-06, "loss": 0.2365, "step": 92735 }, { "epoch": 2.7077268944978905, "grad_norm": 0.5744749428223747, "learning_rate": 5.4122735874560695e-06, "loss": 0.2469, "step": 92740 }, { "epoch": 2.7078728777682075, "grad_norm": 0.6453401582618138, "learning_rate": 5.409570154095702e-06, "loss": 0.2512, "step": 92745 }, { "epoch": 2.708018861038525, "grad_norm": 0.6304733798743043, "learning_rate": 5.406866720735334e-06, "loss": 0.2377, "step": 92750 }, { "epoch": 2.7081648443088424, "grad_norm": 0.5749561893645906, "learning_rate": 5.4041632873749664e-06, "loss": 0.2442, "step": 92755 }, { "epoch": 2.7083108275791594, "grad_norm": 0.5243359060144158, "learning_rate": 5.401459854014599e-06, "loss": 0.2366, "step": 92760 }, { "epoch": 2.7084568108494764, "grad_norm": 0.5812257227955165, "learning_rate": 5.398756420654231e-06, "loss": 0.2467, "step": 92765 }, { "epoch": 2.708602794119794, "grad_norm": 0.5460431301665358, "learning_rate": 5.396052987293863e-06, "loss": 0.2449, "step": 92770 }, { "epoch": 2.7087487773901113, "grad_norm": 0.5652272458114789, "learning_rate": 5.393349553933496e-06, "loss": 0.2236, "step": 92775 }, { "epoch": 2.7088947606604283, "grad_norm": 0.5590138111414192, "learning_rate": 5.390646120573129e-06, "loss": 0.2284, "step": 92780 }, { "epoch": 2.7090407439307453, "grad_norm": 0.5728356707532123, "learning_rate": 5.38794268721276e-06, "loss": 0.2555, "step": 92785 }, { "epoch": 2.709186727201063, "grad_norm": 0.6101805606743912, "learning_rate": 5.385239253852393e-06, "loss": 0.2398, "step": 92790 }, { "epoch": 2.7093327104713802, "grad_norm": 0.5878800484198669, "learning_rate": 5.382535820492025e-06, "loss": 0.2419, "step": 92795 }, { "epoch": 2.7094786937416973, "grad_norm": 0.5706428727733147, "learning_rate": 5.379832387131657e-06, "loss": 0.2408, "step": 92800 }, { "epoch": 2.7096246770120143, "grad_norm": 0.5644788906566752, "learning_rate": 5.37712895377129e-06, "loss": 0.2481, "step": 92805 }, { "epoch": 2.7097706602823317, "grad_norm": 0.6134683078563625, "learning_rate": 5.374425520410922e-06, "loss": 0.2445, "step": 92810 }, { "epoch": 2.7099166435526487, "grad_norm": 0.555941813729873, "learning_rate": 5.371722087050554e-06, "loss": 0.2493, "step": 92815 }, { "epoch": 2.710062626822966, "grad_norm": 0.6086804200031339, "learning_rate": 5.3690186536901866e-06, "loss": 0.2452, "step": 92820 }, { "epoch": 2.710208610093283, "grad_norm": 0.6071742614358956, "learning_rate": 5.366315220329819e-06, "loss": 0.2334, "step": 92825 }, { "epoch": 2.7103545933636006, "grad_norm": 0.5628429375643292, "learning_rate": 5.363611786969452e-06, "loss": 0.2386, "step": 92830 }, { "epoch": 2.7105005766339176, "grad_norm": 0.5713036278971684, "learning_rate": 5.3609083536090835e-06, "loss": 0.2569, "step": 92835 }, { "epoch": 2.710646559904235, "grad_norm": 0.6100050126676057, "learning_rate": 5.358204920248716e-06, "loss": 0.2545, "step": 92840 }, { "epoch": 2.710792543174552, "grad_norm": 0.5818818435091566, "learning_rate": 5.355501486888349e-06, "loss": 0.2358, "step": 92845 }, { "epoch": 2.7109385264448695, "grad_norm": 0.6024583427258343, "learning_rate": 5.3527980535279805e-06, "loss": 0.2478, "step": 92850 }, { "epoch": 2.7110845097151866, "grad_norm": 0.6121766879510693, "learning_rate": 5.350094620167613e-06, "loss": 0.2435, "step": 92855 }, { "epoch": 2.711230492985504, "grad_norm": 0.5809595470423917, "learning_rate": 5.347391186807246e-06, "loss": 0.2565, "step": 92860 }, { "epoch": 2.711376476255821, "grad_norm": 0.6191019866068987, "learning_rate": 5.344687753446878e-06, "loss": 0.25, "step": 92865 }, { "epoch": 2.7115224595261385, "grad_norm": 0.5744694385760276, "learning_rate": 5.34198432008651e-06, "loss": 0.2363, "step": 92870 }, { "epoch": 2.7116684427964555, "grad_norm": 0.5739105178758209, "learning_rate": 5.339280886726143e-06, "loss": 0.2534, "step": 92875 }, { "epoch": 2.7118144260667725, "grad_norm": 0.5595207685720085, "learning_rate": 5.336577453365775e-06, "loss": 0.2499, "step": 92880 }, { "epoch": 2.71196040933709, "grad_norm": 0.5953126272796924, "learning_rate": 5.333874020005407e-06, "loss": 0.2337, "step": 92885 }, { "epoch": 2.7121063926074074, "grad_norm": 0.5858391887109798, "learning_rate": 5.331170586645039e-06, "loss": 0.2389, "step": 92890 }, { "epoch": 2.7122523758777244, "grad_norm": 0.6397936462053718, "learning_rate": 5.328467153284672e-06, "loss": 0.2357, "step": 92895 }, { "epoch": 2.7123983591480414, "grad_norm": 0.5943627573452905, "learning_rate": 5.325763719924304e-06, "loss": 0.2377, "step": 92900 }, { "epoch": 2.712544342418359, "grad_norm": 0.6336796408738784, "learning_rate": 5.323060286563936e-06, "loss": 0.2548, "step": 92905 }, { "epoch": 2.7126903256886763, "grad_norm": 0.5655720167607357, "learning_rate": 5.320356853203569e-06, "loss": 0.2534, "step": 92910 }, { "epoch": 2.7128363089589933, "grad_norm": 0.5945162235776125, "learning_rate": 5.3176534198432014e-06, "loss": 0.2413, "step": 92915 }, { "epoch": 2.7129822922293103, "grad_norm": 0.6183262244305382, "learning_rate": 5.314949986482833e-06, "loss": 0.2348, "step": 92920 }, { "epoch": 2.7131282754996278, "grad_norm": 0.6078845843248205, "learning_rate": 5.312246553122466e-06, "loss": 0.232, "step": 92925 }, { "epoch": 2.713274258769945, "grad_norm": 0.5916322772967328, "learning_rate": 5.309543119762098e-06, "loss": 0.24, "step": 92930 }, { "epoch": 2.713420242040262, "grad_norm": 0.5922180790939789, "learning_rate": 5.30683968640173e-06, "loss": 0.2392, "step": 92935 }, { "epoch": 2.7135662253105792, "grad_norm": 0.5452351378596096, "learning_rate": 5.304136253041363e-06, "loss": 0.2328, "step": 92940 }, { "epoch": 2.7137122085808967, "grad_norm": 0.5628518275618569, "learning_rate": 5.301432819680995e-06, "loss": 0.2402, "step": 92945 }, { "epoch": 2.7138581918512137, "grad_norm": 0.627317251214728, "learning_rate": 5.298729386320628e-06, "loss": 0.2651, "step": 92950 }, { "epoch": 2.714004175121531, "grad_norm": 0.5325093913812965, "learning_rate": 5.29602595296026e-06, "loss": 0.238, "step": 92955 }, { "epoch": 2.714150158391848, "grad_norm": 0.5412782513992893, "learning_rate": 5.293322519599892e-06, "loss": 0.2349, "step": 92960 }, { "epoch": 2.7142961416621656, "grad_norm": 0.6336210787463913, "learning_rate": 5.290619086239525e-06, "loss": 0.2446, "step": 92965 }, { "epoch": 2.7144421249324826, "grad_norm": 0.5920992926987676, "learning_rate": 5.287915652879157e-06, "loss": 0.2369, "step": 92970 }, { "epoch": 2.7145881082028, "grad_norm": 0.6451595089318901, "learning_rate": 5.285212219518789e-06, "loss": 0.2434, "step": 92975 }, { "epoch": 2.714734091473117, "grad_norm": 0.5666796469543838, "learning_rate": 5.2825087861584216e-06, "loss": 0.2334, "step": 92980 }, { "epoch": 2.7148800747434345, "grad_norm": 0.654227881548227, "learning_rate": 5.279805352798053e-06, "loss": 0.2462, "step": 92985 }, { "epoch": 2.7150260580137515, "grad_norm": 0.6423783373222108, "learning_rate": 5.277101919437686e-06, "loss": 0.254, "step": 92990 }, { "epoch": 2.715172041284069, "grad_norm": 0.5614419018083133, "learning_rate": 5.2743984860773185e-06, "loss": 0.245, "step": 92995 }, { "epoch": 2.715318024554386, "grad_norm": 0.5559336229099041, "learning_rate": 5.271695052716951e-06, "loss": 0.2406, "step": 93000 }, { "epoch": 2.7154640078247034, "grad_norm": 0.6389129722987712, "learning_rate": 5.268991619356583e-06, "loss": 0.2576, "step": 93005 }, { "epoch": 2.7156099910950204, "grad_norm": 0.6015928793548935, "learning_rate": 5.2662881859962155e-06, "loss": 0.2503, "step": 93010 }, { "epoch": 2.715755974365338, "grad_norm": 0.6028353213239424, "learning_rate": 5.263584752635848e-06, "loss": 0.2482, "step": 93015 }, { "epoch": 2.715901957635655, "grad_norm": 0.6012484205498693, "learning_rate": 5.26088131927548e-06, "loss": 0.2512, "step": 93020 }, { "epoch": 2.7160479409059723, "grad_norm": 0.5326452489148391, "learning_rate": 5.258177885915112e-06, "loss": 0.236, "step": 93025 }, { "epoch": 2.7161939241762894, "grad_norm": 0.5462684100915427, "learning_rate": 5.255474452554745e-06, "loss": 0.2316, "step": 93030 }, { "epoch": 2.7163399074466064, "grad_norm": 0.5916970306322399, "learning_rate": 5.252771019194377e-06, "loss": 0.2504, "step": 93035 }, { "epoch": 2.716485890716924, "grad_norm": 0.6205951094220927, "learning_rate": 5.250067585834009e-06, "loss": 0.2618, "step": 93040 }, { "epoch": 2.7166318739872413, "grad_norm": 0.6118263919817577, "learning_rate": 5.247364152473642e-06, "loss": 0.2588, "step": 93045 }, { "epoch": 2.7167778572575583, "grad_norm": 0.5946456749142953, "learning_rate": 5.244660719113274e-06, "loss": 0.2569, "step": 93050 }, { "epoch": 2.7169238405278753, "grad_norm": 0.6034395268683782, "learning_rate": 5.241957285752906e-06, "loss": 0.2576, "step": 93055 }, { "epoch": 2.7170698237981927, "grad_norm": 0.6022372978697383, "learning_rate": 5.239253852392539e-06, "loss": 0.2421, "step": 93060 }, { "epoch": 2.71721580706851, "grad_norm": 0.5637357453178088, "learning_rate": 5.236550419032171e-06, "loss": 0.247, "step": 93065 }, { "epoch": 2.717361790338827, "grad_norm": 0.5571817196715424, "learning_rate": 5.233846985671804e-06, "loss": 0.2338, "step": 93070 }, { "epoch": 2.717507773609144, "grad_norm": 0.5946775304260667, "learning_rate": 5.231143552311436e-06, "loss": 0.2406, "step": 93075 }, { "epoch": 2.7176537568794616, "grad_norm": 0.5971759420249497, "learning_rate": 5.228440118951068e-06, "loss": 0.2414, "step": 93080 }, { "epoch": 2.717799740149779, "grad_norm": 0.6115697110902537, "learning_rate": 5.225736685590701e-06, "loss": 0.2497, "step": 93085 }, { "epoch": 2.717945723420096, "grad_norm": 0.6009095516764864, "learning_rate": 5.2230332522303325e-06, "loss": 0.2383, "step": 93090 }, { "epoch": 2.718091706690413, "grad_norm": 0.586972842222464, "learning_rate": 5.220329818869965e-06, "loss": 0.2445, "step": 93095 }, { "epoch": 2.7182376899607306, "grad_norm": 0.5373555837866347, "learning_rate": 5.217626385509598e-06, "loss": 0.2266, "step": 93100 }, { "epoch": 2.7183836732310476, "grad_norm": 0.6113395121955079, "learning_rate": 5.2149229521492295e-06, "loss": 0.2622, "step": 93105 }, { "epoch": 2.718529656501365, "grad_norm": 0.6042564722363583, "learning_rate": 5.212219518788862e-06, "loss": 0.2423, "step": 93110 }, { "epoch": 2.718675639771682, "grad_norm": 0.5952789591944848, "learning_rate": 5.209516085428494e-06, "loss": 0.2328, "step": 93115 }, { "epoch": 2.7188216230419995, "grad_norm": 0.5879574281559269, "learning_rate": 5.206812652068127e-06, "loss": 0.2411, "step": 93120 }, { "epoch": 2.7189676063123165, "grad_norm": 0.5533408213630745, "learning_rate": 5.204109218707759e-06, "loss": 0.2323, "step": 93125 }, { "epoch": 2.719113589582634, "grad_norm": 0.6627346891588564, "learning_rate": 5.201405785347391e-06, "loss": 0.2647, "step": 93130 }, { "epoch": 2.719259572852951, "grad_norm": 0.5472035863305384, "learning_rate": 5.198702351987024e-06, "loss": 0.2361, "step": 93135 }, { "epoch": 2.7194055561232684, "grad_norm": 0.5258072008366507, "learning_rate": 5.195998918626656e-06, "loss": 0.2502, "step": 93140 }, { "epoch": 2.7195515393935854, "grad_norm": 0.5424801277851149, "learning_rate": 5.193295485266288e-06, "loss": 0.2444, "step": 93145 }, { "epoch": 2.719697522663903, "grad_norm": 0.6193015650639874, "learning_rate": 5.190592051905921e-06, "loss": 0.2527, "step": 93150 }, { "epoch": 2.71984350593422, "grad_norm": 0.5754776082714335, "learning_rate": 5.1878886185455535e-06, "loss": 0.2512, "step": 93155 }, { "epoch": 2.7199894892045373, "grad_norm": 0.5734981191859149, "learning_rate": 5.185185185185185e-06, "loss": 0.2519, "step": 93160 }, { "epoch": 2.7201354724748543, "grad_norm": 0.5970792530971116, "learning_rate": 5.182481751824818e-06, "loss": 0.2395, "step": 93165 }, { "epoch": 2.7202814557451713, "grad_norm": 0.5418420081458795, "learning_rate": 5.1797783184644505e-06, "loss": 0.2318, "step": 93170 }, { "epoch": 2.720427439015489, "grad_norm": 0.6135379170309115, "learning_rate": 5.177074885104082e-06, "loss": 0.2438, "step": 93175 }, { "epoch": 2.7205734222858062, "grad_norm": 0.604666603009837, "learning_rate": 5.174371451743715e-06, "loss": 0.2376, "step": 93180 }, { "epoch": 2.7207194055561232, "grad_norm": 0.5932263581526093, "learning_rate": 5.171668018383347e-06, "loss": 0.2388, "step": 93185 }, { "epoch": 2.7208653888264402, "grad_norm": 0.5989606053839674, "learning_rate": 5.168964585022979e-06, "loss": 0.2435, "step": 93190 }, { "epoch": 2.7210113720967577, "grad_norm": 0.5928059703195084, "learning_rate": 5.166261151662612e-06, "loss": 0.2506, "step": 93195 }, { "epoch": 2.721157355367075, "grad_norm": 0.5514635668715294, "learning_rate": 5.163557718302244e-06, "loss": 0.2487, "step": 93200 }, { "epoch": 2.721303338637392, "grad_norm": 0.5826071399089476, "learning_rate": 5.160854284941877e-06, "loss": 0.2283, "step": 93205 }, { "epoch": 2.721449321907709, "grad_norm": 0.5933684152538705, "learning_rate": 5.158150851581508e-06, "loss": 0.2426, "step": 93210 }, { "epoch": 2.7215953051780266, "grad_norm": 0.574788703181802, "learning_rate": 5.155447418221141e-06, "loss": 0.2365, "step": 93215 }, { "epoch": 2.721741288448344, "grad_norm": 0.5877710302094077, "learning_rate": 5.152743984860774e-06, "loss": 0.2487, "step": 93220 }, { "epoch": 2.721887271718661, "grad_norm": 0.6302995154184757, "learning_rate": 5.150040551500405e-06, "loss": 0.2475, "step": 93225 }, { "epoch": 2.722033254988978, "grad_norm": 0.5609703384442191, "learning_rate": 5.147337118140038e-06, "loss": 0.2625, "step": 93230 }, { "epoch": 2.7221792382592955, "grad_norm": 0.6237813496011329, "learning_rate": 5.144633684779671e-06, "loss": 0.2447, "step": 93235 }, { "epoch": 2.722325221529613, "grad_norm": 0.5694137815613861, "learning_rate": 5.141930251419303e-06, "loss": 0.2403, "step": 93240 }, { "epoch": 2.72247120479993, "grad_norm": 0.5969761792954992, "learning_rate": 5.139226818058935e-06, "loss": 0.2361, "step": 93245 }, { "epoch": 2.722617188070247, "grad_norm": 0.5816866141248093, "learning_rate": 5.1365233846985675e-06, "loss": 0.2338, "step": 93250 }, { "epoch": 2.7227631713405644, "grad_norm": 0.5265194865867752, "learning_rate": 5.1338199513382e-06, "loss": 0.2239, "step": 93255 }, { "epoch": 2.7229091546108815, "grad_norm": 0.5635112880896528, "learning_rate": 5.131116517977832e-06, "loss": 0.2332, "step": 93260 }, { "epoch": 2.723055137881199, "grad_norm": 0.5978960071252138, "learning_rate": 5.1284130846174645e-06, "loss": 0.2431, "step": 93265 }, { "epoch": 2.723201121151516, "grad_norm": 0.5624980209409376, "learning_rate": 5.125709651257097e-06, "loss": 0.2347, "step": 93270 }, { "epoch": 2.7233471044218334, "grad_norm": 0.6053096849055539, "learning_rate": 5.123006217896729e-06, "loss": 0.2498, "step": 93275 }, { "epoch": 2.7234930876921504, "grad_norm": 0.6284212167103851, "learning_rate": 5.1203027845363614e-06, "loss": 0.2464, "step": 93280 }, { "epoch": 2.723639070962468, "grad_norm": 0.5835379925833286, "learning_rate": 5.117599351175994e-06, "loss": 0.2419, "step": 93285 }, { "epoch": 2.723785054232785, "grad_norm": 0.5477808523605963, "learning_rate": 5.114895917815626e-06, "loss": 0.2284, "step": 93290 }, { "epoch": 2.7239310375031023, "grad_norm": 0.6077552997733497, "learning_rate": 5.112192484455258e-06, "loss": 0.2365, "step": 93295 }, { "epoch": 2.7240770207734193, "grad_norm": 0.5648049995852197, "learning_rate": 5.109489051094891e-06, "loss": 0.2365, "step": 93300 }, { "epoch": 2.7242230040437367, "grad_norm": 0.5728115480274591, "learning_rate": 5.106785617734523e-06, "loss": 0.2325, "step": 93305 }, { "epoch": 2.7243689873140537, "grad_norm": 0.5855501560528498, "learning_rate": 5.104082184374155e-06, "loss": 0.2337, "step": 93310 }, { "epoch": 2.724514970584371, "grad_norm": 0.563049735541992, "learning_rate": 5.101378751013788e-06, "loss": 0.2361, "step": 93315 }, { "epoch": 2.724660953854688, "grad_norm": 0.5729583561389409, "learning_rate": 5.09867531765342e-06, "loss": 0.2309, "step": 93320 }, { "epoch": 2.724806937125005, "grad_norm": 0.6045048548186845, "learning_rate": 5.095971884293053e-06, "loss": 0.2386, "step": 93325 }, { "epoch": 2.7249529203953227, "grad_norm": 0.6444670503730654, "learning_rate": 5.093268450932685e-06, "loss": 0.2453, "step": 93330 }, { "epoch": 2.72509890366564, "grad_norm": 0.5885012466110564, "learning_rate": 5.090565017572317e-06, "loss": 0.2525, "step": 93335 }, { "epoch": 2.725244886935957, "grad_norm": 0.5637301244555133, "learning_rate": 5.087861584211949e-06, "loss": 0.245, "step": 93340 }, { "epoch": 2.725390870206274, "grad_norm": 0.570651247096301, "learning_rate": 5.0851581508515816e-06, "loss": 0.2539, "step": 93345 }, { "epoch": 2.7255368534765916, "grad_norm": 0.603963201193604, "learning_rate": 5.082454717491214e-06, "loss": 0.2386, "step": 93350 }, { "epoch": 2.725682836746909, "grad_norm": 0.6389525151907315, "learning_rate": 5.079751284130846e-06, "loss": 0.2253, "step": 93355 }, { "epoch": 2.725828820017226, "grad_norm": 0.5965090767955095, "learning_rate": 5.0770478507704785e-06, "loss": 0.2415, "step": 93360 }, { "epoch": 2.725974803287543, "grad_norm": 0.6513368740804469, "learning_rate": 5.074344417410111e-06, "loss": 0.2555, "step": 93365 }, { "epoch": 2.7261207865578605, "grad_norm": 0.5735006527531742, "learning_rate": 5.071640984049743e-06, "loss": 0.2446, "step": 93370 }, { "epoch": 2.726266769828178, "grad_norm": 0.5505296200156343, "learning_rate": 5.068937550689376e-06, "loss": 0.2425, "step": 93375 }, { "epoch": 2.726412753098495, "grad_norm": 0.587753104723413, "learning_rate": 5.066234117329008e-06, "loss": 0.2409, "step": 93380 }, { "epoch": 2.726558736368812, "grad_norm": 0.587296739727403, "learning_rate": 5.06353068396864e-06, "loss": 0.2554, "step": 93385 }, { "epoch": 2.7267047196391294, "grad_norm": 0.5803589318642617, "learning_rate": 5.060827250608273e-06, "loss": 0.2395, "step": 93390 }, { "epoch": 2.7268507029094464, "grad_norm": 0.5309080202891715, "learning_rate": 5.058123817247905e-06, "loss": 0.2358, "step": 93395 }, { "epoch": 2.726996686179764, "grad_norm": 0.624423958271831, "learning_rate": 5.055420383887537e-06, "loss": 0.244, "step": 93400 }, { "epoch": 2.727142669450081, "grad_norm": 0.5981851915057474, "learning_rate": 5.05271695052717e-06, "loss": 0.2483, "step": 93405 }, { "epoch": 2.7272886527203983, "grad_norm": 0.5919478871502363, "learning_rate": 5.0500135171668025e-06, "loss": 0.2357, "step": 93410 }, { "epoch": 2.7274346359907153, "grad_norm": 0.5673163032922185, "learning_rate": 5.047310083806434e-06, "loss": 0.2419, "step": 93415 }, { "epoch": 2.727580619261033, "grad_norm": 0.5846510748354579, "learning_rate": 5.044606650446067e-06, "loss": 0.2411, "step": 93420 }, { "epoch": 2.72772660253135, "grad_norm": 0.6365160755231459, "learning_rate": 5.0419032170856995e-06, "loss": 0.2448, "step": 93425 }, { "epoch": 2.7278725858016672, "grad_norm": 0.594720810810428, "learning_rate": 5.039199783725331e-06, "loss": 0.2462, "step": 93430 }, { "epoch": 2.7280185690719843, "grad_norm": 0.6160331320388996, "learning_rate": 5.036496350364963e-06, "loss": 0.2513, "step": 93435 }, { "epoch": 2.7281645523423017, "grad_norm": 0.5493863325763597, "learning_rate": 5.0337929170045964e-06, "loss": 0.2357, "step": 93440 }, { "epoch": 2.7283105356126187, "grad_norm": 0.5603169206031525, "learning_rate": 5.031089483644228e-06, "loss": 0.2539, "step": 93445 }, { "epoch": 2.728456518882936, "grad_norm": 0.6155591171574187, "learning_rate": 5.02838605028386e-06, "loss": 0.2478, "step": 93450 }, { "epoch": 2.728602502153253, "grad_norm": 0.6546878760231636, "learning_rate": 5.025682616923493e-06, "loss": 0.2495, "step": 93455 }, { "epoch": 2.7287484854235706, "grad_norm": 0.6190074599159358, "learning_rate": 5.022979183563126e-06, "loss": 0.2379, "step": 93460 }, { "epoch": 2.7288944686938876, "grad_norm": 0.5550867946893251, "learning_rate": 5.020275750202757e-06, "loss": 0.2436, "step": 93465 }, { "epoch": 2.729040451964205, "grad_norm": 0.649243651731423, "learning_rate": 5.01757231684239e-06, "loss": 0.2553, "step": 93470 }, { "epoch": 2.729186435234522, "grad_norm": 0.5629846791972021, "learning_rate": 5.014868883482023e-06, "loss": 0.2361, "step": 93475 }, { "epoch": 2.729332418504839, "grad_norm": 0.6382391916593338, "learning_rate": 5.012165450121654e-06, "loss": 0.2514, "step": 93480 }, { "epoch": 2.7294784017751565, "grad_norm": 0.6258853497513766, "learning_rate": 5.009462016761287e-06, "loss": 0.2361, "step": 93485 }, { "epoch": 2.729624385045474, "grad_norm": 0.5710631663574492, "learning_rate": 5.00675858340092e-06, "loss": 0.2435, "step": 93490 }, { "epoch": 2.729770368315791, "grad_norm": 0.6122207448900737, "learning_rate": 5.004055150040552e-06, "loss": 0.2492, "step": 93495 }, { "epoch": 2.729916351586108, "grad_norm": 0.5999906738895103, "learning_rate": 5.001351716680184e-06, "loss": 0.254, "step": 93500 }, { "epoch": 2.7300623348564255, "grad_norm": 0.5281155713587834, "learning_rate": 4.9986482833198166e-06, "loss": 0.2383, "step": 93505 }, { "epoch": 2.730208318126743, "grad_norm": 0.5818142723388692, "learning_rate": 4.995944849959449e-06, "loss": 0.259, "step": 93510 }, { "epoch": 2.73035430139706, "grad_norm": 0.5672193437379687, "learning_rate": 4.993241416599081e-06, "loss": 0.2406, "step": 93515 }, { "epoch": 2.730500284667377, "grad_norm": 0.5753266763856428, "learning_rate": 4.9905379832387135e-06, "loss": 0.2484, "step": 93520 }, { "epoch": 2.7306462679376944, "grad_norm": 0.6412384519679114, "learning_rate": 4.987834549878346e-06, "loss": 0.2366, "step": 93525 }, { "epoch": 2.730792251208012, "grad_norm": 0.59554316322256, "learning_rate": 4.985131116517978e-06, "loss": 0.257, "step": 93530 }, { "epoch": 2.730938234478329, "grad_norm": 0.5610647379657013, "learning_rate": 4.9824276831576105e-06, "loss": 0.2518, "step": 93535 }, { "epoch": 2.731084217748646, "grad_norm": 0.5802462085832033, "learning_rate": 4.979724249797243e-06, "loss": 0.2448, "step": 93540 }, { "epoch": 2.7312302010189633, "grad_norm": 0.6053596898102592, "learning_rate": 4.977020816436875e-06, "loss": 0.2456, "step": 93545 }, { "epoch": 2.7313761842892803, "grad_norm": 0.5782956185479122, "learning_rate": 4.974317383076507e-06, "loss": 0.2489, "step": 93550 }, { "epoch": 2.7315221675595978, "grad_norm": 0.5847908838200495, "learning_rate": 4.97161394971614e-06, "loss": 0.2426, "step": 93555 }, { "epoch": 2.7316681508299148, "grad_norm": 0.6019658137651475, "learning_rate": 4.968910516355772e-06, "loss": 0.2409, "step": 93560 }, { "epoch": 2.731814134100232, "grad_norm": 0.5949934761851049, "learning_rate": 4.966207082995404e-06, "loss": 0.2558, "step": 93565 }, { "epoch": 2.7319601173705492, "grad_norm": 0.5811857335858615, "learning_rate": 4.963503649635037e-06, "loss": 0.2469, "step": 93570 }, { "epoch": 2.7321061006408667, "grad_norm": 0.6100921787477528, "learning_rate": 4.960800216274669e-06, "loss": 0.2389, "step": 93575 }, { "epoch": 2.7322520839111837, "grad_norm": 0.5718480819117127, "learning_rate": 4.958096782914301e-06, "loss": 0.2366, "step": 93580 }, { "epoch": 2.732398067181501, "grad_norm": 0.5806897735997955, "learning_rate": 4.955393349553934e-06, "loss": 0.2274, "step": 93585 }, { "epoch": 2.732544050451818, "grad_norm": 0.5968858964741881, "learning_rate": 4.952689916193566e-06, "loss": 0.2358, "step": 93590 }, { "epoch": 2.7326900337221356, "grad_norm": 0.6031639554949625, "learning_rate": 4.949986482833198e-06, "loss": 0.2481, "step": 93595 }, { "epoch": 2.7328360169924526, "grad_norm": 0.6121997977326799, "learning_rate": 4.947283049472831e-06, "loss": 0.2324, "step": 93600 }, { "epoch": 2.73298200026277, "grad_norm": 0.5812202963323618, "learning_rate": 4.944579616112463e-06, "loss": 0.2392, "step": 93605 }, { "epoch": 2.733127983533087, "grad_norm": 0.6260565339568471, "learning_rate": 4.941876182752095e-06, "loss": 0.2518, "step": 93610 }, { "epoch": 2.733273966803404, "grad_norm": 0.5721550503212712, "learning_rate": 4.9391727493917275e-06, "loss": 0.2422, "step": 93615 }, { "epoch": 2.7334199500737215, "grad_norm": 0.5669253273504099, "learning_rate": 4.93646931603136e-06, "loss": 0.2276, "step": 93620 }, { "epoch": 2.733565933344039, "grad_norm": 0.5258956749510022, "learning_rate": 4.933765882670992e-06, "loss": 0.2373, "step": 93625 }, { "epoch": 2.733711916614356, "grad_norm": 0.6361661696359181, "learning_rate": 4.931062449310625e-06, "loss": 0.2518, "step": 93630 }, { "epoch": 2.733857899884673, "grad_norm": 0.5913167172977307, "learning_rate": 4.928359015950257e-06, "loss": 0.2371, "step": 93635 }, { "epoch": 2.7340038831549904, "grad_norm": 0.5258772410227965, "learning_rate": 4.925655582589889e-06, "loss": 0.247, "step": 93640 }, { "epoch": 2.734149866425308, "grad_norm": 0.5671370332625877, "learning_rate": 4.922952149229522e-06, "loss": 0.2569, "step": 93645 }, { "epoch": 2.734295849695625, "grad_norm": 0.5397369478951733, "learning_rate": 4.920248715869154e-06, "loss": 0.2432, "step": 93650 }, { "epoch": 2.734441832965942, "grad_norm": 0.5725623240844742, "learning_rate": 4.917545282508786e-06, "loss": 0.2551, "step": 93655 }, { "epoch": 2.7345878162362593, "grad_norm": 0.551379478779155, "learning_rate": 4.914841849148419e-06, "loss": 0.2447, "step": 93660 }, { "epoch": 2.734733799506577, "grad_norm": 0.5602122550174785, "learning_rate": 4.9121384157880516e-06, "loss": 0.2345, "step": 93665 }, { "epoch": 2.734879782776894, "grad_norm": 0.5955858718636209, "learning_rate": 4.909434982427683e-06, "loss": 0.2332, "step": 93670 }, { "epoch": 2.735025766047211, "grad_norm": 0.6340323901906766, "learning_rate": 4.906731549067315e-06, "loss": 0.2413, "step": 93675 }, { "epoch": 2.7351717493175283, "grad_norm": 0.6021913720459721, "learning_rate": 4.9040281157069485e-06, "loss": 0.2385, "step": 93680 }, { "epoch": 2.7353177325878453, "grad_norm": 0.5807264436565321, "learning_rate": 4.90132468234658e-06, "loss": 0.2343, "step": 93685 }, { "epoch": 2.7354637158581627, "grad_norm": 0.5573258499238912, "learning_rate": 4.898621248986212e-06, "loss": 0.2495, "step": 93690 }, { "epoch": 2.7356096991284797, "grad_norm": 0.5897642454150112, "learning_rate": 4.8959178156258455e-06, "loss": 0.2599, "step": 93695 }, { "epoch": 2.735755682398797, "grad_norm": 0.6197765382252531, "learning_rate": 4.893214382265478e-06, "loss": 0.2565, "step": 93700 }, { "epoch": 2.735901665669114, "grad_norm": 0.5890546941664327, "learning_rate": 4.890510948905109e-06, "loss": 0.2505, "step": 93705 }, { "epoch": 2.7360476489394316, "grad_norm": 0.5820544530589512, "learning_rate": 4.887807515544742e-06, "loss": 0.2453, "step": 93710 }, { "epoch": 2.7361936322097486, "grad_norm": 0.5660509198935155, "learning_rate": 4.885104082184375e-06, "loss": 0.2326, "step": 93715 }, { "epoch": 2.736339615480066, "grad_norm": 0.551932716411586, "learning_rate": 4.882400648824006e-06, "loss": 0.2458, "step": 93720 }, { "epoch": 2.736485598750383, "grad_norm": 0.6518221452394379, "learning_rate": 4.879697215463639e-06, "loss": 0.2527, "step": 93725 }, { "epoch": 2.7366315820207006, "grad_norm": 0.6219184021977161, "learning_rate": 4.876993782103272e-06, "loss": 0.2591, "step": 93730 }, { "epoch": 2.7367775652910176, "grad_norm": 0.626090222690313, "learning_rate": 4.874290348742903e-06, "loss": 0.2478, "step": 93735 }, { "epoch": 2.736923548561335, "grad_norm": 0.5312154327961971, "learning_rate": 4.871586915382536e-06, "loss": 0.2352, "step": 93740 }, { "epoch": 2.737069531831652, "grad_norm": 0.5627307268773304, "learning_rate": 4.868883482022169e-06, "loss": 0.2463, "step": 93745 }, { "epoch": 2.7372155151019695, "grad_norm": 0.5858760900992361, "learning_rate": 4.866180048661801e-06, "loss": 0.2432, "step": 93750 }, { "epoch": 2.7373614983722865, "grad_norm": 0.5743836839165131, "learning_rate": 4.863476615301433e-06, "loss": 0.2325, "step": 93755 }, { "epoch": 2.737507481642604, "grad_norm": 0.5702031648958439, "learning_rate": 4.860773181941066e-06, "loss": 0.232, "step": 93760 }, { "epoch": 2.737653464912921, "grad_norm": 0.5681312894256189, "learning_rate": 4.858069748580698e-06, "loss": 0.2348, "step": 93765 }, { "epoch": 2.737799448183238, "grad_norm": 0.6171678321715979, "learning_rate": 4.855366315220329e-06, "loss": 0.2437, "step": 93770 }, { "epoch": 2.7379454314535554, "grad_norm": 0.593835742840076, "learning_rate": 4.8526628818599625e-06, "loss": 0.2476, "step": 93775 }, { "epoch": 2.738091414723873, "grad_norm": 0.6149853085066046, "learning_rate": 4.849959448499595e-06, "loss": 0.2457, "step": 93780 }, { "epoch": 2.73823739799419, "grad_norm": 0.5814595116339961, "learning_rate": 4.847256015139227e-06, "loss": 0.236, "step": 93785 }, { "epoch": 2.738383381264507, "grad_norm": 0.5438167301047471, "learning_rate": 4.8445525817788595e-06, "loss": 0.2395, "step": 93790 }, { "epoch": 2.7385293645348243, "grad_norm": 0.550203918730245, "learning_rate": 4.841849148418492e-06, "loss": 0.2282, "step": 93795 }, { "epoch": 2.7386753478051418, "grad_norm": 0.6451372427606553, "learning_rate": 4.839145715058124e-06, "loss": 0.2404, "step": 93800 }, { "epoch": 2.7388213310754588, "grad_norm": 0.5673570223870783, "learning_rate": 4.8364422816977565e-06, "loss": 0.2373, "step": 93805 }, { "epoch": 2.738967314345776, "grad_norm": 0.5310017313271637, "learning_rate": 4.833738848337389e-06, "loss": 0.2366, "step": 93810 }, { "epoch": 2.7391132976160932, "grad_norm": 0.5300366199446324, "learning_rate": 4.831035414977021e-06, "loss": 0.2301, "step": 93815 }, { "epoch": 2.7392592808864107, "grad_norm": 0.5543888705573463, "learning_rate": 4.828331981616653e-06, "loss": 0.2375, "step": 93820 }, { "epoch": 2.7394052641567277, "grad_norm": 0.6245907783516137, "learning_rate": 4.825628548256286e-06, "loss": 0.2403, "step": 93825 }, { "epoch": 2.7395512474270447, "grad_norm": 0.6085311066594936, "learning_rate": 4.822925114895918e-06, "loss": 0.2287, "step": 93830 }, { "epoch": 2.739697230697362, "grad_norm": 0.5936531485725102, "learning_rate": 4.82022168153555e-06, "loss": 0.2534, "step": 93835 }, { "epoch": 2.739843213967679, "grad_norm": 0.5677474772640472, "learning_rate": 4.817518248175183e-06, "loss": 0.2372, "step": 93840 }, { "epoch": 2.7399891972379966, "grad_norm": 0.6282544960591442, "learning_rate": 4.814814814814815e-06, "loss": 0.2404, "step": 93845 }, { "epoch": 2.7401351805083136, "grad_norm": 0.6213331785924838, "learning_rate": 4.812111381454447e-06, "loss": 0.244, "step": 93850 }, { "epoch": 2.740281163778631, "grad_norm": 0.6088770572422001, "learning_rate": 4.80940794809408e-06, "loss": 0.2503, "step": 93855 }, { "epoch": 2.740427147048948, "grad_norm": 0.5894636150259519, "learning_rate": 4.806704514733712e-06, "loss": 0.2385, "step": 93860 }, { "epoch": 2.7405731303192655, "grad_norm": 0.5789059475930378, "learning_rate": 4.804001081373344e-06, "loss": 0.2336, "step": 93865 }, { "epoch": 2.7407191135895825, "grad_norm": 0.6004508638769259, "learning_rate": 4.801297648012977e-06, "loss": 0.2306, "step": 93870 }, { "epoch": 2.7408650968599, "grad_norm": 0.6674211234881479, "learning_rate": 4.798594214652609e-06, "loss": 0.259, "step": 93875 }, { "epoch": 2.741011080130217, "grad_norm": 0.6103233269009412, "learning_rate": 4.795890781292241e-06, "loss": 0.2429, "step": 93880 }, { "epoch": 2.7411570634005344, "grad_norm": 0.5944177317740078, "learning_rate": 4.793187347931874e-06, "loss": 0.2409, "step": 93885 }, { "epoch": 2.7413030466708515, "grad_norm": 0.5928644730405651, "learning_rate": 4.790483914571506e-06, "loss": 0.2502, "step": 93890 }, { "epoch": 2.741449029941169, "grad_norm": 0.552876633156553, "learning_rate": 4.787780481211138e-06, "loss": 0.2438, "step": 93895 }, { "epoch": 2.741595013211486, "grad_norm": 0.5372343600568079, "learning_rate": 4.7850770478507705e-06, "loss": 0.2516, "step": 93900 }, { "epoch": 2.741740996481803, "grad_norm": 0.606119538667143, "learning_rate": 4.782373614490403e-06, "loss": 0.2368, "step": 93905 }, { "epoch": 2.7418869797521204, "grad_norm": 0.5933319575679808, "learning_rate": 4.779670181130035e-06, "loss": 0.2345, "step": 93910 }, { "epoch": 2.742032963022438, "grad_norm": 0.5675076743130836, "learning_rate": 4.7769667477696674e-06, "loss": 0.2353, "step": 93915 }, { "epoch": 2.742178946292755, "grad_norm": 0.6133605809796735, "learning_rate": 4.774263314409301e-06, "loss": 0.2346, "step": 93920 }, { "epoch": 2.742324929563072, "grad_norm": 0.5888917184425972, "learning_rate": 4.771559881048932e-06, "loss": 0.2394, "step": 93925 }, { "epoch": 2.7424709128333893, "grad_norm": 0.6363153288549203, "learning_rate": 4.768856447688564e-06, "loss": 0.2372, "step": 93930 }, { "epoch": 2.7426168961037067, "grad_norm": 0.581846436897951, "learning_rate": 4.7661530143281975e-06, "loss": 0.2312, "step": 93935 }, { "epoch": 2.7427628793740237, "grad_norm": 0.6237401293127777, "learning_rate": 4.763449580967829e-06, "loss": 0.2553, "step": 93940 }, { "epoch": 2.7429088626443408, "grad_norm": 0.5740902635141046, "learning_rate": 4.760746147607461e-06, "loss": 0.2368, "step": 93945 }, { "epoch": 2.743054845914658, "grad_norm": 0.564686700163829, "learning_rate": 4.7580427142470945e-06, "loss": 0.2453, "step": 93950 }, { "epoch": 2.7432008291849757, "grad_norm": 0.6234682359757385, "learning_rate": 4.755339280886727e-06, "loss": 0.2431, "step": 93955 }, { "epoch": 2.7433468124552927, "grad_norm": 0.6042088967274274, "learning_rate": 4.752635847526358e-06, "loss": 0.2545, "step": 93960 }, { "epoch": 2.7434927957256097, "grad_norm": 0.5833251394459557, "learning_rate": 4.7499324141659915e-06, "loss": 0.2605, "step": 93965 }, { "epoch": 2.743638778995927, "grad_norm": 0.5276706959533988, "learning_rate": 4.747228980805624e-06, "loss": 0.2132, "step": 93970 }, { "epoch": 2.743784762266244, "grad_norm": 0.56064778826616, "learning_rate": 4.744525547445255e-06, "loss": 0.2546, "step": 93975 }, { "epoch": 2.7439307455365616, "grad_norm": 0.5554711123043896, "learning_rate": 4.741822114084888e-06, "loss": 0.2402, "step": 93980 }, { "epoch": 2.7440767288068786, "grad_norm": 0.567787007060461, "learning_rate": 4.739118680724521e-06, "loss": 0.2538, "step": 93985 }, { "epoch": 2.744222712077196, "grad_norm": 0.5414465532988676, "learning_rate": 4.736415247364152e-06, "loss": 0.2502, "step": 93990 }, { "epoch": 2.744368695347513, "grad_norm": 0.5702839827809295, "learning_rate": 4.7337118140037845e-06, "loss": 0.2312, "step": 93995 }, { "epoch": 2.7445146786178305, "grad_norm": 0.5767133059222239, "learning_rate": 4.731008380643418e-06, "loss": 0.2319, "step": 94000 }, { "epoch": 2.7446606618881475, "grad_norm": 0.6462875109683119, "learning_rate": 4.72830494728305e-06, "loss": 0.2667, "step": 94005 }, { "epoch": 2.744806645158465, "grad_norm": 0.572302982802274, "learning_rate": 4.7256015139226815e-06, "loss": 0.23, "step": 94010 }, { "epoch": 2.744952628428782, "grad_norm": 0.6084933374325097, "learning_rate": 4.722898080562315e-06, "loss": 0.2403, "step": 94015 }, { "epoch": 2.7450986116990994, "grad_norm": 0.6110638298826774, "learning_rate": 4.720194647201947e-06, "loss": 0.2613, "step": 94020 }, { "epoch": 2.7452445949694164, "grad_norm": 0.5302388264087683, "learning_rate": 4.717491213841578e-06, "loss": 0.2463, "step": 94025 }, { "epoch": 2.745390578239734, "grad_norm": 0.5621348261597731, "learning_rate": 4.7147877804812116e-06, "loss": 0.2196, "step": 94030 }, { "epoch": 2.745536561510051, "grad_norm": 0.5910606659357076, "learning_rate": 4.712084347120844e-06, "loss": 0.248, "step": 94035 }, { "epoch": 2.7456825447803683, "grad_norm": 0.5256958934624172, "learning_rate": 4.709380913760476e-06, "loss": 0.237, "step": 94040 }, { "epoch": 2.7458285280506853, "grad_norm": 0.6237170678638043, "learning_rate": 4.7066774804001085e-06, "loss": 0.2544, "step": 94045 }, { "epoch": 2.745974511321003, "grad_norm": 0.5806413953295034, "learning_rate": 4.703974047039741e-06, "loss": 0.2284, "step": 94050 }, { "epoch": 2.74612049459132, "grad_norm": 0.5550293108179235, "learning_rate": 4.701270613679373e-06, "loss": 0.2199, "step": 94055 }, { "epoch": 2.746266477861637, "grad_norm": 0.6336477965879641, "learning_rate": 4.6985671803190055e-06, "loss": 0.2512, "step": 94060 }, { "epoch": 2.7464124611319543, "grad_norm": 0.6380885075582292, "learning_rate": 4.695863746958638e-06, "loss": 0.2298, "step": 94065 }, { "epoch": 2.7465584444022717, "grad_norm": 0.5478359248350908, "learning_rate": 4.69316031359827e-06, "loss": 0.257, "step": 94070 }, { "epoch": 2.7467044276725887, "grad_norm": 0.6390830095195078, "learning_rate": 4.6904568802379024e-06, "loss": 0.2491, "step": 94075 }, { "epoch": 2.7468504109429057, "grad_norm": 0.5842407263986849, "learning_rate": 4.687753446877535e-06, "loss": 0.2326, "step": 94080 }, { "epoch": 2.746996394213223, "grad_norm": 0.5862034801519485, "learning_rate": 4.685050013517167e-06, "loss": 0.2355, "step": 94085 }, { "epoch": 2.7471423774835406, "grad_norm": 0.5976697546052857, "learning_rate": 4.682346580156799e-06, "loss": 0.251, "step": 94090 }, { "epoch": 2.7472883607538576, "grad_norm": 0.5931600056165462, "learning_rate": 4.679643146796432e-06, "loss": 0.2579, "step": 94095 }, { "epoch": 2.7474343440241746, "grad_norm": 0.5993340741080071, "learning_rate": 4.676939713436064e-06, "loss": 0.2438, "step": 94100 }, { "epoch": 2.747580327294492, "grad_norm": 0.6633996524399988, "learning_rate": 4.674236280075696e-06, "loss": 0.2409, "step": 94105 }, { "epoch": 2.7477263105648095, "grad_norm": 0.6314734273844914, "learning_rate": 4.671532846715329e-06, "loss": 0.2366, "step": 94110 }, { "epoch": 2.7478722938351265, "grad_norm": 0.6165335207992263, "learning_rate": 4.668829413354961e-06, "loss": 0.2555, "step": 94115 }, { "epoch": 2.7480182771054436, "grad_norm": 0.6006759592136405, "learning_rate": 4.666125979994593e-06, "loss": 0.2504, "step": 94120 }, { "epoch": 2.748164260375761, "grad_norm": 0.584986427162964, "learning_rate": 4.663422546634226e-06, "loss": 0.2428, "step": 94125 }, { "epoch": 2.748310243646078, "grad_norm": 0.5373344994647896, "learning_rate": 4.660719113273858e-06, "loss": 0.2511, "step": 94130 }, { "epoch": 2.7484562269163955, "grad_norm": 0.5967083657375296, "learning_rate": 4.65801567991349e-06, "loss": 0.2258, "step": 94135 }, { "epoch": 2.7486022101867125, "grad_norm": 0.5624750966538635, "learning_rate": 4.6553122465531226e-06, "loss": 0.2354, "step": 94140 }, { "epoch": 2.74874819345703, "grad_norm": 0.6371650362447748, "learning_rate": 4.652608813192755e-06, "loss": 0.2561, "step": 94145 }, { "epoch": 2.748894176727347, "grad_norm": 0.575521926753064, "learning_rate": 4.649905379832387e-06, "loss": 0.2297, "step": 94150 }, { "epoch": 2.7490401599976644, "grad_norm": 0.6398250087623722, "learning_rate": 4.6472019464720195e-06, "loss": 0.2433, "step": 94155 }, { "epoch": 2.7491861432679814, "grad_norm": 0.6062756637121709, "learning_rate": 4.644498513111652e-06, "loss": 0.2433, "step": 94160 }, { "epoch": 2.749332126538299, "grad_norm": 0.5947337586027238, "learning_rate": 4.641795079751284e-06, "loss": 0.2339, "step": 94165 }, { "epoch": 2.749478109808616, "grad_norm": 0.5742533048301344, "learning_rate": 4.6390916463909165e-06, "loss": 0.2333, "step": 94170 }, { "epoch": 2.7496240930789333, "grad_norm": 0.58354570153972, "learning_rate": 4.63638821303055e-06, "loss": 0.2299, "step": 94175 }, { "epoch": 2.7497700763492503, "grad_norm": 0.5531972007651752, "learning_rate": 4.633684779670181e-06, "loss": 0.2487, "step": 94180 }, { "epoch": 2.7499160596195678, "grad_norm": 0.5392917699538067, "learning_rate": 4.630981346309813e-06, "loss": 0.2409, "step": 94185 }, { "epoch": 2.7500620428898848, "grad_norm": 0.6008912358828803, "learning_rate": 4.6282779129494466e-06, "loss": 0.236, "step": 94190 }, { "epoch": 2.7502080261602018, "grad_norm": 0.594348822109865, "learning_rate": 4.625574479589078e-06, "loss": 0.2545, "step": 94195 }, { "epoch": 2.750354009430519, "grad_norm": 0.6318523399759153, "learning_rate": 4.62287104622871e-06, "loss": 0.2427, "step": 94200 }, { "epoch": 2.7504999927008367, "grad_norm": 0.5241296640294605, "learning_rate": 4.6201676128683435e-06, "loss": 0.245, "step": 94205 }, { "epoch": 2.7506459759711537, "grad_norm": 0.612051445514853, "learning_rate": 4.617464179507976e-06, "loss": 0.2461, "step": 94210 }, { "epoch": 2.7507919592414707, "grad_norm": 0.5246059238743652, "learning_rate": 4.614760746147607e-06, "loss": 0.2401, "step": 94215 }, { "epoch": 2.750937942511788, "grad_norm": 0.5591078826213092, "learning_rate": 4.61205731278724e-06, "loss": 0.2377, "step": 94220 }, { "epoch": 2.7510839257821056, "grad_norm": 0.6283545514191216, "learning_rate": 4.609353879426873e-06, "loss": 0.246, "step": 94225 }, { "epoch": 2.7512299090524226, "grad_norm": 0.5759766896979233, "learning_rate": 4.606650446066504e-06, "loss": 0.2496, "step": 94230 }, { "epoch": 2.7513758923227396, "grad_norm": 0.5218196913848298, "learning_rate": 4.603947012706137e-06, "loss": 0.2243, "step": 94235 }, { "epoch": 2.751521875593057, "grad_norm": 0.5840433194637528, "learning_rate": 4.60124357934577e-06, "loss": 0.242, "step": 94240 }, { "epoch": 2.7516678588633745, "grad_norm": 0.590606284904302, "learning_rate": 4.598540145985401e-06, "loss": 0.2381, "step": 94245 }, { "epoch": 2.7518138421336915, "grad_norm": 0.5691992479311697, "learning_rate": 4.5958367126250335e-06, "loss": 0.2394, "step": 94250 }, { "epoch": 2.7519598254040085, "grad_norm": 0.6075884604476842, "learning_rate": 4.593133279264667e-06, "loss": 0.2554, "step": 94255 }, { "epoch": 2.752105808674326, "grad_norm": 0.6230816715941087, "learning_rate": 4.590429845904299e-06, "loss": 0.238, "step": 94260 }, { "epoch": 2.752251791944643, "grad_norm": 0.6041862899076316, "learning_rate": 4.5877264125439305e-06, "loss": 0.2355, "step": 94265 }, { "epoch": 2.7523977752149604, "grad_norm": 0.5743985945283049, "learning_rate": 4.585022979183564e-06, "loss": 0.2415, "step": 94270 }, { "epoch": 2.7525437584852774, "grad_norm": 0.5813709024194988, "learning_rate": 4.582319545823196e-06, "loss": 0.2371, "step": 94275 }, { "epoch": 2.752689741755595, "grad_norm": 0.5818285647456237, "learning_rate": 4.5796161124628274e-06, "loss": 0.2436, "step": 94280 }, { "epoch": 2.752835725025912, "grad_norm": 0.5750362860882752, "learning_rate": 4.576912679102461e-06, "loss": 0.234, "step": 94285 }, { "epoch": 2.7529817082962293, "grad_norm": 0.6318958549985808, "learning_rate": 4.574209245742093e-06, "loss": 0.2692, "step": 94290 }, { "epoch": 2.7531276915665464, "grad_norm": 0.5528907390966606, "learning_rate": 4.571505812381725e-06, "loss": 0.2378, "step": 94295 }, { "epoch": 2.753273674836864, "grad_norm": 0.5968264605319645, "learning_rate": 4.5688023790213576e-06, "loss": 0.2418, "step": 94300 }, { "epoch": 2.753419658107181, "grad_norm": 0.5652488612119161, "learning_rate": 4.56609894566099e-06, "loss": 0.2346, "step": 94305 }, { "epoch": 2.7535656413774983, "grad_norm": 0.5786186710381818, "learning_rate": 4.563395512300622e-06, "loss": 0.2445, "step": 94310 }, { "epoch": 2.7537116246478153, "grad_norm": 0.5878021244967702, "learning_rate": 4.560692078940254e-06, "loss": 0.2496, "step": 94315 }, { "epoch": 2.7538576079181327, "grad_norm": 0.6276826919507192, "learning_rate": 4.557988645579887e-06, "loss": 0.2341, "step": 94320 }, { "epoch": 2.7540035911884497, "grad_norm": 0.5611897657666279, "learning_rate": 4.555285212219519e-06, "loss": 0.2574, "step": 94325 }, { "epoch": 2.754149574458767, "grad_norm": 0.5567128652966806, "learning_rate": 4.5525817788591515e-06, "loss": 0.231, "step": 94330 }, { "epoch": 2.754295557729084, "grad_norm": 0.5913955189666547, "learning_rate": 4.549878345498784e-06, "loss": 0.2437, "step": 94335 }, { "epoch": 2.7544415409994016, "grad_norm": 0.6096443333692995, "learning_rate": 4.547174912138416e-06, "loss": 0.2308, "step": 94340 }, { "epoch": 2.7545875242697186, "grad_norm": 0.6028225424015299, "learning_rate": 4.544471478778048e-06, "loss": 0.2462, "step": 94345 }, { "epoch": 2.7547335075400357, "grad_norm": 0.6279466052210345, "learning_rate": 4.541768045417681e-06, "loss": 0.2417, "step": 94350 }, { "epoch": 2.754879490810353, "grad_norm": 0.5476198226879648, "learning_rate": 4.539064612057313e-06, "loss": 0.2346, "step": 94355 }, { "epoch": 2.7550254740806706, "grad_norm": 0.5669430621446887, "learning_rate": 4.536361178696945e-06, "loss": 0.2347, "step": 94360 }, { "epoch": 2.7551714573509876, "grad_norm": 0.5770446660522344, "learning_rate": 4.533657745336578e-06, "loss": 0.247, "step": 94365 }, { "epoch": 2.7553174406213046, "grad_norm": 0.6286975261551883, "learning_rate": 4.53095431197621e-06, "loss": 0.2468, "step": 94370 }, { "epoch": 2.755463423891622, "grad_norm": 0.5722318453326004, "learning_rate": 4.528250878615842e-06, "loss": 0.2414, "step": 94375 }, { "epoch": 2.7556094071619395, "grad_norm": 0.557784762223025, "learning_rate": 4.525547445255475e-06, "loss": 0.24, "step": 94380 }, { "epoch": 2.7557553904322565, "grad_norm": 0.615362201010635, "learning_rate": 4.522844011895107e-06, "loss": 0.2384, "step": 94385 }, { "epoch": 2.7559013737025735, "grad_norm": 0.6467799758297312, "learning_rate": 4.520140578534739e-06, "loss": 0.246, "step": 94390 }, { "epoch": 2.756047356972891, "grad_norm": 0.5813456678927339, "learning_rate": 4.517437145174372e-06, "loss": 0.24, "step": 94395 }, { "epoch": 2.7561933402432084, "grad_norm": 0.6303545170403279, "learning_rate": 4.514733711814004e-06, "loss": 0.2367, "step": 94400 }, { "epoch": 2.7563393235135254, "grad_norm": 0.569945156957648, "learning_rate": 4.512030278453636e-06, "loss": 0.238, "step": 94405 }, { "epoch": 2.7564853067838424, "grad_norm": 0.5988501162553777, "learning_rate": 4.5093268450932685e-06, "loss": 0.2391, "step": 94410 }, { "epoch": 2.75663129005416, "grad_norm": 0.5756630590967416, "learning_rate": 4.506623411732902e-06, "loss": 0.2324, "step": 94415 }, { "epoch": 2.756777273324477, "grad_norm": 0.5842207781631314, "learning_rate": 4.503919978372533e-06, "loss": 0.245, "step": 94420 }, { "epoch": 2.7569232565947943, "grad_norm": 0.6067465621915492, "learning_rate": 4.5012165450121655e-06, "loss": 0.2394, "step": 94425 }, { "epoch": 2.7570692398651113, "grad_norm": 0.604820584197496, "learning_rate": 4.498513111651799e-06, "loss": 0.2673, "step": 94430 }, { "epoch": 2.7572152231354288, "grad_norm": 0.5973969506725041, "learning_rate": 4.49580967829143e-06, "loss": 0.2426, "step": 94435 }, { "epoch": 2.757361206405746, "grad_norm": 0.5885765532333732, "learning_rate": 4.4931062449310624e-06, "loss": 0.2313, "step": 94440 }, { "epoch": 2.7575071896760632, "grad_norm": 0.6679422360813937, "learning_rate": 4.490402811570695e-06, "loss": 0.2523, "step": 94445 }, { "epoch": 2.7576531729463802, "grad_norm": 0.541098540477692, "learning_rate": 4.487699378210327e-06, "loss": 0.2413, "step": 94450 }, { "epoch": 2.7577991562166977, "grad_norm": 0.575447419050047, "learning_rate": 4.484995944849959e-06, "loss": 0.2402, "step": 94455 }, { "epoch": 2.7579451394870147, "grad_norm": 0.5930997026718214, "learning_rate": 4.482292511489592e-06, "loss": 0.2468, "step": 94460 }, { "epoch": 2.758091122757332, "grad_norm": 0.6667645847682763, "learning_rate": 4.479589078129225e-06, "loss": 0.2425, "step": 94465 }, { "epoch": 2.758237106027649, "grad_norm": 0.6350418825655698, "learning_rate": 4.476885644768856e-06, "loss": 0.2426, "step": 94470 }, { "epoch": 2.7583830892979666, "grad_norm": 0.5658750577451701, "learning_rate": 4.474182211408489e-06, "loss": 0.2408, "step": 94475 }, { "epoch": 2.7585290725682836, "grad_norm": 0.6080938282574535, "learning_rate": 4.471478778048122e-06, "loss": 0.2383, "step": 94480 }, { "epoch": 2.7586750558386006, "grad_norm": 0.5865287298145799, "learning_rate": 4.468775344687753e-06, "loss": 0.224, "step": 94485 }, { "epoch": 2.758821039108918, "grad_norm": 0.5377387298869378, "learning_rate": 4.466071911327386e-06, "loss": 0.2438, "step": 94490 }, { "epoch": 2.7589670223792355, "grad_norm": 0.6374075915260657, "learning_rate": 4.463368477967019e-06, "loss": 0.2355, "step": 94495 }, { "epoch": 2.7591130056495525, "grad_norm": 0.556183894099095, "learning_rate": 4.460665044606651e-06, "loss": 0.2436, "step": 94500 }, { "epoch": 2.7592589889198695, "grad_norm": 0.6262325214223855, "learning_rate": 4.4579616112462826e-06, "loss": 0.2328, "step": 94505 }, { "epoch": 2.759404972190187, "grad_norm": 0.5541493840564347, "learning_rate": 4.455258177885916e-06, "loss": 0.2294, "step": 94510 }, { "epoch": 2.7595509554605044, "grad_norm": 0.585612526966077, "learning_rate": 4.452554744525548e-06, "loss": 0.2198, "step": 94515 }, { "epoch": 2.7596969387308214, "grad_norm": 0.5904993140847928, "learning_rate": 4.4498513111651795e-06, "loss": 0.2453, "step": 94520 }, { "epoch": 2.7598429220011385, "grad_norm": 0.6056491130032253, "learning_rate": 4.447147877804813e-06, "loss": 0.2456, "step": 94525 }, { "epoch": 2.759988905271456, "grad_norm": 0.545587313984438, "learning_rate": 4.444444444444445e-06, "loss": 0.2263, "step": 94530 }, { "epoch": 2.7601348885417734, "grad_norm": 0.5620815250432214, "learning_rate": 4.4417410110840765e-06, "loss": 0.2528, "step": 94535 }, { "epoch": 2.7602808718120904, "grad_norm": 0.6292122114778153, "learning_rate": 4.439037577723709e-06, "loss": 0.257, "step": 94540 }, { "epoch": 2.7604268550824074, "grad_norm": 0.6352170989432958, "learning_rate": 4.436334144363342e-06, "loss": 0.2507, "step": 94545 }, { "epoch": 2.760572838352725, "grad_norm": 0.5705700678399152, "learning_rate": 4.433630711002974e-06, "loss": 0.2489, "step": 94550 }, { "epoch": 2.7607188216230423, "grad_norm": 0.5596108173826942, "learning_rate": 4.430927277642606e-06, "loss": 0.2459, "step": 94555 }, { "epoch": 2.7608648048933593, "grad_norm": 0.5980781100444994, "learning_rate": 4.428223844282239e-06, "loss": 0.2331, "step": 94560 }, { "epoch": 2.7610107881636763, "grad_norm": 0.6413180148509598, "learning_rate": 4.425520410921871e-06, "loss": 0.2532, "step": 94565 }, { "epoch": 2.7611567714339937, "grad_norm": 0.582761900030815, "learning_rate": 4.422816977561503e-06, "loss": 0.2423, "step": 94570 }, { "epoch": 2.7613027547043107, "grad_norm": 0.5694681760291977, "learning_rate": 4.420113544201136e-06, "loss": 0.2186, "step": 94575 }, { "epoch": 2.761448737974628, "grad_norm": 0.6100419388417223, "learning_rate": 4.417410110840768e-06, "loss": 0.2317, "step": 94580 }, { "epoch": 2.761594721244945, "grad_norm": 0.5992197678793815, "learning_rate": 4.4147066774804005e-06, "loss": 0.2469, "step": 94585 }, { "epoch": 2.7617407045152627, "grad_norm": 0.6415187968501359, "learning_rate": 4.412003244120033e-06, "loss": 0.2512, "step": 94590 }, { "epoch": 2.7618866877855797, "grad_norm": 0.6035274812029618, "learning_rate": 4.409299810759665e-06, "loss": 0.2629, "step": 94595 }, { "epoch": 2.762032671055897, "grad_norm": 0.5172288031960858, "learning_rate": 4.4065963773992974e-06, "loss": 0.2269, "step": 94600 }, { "epoch": 2.762178654326214, "grad_norm": 0.5400950202573445, "learning_rate": 4.40389294403893e-06, "loss": 0.2461, "step": 94605 }, { "epoch": 2.7623246375965316, "grad_norm": 0.6478879605900615, "learning_rate": 4.401189510678562e-06, "loss": 0.2372, "step": 94610 }, { "epoch": 2.7624706208668486, "grad_norm": 0.5999279021039706, "learning_rate": 4.398486077318194e-06, "loss": 0.2734, "step": 94615 }, { "epoch": 2.762616604137166, "grad_norm": 0.5761582887344117, "learning_rate": 4.395782643957827e-06, "loss": 0.2605, "step": 94620 }, { "epoch": 2.762762587407483, "grad_norm": 0.5836609609841703, "learning_rate": 4.393079210597459e-06, "loss": 0.2604, "step": 94625 }, { "epoch": 2.7629085706778005, "grad_norm": 0.5844710834835286, "learning_rate": 4.390375777237091e-06, "loss": 0.2453, "step": 94630 }, { "epoch": 2.7630545539481175, "grad_norm": 0.6538125242961448, "learning_rate": 4.387672343876724e-06, "loss": 0.2505, "step": 94635 }, { "epoch": 2.7632005372184345, "grad_norm": 0.5777721914685049, "learning_rate": 4.384968910516356e-06, "loss": 0.2353, "step": 94640 }, { "epoch": 2.763346520488752, "grad_norm": 0.5816209465370419, "learning_rate": 4.382265477155988e-06, "loss": 0.2261, "step": 94645 }, { "epoch": 2.7634925037590694, "grad_norm": 0.6277114341580146, "learning_rate": 4.379562043795621e-06, "loss": 0.2575, "step": 94650 }, { "epoch": 2.7636384870293864, "grad_norm": 0.5992176135035286, "learning_rate": 4.376858610435253e-06, "loss": 0.2475, "step": 94655 }, { "epoch": 2.7637844702997034, "grad_norm": 0.5421958307189475, "learning_rate": 4.374155177074885e-06, "loss": 0.237, "step": 94660 }, { "epoch": 2.763930453570021, "grad_norm": 0.5743918876224718, "learning_rate": 4.3714517437145176e-06, "loss": 0.2355, "step": 94665 }, { "epoch": 2.7640764368403383, "grad_norm": 0.5969748979486259, "learning_rate": 4.36874831035415e-06, "loss": 0.2399, "step": 94670 }, { "epoch": 2.7642224201106553, "grad_norm": 0.5882837698233966, "learning_rate": 4.366044876993782e-06, "loss": 0.2416, "step": 94675 }, { "epoch": 2.7643684033809723, "grad_norm": 0.5852152251583572, "learning_rate": 4.3633414436334145e-06, "loss": 0.234, "step": 94680 }, { "epoch": 2.76451438665129, "grad_norm": 0.6209680609770923, "learning_rate": 4.360638010273047e-06, "loss": 0.2453, "step": 94685 }, { "epoch": 2.7646603699216072, "grad_norm": 0.5576090321723315, "learning_rate": 4.357934576912679e-06, "loss": 0.2348, "step": 94690 }, { "epoch": 2.7648063531919242, "grad_norm": 0.5715037311660769, "learning_rate": 4.3552311435523115e-06, "loss": 0.2322, "step": 94695 }, { "epoch": 2.7649523364622413, "grad_norm": 0.6046629082656547, "learning_rate": 4.352527710191944e-06, "loss": 0.2503, "step": 94700 }, { "epoch": 2.7650983197325587, "grad_norm": 0.5532049027827309, "learning_rate": 4.349824276831576e-06, "loss": 0.2402, "step": 94705 }, { "epoch": 2.7652443030028757, "grad_norm": 0.5604116484189199, "learning_rate": 4.347120843471208e-06, "loss": 0.2237, "step": 94710 }, { "epoch": 2.765390286273193, "grad_norm": 0.5679325463739284, "learning_rate": 4.344417410110841e-06, "loss": 0.2372, "step": 94715 }, { "epoch": 2.76553626954351, "grad_norm": 0.573715247681551, "learning_rate": 4.341713976750474e-06, "loss": 0.2393, "step": 94720 }, { "epoch": 2.7656822528138276, "grad_norm": 0.6034098797786194, "learning_rate": 4.339010543390105e-06, "loss": 0.2444, "step": 94725 }, { "epoch": 2.7658282360841446, "grad_norm": 0.6007618560529351, "learning_rate": 4.336307110029738e-06, "loss": 0.251, "step": 94730 }, { "epoch": 2.765974219354462, "grad_norm": 0.6052540665821061, "learning_rate": 4.333603676669371e-06, "loss": 0.2387, "step": 94735 }, { "epoch": 2.766120202624779, "grad_norm": 0.5614725250987397, "learning_rate": 4.330900243309002e-06, "loss": 0.2411, "step": 94740 }, { "epoch": 2.7662661858950965, "grad_norm": 0.6168611883234171, "learning_rate": 4.328196809948635e-06, "loss": 0.2421, "step": 94745 }, { "epoch": 2.7664121691654135, "grad_norm": 0.6241909661616233, "learning_rate": 4.325493376588268e-06, "loss": 0.2497, "step": 94750 }, { "epoch": 2.766558152435731, "grad_norm": 0.6153475123462888, "learning_rate": 4.3227899432279e-06, "loss": 0.2259, "step": 94755 }, { "epoch": 2.766704135706048, "grad_norm": 0.6618965677302261, "learning_rate": 4.320086509867532e-06, "loss": 0.2479, "step": 94760 }, { "epoch": 2.7668501189763655, "grad_norm": 0.5731705923817166, "learning_rate": 4.317383076507165e-06, "loss": 0.2449, "step": 94765 }, { "epoch": 2.7669961022466825, "grad_norm": 0.5871490675537866, "learning_rate": 4.314679643146797e-06, "loss": 0.2372, "step": 94770 }, { "epoch": 2.767142085517, "grad_norm": 0.5879649900082535, "learning_rate": 4.3119762097864285e-06, "loss": 0.2453, "step": 94775 }, { "epoch": 2.767288068787317, "grad_norm": 0.524751820286258, "learning_rate": 4.309272776426061e-06, "loss": 0.2343, "step": 94780 }, { "epoch": 2.7674340520576344, "grad_norm": 0.6071019040024525, "learning_rate": 4.306569343065694e-06, "loss": 0.2369, "step": 94785 }, { "epoch": 2.7675800353279514, "grad_norm": 0.5947539673583427, "learning_rate": 4.3038659097053255e-06, "loss": 0.2476, "step": 94790 }, { "epoch": 2.7677260185982684, "grad_norm": 0.5813763437793928, "learning_rate": 4.301162476344958e-06, "loss": 0.2477, "step": 94795 }, { "epoch": 2.767872001868586, "grad_norm": 0.5817946445049396, "learning_rate": 4.298459042984591e-06, "loss": 0.2518, "step": 94800 }, { "epoch": 2.7680179851389033, "grad_norm": 0.5811471848209049, "learning_rate": 4.295755609624223e-06, "loss": 0.2258, "step": 94805 }, { "epoch": 2.7681639684092203, "grad_norm": 0.5813793073176354, "learning_rate": 4.293052176263855e-06, "loss": 0.2298, "step": 94810 }, { "epoch": 2.7683099516795373, "grad_norm": 0.5543401614709647, "learning_rate": 4.290348742903488e-06, "loss": 0.2225, "step": 94815 }, { "epoch": 2.7684559349498548, "grad_norm": 0.5332096539927469, "learning_rate": 4.28764530954312e-06, "loss": 0.2355, "step": 94820 }, { "epoch": 2.768601918220172, "grad_norm": 0.5882270370059233, "learning_rate": 4.284941876182752e-06, "loss": 0.241, "step": 94825 }, { "epoch": 2.768747901490489, "grad_norm": 0.5779941981334068, "learning_rate": 4.282238442822385e-06, "loss": 0.2366, "step": 94830 }, { "epoch": 2.7688938847608062, "grad_norm": 0.5784550528724801, "learning_rate": 4.279535009462017e-06, "loss": 0.2389, "step": 94835 }, { "epoch": 2.7690398680311237, "grad_norm": 0.5958066582569879, "learning_rate": 4.2768315761016495e-06, "loss": 0.2499, "step": 94840 }, { "epoch": 2.769185851301441, "grad_norm": 0.5782829695956487, "learning_rate": 4.274128142741282e-06, "loss": 0.2182, "step": 94845 }, { "epoch": 2.769331834571758, "grad_norm": 0.553111762883878, "learning_rate": 4.271424709380914e-06, "loss": 0.2328, "step": 94850 }, { "epoch": 2.769477817842075, "grad_norm": 0.5628181153138091, "learning_rate": 4.2687212760205465e-06, "loss": 0.2385, "step": 94855 }, { "epoch": 2.7696238011123926, "grad_norm": 0.6124118759243229, "learning_rate": 4.266017842660179e-06, "loss": 0.2373, "step": 94860 }, { "epoch": 2.7697697843827096, "grad_norm": 0.5829183438899463, "learning_rate": 4.263314409299811e-06, "loss": 0.2438, "step": 94865 }, { "epoch": 2.769915767653027, "grad_norm": 0.5932502826297199, "learning_rate": 4.260610975939443e-06, "loss": 0.2329, "step": 94870 }, { "epoch": 2.770061750923344, "grad_norm": 0.6023447519355207, "learning_rate": 4.257907542579075e-06, "loss": 0.2397, "step": 94875 }, { "epoch": 2.7702077341936615, "grad_norm": 0.5672312275729439, "learning_rate": 4.255204109218708e-06, "loss": 0.2366, "step": 94880 }, { "epoch": 2.7703537174639785, "grad_norm": 0.5589253613684299, "learning_rate": 4.25250067585834e-06, "loss": 0.2303, "step": 94885 }, { "epoch": 2.770499700734296, "grad_norm": 0.5434728025074036, "learning_rate": 4.249797242497973e-06, "loss": 0.2504, "step": 94890 }, { "epoch": 2.770645684004613, "grad_norm": 0.5931413678879415, "learning_rate": 4.247093809137605e-06, "loss": 0.2428, "step": 94895 }, { "epoch": 2.7707916672749304, "grad_norm": 0.606736197896348, "learning_rate": 4.244390375777237e-06, "loss": 0.2599, "step": 94900 }, { "epoch": 2.7709376505452474, "grad_norm": 0.5825511064373058, "learning_rate": 4.24168694241687e-06, "loss": 0.2299, "step": 94905 }, { "epoch": 2.771083633815565, "grad_norm": 0.5771765535000771, "learning_rate": 4.238983509056502e-06, "loss": 0.2465, "step": 94910 }, { "epoch": 2.771229617085882, "grad_norm": 0.6059558730415984, "learning_rate": 4.236280075696134e-06, "loss": 0.2436, "step": 94915 }, { "epoch": 2.7713756003561993, "grad_norm": 0.5749781497388217, "learning_rate": 4.233576642335767e-06, "loss": 0.2355, "step": 94920 }, { "epoch": 2.7715215836265163, "grad_norm": 0.577973184129675, "learning_rate": 4.230873208975399e-06, "loss": 0.2434, "step": 94925 }, { "epoch": 2.7716675668968334, "grad_norm": 0.5870169497353286, "learning_rate": 4.228169775615031e-06, "loss": 0.2478, "step": 94930 }, { "epoch": 2.771813550167151, "grad_norm": 0.6448678327882917, "learning_rate": 4.2254663422546635e-06, "loss": 0.2458, "step": 94935 }, { "epoch": 2.7719595334374683, "grad_norm": 0.5803692173141473, "learning_rate": 4.222762908894296e-06, "loss": 0.2369, "step": 94940 }, { "epoch": 2.7721055167077853, "grad_norm": 0.5787167099630798, "learning_rate": 4.220059475533928e-06, "loss": 0.2449, "step": 94945 }, { "epoch": 2.7722514999781023, "grad_norm": 0.5620267362215291, "learning_rate": 4.2173560421735605e-06, "loss": 0.2445, "step": 94950 }, { "epoch": 2.7723974832484197, "grad_norm": 0.5752618412411401, "learning_rate": 4.214652608813193e-06, "loss": 0.2155, "step": 94955 }, { "epoch": 2.772543466518737, "grad_norm": 0.6353301287271403, "learning_rate": 4.211949175452826e-06, "loss": 0.25, "step": 94960 }, { "epoch": 2.772689449789054, "grad_norm": 0.619936435401721, "learning_rate": 4.2092457420924574e-06, "loss": 0.2357, "step": 94965 }, { "epoch": 2.772835433059371, "grad_norm": 0.6272125717973112, "learning_rate": 4.20654230873209e-06, "loss": 0.2462, "step": 94970 }, { "epoch": 2.7729814163296886, "grad_norm": 0.5690114789598328, "learning_rate": 4.203838875371723e-06, "loss": 0.2477, "step": 94975 }, { "epoch": 2.773127399600006, "grad_norm": 0.6083503016382869, "learning_rate": 4.201135442011354e-06, "loss": 0.2485, "step": 94980 }, { "epoch": 2.773273382870323, "grad_norm": 0.5952663972898649, "learning_rate": 4.198432008650987e-06, "loss": 0.2522, "step": 94985 }, { "epoch": 2.77341936614064, "grad_norm": 0.554454042959324, "learning_rate": 4.19572857529062e-06, "loss": 0.2342, "step": 94990 }, { "epoch": 2.7735653494109576, "grad_norm": 0.6101561629434336, "learning_rate": 4.193025141930251e-06, "loss": 0.2553, "step": 94995 }, { "epoch": 2.7737113326812746, "grad_norm": 0.5751541505619778, "learning_rate": 4.190321708569884e-06, "loss": 0.2463, "step": 95000 }, { "epoch": 2.773857315951592, "grad_norm": 0.6242269092022921, "learning_rate": 4.187618275209516e-06, "loss": 0.2374, "step": 95005 }, { "epoch": 2.774003299221909, "grad_norm": 0.5963521013698765, "learning_rate": 4.184914841849149e-06, "loss": 0.247, "step": 95010 }, { "epoch": 2.7741492824922265, "grad_norm": 0.5773045785984622, "learning_rate": 4.182211408488781e-06, "loss": 0.2464, "step": 95015 }, { "epoch": 2.7742952657625435, "grad_norm": 0.6205406241230436, "learning_rate": 4.179507975128413e-06, "loss": 0.2407, "step": 95020 }, { "epoch": 2.774441249032861, "grad_norm": 0.5998373874961236, "learning_rate": 4.176804541768046e-06, "loss": 0.2445, "step": 95025 }, { "epoch": 2.774587232303178, "grad_norm": 0.585798058292291, "learning_rate": 4.1741011084076776e-06, "loss": 0.2399, "step": 95030 }, { "epoch": 2.7747332155734954, "grad_norm": 0.5923657525201312, "learning_rate": 4.17139767504731e-06, "loss": 0.2547, "step": 95035 }, { "epoch": 2.7748791988438124, "grad_norm": 0.5840937844598043, "learning_rate": 4.168694241686943e-06, "loss": 0.2444, "step": 95040 }, { "epoch": 2.77502518211413, "grad_norm": 0.6399001306279597, "learning_rate": 4.165990808326575e-06, "loss": 0.2464, "step": 95045 }, { "epoch": 2.775171165384447, "grad_norm": 0.5827690953027763, "learning_rate": 4.163287374966207e-06, "loss": 0.2376, "step": 95050 }, { "epoch": 2.7753171486547643, "grad_norm": 0.5861551831824362, "learning_rate": 4.16058394160584e-06, "loss": 0.2445, "step": 95055 }, { "epoch": 2.7754631319250813, "grad_norm": 0.6100105459428381, "learning_rate": 4.157880508245472e-06, "loss": 0.2364, "step": 95060 }, { "epoch": 2.7756091151953988, "grad_norm": 0.591611001917579, "learning_rate": 4.155177074885104e-06, "loss": 0.2383, "step": 95065 }, { "epoch": 2.7757550984657158, "grad_norm": 0.5830899948179512, "learning_rate": 4.152473641524737e-06, "loss": 0.2411, "step": 95070 }, { "epoch": 2.7759010817360332, "grad_norm": 0.6212979612536629, "learning_rate": 4.149770208164369e-06, "loss": 0.2575, "step": 95075 }, { "epoch": 2.7760470650063502, "grad_norm": 0.5415878093045801, "learning_rate": 4.147066774804001e-06, "loss": 0.2451, "step": 95080 }, { "epoch": 2.7761930482766672, "grad_norm": 0.5944875376701338, "learning_rate": 4.144363341443634e-06, "loss": 0.2412, "step": 95085 }, { "epoch": 2.7763390315469847, "grad_norm": 0.5993937791930396, "learning_rate": 4.141659908083266e-06, "loss": 0.2391, "step": 95090 }, { "epoch": 2.776485014817302, "grad_norm": 0.5864486799990275, "learning_rate": 4.1389564747228985e-06, "loss": 0.2317, "step": 95095 }, { "epoch": 2.776630998087619, "grad_norm": 0.5807728512482508, "learning_rate": 4.13625304136253e-06, "loss": 0.244, "step": 95100 }, { "epoch": 2.776776981357936, "grad_norm": 0.6430522086740925, "learning_rate": 4.133549608002163e-06, "loss": 0.2349, "step": 95105 }, { "epoch": 2.7769229646282536, "grad_norm": 0.5584468453205567, "learning_rate": 4.1308461746417955e-06, "loss": 0.2501, "step": 95110 }, { "epoch": 2.777068947898571, "grad_norm": 0.5478448171877465, "learning_rate": 4.128142741281427e-06, "loss": 0.2396, "step": 95115 }, { "epoch": 2.777214931168888, "grad_norm": 0.6237357593073471, "learning_rate": 4.12543930792106e-06, "loss": 0.2428, "step": 95120 }, { "epoch": 2.777360914439205, "grad_norm": 0.544810089051436, "learning_rate": 4.1227358745606924e-06, "loss": 0.2349, "step": 95125 }, { "epoch": 2.7775068977095225, "grad_norm": 0.6210793459263138, "learning_rate": 4.120032441200325e-06, "loss": 0.245, "step": 95130 }, { "epoch": 2.77765288097984, "grad_norm": 0.5446957969847981, "learning_rate": 4.117329007839957e-06, "loss": 0.2342, "step": 95135 }, { "epoch": 2.777798864250157, "grad_norm": 0.627365335118201, "learning_rate": 4.114625574479589e-06, "loss": 0.2526, "step": 95140 }, { "epoch": 2.777944847520474, "grad_norm": 0.6075776637423687, "learning_rate": 4.111922141119222e-06, "loss": 0.2504, "step": 95145 }, { "epoch": 2.7780908307907914, "grad_norm": 0.5762434917146569, "learning_rate": 4.109218707758854e-06, "loss": 0.2399, "step": 95150 }, { "epoch": 2.7782368140611085, "grad_norm": 0.5338557816047815, "learning_rate": 4.106515274398486e-06, "loss": 0.229, "step": 95155 }, { "epoch": 2.778382797331426, "grad_norm": 0.5817134803909034, "learning_rate": 4.103811841038119e-06, "loss": 0.2347, "step": 95160 }, { "epoch": 2.778528780601743, "grad_norm": 0.6236475249988764, "learning_rate": 4.101108407677751e-06, "loss": 0.2446, "step": 95165 }, { "epoch": 2.7786747638720604, "grad_norm": 0.5453329833122483, "learning_rate": 4.098404974317383e-06, "loss": 0.2198, "step": 95170 }, { "epoch": 2.7788207471423774, "grad_norm": 0.599050605525107, "learning_rate": 4.095701540957016e-06, "loss": 0.2399, "step": 95175 }, { "epoch": 2.778966730412695, "grad_norm": 0.5946683582186472, "learning_rate": 4.092998107596648e-06, "loss": 0.2333, "step": 95180 }, { "epoch": 2.779112713683012, "grad_norm": 0.5619933858448267, "learning_rate": 4.09029467423628e-06, "loss": 0.2235, "step": 95185 }, { "epoch": 2.7792586969533293, "grad_norm": 0.6303769421005783, "learning_rate": 4.0875912408759126e-06, "loss": 0.2675, "step": 95190 }, { "epoch": 2.7794046802236463, "grad_norm": 0.5632128087175948, "learning_rate": 4.084887807515545e-06, "loss": 0.236, "step": 95195 }, { "epoch": 2.7795506634939637, "grad_norm": 0.5907588474274584, "learning_rate": 4.082184374155177e-06, "loss": 0.2415, "step": 95200 }, { "epoch": 2.7796966467642807, "grad_norm": 0.6083099415148338, "learning_rate": 4.0794809407948095e-06, "loss": 0.2346, "step": 95205 }, { "epoch": 2.779842630034598, "grad_norm": 0.6156905187776115, "learning_rate": 4.076777507434442e-06, "loss": 0.2477, "step": 95210 }, { "epoch": 2.779988613304915, "grad_norm": 0.5604152504342217, "learning_rate": 4.074074074074075e-06, "loss": 0.3144, "step": 95215 }, { "epoch": 2.780134596575232, "grad_norm": 0.6173176018464217, "learning_rate": 4.0713706407137065e-06, "loss": 0.2574, "step": 95220 }, { "epoch": 2.7802805798455497, "grad_norm": 0.563712857720502, "learning_rate": 4.068667207353339e-06, "loss": 0.2398, "step": 95225 }, { "epoch": 2.780426563115867, "grad_norm": 0.6143291264695451, "learning_rate": 4.065963773992971e-06, "loss": 0.2347, "step": 95230 }, { "epoch": 2.780572546386184, "grad_norm": 0.6136735187054527, "learning_rate": 4.0632603406326034e-06, "loss": 0.2303, "step": 95235 }, { "epoch": 2.780718529656501, "grad_norm": 0.6018822634604519, "learning_rate": 4.060556907272236e-06, "loss": 0.2338, "step": 95240 }, { "epoch": 2.7808645129268186, "grad_norm": 0.6344107102847844, "learning_rate": 4.057853473911868e-06, "loss": 0.2419, "step": 95245 }, { "epoch": 2.781010496197136, "grad_norm": 0.5251654422696497, "learning_rate": 4.0551500405515e-06, "loss": 0.2217, "step": 95250 }, { "epoch": 2.781156479467453, "grad_norm": 0.5899817158547608, "learning_rate": 4.052446607191133e-06, "loss": 0.2472, "step": 95255 }, { "epoch": 2.78130246273777, "grad_norm": 0.545900025510058, "learning_rate": 4.049743173830765e-06, "loss": 0.2389, "step": 95260 }, { "epoch": 2.7814484460080875, "grad_norm": 0.5898966057629359, "learning_rate": 4.047039740470398e-06, "loss": 0.2422, "step": 95265 }, { "epoch": 2.781594429278405, "grad_norm": 0.5514857421150278, "learning_rate": 4.04433630711003e-06, "loss": 0.2376, "step": 95270 }, { "epoch": 2.781740412548722, "grad_norm": 0.5838444648458468, "learning_rate": 4.041632873749662e-06, "loss": 0.2592, "step": 95275 }, { "epoch": 2.781886395819039, "grad_norm": 0.6059155520180507, "learning_rate": 4.038929440389295e-06, "loss": 0.2568, "step": 95280 }, { "epoch": 2.7820323790893564, "grad_norm": 0.5726428683715865, "learning_rate": 4.036226007028927e-06, "loss": 0.2378, "step": 95285 }, { "epoch": 2.7821783623596734, "grad_norm": 0.5959359397765916, "learning_rate": 4.033522573668559e-06, "loss": 0.232, "step": 95290 }, { "epoch": 2.782324345629991, "grad_norm": 0.6123070634887198, "learning_rate": 4.030819140308192e-06, "loss": 0.2582, "step": 95295 }, { "epoch": 2.782470328900308, "grad_norm": 0.6421645506018401, "learning_rate": 4.028115706947824e-06, "loss": 0.2456, "step": 95300 }, { "epoch": 2.7826163121706253, "grad_norm": 0.5827315369769618, "learning_rate": 4.025412273587456e-06, "loss": 0.2453, "step": 95305 }, { "epoch": 2.7827622954409423, "grad_norm": 0.6443384307575293, "learning_rate": 4.022708840227089e-06, "loss": 0.2448, "step": 95310 }, { "epoch": 2.78290827871126, "grad_norm": 0.6103102112742, "learning_rate": 4.020005406866721e-06, "loss": 0.2405, "step": 95315 }, { "epoch": 2.783054261981577, "grad_norm": 0.5442038738584838, "learning_rate": 4.017301973506353e-06, "loss": 0.2504, "step": 95320 }, { "epoch": 2.7832002452518942, "grad_norm": 0.5628762916762067, "learning_rate": 4.014598540145985e-06, "loss": 0.2392, "step": 95325 }, { "epoch": 2.7833462285222113, "grad_norm": 0.608898978585189, "learning_rate": 4.011895106785618e-06, "loss": 0.2559, "step": 95330 }, { "epoch": 2.7834922117925287, "grad_norm": 0.5903664543470752, "learning_rate": 4.00919167342525e-06, "loss": 0.2515, "step": 95335 }, { "epoch": 2.7836381950628457, "grad_norm": 0.5855298893560718, "learning_rate": 4.006488240064882e-06, "loss": 0.2509, "step": 95340 }, { "epoch": 2.783784178333163, "grad_norm": 0.5447944633055323, "learning_rate": 4.003784806704515e-06, "loss": 0.2279, "step": 95345 }, { "epoch": 2.78393016160348, "grad_norm": 0.617635905796336, "learning_rate": 4.0010813733441476e-06, "loss": 0.2421, "step": 95350 }, { "epoch": 2.7840761448737976, "grad_norm": 0.5788176952603068, "learning_rate": 3.998377939983779e-06, "loss": 0.2325, "step": 95355 }, { "epoch": 2.7842221281441146, "grad_norm": 0.5868147292807552, "learning_rate": 3.995674506623412e-06, "loss": 0.2373, "step": 95360 }, { "epoch": 2.784368111414432, "grad_norm": 0.5489213751832949, "learning_rate": 3.9929710732630445e-06, "loss": 0.2312, "step": 95365 }, { "epoch": 2.784514094684749, "grad_norm": 0.5723812095576634, "learning_rate": 3.990267639902676e-06, "loss": 0.2442, "step": 95370 }, { "epoch": 2.784660077955066, "grad_norm": 0.539726176287348, "learning_rate": 3.987564206542309e-06, "loss": 0.2404, "step": 95375 }, { "epoch": 2.7848060612253835, "grad_norm": 0.5361361084528088, "learning_rate": 3.9848607731819415e-06, "loss": 0.2406, "step": 95380 }, { "epoch": 2.784952044495701, "grad_norm": 0.5379938949777396, "learning_rate": 3.982157339821574e-06, "loss": 0.2313, "step": 95385 }, { "epoch": 2.785098027766018, "grad_norm": 0.6334187458635805, "learning_rate": 3.979453906461206e-06, "loss": 0.248, "step": 95390 }, { "epoch": 2.785244011036335, "grad_norm": 0.5626014188731469, "learning_rate": 3.9767504731008384e-06, "loss": 0.2541, "step": 95395 }, { "epoch": 2.7853899943066525, "grad_norm": 0.6023755919692334, "learning_rate": 3.974047039740471e-06, "loss": 0.2498, "step": 95400 }, { "epoch": 2.78553597757697, "grad_norm": 0.5694908131166788, "learning_rate": 3.971343606380103e-06, "loss": 0.2292, "step": 95405 }, { "epoch": 2.785681960847287, "grad_norm": 0.570206233755112, "learning_rate": 3.968640173019735e-06, "loss": 0.2432, "step": 95410 }, { "epoch": 2.785827944117604, "grad_norm": 0.6259487997244505, "learning_rate": 3.965936739659368e-06, "loss": 0.2361, "step": 95415 }, { "epoch": 2.7859739273879214, "grad_norm": 0.6229614183947645, "learning_rate": 3.963233306298999e-06, "loss": 0.2316, "step": 95420 }, { "epoch": 2.786119910658239, "grad_norm": 0.5913044923865692, "learning_rate": 3.960529872938632e-06, "loss": 0.2415, "step": 95425 }, { "epoch": 2.786265893928556, "grad_norm": 0.6063115315649119, "learning_rate": 3.957826439578265e-06, "loss": 0.2431, "step": 95430 }, { "epoch": 2.786411877198873, "grad_norm": 0.562964137001157, "learning_rate": 3.955123006217897e-06, "loss": 0.2242, "step": 95435 }, { "epoch": 2.7865578604691903, "grad_norm": 0.6373633059727755, "learning_rate": 3.952419572857529e-06, "loss": 0.2403, "step": 95440 }, { "epoch": 2.7867038437395073, "grad_norm": 0.5489166966462133, "learning_rate": 3.949716139497162e-06, "loss": 0.2311, "step": 95445 }, { "epoch": 2.7868498270098248, "grad_norm": 0.6143646830185818, "learning_rate": 3.947012706136794e-06, "loss": 0.2331, "step": 95450 }, { "epoch": 2.7869958102801418, "grad_norm": 0.609362740310146, "learning_rate": 3.944309272776426e-06, "loss": 0.2318, "step": 95455 }, { "epoch": 2.787141793550459, "grad_norm": 0.5884766683094295, "learning_rate": 3.9416058394160585e-06, "loss": 0.2587, "step": 95460 }, { "epoch": 2.787287776820776, "grad_norm": 0.5447798908746712, "learning_rate": 3.938902406055691e-06, "loss": 0.2294, "step": 95465 }, { "epoch": 2.7874337600910937, "grad_norm": 0.5917964541745169, "learning_rate": 3.936198972695323e-06, "loss": 0.2494, "step": 95470 }, { "epoch": 2.7875797433614107, "grad_norm": 0.6400364709767147, "learning_rate": 3.9334955393349555e-06, "loss": 0.2488, "step": 95475 }, { "epoch": 2.787725726631728, "grad_norm": 0.6114755249261833, "learning_rate": 3.930792105974588e-06, "loss": 0.2538, "step": 95480 }, { "epoch": 2.787871709902045, "grad_norm": 0.569382325639863, "learning_rate": 3.92808867261422e-06, "loss": 0.2487, "step": 95485 }, { "epoch": 2.7880176931723626, "grad_norm": 0.5827796653018946, "learning_rate": 3.9253852392538525e-06, "loss": 0.2428, "step": 95490 }, { "epoch": 2.7881636764426796, "grad_norm": 0.6030770178634565, "learning_rate": 3.922681805893485e-06, "loss": 0.2362, "step": 95495 }, { "epoch": 2.788309659712997, "grad_norm": 0.6293640599507065, "learning_rate": 3.919978372533117e-06, "loss": 0.248, "step": 95500 }, { "epoch": 2.788455642983314, "grad_norm": 0.5888833830882768, "learning_rate": 3.917274939172749e-06, "loss": 0.2353, "step": 95505 }, { "epoch": 2.788601626253631, "grad_norm": 0.6263068925641264, "learning_rate": 3.914571505812382e-06, "loss": 0.2568, "step": 95510 }, { "epoch": 2.7887476095239485, "grad_norm": 0.6027859032943247, "learning_rate": 3.911868072452014e-06, "loss": 0.2427, "step": 95515 }, { "epoch": 2.788893592794266, "grad_norm": 0.5492523656522983, "learning_rate": 3.909164639091647e-06, "loss": 0.244, "step": 95520 }, { "epoch": 2.789039576064583, "grad_norm": 0.5590971985715355, "learning_rate": 3.906461205731279e-06, "loss": 0.2414, "step": 95525 }, { "epoch": 2.7891855593349, "grad_norm": 0.5848079133628431, "learning_rate": 3.903757772370911e-06, "loss": 0.257, "step": 95530 }, { "epoch": 2.7893315426052174, "grad_norm": 0.6135693174904897, "learning_rate": 3.901054339010544e-06, "loss": 0.2459, "step": 95535 }, { "epoch": 2.789477525875535, "grad_norm": 0.5838236995557958, "learning_rate": 3.898350905650176e-06, "loss": 0.2435, "step": 95540 }, { "epoch": 2.789623509145852, "grad_norm": 0.5594550534024292, "learning_rate": 3.895647472289808e-06, "loss": 0.2239, "step": 95545 }, { "epoch": 2.789769492416169, "grad_norm": 0.5797002372339182, "learning_rate": 3.89294403892944e-06, "loss": 0.2419, "step": 95550 }, { "epoch": 2.7899154756864863, "grad_norm": 0.573140863572537, "learning_rate": 3.8902406055690734e-06, "loss": 0.2373, "step": 95555 }, { "epoch": 2.790061458956804, "grad_norm": 0.5750565999922049, "learning_rate": 3.887537172208705e-06, "loss": 0.2362, "step": 95560 }, { "epoch": 2.790207442227121, "grad_norm": 0.630317425502159, "learning_rate": 3.884833738848337e-06, "loss": 0.2555, "step": 95565 }, { "epoch": 2.790353425497438, "grad_norm": 0.5805549875793248, "learning_rate": 3.88213030548797e-06, "loss": 0.2297, "step": 95570 }, { "epoch": 2.7904994087677553, "grad_norm": 0.636514475336435, "learning_rate": 3.879426872127602e-06, "loss": 0.2603, "step": 95575 }, { "epoch": 2.7906453920380727, "grad_norm": 0.5758784094045396, "learning_rate": 3.876723438767234e-06, "loss": 0.2385, "step": 95580 }, { "epoch": 2.7907913753083897, "grad_norm": 0.5771612757573944, "learning_rate": 3.874020005406867e-06, "loss": 0.2425, "step": 95585 }, { "epoch": 2.7909373585787067, "grad_norm": 0.57197241816521, "learning_rate": 3.8713165720465e-06, "loss": 0.2406, "step": 95590 }, { "epoch": 2.791083341849024, "grad_norm": 0.5357593437428675, "learning_rate": 3.868613138686131e-06, "loss": 0.2451, "step": 95595 }, { "epoch": 2.791229325119341, "grad_norm": 0.5566234408034104, "learning_rate": 3.865909705325764e-06, "loss": 0.2311, "step": 95600 }, { "epoch": 2.7913753083896586, "grad_norm": 0.6505349877651623, "learning_rate": 3.863206271965397e-06, "loss": 0.2369, "step": 95605 }, { "epoch": 2.7915212916599756, "grad_norm": 0.648777206490879, "learning_rate": 3.860502838605028e-06, "loss": 0.241, "step": 95610 }, { "epoch": 2.791667274930293, "grad_norm": 0.6097668822191168, "learning_rate": 3.857799405244661e-06, "loss": 0.234, "step": 95615 }, { "epoch": 2.79181325820061, "grad_norm": 0.6017883646143135, "learning_rate": 3.8550959718842935e-06, "loss": 0.2268, "step": 95620 }, { "epoch": 2.7919592414709276, "grad_norm": 0.5525149734793231, "learning_rate": 3.852392538523925e-06, "loss": 0.2309, "step": 95625 }, { "epoch": 2.7921052247412446, "grad_norm": 0.5625124554718406, "learning_rate": 3.849689105163558e-06, "loss": 0.234, "step": 95630 }, { "epoch": 2.792251208011562, "grad_norm": 0.5917872806708369, "learning_rate": 3.8469856718031905e-06, "loss": 0.2344, "step": 95635 }, { "epoch": 2.792397191281879, "grad_norm": 0.600435112964363, "learning_rate": 3.844282238442823e-06, "loss": 0.2487, "step": 95640 }, { "epoch": 2.7925431745521965, "grad_norm": 0.5382343439653786, "learning_rate": 3.841578805082455e-06, "loss": 0.2342, "step": 95645 }, { "epoch": 2.7926891578225135, "grad_norm": 0.5714105774972826, "learning_rate": 3.8388753717220875e-06, "loss": 0.2437, "step": 95650 }, { "epoch": 2.792835141092831, "grad_norm": 0.627115696160673, "learning_rate": 3.83617193836172e-06, "loss": 0.2366, "step": 95655 }, { "epoch": 2.792981124363148, "grad_norm": 0.6504873664447902, "learning_rate": 3.833468505001351e-06, "loss": 0.2616, "step": 95660 }, { "epoch": 2.793127107633465, "grad_norm": 0.5779644182379458, "learning_rate": 3.830765071640984e-06, "loss": 0.2421, "step": 95665 }, { "epoch": 2.7932730909037824, "grad_norm": 0.6716085001736474, "learning_rate": 3.828061638280617e-06, "loss": 0.2462, "step": 95670 }, { "epoch": 2.7934190741741, "grad_norm": 0.6081519863125041, "learning_rate": 3.825358204920249e-06, "loss": 0.2539, "step": 95675 }, { "epoch": 2.793565057444417, "grad_norm": 0.5766523999385068, "learning_rate": 3.822654771559881e-06, "loss": 0.24, "step": 95680 }, { "epoch": 2.793711040714734, "grad_norm": 0.5766267377026725, "learning_rate": 3.819951338199514e-06, "loss": 0.229, "step": 95685 }, { "epoch": 2.7938570239850513, "grad_norm": 0.5820099490652628, "learning_rate": 3.817247904839146e-06, "loss": 0.2479, "step": 95690 }, { "epoch": 2.7940030072553688, "grad_norm": 0.588083133201547, "learning_rate": 3.814544471478778e-06, "loss": 0.2313, "step": 95695 }, { "epoch": 2.7941489905256858, "grad_norm": 0.5836338196518177, "learning_rate": 3.8118410381184106e-06, "loss": 0.2417, "step": 95700 }, { "epoch": 2.794294973796003, "grad_norm": 0.5823739513255184, "learning_rate": 3.809137604758043e-06, "loss": 0.2471, "step": 95705 }, { "epoch": 2.7944409570663202, "grad_norm": 0.5863880433426389, "learning_rate": 3.806434171397675e-06, "loss": 0.2291, "step": 95710 }, { "epoch": 2.7945869403366377, "grad_norm": 0.5201626814679619, "learning_rate": 3.8037307380373076e-06, "loss": 0.2365, "step": 95715 }, { "epoch": 2.7947329236069547, "grad_norm": 0.5895641145020724, "learning_rate": 3.80102730467694e-06, "loss": 0.2406, "step": 95720 }, { "epoch": 2.7948789068772717, "grad_norm": 0.592543619497287, "learning_rate": 3.7983238713165726e-06, "loss": 0.2442, "step": 95725 }, { "epoch": 2.795024890147589, "grad_norm": 0.5990405530684006, "learning_rate": 3.7956204379562045e-06, "loss": 0.2473, "step": 95730 }, { "epoch": 2.795170873417906, "grad_norm": 0.6065411495974845, "learning_rate": 3.792917004595837e-06, "loss": 0.2414, "step": 95735 }, { "epoch": 2.7953168566882236, "grad_norm": 0.609726576598584, "learning_rate": 3.7902135712354696e-06, "loss": 0.2359, "step": 95740 }, { "epoch": 2.7954628399585406, "grad_norm": 0.5870996590075525, "learning_rate": 3.7875101378751015e-06, "loss": 0.2466, "step": 95745 }, { "epoch": 2.795608823228858, "grad_norm": 0.6255189364279095, "learning_rate": 3.784806704514734e-06, "loss": 0.2528, "step": 95750 }, { "epoch": 2.795754806499175, "grad_norm": 0.660378003683659, "learning_rate": 3.7821032711543665e-06, "loss": 0.2473, "step": 95755 }, { "epoch": 2.7959007897694925, "grad_norm": 0.578162344942495, "learning_rate": 3.779399837793999e-06, "loss": 0.2343, "step": 95760 }, { "epoch": 2.7960467730398095, "grad_norm": 0.5909737307374477, "learning_rate": 3.7766964044336308e-06, "loss": 0.2387, "step": 95765 }, { "epoch": 2.796192756310127, "grad_norm": 0.6089008543178103, "learning_rate": 3.773992971073263e-06, "loss": 0.2518, "step": 95770 }, { "epoch": 2.796338739580444, "grad_norm": 0.6142770379774999, "learning_rate": 3.771289537712896e-06, "loss": 0.2515, "step": 95775 }, { "epoch": 2.7964847228507614, "grad_norm": 0.6059037457235159, "learning_rate": 3.7685861043525277e-06, "loss": 0.2543, "step": 95780 }, { "epoch": 2.7966307061210784, "grad_norm": 0.6127098326850221, "learning_rate": 3.76588267099216e-06, "loss": 0.2575, "step": 95785 }, { "epoch": 2.796776689391396, "grad_norm": 0.5900647315624014, "learning_rate": 3.7631792376317928e-06, "loss": 0.2472, "step": 95790 }, { "epoch": 2.796922672661713, "grad_norm": 0.5705392739434197, "learning_rate": 3.7604758042714247e-06, "loss": 0.2463, "step": 95795 }, { "epoch": 2.79706865593203, "grad_norm": 0.5748237654307898, "learning_rate": 3.757772370911057e-06, "loss": 0.2417, "step": 95800 }, { "epoch": 2.7972146392023474, "grad_norm": 0.6538232542256377, "learning_rate": 3.7550689375506897e-06, "loss": 0.2493, "step": 95805 }, { "epoch": 2.797360622472665, "grad_norm": 0.6019553557395093, "learning_rate": 3.752365504190322e-06, "loss": 0.2351, "step": 95810 }, { "epoch": 2.797506605742982, "grad_norm": 0.5732605654960392, "learning_rate": 3.749662070829954e-06, "loss": 0.234, "step": 95815 }, { "epoch": 2.797652589013299, "grad_norm": 0.6116789535787533, "learning_rate": 3.7469586374695867e-06, "loss": 0.2418, "step": 95820 }, { "epoch": 2.7977985722836163, "grad_norm": 0.527999591218121, "learning_rate": 3.744255204109219e-06, "loss": 0.2328, "step": 95825 }, { "epoch": 2.7979445555539337, "grad_norm": 0.5769451475827414, "learning_rate": 3.741551770748851e-06, "loss": 0.2596, "step": 95830 }, { "epoch": 2.7980905388242507, "grad_norm": 0.5338208256909305, "learning_rate": 3.7388483373884836e-06, "loss": 0.2299, "step": 95835 }, { "epoch": 2.7982365220945677, "grad_norm": 0.6095446458736721, "learning_rate": 3.736144904028116e-06, "loss": 0.2347, "step": 95840 }, { "epoch": 2.798382505364885, "grad_norm": 0.6176937137912795, "learning_rate": 3.7334414706677487e-06, "loss": 0.2504, "step": 95845 }, { "epoch": 2.7985284886352026, "grad_norm": 0.6447244798782802, "learning_rate": 3.7307380373073806e-06, "loss": 0.2608, "step": 95850 }, { "epoch": 2.7986744719055197, "grad_norm": 0.5671107932994621, "learning_rate": 3.728034603947013e-06, "loss": 0.2243, "step": 95855 }, { "epoch": 2.7988204551758367, "grad_norm": 0.6221655490099927, "learning_rate": 3.7253311705866456e-06, "loss": 0.2358, "step": 95860 }, { "epoch": 2.798966438446154, "grad_norm": 0.5691658307700884, "learning_rate": 3.722627737226277e-06, "loss": 0.2262, "step": 95865 }, { "epoch": 2.7991124217164716, "grad_norm": 0.5801471194880539, "learning_rate": 3.71992430386591e-06, "loss": 0.2396, "step": 95870 }, { "epoch": 2.7992584049867886, "grad_norm": 0.5487820762379854, "learning_rate": 3.7172208705055426e-06, "loss": 0.2354, "step": 95875 }, { "epoch": 2.7994043882571056, "grad_norm": 0.574003306825659, "learning_rate": 3.714517437145174e-06, "loss": 0.2365, "step": 95880 }, { "epoch": 2.799550371527423, "grad_norm": 0.6186688657363736, "learning_rate": 3.7118140037848068e-06, "loss": 0.2361, "step": 95885 }, { "epoch": 2.79969635479774, "grad_norm": 0.5599890029278727, "learning_rate": 3.709110570424439e-06, "loss": 0.2347, "step": 95890 }, { "epoch": 2.7998423380680575, "grad_norm": 0.5529837808972379, "learning_rate": 3.706407137064072e-06, "loss": 0.2382, "step": 95895 }, { "epoch": 2.7999883213383745, "grad_norm": 0.6449847382495645, "learning_rate": 3.7037037037037037e-06, "loss": 0.2394, "step": 95900 }, { "epoch": 2.800134304608692, "grad_norm": 0.6144015197968384, "learning_rate": 3.701000270343336e-06, "loss": 0.2569, "step": 95905 }, { "epoch": 2.800280287879009, "grad_norm": 0.6242192021075288, "learning_rate": 3.698296836982969e-06, "loss": 0.2597, "step": 95910 }, { "epoch": 2.8004262711493264, "grad_norm": 0.5809520837859888, "learning_rate": 3.6955934036226007e-06, "loss": 0.2599, "step": 95915 }, { "epoch": 2.8005722544196434, "grad_norm": 0.6353248120673403, "learning_rate": 3.692889970262233e-06, "loss": 0.249, "step": 95920 }, { "epoch": 2.800718237689961, "grad_norm": 0.5735667050023995, "learning_rate": 3.6901865369018657e-06, "loss": 0.2459, "step": 95925 }, { "epoch": 2.800864220960278, "grad_norm": 0.6185870605406424, "learning_rate": 3.687483103541498e-06, "loss": 0.2454, "step": 95930 }, { "epoch": 2.8010102042305953, "grad_norm": 0.5564690570268833, "learning_rate": 3.68477967018113e-06, "loss": 0.2303, "step": 95935 }, { "epoch": 2.8011561875009123, "grad_norm": 0.587552648595175, "learning_rate": 3.6820762368207627e-06, "loss": 0.2434, "step": 95940 }, { "epoch": 2.80130217077123, "grad_norm": 0.563732249728676, "learning_rate": 3.679372803460395e-06, "loss": 0.2508, "step": 95945 }, { "epoch": 2.801448154041547, "grad_norm": 0.5940412685464096, "learning_rate": 3.676669370100027e-06, "loss": 0.2478, "step": 95950 }, { "epoch": 2.801594137311864, "grad_norm": 0.6279332844061217, "learning_rate": 3.6739659367396597e-06, "loss": 0.2469, "step": 95955 }, { "epoch": 2.8017401205821812, "grad_norm": 0.6058426643259283, "learning_rate": 3.671262503379292e-06, "loss": 0.2375, "step": 95960 }, { "epoch": 2.8018861038524987, "grad_norm": 0.6406117904077894, "learning_rate": 3.668559070018924e-06, "loss": 0.2443, "step": 95965 }, { "epoch": 2.8020320871228157, "grad_norm": 0.616736809588044, "learning_rate": 3.6658556366585566e-06, "loss": 0.2491, "step": 95970 }, { "epoch": 2.8021780703931327, "grad_norm": 0.6129205209263526, "learning_rate": 3.663152203298189e-06, "loss": 0.2482, "step": 95975 }, { "epoch": 2.80232405366345, "grad_norm": 0.6143110045534458, "learning_rate": 3.6604487699378217e-06, "loss": 0.2431, "step": 95980 }, { "epoch": 2.8024700369337676, "grad_norm": 0.6215928670423502, "learning_rate": 3.657745336577453e-06, "loss": 0.2584, "step": 95985 }, { "epoch": 2.8026160202040846, "grad_norm": 0.5604595821977864, "learning_rate": 3.655041903217086e-06, "loss": 0.2401, "step": 95990 }, { "epoch": 2.8027620034744016, "grad_norm": 0.5897097633203092, "learning_rate": 3.652338469856718e-06, "loss": 0.2372, "step": 95995 }, { "epoch": 2.802907986744719, "grad_norm": 0.6334475724813355, "learning_rate": 3.64963503649635e-06, "loss": 0.2521, "step": 96000 }, { "epoch": 2.8030539700150365, "grad_norm": 0.5738316242904958, "learning_rate": 3.646931603135983e-06, "loss": 0.2554, "step": 96005 }, { "epoch": 2.8031999532853535, "grad_norm": 0.6067926100459613, "learning_rate": 3.644228169775615e-06, "loss": 0.2478, "step": 96010 }, { "epoch": 2.8033459365556705, "grad_norm": 0.5899587934479082, "learning_rate": 3.641524736415248e-06, "loss": 0.2623, "step": 96015 }, { "epoch": 2.803491919825988, "grad_norm": 0.5315839693152882, "learning_rate": 3.6388213030548798e-06, "loss": 0.234, "step": 96020 }, { "epoch": 2.803637903096305, "grad_norm": 0.565326732178002, "learning_rate": 3.636117869694512e-06, "loss": 0.2461, "step": 96025 }, { "epoch": 2.8037838863666225, "grad_norm": 0.621517441603281, "learning_rate": 3.633414436334145e-06, "loss": 0.2647, "step": 96030 }, { "epoch": 2.8039298696369395, "grad_norm": 0.5951189608118973, "learning_rate": 3.6307110029737767e-06, "loss": 0.2402, "step": 96035 }, { "epoch": 2.804075852907257, "grad_norm": 0.6055623968702741, "learning_rate": 3.628007569613409e-06, "loss": 0.2479, "step": 96040 }, { "epoch": 2.804221836177574, "grad_norm": 0.5955418329301202, "learning_rate": 3.6253041362530418e-06, "loss": 0.259, "step": 96045 }, { "epoch": 2.8043678194478914, "grad_norm": 0.5728540764844406, "learning_rate": 3.6226007028926737e-06, "loss": 0.2456, "step": 96050 }, { "epoch": 2.8045138027182084, "grad_norm": 0.5671313390494704, "learning_rate": 3.619897269532306e-06, "loss": 0.2443, "step": 96055 }, { "epoch": 2.804659785988526, "grad_norm": 0.542482856637249, "learning_rate": 3.6171938361719387e-06, "loss": 0.2327, "step": 96060 }, { "epoch": 2.804805769258843, "grad_norm": 0.6492607750571673, "learning_rate": 3.614490402811571e-06, "loss": 0.2376, "step": 96065 }, { "epoch": 2.8049517525291603, "grad_norm": 0.6182094664398946, "learning_rate": 3.611786969451203e-06, "loss": 0.243, "step": 96070 }, { "epoch": 2.8050977357994773, "grad_norm": 0.6044666855123846, "learning_rate": 3.6090835360908357e-06, "loss": 0.2337, "step": 96075 }, { "epoch": 2.8052437190697947, "grad_norm": 0.5707182020128575, "learning_rate": 3.606380102730468e-06, "loss": 0.2324, "step": 96080 }, { "epoch": 2.8053897023401118, "grad_norm": 0.5876789065661088, "learning_rate": 3.6036766693701e-06, "loss": 0.2384, "step": 96085 }, { "epoch": 2.805535685610429, "grad_norm": 0.5573470178072987, "learning_rate": 3.6009732360097326e-06, "loss": 0.2213, "step": 96090 }, { "epoch": 2.805681668880746, "grad_norm": 0.509528119206994, "learning_rate": 3.598269802649365e-06, "loss": 0.2297, "step": 96095 }, { "epoch": 2.8058276521510637, "grad_norm": 0.6262599560602481, "learning_rate": 3.5955663692889977e-06, "loss": 0.2569, "step": 96100 }, { "epoch": 2.8059736354213807, "grad_norm": 0.5693724277850977, "learning_rate": 3.592862935928629e-06, "loss": 0.2368, "step": 96105 }, { "epoch": 2.8061196186916977, "grad_norm": 0.5541037095955803, "learning_rate": 3.590159502568262e-06, "loss": 0.2277, "step": 96110 }, { "epoch": 2.806265601962015, "grad_norm": 0.6227099475412566, "learning_rate": 3.5874560692078942e-06, "loss": 0.2455, "step": 96115 }, { "epoch": 2.8064115852323326, "grad_norm": 0.5394226186283954, "learning_rate": 3.584752635847526e-06, "loss": 0.2284, "step": 96120 }, { "epoch": 2.8065575685026496, "grad_norm": 0.6081643414669603, "learning_rate": 3.582049202487159e-06, "loss": 0.2421, "step": 96125 }, { "epoch": 2.8067035517729666, "grad_norm": 0.5576905924965179, "learning_rate": 3.579345769126791e-06, "loss": 0.2422, "step": 96130 }, { "epoch": 2.806849535043284, "grad_norm": 0.5518710109486163, "learning_rate": 3.576642335766423e-06, "loss": 0.235, "step": 96135 }, { "epoch": 2.8069955183136015, "grad_norm": 0.5511827317371911, "learning_rate": 3.573938902406056e-06, "loss": 0.2427, "step": 96140 }, { "epoch": 2.8071415015839185, "grad_norm": 0.603465095675751, "learning_rate": 3.571235469045688e-06, "loss": 0.24, "step": 96145 }, { "epoch": 2.8072874848542355, "grad_norm": 0.6150278180604601, "learning_rate": 3.568532035685321e-06, "loss": 0.2463, "step": 96150 }, { "epoch": 2.807433468124553, "grad_norm": 0.6018295843110397, "learning_rate": 3.5658286023249528e-06, "loss": 0.2353, "step": 96155 }, { "epoch": 2.8075794513948704, "grad_norm": 0.5927009209074873, "learning_rate": 3.563125168964585e-06, "loss": 0.2492, "step": 96160 }, { "epoch": 2.8077254346651874, "grad_norm": 0.5745920492703066, "learning_rate": 3.560421735604218e-06, "loss": 0.2174, "step": 96165 }, { "epoch": 2.8078714179355044, "grad_norm": 0.5977441243516897, "learning_rate": 3.5577183022438497e-06, "loss": 0.2425, "step": 96170 }, { "epoch": 2.808017401205822, "grad_norm": 0.559947979885725, "learning_rate": 3.555014868883482e-06, "loss": 0.253, "step": 96175 }, { "epoch": 2.808163384476139, "grad_norm": 0.6117693874934849, "learning_rate": 3.5523114355231148e-06, "loss": 0.2384, "step": 96180 }, { "epoch": 2.8083093677464563, "grad_norm": 0.6179903592458784, "learning_rate": 3.549608002162747e-06, "loss": 0.2242, "step": 96185 }, { "epoch": 2.8084553510167733, "grad_norm": 0.6017306357054166, "learning_rate": 3.546904568802379e-06, "loss": 0.2446, "step": 96190 }, { "epoch": 2.808601334287091, "grad_norm": 0.5909270591431185, "learning_rate": 3.5442011354420117e-06, "loss": 0.2355, "step": 96195 }, { "epoch": 2.808747317557408, "grad_norm": 0.6084377796673037, "learning_rate": 3.541497702081644e-06, "loss": 0.2354, "step": 96200 }, { "epoch": 2.8088933008277253, "grad_norm": 0.5833120840672444, "learning_rate": 3.538794268721276e-06, "loss": 0.2464, "step": 96205 }, { "epoch": 2.8090392840980423, "grad_norm": 0.6250967015221972, "learning_rate": 3.5360908353609083e-06, "loss": 0.2383, "step": 96210 }, { "epoch": 2.8091852673683597, "grad_norm": 0.5854151726341075, "learning_rate": 3.533387402000541e-06, "loss": 0.2286, "step": 96215 }, { "epoch": 2.8093312506386767, "grad_norm": 0.6213543302134742, "learning_rate": 3.5306839686401737e-06, "loss": 0.2423, "step": 96220 }, { "epoch": 2.809477233908994, "grad_norm": 0.5781515119284264, "learning_rate": 3.527980535279805e-06, "loss": 0.2298, "step": 96225 }, { "epoch": 2.809623217179311, "grad_norm": 0.6045963837279733, "learning_rate": 3.525277101919438e-06, "loss": 0.2432, "step": 96230 }, { "epoch": 2.8097692004496286, "grad_norm": 0.5908795794630819, "learning_rate": 3.5225736685590703e-06, "loss": 0.2329, "step": 96235 }, { "epoch": 2.8099151837199456, "grad_norm": 0.5994349256437508, "learning_rate": 3.519870235198702e-06, "loss": 0.252, "step": 96240 }, { "epoch": 2.8100611669902626, "grad_norm": 0.5743857875900158, "learning_rate": 3.517166801838335e-06, "loss": 0.2392, "step": 96245 }, { "epoch": 2.81020715026058, "grad_norm": 0.6315453993604834, "learning_rate": 3.5144633684779672e-06, "loss": 0.2529, "step": 96250 }, { "epoch": 2.8103531335308976, "grad_norm": 0.6554229356127964, "learning_rate": 3.511759935117599e-06, "loss": 0.254, "step": 96255 }, { "epoch": 2.8104991168012146, "grad_norm": 0.6187216951535103, "learning_rate": 3.509056501757232e-06, "loss": 0.2365, "step": 96260 }, { "epoch": 2.8106451000715316, "grad_norm": 0.6190785237227321, "learning_rate": 3.506353068396864e-06, "loss": 0.2433, "step": 96265 }, { "epoch": 2.810791083341849, "grad_norm": 0.5726816182210643, "learning_rate": 3.503649635036497e-06, "loss": 0.2284, "step": 96270 }, { "epoch": 2.8109370666121665, "grad_norm": 0.5966146475398939, "learning_rate": 3.500946201676129e-06, "loss": 0.2449, "step": 96275 }, { "epoch": 2.8110830498824835, "grad_norm": 0.609995467315303, "learning_rate": 3.498242768315761e-06, "loss": 0.2495, "step": 96280 }, { "epoch": 2.8112290331528005, "grad_norm": 0.6372176426186327, "learning_rate": 3.495539334955394e-06, "loss": 0.2439, "step": 96285 }, { "epoch": 2.811375016423118, "grad_norm": 0.5881849014641826, "learning_rate": 3.4928359015950258e-06, "loss": 0.2512, "step": 96290 }, { "epoch": 2.8115209996934354, "grad_norm": 0.6033570263563869, "learning_rate": 3.490132468234658e-06, "loss": 0.2461, "step": 96295 }, { "epoch": 2.8116669829637524, "grad_norm": 0.598386159026042, "learning_rate": 3.487429034874291e-06, "loss": 0.2391, "step": 96300 }, { "epoch": 2.8118129662340694, "grad_norm": 0.5970199795699906, "learning_rate": 3.484725601513923e-06, "loss": 0.2435, "step": 96305 }, { "epoch": 2.811958949504387, "grad_norm": 0.6438863782253021, "learning_rate": 3.482022168153555e-06, "loss": 0.2612, "step": 96310 }, { "epoch": 2.812104932774704, "grad_norm": 0.548634567185984, "learning_rate": 3.4793187347931878e-06, "loss": 0.2394, "step": 96315 }, { "epoch": 2.8122509160450213, "grad_norm": 0.5878752296601435, "learning_rate": 3.47661530143282e-06, "loss": 0.2499, "step": 96320 }, { "epoch": 2.8123968993153383, "grad_norm": 0.5820695150568592, "learning_rate": 3.473911868072452e-06, "loss": 0.2431, "step": 96325 }, { "epoch": 2.8125428825856558, "grad_norm": 0.6040083996701398, "learning_rate": 3.4712084347120843e-06, "loss": 0.2433, "step": 96330 }, { "epoch": 2.8126888658559728, "grad_norm": 0.5927662037871932, "learning_rate": 3.468505001351717e-06, "loss": 0.2406, "step": 96335 }, { "epoch": 2.8128348491262902, "grad_norm": 0.6343963486247551, "learning_rate": 3.465801567991349e-06, "loss": 0.2468, "step": 96340 }, { "epoch": 2.8129808323966072, "grad_norm": 0.579868570258642, "learning_rate": 3.4630981346309812e-06, "loss": 0.2455, "step": 96345 }, { "epoch": 2.8131268156669247, "grad_norm": 0.6186123615813448, "learning_rate": 3.460394701270614e-06, "loss": 0.257, "step": 96350 }, { "epoch": 2.8132727989372417, "grad_norm": 0.6372572652270384, "learning_rate": 3.4576912679102463e-06, "loss": 0.2511, "step": 96355 }, { "epoch": 2.813418782207559, "grad_norm": 0.5651255012014266, "learning_rate": 3.454987834549878e-06, "loss": 0.2362, "step": 96360 }, { "epoch": 2.813564765477876, "grad_norm": 0.5915907618620552, "learning_rate": 3.452284401189511e-06, "loss": 0.249, "step": 96365 }, { "epoch": 2.8137107487481936, "grad_norm": 0.6163609241628794, "learning_rate": 3.4495809678291433e-06, "loss": 0.2353, "step": 96370 }, { "epoch": 2.8138567320185106, "grad_norm": 0.5702467751857999, "learning_rate": 3.446877534468775e-06, "loss": 0.2507, "step": 96375 }, { "epoch": 2.814002715288828, "grad_norm": 0.6034449165102055, "learning_rate": 3.444174101108408e-06, "loss": 0.2478, "step": 96380 }, { "epoch": 2.814148698559145, "grad_norm": 0.6030328670831353, "learning_rate": 3.44147066774804e-06, "loss": 0.2407, "step": 96385 }, { "epoch": 2.8142946818294625, "grad_norm": 0.6312864633354233, "learning_rate": 3.438767234387673e-06, "loss": 0.2466, "step": 96390 }, { "epoch": 2.8144406650997795, "grad_norm": 0.627580193608335, "learning_rate": 3.436063801027305e-06, "loss": 0.2393, "step": 96395 }, { "epoch": 2.8145866483700965, "grad_norm": 0.5961405646903202, "learning_rate": 3.433360367666937e-06, "loss": 0.24, "step": 96400 }, { "epoch": 2.814732631640414, "grad_norm": 0.5623621018019579, "learning_rate": 3.43065693430657e-06, "loss": 0.2503, "step": 96405 }, { "epoch": 2.8148786149107314, "grad_norm": 0.5851426267339954, "learning_rate": 3.427953500946202e-06, "loss": 0.2291, "step": 96410 }, { "epoch": 2.8150245981810484, "grad_norm": 0.5696846904785733, "learning_rate": 3.425250067585834e-06, "loss": 0.2417, "step": 96415 }, { "epoch": 2.8151705814513655, "grad_norm": 0.591861045663782, "learning_rate": 3.422546634225467e-06, "loss": 0.2292, "step": 96420 }, { "epoch": 2.815316564721683, "grad_norm": 0.6043171218545718, "learning_rate": 3.4198432008650983e-06, "loss": 0.2377, "step": 96425 }, { "epoch": 2.8154625479920004, "grad_norm": 0.5741881429367949, "learning_rate": 3.417139767504731e-06, "loss": 0.2429, "step": 96430 }, { "epoch": 2.8156085312623174, "grad_norm": 0.5227280600406371, "learning_rate": 3.4144363341443634e-06, "loss": 0.2289, "step": 96435 }, { "epoch": 2.8157545145326344, "grad_norm": 0.5919550987749743, "learning_rate": 3.411732900783996e-06, "loss": 0.2379, "step": 96440 }, { "epoch": 2.815900497802952, "grad_norm": 0.5957087202141187, "learning_rate": 3.409029467423628e-06, "loss": 0.2371, "step": 96445 }, { "epoch": 2.8160464810732693, "grad_norm": 0.601265955623467, "learning_rate": 3.4063260340632603e-06, "loss": 0.2406, "step": 96450 }, { "epoch": 2.8161924643435863, "grad_norm": 0.573172802191685, "learning_rate": 3.403622600702893e-06, "loss": 0.2329, "step": 96455 }, { "epoch": 2.8163384476139033, "grad_norm": 0.5737177684543762, "learning_rate": 3.400919167342525e-06, "loss": 0.2178, "step": 96460 }, { "epoch": 2.8164844308842207, "grad_norm": 0.5662329530319584, "learning_rate": 3.3982157339821573e-06, "loss": 0.2527, "step": 96465 }, { "epoch": 2.8166304141545377, "grad_norm": 0.5778660018468601, "learning_rate": 3.39551230062179e-06, "loss": 0.2398, "step": 96470 }, { "epoch": 2.816776397424855, "grad_norm": 0.6108406565061258, "learning_rate": 3.3928088672614223e-06, "loss": 0.231, "step": 96475 }, { "epoch": 2.816922380695172, "grad_norm": 0.639009114341134, "learning_rate": 3.3901054339010542e-06, "loss": 0.2344, "step": 96480 }, { "epoch": 2.8170683639654897, "grad_norm": 0.5692815872464505, "learning_rate": 3.387402000540687e-06, "loss": 0.2388, "step": 96485 }, { "epoch": 2.8172143472358067, "grad_norm": 0.562854805943027, "learning_rate": 3.3846985671803193e-06, "loss": 0.2377, "step": 96490 }, { "epoch": 2.817360330506124, "grad_norm": 0.6125252487643416, "learning_rate": 3.381995133819951e-06, "loss": 0.2502, "step": 96495 }, { "epoch": 2.817506313776441, "grad_norm": 0.5642548335340369, "learning_rate": 3.379291700459584e-06, "loss": 0.2413, "step": 96500 }, { "epoch": 2.8176522970467586, "grad_norm": 0.6030552749059425, "learning_rate": 3.3765882670992162e-06, "loss": 0.2352, "step": 96505 }, { "epoch": 2.8177982803170756, "grad_norm": 0.5757929518542785, "learning_rate": 3.373884833738848e-06, "loss": 0.2497, "step": 96510 }, { "epoch": 2.817944263587393, "grad_norm": 0.6310114080027914, "learning_rate": 3.371181400378481e-06, "loss": 0.2661, "step": 96515 }, { "epoch": 2.81809024685771, "grad_norm": 0.587536189835475, "learning_rate": 3.368477967018113e-06, "loss": 0.2627, "step": 96520 }, { "epoch": 2.8182362301280275, "grad_norm": 0.5912966099765566, "learning_rate": 3.365774533657746e-06, "loss": 0.2481, "step": 96525 }, { "epoch": 2.8183822133983445, "grad_norm": 0.591631599299084, "learning_rate": 3.363071100297378e-06, "loss": 0.2429, "step": 96530 }, { "epoch": 2.8185281966686615, "grad_norm": 0.565288800025086, "learning_rate": 3.36036766693701e-06, "loss": 0.2203, "step": 96535 }, { "epoch": 2.818674179938979, "grad_norm": 0.566566235185064, "learning_rate": 3.357664233576643e-06, "loss": 0.2414, "step": 96540 }, { "epoch": 2.8188201632092964, "grad_norm": 0.5388515311207406, "learning_rate": 3.3549608002162744e-06, "loss": 0.2309, "step": 96545 }, { "epoch": 2.8189661464796134, "grad_norm": 0.5924037951200617, "learning_rate": 3.352257366855907e-06, "loss": 0.2459, "step": 96550 }, { "epoch": 2.8191121297499304, "grad_norm": 0.5568310868080981, "learning_rate": 3.3495539334955394e-06, "loss": 0.2328, "step": 96555 }, { "epoch": 2.819258113020248, "grad_norm": 0.5500109078495156, "learning_rate": 3.346850500135172e-06, "loss": 0.2504, "step": 96560 }, { "epoch": 2.8194040962905653, "grad_norm": 0.6186479193232578, "learning_rate": 3.344147066774804e-06, "loss": 0.2417, "step": 96565 }, { "epoch": 2.8195500795608823, "grad_norm": 0.5631743447163305, "learning_rate": 3.3414436334144364e-06, "loss": 0.2361, "step": 96570 }, { "epoch": 2.8196960628311993, "grad_norm": 0.6222403737682286, "learning_rate": 3.338740200054069e-06, "loss": 0.2465, "step": 96575 }, { "epoch": 2.819842046101517, "grad_norm": 0.5787052849382535, "learning_rate": 3.336036766693701e-06, "loss": 0.2483, "step": 96580 }, { "epoch": 2.8199880293718342, "grad_norm": 0.5610081055531733, "learning_rate": 3.3333333333333333e-06, "loss": 0.243, "step": 96585 }, { "epoch": 2.8201340126421512, "grad_norm": 0.6133297921656147, "learning_rate": 3.330629899972966e-06, "loss": 0.2337, "step": 96590 }, { "epoch": 2.8202799959124683, "grad_norm": 0.6499911770815526, "learning_rate": 3.327926466612598e-06, "loss": 0.2564, "step": 96595 }, { "epoch": 2.8204259791827857, "grad_norm": 0.6284089992721859, "learning_rate": 3.3252230332522303e-06, "loss": 0.245, "step": 96600 }, { "epoch": 2.8205719624531027, "grad_norm": 0.5675626492216561, "learning_rate": 3.322519599891863e-06, "loss": 0.2421, "step": 96605 }, { "epoch": 2.82071794572342, "grad_norm": 0.5449908005786149, "learning_rate": 3.3198161665314953e-06, "loss": 0.2304, "step": 96610 }, { "epoch": 2.820863928993737, "grad_norm": 0.6045110936843546, "learning_rate": 3.3171127331711272e-06, "loss": 0.2464, "step": 96615 }, { "epoch": 2.8210099122640546, "grad_norm": 0.5850871264423188, "learning_rate": 3.31440929981076e-06, "loss": 0.2409, "step": 96620 }, { "epoch": 2.8211558955343716, "grad_norm": 0.5855684586676453, "learning_rate": 3.3117058664503923e-06, "loss": 0.2605, "step": 96625 }, { "epoch": 2.821301878804689, "grad_norm": 0.5914891591053809, "learning_rate": 3.309002433090024e-06, "loss": 0.2401, "step": 96630 }, { "epoch": 2.821447862075006, "grad_norm": 0.5750706741234161, "learning_rate": 3.306298999729657e-06, "loss": 0.2496, "step": 96635 }, { "epoch": 2.8215938453453235, "grad_norm": 0.5710519921092496, "learning_rate": 3.3035955663692892e-06, "loss": 0.2517, "step": 96640 }, { "epoch": 2.8217398286156405, "grad_norm": 0.5746979921310787, "learning_rate": 3.300892133008922e-06, "loss": 0.226, "step": 96645 }, { "epoch": 2.821885811885958, "grad_norm": 0.6194063451960117, "learning_rate": 3.2981886996485534e-06, "loss": 0.2445, "step": 96650 }, { "epoch": 2.822031795156275, "grad_norm": 0.5938775466628844, "learning_rate": 3.295485266288186e-06, "loss": 0.2447, "step": 96655 }, { "epoch": 2.8221777784265925, "grad_norm": 0.6106460605728216, "learning_rate": 3.292781832927819e-06, "loss": 0.2385, "step": 96660 }, { "epoch": 2.8223237616969095, "grad_norm": 0.6312604420057226, "learning_rate": 3.2900783995674504e-06, "loss": 0.2426, "step": 96665 }, { "epoch": 2.822469744967227, "grad_norm": 0.6189152914324398, "learning_rate": 3.287374966207083e-06, "loss": 0.2494, "step": 96670 }, { "epoch": 2.822615728237544, "grad_norm": 0.5968620506385038, "learning_rate": 3.2846715328467155e-06, "loss": 0.2241, "step": 96675 }, { "epoch": 2.8227617115078614, "grad_norm": 0.5596726616435743, "learning_rate": 3.2819680994863474e-06, "loss": 0.2336, "step": 96680 }, { "epoch": 2.8229076947781784, "grad_norm": 0.5330295171239304, "learning_rate": 3.27926466612598e-06, "loss": 0.226, "step": 96685 }, { "epoch": 2.8230536780484954, "grad_norm": 0.5814328151527566, "learning_rate": 3.2765612327656124e-06, "loss": 0.2278, "step": 96690 }, { "epoch": 2.823199661318813, "grad_norm": 0.5919494359543955, "learning_rate": 3.273857799405245e-06, "loss": 0.2403, "step": 96695 }, { "epoch": 2.8233456445891303, "grad_norm": 0.5606085926447634, "learning_rate": 3.271154366044877e-06, "loss": 0.2308, "step": 96700 }, { "epoch": 2.8234916278594473, "grad_norm": 0.6229553058304431, "learning_rate": 3.2684509326845094e-06, "loss": 0.2358, "step": 96705 }, { "epoch": 2.8236376111297643, "grad_norm": 0.5560761248292454, "learning_rate": 3.265747499324142e-06, "loss": 0.2289, "step": 96710 }, { "epoch": 2.8237835944000818, "grad_norm": 0.5635929997767727, "learning_rate": 3.263044065963774e-06, "loss": 0.2356, "step": 96715 }, { "epoch": 2.823929577670399, "grad_norm": 0.65990020376646, "learning_rate": 3.2603406326034063e-06, "loss": 0.2478, "step": 96720 }, { "epoch": 2.824075560940716, "grad_norm": 0.58385635415997, "learning_rate": 3.257637199243039e-06, "loss": 0.2422, "step": 96725 }, { "epoch": 2.824221544211033, "grad_norm": 0.5871277469670634, "learning_rate": 3.2549337658826714e-06, "loss": 0.2365, "step": 96730 }, { "epoch": 2.8243675274813507, "grad_norm": 0.6005964259476079, "learning_rate": 3.2522303325223033e-06, "loss": 0.2567, "step": 96735 }, { "epoch": 2.824513510751668, "grad_norm": 0.6270981838908205, "learning_rate": 3.249526899161936e-06, "loss": 0.2473, "step": 96740 }, { "epoch": 2.824659494021985, "grad_norm": 0.5421559035898342, "learning_rate": 3.2468234658015683e-06, "loss": 0.2497, "step": 96745 }, { "epoch": 2.824805477292302, "grad_norm": 0.5475361761339407, "learning_rate": 3.2441200324412002e-06, "loss": 0.2289, "step": 96750 }, { "epoch": 2.8249514605626196, "grad_norm": 0.5694787161387004, "learning_rate": 3.241416599080833e-06, "loss": 0.2574, "step": 96755 }, { "epoch": 2.8250974438329366, "grad_norm": 0.5299812892782708, "learning_rate": 3.2387131657204653e-06, "loss": 0.2281, "step": 96760 }, { "epoch": 2.825243427103254, "grad_norm": 0.5856769272555526, "learning_rate": 3.236009732360097e-06, "loss": 0.2462, "step": 96765 }, { "epoch": 2.825389410373571, "grad_norm": 0.5958005671546912, "learning_rate": 3.2333062989997295e-06, "loss": 0.2321, "step": 96770 }, { "epoch": 2.8255353936438885, "grad_norm": 0.5304256824187243, "learning_rate": 3.2306028656393622e-06, "loss": 0.2455, "step": 96775 }, { "epoch": 2.8256813769142055, "grad_norm": 0.5665608103738291, "learning_rate": 3.2278994322789945e-06, "loss": 0.2421, "step": 96780 }, { "epoch": 2.825827360184523, "grad_norm": 0.6034484673702196, "learning_rate": 3.2251959989186264e-06, "loss": 0.2427, "step": 96785 }, { "epoch": 2.82597334345484, "grad_norm": 0.640238890248026, "learning_rate": 3.222492565558259e-06, "loss": 0.2626, "step": 96790 }, { "epoch": 2.8261193267251574, "grad_norm": 0.5903007390270676, "learning_rate": 3.2197891321978915e-06, "loss": 0.252, "step": 96795 }, { "epoch": 2.8262653099954744, "grad_norm": 0.6298921707203554, "learning_rate": 3.2170856988375234e-06, "loss": 0.2315, "step": 96800 }, { "epoch": 2.826411293265792, "grad_norm": 0.5459114461689577, "learning_rate": 3.214382265477156e-06, "loss": 0.2242, "step": 96805 }, { "epoch": 2.826557276536109, "grad_norm": 0.57411589486528, "learning_rate": 3.2116788321167884e-06, "loss": 0.2492, "step": 96810 }, { "epoch": 2.8267032598064263, "grad_norm": 0.603941888015851, "learning_rate": 3.208975398756421e-06, "loss": 0.231, "step": 96815 }, { "epoch": 2.8268492430767433, "grad_norm": 0.5453724646607544, "learning_rate": 3.206271965396053e-06, "loss": 0.2384, "step": 96820 }, { "epoch": 2.8269952263470604, "grad_norm": 0.6120405790750725, "learning_rate": 3.2035685320356854e-06, "loss": 0.2404, "step": 96825 }, { "epoch": 2.827141209617378, "grad_norm": 0.5676821567341982, "learning_rate": 3.200865098675318e-06, "loss": 0.2509, "step": 96830 }, { "epoch": 2.8272871928876953, "grad_norm": 0.6155870154706496, "learning_rate": 3.19816166531495e-06, "loss": 0.2486, "step": 96835 }, { "epoch": 2.8274331761580123, "grad_norm": 0.6096788356842772, "learning_rate": 3.1954582319545824e-06, "loss": 0.2439, "step": 96840 }, { "epoch": 2.8275791594283293, "grad_norm": 0.6433392925585847, "learning_rate": 3.192754798594215e-06, "loss": 0.2517, "step": 96845 }, { "epoch": 2.8277251426986467, "grad_norm": 0.6022076140616601, "learning_rate": 3.1900513652338474e-06, "loss": 0.2412, "step": 96850 }, { "epoch": 2.827871125968964, "grad_norm": 0.5777191819822386, "learning_rate": 3.1873479318734793e-06, "loss": 0.239, "step": 96855 }, { "epoch": 2.828017109239281, "grad_norm": 0.5982419911584644, "learning_rate": 3.184644498513112e-06, "loss": 0.2322, "step": 96860 }, { "epoch": 2.828163092509598, "grad_norm": 0.6673240652021667, "learning_rate": 3.1819410651527444e-06, "loss": 0.2437, "step": 96865 }, { "epoch": 2.8283090757799156, "grad_norm": 0.6583039667187294, "learning_rate": 3.1792376317923763e-06, "loss": 0.2483, "step": 96870 }, { "epoch": 2.828455059050233, "grad_norm": 0.6387306483835555, "learning_rate": 3.1765341984320086e-06, "loss": 0.2445, "step": 96875 }, { "epoch": 2.82860104232055, "grad_norm": 0.6170554088774381, "learning_rate": 3.1738307650716413e-06, "loss": 0.2519, "step": 96880 }, { "epoch": 2.828747025590867, "grad_norm": 0.6159630147739139, "learning_rate": 3.171127331711273e-06, "loss": 0.2374, "step": 96885 }, { "epoch": 2.8288930088611846, "grad_norm": 0.5864315648477578, "learning_rate": 3.1684238983509055e-06, "loss": 0.2384, "step": 96890 }, { "epoch": 2.829038992131502, "grad_norm": 0.5873087141330123, "learning_rate": 3.1657204649905383e-06, "loss": 0.2392, "step": 96895 }, { "epoch": 2.829184975401819, "grad_norm": 0.5876724836368946, "learning_rate": 3.1630170316301706e-06, "loss": 0.2415, "step": 96900 }, { "epoch": 2.829330958672136, "grad_norm": 0.6083876182288098, "learning_rate": 3.1603135982698025e-06, "loss": 0.2423, "step": 96905 }, { "epoch": 2.8294769419424535, "grad_norm": 0.6280017759566179, "learning_rate": 3.1576101649094352e-06, "loss": 0.2289, "step": 96910 }, { "epoch": 2.8296229252127705, "grad_norm": 0.6254059093239833, "learning_rate": 3.1549067315490675e-06, "loss": 0.2387, "step": 96915 }, { "epoch": 2.829768908483088, "grad_norm": 0.6064894369774665, "learning_rate": 3.1522032981886994e-06, "loss": 0.2373, "step": 96920 }, { "epoch": 2.829914891753405, "grad_norm": 0.6040243220547898, "learning_rate": 3.149499864828332e-06, "loss": 0.2457, "step": 96925 }, { "epoch": 2.8300608750237224, "grad_norm": 0.6214163883099739, "learning_rate": 3.1467964314679645e-06, "loss": 0.2251, "step": 96930 }, { "epoch": 2.8302068582940394, "grad_norm": 0.5725099548635338, "learning_rate": 3.1440929981075972e-06, "loss": 0.2405, "step": 96935 }, { "epoch": 2.830352841564357, "grad_norm": 0.6515167604743649, "learning_rate": 3.141389564747229e-06, "loss": 0.2487, "step": 96940 }, { "epoch": 2.830498824834674, "grad_norm": 0.6479706917466062, "learning_rate": 3.1386861313868614e-06, "loss": 0.2308, "step": 96945 }, { "epoch": 2.8306448081049913, "grad_norm": 0.5181939303791548, "learning_rate": 3.135982698026494e-06, "loss": 0.2378, "step": 96950 }, { "epoch": 2.8307907913753083, "grad_norm": 0.5908903314911308, "learning_rate": 3.133279264666126e-06, "loss": 0.2379, "step": 96955 }, { "epoch": 2.8309367746456258, "grad_norm": 0.5731840567651966, "learning_rate": 3.1305758313057584e-06, "loss": 0.2496, "step": 96960 }, { "epoch": 2.8310827579159428, "grad_norm": 0.5872567243382745, "learning_rate": 3.127872397945391e-06, "loss": 0.246, "step": 96965 }, { "epoch": 2.8312287411862602, "grad_norm": 0.5614843265427822, "learning_rate": 3.125168964585023e-06, "loss": 0.254, "step": 96970 }, { "epoch": 2.8313747244565772, "grad_norm": 0.5958278899145721, "learning_rate": 3.1224655312246553e-06, "loss": 0.2466, "step": 96975 }, { "epoch": 2.8315207077268942, "grad_norm": 0.6073236482667022, "learning_rate": 3.119762097864288e-06, "loss": 0.2507, "step": 96980 }, { "epoch": 2.8316666909972117, "grad_norm": 0.5846272536106814, "learning_rate": 3.11705866450392e-06, "loss": 0.243, "step": 96985 }, { "epoch": 2.831812674267529, "grad_norm": 0.5650875115683687, "learning_rate": 3.1143552311435527e-06, "loss": 0.2402, "step": 96990 }, { "epoch": 2.831958657537846, "grad_norm": 0.607127395002437, "learning_rate": 3.1116517977831846e-06, "loss": 0.2489, "step": 96995 }, { "epoch": 2.832104640808163, "grad_norm": 0.5961623157849879, "learning_rate": 3.108948364422817e-06, "loss": 0.2358, "step": 97000 }, { "epoch": 2.8322506240784806, "grad_norm": 0.5818214533505182, "learning_rate": 3.1062449310624497e-06, "loss": 0.2399, "step": 97005 }, { "epoch": 2.832396607348798, "grad_norm": 0.5743483926846948, "learning_rate": 3.1035414977020816e-06, "loss": 0.2384, "step": 97010 }, { "epoch": 2.832542590619115, "grad_norm": 0.6112928495595104, "learning_rate": 3.1008380643417143e-06, "loss": 0.2342, "step": 97015 }, { "epoch": 2.832688573889432, "grad_norm": 0.6173335264389146, "learning_rate": 3.0981346309813466e-06, "loss": 0.2486, "step": 97020 }, { "epoch": 2.8328345571597495, "grad_norm": 0.607286650142808, "learning_rate": 3.0954311976209785e-06, "loss": 0.2551, "step": 97025 }, { "epoch": 2.832980540430067, "grad_norm": 0.6072486124890457, "learning_rate": 3.0927277642606113e-06, "loss": 0.2433, "step": 97030 }, { "epoch": 2.833126523700384, "grad_norm": 0.6305769966727937, "learning_rate": 3.0900243309002436e-06, "loss": 0.2371, "step": 97035 }, { "epoch": 2.833272506970701, "grad_norm": 0.5877849188583488, "learning_rate": 3.087320897539876e-06, "loss": 0.2396, "step": 97040 }, { "epoch": 2.8334184902410184, "grad_norm": 0.5405007963722072, "learning_rate": 3.084617464179508e-06, "loss": 0.2623, "step": 97045 }, { "epoch": 2.8335644735113354, "grad_norm": 0.5712027773624295, "learning_rate": 3.0819140308191405e-06, "loss": 0.2484, "step": 97050 }, { "epoch": 2.833710456781653, "grad_norm": 0.5811217486491371, "learning_rate": 3.079210597458773e-06, "loss": 0.2344, "step": 97055 }, { "epoch": 2.83385644005197, "grad_norm": 0.6207568307550525, "learning_rate": 3.076507164098405e-06, "loss": 0.2422, "step": 97060 }, { "epoch": 2.8340024233222874, "grad_norm": 0.5796846133034553, "learning_rate": 3.0738037307380375e-06, "loss": 0.2516, "step": 97065 }, { "epoch": 2.8341484065926044, "grad_norm": 0.5591700583311916, "learning_rate": 3.07110029737767e-06, "loss": 0.2251, "step": 97070 }, { "epoch": 2.834294389862922, "grad_norm": 0.6208304384305262, "learning_rate": 3.068396864017302e-06, "loss": 0.2405, "step": 97075 }, { "epoch": 2.834440373133239, "grad_norm": 0.58977456318053, "learning_rate": 3.0656934306569344e-06, "loss": 0.2433, "step": 97080 }, { "epoch": 2.8345863564035563, "grad_norm": 0.5517919164887672, "learning_rate": 3.0629899972965667e-06, "loss": 0.2312, "step": 97085 }, { "epoch": 2.8347323396738733, "grad_norm": 0.5748846286490942, "learning_rate": 3.060286563936199e-06, "loss": 0.221, "step": 97090 }, { "epoch": 2.8348783229441907, "grad_norm": 0.6178244722848047, "learning_rate": 3.0575831305758314e-06, "loss": 0.239, "step": 97095 }, { "epoch": 2.8350243062145077, "grad_norm": 0.6102602478242652, "learning_rate": 3.054879697215464e-06, "loss": 0.2487, "step": 97100 }, { "epoch": 2.835170289484825, "grad_norm": 0.5576847586501213, "learning_rate": 3.052176263855096e-06, "loss": 0.2405, "step": 97105 }, { "epoch": 2.835316272755142, "grad_norm": 0.6004276174330215, "learning_rate": 3.0494728304947283e-06, "loss": 0.244, "step": 97110 }, { "epoch": 2.8354622560254596, "grad_norm": 0.5849535189593198, "learning_rate": 3.0467693971343606e-06, "loss": 0.2322, "step": 97115 }, { "epoch": 2.8356082392957767, "grad_norm": 0.5815427525001553, "learning_rate": 3.044065963773993e-06, "loss": 0.2431, "step": 97120 }, { "epoch": 2.835754222566094, "grad_norm": 0.5612731394042221, "learning_rate": 3.0413625304136257e-06, "loss": 0.2439, "step": 97125 }, { "epoch": 2.835900205836411, "grad_norm": 0.5440221948113632, "learning_rate": 3.0386590970532576e-06, "loss": 0.2259, "step": 97130 }, { "epoch": 2.836046189106728, "grad_norm": 0.5991302539165246, "learning_rate": 3.0359556636928903e-06, "loss": 0.2414, "step": 97135 }, { "epoch": 2.8361921723770456, "grad_norm": 0.576892857914352, "learning_rate": 3.0332522303325227e-06, "loss": 0.2226, "step": 97140 }, { "epoch": 2.836338155647363, "grad_norm": 0.5172817422711192, "learning_rate": 3.0305487969721546e-06, "loss": 0.2317, "step": 97145 }, { "epoch": 2.83648413891768, "grad_norm": 0.6451540579138632, "learning_rate": 3.0278453636117873e-06, "loss": 0.2382, "step": 97150 }, { "epoch": 2.836630122187997, "grad_norm": 0.6091095361052609, "learning_rate": 3.025141930251419e-06, "loss": 0.2374, "step": 97155 }, { "epoch": 2.8367761054583145, "grad_norm": 0.5728364884235344, "learning_rate": 3.022438496891052e-06, "loss": 0.265, "step": 97160 }, { "epoch": 2.836922088728632, "grad_norm": 0.5836063686075356, "learning_rate": 3.0197350635306842e-06, "loss": 0.2364, "step": 97165 }, { "epoch": 2.837068071998949, "grad_norm": 0.5759842183718105, "learning_rate": 3.017031630170316e-06, "loss": 0.2488, "step": 97170 }, { "epoch": 2.837214055269266, "grad_norm": 0.5923048036059222, "learning_rate": 3.014328196809949e-06, "loss": 0.247, "step": 97175 }, { "epoch": 2.8373600385395834, "grad_norm": 0.8215498948548469, "learning_rate": 3.011624763449581e-06, "loss": 0.2436, "step": 97180 }, { "epoch": 2.837506021809901, "grad_norm": 0.5980146214209301, "learning_rate": 3.0089213300892135e-06, "loss": 0.2331, "step": 97185 }, { "epoch": 2.837652005080218, "grad_norm": 0.598137553340668, "learning_rate": 3.006217896728846e-06, "loss": 0.2589, "step": 97190 }, { "epoch": 2.837797988350535, "grad_norm": 0.5917873884135957, "learning_rate": 3.003514463368478e-06, "loss": 0.2469, "step": 97195 }, { "epoch": 2.8379439716208523, "grad_norm": 0.589518353513358, "learning_rate": 3.0008110300081105e-06, "loss": 0.2375, "step": 97200 }, { "epoch": 2.8380899548911693, "grad_norm": 0.6325538195017136, "learning_rate": 2.9981075966477428e-06, "loss": 0.2504, "step": 97205 }, { "epoch": 2.838235938161487, "grad_norm": 0.5829882734585484, "learning_rate": 2.995404163287375e-06, "loss": 0.2371, "step": 97210 }, { "epoch": 2.838381921431804, "grad_norm": 0.5988275106608385, "learning_rate": 2.9927007299270074e-06, "loss": 0.2352, "step": 97215 }, { "epoch": 2.8385279047021212, "grad_norm": 0.58056477422228, "learning_rate": 2.9899972965666397e-06, "loss": 0.2427, "step": 97220 }, { "epoch": 2.8386738879724382, "grad_norm": 0.5710482168842975, "learning_rate": 2.987293863206272e-06, "loss": 0.2436, "step": 97225 }, { "epoch": 2.8388198712427557, "grad_norm": 0.5811248086586431, "learning_rate": 2.9845904298459044e-06, "loss": 0.235, "step": 97230 }, { "epoch": 2.8389658545130727, "grad_norm": 0.5947049483787415, "learning_rate": 2.9818869964855367e-06, "loss": 0.2331, "step": 97235 }, { "epoch": 2.83911183778339, "grad_norm": 0.568794265574102, "learning_rate": 2.979183563125169e-06, "loss": 0.268, "step": 97240 }, { "epoch": 2.839257821053707, "grad_norm": 0.5908225171798076, "learning_rate": 2.9764801297648017e-06, "loss": 0.2336, "step": 97245 }, { "epoch": 2.8394038043240246, "grad_norm": 0.6103507769551699, "learning_rate": 2.9737766964044336e-06, "loss": 0.2408, "step": 97250 }, { "epoch": 2.8395497875943416, "grad_norm": 0.6366548875426605, "learning_rate": 2.971073263044066e-06, "loss": 0.245, "step": 97255 }, { "epoch": 2.839695770864659, "grad_norm": 0.5824472685695081, "learning_rate": 2.9683698296836987e-06, "loss": 0.2302, "step": 97260 }, { "epoch": 2.839841754134976, "grad_norm": 0.6298106402971363, "learning_rate": 2.9656663963233306e-06, "loss": 0.2478, "step": 97265 }, { "epoch": 2.839987737405293, "grad_norm": 0.6445535766358373, "learning_rate": 2.9629629629629633e-06, "loss": 0.2506, "step": 97270 }, { "epoch": 2.8401337206756105, "grad_norm": 0.5752969094406764, "learning_rate": 2.9602595296025952e-06, "loss": 0.2423, "step": 97275 }, { "epoch": 2.840279703945928, "grad_norm": 0.6128005244479414, "learning_rate": 2.9575560962422275e-06, "loss": 0.2338, "step": 97280 }, { "epoch": 2.840425687216245, "grad_norm": 0.6201120576780542, "learning_rate": 2.9548526628818603e-06, "loss": 0.2393, "step": 97285 }, { "epoch": 2.840571670486562, "grad_norm": 0.5513592865604681, "learning_rate": 2.952149229521492e-06, "loss": 0.243, "step": 97290 }, { "epoch": 2.8407176537568795, "grad_norm": 0.5427103915804433, "learning_rate": 2.949445796161125e-06, "loss": 0.2458, "step": 97295 }, { "epoch": 2.840863637027197, "grad_norm": 0.5648376752316089, "learning_rate": 2.9467423628007572e-06, "loss": 0.2516, "step": 97300 }, { "epoch": 2.841009620297514, "grad_norm": 0.5845280643543433, "learning_rate": 2.9440389294403896e-06, "loss": 0.2337, "step": 97305 }, { "epoch": 2.841155603567831, "grad_norm": 0.581653423378922, "learning_rate": 2.941335496080022e-06, "loss": 0.2266, "step": 97310 }, { "epoch": 2.8413015868381484, "grad_norm": 0.5826590217764881, "learning_rate": 2.9386320627196538e-06, "loss": 0.2374, "step": 97315 }, { "epoch": 2.841447570108466, "grad_norm": 0.6081105189687931, "learning_rate": 2.9359286293592865e-06, "loss": 0.2468, "step": 97320 }, { "epoch": 2.841593553378783, "grad_norm": 0.6560696225074848, "learning_rate": 2.933225195998919e-06, "loss": 0.2573, "step": 97325 }, { "epoch": 2.8417395366491, "grad_norm": 0.5940242614623936, "learning_rate": 2.930521762638551e-06, "loss": 0.2505, "step": 97330 }, { "epoch": 2.8418855199194173, "grad_norm": 0.5672249472863836, "learning_rate": 2.9278183292781835e-06, "loss": 0.2419, "step": 97335 }, { "epoch": 2.8420315031897343, "grad_norm": 0.5839291291756411, "learning_rate": 2.9251148959178158e-06, "loss": 0.2369, "step": 97340 }, { "epoch": 2.8421774864600517, "grad_norm": 0.5822535657268729, "learning_rate": 2.922411462557448e-06, "loss": 0.2355, "step": 97345 }, { "epoch": 2.8423234697303688, "grad_norm": 0.568522850599185, "learning_rate": 2.9197080291970804e-06, "loss": 0.2321, "step": 97350 }, { "epoch": 2.842469453000686, "grad_norm": 0.5829198586347688, "learning_rate": 2.9170045958367127e-06, "loss": 0.2436, "step": 97355 }, { "epoch": 2.842615436271003, "grad_norm": 0.6235121013731624, "learning_rate": 2.914301162476345e-06, "loss": 0.2417, "step": 97360 }, { "epoch": 2.8427614195413207, "grad_norm": 0.5591930671888452, "learning_rate": 2.9115977291159778e-06, "loss": 0.2385, "step": 97365 }, { "epoch": 2.8429074028116377, "grad_norm": 0.5557297939244384, "learning_rate": 2.9088942957556097e-06, "loss": 0.2374, "step": 97370 }, { "epoch": 2.843053386081955, "grad_norm": 0.5577432144699498, "learning_rate": 2.906190862395242e-06, "loss": 0.2343, "step": 97375 }, { "epoch": 2.843199369352272, "grad_norm": 0.5846724946366509, "learning_rate": 2.9034874290348743e-06, "loss": 0.2439, "step": 97380 }, { "epoch": 2.8433453526225896, "grad_norm": 0.598932177484111, "learning_rate": 2.9007839956745066e-06, "loss": 0.2445, "step": 97385 }, { "epoch": 2.8434913358929066, "grad_norm": 0.605760918471932, "learning_rate": 2.8980805623141394e-06, "loss": 0.2482, "step": 97390 }, { "epoch": 2.843637319163224, "grad_norm": 0.596233537489872, "learning_rate": 2.8953771289537713e-06, "loss": 0.2519, "step": 97395 }, { "epoch": 2.843783302433541, "grad_norm": 0.59294319725543, "learning_rate": 2.8926736955934036e-06, "loss": 0.234, "step": 97400 }, { "epoch": 2.8439292857038585, "grad_norm": 0.5432238644236131, "learning_rate": 2.8899702622330363e-06, "loss": 0.2325, "step": 97405 }, { "epoch": 2.8440752689741755, "grad_norm": 0.578901455060534, "learning_rate": 2.8872668288726682e-06, "loss": 0.233, "step": 97410 }, { "epoch": 2.844221252244493, "grad_norm": 0.5669047800168912, "learning_rate": 2.884563395512301e-06, "loss": 0.2418, "step": 97415 }, { "epoch": 2.84436723551481, "grad_norm": 0.6224855459618686, "learning_rate": 2.8818599621519333e-06, "loss": 0.2395, "step": 97420 }, { "epoch": 2.844513218785127, "grad_norm": 0.6223695328080144, "learning_rate": 2.879156528791565e-06, "loss": 0.246, "step": 97425 }, { "epoch": 2.8446592020554444, "grad_norm": 0.5191524239434583, "learning_rate": 2.876453095431198e-06, "loss": 0.2167, "step": 97430 }, { "epoch": 2.844805185325762, "grad_norm": 0.639380566327423, "learning_rate": 2.87374966207083e-06, "loss": 0.2288, "step": 97435 }, { "epoch": 2.844951168596079, "grad_norm": 0.612439889703472, "learning_rate": 2.8710462287104625e-06, "loss": 0.2472, "step": 97440 }, { "epoch": 2.845097151866396, "grad_norm": 0.5768081605643848, "learning_rate": 2.868342795350095e-06, "loss": 0.2444, "step": 97445 }, { "epoch": 2.8452431351367133, "grad_norm": 0.5599638140016664, "learning_rate": 2.865639361989727e-06, "loss": 0.2328, "step": 97450 }, { "epoch": 2.845389118407031, "grad_norm": 0.5950860518555184, "learning_rate": 2.8629359286293595e-06, "loss": 0.2212, "step": 97455 }, { "epoch": 2.845535101677348, "grad_norm": 0.5655134536713268, "learning_rate": 2.860232495268992e-06, "loss": 0.2402, "step": 97460 }, { "epoch": 2.845681084947665, "grad_norm": 0.5787153136087522, "learning_rate": 2.857529061908624e-06, "loss": 0.2122, "step": 97465 }, { "epoch": 2.8458270682179823, "grad_norm": 0.6128451909584393, "learning_rate": 2.8548256285482564e-06, "loss": 0.2527, "step": 97470 }, { "epoch": 2.8459730514882997, "grad_norm": 0.5814994158027942, "learning_rate": 2.8521221951878888e-06, "loss": 0.2358, "step": 97475 }, { "epoch": 2.8461190347586167, "grad_norm": 0.617265462009725, "learning_rate": 2.849418761827521e-06, "loss": 0.2242, "step": 97480 }, { "epoch": 2.8462650180289337, "grad_norm": 0.5954159026539996, "learning_rate": 2.8467153284671534e-06, "loss": 0.2338, "step": 97485 }, { "epoch": 2.846411001299251, "grad_norm": 0.5723941781041949, "learning_rate": 2.8440118951067857e-06, "loss": 0.2407, "step": 97490 }, { "epoch": 2.846556984569568, "grad_norm": 0.5898548594795778, "learning_rate": 2.841308461746418e-06, "loss": 0.2324, "step": 97495 }, { "epoch": 2.8467029678398856, "grad_norm": 0.6119461459456444, "learning_rate": 2.8386050283860503e-06, "loss": 0.2421, "step": 97500 }, { "epoch": 2.8468489511102026, "grad_norm": 0.6066850804766705, "learning_rate": 2.8359015950256827e-06, "loss": 0.2338, "step": 97505 }, { "epoch": 2.84699493438052, "grad_norm": 0.5983140971451135, "learning_rate": 2.833198161665315e-06, "loss": 0.2428, "step": 97510 }, { "epoch": 2.847140917650837, "grad_norm": 0.5746678082602905, "learning_rate": 2.8304947283049473e-06, "loss": 0.2307, "step": 97515 }, { "epoch": 2.8472869009211546, "grad_norm": 0.566571542080377, "learning_rate": 2.8277912949445796e-06, "loss": 0.2443, "step": 97520 }, { "epoch": 2.8474328841914716, "grad_norm": 0.5825888145391767, "learning_rate": 2.8250878615842124e-06, "loss": 0.2339, "step": 97525 }, { "epoch": 2.847578867461789, "grad_norm": 0.6036059089591329, "learning_rate": 2.8223844282238443e-06, "loss": 0.2485, "step": 97530 }, { "epoch": 2.847724850732106, "grad_norm": 0.5669352778302146, "learning_rate": 2.819680994863477e-06, "loss": 0.244, "step": 97535 }, { "epoch": 2.8478708340024235, "grad_norm": 0.6134454244236053, "learning_rate": 2.816977561503109e-06, "loss": 0.237, "step": 97540 }, { "epoch": 2.8480168172727405, "grad_norm": 0.5843355318569631, "learning_rate": 2.814274128142741e-06, "loss": 0.2402, "step": 97545 }, { "epoch": 2.848162800543058, "grad_norm": 0.5777793109765024, "learning_rate": 2.811570694782374e-06, "loss": 0.247, "step": 97550 }, { "epoch": 2.848308783813375, "grad_norm": 0.5672785687524443, "learning_rate": 2.808867261422006e-06, "loss": 0.2324, "step": 97555 }, { "epoch": 2.848454767083692, "grad_norm": 0.635408901120565, "learning_rate": 2.8061638280616386e-06, "loss": 0.2505, "step": 97560 }, { "epoch": 2.8486007503540094, "grad_norm": 0.6014920305041153, "learning_rate": 2.803460394701271e-06, "loss": 0.2495, "step": 97565 }, { "epoch": 2.848746733624327, "grad_norm": 0.5995355922322045, "learning_rate": 2.800756961340903e-06, "loss": 0.2452, "step": 97570 }, { "epoch": 2.848892716894644, "grad_norm": 0.6312221850425938, "learning_rate": 2.7980535279805355e-06, "loss": 0.2326, "step": 97575 }, { "epoch": 2.849038700164961, "grad_norm": 0.597645183971426, "learning_rate": 2.795350094620168e-06, "loss": 0.2426, "step": 97580 }, { "epoch": 2.8491846834352783, "grad_norm": 0.5680627635572987, "learning_rate": 2.7926466612598e-06, "loss": 0.2333, "step": 97585 }, { "epoch": 2.8493306667055958, "grad_norm": 0.606100045754618, "learning_rate": 2.7899432278994325e-06, "loss": 0.2376, "step": 97590 }, { "epoch": 2.8494766499759128, "grad_norm": 0.5800103750212414, "learning_rate": 2.7872397945390644e-06, "loss": 0.2306, "step": 97595 }, { "epoch": 2.8496226332462298, "grad_norm": 0.6078479494750763, "learning_rate": 2.784536361178697e-06, "loss": 0.2334, "step": 97600 }, { "epoch": 2.8497686165165472, "grad_norm": 0.5837077374267783, "learning_rate": 2.7818329278183294e-06, "loss": 0.235, "step": 97605 }, { "epoch": 2.8499145997868647, "grad_norm": 0.5639460233403787, "learning_rate": 2.7791294944579618e-06, "loss": 0.2299, "step": 97610 }, { "epoch": 2.8500605830571817, "grad_norm": 0.8693967001606785, "learning_rate": 2.776426061097594e-06, "loss": 0.2675, "step": 97615 }, { "epoch": 2.8502065663274987, "grad_norm": 0.5766415995531551, "learning_rate": 2.7737226277372264e-06, "loss": 0.2494, "step": 97620 }, { "epoch": 2.850352549597816, "grad_norm": 0.6304920066205921, "learning_rate": 2.7710191943768587e-06, "loss": 0.253, "step": 97625 }, { "epoch": 2.850498532868133, "grad_norm": 0.5817206976454733, "learning_rate": 2.768315761016491e-06, "loss": 0.2366, "step": 97630 }, { "epoch": 2.8506445161384506, "grad_norm": 0.5798965628879185, "learning_rate": 2.7656123276561233e-06, "loss": 0.2322, "step": 97635 }, { "epoch": 2.8507904994087676, "grad_norm": 0.5979783492496727, "learning_rate": 2.7629088942957557e-06, "loss": 0.2457, "step": 97640 }, { "epoch": 2.850936482679085, "grad_norm": 0.5860593336722453, "learning_rate": 2.7602054609353884e-06, "loss": 0.2606, "step": 97645 }, { "epoch": 2.851082465949402, "grad_norm": 0.5839050338925968, "learning_rate": 2.7575020275750203e-06, "loss": 0.2473, "step": 97650 }, { "epoch": 2.8512284492197195, "grad_norm": 0.6055888658891407, "learning_rate": 2.7547985942146526e-06, "loss": 0.2536, "step": 97655 }, { "epoch": 2.8513744324900365, "grad_norm": 0.5588922346192269, "learning_rate": 2.752095160854285e-06, "loss": 0.224, "step": 97660 }, { "epoch": 2.851520415760354, "grad_norm": 0.5710219134276792, "learning_rate": 2.7493917274939172e-06, "loss": 0.2411, "step": 97665 }, { "epoch": 2.851666399030671, "grad_norm": 0.5269950173312448, "learning_rate": 2.74668829413355e-06, "loss": 0.2288, "step": 97670 }, { "epoch": 2.8518123823009884, "grad_norm": 0.6185173334428948, "learning_rate": 2.743984860773182e-06, "loss": 0.2483, "step": 97675 }, { "epoch": 2.8519583655713054, "grad_norm": 0.5979187733440698, "learning_rate": 2.7412814274128146e-06, "loss": 0.2537, "step": 97680 }, { "epoch": 2.852104348841623, "grad_norm": 0.5825885657122065, "learning_rate": 2.738577994052447e-06, "loss": 0.2388, "step": 97685 }, { "epoch": 2.85225033211194, "grad_norm": 0.5974092541944603, "learning_rate": 2.735874560692079e-06, "loss": 0.2378, "step": 97690 }, { "epoch": 2.8523963153822574, "grad_norm": 0.5755146679591173, "learning_rate": 2.7331711273317116e-06, "loss": 0.2435, "step": 97695 }, { "epoch": 2.8525422986525744, "grad_norm": 0.6077521093337972, "learning_rate": 2.730467693971344e-06, "loss": 0.2431, "step": 97700 }, { "epoch": 2.852688281922892, "grad_norm": 0.5523558173200419, "learning_rate": 2.727764260610976e-06, "loss": 0.2252, "step": 97705 }, { "epoch": 2.852834265193209, "grad_norm": 0.5861808776780633, "learning_rate": 2.7250608272506085e-06, "loss": 0.2794, "step": 97710 }, { "epoch": 2.852980248463526, "grad_norm": 0.5803296651756353, "learning_rate": 2.7223573938902404e-06, "loss": 0.2331, "step": 97715 }, { "epoch": 2.8531262317338433, "grad_norm": 0.5630008328431926, "learning_rate": 2.719653960529873e-06, "loss": 0.2331, "step": 97720 }, { "epoch": 2.8532722150041607, "grad_norm": 0.6419400699616464, "learning_rate": 2.7169505271695055e-06, "loss": 0.2498, "step": 97725 }, { "epoch": 2.8534181982744777, "grad_norm": 0.5897611433471683, "learning_rate": 2.714247093809138e-06, "loss": 0.234, "step": 97730 }, { "epoch": 2.8535641815447947, "grad_norm": 0.5853603329465565, "learning_rate": 2.71154366044877e-06, "loss": 0.2376, "step": 97735 }, { "epoch": 2.853710164815112, "grad_norm": 0.6010121206261236, "learning_rate": 2.7088402270884024e-06, "loss": 0.2428, "step": 97740 }, { "epoch": 2.8538561480854296, "grad_norm": 0.633502140760068, "learning_rate": 2.7061367937280347e-06, "loss": 0.24, "step": 97745 }, { "epoch": 2.8540021313557467, "grad_norm": 0.6248478975459238, "learning_rate": 2.703433360367667e-06, "loss": 0.2511, "step": 97750 }, { "epoch": 2.8541481146260637, "grad_norm": 0.5705636401806233, "learning_rate": 2.7007299270072994e-06, "loss": 0.2431, "step": 97755 }, { "epoch": 2.854294097896381, "grad_norm": 0.5544115046337029, "learning_rate": 2.6980264936469317e-06, "loss": 0.2345, "step": 97760 }, { "epoch": 2.8544400811666986, "grad_norm": 0.5801106449763276, "learning_rate": 2.6953230602865644e-06, "loss": 0.2369, "step": 97765 }, { "epoch": 2.8545860644370156, "grad_norm": 0.570238769814787, "learning_rate": 2.6926196269261963e-06, "loss": 0.2545, "step": 97770 }, { "epoch": 2.8547320477073326, "grad_norm": 0.5475494308383203, "learning_rate": 2.6899161935658286e-06, "loss": 0.2467, "step": 97775 }, { "epoch": 2.85487803097765, "grad_norm": 0.5842572940879146, "learning_rate": 2.687212760205461e-06, "loss": 0.2486, "step": 97780 }, { "epoch": 2.855024014247967, "grad_norm": 0.5937178123302742, "learning_rate": 2.6845093268450933e-06, "loss": 0.2377, "step": 97785 }, { "epoch": 2.8551699975182845, "grad_norm": 0.5608268210151107, "learning_rate": 2.681805893484726e-06, "loss": 0.2272, "step": 97790 }, { "epoch": 2.8553159807886015, "grad_norm": 0.5950764848969021, "learning_rate": 2.679102460124358e-06, "loss": 0.2407, "step": 97795 }, { "epoch": 2.855461964058919, "grad_norm": 0.5427875870781663, "learning_rate": 2.6763990267639902e-06, "loss": 0.2506, "step": 97800 }, { "epoch": 2.855607947329236, "grad_norm": 0.5781599194912453, "learning_rate": 2.673695593403623e-06, "loss": 0.2379, "step": 97805 }, { "epoch": 2.8557539305995534, "grad_norm": 0.6283425478827821, "learning_rate": 2.670992160043255e-06, "loss": 0.2512, "step": 97810 }, { "epoch": 2.8558999138698704, "grad_norm": 0.5821803637569348, "learning_rate": 2.6682887266828876e-06, "loss": 0.2544, "step": 97815 }, { "epoch": 2.856045897140188, "grad_norm": 0.5773944537609115, "learning_rate": 2.6655852933225195e-06, "loss": 0.2349, "step": 97820 }, { "epoch": 2.856191880410505, "grad_norm": 0.6103617323172404, "learning_rate": 2.662881859962152e-06, "loss": 0.2354, "step": 97825 }, { "epoch": 2.8563378636808223, "grad_norm": 0.5577530126678886, "learning_rate": 2.6601784266017846e-06, "loss": 0.2314, "step": 97830 }, { "epoch": 2.8564838469511393, "grad_norm": 0.587254285964915, "learning_rate": 2.6574749932414165e-06, "loss": 0.2456, "step": 97835 }, { "epoch": 2.856629830221457, "grad_norm": 0.5789958603863908, "learning_rate": 2.654771559881049e-06, "loss": 0.2305, "step": 97840 }, { "epoch": 2.856775813491774, "grad_norm": 0.5832126730072713, "learning_rate": 2.6520681265206815e-06, "loss": 0.227, "step": 97845 }, { "epoch": 2.856921796762091, "grad_norm": 0.5754598235355374, "learning_rate": 2.649364693160314e-06, "loss": 0.2516, "step": 97850 }, { "epoch": 2.8570677800324082, "grad_norm": 0.6508708949668605, "learning_rate": 2.646661259799946e-06, "loss": 0.2438, "step": 97855 }, { "epoch": 2.8572137633027257, "grad_norm": 0.6125368909444325, "learning_rate": 2.6439578264395785e-06, "loss": 0.2538, "step": 97860 }, { "epoch": 2.8573597465730427, "grad_norm": 0.6676824935471806, "learning_rate": 2.6412543930792108e-06, "loss": 0.2511, "step": 97865 }, { "epoch": 2.8575057298433597, "grad_norm": 0.587099900389414, "learning_rate": 2.638550959718843e-06, "loss": 0.2333, "step": 97870 }, { "epoch": 2.857651713113677, "grad_norm": 0.6058839093731758, "learning_rate": 2.6358475263584754e-06, "loss": 0.246, "step": 97875 }, { "epoch": 2.8577976963839946, "grad_norm": 0.5723359371885689, "learning_rate": 2.6331440929981077e-06, "loss": 0.249, "step": 97880 }, { "epoch": 2.8579436796543116, "grad_norm": 0.6029643089607769, "learning_rate": 2.63044065963774e-06, "loss": 0.232, "step": 97885 }, { "epoch": 2.8580896629246286, "grad_norm": 0.5804279592690209, "learning_rate": 2.6277372262773724e-06, "loss": 0.2473, "step": 97890 }, { "epoch": 2.858235646194946, "grad_norm": 0.5898811759254272, "learning_rate": 2.6250337929170047e-06, "loss": 0.233, "step": 97895 }, { "epoch": 2.8583816294652635, "grad_norm": 0.656990809793894, "learning_rate": 2.622330359556637e-06, "loss": 0.2442, "step": 97900 }, { "epoch": 2.8585276127355805, "grad_norm": 0.5712158899418066, "learning_rate": 2.6196269261962693e-06, "loss": 0.2469, "step": 97905 }, { "epoch": 2.8586735960058975, "grad_norm": 0.5896632096025949, "learning_rate": 2.616923492835902e-06, "loss": 0.2323, "step": 97910 }, { "epoch": 2.858819579276215, "grad_norm": 0.5595539947957149, "learning_rate": 2.614220059475534e-06, "loss": 0.2378, "step": 97915 }, { "epoch": 2.858965562546532, "grad_norm": 0.5965894820607683, "learning_rate": 2.6115166261151663e-06, "loss": 0.2549, "step": 97920 }, { "epoch": 2.8591115458168495, "grad_norm": 0.5857002866236393, "learning_rate": 2.608813192754799e-06, "loss": 0.2316, "step": 97925 }, { "epoch": 2.8592575290871665, "grad_norm": 0.5850295084570353, "learning_rate": 2.606109759394431e-06, "loss": 0.2397, "step": 97930 }, { "epoch": 2.859403512357484, "grad_norm": 0.5712929013818182, "learning_rate": 2.6034063260340636e-06, "loss": 0.2561, "step": 97935 }, { "epoch": 2.859549495627801, "grad_norm": 0.5944054676196521, "learning_rate": 2.6007028926736955e-06, "loss": 0.2362, "step": 97940 }, { "epoch": 2.8596954788981184, "grad_norm": 0.6001913775132223, "learning_rate": 2.597999459313328e-06, "loss": 0.2457, "step": 97945 }, { "epoch": 2.8598414621684354, "grad_norm": 0.578012824732915, "learning_rate": 2.5952960259529606e-06, "loss": 0.2182, "step": 97950 }, { "epoch": 2.859987445438753, "grad_norm": 0.5948049582193126, "learning_rate": 2.5925925925925925e-06, "loss": 0.236, "step": 97955 }, { "epoch": 2.86013342870907, "grad_norm": 0.5793102434999495, "learning_rate": 2.5898891592322252e-06, "loss": 0.2525, "step": 97960 }, { "epoch": 2.8602794119793873, "grad_norm": 0.5809560684230269, "learning_rate": 2.5871857258718575e-06, "loss": 0.2377, "step": 97965 }, { "epoch": 2.8604253952497043, "grad_norm": 0.6018949540483234, "learning_rate": 2.5844822925114894e-06, "loss": 0.232, "step": 97970 }, { "epoch": 2.8605713785200217, "grad_norm": 0.5836467013472804, "learning_rate": 2.581778859151122e-06, "loss": 0.2372, "step": 97975 }, { "epoch": 2.8607173617903388, "grad_norm": 0.5651510493925249, "learning_rate": 2.579075425790754e-06, "loss": 0.2339, "step": 97980 }, { "epoch": 2.860863345060656, "grad_norm": 0.5561659502170464, "learning_rate": 2.576371992430387e-06, "loss": 0.238, "step": 97985 }, { "epoch": 2.861009328330973, "grad_norm": 0.5672105550198087, "learning_rate": 2.573668559070019e-06, "loss": 0.2296, "step": 97990 }, { "epoch": 2.8611553116012907, "grad_norm": 0.6064379565145763, "learning_rate": 2.5709651257096515e-06, "loss": 0.2491, "step": 97995 }, { "epoch": 2.8613012948716077, "grad_norm": 0.5522751540646557, "learning_rate": 2.5682616923492838e-06, "loss": 0.2344, "step": 98000 }, { "epoch": 2.8614472781419247, "grad_norm": 0.5814944590526935, "learning_rate": 2.565558258988916e-06, "loss": 0.2357, "step": 98005 }, { "epoch": 2.861593261412242, "grad_norm": 0.5958538886341461, "learning_rate": 2.5628548256285484e-06, "loss": 0.2311, "step": 98010 }, { "epoch": 2.8617392446825596, "grad_norm": 0.5925021059983889, "learning_rate": 2.5601513922681807e-06, "loss": 0.2384, "step": 98015 }, { "epoch": 2.8618852279528766, "grad_norm": 0.588140846381115, "learning_rate": 2.557447958907813e-06, "loss": 0.2495, "step": 98020 }, { "epoch": 2.8620312112231936, "grad_norm": 0.6147418545801955, "learning_rate": 2.5547445255474454e-06, "loss": 0.2522, "step": 98025 }, { "epoch": 2.862177194493511, "grad_norm": 0.583520658781176, "learning_rate": 2.5520410921870777e-06, "loss": 0.2298, "step": 98030 }, { "epoch": 2.8623231777638285, "grad_norm": 0.6219149227662234, "learning_rate": 2.54933765882671e-06, "loss": 0.2296, "step": 98035 }, { "epoch": 2.8624691610341455, "grad_norm": 0.5742976262791966, "learning_rate": 2.5466342254663423e-06, "loss": 0.2435, "step": 98040 }, { "epoch": 2.8626151443044625, "grad_norm": 0.5776592619773633, "learning_rate": 2.5439307921059746e-06, "loss": 0.2486, "step": 98045 }, { "epoch": 2.86276112757478, "grad_norm": 0.5944214787201337, "learning_rate": 2.541227358745607e-06, "loss": 0.2548, "step": 98050 }, { "epoch": 2.8629071108450974, "grad_norm": 0.5788540730862067, "learning_rate": 2.5385239253852393e-06, "loss": 0.2422, "step": 98055 }, { "epoch": 2.8630530941154144, "grad_norm": 0.5755586789717749, "learning_rate": 2.5358204920248716e-06, "loss": 0.2521, "step": 98060 }, { "epoch": 2.8631990773857314, "grad_norm": 0.5789298595026192, "learning_rate": 2.533117058664504e-06, "loss": 0.2431, "step": 98065 }, { "epoch": 2.863345060656049, "grad_norm": 0.5496434141939983, "learning_rate": 2.5304136253041366e-06, "loss": 0.2405, "step": 98070 }, { "epoch": 2.863491043926366, "grad_norm": 0.5634361907917388, "learning_rate": 2.5277101919437685e-06, "loss": 0.2314, "step": 98075 }, { "epoch": 2.8636370271966833, "grad_norm": 0.5619757636637641, "learning_rate": 2.5250067585834013e-06, "loss": 0.2166, "step": 98080 }, { "epoch": 2.8637830104670003, "grad_norm": 0.5989623315735486, "learning_rate": 2.5223033252230336e-06, "loss": 0.2398, "step": 98085 }, { "epoch": 2.863928993737318, "grad_norm": 0.5697683080128911, "learning_rate": 2.5195998918626655e-06, "loss": 0.2255, "step": 98090 }, { "epoch": 2.864074977007635, "grad_norm": 0.5828825931180959, "learning_rate": 2.5168964585022982e-06, "loss": 0.2323, "step": 98095 }, { "epoch": 2.8642209602779523, "grad_norm": 0.6043893566516648, "learning_rate": 2.51419302514193e-06, "loss": 0.2488, "step": 98100 }, { "epoch": 2.8643669435482693, "grad_norm": 0.7038338540917956, "learning_rate": 2.511489591781563e-06, "loss": 0.2512, "step": 98105 }, { "epoch": 2.8645129268185867, "grad_norm": 0.6018959777183034, "learning_rate": 2.508786158421195e-06, "loss": 0.2289, "step": 98110 }, { "epoch": 2.8646589100889037, "grad_norm": 0.5909776875984097, "learning_rate": 2.506082725060827e-06, "loss": 0.2515, "step": 98115 }, { "epoch": 2.864804893359221, "grad_norm": 0.5722665511758763, "learning_rate": 2.50337929170046e-06, "loss": 0.2455, "step": 98120 }, { "epoch": 2.864950876629538, "grad_norm": 0.626062323712058, "learning_rate": 2.500675858340092e-06, "loss": 0.2443, "step": 98125 }, { "epoch": 2.8650968598998556, "grad_norm": 0.5986537803924075, "learning_rate": 2.4979724249797244e-06, "loss": 0.2358, "step": 98130 }, { "epoch": 2.8652428431701726, "grad_norm": 0.5800023025093622, "learning_rate": 2.4952689916193568e-06, "loss": 0.2271, "step": 98135 }, { "epoch": 2.8653888264404896, "grad_norm": 0.6006313624681813, "learning_rate": 2.492565558258989e-06, "loss": 0.2483, "step": 98140 }, { "epoch": 2.865534809710807, "grad_norm": 0.6061328499561711, "learning_rate": 2.4898621248986214e-06, "loss": 0.2267, "step": 98145 }, { "epoch": 2.8656807929811245, "grad_norm": 0.6156694396075485, "learning_rate": 2.4871586915382537e-06, "loss": 0.2485, "step": 98150 }, { "epoch": 2.8658267762514416, "grad_norm": 0.5718001708060002, "learning_rate": 2.484455258177886e-06, "loss": 0.2506, "step": 98155 }, { "epoch": 2.8659727595217586, "grad_norm": 0.6052679029126091, "learning_rate": 2.4817518248175183e-06, "loss": 0.2404, "step": 98160 }, { "epoch": 2.866118742792076, "grad_norm": 0.6179271517825338, "learning_rate": 2.4790483914571507e-06, "loss": 0.2379, "step": 98165 }, { "epoch": 2.8662647260623935, "grad_norm": 0.5141225226350868, "learning_rate": 2.476344958096783e-06, "loss": 0.229, "step": 98170 }, { "epoch": 2.8664107093327105, "grad_norm": 0.618003622499757, "learning_rate": 2.4736415247364153e-06, "loss": 0.2362, "step": 98175 }, { "epoch": 2.8665566926030275, "grad_norm": 0.565941026690928, "learning_rate": 2.4709380913760476e-06, "loss": 0.2361, "step": 98180 }, { "epoch": 2.866702675873345, "grad_norm": 0.6019647582754397, "learning_rate": 2.46823465801568e-06, "loss": 0.2368, "step": 98185 }, { "epoch": 2.8668486591436624, "grad_norm": 0.5406939416488082, "learning_rate": 2.4655312246553127e-06, "loss": 0.24, "step": 98190 }, { "epoch": 2.8669946424139794, "grad_norm": 0.5402831832291766, "learning_rate": 2.4628277912949446e-06, "loss": 0.2301, "step": 98195 }, { "epoch": 2.8671406256842964, "grad_norm": 0.575022581854301, "learning_rate": 2.460124357934577e-06, "loss": 0.2324, "step": 98200 }, { "epoch": 2.867286608954614, "grad_norm": 0.5875445393114485, "learning_rate": 2.4574209245742096e-06, "loss": 0.2462, "step": 98205 }, { "epoch": 2.8674325922249313, "grad_norm": 0.6201644550426714, "learning_rate": 2.4547174912138415e-06, "loss": 0.2478, "step": 98210 }, { "epoch": 2.8675785754952483, "grad_norm": 0.6209092221399961, "learning_rate": 2.4520140578534743e-06, "loss": 0.2544, "step": 98215 }, { "epoch": 2.8677245587655653, "grad_norm": 0.5969460424513945, "learning_rate": 2.449310624493106e-06, "loss": 0.2444, "step": 98220 }, { "epoch": 2.8678705420358828, "grad_norm": 0.6180093238467335, "learning_rate": 2.446607191132739e-06, "loss": 0.2355, "step": 98225 }, { "epoch": 2.8680165253061998, "grad_norm": 0.5793752722478416, "learning_rate": 2.443903757772371e-06, "loss": 0.2607, "step": 98230 }, { "epoch": 2.8681625085765172, "grad_norm": 0.5672250995294013, "learning_rate": 2.441200324412003e-06, "loss": 0.241, "step": 98235 }, { "epoch": 2.8683084918468342, "grad_norm": 0.5548278575125133, "learning_rate": 2.438496891051636e-06, "loss": 0.243, "step": 98240 }, { "epoch": 2.8684544751171517, "grad_norm": 0.5580763643095968, "learning_rate": 2.435793457691268e-06, "loss": 0.233, "step": 98245 }, { "epoch": 2.8686004583874687, "grad_norm": 0.5865060071846454, "learning_rate": 2.4330900243309005e-06, "loss": 0.2286, "step": 98250 }, { "epoch": 2.868746441657786, "grad_norm": 0.5784857292262764, "learning_rate": 2.430386590970533e-06, "loss": 0.2258, "step": 98255 }, { "epoch": 2.868892424928103, "grad_norm": 0.5495433984054334, "learning_rate": 2.4276831576101647e-06, "loss": 0.2473, "step": 98260 }, { "epoch": 2.8690384081984206, "grad_norm": 0.6011988520396823, "learning_rate": 2.4249797242497974e-06, "loss": 0.2435, "step": 98265 }, { "epoch": 2.8691843914687376, "grad_norm": 0.5663327712681899, "learning_rate": 2.4222762908894297e-06, "loss": 0.2242, "step": 98270 }, { "epoch": 2.869330374739055, "grad_norm": 0.6391746680690119, "learning_rate": 2.419572857529062e-06, "loss": 0.2503, "step": 98275 }, { "epoch": 2.869476358009372, "grad_norm": 0.6088250060222158, "learning_rate": 2.4168694241686944e-06, "loss": 0.2283, "step": 98280 }, { "epoch": 2.8696223412796895, "grad_norm": 0.6195701480110463, "learning_rate": 2.4141659908083267e-06, "loss": 0.2469, "step": 98285 }, { "epoch": 2.8697683245500065, "grad_norm": 0.568386836609297, "learning_rate": 2.411462557447959e-06, "loss": 0.2222, "step": 98290 }, { "epoch": 2.8699143078203235, "grad_norm": 0.5879707525163306, "learning_rate": 2.4087591240875913e-06, "loss": 0.2479, "step": 98295 }, { "epoch": 2.870060291090641, "grad_norm": 0.6056758856535279, "learning_rate": 2.4060556907272237e-06, "loss": 0.2408, "step": 98300 }, { "epoch": 2.8702062743609584, "grad_norm": 0.5461956152933588, "learning_rate": 2.403352257366856e-06, "loss": 0.2278, "step": 98305 }, { "epoch": 2.8703522576312754, "grad_norm": 0.5507678897791984, "learning_rate": 2.4006488240064887e-06, "loss": 0.2356, "step": 98310 }, { "epoch": 2.8704982409015924, "grad_norm": 0.5967978721108722, "learning_rate": 2.3979453906461206e-06, "loss": 0.2372, "step": 98315 }, { "epoch": 2.87064422417191, "grad_norm": 0.5897970029101133, "learning_rate": 2.395241957285753e-06, "loss": 0.2497, "step": 98320 }, { "epoch": 2.8707902074422273, "grad_norm": 0.6132357514848751, "learning_rate": 2.3925385239253852e-06, "loss": 0.248, "step": 98325 }, { "epoch": 2.8709361907125444, "grad_norm": 0.5941763769372134, "learning_rate": 2.3898350905650176e-06, "loss": 0.2404, "step": 98330 }, { "epoch": 2.8710821739828614, "grad_norm": 0.6089873506393768, "learning_rate": 2.3871316572046503e-06, "loss": 0.2514, "step": 98335 }, { "epoch": 2.871228157253179, "grad_norm": 0.5648996331064224, "learning_rate": 2.384428223844282e-06, "loss": 0.2311, "step": 98340 }, { "epoch": 2.8713741405234963, "grad_norm": 0.5663333565218134, "learning_rate": 2.3817247904839145e-06, "loss": 0.2343, "step": 98345 }, { "epoch": 2.8715201237938133, "grad_norm": 0.5884238586764656, "learning_rate": 2.3790213571235472e-06, "loss": 0.2497, "step": 98350 }, { "epoch": 2.8716661070641303, "grad_norm": 0.5437434363734325, "learning_rate": 2.376317923763179e-06, "loss": 0.2406, "step": 98355 }, { "epoch": 2.8718120903344477, "grad_norm": 0.5875744947940824, "learning_rate": 2.373614490402812e-06, "loss": 0.2366, "step": 98360 }, { "epoch": 2.8719580736047647, "grad_norm": 0.604792004984771, "learning_rate": 2.370911057042444e-06, "loss": 0.2388, "step": 98365 }, { "epoch": 2.872104056875082, "grad_norm": 0.6036740454108817, "learning_rate": 2.368207623682076e-06, "loss": 0.2325, "step": 98370 }, { "epoch": 2.872250040145399, "grad_norm": 0.5838501696298257, "learning_rate": 2.365504190321709e-06, "loss": 0.2474, "step": 98375 }, { "epoch": 2.8723960234157166, "grad_norm": 0.6168494306108024, "learning_rate": 2.3628007569613407e-06, "loss": 0.2265, "step": 98380 }, { "epoch": 2.8725420066860337, "grad_norm": 0.561187023779812, "learning_rate": 2.3600973236009735e-06, "loss": 0.2329, "step": 98385 }, { "epoch": 2.872687989956351, "grad_norm": 0.5821930566041396, "learning_rate": 2.3573938902406058e-06, "loss": 0.2279, "step": 98390 }, { "epoch": 2.872833973226668, "grad_norm": 0.5854965814876043, "learning_rate": 2.354690456880238e-06, "loss": 0.2437, "step": 98395 }, { "epoch": 2.8729799564969856, "grad_norm": 0.6260733195735132, "learning_rate": 2.3519870235198704e-06, "loss": 0.254, "step": 98400 }, { "epoch": 2.8731259397673026, "grad_norm": 0.5869894522393843, "learning_rate": 2.3492835901595027e-06, "loss": 0.2311, "step": 98405 }, { "epoch": 2.87327192303762, "grad_norm": 0.6260507693484234, "learning_rate": 2.346580156799135e-06, "loss": 0.2472, "step": 98410 }, { "epoch": 2.873417906307937, "grad_norm": 0.57315208993403, "learning_rate": 2.3438767234387674e-06, "loss": 0.2319, "step": 98415 }, { "epoch": 2.8735638895782545, "grad_norm": 0.5368877694061337, "learning_rate": 2.3411732900783997e-06, "loss": 0.244, "step": 98420 }, { "epoch": 2.8737098728485715, "grad_norm": 0.6063093387599594, "learning_rate": 2.338469856718032e-06, "loss": 0.2498, "step": 98425 }, { "epoch": 2.873855856118889, "grad_norm": 0.6411650446287686, "learning_rate": 2.3357664233576643e-06, "loss": 0.2552, "step": 98430 }, { "epoch": 2.874001839389206, "grad_norm": 0.5788735850089499, "learning_rate": 2.3330629899972966e-06, "loss": 0.2493, "step": 98435 }, { "epoch": 2.8741478226595234, "grad_norm": 0.5838619238751541, "learning_rate": 2.330359556636929e-06, "loss": 0.236, "step": 98440 }, { "epoch": 2.8742938059298404, "grad_norm": 0.5991823599045104, "learning_rate": 2.3276561232765613e-06, "loss": 0.238, "step": 98445 }, { "epoch": 2.8744397892001574, "grad_norm": 0.5633159603395753, "learning_rate": 2.3249526899161936e-06, "loss": 0.2378, "step": 98450 }, { "epoch": 2.874585772470475, "grad_norm": 0.5552059337057988, "learning_rate": 2.322249256555826e-06, "loss": 0.2357, "step": 98455 }, { "epoch": 2.8747317557407923, "grad_norm": 0.5463732062728749, "learning_rate": 2.3195458231954582e-06, "loss": 0.2379, "step": 98460 }, { "epoch": 2.8748777390111093, "grad_norm": 0.6014986751456975, "learning_rate": 2.3168423898350905e-06, "loss": 0.2404, "step": 98465 }, { "epoch": 2.8750237222814263, "grad_norm": 0.6286664104883767, "learning_rate": 2.3141389564747233e-06, "loss": 0.2324, "step": 98470 }, { "epoch": 2.875169705551744, "grad_norm": 0.5975523194236755, "learning_rate": 2.311435523114355e-06, "loss": 0.2342, "step": 98475 }, { "epoch": 2.8753156888220612, "grad_norm": 0.6114024546322138, "learning_rate": 2.308732089753988e-06, "loss": 0.233, "step": 98480 }, { "epoch": 2.8754616720923782, "grad_norm": 0.5998461906067925, "learning_rate": 2.30602865639362e-06, "loss": 0.2291, "step": 98485 }, { "epoch": 2.8756076553626952, "grad_norm": 0.5787182116670697, "learning_rate": 2.303325223033252e-06, "loss": 0.2454, "step": 98490 }, { "epoch": 2.8757536386330127, "grad_norm": 0.5821566672323093, "learning_rate": 2.300621789672885e-06, "loss": 0.2505, "step": 98495 }, { "epoch": 2.87589962190333, "grad_norm": 0.5846143254773203, "learning_rate": 2.2979183563125168e-06, "loss": 0.2488, "step": 98500 }, { "epoch": 2.876045605173647, "grad_norm": 0.5833270576044696, "learning_rate": 2.2952149229521495e-06, "loss": 0.2553, "step": 98505 }, { "epoch": 2.876191588443964, "grad_norm": 0.6310221472400876, "learning_rate": 2.292511489591782e-06, "loss": 0.2305, "step": 98510 }, { "epoch": 2.8763375717142816, "grad_norm": 0.6500240280620168, "learning_rate": 2.2898080562314137e-06, "loss": 0.2321, "step": 98515 }, { "epoch": 2.8764835549845986, "grad_norm": 0.5733590278441117, "learning_rate": 2.2871046228710465e-06, "loss": 0.2433, "step": 98520 }, { "epoch": 2.876629538254916, "grad_norm": 0.5497216928111839, "learning_rate": 2.2844011895106788e-06, "loss": 0.2388, "step": 98525 }, { "epoch": 2.876775521525233, "grad_norm": 0.5609562385743301, "learning_rate": 2.281697756150311e-06, "loss": 0.2295, "step": 98530 }, { "epoch": 2.8769215047955505, "grad_norm": 0.6341872030599391, "learning_rate": 2.2789943227899434e-06, "loss": 0.2412, "step": 98535 }, { "epoch": 2.8770674880658675, "grad_norm": 0.6365738429950015, "learning_rate": 2.2762908894295757e-06, "loss": 0.2521, "step": 98540 }, { "epoch": 2.877213471336185, "grad_norm": 0.6001160269470833, "learning_rate": 2.273587456069208e-06, "loss": 0.2349, "step": 98545 }, { "epoch": 2.877359454606502, "grad_norm": 0.576082717656331, "learning_rate": 2.2708840227088404e-06, "loss": 0.2471, "step": 98550 }, { "epoch": 2.8775054378768194, "grad_norm": 0.5792498982967412, "learning_rate": 2.2681805893484727e-06, "loss": 0.2416, "step": 98555 }, { "epoch": 2.8776514211471365, "grad_norm": 0.5823755561525495, "learning_rate": 2.265477155988105e-06, "loss": 0.2519, "step": 98560 }, { "epoch": 2.877797404417454, "grad_norm": 0.630652064805427, "learning_rate": 2.2627737226277373e-06, "loss": 0.2448, "step": 98565 }, { "epoch": 2.877943387687771, "grad_norm": 0.59237876702837, "learning_rate": 2.2600702892673696e-06, "loss": 0.241, "step": 98570 }, { "epoch": 2.8780893709580884, "grad_norm": 0.595783825864576, "learning_rate": 2.257366855907002e-06, "loss": 0.2463, "step": 98575 }, { "epoch": 2.8782353542284054, "grad_norm": 0.5552949728560228, "learning_rate": 2.2546634225466343e-06, "loss": 0.241, "step": 98580 }, { "epoch": 2.8783813374987224, "grad_norm": 0.5698721408004547, "learning_rate": 2.2519599891862666e-06, "loss": 0.2423, "step": 98585 }, { "epoch": 2.87852732076904, "grad_norm": 0.6140274798159389, "learning_rate": 2.2492565558258993e-06, "loss": 0.239, "step": 98590 }, { "epoch": 2.8786733040393573, "grad_norm": 0.6572709168715258, "learning_rate": 2.2465531224655312e-06, "loss": 0.235, "step": 98595 }, { "epoch": 2.8788192873096743, "grad_norm": 0.5610820342316102, "learning_rate": 2.2438496891051635e-06, "loss": 0.2198, "step": 98600 }, { "epoch": 2.8789652705799913, "grad_norm": 0.5708458067269484, "learning_rate": 2.241146255744796e-06, "loss": 0.2403, "step": 98605 }, { "epoch": 2.8791112538503087, "grad_norm": 0.5784844302953975, "learning_rate": 2.238442822384428e-06, "loss": 0.2499, "step": 98610 }, { "epoch": 2.879257237120626, "grad_norm": 0.6091859494237901, "learning_rate": 2.235739389024061e-06, "loss": 0.2474, "step": 98615 }, { "epoch": 2.879403220390943, "grad_norm": 0.5820399681781153, "learning_rate": 2.233035955663693e-06, "loss": 0.2458, "step": 98620 }, { "epoch": 2.87954920366126, "grad_norm": 0.5794005512387488, "learning_rate": 2.2303325223033255e-06, "loss": 0.2412, "step": 98625 }, { "epoch": 2.8796951869315777, "grad_norm": 0.5980112510852159, "learning_rate": 2.227629088942958e-06, "loss": 0.2398, "step": 98630 }, { "epoch": 2.879841170201895, "grad_norm": 0.6994039502615496, "learning_rate": 2.2249256555825898e-06, "loss": 0.245, "step": 98635 }, { "epoch": 2.879987153472212, "grad_norm": 0.608689834709318, "learning_rate": 2.2222222222222225e-06, "loss": 0.2367, "step": 98640 }, { "epoch": 2.880133136742529, "grad_norm": 0.5685605638644805, "learning_rate": 2.2195187888618544e-06, "loss": 0.2358, "step": 98645 }, { "epoch": 2.8802791200128466, "grad_norm": 0.6094091981818083, "learning_rate": 2.216815355501487e-06, "loss": 0.2304, "step": 98650 }, { "epoch": 2.8804251032831636, "grad_norm": 0.5695503237150352, "learning_rate": 2.2141119221411194e-06, "loss": 0.2281, "step": 98655 }, { "epoch": 2.880571086553481, "grad_norm": 0.6313540274293772, "learning_rate": 2.2114084887807513e-06, "loss": 0.2495, "step": 98660 }, { "epoch": 2.880717069823798, "grad_norm": 0.5845985783329443, "learning_rate": 2.208705055420384e-06, "loss": 0.2599, "step": 98665 }, { "epoch": 2.8808630530941155, "grad_norm": 0.5835825162847731, "learning_rate": 2.2060016220600164e-06, "loss": 0.2531, "step": 98670 }, { "epoch": 2.8810090363644325, "grad_norm": 0.6127704180383121, "learning_rate": 2.2032981886996487e-06, "loss": 0.2478, "step": 98675 }, { "epoch": 2.88115501963475, "grad_norm": 0.575796733973035, "learning_rate": 2.200594755339281e-06, "loss": 0.2362, "step": 98680 }, { "epoch": 2.881301002905067, "grad_norm": 0.5693274610352419, "learning_rate": 2.1978913219789134e-06, "loss": 0.2361, "step": 98685 }, { "epoch": 2.8814469861753844, "grad_norm": 0.6078611624552132, "learning_rate": 2.1951878886185457e-06, "loss": 0.2435, "step": 98690 }, { "epoch": 2.8815929694457014, "grad_norm": 0.5617032976991087, "learning_rate": 2.192484455258178e-06, "loss": 0.2401, "step": 98695 }, { "epoch": 2.881738952716019, "grad_norm": 0.5527406916442714, "learning_rate": 2.1897810218978103e-06, "loss": 0.2344, "step": 98700 }, { "epoch": 2.881884935986336, "grad_norm": 0.6071758904280766, "learning_rate": 2.1870775885374426e-06, "loss": 0.2357, "step": 98705 }, { "epoch": 2.8820309192566533, "grad_norm": 0.5995309709538613, "learning_rate": 2.184374155177075e-06, "loss": 0.2397, "step": 98710 }, { "epoch": 2.8821769025269703, "grad_norm": 0.554826909075755, "learning_rate": 2.1816707218167073e-06, "loss": 0.2373, "step": 98715 }, { "epoch": 2.882322885797288, "grad_norm": 0.557593277726422, "learning_rate": 2.1789672884563396e-06, "loss": 0.2415, "step": 98720 }, { "epoch": 2.882468869067605, "grad_norm": 0.564802146749158, "learning_rate": 2.176263855095972e-06, "loss": 0.2546, "step": 98725 }, { "epoch": 2.8826148523379223, "grad_norm": 0.6308059524742283, "learning_rate": 2.173560421735604e-06, "loss": 0.2254, "step": 98730 }, { "epoch": 2.8827608356082393, "grad_norm": 0.5556144951176459, "learning_rate": 2.170856988375237e-06, "loss": 0.2438, "step": 98735 }, { "epoch": 2.8829068188785563, "grad_norm": 0.5949849109620708, "learning_rate": 2.168153555014869e-06, "loss": 0.2271, "step": 98740 }, { "epoch": 2.8830528021488737, "grad_norm": 0.5965472226918523, "learning_rate": 2.165450121654501e-06, "loss": 0.239, "step": 98745 }, { "epoch": 2.883198785419191, "grad_norm": 0.6041643097433064, "learning_rate": 2.162746688294134e-06, "loss": 0.2271, "step": 98750 }, { "epoch": 2.883344768689508, "grad_norm": 0.6039761820444552, "learning_rate": 2.160043254933766e-06, "loss": 0.2298, "step": 98755 }, { "epoch": 2.883490751959825, "grad_norm": 0.5977604742964773, "learning_rate": 2.1573398215733985e-06, "loss": 0.2427, "step": 98760 }, { "epoch": 2.8836367352301426, "grad_norm": 0.586653526686934, "learning_rate": 2.1546363882130304e-06, "loss": 0.2216, "step": 98765 }, { "epoch": 2.88378271850046, "grad_norm": 0.6284631016218887, "learning_rate": 2.1519329548526627e-06, "loss": 0.2423, "step": 98770 }, { "epoch": 2.883928701770777, "grad_norm": 0.6567612184573562, "learning_rate": 2.1492295214922955e-06, "loss": 0.2352, "step": 98775 }, { "epoch": 2.884074685041094, "grad_norm": 0.6127264320087802, "learning_rate": 2.1465260881319274e-06, "loss": 0.2372, "step": 98780 }, { "epoch": 2.8842206683114116, "grad_norm": 0.6116392930750679, "learning_rate": 2.14382265477156e-06, "loss": 0.2432, "step": 98785 }, { "epoch": 2.884366651581729, "grad_norm": 0.6049278842296201, "learning_rate": 2.1411192214111924e-06, "loss": 0.2266, "step": 98790 }, { "epoch": 2.884512634852046, "grad_norm": 0.5831700455572218, "learning_rate": 2.1384157880508248e-06, "loss": 0.2367, "step": 98795 }, { "epoch": 2.884658618122363, "grad_norm": 0.5529604426548008, "learning_rate": 2.135712354690457e-06, "loss": 0.2381, "step": 98800 }, { "epoch": 2.8848046013926805, "grad_norm": 0.5817630568304732, "learning_rate": 2.1330089213300894e-06, "loss": 0.2411, "step": 98805 }, { "epoch": 2.8849505846629975, "grad_norm": 0.5871202595576136, "learning_rate": 2.1303054879697217e-06, "loss": 0.2467, "step": 98810 }, { "epoch": 2.885096567933315, "grad_norm": 0.6056184200081537, "learning_rate": 2.127602054609354e-06, "loss": 0.2418, "step": 98815 }, { "epoch": 2.885242551203632, "grad_norm": 0.5923199960369164, "learning_rate": 2.1248986212489863e-06, "loss": 0.2301, "step": 98820 }, { "epoch": 2.8853885344739494, "grad_norm": 0.5725947993458674, "learning_rate": 2.1221951878886187e-06, "loss": 0.243, "step": 98825 }, { "epoch": 2.8855345177442664, "grad_norm": 0.5798509015105492, "learning_rate": 2.119491754528251e-06, "loss": 0.2344, "step": 98830 }, { "epoch": 2.885680501014584, "grad_norm": 0.6039545382596054, "learning_rate": 2.1167883211678833e-06, "loss": 0.2458, "step": 98835 }, { "epoch": 2.885826484284901, "grad_norm": 0.5522309208934785, "learning_rate": 2.1140848878075156e-06, "loss": 0.2477, "step": 98840 }, { "epoch": 2.8859724675552183, "grad_norm": 0.6002590482099923, "learning_rate": 2.111381454447148e-06, "loss": 0.2416, "step": 98845 }, { "epoch": 2.8861184508255353, "grad_norm": 0.5670645693307574, "learning_rate": 2.1086780210867802e-06, "loss": 0.2205, "step": 98850 }, { "epoch": 2.8862644340958528, "grad_norm": 0.5830745471812739, "learning_rate": 2.105974587726413e-06, "loss": 0.2337, "step": 98855 }, { "epoch": 2.8864104173661698, "grad_norm": 0.6365822171595422, "learning_rate": 2.103271154366045e-06, "loss": 0.2439, "step": 98860 }, { "epoch": 2.886556400636487, "grad_norm": 0.6052202024016152, "learning_rate": 2.100567721005677e-06, "loss": 0.2356, "step": 98865 }, { "epoch": 2.8867023839068042, "grad_norm": 0.5527232005324272, "learning_rate": 2.09786428764531e-06, "loss": 0.2246, "step": 98870 }, { "epoch": 2.8868483671771212, "grad_norm": 0.63150191716098, "learning_rate": 2.095160854284942e-06, "loss": 0.247, "step": 98875 }, { "epoch": 2.8869943504474387, "grad_norm": 0.5671327174811409, "learning_rate": 2.0924574209245746e-06, "loss": 0.2492, "step": 98880 }, { "epoch": 2.887140333717756, "grad_norm": 0.5908126092873243, "learning_rate": 2.0897539875642065e-06, "loss": 0.2327, "step": 98885 }, { "epoch": 2.887286316988073, "grad_norm": 0.5609271340077188, "learning_rate": 2.0870505542038388e-06, "loss": 0.2301, "step": 98890 }, { "epoch": 2.88743230025839, "grad_norm": 0.5736061102562864, "learning_rate": 2.0843471208434715e-06, "loss": 0.2341, "step": 98895 }, { "epoch": 2.8875782835287076, "grad_norm": 0.5746467782132605, "learning_rate": 2.0816436874831034e-06, "loss": 0.2417, "step": 98900 }, { "epoch": 2.887724266799025, "grad_norm": 0.6179549460832193, "learning_rate": 2.078940254122736e-06, "loss": 0.2371, "step": 98905 }, { "epoch": 2.887870250069342, "grad_norm": 0.5519319106151502, "learning_rate": 2.0762368207623685e-06, "loss": 0.2391, "step": 98910 }, { "epoch": 2.888016233339659, "grad_norm": 0.5740313004970333, "learning_rate": 2.0735333874020004e-06, "loss": 0.2555, "step": 98915 }, { "epoch": 2.8881622166099765, "grad_norm": 0.5653220519323859, "learning_rate": 2.070829954041633e-06, "loss": 0.2495, "step": 98920 }, { "epoch": 2.888308199880294, "grad_norm": 0.5967273937160391, "learning_rate": 2.068126520681265e-06, "loss": 0.2401, "step": 98925 }, { "epoch": 2.888454183150611, "grad_norm": 0.586598395437427, "learning_rate": 2.0654230873208977e-06, "loss": 0.2285, "step": 98930 }, { "epoch": 2.888600166420928, "grad_norm": 0.5713494239827654, "learning_rate": 2.06271965396053e-06, "loss": 0.2335, "step": 98935 }, { "epoch": 2.8887461496912454, "grad_norm": 0.6562828074932238, "learning_rate": 2.0600162206001624e-06, "loss": 0.2494, "step": 98940 }, { "epoch": 2.8888921329615624, "grad_norm": 0.5843179906762482, "learning_rate": 2.0573127872397947e-06, "loss": 0.237, "step": 98945 }, { "epoch": 2.88903811623188, "grad_norm": 0.6400669794160436, "learning_rate": 2.054609353879427e-06, "loss": 0.2416, "step": 98950 }, { "epoch": 2.889184099502197, "grad_norm": 0.5572465135695944, "learning_rate": 2.0519059205190593e-06, "loss": 0.2399, "step": 98955 }, { "epoch": 2.8893300827725144, "grad_norm": 0.541170048896527, "learning_rate": 2.0492024871586917e-06, "loss": 0.2373, "step": 98960 }, { "epoch": 2.8894760660428314, "grad_norm": 0.626930652013212, "learning_rate": 2.046499053798324e-06, "loss": 0.2591, "step": 98965 }, { "epoch": 2.889622049313149, "grad_norm": 0.5511978570282684, "learning_rate": 2.0437956204379563e-06, "loss": 0.242, "step": 98970 }, { "epoch": 2.889768032583466, "grad_norm": 0.5594542932491468, "learning_rate": 2.0410921870775886e-06, "loss": 0.2372, "step": 98975 }, { "epoch": 2.8899140158537833, "grad_norm": 0.6129819389081212, "learning_rate": 2.038388753717221e-06, "loss": 0.2351, "step": 98980 }, { "epoch": 2.8900599991241003, "grad_norm": 0.6595577619279935, "learning_rate": 2.0356853203568532e-06, "loss": 0.2419, "step": 98985 }, { "epoch": 2.8902059823944177, "grad_norm": 0.5815875207480997, "learning_rate": 2.0329818869964856e-06, "loss": 0.2366, "step": 98990 }, { "epoch": 2.8903519656647347, "grad_norm": 0.596586445250005, "learning_rate": 2.030278453636118e-06, "loss": 0.2465, "step": 98995 }, { "epoch": 2.890497948935052, "grad_norm": 0.5516938188453556, "learning_rate": 2.02757502027575e-06, "loss": 0.2515, "step": 99000 }, { "epoch": 2.890643932205369, "grad_norm": 0.5951650071489355, "learning_rate": 2.0248715869153825e-06, "loss": 0.2469, "step": 99005 }, { "epoch": 2.8907899154756866, "grad_norm": 0.5419484882716408, "learning_rate": 2.022168153555015e-06, "loss": 0.2266, "step": 99010 }, { "epoch": 2.8909358987460037, "grad_norm": 0.5201269793054282, "learning_rate": 2.0194647201946476e-06, "loss": 0.2399, "step": 99015 }, { "epoch": 2.891081882016321, "grad_norm": 0.612742098494818, "learning_rate": 2.0167612868342795e-06, "loss": 0.2401, "step": 99020 }, { "epoch": 2.891227865286638, "grad_norm": 0.6144590071320387, "learning_rate": 2.014057853473912e-06, "loss": 0.2418, "step": 99025 }, { "epoch": 2.891373848556955, "grad_norm": 0.5744632047607493, "learning_rate": 2.0113544201135445e-06, "loss": 0.242, "step": 99030 }, { "epoch": 2.8915198318272726, "grad_norm": 0.6355583772861043, "learning_rate": 2.0086509867531764e-06, "loss": 0.2447, "step": 99035 }, { "epoch": 2.89166581509759, "grad_norm": 0.5457909922541965, "learning_rate": 2.005947553392809e-06, "loss": 0.2408, "step": 99040 }, { "epoch": 2.891811798367907, "grad_norm": 0.6057456588018986, "learning_rate": 2.003244120032441e-06, "loss": 0.2455, "step": 99045 }, { "epoch": 2.891957781638224, "grad_norm": 0.5868111872346305, "learning_rate": 2.0005406866720738e-06, "loss": 0.2444, "step": 99050 }, { "epoch": 2.8921037649085415, "grad_norm": 0.5931337408591421, "learning_rate": 1.997837253311706e-06, "loss": 0.2402, "step": 99055 }, { "epoch": 2.892249748178859, "grad_norm": 0.6032091054016915, "learning_rate": 1.995133819951338e-06, "loss": 0.2479, "step": 99060 }, { "epoch": 2.892395731449176, "grad_norm": 0.5913461136737773, "learning_rate": 1.9924303865909707e-06, "loss": 0.2439, "step": 99065 }, { "epoch": 2.892541714719493, "grad_norm": 0.5642599408728343, "learning_rate": 1.989726953230603e-06, "loss": 0.2361, "step": 99070 }, { "epoch": 2.8926876979898104, "grad_norm": 0.567020010917857, "learning_rate": 1.9870235198702354e-06, "loss": 0.2416, "step": 99075 }, { "epoch": 2.892833681260128, "grad_norm": 0.5732789037251613, "learning_rate": 1.9843200865098677e-06, "loss": 0.2321, "step": 99080 }, { "epoch": 2.892979664530445, "grad_norm": 0.5481994369699837, "learning_rate": 1.9816166531494996e-06, "loss": 0.2542, "step": 99085 }, { "epoch": 2.893125647800762, "grad_norm": 0.6179539458421236, "learning_rate": 1.9789132197891323e-06, "loss": 0.2483, "step": 99090 }, { "epoch": 2.8932716310710793, "grad_norm": 0.5580136449974455, "learning_rate": 1.9762097864287646e-06, "loss": 0.2432, "step": 99095 }, { "epoch": 2.8934176143413963, "grad_norm": 0.6295220226230641, "learning_rate": 1.973506353068397e-06, "loss": 0.2398, "step": 99100 }, { "epoch": 2.893563597611714, "grad_norm": 0.5993902844812835, "learning_rate": 1.9708029197080293e-06, "loss": 0.2352, "step": 99105 }, { "epoch": 2.893709580882031, "grad_norm": 0.5444684477449836, "learning_rate": 1.9680994863476616e-06, "loss": 0.2342, "step": 99110 }, { "epoch": 2.8938555641523482, "grad_norm": 0.6177051204438732, "learning_rate": 1.965396052987294e-06, "loss": 0.2518, "step": 99115 }, { "epoch": 2.8940015474226652, "grad_norm": 0.6020756761863247, "learning_rate": 1.9626926196269262e-06, "loss": 0.2345, "step": 99120 }, { "epoch": 2.8941475306929827, "grad_norm": 0.580152718652534, "learning_rate": 1.9599891862665585e-06, "loss": 0.2383, "step": 99125 }, { "epoch": 2.8942935139632997, "grad_norm": 0.5477314662882565, "learning_rate": 1.957285752906191e-06, "loss": 0.2312, "step": 99130 }, { "epoch": 2.894439497233617, "grad_norm": 0.622323534698714, "learning_rate": 1.9545823195458236e-06, "loss": 0.2412, "step": 99135 }, { "epoch": 2.894585480503934, "grad_norm": 0.5622218302666578, "learning_rate": 1.9518788861854555e-06, "loss": 0.2594, "step": 99140 }, { "epoch": 2.8947314637742516, "grad_norm": 0.6260944294844955, "learning_rate": 1.949175452825088e-06, "loss": 0.2433, "step": 99145 }, { "epoch": 2.8948774470445686, "grad_norm": 0.6355055666621616, "learning_rate": 1.94647201946472e-06, "loss": 0.2438, "step": 99150 }, { "epoch": 2.895023430314886, "grad_norm": 0.5978952765305279, "learning_rate": 1.9437685861043524e-06, "loss": 0.2377, "step": 99155 }, { "epoch": 2.895169413585203, "grad_norm": 0.608604729157683, "learning_rate": 1.941065152743985e-06, "loss": 0.2569, "step": 99160 }, { "epoch": 2.89531539685552, "grad_norm": 0.591080349050024, "learning_rate": 1.938361719383617e-06, "loss": 0.2339, "step": 99165 }, { "epoch": 2.8954613801258375, "grad_norm": 0.6323786184858872, "learning_rate": 1.93565828602325e-06, "loss": 0.2483, "step": 99170 }, { "epoch": 2.895607363396155, "grad_norm": 0.5549005520518059, "learning_rate": 1.932954852662882e-06, "loss": 0.2349, "step": 99175 }, { "epoch": 2.895753346666472, "grad_norm": 0.6053744114519977, "learning_rate": 1.930251419302514e-06, "loss": 0.254, "step": 99180 }, { "epoch": 2.895899329936789, "grad_norm": 0.5701214315598631, "learning_rate": 1.9275479859421468e-06, "loss": 0.2466, "step": 99185 }, { "epoch": 2.8960453132071065, "grad_norm": 0.5910462657972506, "learning_rate": 1.924844552581779e-06, "loss": 0.2378, "step": 99190 }, { "epoch": 2.896191296477424, "grad_norm": 0.5820049755610345, "learning_rate": 1.9221411192214114e-06, "loss": 0.2487, "step": 99195 }, { "epoch": 2.896337279747741, "grad_norm": 0.607110454118239, "learning_rate": 1.9194376858610437e-06, "loss": 0.2393, "step": 99200 }, { "epoch": 2.896483263018058, "grad_norm": 0.6124914740422728, "learning_rate": 1.9167342525006756e-06, "loss": 0.2242, "step": 99205 }, { "epoch": 2.8966292462883754, "grad_norm": 0.5728339571541566, "learning_rate": 1.9140308191403084e-06, "loss": 0.2418, "step": 99210 }, { "epoch": 2.896775229558693, "grad_norm": 0.5891896432448331, "learning_rate": 1.9113273857799407e-06, "loss": 0.2431, "step": 99215 }, { "epoch": 2.89692121282901, "grad_norm": 0.5738300059438106, "learning_rate": 1.908623952419573e-06, "loss": 0.2427, "step": 99220 }, { "epoch": 2.897067196099327, "grad_norm": 0.5891506585682054, "learning_rate": 1.9059205190592053e-06, "loss": 0.2368, "step": 99225 }, { "epoch": 2.8972131793696443, "grad_norm": 0.6299689041899741, "learning_rate": 1.9032170856988374e-06, "loss": 0.2376, "step": 99230 }, { "epoch": 2.8973591626399613, "grad_norm": 0.5405939493788454, "learning_rate": 1.90051365233847e-06, "loss": 0.2316, "step": 99235 }, { "epoch": 2.8975051459102787, "grad_norm": 0.6153704182145986, "learning_rate": 1.8978102189781023e-06, "loss": 0.2317, "step": 99240 }, { "epoch": 2.8976511291805958, "grad_norm": 0.5990394731004117, "learning_rate": 1.8951067856177348e-06, "loss": 0.2409, "step": 99245 }, { "epoch": 2.897797112450913, "grad_norm": 0.5807613876028204, "learning_rate": 1.892403352257367e-06, "loss": 0.2259, "step": 99250 }, { "epoch": 2.89794309572123, "grad_norm": 0.5717104302243436, "learning_rate": 1.8896999188969994e-06, "loss": 0.2421, "step": 99255 }, { "epoch": 2.8980890789915477, "grad_norm": 0.6424726488998114, "learning_rate": 1.8869964855366315e-06, "loss": 0.2529, "step": 99260 }, { "epoch": 2.8982350622618647, "grad_norm": 0.560604742263403, "learning_rate": 1.8842930521762639e-06, "loss": 0.2292, "step": 99265 }, { "epoch": 2.898381045532182, "grad_norm": 0.6321343727589191, "learning_rate": 1.8815896188158964e-06, "loss": 0.2453, "step": 99270 }, { "epoch": 2.898527028802499, "grad_norm": 0.5671759796751819, "learning_rate": 1.8788861854555285e-06, "loss": 0.2608, "step": 99275 }, { "epoch": 2.8986730120728166, "grad_norm": 0.5634355317099379, "learning_rate": 1.876182752095161e-06, "loss": 0.2338, "step": 99280 }, { "epoch": 2.8988189953431336, "grad_norm": 0.6143722138025732, "learning_rate": 1.8734793187347933e-06, "loss": 0.2497, "step": 99285 }, { "epoch": 2.898964978613451, "grad_norm": 0.605824497621311, "learning_rate": 1.8707758853744254e-06, "loss": 0.2556, "step": 99290 }, { "epoch": 2.899110961883768, "grad_norm": 0.5788482495905615, "learning_rate": 1.868072452014058e-06, "loss": 0.2545, "step": 99295 }, { "epoch": 2.8992569451540855, "grad_norm": 0.519702624685101, "learning_rate": 1.8653690186536903e-06, "loss": 0.2304, "step": 99300 }, { "epoch": 2.8994029284244025, "grad_norm": 0.6077209342696009, "learning_rate": 1.8626655852933228e-06, "loss": 0.2576, "step": 99305 }, { "epoch": 2.89954891169472, "grad_norm": 0.5852128871909026, "learning_rate": 1.859962151932955e-06, "loss": 0.24, "step": 99310 }, { "epoch": 2.899694894965037, "grad_norm": 0.6229182701766213, "learning_rate": 1.857258718572587e-06, "loss": 0.2498, "step": 99315 }, { "epoch": 2.899840878235354, "grad_norm": 0.6361326447516552, "learning_rate": 1.8545552852122196e-06, "loss": 0.2478, "step": 99320 }, { "epoch": 2.8999868615056714, "grad_norm": 0.6052144328745178, "learning_rate": 1.8518518518518519e-06, "loss": 0.238, "step": 99325 }, { "epoch": 2.900132844775989, "grad_norm": 0.5726090441166733, "learning_rate": 1.8491484184914844e-06, "loss": 0.2367, "step": 99330 }, { "epoch": 2.900278828046306, "grad_norm": 0.5871499778914095, "learning_rate": 1.8464449851311165e-06, "loss": 0.2498, "step": 99335 }, { "epoch": 2.900424811316623, "grad_norm": 0.5953207618963183, "learning_rate": 1.843741551770749e-06, "loss": 0.2383, "step": 99340 }, { "epoch": 2.9005707945869403, "grad_norm": 0.5993454473872538, "learning_rate": 1.8410381184103814e-06, "loss": 0.2478, "step": 99345 }, { "epoch": 2.900716777857258, "grad_norm": 0.48797913605808196, "learning_rate": 1.8383346850500135e-06, "loss": 0.2393, "step": 99350 }, { "epoch": 2.900862761127575, "grad_norm": 0.6148769183305087, "learning_rate": 1.835631251689646e-06, "loss": 0.2382, "step": 99355 }, { "epoch": 2.901008744397892, "grad_norm": 0.6367655063879312, "learning_rate": 1.8329278183292783e-06, "loss": 0.2441, "step": 99360 }, { "epoch": 2.9011547276682093, "grad_norm": 0.6464915373948286, "learning_rate": 1.8302243849689108e-06, "loss": 0.237, "step": 99365 }, { "epoch": 2.9013007109385267, "grad_norm": 0.5723827422698899, "learning_rate": 1.827520951608543e-06, "loss": 0.2386, "step": 99370 }, { "epoch": 2.9014466942088437, "grad_norm": 0.5988427639974192, "learning_rate": 1.824817518248175e-06, "loss": 0.2359, "step": 99375 }, { "epoch": 2.9015926774791607, "grad_norm": 0.5614241505859967, "learning_rate": 1.8221140848878076e-06, "loss": 0.2399, "step": 99380 }, { "epoch": 2.901738660749478, "grad_norm": 0.5948715892388473, "learning_rate": 1.8194106515274399e-06, "loss": 0.233, "step": 99385 }, { "epoch": 2.901884644019795, "grad_norm": 0.6016311558928785, "learning_rate": 1.8167072181670724e-06, "loss": 0.2477, "step": 99390 }, { "epoch": 2.9020306272901126, "grad_norm": 0.6027830878935818, "learning_rate": 1.8140037848067045e-06, "loss": 0.2362, "step": 99395 }, { "epoch": 2.9021766105604296, "grad_norm": 0.5792569721266618, "learning_rate": 1.8113003514463368e-06, "loss": 0.2352, "step": 99400 }, { "epoch": 2.902322593830747, "grad_norm": 0.5852456414704016, "learning_rate": 1.8085969180859694e-06, "loss": 0.2345, "step": 99405 }, { "epoch": 2.902468577101064, "grad_norm": 0.6136100487037104, "learning_rate": 1.8058934847256015e-06, "loss": 0.2515, "step": 99410 }, { "epoch": 2.9026145603713815, "grad_norm": 0.6391310644247233, "learning_rate": 1.803190051365234e-06, "loss": 0.2429, "step": 99415 }, { "epoch": 2.9027605436416986, "grad_norm": 0.57816480942088, "learning_rate": 1.8004866180048663e-06, "loss": 0.2442, "step": 99420 }, { "epoch": 2.902906526912016, "grad_norm": 0.5740657043702058, "learning_rate": 1.7977831846444989e-06, "loss": 0.2279, "step": 99425 }, { "epoch": 2.903052510182333, "grad_norm": 0.6142509218711047, "learning_rate": 1.795079751284131e-06, "loss": 0.244, "step": 99430 }, { "epoch": 2.9031984934526505, "grad_norm": 0.5599204608446595, "learning_rate": 1.792376317923763e-06, "loss": 0.2349, "step": 99435 }, { "epoch": 2.9033444767229675, "grad_norm": 0.528430735443558, "learning_rate": 1.7896728845633956e-06, "loss": 0.2234, "step": 99440 }, { "epoch": 2.903490459993285, "grad_norm": 0.5963135768866691, "learning_rate": 1.786969451203028e-06, "loss": 0.2481, "step": 99445 }, { "epoch": 2.903636443263602, "grad_norm": 0.5931444284559007, "learning_rate": 1.7842660178426604e-06, "loss": 0.2472, "step": 99450 }, { "epoch": 2.903782426533919, "grad_norm": 0.5354990725986422, "learning_rate": 1.7815625844822925e-06, "loss": 0.2339, "step": 99455 }, { "epoch": 2.9039284098042364, "grad_norm": 0.5844004542046384, "learning_rate": 1.7788591511219249e-06, "loss": 0.2438, "step": 99460 }, { "epoch": 2.904074393074554, "grad_norm": 0.5649612673669412, "learning_rate": 1.7761557177615574e-06, "loss": 0.2343, "step": 99465 }, { "epoch": 2.904220376344871, "grad_norm": 0.5613034515962912, "learning_rate": 1.7734522844011895e-06, "loss": 0.2319, "step": 99470 }, { "epoch": 2.904366359615188, "grad_norm": 0.6221510908854614, "learning_rate": 1.770748851040822e-06, "loss": 0.2371, "step": 99475 }, { "epoch": 2.9045123428855053, "grad_norm": 0.6083053195678836, "learning_rate": 1.7680454176804541e-06, "loss": 0.2308, "step": 99480 }, { "epoch": 2.9046583261558228, "grad_norm": 0.5791737098831135, "learning_rate": 1.7653419843200869e-06, "loss": 0.2337, "step": 99485 }, { "epoch": 2.9048043094261398, "grad_norm": 0.5717224662693414, "learning_rate": 1.762638550959719e-06, "loss": 0.2377, "step": 99490 }, { "epoch": 2.9049502926964568, "grad_norm": 0.6037050568439276, "learning_rate": 1.759935117599351e-06, "loss": 0.235, "step": 99495 }, { "epoch": 2.905096275966774, "grad_norm": 0.6218389508663598, "learning_rate": 1.7572316842389836e-06, "loss": 0.2557, "step": 99500 }, { "epoch": 2.9052422592370917, "grad_norm": 0.6450964307407496, "learning_rate": 1.754528250878616e-06, "loss": 0.2405, "step": 99505 }, { "epoch": 2.9053882425074087, "grad_norm": 0.6344364557815669, "learning_rate": 1.7518248175182485e-06, "loss": 0.2375, "step": 99510 }, { "epoch": 2.9055342257777257, "grad_norm": 0.6134467313092601, "learning_rate": 1.7491213841578806e-06, "loss": 0.2326, "step": 99515 }, { "epoch": 2.905680209048043, "grad_norm": 0.5718125858547644, "learning_rate": 1.7464179507975129e-06, "loss": 0.2316, "step": 99520 }, { "epoch": 2.9058261923183606, "grad_norm": 0.6310669399481956, "learning_rate": 1.7437145174371454e-06, "loss": 0.2336, "step": 99525 }, { "epoch": 2.9059721755886776, "grad_norm": 0.5480969572159575, "learning_rate": 1.7410110840767775e-06, "loss": 0.2239, "step": 99530 }, { "epoch": 2.9061181588589946, "grad_norm": 0.6220530673007185, "learning_rate": 1.73830765071641e-06, "loss": 0.2515, "step": 99535 }, { "epoch": 2.906264142129312, "grad_norm": 0.6171566943360629, "learning_rate": 1.7356042173560421e-06, "loss": 0.2452, "step": 99540 }, { "epoch": 2.906410125399629, "grad_norm": 0.5848105752577898, "learning_rate": 1.7329007839956745e-06, "loss": 0.2519, "step": 99545 }, { "epoch": 2.9065561086699465, "grad_norm": 0.5968717169238821, "learning_rate": 1.730197350635307e-06, "loss": 0.2344, "step": 99550 }, { "epoch": 2.9067020919402635, "grad_norm": 0.5951407706806906, "learning_rate": 1.727493917274939e-06, "loss": 0.2174, "step": 99555 }, { "epoch": 2.906848075210581, "grad_norm": 0.5925672332101517, "learning_rate": 1.7247904839145716e-06, "loss": 0.2199, "step": 99560 }, { "epoch": 2.906994058480898, "grad_norm": 0.6606904608414179, "learning_rate": 1.722087050554204e-06, "loss": 0.2579, "step": 99565 }, { "epoch": 2.9071400417512154, "grad_norm": 0.5742547546805098, "learning_rate": 1.7193836171938365e-06, "loss": 0.2318, "step": 99570 }, { "epoch": 2.9072860250215324, "grad_norm": 0.5988571731482959, "learning_rate": 1.7166801838334686e-06, "loss": 0.2348, "step": 99575 }, { "epoch": 2.90743200829185, "grad_norm": 0.6153705736320473, "learning_rate": 1.713976750473101e-06, "loss": 0.248, "step": 99580 }, { "epoch": 2.907577991562167, "grad_norm": 0.6067018215471816, "learning_rate": 1.7112733171127334e-06, "loss": 0.2356, "step": 99585 }, { "epoch": 2.9077239748324843, "grad_norm": 0.6323464226758109, "learning_rate": 1.7085698837523655e-06, "loss": 0.2411, "step": 99590 }, { "epoch": 2.9078699581028014, "grad_norm": 0.6046594463769733, "learning_rate": 1.705866450391998e-06, "loss": 0.2438, "step": 99595 }, { "epoch": 2.908015941373119, "grad_norm": 0.6098963860318933, "learning_rate": 1.7031630170316302e-06, "loss": 0.2412, "step": 99600 }, { "epoch": 2.908161924643436, "grad_norm": 6.519390508188121, "learning_rate": 1.7004595836712625e-06, "loss": 0.2867, "step": 99605 }, { "epoch": 2.908307907913753, "grad_norm": 0.5669928652177222, "learning_rate": 1.697756150310895e-06, "loss": 0.2287, "step": 99610 }, { "epoch": 2.9084538911840703, "grad_norm": 0.596697178520918, "learning_rate": 1.6950527169505271e-06, "loss": 0.2396, "step": 99615 }, { "epoch": 2.9085998744543877, "grad_norm": 0.6222180666124694, "learning_rate": 1.6923492835901596e-06, "loss": 0.238, "step": 99620 }, { "epoch": 2.9087458577247047, "grad_norm": 0.6128228872839179, "learning_rate": 1.689645850229792e-06, "loss": 0.2615, "step": 99625 }, { "epoch": 2.9088918409950217, "grad_norm": 0.633065620215068, "learning_rate": 1.686942416869424e-06, "loss": 0.2528, "step": 99630 }, { "epoch": 2.909037824265339, "grad_norm": 0.605893006459529, "learning_rate": 1.6842389835090566e-06, "loss": 0.2447, "step": 99635 }, { "epoch": 2.9091838075356566, "grad_norm": 0.5984159904749237, "learning_rate": 1.681535550148689e-06, "loss": 0.2382, "step": 99640 }, { "epoch": 2.9093297908059736, "grad_norm": 0.614337841482015, "learning_rate": 1.6788321167883214e-06, "loss": 0.2428, "step": 99645 }, { "epoch": 2.9094757740762907, "grad_norm": 0.578571599492148, "learning_rate": 1.6761286834279536e-06, "loss": 0.2407, "step": 99650 }, { "epoch": 2.909621757346608, "grad_norm": 0.5894300767110453, "learning_rate": 1.673425250067586e-06, "loss": 0.2448, "step": 99655 }, { "epoch": 2.9097677406169256, "grad_norm": 0.5836755526867523, "learning_rate": 1.6707218167072182e-06, "loss": 0.2343, "step": 99660 }, { "epoch": 2.9099137238872426, "grad_norm": 0.6017604939862369, "learning_rate": 1.6680183833468505e-06, "loss": 0.2338, "step": 99665 }, { "epoch": 2.9100597071575596, "grad_norm": 0.6346738182644067, "learning_rate": 1.665314949986483e-06, "loss": 0.2492, "step": 99670 }, { "epoch": 2.910205690427877, "grad_norm": 0.6110208409599719, "learning_rate": 1.6626115166261151e-06, "loss": 0.2306, "step": 99675 }, { "epoch": 2.910351673698194, "grad_norm": 0.6245904894153885, "learning_rate": 1.6599080832657477e-06, "loss": 0.2406, "step": 99680 }, { "epoch": 2.9104976569685115, "grad_norm": 0.5833304278652124, "learning_rate": 1.65720464990538e-06, "loss": 0.226, "step": 99685 }, { "epoch": 2.9106436402388285, "grad_norm": 0.6018290858464199, "learning_rate": 1.654501216545012e-06, "loss": 0.2301, "step": 99690 }, { "epoch": 2.910789623509146, "grad_norm": 0.6233892112051913, "learning_rate": 1.6517977831846446e-06, "loss": 0.2333, "step": 99695 }, { "epoch": 2.910935606779463, "grad_norm": 0.5639914531364311, "learning_rate": 1.6490943498242767e-06, "loss": 0.2298, "step": 99700 }, { "epoch": 2.9110815900497804, "grad_norm": 0.5583583535640858, "learning_rate": 1.6463909164639095e-06, "loss": 0.2443, "step": 99705 }, { "epoch": 2.9112275733200974, "grad_norm": 0.5787820554343949, "learning_rate": 1.6436874831035416e-06, "loss": 0.2405, "step": 99710 }, { "epoch": 2.911373556590415, "grad_norm": 0.6247885853610764, "learning_rate": 1.6409840497431737e-06, "loss": 0.2511, "step": 99715 }, { "epoch": 2.911519539860732, "grad_norm": 0.5726027115458984, "learning_rate": 1.6382806163828062e-06, "loss": 0.2406, "step": 99720 }, { "epoch": 2.9116655231310493, "grad_norm": 0.5968875014964637, "learning_rate": 1.6355771830224385e-06, "loss": 0.2267, "step": 99725 }, { "epoch": 2.9118115064013663, "grad_norm": 0.5690948681467706, "learning_rate": 1.632873749662071e-06, "loss": 0.2343, "step": 99730 }, { "epoch": 2.9119574896716838, "grad_norm": 0.6046771650927946, "learning_rate": 1.6301703163017032e-06, "loss": 0.2574, "step": 99735 }, { "epoch": 2.912103472942001, "grad_norm": 0.5776921142930241, "learning_rate": 1.6274668829413357e-06, "loss": 0.2311, "step": 99740 }, { "epoch": 2.9122494562123182, "grad_norm": 0.6233668901547518, "learning_rate": 1.624763449580968e-06, "loss": 0.243, "step": 99745 }, { "epoch": 2.9123954394826352, "grad_norm": 0.6019849775099351, "learning_rate": 1.6220600162206001e-06, "loss": 0.2493, "step": 99750 }, { "epoch": 2.9125414227529527, "grad_norm": 0.523167375847193, "learning_rate": 1.6193565828602326e-06, "loss": 0.2321, "step": 99755 }, { "epoch": 2.9126874060232697, "grad_norm": 0.6246189176487043, "learning_rate": 1.6166531494998647e-06, "loss": 0.246, "step": 99760 }, { "epoch": 2.9128333892935867, "grad_norm": 0.5887907015532048, "learning_rate": 1.6139497161394973e-06, "loss": 0.233, "step": 99765 }, { "epoch": 2.912979372563904, "grad_norm": 0.5974119573187424, "learning_rate": 1.6112462827791296e-06, "loss": 0.2416, "step": 99770 }, { "epoch": 2.9131253558342216, "grad_norm": 0.5995159305713248, "learning_rate": 1.6085428494187617e-06, "loss": 0.2352, "step": 99775 }, { "epoch": 2.9132713391045386, "grad_norm": 0.5804392805480709, "learning_rate": 1.6058394160583942e-06, "loss": 0.2438, "step": 99780 }, { "epoch": 2.9134173223748556, "grad_norm": 0.5946134520339037, "learning_rate": 1.6031359826980265e-06, "loss": 0.2397, "step": 99785 }, { "epoch": 2.913563305645173, "grad_norm": 0.6325835717534379, "learning_rate": 1.600432549337659e-06, "loss": 0.2332, "step": 99790 }, { "epoch": 2.9137092889154905, "grad_norm": 0.5420082282336591, "learning_rate": 1.5977291159772912e-06, "loss": 0.2318, "step": 99795 }, { "epoch": 2.9138552721858075, "grad_norm": 0.5843439416101598, "learning_rate": 1.5950256826169237e-06, "loss": 0.2289, "step": 99800 }, { "epoch": 2.9140012554561245, "grad_norm": 0.5745516151550764, "learning_rate": 1.592322249256556e-06, "loss": 0.2511, "step": 99805 }, { "epoch": 2.914147238726442, "grad_norm": 0.599181765459627, "learning_rate": 1.5896188158961881e-06, "loss": 0.2328, "step": 99810 }, { "epoch": 2.9142932219967594, "grad_norm": 0.5895564744459462, "learning_rate": 1.5869153825358207e-06, "loss": 0.2295, "step": 99815 }, { "epoch": 2.9144392052670764, "grad_norm": 0.6097605044843417, "learning_rate": 1.5842119491754528e-06, "loss": 0.2356, "step": 99820 }, { "epoch": 2.9145851885373935, "grad_norm": 0.6009901133026998, "learning_rate": 1.5815085158150853e-06, "loss": 0.2558, "step": 99825 }, { "epoch": 2.914731171807711, "grad_norm": 0.6232223489569793, "learning_rate": 1.5788050824547176e-06, "loss": 0.2376, "step": 99830 }, { "epoch": 2.914877155078028, "grad_norm": 0.5481623122187007, "learning_rate": 1.5761016490943497e-06, "loss": 0.2358, "step": 99835 }, { "epoch": 2.9150231383483454, "grad_norm": 0.6448164028256073, "learning_rate": 1.5733982157339822e-06, "loss": 0.2326, "step": 99840 }, { "epoch": 2.9151691216186624, "grad_norm": 0.5772135008502339, "learning_rate": 1.5706947823736146e-06, "loss": 0.2316, "step": 99845 }, { "epoch": 2.91531510488898, "grad_norm": 0.6491517520474971, "learning_rate": 1.567991349013247e-06, "loss": 0.2516, "step": 99850 }, { "epoch": 2.915461088159297, "grad_norm": 0.6247047409886171, "learning_rate": 1.5652879156528792e-06, "loss": 0.2401, "step": 99855 }, { "epoch": 2.9156070714296143, "grad_norm": 0.55569383821035, "learning_rate": 1.5625844822925115e-06, "loss": 0.2341, "step": 99860 }, { "epoch": 2.9157530546999313, "grad_norm": 0.5713968969989199, "learning_rate": 1.559881048932144e-06, "loss": 0.2376, "step": 99865 }, { "epoch": 2.9158990379702487, "grad_norm": 0.6394900308352424, "learning_rate": 1.5571776155717764e-06, "loss": 0.2328, "step": 99870 }, { "epoch": 2.9160450212405657, "grad_norm": 0.5634174618515038, "learning_rate": 1.5544741822114085e-06, "loss": 0.2377, "step": 99875 }, { "epoch": 2.916191004510883, "grad_norm": 0.5883732944767448, "learning_rate": 1.5517707488510408e-06, "loss": 0.2513, "step": 99880 }, { "epoch": 2.9163369877812, "grad_norm": 0.6403733606940402, "learning_rate": 1.5490673154906733e-06, "loss": 0.2483, "step": 99885 }, { "epoch": 2.9164829710515177, "grad_norm": 0.6026071650027492, "learning_rate": 1.5463638821303056e-06, "loss": 0.2453, "step": 99890 }, { "epoch": 2.9166289543218347, "grad_norm": 0.5978428043565559, "learning_rate": 1.543660448769938e-06, "loss": 0.2512, "step": 99895 }, { "epoch": 2.9167749375921517, "grad_norm": 0.5993303455761072, "learning_rate": 1.5409570154095703e-06, "loss": 0.2528, "step": 99900 }, { "epoch": 2.916920920862469, "grad_norm": 0.5832403231417606, "learning_rate": 1.5382535820492026e-06, "loss": 0.2473, "step": 99905 }, { "epoch": 2.9170669041327866, "grad_norm": 0.6359040049683402, "learning_rate": 1.535550148688835e-06, "loss": 0.2483, "step": 99910 }, { "epoch": 2.9172128874031036, "grad_norm": 0.6063850932172251, "learning_rate": 1.5328467153284672e-06, "loss": 0.2456, "step": 99915 }, { "epoch": 2.9173588706734206, "grad_norm": 0.6417526377497182, "learning_rate": 1.5301432819680995e-06, "loss": 0.2348, "step": 99920 }, { "epoch": 2.917504853943738, "grad_norm": 0.5828872803688329, "learning_rate": 1.527439848607732e-06, "loss": 0.2475, "step": 99925 }, { "epoch": 2.9176508372140555, "grad_norm": 0.6046132534945573, "learning_rate": 1.5247364152473642e-06, "loss": 0.2254, "step": 99930 }, { "epoch": 2.9177968204843725, "grad_norm": 0.5489207532355, "learning_rate": 1.5220329818869965e-06, "loss": 0.2244, "step": 99935 }, { "epoch": 2.9179428037546895, "grad_norm": 0.6132597412967965, "learning_rate": 1.5193295485266288e-06, "loss": 0.2296, "step": 99940 }, { "epoch": 2.918088787025007, "grad_norm": 0.5802650956942743, "learning_rate": 1.5166261151662613e-06, "loss": 0.2386, "step": 99945 }, { "epoch": 2.9182347702953244, "grad_norm": 0.5878565970847406, "learning_rate": 1.5139226818058936e-06, "loss": 0.2473, "step": 99950 }, { "epoch": 2.9183807535656414, "grad_norm": 0.6235846250945427, "learning_rate": 1.511219248445526e-06, "loss": 0.25, "step": 99955 }, { "epoch": 2.9185267368359584, "grad_norm": 0.53374685000616, "learning_rate": 1.508515815085158e-06, "loss": 0.243, "step": 99960 }, { "epoch": 2.918672720106276, "grad_norm": 0.5724456818159757, "learning_rate": 1.5058123817247906e-06, "loss": 0.2254, "step": 99965 }, { "epoch": 2.918818703376593, "grad_norm": 0.5493237905569472, "learning_rate": 1.503108948364423e-06, "loss": 0.2467, "step": 99970 }, { "epoch": 2.9189646866469103, "grad_norm": 0.5541388048981106, "learning_rate": 1.5004055150040552e-06, "loss": 0.2285, "step": 99975 }, { "epoch": 2.9191106699172273, "grad_norm": 0.6165948843449099, "learning_rate": 1.4977020816436876e-06, "loss": 0.2559, "step": 99980 }, { "epoch": 2.919256653187545, "grad_norm": 0.5969180955602932, "learning_rate": 1.4949986482833199e-06, "loss": 0.2436, "step": 99985 }, { "epoch": 2.919402636457862, "grad_norm": 0.5878513250596203, "learning_rate": 1.4922952149229522e-06, "loss": 0.2341, "step": 99990 }, { "epoch": 2.9195486197281793, "grad_norm": 0.5469460849661989, "learning_rate": 1.4895917815625845e-06, "loss": 0.2178, "step": 99995 }, { "epoch": 2.9196946029984963, "grad_norm": 0.6106912794578281, "learning_rate": 1.4868883482022168e-06, "loss": 0.237, "step": 100000 }, { "epoch": 2.9198405862688137, "grad_norm": 0.5864910614432461, "learning_rate": 1.4841849148418493e-06, "loss": 0.2484, "step": 100005 }, { "epoch": 2.9199865695391307, "grad_norm": 0.5539065404984442, "learning_rate": 1.4814814814814817e-06, "loss": 0.2399, "step": 100010 }, { "epoch": 2.920132552809448, "grad_norm": 0.567969285092774, "learning_rate": 1.4787780481211138e-06, "loss": 0.2421, "step": 100015 }, { "epoch": 2.920278536079765, "grad_norm": 0.5915125594579009, "learning_rate": 1.476074614760746e-06, "loss": 0.2445, "step": 100020 }, { "epoch": 2.9204245193500826, "grad_norm": 0.6043527051582904, "learning_rate": 1.4733711814003786e-06, "loss": 0.2331, "step": 100025 }, { "epoch": 2.9205705026203996, "grad_norm": 0.5501801041221581, "learning_rate": 1.470667748040011e-06, "loss": 0.2304, "step": 100030 }, { "epoch": 2.920716485890717, "grad_norm": 0.6398186244035867, "learning_rate": 1.4679643146796433e-06, "loss": 0.2378, "step": 100035 }, { "epoch": 2.920862469161034, "grad_norm": 0.5598494839416354, "learning_rate": 1.4652608813192756e-06, "loss": 0.2463, "step": 100040 }, { "epoch": 2.9210084524313515, "grad_norm": 0.5700426995455168, "learning_rate": 1.4625574479589079e-06, "loss": 0.2261, "step": 100045 }, { "epoch": 2.9211544357016686, "grad_norm": 0.5915296161014734, "learning_rate": 1.4598540145985402e-06, "loss": 0.2502, "step": 100050 }, { "epoch": 2.9213004189719856, "grad_norm": 0.5664484195266741, "learning_rate": 1.4571505812381725e-06, "loss": 0.2407, "step": 100055 }, { "epoch": 2.921446402242303, "grad_norm": 0.5735033707260283, "learning_rate": 1.4544471478778048e-06, "loss": 0.2456, "step": 100060 }, { "epoch": 2.9215923855126205, "grad_norm": 0.5794037426795133, "learning_rate": 1.4517437145174372e-06, "loss": 0.2255, "step": 100065 }, { "epoch": 2.9217383687829375, "grad_norm": 0.6115443342373534, "learning_rate": 1.4490402811570697e-06, "loss": 0.2338, "step": 100070 }, { "epoch": 2.9218843520532545, "grad_norm": 0.5787899615940775, "learning_rate": 1.4463368477967018e-06, "loss": 0.2246, "step": 100075 }, { "epoch": 2.922030335323572, "grad_norm": 0.5789277124414279, "learning_rate": 1.4436334144363341e-06, "loss": 0.2256, "step": 100080 }, { "epoch": 2.9221763185938894, "grad_norm": 0.6234915665064293, "learning_rate": 1.4409299810759666e-06, "loss": 0.2445, "step": 100085 }, { "epoch": 2.9223223018642064, "grad_norm": 0.6070319673574464, "learning_rate": 1.438226547715599e-06, "loss": 0.2442, "step": 100090 }, { "epoch": 2.9224682851345234, "grad_norm": 0.6024050960455015, "learning_rate": 1.4355231143552313e-06, "loss": 0.2299, "step": 100095 }, { "epoch": 2.922614268404841, "grad_norm": 0.60814162274852, "learning_rate": 1.4328196809948636e-06, "loss": 0.2391, "step": 100100 }, { "epoch": 2.9227602516751583, "grad_norm": 0.6057773864830878, "learning_rate": 1.430116247634496e-06, "loss": 0.2346, "step": 100105 }, { "epoch": 2.9229062349454753, "grad_norm": 0.5678376676991419, "learning_rate": 1.4274128142741282e-06, "loss": 0.2383, "step": 100110 }, { "epoch": 2.9230522182157923, "grad_norm": 0.5723889078969646, "learning_rate": 1.4247093809137605e-06, "loss": 0.2418, "step": 100115 }, { "epoch": 2.9231982014861098, "grad_norm": 0.5660590095512819, "learning_rate": 1.4220059475533929e-06, "loss": 0.2213, "step": 100120 }, { "epoch": 2.9233441847564268, "grad_norm": 0.627029769279019, "learning_rate": 1.4193025141930252e-06, "loss": 0.2558, "step": 100125 }, { "epoch": 2.923490168026744, "grad_norm": 0.5921079778554004, "learning_rate": 1.4165990808326575e-06, "loss": 0.2377, "step": 100130 }, { "epoch": 2.9236361512970612, "grad_norm": 0.5812093841362571, "learning_rate": 1.4138956474722898e-06, "loss": 0.2336, "step": 100135 }, { "epoch": 2.9237821345673787, "grad_norm": 0.6145254201052149, "learning_rate": 1.4111922141119221e-06, "loss": 0.2444, "step": 100140 }, { "epoch": 2.9239281178376957, "grad_norm": 0.5912646976281032, "learning_rate": 1.4084887807515544e-06, "loss": 0.2217, "step": 100145 }, { "epoch": 2.924074101108013, "grad_norm": 0.6189550255343582, "learning_rate": 1.405785347391187e-06, "loss": 0.2413, "step": 100150 }, { "epoch": 2.92422008437833, "grad_norm": 0.6619667873156124, "learning_rate": 1.4030819140308193e-06, "loss": 0.2521, "step": 100155 }, { "epoch": 2.9243660676486476, "grad_norm": 0.6243589809179055, "learning_rate": 1.4003784806704514e-06, "loss": 0.2454, "step": 100160 }, { "epoch": 2.9245120509189646, "grad_norm": 0.6184794614566447, "learning_rate": 1.397675047310084e-06, "loss": 0.2406, "step": 100165 }, { "epoch": 2.924658034189282, "grad_norm": 0.5925634423730624, "learning_rate": 1.3949716139497162e-06, "loss": 0.2401, "step": 100170 }, { "epoch": 2.924804017459599, "grad_norm": 0.5812146233475861, "learning_rate": 1.3922681805893486e-06, "loss": 0.2334, "step": 100175 }, { "epoch": 2.9249500007299165, "grad_norm": 0.5651501073817159, "learning_rate": 1.3895647472289809e-06, "loss": 0.2323, "step": 100180 }, { "epoch": 2.9250959840002335, "grad_norm": 0.5692052281099651, "learning_rate": 1.3868613138686132e-06, "loss": 0.2358, "step": 100185 }, { "epoch": 2.9252419672705505, "grad_norm": 0.6534040133703028, "learning_rate": 1.3841578805082455e-06, "loss": 0.2666, "step": 100190 }, { "epoch": 2.925387950540868, "grad_norm": 0.5813188963304353, "learning_rate": 1.3814544471478778e-06, "loss": 0.2363, "step": 100195 }, { "epoch": 2.9255339338111854, "grad_norm": 0.5737029376592675, "learning_rate": 1.3787510137875101e-06, "loss": 0.2316, "step": 100200 }, { "epoch": 2.9256799170815024, "grad_norm": 0.5782370973055276, "learning_rate": 1.3760475804271425e-06, "loss": 0.2598, "step": 100205 }, { "epoch": 2.9258259003518194, "grad_norm": 0.5876573801246241, "learning_rate": 1.373344147066775e-06, "loss": 0.2348, "step": 100210 }, { "epoch": 2.925971883622137, "grad_norm": 0.6190064031379091, "learning_rate": 1.3706407137064073e-06, "loss": 0.2376, "step": 100215 }, { "epoch": 2.9261178668924543, "grad_norm": 0.5921132827984205, "learning_rate": 1.3679372803460394e-06, "loss": 0.2456, "step": 100220 }, { "epoch": 2.9262638501627714, "grad_norm": 0.5680497886523981, "learning_rate": 1.365233846985672e-06, "loss": 0.2314, "step": 100225 }, { "epoch": 2.9264098334330884, "grad_norm": 0.6420294984793938, "learning_rate": 1.3625304136253043e-06, "loss": 0.258, "step": 100230 }, { "epoch": 2.926555816703406, "grad_norm": 0.5392603855586589, "learning_rate": 1.3598269802649366e-06, "loss": 0.2342, "step": 100235 }, { "epoch": 2.9267017999737233, "grad_norm": 0.5884266229900039, "learning_rate": 1.357123546904569e-06, "loss": 0.2478, "step": 100240 }, { "epoch": 2.9268477832440403, "grad_norm": 0.6182301992157181, "learning_rate": 1.3544201135442012e-06, "loss": 0.242, "step": 100245 }, { "epoch": 2.9269937665143573, "grad_norm": 0.6096223152786788, "learning_rate": 1.3517166801838335e-06, "loss": 0.2367, "step": 100250 }, { "epoch": 2.9271397497846747, "grad_norm": 0.6138210903049424, "learning_rate": 1.3490132468234658e-06, "loss": 0.2346, "step": 100255 }, { "epoch": 2.9272857330549917, "grad_norm": 0.6016113264044014, "learning_rate": 1.3463098134630982e-06, "loss": 0.2236, "step": 100260 }, { "epoch": 2.927431716325309, "grad_norm": 0.539320148884343, "learning_rate": 1.3436063801027305e-06, "loss": 0.2318, "step": 100265 }, { "epoch": 2.927577699595626, "grad_norm": 0.5789569497766078, "learning_rate": 1.340902946742363e-06, "loss": 0.2411, "step": 100270 }, { "epoch": 2.9277236828659436, "grad_norm": 0.6315221096175946, "learning_rate": 1.3381995133819951e-06, "loss": 0.2387, "step": 100275 }, { "epoch": 2.9278696661362607, "grad_norm": 0.6030051822758636, "learning_rate": 1.3354960800216274e-06, "loss": 0.2369, "step": 100280 }, { "epoch": 2.928015649406578, "grad_norm": 0.565723621898027, "learning_rate": 1.3327926466612598e-06, "loss": 0.2265, "step": 100285 }, { "epoch": 2.928161632676895, "grad_norm": 0.6275411235982074, "learning_rate": 1.3300892133008923e-06, "loss": 0.2231, "step": 100290 }, { "epoch": 2.9283076159472126, "grad_norm": 0.5698967972840349, "learning_rate": 1.3273857799405246e-06, "loss": 0.2222, "step": 100295 }, { "epoch": 2.9284535992175296, "grad_norm": 0.6009583331556243, "learning_rate": 1.324682346580157e-06, "loss": 0.2297, "step": 100300 }, { "epoch": 2.928599582487847, "grad_norm": 0.5914410133977938, "learning_rate": 1.3219789132197892e-06, "loss": 0.2398, "step": 100305 }, { "epoch": 2.928745565758164, "grad_norm": 0.5751939773194638, "learning_rate": 1.3192754798594215e-06, "loss": 0.2218, "step": 100310 }, { "epoch": 2.9288915490284815, "grad_norm": 0.5776579409571094, "learning_rate": 1.3165720464990539e-06, "loss": 0.2338, "step": 100315 }, { "epoch": 2.9290375322987985, "grad_norm": 0.5791516224108353, "learning_rate": 1.3138686131386862e-06, "loss": 0.2377, "step": 100320 }, { "epoch": 2.929183515569116, "grad_norm": 0.5699192595276104, "learning_rate": 1.3111651797783185e-06, "loss": 0.2465, "step": 100325 }, { "epoch": 2.929329498839433, "grad_norm": 0.5770813495645968, "learning_rate": 1.308461746417951e-06, "loss": 0.2348, "step": 100330 }, { "epoch": 2.9294754821097504, "grad_norm": 0.6236883110781521, "learning_rate": 1.3057583130575831e-06, "loss": 0.257, "step": 100335 }, { "epoch": 2.9296214653800674, "grad_norm": 0.5758148280071859, "learning_rate": 1.3030548796972155e-06, "loss": 0.2456, "step": 100340 }, { "epoch": 2.9297674486503844, "grad_norm": 0.5700121080751063, "learning_rate": 1.3003514463368478e-06, "loss": 0.2411, "step": 100345 }, { "epoch": 2.929913431920702, "grad_norm": 0.5914900159331584, "learning_rate": 1.2976480129764803e-06, "loss": 0.2464, "step": 100350 }, { "epoch": 2.9300594151910193, "grad_norm": 0.5715836848453435, "learning_rate": 1.2949445796161126e-06, "loss": 0.2443, "step": 100355 }, { "epoch": 2.9302053984613363, "grad_norm": 0.5934533246198448, "learning_rate": 1.2922411462557447e-06, "loss": 0.2451, "step": 100360 }, { "epoch": 2.9303513817316533, "grad_norm": 0.6159827873192716, "learning_rate": 1.289537712895377e-06, "loss": 0.2388, "step": 100365 }, { "epoch": 2.9304973650019708, "grad_norm": 0.5416996548290982, "learning_rate": 1.2868342795350096e-06, "loss": 0.2345, "step": 100370 }, { "epoch": 2.9306433482722882, "grad_norm": 0.5673330635296974, "learning_rate": 1.2841308461746419e-06, "loss": 0.2298, "step": 100375 }, { "epoch": 2.9307893315426052, "grad_norm": 0.5956040614743191, "learning_rate": 1.2814274128142742e-06, "loss": 0.2369, "step": 100380 }, { "epoch": 2.9309353148129222, "grad_norm": 0.5735589372747895, "learning_rate": 1.2787239794539065e-06, "loss": 0.2448, "step": 100385 }, { "epoch": 2.9310812980832397, "grad_norm": 0.5905297991577816, "learning_rate": 1.2760205460935388e-06, "loss": 0.2387, "step": 100390 }, { "epoch": 2.931227281353557, "grad_norm": 0.5722444442838455, "learning_rate": 1.2733171127331712e-06, "loss": 0.2324, "step": 100395 }, { "epoch": 2.931373264623874, "grad_norm": 0.6017623822553679, "learning_rate": 1.2706136793728035e-06, "loss": 0.2379, "step": 100400 }, { "epoch": 2.931519247894191, "grad_norm": 0.575484227093294, "learning_rate": 1.2679102460124358e-06, "loss": 0.2388, "step": 100405 }, { "epoch": 2.9316652311645086, "grad_norm": 0.5622878413566285, "learning_rate": 1.2652068126520683e-06, "loss": 0.2282, "step": 100410 }, { "epoch": 2.9318112144348256, "grad_norm": 0.5321445373570496, "learning_rate": 1.2625033792917006e-06, "loss": 0.247, "step": 100415 }, { "epoch": 2.931957197705143, "grad_norm": 0.5691346855100017, "learning_rate": 1.2597999459313327e-06, "loss": 0.249, "step": 100420 }, { "epoch": 2.93210318097546, "grad_norm": 0.6501737385374602, "learning_rate": 1.257096512570965e-06, "loss": 0.2529, "step": 100425 }, { "epoch": 2.9322491642457775, "grad_norm": 0.5970723701031231, "learning_rate": 1.2543930792105976e-06, "loss": 0.2344, "step": 100430 }, { "epoch": 2.9323951475160945, "grad_norm": 0.6066521564785144, "learning_rate": 1.25168964585023e-06, "loss": 0.2355, "step": 100435 }, { "epoch": 2.932541130786412, "grad_norm": 0.5639626826741144, "learning_rate": 1.2489862124898622e-06, "loss": 0.2386, "step": 100440 }, { "epoch": 2.932687114056729, "grad_norm": 0.5337951591864121, "learning_rate": 1.2462827791294945e-06, "loss": 0.2176, "step": 100445 }, { "epoch": 2.9328330973270464, "grad_norm": 0.5698226600325123, "learning_rate": 1.2435793457691269e-06, "loss": 0.2195, "step": 100450 }, { "epoch": 2.9329790805973635, "grad_norm": 0.5763027595197069, "learning_rate": 1.2408759124087592e-06, "loss": 0.2432, "step": 100455 }, { "epoch": 2.933125063867681, "grad_norm": 0.6134257781998254, "learning_rate": 1.2381724790483915e-06, "loss": 0.228, "step": 100460 }, { "epoch": 2.933271047137998, "grad_norm": 0.572057329822723, "learning_rate": 1.2354690456880238e-06, "loss": 0.2376, "step": 100465 }, { "epoch": 2.9334170304083154, "grad_norm": 0.6414040693438179, "learning_rate": 1.2327656123276563e-06, "loss": 0.2634, "step": 100470 }, { "epoch": 2.9335630136786324, "grad_norm": 0.6057555392976469, "learning_rate": 1.2300621789672884e-06, "loss": 0.2264, "step": 100475 }, { "epoch": 2.9337089969489494, "grad_norm": 0.6277298497433467, "learning_rate": 1.2273587456069208e-06, "loss": 0.256, "step": 100480 }, { "epoch": 2.933854980219267, "grad_norm": 0.6169311969527159, "learning_rate": 1.224655312246553e-06, "loss": 0.2422, "step": 100485 }, { "epoch": 2.9340009634895843, "grad_norm": 0.5871607653803494, "learning_rate": 1.2219518788861856e-06, "loss": 0.2495, "step": 100490 }, { "epoch": 2.9341469467599013, "grad_norm": 0.6087688867965396, "learning_rate": 1.219248445525818e-06, "loss": 0.2437, "step": 100495 }, { "epoch": 2.9342929300302183, "grad_norm": 0.6436218507541939, "learning_rate": 1.2165450121654502e-06, "loss": 0.2427, "step": 100500 }, { "epoch": 2.9344389133005357, "grad_norm": 0.6464352603422011, "learning_rate": 1.2138415788050823e-06, "loss": 0.2527, "step": 100505 }, { "epoch": 2.934584896570853, "grad_norm": 0.6432444319425954, "learning_rate": 1.2111381454447149e-06, "loss": 0.2257, "step": 100510 }, { "epoch": 2.93473087984117, "grad_norm": 0.5661614963224981, "learning_rate": 1.2084347120843472e-06, "loss": 0.2385, "step": 100515 }, { "epoch": 2.934876863111487, "grad_norm": 0.5968540784939658, "learning_rate": 1.2057312787239795e-06, "loss": 0.24, "step": 100520 }, { "epoch": 2.9350228463818047, "grad_norm": 0.6349902864680483, "learning_rate": 1.2030278453636118e-06, "loss": 0.2285, "step": 100525 }, { "epoch": 2.935168829652122, "grad_norm": 0.5898978223048696, "learning_rate": 1.2003244120032444e-06, "loss": 0.245, "step": 100530 }, { "epoch": 2.935314812922439, "grad_norm": 0.6183076524597586, "learning_rate": 1.1976209786428765e-06, "loss": 0.2398, "step": 100535 }, { "epoch": 2.935460796192756, "grad_norm": 0.5847156383718308, "learning_rate": 1.1949175452825088e-06, "loss": 0.2112, "step": 100540 }, { "epoch": 2.9356067794630736, "grad_norm": 0.5957227696871324, "learning_rate": 1.192214111922141e-06, "loss": 0.252, "step": 100545 }, { "epoch": 2.935752762733391, "grad_norm": 0.6147183261233776, "learning_rate": 1.1895106785617736e-06, "loss": 0.2473, "step": 100550 }, { "epoch": 2.935898746003708, "grad_norm": 0.5371796978110996, "learning_rate": 1.186807245201406e-06, "loss": 0.2277, "step": 100555 }, { "epoch": 2.936044729274025, "grad_norm": 0.5603147966642396, "learning_rate": 1.184103811841038e-06, "loss": 0.2201, "step": 100560 }, { "epoch": 2.9361907125443425, "grad_norm": 0.5696208472283272, "learning_rate": 1.1814003784806704e-06, "loss": 0.231, "step": 100565 }, { "epoch": 2.9363366958146595, "grad_norm": 0.5495339252806984, "learning_rate": 1.1786969451203029e-06, "loss": 0.2398, "step": 100570 }, { "epoch": 2.936482679084977, "grad_norm": 0.578757091885726, "learning_rate": 1.1759935117599352e-06, "loss": 0.2455, "step": 100575 }, { "epoch": 2.936628662355294, "grad_norm": 0.6100828145345315, "learning_rate": 1.1732900783995675e-06, "loss": 0.2314, "step": 100580 }, { "epoch": 2.9367746456256114, "grad_norm": 0.5893736953025932, "learning_rate": 1.1705866450391998e-06, "loss": 0.225, "step": 100585 }, { "epoch": 2.9369206288959284, "grad_norm": 0.5514889106535106, "learning_rate": 1.1678832116788322e-06, "loss": 0.2172, "step": 100590 }, { "epoch": 2.937066612166246, "grad_norm": 0.6147448784981873, "learning_rate": 1.1651797783184645e-06, "loss": 0.231, "step": 100595 }, { "epoch": 2.937212595436563, "grad_norm": 0.594776113147532, "learning_rate": 1.1624763449580968e-06, "loss": 0.2523, "step": 100600 }, { "epoch": 2.9373585787068803, "grad_norm": 0.575023145943728, "learning_rate": 1.1597729115977291e-06, "loss": 0.2354, "step": 100605 }, { "epoch": 2.9375045619771973, "grad_norm": 0.5558733925958186, "learning_rate": 1.1570694782373616e-06, "loss": 0.2254, "step": 100610 }, { "epoch": 2.937650545247515, "grad_norm": 0.5709074140916658, "learning_rate": 1.154366044876994e-06, "loss": 0.2291, "step": 100615 }, { "epoch": 2.937796528517832, "grad_norm": 0.585473862187068, "learning_rate": 1.151662611516626e-06, "loss": 0.2325, "step": 100620 }, { "epoch": 2.9379425117881492, "grad_norm": 0.5409454393623223, "learning_rate": 1.1489591781562584e-06, "loss": 0.2284, "step": 100625 }, { "epoch": 2.9380884950584663, "grad_norm": 0.555247273033488, "learning_rate": 1.146255744795891e-06, "loss": 0.242, "step": 100630 }, { "epoch": 2.9382344783287833, "grad_norm": 0.5898411828072848, "learning_rate": 1.1435523114355232e-06, "loss": 0.236, "step": 100635 }, { "epoch": 2.9383804615991007, "grad_norm": 0.5489522508036242, "learning_rate": 1.1408488780751555e-06, "loss": 0.2403, "step": 100640 }, { "epoch": 2.938526444869418, "grad_norm": 0.6242846708815737, "learning_rate": 1.1381454447147879e-06, "loss": 0.2433, "step": 100645 }, { "epoch": 2.938672428139735, "grad_norm": 0.5494357553436525, "learning_rate": 1.1354420113544202e-06, "loss": 0.227, "step": 100650 }, { "epoch": 2.938818411410052, "grad_norm": 0.6456679521450019, "learning_rate": 1.1327385779940525e-06, "loss": 0.2541, "step": 100655 }, { "epoch": 2.9389643946803696, "grad_norm": 0.5608652508585623, "learning_rate": 1.1300351446336848e-06, "loss": 0.2292, "step": 100660 }, { "epoch": 2.939110377950687, "grad_norm": 0.6412456875745396, "learning_rate": 1.1273317112733171e-06, "loss": 0.2435, "step": 100665 }, { "epoch": 2.939256361221004, "grad_norm": 0.5995976825641683, "learning_rate": 1.1246282779129497e-06, "loss": 0.2469, "step": 100670 }, { "epoch": 2.939402344491321, "grad_norm": 0.5505892978111948, "learning_rate": 1.1219248445525818e-06, "loss": 0.2321, "step": 100675 }, { "epoch": 2.9395483277616385, "grad_norm": 0.5755574571383523, "learning_rate": 1.119221411192214e-06, "loss": 0.2349, "step": 100680 }, { "epoch": 2.939694311031956, "grad_norm": 0.6690177162083708, "learning_rate": 1.1165179778318464e-06, "loss": 0.2515, "step": 100685 }, { "epoch": 2.939840294302273, "grad_norm": 0.5519713059286095, "learning_rate": 1.113814544471479e-06, "loss": 0.2405, "step": 100690 }, { "epoch": 2.93998627757259, "grad_norm": 0.5752400539933925, "learning_rate": 1.1111111111111112e-06, "loss": 0.2429, "step": 100695 }, { "epoch": 2.9401322608429075, "grad_norm": 0.5908131726812766, "learning_rate": 1.1084076777507436e-06, "loss": 0.2248, "step": 100700 }, { "epoch": 2.9402782441132245, "grad_norm": 0.5748465951285688, "learning_rate": 1.1057042443903757e-06, "loss": 0.2469, "step": 100705 }, { "epoch": 2.940424227383542, "grad_norm": 0.575372350663759, "learning_rate": 1.1030008110300082e-06, "loss": 0.2469, "step": 100710 }, { "epoch": 2.940570210653859, "grad_norm": 0.62975000408807, "learning_rate": 1.1002973776696405e-06, "loss": 0.2542, "step": 100715 }, { "epoch": 2.9407161939241764, "grad_norm": 0.589783766216864, "learning_rate": 1.0975939443092728e-06, "loss": 0.2301, "step": 100720 }, { "epoch": 2.9408621771944934, "grad_norm": 0.5602387639685606, "learning_rate": 1.0948905109489052e-06, "loss": 0.2321, "step": 100725 }, { "epoch": 2.941008160464811, "grad_norm": 0.6065529393811765, "learning_rate": 1.0921870775885375e-06, "loss": 0.2417, "step": 100730 }, { "epoch": 2.941154143735128, "grad_norm": 0.5997588584812465, "learning_rate": 1.0894836442281698e-06, "loss": 0.2447, "step": 100735 }, { "epoch": 2.9413001270054453, "grad_norm": 0.5066612572950767, "learning_rate": 1.086780210867802e-06, "loss": 0.2283, "step": 100740 }, { "epoch": 2.9414461102757623, "grad_norm": 0.546647910168614, "learning_rate": 1.0840767775074344e-06, "loss": 0.2338, "step": 100745 }, { "epoch": 2.9415920935460798, "grad_norm": 0.6191290403975036, "learning_rate": 1.081373344147067e-06, "loss": 0.2352, "step": 100750 }, { "epoch": 2.9417380768163968, "grad_norm": 0.5614119321263552, "learning_rate": 1.0786699107866993e-06, "loss": 0.2317, "step": 100755 }, { "epoch": 2.941884060086714, "grad_norm": 0.5827202783850877, "learning_rate": 1.0759664774263314e-06, "loss": 0.2413, "step": 100760 }, { "epoch": 2.942030043357031, "grad_norm": 0.5680693340181662, "learning_rate": 1.0732630440659637e-06, "loss": 0.2287, "step": 100765 }, { "epoch": 2.9421760266273482, "grad_norm": 0.6303182046332427, "learning_rate": 1.0705596107055962e-06, "loss": 0.2354, "step": 100770 }, { "epoch": 2.9423220098976657, "grad_norm": 0.5939746158904634, "learning_rate": 1.0678561773452285e-06, "loss": 0.2416, "step": 100775 }, { "epoch": 2.942467993167983, "grad_norm": 0.5735410285905519, "learning_rate": 1.0651527439848609e-06, "loss": 0.2265, "step": 100780 }, { "epoch": 2.9426139764383, "grad_norm": 0.5795321174625299, "learning_rate": 1.0624493106244932e-06, "loss": 0.2169, "step": 100785 }, { "epoch": 2.942759959708617, "grad_norm": 0.5990045489077277, "learning_rate": 1.0597458772641255e-06, "loss": 0.2418, "step": 100790 }, { "epoch": 2.9429059429789346, "grad_norm": 0.5700694811770882, "learning_rate": 1.0570424439037578e-06, "loss": 0.2413, "step": 100795 }, { "epoch": 2.943051926249252, "grad_norm": 0.5615802533091955, "learning_rate": 1.0543390105433901e-06, "loss": 0.238, "step": 100800 }, { "epoch": 2.943197909519569, "grad_norm": 0.6132175505792977, "learning_rate": 1.0516355771830224e-06, "loss": 0.2365, "step": 100805 }, { "epoch": 2.943343892789886, "grad_norm": 0.5828347879785092, "learning_rate": 1.048932143822655e-06, "loss": 0.2372, "step": 100810 }, { "epoch": 2.9434898760602035, "grad_norm": 0.5870561109071055, "learning_rate": 1.0462287104622873e-06, "loss": 0.2314, "step": 100815 }, { "epoch": 2.943635859330521, "grad_norm": 0.535280292245531, "learning_rate": 1.0435252771019194e-06, "loss": 0.2401, "step": 100820 }, { "epoch": 2.943781842600838, "grad_norm": 0.6367827670495845, "learning_rate": 1.0408218437415517e-06, "loss": 0.2307, "step": 100825 }, { "epoch": 2.943927825871155, "grad_norm": 0.5673457494738157, "learning_rate": 1.0381184103811842e-06, "loss": 0.2341, "step": 100830 }, { "epoch": 2.9440738091414724, "grad_norm": 0.6295022755698836, "learning_rate": 1.0354149770208166e-06, "loss": 0.2491, "step": 100835 }, { "epoch": 2.94421979241179, "grad_norm": 0.5689485520955814, "learning_rate": 1.0327115436604489e-06, "loss": 0.2388, "step": 100840 }, { "epoch": 2.944365775682107, "grad_norm": 0.5764340023198198, "learning_rate": 1.0300081103000812e-06, "loss": 0.2259, "step": 100845 }, { "epoch": 2.944511758952424, "grad_norm": 0.6296520926084607, "learning_rate": 1.0273046769397135e-06, "loss": 0.2371, "step": 100850 }, { "epoch": 2.9446577422227413, "grad_norm": 0.585892179597973, "learning_rate": 1.0246012435793458e-06, "loss": 0.2345, "step": 100855 }, { "epoch": 2.9448037254930584, "grad_norm": 0.5538017486002565, "learning_rate": 1.0218978102189781e-06, "loss": 0.2345, "step": 100860 }, { "epoch": 2.944949708763376, "grad_norm": 0.5436870634778731, "learning_rate": 1.0191943768586105e-06, "loss": 0.2275, "step": 100865 }, { "epoch": 2.945095692033693, "grad_norm": 0.5541826343831974, "learning_rate": 1.0164909434982428e-06, "loss": 0.2344, "step": 100870 }, { "epoch": 2.9452416753040103, "grad_norm": 0.5810431201317342, "learning_rate": 1.013787510137875e-06, "loss": 0.2423, "step": 100875 }, { "epoch": 2.9453876585743273, "grad_norm": 0.5651271056413297, "learning_rate": 1.0110840767775074e-06, "loss": 0.2309, "step": 100880 }, { "epoch": 2.9455336418446447, "grad_norm": 0.5913885509048324, "learning_rate": 1.0083806434171397e-06, "loss": 0.2345, "step": 100885 }, { "epoch": 2.9456796251149617, "grad_norm": 0.6143825408529757, "learning_rate": 1.0056772100567723e-06, "loss": 0.2444, "step": 100890 }, { "epoch": 2.945825608385279, "grad_norm": 0.6293506816069085, "learning_rate": 1.0029737766964046e-06, "loss": 0.2475, "step": 100895 }, { "epoch": 2.945971591655596, "grad_norm": 0.5782409461556234, "learning_rate": 1.0002703433360369e-06, "loss": 0.2372, "step": 100900 }, { "epoch": 2.9461175749259136, "grad_norm": 0.6031892720091971, "learning_rate": 9.97566909975669e-07, "loss": 0.2296, "step": 100905 }, { "epoch": 2.9462635581962306, "grad_norm": 0.5554493850599748, "learning_rate": 9.948634766153015e-07, "loss": 0.248, "step": 100910 }, { "epoch": 2.946409541466548, "grad_norm": 0.5820285082066843, "learning_rate": 9.921600432549338e-07, "loss": 0.2309, "step": 100915 }, { "epoch": 2.946555524736865, "grad_norm": 0.589208903148496, "learning_rate": 9.894566098945662e-07, "loss": 0.2344, "step": 100920 }, { "epoch": 2.946701508007182, "grad_norm": 0.6109834039012776, "learning_rate": 9.867531765341985e-07, "loss": 0.2313, "step": 100925 }, { "epoch": 2.9468474912774996, "grad_norm": 0.6039881678760683, "learning_rate": 9.840497431738308e-07, "loss": 0.2511, "step": 100930 }, { "epoch": 2.946993474547817, "grad_norm": 0.5801101827644387, "learning_rate": 9.813463098134631e-07, "loss": 0.2366, "step": 100935 }, { "epoch": 2.947139457818134, "grad_norm": 0.6044782571614064, "learning_rate": 9.786428764530954e-07, "loss": 0.2471, "step": 100940 }, { "epoch": 2.947285441088451, "grad_norm": 0.5814801251382675, "learning_rate": 9.759394430927277e-07, "loss": 0.2351, "step": 100945 }, { "epoch": 2.9474314243587685, "grad_norm": 0.6374153205468545, "learning_rate": 9.7323600973236e-07, "loss": 0.2563, "step": 100950 }, { "epoch": 2.947577407629086, "grad_norm": 0.5861480610569106, "learning_rate": 9.705325763719926e-07, "loss": 0.2418, "step": 100955 }, { "epoch": 2.947723390899403, "grad_norm": 0.6365378473870449, "learning_rate": 9.67829143011625e-07, "loss": 0.24, "step": 100960 }, { "epoch": 2.94786937416972, "grad_norm": 0.57116181260383, "learning_rate": 9.65125709651257e-07, "loss": 0.2349, "step": 100965 }, { "epoch": 2.9480153574400374, "grad_norm": 0.572254452839346, "learning_rate": 9.624222762908895e-07, "loss": 0.2206, "step": 100970 }, { "epoch": 2.948161340710355, "grad_norm": 0.5773287387991427, "learning_rate": 9.597188429305219e-07, "loss": 0.2479, "step": 100975 }, { "epoch": 2.948307323980672, "grad_norm": 0.5979987966759245, "learning_rate": 9.570154095701542e-07, "loss": 0.2392, "step": 100980 }, { "epoch": 2.948453307250989, "grad_norm": 0.5909194273879856, "learning_rate": 9.543119762097865e-07, "loss": 0.2514, "step": 100985 }, { "epoch": 2.9485992905213063, "grad_norm": 0.6159354127788185, "learning_rate": 9.516085428494187e-07, "loss": 0.2229, "step": 100990 }, { "epoch": 2.9487452737916233, "grad_norm": 0.6155498284268902, "learning_rate": 9.489051094890511e-07, "loss": 0.2479, "step": 100995 }, { "epoch": 2.9488912570619408, "grad_norm": 0.6161455546022703, "learning_rate": 9.462016761286834e-07, "loss": 0.2392, "step": 101000 }, { "epoch": 2.949037240332258, "grad_norm": 0.5712325951508277, "learning_rate": 9.434982427683158e-07, "loss": 0.2346, "step": 101005 }, { "epoch": 2.9491832236025752, "grad_norm": 0.594564217657545, "learning_rate": 9.407948094079482e-07, "loss": 0.2457, "step": 101010 }, { "epoch": 2.9493292068728922, "grad_norm": 0.5875778672049706, "learning_rate": 9.380913760475805e-07, "loss": 0.2443, "step": 101015 }, { "epoch": 2.9494751901432097, "grad_norm": 0.585328252668909, "learning_rate": 9.353879426872127e-07, "loss": 0.2245, "step": 101020 }, { "epoch": 2.9496211734135267, "grad_norm": 0.5994758868168051, "learning_rate": 9.326845093268451e-07, "loss": 0.2305, "step": 101025 }, { "epoch": 2.949767156683844, "grad_norm": 0.5946165896361867, "learning_rate": 9.299810759664775e-07, "loss": 0.2415, "step": 101030 }, { "epoch": 2.949913139954161, "grad_norm": 0.5857764036723832, "learning_rate": 9.272776426061098e-07, "loss": 0.2423, "step": 101035 }, { "epoch": 2.9500591232244786, "grad_norm": 0.5762364337274207, "learning_rate": 9.245742092457422e-07, "loss": 0.2341, "step": 101040 }, { "epoch": 2.9502051064947956, "grad_norm": 0.5987980713774969, "learning_rate": 9.218707758853745e-07, "loss": 0.241, "step": 101045 }, { "epoch": 2.950351089765113, "grad_norm": 0.6506942425698564, "learning_rate": 9.191673425250067e-07, "loss": 0.2402, "step": 101050 }, { "epoch": 2.95049707303543, "grad_norm": 0.5888703965564008, "learning_rate": 9.164639091646392e-07, "loss": 0.2382, "step": 101055 }, { "epoch": 2.9506430563057475, "grad_norm": 0.5627247541202267, "learning_rate": 9.137604758042715e-07, "loss": 0.2224, "step": 101060 }, { "epoch": 2.9507890395760645, "grad_norm": 0.5748438718800819, "learning_rate": 9.110570424439038e-07, "loss": 0.2262, "step": 101065 }, { "epoch": 2.950935022846382, "grad_norm": 0.5910870715301836, "learning_rate": 9.083536090835362e-07, "loss": 0.2332, "step": 101070 }, { "epoch": 2.951081006116699, "grad_norm": 0.5417703814022908, "learning_rate": 9.056501757231684e-07, "loss": 0.2335, "step": 101075 }, { "epoch": 2.951226989387016, "grad_norm": 0.5593905894742871, "learning_rate": 9.029467423628007e-07, "loss": 0.2381, "step": 101080 }, { "epoch": 2.9513729726573334, "grad_norm": 0.6303312620293763, "learning_rate": 9.002433090024332e-07, "loss": 0.2572, "step": 101085 }, { "epoch": 2.951518955927651, "grad_norm": 0.6106538945266816, "learning_rate": 8.975398756420655e-07, "loss": 0.2525, "step": 101090 }, { "epoch": 2.951664939197968, "grad_norm": 0.5817392501363827, "learning_rate": 8.948364422816978e-07, "loss": 0.2347, "step": 101095 }, { "epoch": 2.951810922468285, "grad_norm": 0.576249161899957, "learning_rate": 8.921330089213302e-07, "loss": 0.2354, "step": 101100 }, { "epoch": 2.9519569057386024, "grad_norm": 0.5449222248180388, "learning_rate": 8.894295755609624e-07, "loss": 0.2421, "step": 101105 }, { "epoch": 2.95210288900892, "grad_norm": 0.5653078600457516, "learning_rate": 8.867261422005947e-07, "loss": 0.2433, "step": 101110 }, { "epoch": 2.952248872279237, "grad_norm": 0.5198544869162786, "learning_rate": 8.840227088402271e-07, "loss": 0.2235, "step": 101115 }, { "epoch": 2.952394855549554, "grad_norm": 0.600388650898499, "learning_rate": 8.813192754798595e-07, "loss": 0.2477, "step": 101120 }, { "epoch": 2.9525408388198713, "grad_norm": 0.5758811833574798, "learning_rate": 8.786158421194918e-07, "loss": 0.2382, "step": 101125 }, { "epoch": 2.9526868220901887, "grad_norm": 0.609085310028717, "learning_rate": 8.759124087591242e-07, "loss": 0.2446, "step": 101130 }, { "epoch": 2.9528328053605057, "grad_norm": 0.5629108826254697, "learning_rate": 8.732089753987564e-07, "loss": 0.2457, "step": 101135 }, { "epoch": 2.9529787886308227, "grad_norm": 0.5890970476673619, "learning_rate": 8.705055420383888e-07, "loss": 0.2379, "step": 101140 }, { "epoch": 2.95312477190114, "grad_norm": 0.5800361037296062, "learning_rate": 8.678021086780211e-07, "loss": 0.2209, "step": 101145 }, { "epoch": 2.953270755171457, "grad_norm": 0.5881217904926908, "learning_rate": 8.650986753176535e-07, "loss": 0.2278, "step": 101150 }, { "epoch": 2.9534167384417747, "grad_norm": 0.6211051064140481, "learning_rate": 8.623952419572858e-07, "loss": 0.2462, "step": 101155 }, { "epoch": 2.9535627217120917, "grad_norm": 0.5458710342883453, "learning_rate": 8.596918085969182e-07, "loss": 0.2284, "step": 101160 }, { "epoch": 2.953708704982409, "grad_norm": 0.5662040298759392, "learning_rate": 8.569883752365504e-07, "loss": 0.2303, "step": 101165 }, { "epoch": 2.953854688252726, "grad_norm": 0.564527195423084, "learning_rate": 8.542849418761828e-07, "loss": 0.2494, "step": 101170 }, { "epoch": 2.9540006715230436, "grad_norm": 0.6679484531066818, "learning_rate": 8.515815085158151e-07, "loss": 0.2474, "step": 101175 }, { "epoch": 2.9541466547933606, "grad_norm": 0.6066126507439067, "learning_rate": 8.488780751554475e-07, "loss": 0.242, "step": 101180 }, { "epoch": 2.954292638063678, "grad_norm": 0.5608889552526115, "learning_rate": 8.461746417950798e-07, "loss": 0.2383, "step": 101185 }, { "epoch": 2.954438621333995, "grad_norm": 0.5901475557232783, "learning_rate": 8.43471208434712e-07, "loss": 0.2159, "step": 101190 }, { "epoch": 2.9545846046043125, "grad_norm": 0.6400207648030011, "learning_rate": 8.407677750743445e-07, "loss": 0.238, "step": 101195 }, { "epoch": 2.9547305878746295, "grad_norm": 0.5873407872751389, "learning_rate": 8.380643417139768e-07, "loss": 0.2458, "step": 101200 }, { "epoch": 2.954876571144947, "grad_norm": 0.5709406677423885, "learning_rate": 8.353609083536091e-07, "loss": 0.2424, "step": 101205 }, { "epoch": 2.955022554415264, "grad_norm": 0.603216754854572, "learning_rate": 8.326574749932415e-07, "loss": 0.2387, "step": 101210 }, { "epoch": 2.955168537685581, "grad_norm": 0.5892954454238014, "learning_rate": 8.299540416328738e-07, "loss": 0.2277, "step": 101215 }, { "epoch": 2.9553145209558984, "grad_norm": 0.6217315849684049, "learning_rate": 8.27250608272506e-07, "loss": 0.227, "step": 101220 }, { "epoch": 2.955460504226216, "grad_norm": 0.5548571387855004, "learning_rate": 8.245471749121384e-07, "loss": 0.2229, "step": 101225 }, { "epoch": 2.955606487496533, "grad_norm": 0.5630521753920107, "learning_rate": 8.218437415517708e-07, "loss": 0.2253, "step": 101230 }, { "epoch": 2.95575247076685, "grad_norm": 0.6014331336618643, "learning_rate": 8.191403081914031e-07, "loss": 0.2392, "step": 101235 }, { "epoch": 2.9558984540371673, "grad_norm": 0.5941283292822533, "learning_rate": 8.164368748310355e-07, "loss": 0.2404, "step": 101240 }, { "epoch": 2.956044437307485, "grad_norm": 0.5831631482657945, "learning_rate": 8.137334414706678e-07, "loss": 0.2451, "step": 101245 }, { "epoch": 2.956190420577802, "grad_norm": 0.5765520646131428, "learning_rate": 8.110300081103001e-07, "loss": 0.2301, "step": 101250 }, { "epoch": 2.956336403848119, "grad_norm": 0.6208618462574328, "learning_rate": 8.083265747499324e-07, "loss": 0.2444, "step": 101255 }, { "epoch": 2.9564823871184363, "grad_norm": 0.585346544251604, "learning_rate": 8.056231413895648e-07, "loss": 0.2284, "step": 101260 }, { "epoch": 2.9566283703887537, "grad_norm": 0.5879999219380979, "learning_rate": 8.029197080291971e-07, "loss": 0.2339, "step": 101265 }, { "epoch": 2.9567743536590707, "grad_norm": 0.6115527487541568, "learning_rate": 8.002162746688295e-07, "loss": 0.2299, "step": 101270 }, { "epoch": 2.9569203369293877, "grad_norm": 0.611038486181289, "learning_rate": 7.975128413084619e-07, "loss": 0.2342, "step": 101275 }, { "epoch": 2.957066320199705, "grad_norm": 0.6271148350811365, "learning_rate": 7.948094079480941e-07, "loss": 0.2459, "step": 101280 }, { "epoch": 2.957212303470022, "grad_norm": 0.5863619536473408, "learning_rate": 7.921059745877264e-07, "loss": 0.2322, "step": 101285 }, { "epoch": 2.9573582867403396, "grad_norm": 0.6326896741657368, "learning_rate": 7.894025412273588e-07, "loss": 0.2471, "step": 101290 }, { "epoch": 2.9575042700106566, "grad_norm": 0.5837111621491504, "learning_rate": 7.866991078669911e-07, "loss": 0.2356, "step": 101295 }, { "epoch": 2.957650253280974, "grad_norm": 0.5737927086025775, "learning_rate": 7.839956745066235e-07, "loss": 0.237, "step": 101300 }, { "epoch": 2.957796236551291, "grad_norm": 0.575414730049592, "learning_rate": 7.812922411462558e-07, "loss": 0.2273, "step": 101305 }, { "epoch": 2.9579422198216085, "grad_norm": 0.5421252466692547, "learning_rate": 7.785888077858882e-07, "loss": 0.2372, "step": 101310 }, { "epoch": 2.9580882030919255, "grad_norm": 0.5812069653455421, "learning_rate": 7.758853744255204e-07, "loss": 0.2397, "step": 101315 }, { "epoch": 2.958234186362243, "grad_norm": 0.6044352405323439, "learning_rate": 7.731819410651528e-07, "loss": 0.2491, "step": 101320 }, { "epoch": 2.95838016963256, "grad_norm": 0.5961509137258717, "learning_rate": 7.704785077047851e-07, "loss": 0.2253, "step": 101325 }, { "epoch": 2.9585261529028775, "grad_norm": 0.6092222357213137, "learning_rate": 7.677750743444174e-07, "loss": 0.2345, "step": 101330 }, { "epoch": 2.9586721361731945, "grad_norm": 0.5692578787838617, "learning_rate": 7.650716409840498e-07, "loss": 0.2302, "step": 101335 }, { "epoch": 2.958818119443512, "grad_norm": 0.5887243677481273, "learning_rate": 7.623682076236821e-07, "loss": 0.2436, "step": 101340 }, { "epoch": 2.958964102713829, "grad_norm": 0.5753189342296915, "learning_rate": 7.596647742633144e-07, "loss": 0.2322, "step": 101345 }, { "epoch": 2.9591100859841464, "grad_norm": 0.6014913079823859, "learning_rate": 7.569613409029468e-07, "loss": 0.2324, "step": 101350 }, { "epoch": 2.9592560692544634, "grad_norm": 0.6555931778359373, "learning_rate": 7.54257907542579e-07, "loss": 0.2397, "step": 101355 }, { "epoch": 2.959402052524781, "grad_norm": 0.5913598698630422, "learning_rate": 7.515544741822115e-07, "loss": 0.2268, "step": 101360 }, { "epoch": 2.959548035795098, "grad_norm": 0.631355501412169, "learning_rate": 7.488510408218438e-07, "loss": 0.2512, "step": 101365 }, { "epoch": 2.959694019065415, "grad_norm": 0.5514063259475065, "learning_rate": 7.461476074614761e-07, "loss": 0.224, "step": 101370 }, { "epoch": 2.9598400023357323, "grad_norm": 0.6293092539670528, "learning_rate": 7.434441741011084e-07, "loss": 0.2326, "step": 101375 }, { "epoch": 2.9599859856060498, "grad_norm": 0.6276083081001421, "learning_rate": 7.407407407407408e-07, "loss": 0.227, "step": 101380 }, { "epoch": 2.9601319688763668, "grad_norm": 0.6377679771138335, "learning_rate": 7.38037307380373e-07, "loss": 0.2455, "step": 101385 }, { "epoch": 2.9602779521466838, "grad_norm": 0.6142884455011055, "learning_rate": 7.353338740200055e-07, "loss": 0.2632, "step": 101390 }, { "epoch": 2.960423935417001, "grad_norm": 0.5671258246982428, "learning_rate": 7.326304406596378e-07, "loss": 0.2272, "step": 101395 }, { "epoch": 2.9605699186873187, "grad_norm": 0.5832853775177136, "learning_rate": 7.299270072992701e-07, "loss": 0.2393, "step": 101400 }, { "epoch": 2.9607159019576357, "grad_norm": 0.5905573153982702, "learning_rate": 7.272235739389024e-07, "loss": 0.2399, "step": 101405 }, { "epoch": 2.9608618852279527, "grad_norm": 0.5526409684296697, "learning_rate": 7.245201405785348e-07, "loss": 0.2421, "step": 101410 }, { "epoch": 2.96100786849827, "grad_norm": 0.6235764902052937, "learning_rate": 7.218167072181671e-07, "loss": 0.2416, "step": 101415 }, { "epoch": 2.9611538517685876, "grad_norm": 0.6022998324981338, "learning_rate": 7.191132738577995e-07, "loss": 0.2376, "step": 101420 }, { "epoch": 2.9612998350389046, "grad_norm": 0.6020828583163449, "learning_rate": 7.164098404974318e-07, "loss": 0.2406, "step": 101425 }, { "epoch": 2.9614458183092216, "grad_norm": 0.5735124422813127, "learning_rate": 7.137064071370641e-07, "loss": 0.2305, "step": 101430 }, { "epoch": 2.961591801579539, "grad_norm": 0.6399191966874868, "learning_rate": 7.110029737766964e-07, "loss": 0.2395, "step": 101435 }, { "epoch": 2.961737784849856, "grad_norm": 0.5748479757060986, "learning_rate": 7.082995404163287e-07, "loss": 0.2469, "step": 101440 }, { "epoch": 2.9618837681201735, "grad_norm": 0.6473412478124744, "learning_rate": 7.055961070559611e-07, "loss": 0.234, "step": 101445 }, { "epoch": 2.9620297513904905, "grad_norm": 0.6099998508802363, "learning_rate": 7.028926736955935e-07, "loss": 0.2477, "step": 101450 }, { "epoch": 2.962175734660808, "grad_norm": 0.5761801577474538, "learning_rate": 7.001892403352257e-07, "loss": 0.2428, "step": 101455 }, { "epoch": 2.962321717931125, "grad_norm": 0.6312387842565232, "learning_rate": 6.974858069748581e-07, "loss": 0.2412, "step": 101460 }, { "epoch": 2.9624677012014424, "grad_norm": 0.5760776929674896, "learning_rate": 6.947823736144904e-07, "loss": 0.2427, "step": 101465 }, { "epoch": 2.9626136844717594, "grad_norm": 0.5526696414500127, "learning_rate": 6.920789402541228e-07, "loss": 0.2437, "step": 101470 }, { "epoch": 2.962759667742077, "grad_norm": 0.569330716696916, "learning_rate": 6.893755068937551e-07, "loss": 0.2394, "step": 101475 }, { "epoch": 2.962905651012394, "grad_norm": 0.5399854974723078, "learning_rate": 6.866720735333875e-07, "loss": 0.2421, "step": 101480 }, { "epoch": 2.9630516342827113, "grad_norm": 0.5965064762949387, "learning_rate": 6.839686401730197e-07, "loss": 0.2452, "step": 101485 }, { "epoch": 2.9631976175530284, "grad_norm": 0.6092475570307059, "learning_rate": 6.812652068126521e-07, "loss": 0.2543, "step": 101490 }, { "epoch": 2.963343600823346, "grad_norm": 0.587663833834898, "learning_rate": 6.785617734522844e-07, "loss": 0.2409, "step": 101495 }, { "epoch": 2.963489584093663, "grad_norm": 0.582185456809749, "learning_rate": 6.758583400919168e-07, "loss": 0.2334, "step": 101500 }, { "epoch": 2.96363556736398, "grad_norm": 0.5305209624798515, "learning_rate": 6.731549067315491e-07, "loss": 0.2277, "step": 101505 }, { "epoch": 2.9637815506342973, "grad_norm": 0.591692686390549, "learning_rate": 6.704514733711815e-07, "loss": 0.2367, "step": 101510 }, { "epoch": 2.9639275339046147, "grad_norm": 0.5791636543526507, "learning_rate": 6.677480400108137e-07, "loss": 0.243, "step": 101515 }, { "epoch": 2.9640735171749317, "grad_norm": 0.5995031303502998, "learning_rate": 6.650446066504461e-07, "loss": 0.227, "step": 101520 }, { "epoch": 2.9642195004452487, "grad_norm": 0.5807641468506163, "learning_rate": 6.623411732900785e-07, "loss": 0.2329, "step": 101525 }, { "epoch": 2.964365483715566, "grad_norm": 0.5816804397866708, "learning_rate": 6.596377399297108e-07, "loss": 0.2358, "step": 101530 }, { "epoch": 2.9645114669858836, "grad_norm": 0.591747455282264, "learning_rate": 6.569343065693431e-07, "loss": 0.244, "step": 101535 }, { "epoch": 2.9646574502562006, "grad_norm": 0.5854860311100204, "learning_rate": 6.542308732089755e-07, "loss": 0.2232, "step": 101540 }, { "epoch": 2.9648034335265177, "grad_norm": 0.5962677270843825, "learning_rate": 6.515274398486077e-07, "loss": 0.2271, "step": 101545 }, { "epoch": 2.964949416796835, "grad_norm": 0.5745178419425612, "learning_rate": 6.488240064882401e-07, "loss": 0.2302, "step": 101550 }, { "epoch": 2.9650954000671526, "grad_norm": 0.6298403505427654, "learning_rate": 6.461205731278724e-07, "loss": 0.2428, "step": 101555 }, { "epoch": 2.9652413833374696, "grad_norm": 0.582607019180269, "learning_rate": 6.434171397675048e-07, "loss": 0.2345, "step": 101560 }, { "epoch": 2.9653873666077866, "grad_norm": 0.609136993844624, "learning_rate": 6.407137064071371e-07, "loss": 0.2394, "step": 101565 }, { "epoch": 2.965533349878104, "grad_norm": 0.6063842931353692, "learning_rate": 6.380102730467694e-07, "loss": 0.2443, "step": 101570 }, { "epoch": 2.965679333148421, "grad_norm": 0.5605802964192275, "learning_rate": 6.353068396864017e-07, "loss": 0.2368, "step": 101575 }, { "epoch": 2.9658253164187385, "grad_norm": 0.6088206997272881, "learning_rate": 6.326034063260342e-07, "loss": 0.2444, "step": 101580 }, { "epoch": 2.9659712996890555, "grad_norm": 0.5632118769989383, "learning_rate": 6.298999729656664e-07, "loss": 0.2416, "step": 101585 }, { "epoch": 2.966117282959373, "grad_norm": 0.6199728676771473, "learning_rate": 6.271965396052988e-07, "loss": 0.2304, "step": 101590 }, { "epoch": 2.96626326622969, "grad_norm": 0.6451118945457294, "learning_rate": 6.244931062449311e-07, "loss": 0.2452, "step": 101595 }, { "epoch": 2.9664092495000074, "grad_norm": 0.6042421734972582, "learning_rate": 6.217896728845634e-07, "loss": 0.2473, "step": 101600 }, { "epoch": 2.9665552327703244, "grad_norm": 0.560514252031928, "learning_rate": 6.190862395241957e-07, "loss": 0.2345, "step": 101605 }, { "epoch": 2.966701216040642, "grad_norm": 0.538602837792281, "learning_rate": 6.163828061638282e-07, "loss": 0.2375, "step": 101610 }, { "epoch": 2.966847199310959, "grad_norm": 0.5578799730657333, "learning_rate": 6.136793728034604e-07, "loss": 0.2418, "step": 101615 }, { "epoch": 2.9669931825812763, "grad_norm": 0.6377442978061443, "learning_rate": 6.109759394430928e-07, "loss": 0.2488, "step": 101620 }, { "epoch": 2.9671391658515933, "grad_norm": 0.5990200789706467, "learning_rate": 6.082725060827251e-07, "loss": 0.246, "step": 101625 }, { "epoch": 2.9672851491219108, "grad_norm": 0.6327252048989533, "learning_rate": 6.055690727223574e-07, "loss": 0.2307, "step": 101630 }, { "epoch": 2.9674311323922278, "grad_norm": 0.6055859829884903, "learning_rate": 6.028656393619898e-07, "loss": 0.2402, "step": 101635 }, { "epoch": 2.9675771156625452, "grad_norm": 0.5896718579800688, "learning_rate": 6.001622060016222e-07, "loss": 0.2327, "step": 101640 }, { "epoch": 2.9677230989328622, "grad_norm": 0.5958285358479223, "learning_rate": 5.974587726412544e-07, "loss": 0.2339, "step": 101645 }, { "epoch": 2.9678690822031797, "grad_norm": 0.6078309827139794, "learning_rate": 5.947553392808868e-07, "loss": 0.2525, "step": 101650 }, { "epoch": 2.9680150654734967, "grad_norm": 0.5615860524218863, "learning_rate": 5.92051905920519e-07, "loss": 0.2335, "step": 101655 }, { "epoch": 2.9681610487438137, "grad_norm": 0.5914739792803367, "learning_rate": 5.893484725601514e-07, "loss": 0.2585, "step": 101660 }, { "epoch": 2.968307032014131, "grad_norm": 0.6081997085905008, "learning_rate": 5.866450391997838e-07, "loss": 0.2428, "step": 101665 }, { "epoch": 2.9684530152844486, "grad_norm": 0.585951724693924, "learning_rate": 5.839416058394161e-07, "loss": 0.2298, "step": 101670 }, { "epoch": 2.9685989985547656, "grad_norm": 0.6315211996456818, "learning_rate": 5.812381724790484e-07, "loss": 0.2396, "step": 101675 }, { "epoch": 2.9687449818250826, "grad_norm": 0.5961597158012435, "learning_rate": 5.785347391186808e-07, "loss": 0.2477, "step": 101680 }, { "epoch": 2.9688909650954, "grad_norm": 0.5753507291573944, "learning_rate": 5.75831305758313e-07, "loss": 0.2547, "step": 101685 }, { "epoch": 2.9690369483657175, "grad_norm": 0.6002127812991584, "learning_rate": 5.731278723979455e-07, "loss": 0.2463, "step": 101690 }, { "epoch": 2.9691829316360345, "grad_norm": 0.5609118252011492, "learning_rate": 5.704244390375778e-07, "loss": 0.2228, "step": 101695 }, { "epoch": 2.9693289149063515, "grad_norm": 0.5694869012671888, "learning_rate": 5.677210056772101e-07, "loss": 0.2339, "step": 101700 }, { "epoch": 2.969474898176669, "grad_norm": 0.6226204807453359, "learning_rate": 5.650175723168424e-07, "loss": 0.2417, "step": 101705 }, { "epoch": 2.9696208814469864, "grad_norm": 0.5754973074960132, "learning_rate": 5.623141389564748e-07, "loss": 0.2324, "step": 101710 }, { "epoch": 2.9697668647173034, "grad_norm": 0.6106698432158127, "learning_rate": 5.59610705596107e-07, "loss": 0.2473, "step": 101715 }, { "epoch": 2.9699128479876205, "grad_norm": 0.6292151087161201, "learning_rate": 5.569072722357395e-07, "loss": 0.2421, "step": 101720 }, { "epoch": 2.970058831257938, "grad_norm": 0.5775995239424787, "learning_rate": 5.542038388753718e-07, "loss": 0.2496, "step": 101725 }, { "epoch": 2.970204814528255, "grad_norm": 0.6027514809719458, "learning_rate": 5.515004055150041e-07, "loss": 0.2345, "step": 101730 }, { "epoch": 2.9703507977985724, "grad_norm": 0.6190801942003451, "learning_rate": 5.487969721546364e-07, "loss": 0.2333, "step": 101735 }, { "epoch": 2.9704967810688894, "grad_norm": 0.6052906113840052, "learning_rate": 5.460935387942687e-07, "loss": 0.2411, "step": 101740 }, { "epoch": 2.970642764339207, "grad_norm": 0.599334539485788, "learning_rate": 5.43390105433901e-07, "loss": 0.2344, "step": 101745 }, { "epoch": 2.970788747609524, "grad_norm": 0.6335508733157243, "learning_rate": 5.406866720735335e-07, "loss": 0.2629, "step": 101750 }, { "epoch": 2.9709347308798413, "grad_norm": 0.5855906577923378, "learning_rate": 5.379832387131657e-07, "loss": 0.2471, "step": 101755 }, { "epoch": 2.9710807141501583, "grad_norm": 0.6033011359885414, "learning_rate": 5.352798053527981e-07, "loss": 0.2321, "step": 101760 }, { "epoch": 2.9712266974204757, "grad_norm": 0.5566480455327464, "learning_rate": 5.325763719924304e-07, "loss": 0.2345, "step": 101765 }, { "epoch": 2.9713726806907927, "grad_norm": 0.5845029426739691, "learning_rate": 5.298729386320627e-07, "loss": 0.2353, "step": 101770 }, { "epoch": 2.97151866396111, "grad_norm": 0.5820475587603725, "learning_rate": 5.271695052716951e-07, "loss": 0.234, "step": 101775 }, { "epoch": 2.971664647231427, "grad_norm": 0.5979943082644433, "learning_rate": 5.244660719113275e-07, "loss": 0.2472, "step": 101780 }, { "epoch": 2.9718106305017447, "grad_norm": 0.5737684743655614, "learning_rate": 5.217626385509597e-07, "loss": 0.237, "step": 101785 }, { "epoch": 2.9719566137720617, "grad_norm": 0.6314664191032723, "learning_rate": 5.190592051905921e-07, "loss": 0.2377, "step": 101790 }, { "epoch": 2.9721025970423787, "grad_norm": 0.6102911529739182, "learning_rate": 5.163557718302244e-07, "loss": 0.223, "step": 101795 }, { "epoch": 2.972248580312696, "grad_norm": 0.5725504791103324, "learning_rate": 5.136523384698568e-07, "loss": 0.2317, "step": 101800 }, { "epoch": 2.9723945635830136, "grad_norm": 0.5884310178252722, "learning_rate": 5.109489051094891e-07, "loss": 0.2403, "step": 101805 }, { "epoch": 2.9725405468533306, "grad_norm": 0.569243104846029, "learning_rate": 5.082454717491214e-07, "loss": 0.2441, "step": 101810 }, { "epoch": 2.9726865301236476, "grad_norm": 0.6012452065411071, "learning_rate": 5.055420383887537e-07, "loss": 0.2323, "step": 101815 }, { "epoch": 2.972832513393965, "grad_norm": 0.6021954958458586, "learning_rate": 5.028386050283861e-07, "loss": 0.2342, "step": 101820 }, { "epoch": 2.9729784966642825, "grad_norm": 0.6004388261288083, "learning_rate": 5.001351716680184e-07, "loss": 0.2441, "step": 101825 }, { "epoch": 2.9731244799345995, "grad_norm": 0.6017564421411341, "learning_rate": 4.974317383076508e-07, "loss": 0.2462, "step": 101830 }, { "epoch": 2.9732704632049165, "grad_norm": 0.5502138683611876, "learning_rate": 4.947283049472831e-07, "loss": 0.2493, "step": 101835 }, { "epoch": 2.973416446475234, "grad_norm": 0.5420941487414807, "learning_rate": 4.920248715869154e-07, "loss": 0.2244, "step": 101840 }, { "epoch": 2.9735624297455514, "grad_norm": 0.6008017585236795, "learning_rate": 4.893214382265477e-07, "loss": 0.2473, "step": 101845 }, { "epoch": 2.9737084130158684, "grad_norm": 0.5615776918663016, "learning_rate": 4.8661800486618e-07, "loss": 0.2274, "step": 101850 }, { "epoch": 2.9738543962861854, "grad_norm": 0.5573125669048585, "learning_rate": 4.839145715058125e-07, "loss": 0.2282, "step": 101855 }, { "epoch": 2.974000379556503, "grad_norm": 0.6147234972274163, "learning_rate": 4.812111381454448e-07, "loss": 0.2373, "step": 101860 }, { "epoch": 2.9741463628268203, "grad_norm": 0.5738329106300095, "learning_rate": 4.785077047850771e-07, "loss": 0.2171, "step": 101865 }, { "epoch": 2.9742923460971373, "grad_norm": 0.6261427677505126, "learning_rate": 4.7580427142470935e-07, "loss": 0.2491, "step": 101870 }, { "epoch": 2.9744383293674543, "grad_norm": 0.5925479040640712, "learning_rate": 4.731008380643417e-07, "loss": 0.2479, "step": 101875 }, { "epoch": 2.974584312637772, "grad_norm": 0.6286402022808145, "learning_rate": 4.703974047039741e-07, "loss": 0.2431, "step": 101880 }, { "epoch": 2.974730295908089, "grad_norm": 0.6067909908285615, "learning_rate": 4.6769397134360636e-07, "loss": 0.2557, "step": 101885 }, { "epoch": 2.9748762791784062, "grad_norm": 0.6065251434175207, "learning_rate": 4.6499053798323873e-07, "loss": 0.2359, "step": 101890 }, { "epoch": 2.9750222624487233, "grad_norm": 0.6387972168835638, "learning_rate": 4.622871046228711e-07, "loss": 0.2576, "step": 101895 }, { "epoch": 2.9751682457190407, "grad_norm": 0.5841001819852718, "learning_rate": 4.5958367126250336e-07, "loss": 0.2411, "step": 101900 }, { "epoch": 2.9753142289893577, "grad_norm": 0.5739217141090431, "learning_rate": 4.5688023790213573e-07, "loss": 0.2318, "step": 101905 }, { "epoch": 2.975460212259675, "grad_norm": 0.6456097465670078, "learning_rate": 4.541768045417681e-07, "loss": 0.2466, "step": 101910 }, { "epoch": 2.975606195529992, "grad_norm": 0.6128631469860616, "learning_rate": 4.5147337118140037e-07, "loss": 0.2456, "step": 101915 }, { "epoch": 2.9757521788003096, "grad_norm": 0.5541927591147804, "learning_rate": 4.4876993782103274e-07, "loss": 0.2358, "step": 101920 }, { "epoch": 2.9758981620706266, "grad_norm": 0.5856724814682409, "learning_rate": 4.460665044606651e-07, "loss": 0.2338, "step": 101925 }, { "epoch": 2.976044145340944, "grad_norm": 0.6374544171275923, "learning_rate": 4.433630711002974e-07, "loss": 0.2434, "step": 101930 }, { "epoch": 2.976190128611261, "grad_norm": 0.5929230105011051, "learning_rate": 4.4065963773992974e-07, "loss": 0.2537, "step": 101935 }, { "epoch": 2.9763361118815785, "grad_norm": 0.5880752287424154, "learning_rate": 4.379562043795621e-07, "loss": 0.2358, "step": 101940 }, { "epoch": 2.9764820951518955, "grad_norm": 0.5722538049745364, "learning_rate": 4.352527710191944e-07, "loss": 0.2505, "step": 101945 }, { "epoch": 2.9766280784222126, "grad_norm": 0.5433567795056595, "learning_rate": 4.3254933765882675e-07, "loss": 0.2218, "step": 101950 }, { "epoch": 2.97677406169253, "grad_norm": 0.637336560354722, "learning_rate": 4.298459042984591e-07, "loss": 0.2314, "step": 101955 }, { "epoch": 2.9769200449628475, "grad_norm": 0.577359286467623, "learning_rate": 4.271424709380914e-07, "loss": 0.2387, "step": 101960 }, { "epoch": 2.9770660282331645, "grad_norm": 0.5756210951259138, "learning_rate": 4.2443903757772375e-07, "loss": 0.2412, "step": 101965 }, { "epoch": 2.9772120115034815, "grad_norm": 0.6479211735661897, "learning_rate": 4.21735604217356e-07, "loss": 0.2527, "step": 101970 }, { "epoch": 2.977357994773799, "grad_norm": 0.627708628599693, "learning_rate": 4.190321708569884e-07, "loss": 0.2458, "step": 101975 }, { "epoch": 2.9775039780441164, "grad_norm": 0.6000690504084342, "learning_rate": 4.1632873749662076e-07, "loss": 0.247, "step": 101980 }, { "epoch": 2.9776499613144334, "grad_norm": 0.568508686767264, "learning_rate": 4.13625304136253e-07, "loss": 0.2464, "step": 101985 }, { "epoch": 2.9777959445847504, "grad_norm": 0.5905247629135266, "learning_rate": 4.109218707758854e-07, "loss": 0.2384, "step": 101990 }, { "epoch": 2.977941927855068, "grad_norm": 0.5051759600115218, "learning_rate": 4.0821843741551776e-07, "loss": 0.2251, "step": 101995 }, { "epoch": 2.9780879111253853, "grad_norm": 0.6255704696631199, "learning_rate": 4.0551500405515003e-07, "loss": 0.238, "step": 102000 }, { "epoch": 2.9782338943957023, "grad_norm": 0.5730464026444818, "learning_rate": 4.028115706947824e-07, "loss": 0.2246, "step": 102005 }, { "epoch": 2.9783798776660193, "grad_norm": 0.5888624223289829, "learning_rate": 4.0010813733441477e-07, "loss": 0.2467, "step": 102010 }, { "epoch": 2.9785258609363368, "grad_norm": 0.5686801739218439, "learning_rate": 3.9740470397404703e-07, "loss": 0.2169, "step": 102015 }, { "epoch": 2.9786718442066538, "grad_norm": 0.5867539905937565, "learning_rate": 3.947012706136794e-07, "loss": 0.238, "step": 102020 }, { "epoch": 2.978817827476971, "grad_norm": 0.5303805697099806, "learning_rate": 3.9199783725331177e-07, "loss": 0.2397, "step": 102025 }, { "epoch": 2.978963810747288, "grad_norm": 0.5828186430913347, "learning_rate": 3.892944038929441e-07, "loss": 0.2339, "step": 102030 }, { "epoch": 2.9791097940176057, "grad_norm": 0.5336083230605283, "learning_rate": 3.865909705325764e-07, "loss": 0.2415, "step": 102035 }, { "epoch": 2.9792557772879227, "grad_norm": 0.5387183875362954, "learning_rate": 3.838875371722087e-07, "loss": 0.2408, "step": 102040 }, { "epoch": 2.97940176055824, "grad_norm": 0.6671103943440186, "learning_rate": 3.8118410381184104e-07, "loss": 0.2449, "step": 102045 }, { "epoch": 2.979547743828557, "grad_norm": 0.597439449360043, "learning_rate": 3.784806704514734e-07, "loss": 0.2441, "step": 102050 }, { "epoch": 2.9796937270988746, "grad_norm": 0.6330622859303587, "learning_rate": 3.7577723709110573e-07, "loss": 0.2479, "step": 102055 }, { "epoch": 2.9798397103691916, "grad_norm": 0.5875748370269018, "learning_rate": 3.7307380373073805e-07, "loss": 0.2263, "step": 102060 }, { "epoch": 2.979985693639509, "grad_norm": 0.5916085638873685, "learning_rate": 3.703703703703704e-07, "loss": 0.2396, "step": 102065 }, { "epoch": 2.980131676909826, "grad_norm": 0.6102077961784287, "learning_rate": 3.6766693701000273e-07, "loss": 0.2345, "step": 102070 }, { "epoch": 2.9802776601801435, "grad_norm": 0.6509892547418988, "learning_rate": 3.6496350364963505e-07, "loss": 0.2523, "step": 102075 }, { "epoch": 2.9804236434504605, "grad_norm": 0.6135231217935764, "learning_rate": 3.622600702892674e-07, "loss": 0.251, "step": 102080 }, { "epoch": 2.980569626720778, "grad_norm": 0.6155564587866115, "learning_rate": 3.5955663692889974e-07, "loss": 0.2421, "step": 102085 }, { "epoch": 2.980715609991095, "grad_norm": 0.6433348320446894, "learning_rate": 3.5685320356853206e-07, "loss": 0.2248, "step": 102090 }, { "epoch": 2.9808615932614124, "grad_norm": 0.622754639727007, "learning_rate": 3.5414977020816437e-07, "loss": 0.2326, "step": 102095 }, { "epoch": 2.9810075765317294, "grad_norm": 0.5573634437954669, "learning_rate": 3.5144633684779674e-07, "loss": 0.2428, "step": 102100 }, { "epoch": 2.9811535598020464, "grad_norm": 0.5949777230502007, "learning_rate": 3.4874290348742906e-07, "loss": 0.2316, "step": 102105 }, { "epoch": 2.981299543072364, "grad_norm": 0.6030636888916193, "learning_rate": 3.460394701270614e-07, "loss": 0.2444, "step": 102110 }, { "epoch": 2.9814455263426813, "grad_norm": 0.5799081790980172, "learning_rate": 3.4333603676669375e-07, "loss": 0.2374, "step": 102115 }, { "epoch": 2.9815915096129983, "grad_norm": 0.5714113513918577, "learning_rate": 3.4063260340632607e-07, "loss": 0.2413, "step": 102120 }, { "epoch": 2.9817374928833154, "grad_norm": 0.6041307940760573, "learning_rate": 3.379291700459584e-07, "loss": 0.2425, "step": 102125 }, { "epoch": 2.981883476153633, "grad_norm": 0.573387922291312, "learning_rate": 3.3522573668559075e-07, "loss": 0.2292, "step": 102130 }, { "epoch": 2.9820294594239503, "grad_norm": 0.6047518623961928, "learning_rate": 3.3252230332522307e-07, "loss": 0.2377, "step": 102135 }, { "epoch": 2.9821754426942673, "grad_norm": 0.5318795921461869, "learning_rate": 3.298188699648554e-07, "loss": 0.2369, "step": 102140 }, { "epoch": 2.9823214259645843, "grad_norm": 0.6211711480870907, "learning_rate": 3.2711543660448776e-07, "loss": 0.2333, "step": 102145 }, { "epoch": 2.9824674092349017, "grad_norm": 0.5709087262070721, "learning_rate": 3.244120032441201e-07, "loss": 0.251, "step": 102150 }, { "epoch": 2.982613392505219, "grad_norm": 0.6557756804297128, "learning_rate": 3.217085698837524e-07, "loss": 0.2574, "step": 102155 }, { "epoch": 2.982759375775536, "grad_norm": 0.5887878463934747, "learning_rate": 3.190051365233847e-07, "loss": 0.2347, "step": 102160 }, { "epoch": 2.982905359045853, "grad_norm": 0.5753334128712951, "learning_rate": 3.163017031630171e-07, "loss": 0.2235, "step": 102165 }, { "epoch": 2.9830513423161706, "grad_norm": 0.5753003675906113, "learning_rate": 3.135982698026494e-07, "loss": 0.2506, "step": 102170 }, { "epoch": 2.9831973255864876, "grad_norm": 0.5832669065080267, "learning_rate": 3.108948364422817e-07, "loss": 0.2428, "step": 102175 }, { "epoch": 2.983343308856805, "grad_norm": 0.575299381476736, "learning_rate": 3.081914030819141e-07, "loss": 0.2466, "step": 102180 }, { "epoch": 2.983489292127122, "grad_norm": 0.605148504615763, "learning_rate": 3.054879697215464e-07, "loss": 0.2442, "step": 102185 }, { "epoch": 2.9836352753974396, "grad_norm": 0.5909240883831267, "learning_rate": 3.027845363611787e-07, "loss": 0.2437, "step": 102190 }, { "epoch": 2.9837812586677566, "grad_norm": 0.5900748880876794, "learning_rate": 3.000811030008111e-07, "loss": 0.2476, "step": 102195 }, { "epoch": 2.983927241938074, "grad_norm": 0.5574195082790244, "learning_rate": 2.973776696404434e-07, "loss": 0.2306, "step": 102200 }, { "epoch": 2.984073225208391, "grad_norm": 0.5765734819551598, "learning_rate": 2.946742362800757e-07, "loss": 0.2407, "step": 102205 }, { "epoch": 2.9842192084787085, "grad_norm": 0.5808132824887102, "learning_rate": 2.9197080291970804e-07, "loss": 0.2444, "step": 102210 }, { "epoch": 2.9843651917490255, "grad_norm": 0.6578268227557618, "learning_rate": 2.892673695593404e-07, "loss": 0.2493, "step": 102215 }, { "epoch": 2.984511175019343, "grad_norm": 0.5621029165017969, "learning_rate": 2.8656393619897273e-07, "loss": 0.2291, "step": 102220 }, { "epoch": 2.98465715828966, "grad_norm": 0.6195093775973232, "learning_rate": 2.8386050283860505e-07, "loss": 0.2427, "step": 102225 }, { "epoch": 2.9848031415599774, "grad_norm": 0.5885630851455494, "learning_rate": 2.811570694782374e-07, "loss": 0.2443, "step": 102230 }, { "epoch": 2.9849491248302944, "grad_norm": 0.5820343182333514, "learning_rate": 2.7845363611786973e-07, "loss": 0.241, "step": 102235 }, { "epoch": 2.9850951081006114, "grad_norm": 0.5952477715681266, "learning_rate": 2.7575020275750205e-07, "loss": 0.2536, "step": 102240 }, { "epoch": 2.985241091370929, "grad_norm": 0.5711961790131369, "learning_rate": 2.7304676939713437e-07, "loss": 0.2256, "step": 102245 }, { "epoch": 2.9853870746412463, "grad_norm": 0.5887211248875569, "learning_rate": 2.7034333603676674e-07, "loss": 0.2282, "step": 102250 }, { "epoch": 2.9855330579115633, "grad_norm": 0.6109783036459209, "learning_rate": 2.6763990267639905e-07, "loss": 0.2311, "step": 102255 }, { "epoch": 2.9856790411818803, "grad_norm": 0.5414392111752523, "learning_rate": 2.6493646931603137e-07, "loss": 0.233, "step": 102260 }, { "epoch": 2.9858250244521978, "grad_norm": 0.5897848436501365, "learning_rate": 2.6223303595566374e-07, "loss": 0.2413, "step": 102265 }, { "epoch": 2.9859710077225152, "grad_norm": 0.5584970638292301, "learning_rate": 2.5952960259529606e-07, "loss": 0.2304, "step": 102270 }, { "epoch": 2.9861169909928322, "grad_norm": 0.561983000942511, "learning_rate": 2.568261692349284e-07, "loss": 0.2294, "step": 102275 }, { "epoch": 2.9862629742631492, "grad_norm": 0.608268916142966, "learning_rate": 2.541227358745607e-07, "loss": 0.256, "step": 102280 }, { "epoch": 2.9864089575334667, "grad_norm": 0.5733893637831329, "learning_rate": 2.5141930251419306e-07, "loss": 0.2264, "step": 102285 }, { "epoch": 2.986554940803784, "grad_norm": 0.5491262051031658, "learning_rate": 2.487158691538254e-07, "loss": 0.2315, "step": 102290 }, { "epoch": 2.986700924074101, "grad_norm": 0.5755664437805363, "learning_rate": 2.460124357934577e-07, "loss": 0.2208, "step": 102295 }, { "epoch": 2.986846907344418, "grad_norm": 0.6240237057776074, "learning_rate": 2.4330900243309e-07, "loss": 0.2342, "step": 102300 }, { "epoch": 2.9869928906147356, "grad_norm": 0.6354467289175235, "learning_rate": 2.406055690727224e-07, "loss": 0.2362, "step": 102305 }, { "epoch": 2.9871388738850526, "grad_norm": 0.5911239262519798, "learning_rate": 2.3790213571235468e-07, "loss": 0.231, "step": 102310 }, { "epoch": 2.98728485715537, "grad_norm": 0.5796130529050216, "learning_rate": 2.3519870235198705e-07, "loss": 0.2508, "step": 102315 }, { "epoch": 2.987430840425687, "grad_norm": 0.5915549880879374, "learning_rate": 2.3249526899161936e-07, "loss": 0.2478, "step": 102320 }, { "epoch": 2.9875768236960045, "grad_norm": 0.568553531307193, "learning_rate": 2.2979183563125168e-07, "loss": 0.2363, "step": 102325 }, { "epoch": 2.9877228069663215, "grad_norm": 0.5972739980774147, "learning_rate": 2.2708840227088405e-07, "loss": 0.2379, "step": 102330 }, { "epoch": 2.987868790236639, "grad_norm": 0.6031635411496498, "learning_rate": 2.2438496891051637e-07, "loss": 0.2374, "step": 102335 }, { "epoch": 2.988014773506956, "grad_norm": 0.5581405499391217, "learning_rate": 2.216815355501487e-07, "loss": 0.2309, "step": 102340 }, { "epoch": 2.9881607567772734, "grad_norm": 0.5736234485936259, "learning_rate": 2.1897810218978106e-07, "loss": 0.2388, "step": 102345 }, { "epoch": 2.9883067400475904, "grad_norm": 0.5939860518081317, "learning_rate": 2.1627466882941337e-07, "loss": 0.2425, "step": 102350 }, { "epoch": 2.988452723317908, "grad_norm": 0.5853601107318248, "learning_rate": 2.135712354690457e-07, "loss": 0.2451, "step": 102355 }, { "epoch": 2.988598706588225, "grad_norm": 0.6082324991174097, "learning_rate": 2.10867802108678e-07, "loss": 0.245, "step": 102360 }, { "epoch": 2.9887446898585424, "grad_norm": 0.6513741362352238, "learning_rate": 2.0816436874831038e-07, "loss": 0.2375, "step": 102365 }, { "epoch": 2.9888906731288594, "grad_norm": 0.6233296138022903, "learning_rate": 2.054609353879427e-07, "loss": 0.2418, "step": 102370 }, { "epoch": 2.989036656399177, "grad_norm": 0.6373494185460846, "learning_rate": 2.0275750202757501e-07, "loss": 0.2556, "step": 102375 }, { "epoch": 2.989182639669494, "grad_norm": 0.5651433291368612, "learning_rate": 2.0005406866720738e-07, "loss": 0.2306, "step": 102380 }, { "epoch": 2.9893286229398113, "grad_norm": 0.6167877754470382, "learning_rate": 1.973506353068397e-07, "loss": 0.2572, "step": 102385 }, { "epoch": 2.9894746062101283, "grad_norm": 0.6025993251259603, "learning_rate": 1.9464720194647204e-07, "loss": 0.2437, "step": 102390 }, { "epoch": 2.9896205894804453, "grad_norm": 0.5809870404216209, "learning_rate": 1.9194376858610436e-07, "loss": 0.247, "step": 102395 }, { "epoch": 2.9897665727507627, "grad_norm": 0.6168797279995831, "learning_rate": 1.892403352257367e-07, "loss": 0.2555, "step": 102400 }, { "epoch": 2.98991255602108, "grad_norm": 0.5848919432744913, "learning_rate": 1.8653690186536902e-07, "loss": 0.2333, "step": 102405 }, { "epoch": 2.990058539291397, "grad_norm": 0.5567118186310229, "learning_rate": 1.8383346850500137e-07, "loss": 0.2293, "step": 102410 }, { "epoch": 2.990204522561714, "grad_norm": 0.6110833421749107, "learning_rate": 1.811300351446337e-07, "loss": 0.2436, "step": 102415 }, { "epoch": 2.9903505058320317, "grad_norm": 0.5839945064435558, "learning_rate": 1.7842660178426603e-07, "loss": 0.2224, "step": 102420 }, { "epoch": 2.990496489102349, "grad_norm": 0.6108977088442078, "learning_rate": 1.7572316842389837e-07, "loss": 0.2537, "step": 102425 }, { "epoch": 2.990642472372666, "grad_norm": 0.6570387577735841, "learning_rate": 1.730197350635307e-07, "loss": 0.238, "step": 102430 }, { "epoch": 2.990788455642983, "grad_norm": 0.5587904611184422, "learning_rate": 1.7031630170316303e-07, "loss": 0.231, "step": 102435 }, { "epoch": 2.9909344389133006, "grad_norm": 0.6024464290401893, "learning_rate": 1.6761286834279538e-07, "loss": 0.2395, "step": 102440 }, { "epoch": 2.991080422183618, "grad_norm": 0.622580784871049, "learning_rate": 1.649094349824277e-07, "loss": 0.2426, "step": 102445 }, { "epoch": 2.991226405453935, "grad_norm": 0.5969870142672318, "learning_rate": 1.6220600162206004e-07, "loss": 0.2352, "step": 102450 }, { "epoch": 2.991372388724252, "grad_norm": 0.5379079971329418, "learning_rate": 1.5950256826169235e-07, "loss": 0.2322, "step": 102455 }, { "epoch": 2.9915183719945695, "grad_norm": 0.5910754533339423, "learning_rate": 1.567991349013247e-07, "loss": 0.2502, "step": 102460 }, { "epoch": 2.9916643552648865, "grad_norm": 0.6100255392461942, "learning_rate": 1.5409570154095704e-07, "loss": 0.2448, "step": 102465 }, { "epoch": 2.991810338535204, "grad_norm": 0.575613022746423, "learning_rate": 1.5139226818058936e-07, "loss": 0.2341, "step": 102470 }, { "epoch": 2.991956321805521, "grad_norm": 0.6131992166383282, "learning_rate": 1.486888348202217e-07, "loss": 0.2363, "step": 102475 }, { "epoch": 2.9921023050758384, "grad_norm": 0.5881007212428617, "learning_rate": 1.4598540145985402e-07, "loss": 0.2367, "step": 102480 }, { "epoch": 2.9922482883461554, "grad_norm": 0.5816639357047583, "learning_rate": 1.4328196809948636e-07, "loss": 0.2296, "step": 102485 }, { "epoch": 2.992394271616473, "grad_norm": 0.5229713318695425, "learning_rate": 1.405785347391187e-07, "loss": 0.2277, "step": 102490 }, { "epoch": 2.99254025488679, "grad_norm": 0.6043672719766559, "learning_rate": 1.3787510137875103e-07, "loss": 0.241, "step": 102495 }, { "epoch": 2.9926862381571073, "grad_norm": 0.6028378133086939, "learning_rate": 1.3517166801838337e-07, "loss": 0.2456, "step": 102500 }, { "epoch": 2.9928322214274243, "grad_norm": 0.5921514826880576, "learning_rate": 1.3246823465801569e-07, "loss": 0.2198, "step": 102505 }, { "epoch": 2.992978204697742, "grad_norm": 0.5841488277245624, "learning_rate": 1.2976480129764803e-07, "loss": 0.2438, "step": 102510 }, { "epoch": 2.993124187968059, "grad_norm": 0.6137184031780409, "learning_rate": 1.2706136793728035e-07, "loss": 0.2588, "step": 102515 }, { "epoch": 2.9932701712383762, "grad_norm": 0.5425896567080635, "learning_rate": 1.243579345769127e-07, "loss": 0.2371, "step": 102520 }, { "epoch": 2.9934161545086932, "grad_norm": 0.5855826645040291, "learning_rate": 1.21654501216545e-07, "loss": 0.2266, "step": 102525 }, { "epoch": 2.9935621377790103, "grad_norm": 0.5593822971036069, "learning_rate": 1.1895106785617734e-07, "loss": 0.2466, "step": 102530 }, { "epoch": 2.9937081210493277, "grad_norm": 0.5623251512516729, "learning_rate": 1.1624763449580968e-07, "loss": 0.2436, "step": 102535 }, { "epoch": 2.993854104319645, "grad_norm": 0.6252304688825939, "learning_rate": 1.1354420113544203e-07, "loss": 0.2384, "step": 102540 }, { "epoch": 2.994000087589962, "grad_norm": 0.6405903817744446, "learning_rate": 1.1084076777507434e-07, "loss": 0.2303, "step": 102545 }, { "epoch": 2.994146070860279, "grad_norm": 0.5985555224505191, "learning_rate": 1.0813733441470669e-07, "loss": 0.2226, "step": 102550 }, { "epoch": 2.9942920541305966, "grad_norm": 0.5721447506663094, "learning_rate": 1.05433901054339e-07, "loss": 0.247, "step": 102555 }, { "epoch": 2.994438037400914, "grad_norm": 0.6388765842174907, "learning_rate": 1.0273046769397135e-07, "loss": 0.2426, "step": 102560 }, { "epoch": 2.994584020671231, "grad_norm": 0.5592509298137516, "learning_rate": 1.0002703433360369e-07, "loss": 0.2278, "step": 102565 }, { "epoch": 2.994730003941548, "grad_norm": 0.6026274564257424, "learning_rate": 9.732360097323602e-08, "loss": 0.2345, "step": 102570 }, { "epoch": 2.9948759872118655, "grad_norm": 0.6105832543096248, "learning_rate": 9.462016761286835e-08, "loss": 0.231, "step": 102575 }, { "epoch": 2.995021970482183, "grad_norm": 0.5774193803087655, "learning_rate": 9.191673425250068e-08, "loss": 0.2316, "step": 102580 }, { "epoch": 2.9951679537525, "grad_norm": 0.6039827323556543, "learning_rate": 8.921330089213301e-08, "loss": 0.2401, "step": 102585 }, { "epoch": 2.995313937022817, "grad_norm": 0.5688247251469605, "learning_rate": 8.650986753176534e-08, "loss": 0.2438, "step": 102590 }, { "epoch": 2.9954599202931345, "grad_norm": 0.5948056737137435, "learning_rate": 8.380643417139769e-08, "loss": 0.2376, "step": 102595 }, { "epoch": 2.9956059035634515, "grad_norm": 0.5991385429543821, "learning_rate": 8.110300081103002e-08, "loss": 0.2391, "step": 102600 }, { "epoch": 2.995751886833769, "grad_norm": 0.5837634660348465, "learning_rate": 7.839956745066235e-08, "loss": 0.2324, "step": 102605 }, { "epoch": 2.995897870104086, "grad_norm": 0.6222252427757722, "learning_rate": 7.569613409029468e-08, "loss": 0.2477, "step": 102610 }, { "epoch": 2.9960438533744034, "grad_norm": 0.5709720160061531, "learning_rate": 7.299270072992701e-08, "loss": 0.2475, "step": 102615 }, { "epoch": 2.9961898366447204, "grad_norm": 0.6155633323835648, "learning_rate": 7.028926736955935e-08, "loss": 0.2489, "step": 102620 }, { "epoch": 2.996335819915038, "grad_norm": 0.5360246710606262, "learning_rate": 6.758583400919168e-08, "loss": 0.2372, "step": 102625 }, { "epoch": 2.996481803185355, "grad_norm": 0.607982153528745, "learning_rate": 6.488240064882401e-08, "loss": 0.2368, "step": 102630 }, { "epoch": 2.9966277864556723, "grad_norm": 0.5806059072952439, "learning_rate": 6.217896728845635e-08, "loss": 0.2346, "step": 102635 }, { "epoch": 2.9967737697259893, "grad_norm": 0.5825964431997268, "learning_rate": 5.947553392808867e-08, "loss": 0.2358, "step": 102640 }, { "epoch": 2.9969197529963068, "grad_norm": 0.6368656579065913, "learning_rate": 5.677210056772101e-08, "loss": 0.2511, "step": 102645 }, { "epoch": 2.9970657362666238, "grad_norm": 0.6198370389934259, "learning_rate": 5.4068667207353344e-08, "loss": 0.2438, "step": 102650 }, { "epoch": 2.997211719536941, "grad_norm": 0.5339613571526317, "learning_rate": 5.1365233846985674e-08, "loss": 0.2245, "step": 102655 }, { "epoch": 2.997357702807258, "grad_norm": 0.585134171026239, "learning_rate": 4.866180048661801e-08, "loss": 0.2368, "step": 102660 }, { "epoch": 2.9975036860775757, "grad_norm": 0.5577005864329171, "learning_rate": 4.595836712625034e-08, "loss": 0.2382, "step": 102665 }, { "epoch": 2.9976496693478927, "grad_norm": 0.6077084251562246, "learning_rate": 4.325493376588267e-08, "loss": 0.242, "step": 102670 }, { "epoch": 2.99779565261821, "grad_norm": 0.6001919157920957, "learning_rate": 4.055150040551501e-08, "loss": 0.2295, "step": 102675 }, { "epoch": 2.997941635888527, "grad_norm": 0.6272481429854233, "learning_rate": 3.784806704514734e-08, "loss": 0.2352, "step": 102680 }, { "epoch": 2.998087619158844, "grad_norm": 0.5843079677026998, "learning_rate": 3.514463368477968e-08, "loss": 0.237, "step": 102685 }, { "epoch": 2.9982336024291616, "grad_norm": 0.6224837020927518, "learning_rate": 3.244120032441201e-08, "loss": 0.2334, "step": 102690 }, { "epoch": 2.998379585699479, "grad_norm": 0.5555869768161682, "learning_rate": 2.9737766964044335e-08, "loss": 0.2336, "step": 102695 }, { "epoch": 2.998525568969796, "grad_norm": 0.5876081331862111, "learning_rate": 2.7034333603676672e-08, "loss": 0.2284, "step": 102700 }, { "epoch": 2.998671552240113, "grad_norm": 0.6117114077126239, "learning_rate": 2.4330900243309006e-08, "loss": 0.2487, "step": 102705 }, { "epoch": 2.9988175355104305, "grad_norm": 0.6060586338824137, "learning_rate": 2.1627466882941336e-08, "loss": 0.2387, "step": 102710 }, { "epoch": 2.998963518780748, "grad_norm": 0.5443718059634589, "learning_rate": 1.892403352257367e-08, "loss": 0.2305, "step": 102715 }, { "epoch": 2.999109502051065, "grad_norm": 0.6444967946464477, "learning_rate": 1.6220600162206004e-08, "loss": 0.2443, "step": 102720 }, { "epoch": 2.999255485321382, "grad_norm": 0.6167090047201302, "learning_rate": 1.3517166801838336e-08, "loss": 0.2338, "step": 102725 }, { "epoch": 2.9994014685916994, "grad_norm": 0.5793985248517449, "learning_rate": 1.0813733441470668e-08, "loss": 0.2424, "step": 102730 }, { "epoch": 2.999547451862017, "grad_norm": 0.6088553770621983, "learning_rate": 8.110300081103002e-09, "loss": 0.2418, "step": 102735 }, { "epoch": 2.999693435132334, "grad_norm": 0.5361581454242158, "learning_rate": 5.406866720735334e-09, "loss": 0.2292, "step": 102740 }, { "epoch": 2.999839418402651, "grad_norm": 0.5488666778696888, "learning_rate": 2.703433360367667e-09, "loss": 0.2392, "step": 102745 }, { "epoch": 2.9999854016729683, "grad_norm": 0.619331776767218, "learning_rate": 0.0, "loss": 0.2422, "step": 102750 }, { "epoch": 2.9999854016729683, "step": 102750, "total_flos": 2.755550219146887e+18, "train_loss": 0.10376385831600848, "train_runtime": 268166.0162, "train_samples_per_second": 6.131, "train_steps_per_second": 0.383 } ], "logging_steps": 5, "max_steps": 102750, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.755550219146887e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }