{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 82053, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.0936224147806904e-05, "grad_norm": 9.026714346797093, "learning_rate": 4.874482086278334e-09, "loss": 1.4119, "step": 5 }, { "epoch": 0.00012187244829561381, "grad_norm": 5.724669106188132, "learning_rate": 1.096758469412625e-08, "loss": 1.2476, "step": 10 }, { "epoch": 0.0001828086724434207, "grad_norm": 9.370900633578326, "learning_rate": 1.7060687301974167e-08, "loss": 1.2876, "step": 15 }, { "epoch": 0.00024374489659122762, "grad_norm": 6.733649336979855, "learning_rate": 2.3153789909822082e-08, "loss": 1.3354, "step": 20 }, { "epoch": 0.00030468112073903455, "grad_norm": 5.56401117144956, "learning_rate": 2.924689251767e-08, "loss": 1.2233, "step": 25 }, { "epoch": 0.0003656173448868414, "grad_norm": 6.309336319968911, "learning_rate": 3.5339995125517914e-08, "loss": 1.3008, "step": 30 }, { "epoch": 0.0004265535690346483, "grad_norm": 8.952799011607611, "learning_rate": 4.143309773336583e-08, "loss": 1.2768, "step": 35 }, { "epoch": 0.00048748979318245523, "grad_norm": 7.589887449285111, "learning_rate": 4.752620034121375e-08, "loss": 1.2564, "step": 40 }, { "epoch": 0.0005484260173302622, "grad_norm": 6.796978799363198, "learning_rate": 5.361930294906167e-08, "loss": 1.3642, "step": 45 }, { "epoch": 0.0006093622414780691, "grad_norm": 5.87567131099471, "learning_rate": 5.971240555690958e-08, "loss": 1.2701, "step": 50 }, { "epoch": 0.0006702984656258759, "grad_norm": 5.269629870483229, "learning_rate": 6.580550816475749e-08, "loss": 1.3217, "step": 55 }, { "epoch": 0.0007312346897736828, "grad_norm": 14.543554729171786, "learning_rate": 7.189861077260542e-08, "loss": 1.2739, "step": 60 }, { "epoch": 0.0007921709139214897, "grad_norm": 8.835247007763146, "learning_rate": 7.799171338045334e-08, "loss": 1.2945, "step": 65 }, { "epoch": 0.0008531071380692966, "grad_norm": 6.540548626389739, "learning_rate": 8.408481598830125e-08, "loss": 1.2514, "step": 70 }, { "epoch": 0.0009140433622171036, "grad_norm": 8.714456663244377, "learning_rate": 9.017791859614917e-08, "loss": 1.3637, "step": 75 }, { "epoch": 0.0009749795863649105, "grad_norm": 6.184526736280786, "learning_rate": 9.627102120399708e-08, "loss": 1.3085, "step": 80 }, { "epoch": 0.0010359158105127174, "grad_norm": 5.501355758002334, "learning_rate": 1.02364123811845e-07, "loss": 1.3329, "step": 85 }, { "epoch": 0.0010968520346605244, "grad_norm": 11.90734004813424, "learning_rate": 1.0845722641969292e-07, "loss": 1.2281, "step": 90 }, { "epoch": 0.0011577882588083312, "grad_norm": 6.522942795561103, "learning_rate": 1.1455032902754083e-07, "loss": 1.2616, "step": 95 }, { "epoch": 0.0012187244829561382, "grad_norm": 5.719916865266339, "learning_rate": 1.2064343163538875e-07, "loss": 1.215, "step": 100 }, { "epoch": 0.001279660707103945, "grad_norm": 4.752506581779107, "learning_rate": 1.2673653424323666e-07, "loss": 1.2203, "step": 105 }, { "epoch": 0.0013405969312517518, "grad_norm": 8.266584832579808, "learning_rate": 1.3282963685108458e-07, "loss": 1.2599, "step": 110 }, { "epoch": 0.0014015331553995588, "grad_norm": 5.1022899110593585, "learning_rate": 1.389227394589325e-07, "loss": 1.2425, "step": 115 }, { "epoch": 0.0014624693795473656, "grad_norm": 5.466370878847477, "learning_rate": 1.450158420667804e-07, "loss": 1.29, "step": 120 }, { "epoch": 0.0015234056036951727, "grad_norm": 5.163289647721094, "learning_rate": 1.5110894467462833e-07, "loss": 1.2416, "step": 125 }, { "epoch": 0.0015843418278429795, "grad_norm": 5.0079156215381015, "learning_rate": 1.5720204728247624e-07, "loss": 1.2691, "step": 130 }, { "epoch": 0.0016452780519907865, "grad_norm": 8.165507792399469, "learning_rate": 1.6329514989032416e-07, "loss": 1.2566, "step": 135 }, { "epoch": 0.0017062142761385933, "grad_norm": 5.939612887230345, "learning_rate": 1.6938825249817207e-07, "loss": 1.204, "step": 140 }, { "epoch": 0.0017671505002864003, "grad_norm": 7.433981272848387, "learning_rate": 1.7548135510602e-07, "loss": 1.1429, "step": 145 }, { "epoch": 0.001828086724434207, "grad_norm": 4.528297624377364, "learning_rate": 1.815744577138679e-07, "loss": 1.1912, "step": 150 }, { "epoch": 0.0018890229485820141, "grad_norm": 6.622576437388626, "learning_rate": 1.8766756032171582e-07, "loss": 1.1885, "step": 155 }, { "epoch": 0.001949959172729821, "grad_norm": 4.031445779591864, "learning_rate": 1.9376066292956374e-07, "loss": 1.1955, "step": 160 }, { "epoch": 0.002010895396877628, "grad_norm": 4.834038077361735, "learning_rate": 1.9985376553741168e-07, "loss": 1.1759, "step": 165 }, { "epoch": 0.0020718316210254347, "grad_norm": 3.822942370989535, "learning_rate": 2.059468681452596e-07, "loss": 1.156, "step": 170 }, { "epoch": 0.0021327678451732415, "grad_norm": 4.85692369277556, "learning_rate": 2.120399707531075e-07, "loss": 1.1481, "step": 175 }, { "epoch": 0.0021937040693210488, "grad_norm": 5.06187736832584, "learning_rate": 2.1813307336095543e-07, "loss": 1.1003, "step": 180 }, { "epoch": 0.0022546402934688556, "grad_norm": 13.65901103284219, "learning_rate": 2.2422617596880334e-07, "loss": 1.2171, "step": 185 }, { "epoch": 0.0023155765176166624, "grad_norm": 4.578994753928683, "learning_rate": 2.3031927857665126e-07, "loss": 1.1763, "step": 190 }, { "epoch": 0.002376512741764469, "grad_norm": 6.585520839739018, "learning_rate": 2.3641238118449917e-07, "loss": 1.1894, "step": 195 }, { "epoch": 0.0024374489659122764, "grad_norm": 4.224004941600262, "learning_rate": 2.4250548379234706e-07, "loss": 1.1261, "step": 200 }, { "epoch": 0.0024983851900600832, "grad_norm": 3.7296392220307215, "learning_rate": 2.48598586400195e-07, "loss": 1.1149, "step": 205 }, { "epoch": 0.00255932141420789, "grad_norm": 3.65320938926858, "learning_rate": 2.546916890080429e-07, "loss": 1.2106, "step": 210 }, { "epoch": 0.002620257638355697, "grad_norm": 3.8841358234331915, "learning_rate": 2.6078479161589084e-07, "loss": 1.043, "step": 215 }, { "epoch": 0.0026811938625035036, "grad_norm": 4.765850885054147, "learning_rate": 2.668778942237387e-07, "loss": 1.2594, "step": 220 }, { "epoch": 0.002742130086651311, "grad_norm": 3.9426936893971445, "learning_rate": 2.7297099683158667e-07, "loss": 1.0313, "step": 225 }, { "epoch": 0.0028030663107991177, "grad_norm": 3.5023359905103093, "learning_rate": 2.7906409943943456e-07, "loss": 1.0658, "step": 230 }, { "epoch": 0.0028640025349469245, "grad_norm": 3.110380950656999, "learning_rate": 2.851572020472825e-07, "loss": 1.041, "step": 235 }, { "epoch": 0.0029249387590947313, "grad_norm": 2.774255507014411, "learning_rate": 2.912503046551304e-07, "loss": 1.0528, "step": 240 }, { "epoch": 0.0029858749832425385, "grad_norm": 3.6043603192610707, "learning_rate": 2.9734340726297833e-07, "loss": 1.1906, "step": 245 }, { "epoch": 0.0030468112073903453, "grad_norm": 4.422171138246605, "learning_rate": 3.0343650987082627e-07, "loss": 1.0645, "step": 250 }, { "epoch": 0.003107747431538152, "grad_norm": 3.800114295897458, "learning_rate": 3.0952961247867416e-07, "loss": 1.1105, "step": 255 }, { "epoch": 0.003168683655685959, "grad_norm": 3.89169528526891, "learning_rate": 3.156227150865221e-07, "loss": 1.0727, "step": 260 }, { "epoch": 0.003229619879833766, "grad_norm": 4.129825756873968, "learning_rate": 3.2171581769437e-07, "loss": 1.1155, "step": 265 }, { "epoch": 0.003290556103981573, "grad_norm": 3.3797954199329805, "learning_rate": 3.2780892030221794e-07, "loss": 1.0564, "step": 270 }, { "epoch": 0.0033514923281293798, "grad_norm": 4.351999997660905, "learning_rate": 3.339020229100658e-07, "loss": 1.104, "step": 275 }, { "epoch": 0.0034124285522771866, "grad_norm": 4.8027258073503445, "learning_rate": 3.3999512551791377e-07, "loss": 1.1144, "step": 280 }, { "epoch": 0.003473364776424994, "grad_norm": 3.537582793152354, "learning_rate": 3.4608822812576166e-07, "loss": 1.1528, "step": 285 }, { "epoch": 0.0035343010005728006, "grad_norm": 3.642240656699903, "learning_rate": 3.521813307336096e-07, "loss": 1.1309, "step": 290 }, { "epoch": 0.0035952372247206074, "grad_norm": 4.214507471086856, "learning_rate": 3.582744333414575e-07, "loss": 1.0604, "step": 295 }, { "epoch": 0.003656173448868414, "grad_norm": 3.129023728085997, "learning_rate": 3.6436753594930543e-07, "loss": 1.0923, "step": 300 }, { "epoch": 0.0037171096730162214, "grad_norm": 4.152180102702356, "learning_rate": 3.704606385571533e-07, "loss": 0.9948, "step": 305 }, { "epoch": 0.0037780458971640282, "grad_norm": 4.727750941883665, "learning_rate": 3.765537411650012e-07, "loss": 1.0369, "step": 310 }, { "epoch": 0.003838982121311835, "grad_norm": 2.7377951812967667, "learning_rate": 3.8264684377284915e-07, "loss": 1.073, "step": 315 }, { "epoch": 0.003899918345459642, "grad_norm": 3.0476764344640985, "learning_rate": 3.8873994638069704e-07, "loss": 1.02, "step": 320 }, { "epoch": 0.003960854569607449, "grad_norm": 3.694212312393187, "learning_rate": 3.94833048988545e-07, "loss": 1.1367, "step": 325 }, { "epoch": 0.004021790793755256, "grad_norm": 4.521395806283557, "learning_rate": 4.0092615159639287e-07, "loss": 1.0608, "step": 330 }, { "epoch": 0.004082727017903062, "grad_norm": 4.189849377531953, "learning_rate": 4.070192542042408e-07, "loss": 1.1609, "step": 335 }, { "epoch": 0.0041436632420508695, "grad_norm": 4.528768439092698, "learning_rate": 4.131123568120887e-07, "loss": 1.1133, "step": 340 }, { "epoch": 0.004204599466198677, "grad_norm": 4.319123255738737, "learning_rate": 4.1920545941993665e-07, "loss": 1.047, "step": 345 }, { "epoch": 0.004265535690346483, "grad_norm": 3.4100678598510936, "learning_rate": 4.2529856202778454e-07, "loss": 1.0351, "step": 350 }, { "epoch": 0.00432647191449429, "grad_norm": 3.808595581132052, "learning_rate": 4.313916646356325e-07, "loss": 1.0927, "step": 355 }, { "epoch": 0.0043874081386420976, "grad_norm": 3.6329609445007343, "learning_rate": 4.3748476724348047e-07, "loss": 0.9827, "step": 360 }, { "epoch": 0.004448344362789904, "grad_norm": 3.513808727166505, "learning_rate": 4.4357786985132836e-07, "loss": 1.0515, "step": 365 }, { "epoch": 0.004509280586937711, "grad_norm": 2.9021602930003434, "learning_rate": 4.496709724591763e-07, "loss": 1.0638, "step": 370 }, { "epoch": 0.0045702168110855175, "grad_norm": 3.7357179341818814, "learning_rate": 4.557640750670242e-07, "loss": 1.0472, "step": 375 }, { "epoch": 0.004631153035233325, "grad_norm": 5.198862633916503, "learning_rate": 4.618571776748721e-07, "loss": 1.0199, "step": 380 }, { "epoch": 0.004692089259381132, "grad_norm": 2.86072366349048, "learning_rate": 4.6795028028272e-07, "loss": 1.0239, "step": 385 }, { "epoch": 0.004753025483528938, "grad_norm": 4.306035698755343, "learning_rate": 4.740433828905679e-07, "loss": 0.9679, "step": 390 }, { "epoch": 0.004813961707676746, "grad_norm": 3.2689682931445496, "learning_rate": 4.801364854984159e-07, "loss": 1.0383, "step": 395 }, { "epoch": 0.004874897931824553, "grad_norm": 3.3798029807037704, "learning_rate": 4.862295881062637e-07, "loss": 1.0467, "step": 400 }, { "epoch": 0.004935834155972359, "grad_norm": 3.430201203957132, "learning_rate": 4.923226907141116e-07, "loss": 1.0478, "step": 405 }, { "epoch": 0.0049967703801201665, "grad_norm": 2.7426043092120285, "learning_rate": 4.984157933219596e-07, "loss": 1.0085, "step": 410 }, { "epoch": 0.005057706604267973, "grad_norm": 3.275774978200537, "learning_rate": 5.045088959298075e-07, "loss": 0.9764, "step": 415 }, { "epoch": 0.00511864282841578, "grad_norm": 3.644675262021148, "learning_rate": 5.106019985376554e-07, "loss": 1.0148, "step": 420 }, { "epoch": 0.005179579052563587, "grad_norm": 3.6573514138296277, "learning_rate": 5.166951011455033e-07, "loss": 1.0768, "step": 425 }, { "epoch": 0.005240515276711394, "grad_norm": 3.751406415121169, "learning_rate": 5.227882037533513e-07, "loss": 0.9862, "step": 430 }, { "epoch": 0.005301451500859201, "grad_norm": 3.204996123282442, "learning_rate": 5.288813063611992e-07, "loss": 1.0299, "step": 435 }, { "epoch": 0.005362387725007007, "grad_norm": 2.857426160817859, "learning_rate": 5.349744089690471e-07, "loss": 0.933, "step": 440 }, { "epoch": 0.0054233239491548145, "grad_norm": 3.4787649363281847, "learning_rate": 5.41067511576895e-07, "loss": 0.9735, "step": 445 }, { "epoch": 0.005484260173302622, "grad_norm": 2.6486494457447516, "learning_rate": 5.47160614184743e-07, "loss": 0.9425, "step": 450 }, { "epoch": 0.005545196397450428, "grad_norm": 3.3792713520551496, "learning_rate": 5.532537167925908e-07, "loss": 1.0308, "step": 455 }, { "epoch": 0.005606132621598235, "grad_norm": 8.471223695200397, "learning_rate": 5.593468194004387e-07, "loss": 0.9989, "step": 460 }, { "epoch": 0.005667068845746043, "grad_norm": 3.464702717377911, "learning_rate": 5.654399220082866e-07, "loss": 1.0407, "step": 465 }, { "epoch": 0.005728005069893849, "grad_norm": 3.0179090768104686, "learning_rate": 5.715330246161345e-07, "loss": 1.0863, "step": 470 }, { "epoch": 0.005788941294041656, "grad_norm": 3.848897913196683, "learning_rate": 5.776261272239825e-07, "loss": 1.0052, "step": 475 }, { "epoch": 0.0058498775181894625, "grad_norm": 4.137361690122276, "learning_rate": 5.837192298318304e-07, "loss": 1.0962, "step": 480 }, { "epoch": 0.00591081374233727, "grad_norm": 3.1494516319973522, "learning_rate": 5.898123324396783e-07, "loss": 1.0516, "step": 485 }, { "epoch": 0.005971749966485077, "grad_norm": 6.235045911157299, "learning_rate": 5.959054350475262e-07, "loss": 0.9725, "step": 490 }, { "epoch": 0.006032686190632883, "grad_norm": 2.8115243370570684, "learning_rate": 6.019985376553742e-07, "loss": 0.9885, "step": 495 }, { "epoch": 0.006093622414780691, "grad_norm": 3.6497838402184275, "learning_rate": 6.080916402632221e-07, "loss": 1.0266, "step": 500 }, { "epoch": 0.006154558638928498, "grad_norm": 3.059221378013513, "learning_rate": 6.1418474287107e-07, "loss": 1.01, "step": 505 }, { "epoch": 0.006215494863076304, "grad_norm": 2.676671031811883, "learning_rate": 6.202778454789178e-07, "loss": 1.0294, "step": 510 }, { "epoch": 0.0062764310872241115, "grad_norm": 5.543141134368499, "learning_rate": 6.263709480867658e-07, "loss": 0.9732, "step": 515 }, { "epoch": 0.006337367311371918, "grad_norm": 3.9557097846338105, "learning_rate": 6.324640506946138e-07, "loss": 1.0064, "step": 520 }, { "epoch": 0.006398303535519725, "grad_norm": 2.795691167813269, "learning_rate": 6.385571533024616e-07, "loss": 1.0501, "step": 525 }, { "epoch": 0.006459239759667532, "grad_norm": 2.8380768581564224, "learning_rate": 6.446502559103096e-07, "loss": 0.9941, "step": 530 }, { "epoch": 0.006520175983815339, "grad_norm": 8.931522129518475, "learning_rate": 6.507433585181575e-07, "loss": 0.9982, "step": 535 }, { "epoch": 0.006581112207963146, "grad_norm": 3.071337563597113, "learning_rate": 6.568364611260054e-07, "loss": 1.0397, "step": 540 }, { "epoch": 0.006642048432110952, "grad_norm": 2.9124266337344524, "learning_rate": 6.629295637338533e-07, "loss": 0.9693, "step": 545 }, { "epoch": 0.0067029846562587595, "grad_norm": 3.640350225112588, "learning_rate": 6.690226663417013e-07, "loss": 1.0786, "step": 550 }, { "epoch": 0.006763920880406567, "grad_norm": 2.952368187239268, "learning_rate": 6.751157689495492e-07, "loss": 1.052, "step": 555 }, { "epoch": 0.006824857104554373, "grad_norm": 7.179633173214855, "learning_rate": 6.81208871557397e-07, "loss": 1.0276, "step": 560 }, { "epoch": 0.00688579332870218, "grad_norm": 6.19334690613605, "learning_rate": 6.873019741652449e-07, "loss": 1.0651, "step": 565 }, { "epoch": 0.006946729552849988, "grad_norm": 4.178023986653975, "learning_rate": 6.933950767730929e-07, "loss": 0.9535, "step": 570 }, { "epoch": 0.007007665776997794, "grad_norm": 4.893003536517908, "learning_rate": 6.994881793809407e-07, "loss": 1.0771, "step": 575 }, { "epoch": 0.007068602001145601, "grad_norm": 4.136534859175122, "learning_rate": 7.055812819887887e-07, "loss": 1.0088, "step": 580 }, { "epoch": 0.0071295382252934076, "grad_norm": 3.489841078369627, "learning_rate": 7.116743845966366e-07, "loss": 0.8796, "step": 585 }, { "epoch": 0.007190474449441215, "grad_norm": 2.689055671214176, "learning_rate": 7.177674872044846e-07, "loss": 0.9847, "step": 590 }, { "epoch": 0.007251410673589022, "grad_norm": 3.8978639866905436, "learning_rate": 7.238605898123326e-07, "loss": 1.0237, "step": 595 }, { "epoch": 0.007312346897736828, "grad_norm": 2.5598718247763736, "learning_rate": 7.299536924201804e-07, "loss": 1.0451, "step": 600 }, { "epoch": 0.007373283121884636, "grad_norm": 6.633866891458327, "learning_rate": 7.360467950280284e-07, "loss": 1.0058, "step": 605 }, { "epoch": 0.007434219346032443, "grad_norm": 3.0762852196261483, "learning_rate": 7.421398976358763e-07, "loss": 0.9989, "step": 610 }, { "epoch": 0.007495155570180249, "grad_norm": 3.8753394846631712, "learning_rate": 7.482330002437243e-07, "loss": 1.0524, "step": 615 }, { "epoch": 0.0075560917943280565, "grad_norm": 3.0668750205458153, "learning_rate": 7.54326102851572e-07, "loss": 0.9636, "step": 620 }, { "epoch": 0.007617028018475863, "grad_norm": 4.705701192099608, "learning_rate": 7.6041920545942e-07, "loss": 1.0206, "step": 625 }, { "epoch": 0.00767796424262367, "grad_norm": 3.2093114634430515, "learning_rate": 7.665123080672679e-07, "loss": 1.0604, "step": 630 }, { "epoch": 0.007738900466771477, "grad_norm": 3.322269572188954, "learning_rate": 7.726054106751159e-07, "loss": 0.9627, "step": 635 }, { "epoch": 0.007799836690919284, "grad_norm": 3.642190923889922, "learning_rate": 7.786985132829637e-07, "loss": 0.9885, "step": 640 }, { "epoch": 0.00786077291506709, "grad_norm": 3.3596065603205356, "learning_rate": 7.847916158908117e-07, "loss": 1.0203, "step": 645 }, { "epoch": 0.007921709139214897, "grad_norm": 3.196735463164649, "learning_rate": 7.908847184986596e-07, "loss": 0.9765, "step": 650 }, { "epoch": 0.007982645363362705, "grad_norm": 2.5333086397831215, "learning_rate": 7.969778211065076e-07, "loss": 0.9322, "step": 655 }, { "epoch": 0.008043581587510512, "grad_norm": 3.2815130473282523, "learning_rate": 8.030709237143554e-07, "loss": 1.0269, "step": 660 }, { "epoch": 0.008104517811658319, "grad_norm": 3.588244738199355, "learning_rate": 8.091640263222034e-07, "loss": 1.0143, "step": 665 }, { "epoch": 0.008165454035806124, "grad_norm": 3.484550772532751, "learning_rate": 8.152571289300512e-07, "loss": 0.9983, "step": 670 }, { "epoch": 0.008226390259953932, "grad_norm": 3.497332997475492, "learning_rate": 8.213502315378992e-07, "loss": 1.0085, "step": 675 }, { "epoch": 0.008287326484101739, "grad_norm": 3.8932851212861626, "learning_rate": 8.27443334145747e-07, "loss": 0.9744, "step": 680 }, { "epoch": 0.008348262708249546, "grad_norm": 3.0958909036136535, "learning_rate": 8.33536436753595e-07, "loss": 0.9887, "step": 685 }, { "epoch": 0.008409198932397353, "grad_norm": 3.2323603700596073, "learning_rate": 8.396295393614429e-07, "loss": 1.0015, "step": 690 }, { "epoch": 0.00847013515654516, "grad_norm": 3.56789555312337, "learning_rate": 8.457226419692908e-07, "loss": 1.06, "step": 695 }, { "epoch": 0.008531071380692966, "grad_norm": 2.8443756532911855, "learning_rate": 8.518157445771387e-07, "loss": 0.9184, "step": 700 }, { "epoch": 0.008592007604840773, "grad_norm": 2.9308178194432575, "learning_rate": 8.579088471849867e-07, "loss": 1.0232, "step": 705 }, { "epoch": 0.00865294382898858, "grad_norm": 3.1186163131839306, "learning_rate": 8.640019497928346e-07, "loss": 0.9978, "step": 710 }, { "epoch": 0.008713880053136388, "grad_norm": 3.022924541851841, "learning_rate": 8.700950524006825e-07, "loss": 0.975, "step": 715 }, { "epoch": 0.008774816277284195, "grad_norm": 3.0398440699137086, "learning_rate": 8.761881550085304e-07, "loss": 0.9632, "step": 720 }, { "epoch": 0.008835752501432, "grad_norm": 3.121171859791589, "learning_rate": 8.822812576163783e-07, "loss": 0.9234, "step": 725 }, { "epoch": 0.008896688725579808, "grad_norm": 3.5256201862789553, "learning_rate": 8.883743602242261e-07, "loss": 0.911, "step": 730 }, { "epoch": 0.008957624949727615, "grad_norm": 3.047387449118935, "learning_rate": 8.944674628320741e-07, "loss": 0.9396, "step": 735 }, { "epoch": 0.009018561173875422, "grad_norm": 3.862189050606417, "learning_rate": 9.00560565439922e-07, "loss": 1.0422, "step": 740 }, { "epoch": 0.00907949739802323, "grad_norm": 2.9735503573279725, "learning_rate": 9.0665366804777e-07, "loss": 1.0067, "step": 745 }, { "epoch": 0.009140433622171035, "grad_norm": 3.1595760804205044, "learning_rate": 9.12746770655618e-07, "loss": 1.0112, "step": 750 }, { "epoch": 0.009201369846318842, "grad_norm": 2.9730756798521956, "learning_rate": 9.188398732634658e-07, "loss": 0.9161, "step": 755 }, { "epoch": 0.00926230607046665, "grad_norm": 3.366305017600436, "learning_rate": 9.249329758713138e-07, "loss": 0.9089, "step": 760 }, { "epoch": 0.009323242294614457, "grad_norm": 3.351908423742, "learning_rate": 9.310260784791617e-07, "loss": 0.9681, "step": 765 }, { "epoch": 0.009384178518762264, "grad_norm": 7.267281476130126, "learning_rate": 9.371191810870097e-07, "loss": 1.0291, "step": 770 }, { "epoch": 0.00944511474291007, "grad_norm": 4.306958135877391, "learning_rate": 9.432122836948575e-07, "loss": 1.0209, "step": 775 }, { "epoch": 0.009506050967057877, "grad_norm": 3.463394179752939, "learning_rate": 9.493053863027054e-07, "loss": 0.9747, "step": 780 }, { "epoch": 0.009566987191205684, "grad_norm": 2.5136316282717077, "learning_rate": 9.553984889105532e-07, "loss": 0.9635, "step": 785 }, { "epoch": 0.009627923415353491, "grad_norm": 6.9331970679171855, "learning_rate": 9.614915915184012e-07, "loss": 0.9775, "step": 790 }, { "epoch": 0.009688859639501298, "grad_norm": 3.4345686733011362, "learning_rate": 9.675846941262492e-07, "loss": 0.9491, "step": 795 }, { "epoch": 0.009749795863649106, "grad_norm": 2.384604344217859, "learning_rate": 9.736777967340972e-07, "loss": 0.9296, "step": 800 }, { "epoch": 0.009810732087796911, "grad_norm": 2.9013566353327698, "learning_rate": 9.79770899341945e-07, "loss": 0.9306, "step": 805 }, { "epoch": 0.009871668311944718, "grad_norm": 3.5875827638822577, "learning_rate": 9.85864001949793e-07, "loss": 0.9197, "step": 810 }, { "epoch": 0.009932604536092526, "grad_norm": 3.050263524605999, "learning_rate": 9.919571045576408e-07, "loss": 0.8979, "step": 815 }, { "epoch": 0.009993540760240333, "grad_norm": 6.862490877011035, "learning_rate": 9.980502071654888e-07, "loss": 1.0273, "step": 820 }, { "epoch": 0.01005447698438814, "grad_norm": 3.7911465420190185, "learning_rate": 1.0041433097733366e-06, "loss": 1.0121, "step": 825 }, { "epoch": 0.010115413208535946, "grad_norm": 3.3413774672564114, "learning_rate": 1.0102364123811846e-06, "loss": 0.9973, "step": 830 }, { "epoch": 0.010176349432683753, "grad_norm": 3.2183222894943193, "learning_rate": 1.0163295149890325e-06, "loss": 0.9742, "step": 835 }, { "epoch": 0.01023728565683156, "grad_norm": 3.251137088805102, "learning_rate": 1.0224226175968805e-06, "loss": 0.8859, "step": 840 }, { "epoch": 0.010298221880979367, "grad_norm": 3.0262378340282754, "learning_rate": 1.0285157202047283e-06, "loss": 1.0363, "step": 845 }, { "epoch": 0.010359158105127175, "grad_norm": 2.9462825478011676, "learning_rate": 1.0346088228125763e-06, "loss": 0.9282, "step": 850 }, { "epoch": 0.01042009432927498, "grad_norm": 3.5644720845128384, "learning_rate": 1.040701925420424e-06, "loss": 1.0418, "step": 855 }, { "epoch": 0.010481030553422787, "grad_norm": 3.630768067270525, "learning_rate": 1.046795028028272e-06, "loss": 0.9514, "step": 860 }, { "epoch": 0.010541966777570595, "grad_norm": 2.69536880653524, "learning_rate": 1.0528881306361199e-06, "loss": 1.0081, "step": 865 }, { "epoch": 0.010602903001718402, "grad_norm": 2.739279830952773, "learning_rate": 1.0589812332439679e-06, "loss": 1.0316, "step": 870 }, { "epoch": 0.010663839225866209, "grad_norm": 3.1460550563977603, "learning_rate": 1.0650743358518159e-06, "loss": 1.0078, "step": 875 }, { "epoch": 0.010724775450014015, "grad_norm": 3.1063588658517216, "learning_rate": 1.0711674384596637e-06, "loss": 0.9367, "step": 880 }, { "epoch": 0.010785711674161822, "grad_norm": 2.885148956951979, "learning_rate": 1.0772605410675117e-06, "loss": 0.8806, "step": 885 }, { "epoch": 0.010846647898309629, "grad_norm": 4.475234334725604, "learning_rate": 1.0833536436753596e-06, "loss": 0.9698, "step": 890 }, { "epoch": 0.010907584122457436, "grad_norm": 3.3435762994291145, "learning_rate": 1.0894467462832074e-06, "loss": 0.9892, "step": 895 }, { "epoch": 0.010968520346605243, "grad_norm": 3.899447814054372, "learning_rate": 1.0955398488910554e-06, "loss": 1.0058, "step": 900 }, { "epoch": 0.01102945657075305, "grad_norm": 2.677866148242483, "learning_rate": 1.1016329514989034e-06, "loss": 0.9245, "step": 905 }, { "epoch": 0.011090392794900856, "grad_norm": 2.878312114992633, "learning_rate": 1.1077260541067512e-06, "loss": 0.9662, "step": 910 }, { "epoch": 0.011151329019048663, "grad_norm": 3.1510992428331446, "learning_rate": 1.1138191567145992e-06, "loss": 0.9319, "step": 915 }, { "epoch": 0.01121226524319647, "grad_norm": 3.464027244455156, "learning_rate": 1.119912259322447e-06, "loss": 0.9417, "step": 920 }, { "epoch": 0.011273201467344278, "grad_norm": 2.797327092638718, "learning_rate": 1.126005361930295e-06, "loss": 0.8694, "step": 925 }, { "epoch": 0.011334137691492085, "grad_norm": 3.7354914451464394, "learning_rate": 1.132098464538143e-06, "loss": 0.9383, "step": 930 }, { "epoch": 0.01139507391563989, "grad_norm": 3.1827867216783003, "learning_rate": 1.138191567145991e-06, "loss": 0.9679, "step": 935 }, { "epoch": 0.011456010139787698, "grad_norm": 3.415871097209064, "learning_rate": 1.1442846697538388e-06, "loss": 0.9578, "step": 940 }, { "epoch": 0.011516946363935505, "grad_norm": 3.1639310272412606, "learning_rate": 1.1503777723616867e-06, "loss": 0.9672, "step": 945 }, { "epoch": 0.011577882588083312, "grad_norm": 2.775032526211974, "learning_rate": 1.1564708749695345e-06, "loss": 0.9271, "step": 950 }, { "epoch": 0.01163881881223112, "grad_norm": 3.0377726469571864, "learning_rate": 1.1625639775773825e-06, "loss": 0.9929, "step": 955 }, { "epoch": 0.011699755036378925, "grad_norm": 3.8799843623090493, "learning_rate": 1.1686570801852303e-06, "loss": 0.9879, "step": 960 }, { "epoch": 0.011760691260526732, "grad_norm": 3.015463344752538, "learning_rate": 1.1747501827930783e-06, "loss": 0.8875, "step": 965 }, { "epoch": 0.01182162748467454, "grad_norm": 2.8913161062561645, "learning_rate": 1.1808432854009263e-06, "loss": 0.9892, "step": 970 }, { "epoch": 0.011882563708822347, "grad_norm": 3.4600704101040693, "learning_rate": 1.1869363880087743e-06, "loss": 1.1136, "step": 975 }, { "epoch": 0.011943499932970154, "grad_norm": 3.4209228435604406, "learning_rate": 1.193029490616622e-06, "loss": 0.9779, "step": 980 }, { "epoch": 0.01200443615711796, "grad_norm": 3.420249839614387, "learning_rate": 1.19912259322447e-06, "loss": 0.9614, "step": 985 }, { "epoch": 0.012065372381265767, "grad_norm": 3.9800783329269405, "learning_rate": 1.2052156958323179e-06, "loss": 0.8848, "step": 990 }, { "epoch": 0.012126308605413574, "grad_norm": 2.682019159694129, "learning_rate": 1.2113087984401659e-06, "loss": 1.0557, "step": 995 }, { "epoch": 0.012187244829561381, "grad_norm": 2.787924100688068, "learning_rate": 1.2174019010480136e-06, "loss": 1.0159, "step": 1000 }, { "epoch": 0.012248181053709188, "grad_norm": 2.8126383958875536, "learning_rate": 1.2234950036558616e-06, "loss": 0.8992, "step": 1005 }, { "epoch": 0.012309117277856996, "grad_norm": 3.6612665917848357, "learning_rate": 1.2295881062637096e-06, "loss": 0.9258, "step": 1010 }, { "epoch": 0.012370053502004801, "grad_norm": 5.703731409608363, "learning_rate": 1.2356812088715574e-06, "loss": 0.9943, "step": 1015 }, { "epoch": 0.012430989726152608, "grad_norm": 3.0276223598307244, "learning_rate": 1.2417743114794054e-06, "loss": 0.9614, "step": 1020 }, { "epoch": 0.012491925950300416, "grad_norm": 3.3816572760083567, "learning_rate": 1.2478674140872534e-06, "loss": 0.975, "step": 1025 }, { "epoch": 0.012552862174448223, "grad_norm": 2.654698222081222, "learning_rate": 1.2539605166951014e-06, "loss": 0.9573, "step": 1030 }, { "epoch": 0.01261379839859603, "grad_norm": 2.977388116792593, "learning_rate": 1.2600536193029492e-06, "loss": 1.0149, "step": 1035 }, { "epoch": 0.012674734622743836, "grad_norm": 2.3909597552380286, "learning_rate": 1.266146721910797e-06, "loss": 0.9159, "step": 1040 }, { "epoch": 0.012735670846891643, "grad_norm": 2.959981734311671, "learning_rate": 1.272239824518645e-06, "loss": 0.907, "step": 1045 }, { "epoch": 0.01279660707103945, "grad_norm": 2.9250126432745986, "learning_rate": 1.278332927126493e-06, "loss": 0.8972, "step": 1050 }, { "epoch": 0.012857543295187257, "grad_norm": 3.9749677120252715, "learning_rate": 1.2844260297343407e-06, "loss": 1.0274, "step": 1055 }, { "epoch": 0.012918479519335065, "grad_norm": 4.587671035870923, "learning_rate": 1.2905191323421887e-06, "loss": 0.9088, "step": 1060 }, { "epoch": 0.01297941574348287, "grad_norm": 3.3409713811230963, "learning_rate": 1.2966122349500367e-06, "loss": 0.9279, "step": 1065 }, { "epoch": 0.013040351967630677, "grad_norm": 3.138519118839619, "learning_rate": 1.3027053375578847e-06, "loss": 0.9237, "step": 1070 }, { "epoch": 0.013101288191778485, "grad_norm": 3.443729322231052, "learning_rate": 1.3087984401657325e-06, "loss": 0.9461, "step": 1075 }, { "epoch": 0.013162224415926292, "grad_norm": 2.9573497412157606, "learning_rate": 1.3148915427735803e-06, "loss": 0.8669, "step": 1080 }, { "epoch": 0.013223160640074099, "grad_norm": 2.900897425468756, "learning_rate": 1.3209846453814285e-06, "loss": 0.938, "step": 1085 }, { "epoch": 0.013284096864221905, "grad_norm": 2.859966516272599, "learning_rate": 1.3270777479892763e-06, "loss": 0.9408, "step": 1090 }, { "epoch": 0.013345033088369712, "grad_norm": 2.7703042004840657, "learning_rate": 1.333170850597124e-06, "loss": 0.8058, "step": 1095 }, { "epoch": 0.013405969312517519, "grad_norm": 3.466598932563937, "learning_rate": 1.339263953204972e-06, "loss": 0.9041, "step": 1100 }, { "epoch": 0.013466905536665326, "grad_norm": 3.9411543131992826, "learning_rate": 1.34535705581282e-06, "loss": 1.0131, "step": 1105 }, { "epoch": 0.013527841760813133, "grad_norm": 3.4886521962294976, "learning_rate": 1.351450158420668e-06, "loss": 0.9281, "step": 1110 }, { "epoch": 0.01358877798496094, "grad_norm": 2.9949618897743937, "learning_rate": 1.3575432610285158e-06, "loss": 0.9288, "step": 1115 }, { "epoch": 0.013649714209108746, "grad_norm": 3.5038676103768256, "learning_rate": 1.3636363636363636e-06, "loss": 0.9478, "step": 1120 }, { "epoch": 0.013710650433256553, "grad_norm": 4.357129919283307, "learning_rate": 1.3697294662442118e-06, "loss": 0.9415, "step": 1125 }, { "epoch": 0.01377158665740436, "grad_norm": 3.3024597144223407, "learning_rate": 1.3758225688520596e-06, "loss": 0.9061, "step": 1130 }, { "epoch": 0.013832522881552168, "grad_norm": 3.1946030259737177, "learning_rate": 1.3819156714599074e-06, "loss": 0.9556, "step": 1135 }, { "epoch": 0.013893459105699975, "grad_norm": 3.1439330391845313, "learning_rate": 1.3880087740677554e-06, "loss": 0.9319, "step": 1140 }, { "epoch": 0.01395439532984778, "grad_norm": 3.310604462074563, "learning_rate": 1.3941018766756034e-06, "loss": 0.8817, "step": 1145 }, { "epoch": 0.014015331553995588, "grad_norm": 2.6296155922960867, "learning_rate": 1.4001949792834514e-06, "loss": 0.9095, "step": 1150 }, { "epoch": 0.014076267778143395, "grad_norm": 4.346919063992292, "learning_rate": 1.4062880818912992e-06, "loss": 0.9362, "step": 1155 }, { "epoch": 0.014137204002291202, "grad_norm": 3.0177939396254136, "learning_rate": 1.412381184499147e-06, "loss": 0.9183, "step": 1160 }, { "epoch": 0.01419814022643901, "grad_norm": 3.075814772770072, "learning_rate": 1.4184742871069951e-06, "loss": 0.9499, "step": 1165 }, { "epoch": 0.014259076450586815, "grad_norm": 3.0459360917383953, "learning_rate": 1.424567389714843e-06, "loss": 0.9592, "step": 1170 }, { "epoch": 0.014320012674734622, "grad_norm": 4.191519185543608, "learning_rate": 1.4306604923226907e-06, "loss": 0.9177, "step": 1175 }, { "epoch": 0.01438094889888243, "grad_norm": 2.9333722123262693, "learning_rate": 1.4367535949305387e-06, "loss": 0.9534, "step": 1180 }, { "epoch": 0.014441885123030237, "grad_norm": 3.165478521701969, "learning_rate": 1.4428466975383867e-06, "loss": 0.9008, "step": 1185 }, { "epoch": 0.014502821347178044, "grad_norm": 3.8279993937686405, "learning_rate": 1.4489398001462345e-06, "loss": 0.9254, "step": 1190 }, { "epoch": 0.01456375757132585, "grad_norm": 3.8388233569222363, "learning_rate": 1.4550329027540825e-06, "loss": 0.9176, "step": 1195 }, { "epoch": 0.014624693795473657, "grad_norm": 3.1822392572061036, "learning_rate": 1.4611260053619303e-06, "loss": 1.0357, "step": 1200 }, { "epoch": 0.014685630019621464, "grad_norm": 5.88476790874934, "learning_rate": 1.4672191079697785e-06, "loss": 0.9621, "step": 1205 }, { "epoch": 0.014746566243769271, "grad_norm": 3.2603743907836646, "learning_rate": 1.4733122105776263e-06, "loss": 0.975, "step": 1210 }, { "epoch": 0.014807502467917079, "grad_norm": 3.3407493978398324, "learning_rate": 1.479405313185474e-06, "loss": 0.9418, "step": 1215 }, { "epoch": 0.014868438692064886, "grad_norm": 2.869322309831218, "learning_rate": 1.4854984157933222e-06, "loss": 0.927, "step": 1220 }, { "epoch": 0.014929374916212691, "grad_norm": 3.312661870829624, "learning_rate": 1.49159151840117e-06, "loss": 0.9039, "step": 1225 }, { "epoch": 0.014990311140360498, "grad_norm": 2.4967160989343333, "learning_rate": 1.4976846210090178e-06, "loss": 0.8522, "step": 1230 }, { "epoch": 0.015051247364508306, "grad_norm": 3.3222530447466547, "learning_rate": 1.5037777236168658e-06, "loss": 0.9292, "step": 1235 }, { "epoch": 0.015112183588656113, "grad_norm": 2.5436218948974716, "learning_rate": 1.5098708262247138e-06, "loss": 0.8387, "step": 1240 }, { "epoch": 0.01517311981280392, "grad_norm": 2.7032155191672214, "learning_rate": 1.5159639288325618e-06, "loss": 0.9522, "step": 1245 }, { "epoch": 0.015234056036951726, "grad_norm": 2.8140139977568817, "learning_rate": 1.5220570314404096e-06, "loss": 0.9119, "step": 1250 }, { "epoch": 0.015294992261099533, "grad_norm": 4.124121277203501, "learning_rate": 1.5281501340482574e-06, "loss": 0.9175, "step": 1255 }, { "epoch": 0.01535592848524734, "grad_norm": 2.925563488445132, "learning_rate": 1.5342432366561056e-06, "loss": 0.891, "step": 1260 }, { "epoch": 0.015416864709395147, "grad_norm": 3.6569753646786194, "learning_rate": 1.5403363392639534e-06, "loss": 0.9443, "step": 1265 }, { "epoch": 0.015477800933542955, "grad_norm": 3.079535016419565, "learning_rate": 1.5464294418718011e-06, "loss": 0.8413, "step": 1270 }, { "epoch": 0.01553873715769076, "grad_norm": 4.983348370392309, "learning_rate": 1.5525225444796491e-06, "loss": 0.8979, "step": 1275 }, { "epoch": 0.015599673381838567, "grad_norm": 3.273853302364632, "learning_rate": 1.5586156470874971e-06, "loss": 0.9505, "step": 1280 }, { "epoch": 0.015660609605986375, "grad_norm": 2.6331129726163183, "learning_rate": 1.5647087496953451e-06, "loss": 0.8181, "step": 1285 }, { "epoch": 0.01572154583013418, "grad_norm": 3.1200938548423527, "learning_rate": 1.570801852303193e-06, "loss": 0.9076, "step": 1290 }, { "epoch": 0.01578248205428199, "grad_norm": 3.148371697753742, "learning_rate": 1.5768949549110407e-06, "loss": 0.9044, "step": 1295 }, { "epoch": 0.015843418278429795, "grad_norm": 3.0077543149066166, "learning_rate": 1.582988057518889e-06, "loss": 0.8997, "step": 1300 }, { "epoch": 0.015904354502577604, "grad_norm": 3.460403985740263, "learning_rate": 1.5890811601267367e-06, "loss": 0.8418, "step": 1305 }, { "epoch": 0.01596529072672541, "grad_norm": 3.1339420665706954, "learning_rate": 1.5951742627345845e-06, "loss": 0.9673, "step": 1310 }, { "epoch": 0.016026226950873215, "grad_norm": 2.619364060662349, "learning_rate": 1.6012673653424325e-06, "loss": 0.9928, "step": 1315 }, { "epoch": 0.016087163175021024, "grad_norm": 3.5550846225440256, "learning_rate": 1.6073604679502805e-06, "loss": 1.0315, "step": 1320 }, { "epoch": 0.01614809939916883, "grad_norm": 3.1104990253288833, "learning_rate": 1.6134535705581284e-06, "loss": 0.9681, "step": 1325 }, { "epoch": 0.016209035623316638, "grad_norm": 2.788670339653882, "learning_rate": 1.6195466731659762e-06, "loss": 0.8865, "step": 1330 }, { "epoch": 0.016269971847464443, "grad_norm": 4.930072427968182, "learning_rate": 1.625639775773824e-06, "loss": 0.9351, "step": 1335 }, { "epoch": 0.01633090807161225, "grad_norm": 3.802565831782715, "learning_rate": 1.6317328783816722e-06, "loss": 0.9301, "step": 1340 }, { "epoch": 0.016391844295760058, "grad_norm": 2.9134646025300777, "learning_rate": 1.63782598098952e-06, "loss": 0.9565, "step": 1345 }, { "epoch": 0.016452780519907863, "grad_norm": 3.592277623717387, "learning_rate": 1.6439190835973678e-06, "loss": 0.9468, "step": 1350 }, { "epoch": 0.016513716744055672, "grad_norm": 2.5410261039617232, "learning_rate": 1.6500121862052158e-06, "loss": 0.8314, "step": 1355 }, { "epoch": 0.016574652968203478, "grad_norm": 2.7591701046371213, "learning_rate": 1.6561052888130638e-06, "loss": 0.9333, "step": 1360 }, { "epoch": 0.016635589192351283, "grad_norm": 3.1736265843180944, "learning_rate": 1.6621983914209116e-06, "loss": 0.9679, "step": 1365 }, { "epoch": 0.016696525416499092, "grad_norm": 3.04717827303303, "learning_rate": 1.6682914940287596e-06, "loss": 1.0036, "step": 1370 }, { "epoch": 0.016757461640646898, "grad_norm": 2.9453691302730496, "learning_rate": 1.6743845966366076e-06, "loss": 0.9428, "step": 1375 }, { "epoch": 0.016818397864794707, "grad_norm": 3.462640718917231, "learning_rate": 1.6804776992444555e-06, "loss": 0.9312, "step": 1380 }, { "epoch": 0.016879334088942512, "grad_norm": 2.624205309708396, "learning_rate": 1.6865708018523033e-06, "loss": 0.8681, "step": 1385 }, { "epoch": 0.01694027031309032, "grad_norm": 3.9694708809954955, "learning_rate": 1.6926639044601511e-06, "loss": 0.9377, "step": 1390 }, { "epoch": 0.017001206537238127, "grad_norm": 3.7668942208502845, "learning_rate": 1.6987570070679993e-06, "loss": 0.9516, "step": 1395 }, { "epoch": 0.017062142761385932, "grad_norm": 3.3439272141834877, "learning_rate": 1.7048501096758471e-06, "loss": 0.8972, "step": 1400 }, { "epoch": 0.01712307898553374, "grad_norm": 2.5467654830818685, "learning_rate": 1.7109432122836949e-06, "loss": 0.9139, "step": 1405 }, { "epoch": 0.017184015209681547, "grad_norm": 3.493236785151873, "learning_rate": 1.7170363148915429e-06, "loss": 0.968, "step": 1410 }, { "epoch": 0.017244951433829356, "grad_norm": 3.759341958039462, "learning_rate": 1.7231294174993909e-06, "loss": 0.9889, "step": 1415 }, { "epoch": 0.01730588765797716, "grad_norm": 3.658955713251779, "learning_rate": 1.7292225201072389e-06, "loss": 0.888, "step": 1420 }, { "epoch": 0.017366823882124967, "grad_norm": 3.1341749479180225, "learning_rate": 1.7353156227150867e-06, "loss": 0.8886, "step": 1425 }, { "epoch": 0.017427760106272776, "grad_norm": 3.0467595554772116, "learning_rate": 1.7414087253229344e-06, "loss": 0.9127, "step": 1430 }, { "epoch": 0.01748869633042058, "grad_norm": 2.8625105519301073, "learning_rate": 1.7475018279307826e-06, "loss": 0.8601, "step": 1435 }, { "epoch": 0.01754963255456839, "grad_norm": 3.1406143450827755, "learning_rate": 1.7535949305386304e-06, "loss": 1.0131, "step": 1440 }, { "epoch": 0.017610568778716196, "grad_norm": 4.434599200983635, "learning_rate": 1.7596880331464782e-06, "loss": 0.8846, "step": 1445 }, { "epoch": 0.017671505002864, "grad_norm": 2.7786280453957986, "learning_rate": 1.7657811357543262e-06, "loss": 0.9147, "step": 1450 }, { "epoch": 0.01773244122701181, "grad_norm": 2.685635871659692, "learning_rate": 1.7718742383621742e-06, "loss": 1.0013, "step": 1455 }, { "epoch": 0.017793377451159616, "grad_norm": 2.9504753891145876, "learning_rate": 1.7779673409700222e-06, "loss": 0.9506, "step": 1460 }, { "epoch": 0.017854313675307425, "grad_norm": 2.930383443702689, "learning_rate": 1.78406044357787e-06, "loss": 0.9767, "step": 1465 }, { "epoch": 0.01791524989945523, "grad_norm": 3.1904701804629716, "learning_rate": 1.7901535461857178e-06, "loss": 1.0079, "step": 1470 }, { "epoch": 0.017976186123603036, "grad_norm": 3.2337043051816643, "learning_rate": 1.796246648793566e-06, "loss": 0.9424, "step": 1475 }, { "epoch": 0.018037122347750845, "grad_norm": 2.8194404774291955, "learning_rate": 1.8023397514014138e-06, "loss": 0.9607, "step": 1480 }, { "epoch": 0.01809805857189865, "grad_norm": 3.6839034326322753, "learning_rate": 1.8084328540092615e-06, "loss": 0.9192, "step": 1485 }, { "epoch": 0.01815899479604646, "grad_norm": 3.787337506539989, "learning_rate": 1.8145259566171095e-06, "loss": 0.8994, "step": 1490 }, { "epoch": 0.018219931020194265, "grad_norm": 4.590700380896843, "learning_rate": 1.8206190592249575e-06, "loss": 1.0125, "step": 1495 }, { "epoch": 0.01828086724434207, "grad_norm": 2.860773714868024, "learning_rate": 1.8267121618328053e-06, "loss": 0.8983, "step": 1500 }, { "epoch": 0.01834180346848988, "grad_norm": 2.7789873336817856, "learning_rate": 1.8328052644406533e-06, "loss": 0.9267, "step": 1505 }, { "epoch": 0.018402739692637685, "grad_norm": 3.238873425635919, "learning_rate": 1.838898367048501e-06, "loss": 0.9347, "step": 1510 }, { "epoch": 0.018463675916785494, "grad_norm": 3.2396745210176405, "learning_rate": 1.8449914696563493e-06, "loss": 0.9808, "step": 1515 }, { "epoch": 0.0185246121409333, "grad_norm": 3.2352959237804466, "learning_rate": 1.851084572264197e-06, "loss": 0.9013, "step": 1520 }, { "epoch": 0.018585548365081105, "grad_norm": 2.7971025783946613, "learning_rate": 1.8571776748720449e-06, "loss": 0.8233, "step": 1525 }, { "epoch": 0.018646484589228914, "grad_norm": 5.33401897734639, "learning_rate": 1.863270777479893e-06, "loss": 0.9451, "step": 1530 }, { "epoch": 0.01870742081337672, "grad_norm": 3.1163890785754442, "learning_rate": 1.8693638800877409e-06, "loss": 0.8666, "step": 1535 }, { "epoch": 0.018768357037524528, "grad_norm": 2.6228858781250683, "learning_rate": 1.8754569826955886e-06, "loss": 0.9459, "step": 1540 }, { "epoch": 0.018829293261672334, "grad_norm": 3.1676688460874396, "learning_rate": 1.8815500853034366e-06, "loss": 0.9365, "step": 1545 }, { "epoch": 0.01889022948582014, "grad_norm": 2.5443464298443788, "learning_rate": 1.8876431879112846e-06, "loss": 0.9576, "step": 1550 }, { "epoch": 0.018951165709967948, "grad_norm": 2.927217193820488, "learning_rate": 1.8937362905191326e-06, "loss": 0.9106, "step": 1555 }, { "epoch": 0.019012101934115753, "grad_norm": 4.869214646727002, "learning_rate": 1.8998293931269804e-06, "loss": 0.8326, "step": 1560 }, { "epoch": 0.019073038158263562, "grad_norm": 2.97085496709296, "learning_rate": 1.9059224957348282e-06, "loss": 0.8892, "step": 1565 }, { "epoch": 0.019133974382411368, "grad_norm": 3.162913595908134, "learning_rate": 1.912015598342676e-06, "loss": 0.9596, "step": 1570 }, { "epoch": 0.019194910606559173, "grad_norm": 2.4022117136485734, "learning_rate": 1.9181087009505244e-06, "loss": 0.9588, "step": 1575 }, { "epoch": 0.019255846830706982, "grad_norm": 2.5657298454111297, "learning_rate": 1.924201803558372e-06, "loss": 1.0038, "step": 1580 }, { "epoch": 0.019316783054854788, "grad_norm": 3.0135622820520838, "learning_rate": 1.93029490616622e-06, "loss": 0.954, "step": 1585 }, { "epoch": 0.019377719279002597, "grad_norm": 3.002032961934701, "learning_rate": 1.936388008774068e-06, "loss": 0.9392, "step": 1590 }, { "epoch": 0.019438655503150402, "grad_norm": 3.462998122851637, "learning_rate": 1.942481111381916e-06, "loss": 0.8904, "step": 1595 }, { "epoch": 0.01949959172729821, "grad_norm": 3.008984298890071, "learning_rate": 1.9485742139897637e-06, "loss": 0.8911, "step": 1600 }, { "epoch": 0.019560527951446017, "grad_norm": 3.3611471098305628, "learning_rate": 1.9546673165976115e-06, "loss": 0.9734, "step": 1605 }, { "epoch": 0.019621464175593822, "grad_norm": 3.490110191484208, "learning_rate": 1.9607604192054597e-06, "loss": 0.8876, "step": 1610 }, { "epoch": 0.01968240039974163, "grad_norm": 2.6886422766433857, "learning_rate": 1.9668535218133075e-06, "loss": 0.9571, "step": 1615 }, { "epoch": 0.019743336623889437, "grad_norm": 2.8358945512076943, "learning_rate": 1.9729466244211553e-06, "loss": 0.9082, "step": 1620 }, { "epoch": 0.019804272848037246, "grad_norm": 3.5026283979351835, "learning_rate": 1.979039727029003e-06, "loss": 0.9364, "step": 1625 }, { "epoch": 0.01986520907218505, "grad_norm": 3.124898390876078, "learning_rate": 1.9851328296368513e-06, "loss": 0.9432, "step": 1630 }, { "epoch": 0.019926145296332857, "grad_norm": 2.735102443032615, "learning_rate": 1.991225932244699e-06, "loss": 0.9349, "step": 1635 }, { "epoch": 0.019987081520480666, "grad_norm": 3.4248613392974425, "learning_rate": 1.997319034852547e-06, "loss": 0.8827, "step": 1640 }, { "epoch": 0.02004801774462847, "grad_norm": 2.8796464051879966, "learning_rate": 2.003412137460395e-06, "loss": 0.8716, "step": 1645 }, { "epoch": 0.02010895396877628, "grad_norm": 3.5760468800048364, "learning_rate": 2.009505240068243e-06, "loss": 0.9309, "step": 1650 }, { "epoch": 0.020169890192924086, "grad_norm": 2.4904092342921365, "learning_rate": 2.0155983426760906e-06, "loss": 0.8161, "step": 1655 }, { "epoch": 0.02023082641707189, "grad_norm": 2.8241858249782132, "learning_rate": 2.021691445283939e-06, "loss": 0.9531, "step": 1660 }, { "epoch": 0.0202917626412197, "grad_norm": 3.1883369160964867, "learning_rate": 2.0277845478917866e-06, "loss": 0.902, "step": 1665 }, { "epoch": 0.020352698865367506, "grad_norm": 3.0003991756404096, "learning_rate": 2.033877650499635e-06, "loss": 0.9107, "step": 1670 }, { "epoch": 0.020413635089515315, "grad_norm": 2.832565811649498, "learning_rate": 2.0399707531074826e-06, "loss": 0.8477, "step": 1675 }, { "epoch": 0.02047457131366312, "grad_norm": 2.963393971059901, "learning_rate": 2.0460638557153304e-06, "loss": 0.9263, "step": 1680 }, { "epoch": 0.020535507537810926, "grad_norm": 2.6295245176584756, "learning_rate": 2.0521569583231786e-06, "loss": 0.8781, "step": 1685 }, { "epoch": 0.020596443761958735, "grad_norm": 4.519610771038712, "learning_rate": 2.0582500609310264e-06, "loss": 0.811, "step": 1690 }, { "epoch": 0.02065737998610654, "grad_norm": 2.9276457053112566, "learning_rate": 2.064343163538874e-06, "loss": 0.9326, "step": 1695 }, { "epoch": 0.02071831621025435, "grad_norm": 4.639025039955979, "learning_rate": 2.070436266146722e-06, "loss": 0.8967, "step": 1700 }, { "epoch": 0.020779252434402155, "grad_norm": 2.525941255377196, "learning_rate": 2.07652936875457e-06, "loss": 0.8903, "step": 1705 }, { "epoch": 0.02084018865854996, "grad_norm": 3.449539787251327, "learning_rate": 2.082622471362418e-06, "loss": 0.9213, "step": 1710 }, { "epoch": 0.02090112488269777, "grad_norm": 3.242273537907253, "learning_rate": 2.0887155739702657e-06, "loss": 0.8959, "step": 1715 }, { "epoch": 0.020962061106845575, "grad_norm": 2.625426193306078, "learning_rate": 2.0948086765781135e-06, "loss": 0.9304, "step": 1720 }, { "epoch": 0.021022997330993384, "grad_norm": 3.0512611273106347, "learning_rate": 2.1009017791859617e-06, "loss": 0.939, "step": 1725 }, { "epoch": 0.02108393355514119, "grad_norm": 2.6692172593576857, "learning_rate": 2.1069948817938095e-06, "loss": 0.8365, "step": 1730 }, { "epoch": 0.021144869779288995, "grad_norm": 3.177135481818799, "learning_rate": 2.1130879844016573e-06, "loss": 0.8932, "step": 1735 }, { "epoch": 0.021205806003436804, "grad_norm": 2.8203858968447815, "learning_rate": 2.1191810870095055e-06, "loss": 0.8866, "step": 1740 }, { "epoch": 0.02126674222758461, "grad_norm": 2.866141671990887, "learning_rate": 2.1252741896173533e-06, "loss": 0.8672, "step": 1745 }, { "epoch": 0.021327678451732418, "grad_norm": 3.5380449535899268, "learning_rate": 2.1313672922252015e-06, "loss": 0.8691, "step": 1750 }, { "epoch": 0.021388614675880224, "grad_norm": 3.7711457698542126, "learning_rate": 2.1374603948330493e-06, "loss": 0.9371, "step": 1755 }, { "epoch": 0.02144955090002803, "grad_norm": 3.14320562927681, "learning_rate": 2.143553497440897e-06, "loss": 0.9575, "step": 1760 }, { "epoch": 0.021510487124175838, "grad_norm": 3.259132134644347, "learning_rate": 2.1496466000487452e-06, "loss": 0.9435, "step": 1765 }, { "epoch": 0.021571423348323644, "grad_norm": 2.683636826740316, "learning_rate": 2.155739702656593e-06, "loss": 0.9147, "step": 1770 }, { "epoch": 0.021632359572471452, "grad_norm": 3.0743522701032124, "learning_rate": 2.161832805264441e-06, "loss": 0.9109, "step": 1775 }, { "epoch": 0.021693295796619258, "grad_norm": 2.8152089851266813, "learning_rate": 2.1679259078722886e-06, "loss": 0.9419, "step": 1780 }, { "epoch": 0.021754232020767063, "grad_norm": 2.956073272729089, "learning_rate": 2.174019010480137e-06, "loss": 0.8615, "step": 1785 }, { "epoch": 0.021815168244914872, "grad_norm": 3.3769677462947953, "learning_rate": 2.1801121130879846e-06, "loss": 0.9606, "step": 1790 }, { "epoch": 0.021876104469062678, "grad_norm": 3.3203997891847954, "learning_rate": 2.1862052156958324e-06, "loss": 1.0035, "step": 1795 }, { "epoch": 0.021937040693210487, "grad_norm": 3.3066407797568584, "learning_rate": 2.19229831830368e-06, "loss": 0.9608, "step": 1800 }, { "epoch": 0.021997976917358292, "grad_norm": 3.1278529462880607, "learning_rate": 2.1983914209115284e-06, "loss": 0.8809, "step": 1805 }, { "epoch": 0.0220589131415061, "grad_norm": 2.9113833813689447, "learning_rate": 2.204484523519376e-06, "loss": 0.9305, "step": 1810 }, { "epoch": 0.022119849365653907, "grad_norm": 2.5671261385476627, "learning_rate": 2.210577626127224e-06, "loss": 0.8759, "step": 1815 }, { "epoch": 0.022180785589801712, "grad_norm": 4.1211504339224145, "learning_rate": 2.216670728735072e-06, "loss": 0.9442, "step": 1820 }, { "epoch": 0.02224172181394952, "grad_norm": 2.8677317252572134, "learning_rate": 2.22276383134292e-06, "loss": 0.9675, "step": 1825 }, { "epoch": 0.022302658038097327, "grad_norm": 7.670611990351148, "learning_rate": 2.2288569339507677e-06, "loss": 0.8922, "step": 1830 }, { "epoch": 0.022363594262245136, "grad_norm": 2.902397128003386, "learning_rate": 2.234950036558616e-06, "loss": 0.8873, "step": 1835 }, { "epoch": 0.02242453048639294, "grad_norm": 2.860112008549231, "learning_rate": 2.2410431391664637e-06, "loss": 1.0161, "step": 1840 }, { "epoch": 0.022485466710540747, "grad_norm": 2.658237975041591, "learning_rate": 2.247136241774312e-06, "loss": 0.9202, "step": 1845 }, { "epoch": 0.022546402934688556, "grad_norm": 4.035948733811586, "learning_rate": 2.2532293443821597e-06, "loss": 0.8695, "step": 1850 }, { "epoch": 0.02260733915883636, "grad_norm": 2.7214743697476527, "learning_rate": 2.2593224469900075e-06, "loss": 0.9017, "step": 1855 }, { "epoch": 0.02266827538298417, "grad_norm": 3.2165212607654965, "learning_rate": 2.2654155495978557e-06, "loss": 0.8807, "step": 1860 }, { "epoch": 0.022729211607131976, "grad_norm": 2.868060547920833, "learning_rate": 2.2715086522057035e-06, "loss": 0.8952, "step": 1865 }, { "epoch": 0.02279014783127978, "grad_norm": 2.7888750166233782, "learning_rate": 2.2776017548135512e-06, "loss": 0.9499, "step": 1870 }, { "epoch": 0.02285108405542759, "grad_norm": 5.5599202188623265, "learning_rate": 2.283694857421399e-06, "loss": 0.8712, "step": 1875 }, { "epoch": 0.022912020279575396, "grad_norm": 3.175453999857402, "learning_rate": 2.2897879600292472e-06, "loss": 0.9245, "step": 1880 }, { "epoch": 0.022972956503723205, "grad_norm": 4.8373276387771815, "learning_rate": 2.295881062637095e-06, "loss": 0.9562, "step": 1885 }, { "epoch": 0.02303389272787101, "grad_norm": 2.9621635733913148, "learning_rate": 2.301974165244943e-06, "loss": 0.9434, "step": 1890 }, { "epoch": 0.023094828952018816, "grad_norm": 2.7520355103804692, "learning_rate": 2.3080672678527906e-06, "loss": 0.9063, "step": 1895 }, { "epoch": 0.023155765176166625, "grad_norm": 2.8578846000714773, "learning_rate": 2.3141603704606388e-06, "loss": 0.9014, "step": 1900 }, { "epoch": 0.02321670140031443, "grad_norm": 3.1685241202782737, "learning_rate": 2.3202534730684866e-06, "loss": 0.9775, "step": 1905 }, { "epoch": 0.02327763762446224, "grad_norm": 2.975617357813839, "learning_rate": 2.3263465756763344e-06, "loss": 0.8516, "step": 1910 }, { "epoch": 0.023338573848610045, "grad_norm": 3.020777533677601, "learning_rate": 2.3324396782841826e-06, "loss": 0.8991, "step": 1915 }, { "epoch": 0.02339951007275785, "grad_norm": 3.4253173494152236, "learning_rate": 2.3385327808920303e-06, "loss": 0.9709, "step": 1920 }, { "epoch": 0.02346044629690566, "grad_norm": 3.2247612797837797, "learning_rate": 2.3446258834998786e-06, "loss": 0.8875, "step": 1925 }, { "epoch": 0.023521382521053465, "grad_norm": 2.7768542807531227, "learning_rate": 2.3507189861077263e-06, "loss": 0.9761, "step": 1930 }, { "epoch": 0.023582318745201274, "grad_norm": 2.9746473490964465, "learning_rate": 2.356812088715574e-06, "loss": 0.8943, "step": 1935 }, { "epoch": 0.02364325496934908, "grad_norm": 3.093339040995718, "learning_rate": 2.3629051913234223e-06, "loss": 0.8521, "step": 1940 }, { "epoch": 0.023704191193496885, "grad_norm": 3.626978203052832, "learning_rate": 2.36899829393127e-06, "loss": 0.8452, "step": 1945 }, { "epoch": 0.023765127417644694, "grad_norm": 2.9804738330360525, "learning_rate": 2.375091396539118e-06, "loss": 0.9232, "step": 1950 }, { "epoch": 0.0238260636417925, "grad_norm": 2.452175936788015, "learning_rate": 2.3811844991469657e-06, "loss": 0.8782, "step": 1955 }, { "epoch": 0.023886999865940308, "grad_norm": 3.231864854126322, "learning_rate": 2.387277601754814e-06, "loss": 0.8486, "step": 1960 }, { "epoch": 0.023947936090088114, "grad_norm": 3.171001510870015, "learning_rate": 2.3933707043626617e-06, "loss": 0.944, "step": 1965 }, { "epoch": 0.02400887231423592, "grad_norm": 2.557585803713419, "learning_rate": 2.3994638069705094e-06, "loss": 0.8622, "step": 1970 }, { "epoch": 0.024069808538383728, "grad_norm": 3.1311567361935064, "learning_rate": 2.4055569095783572e-06, "loss": 0.8178, "step": 1975 }, { "epoch": 0.024130744762531534, "grad_norm": 3.2917811127014227, "learning_rate": 2.4116500121862054e-06, "loss": 0.9248, "step": 1980 }, { "epoch": 0.024191680986679343, "grad_norm": 3.3428694974596285, "learning_rate": 2.4177431147940532e-06, "loss": 0.9114, "step": 1985 }, { "epoch": 0.024252617210827148, "grad_norm": 2.6402831269814877, "learning_rate": 2.423836217401901e-06, "loss": 0.8943, "step": 1990 }, { "epoch": 0.024313553434974954, "grad_norm": 3.89912089668144, "learning_rate": 2.4299293200097492e-06, "loss": 0.9659, "step": 1995 }, { "epoch": 0.024374489659122762, "grad_norm": 2.7053970945669, "learning_rate": 2.436022422617597e-06, "loss": 0.8581, "step": 2000 }, { "epoch": 0.024435425883270568, "grad_norm": 3.4171552463684196, "learning_rate": 2.4421155252254448e-06, "loss": 0.8501, "step": 2005 }, { "epoch": 0.024496362107418377, "grad_norm": 2.4571568871328253, "learning_rate": 2.448208627833293e-06, "loss": 0.9646, "step": 2010 }, { "epoch": 0.024557298331566182, "grad_norm": 4.4136065605895345, "learning_rate": 2.4543017304411408e-06, "loss": 0.8882, "step": 2015 }, { "epoch": 0.02461823455571399, "grad_norm": 2.826011485162239, "learning_rate": 2.460394833048989e-06, "loss": 0.8863, "step": 2020 }, { "epoch": 0.024679170779861797, "grad_norm": 2.8913637637177154, "learning_rate": 2.4664879356568368e-06, "loss": 0.8742, "step": 2025 }, { "epoch": 0.024740107004009602, "grad_norm": 3.2107183466478504, "learning_rate": 2.4725810382646845e-06, "loss": 0.9477, "step": 2030 }, { "epoch": 0.02480104322815741, "grad_norm": 3.4183484784058367, "learning_rate": 2.4786741408725328e-06, "loss": 0.8794, "step": 2035 }, { "epoch": 0.024861979452305217, "grad_norm": 2.3853345811891393, "learning_rate": 2.4847672434803805e-06, "loss": 0.911, "step": 2040 }, { "epoch": 0.024922915676453026, "grad_norm": 2.8481672626742993, "learning_rate": 2.4908603460882283e-06, "loss": 0.9193, "step": 2045 }, { "epoch": 0.02498385190060083, "grad_norm": 3.3499801162690623, "learning_rate": 2.496953448696076e-06, "loss": 0.881, "step": 2050 }, { "epoch": 0.025044788124748637, "grad_norm": 3.0876198988807904, "learning_rate": 2.5030465513039243e-06, "loss": 0.8787, "step": 2055 }, { "epoch": 0.025105724348896446, "grad_norm": 2.473085473020387, "learning_rate": 2.5091396539117717e-06, "loss": 0.8974, "step": 2060 }, { "epoch": 0.02516666057304425, "grad_norm": 3.2196817164978646, "learning_rate": 2.51523275651962e-06, "loss": 0.8498, "step": 2065 }, { "epoch": 0.02522759679719206, "grad_norm": 4.088997055357136, "learning_rate": 2.521325859127468e-06, "loss": 0.8888, "step": 2070 }, { "epoch": 0.025288533021339866, "grad_norm": 3.0631670512719866, "learning_rate": 2.5274189617353154e-06, "loss": 0.8049, "step": 2075 }, { "epoch": 0.02534946924548767, "grad_norm": 2.7808609324303317, "learning_rate": 2.5335120643431636e-06, "loss": 1.0798, "step": 2080 }, { "epoch": 0.02541040546963548, "grad_norm": 3.2051606388480756, "learning_rate": 2.539605166951012e-06, "loss": 0.9331, "step": 2085 }, { "epoch": 0.025471341693783286, "grad_norm": 3.45872387314215, "learning_rate": 2.5456982695588596e-06, "loss": 0.943, "step": 2090 }, { "epoch": 0.025532277917931095, "grad_norm": 2.9906279274548195, "learning_rate": 2.5517913721667074e-06, "loss": 0.8393, "step": 2095 }, { "epoch": 0.0255932141420789, "grad_norm": 3.1585900413154837, "learning_rate": 2.557884474774555e-06, "loss": 0.853, "step": 2100 }, { "epoch": 0.025654150366226706, "grad_norm": 3.6622596684143356, "learning_rate": 2.5639775773824034e-06, "loss": 0.9125, "step": 2105 }, { "epoch": 0.025715086590374515, "grad_norm": 2.7874381927253453, "learning_rate": 2.570070679990251e-06, "loss": 0.8138, "step": 2110 }, { "epoch": 0.02577602281452232, "grad_norm": 2.8930971747683336, "learning_rate": 2.576163782598099e-06, "loss": 0.8661, "step": 2115 }, { "epoch": 0.02583695903867013, "grad_norm": 2.6958162684103963, "learning_rate": 2.582256885205947e-06, "loss": 0.911, "step": 2120 }, { "epoch": 0.025897895262817935, "grad_norm": 2.6022701340840966, "learning_rate": 2.5883499878137954e-06, "loss": 0.9105, "step": 2125 }, { "epoch": 0.02595883148696574, "grad_norm": 2.6466083875408293, "learning_rate": 2.5944430904216428e-06, "loss": 0.8397, "step": 2130 }, { "epoch": 0.02601976771111355, "grad_norm": 2.953382723933664, "learning_rate": 2.600536193029491e-06, "loss": 0.8989, "step": 2135 }, { "epoch": 0.026080703935261355, "grad_norm": 3.4571868650352333, "learning_rate": 2.606629295637339e-06, "loss": 0.9222, "step": 2140 }, { "epoch": 0.026141640159409164, "grad_norm": 2.852872158143792, "learning_rate": 2.6127223982451865e-06, "loss": 0.9728, "step": 2145 }, { "epoch": 0.02620257638355697, "grad_norm": 3.121359075506037, "learning_rate": 2.6188155008530347e-06, "loss": 0.8887, "step": 2150 }, { "epoch": 0.026263512607704775, "grad_norm": 4.855028205158237, "learning_rate": 2.624908603460882e-06, "loss": 0.8708, "step": 2155 }, { "epoch": 0.026324448831852584, "grad_norm": 2.4699828528578998, "learning_rate": 2.6310017060687303e-06, "loss": 0.9098, "step": 2160 }, { "epoch": 0.02638538505600039, "grad_norm": 2.6546332458114725, "learning_rate": 2.6370948086765785e-06, "loss": 0.8846, "step": 2165 }, { "epoch": 0.026446321280148198, "grad_norm": 3.182516328043099, "learning_rate": 2.6431879112844263e-06, "loss": 0.9347, "step": 2170 }, { "epoch": 0.026507257504296004, "grad_norm": 3.4905404619878024, "learning_rate": 2.649281013892274e-06, "loss": 0.9495, "step": 2175 }, { "epoch": 0.02656819372844381, "grad_norm": 2.5891165200367214, "learning_rate": 2.6553741165001223e-06, "loss": 0.8927, "step": 2180 }, { "epoch": 0.026629129952591618, "grad_norm": 2.9406848679694955, "learning_rate": 2.66146721910797e-06, "loss": 0.9293, "step": 2185 }, { "epoch": 0.026690066176739424, "grad_norm": 3.8109643156690143, "learning_rate": 2.667560321715818e-06, "loss": 0.8557, "step": 2190 }, { "epoch": 0.026751002400887233, "grad_norm": 2.994052855719372, "learning_rate": 2.6736534243236656e-06, "loss": 0.8555, "step": 2195 }, { "epoch": 0.026811938625035038, "grad_norm": 3.0720738757426878, "learning_rate": 2.679746526931514e-06, "loss": 0.9246, "step": 2200 }, { "epoch": 0.026872874849182844, "grad_norm": 2.9180086761012456, "learning_rate": 2.685839629539362e-06, "loss": 0.8752, "step": 2205 }, { "epoch": 0.026933811073330653, "grad_norm": 2.8415676185661995, "learning_rate": 2.6919327321472094e-06, "loss": 0.8743, "step": 2210 }, { "epoch": 0.026994747297478458, "grad_norm": 3.9586063465264565, "learning_rate": 2.6980258347550576e-06, "loss": 0.9134, "step": 2215 }, { "epoch": 0.027055683521626267, "grad_norm": 3.0155858379117717, "learning_rate": 2.704118937362906e-06, "loss": 0.9354, "step": 2220 }, { "epoch": 0.027116619745774072, "grad_norm": 2.5888497396180576, "learning_rate": 2.710212039970753e-06, "loss": 0.9271, "step": 2225 }, { "epoch": 0.02717755596992188, "grad_norm": 2.7566754707589824, "learning_rate": 2.7163051425786014e-06, "loss": 0.9335, "step": 2230 }, { "epoch": 0.027238492194069687, "grad_norm": 2.7310562640196787, "learning_rate": 2.7223982451864487e-06, "loss": 0.9045, "step": 2235 }, { "epoch": 0.027299428418217492, "grad_norm": 3.643233040772188, "learning_rate": 2.728491347794297e-06, "loss": 0.8332, "step": 2240 }, { "epoch": 0.0273603646423653, "grad_norm": 3.0190868747746307, "learning_rate": 2.734584450402145e-06, "loss": 0.8302, "step": 2245 }, { "epoch": 0.027421300866513107, "grad_norm": 6.252617397052888, "learning_rate": 2.7406775530099925e-06, "loss": 0.9074, "step": 2250 }, { "epoch": 0.027482237090660916, "grad_norm": 3.603934834824472, "learning_rate": 2.7467706556178407e-06, "loss": 0.868, "step": 2255 }, { "epoch": 0.02754317331480872, "grad_norm": 2.4170505282002552, "learning_rate": 2.752863758225689e-06, "loss": 0.9037, "step": 2260 }, { "epoch": 0.027604109538956527, "grad_norm": 2.9778788920985955, "learning_rate": 2.7589568608335367e-06, "loss": 0.9792, "step": 2265 }, { "epoch": 0.027665045763104336, "grad_norm": 2.8150049896867166, "learning_rate": 2.7650499634413845e-06, "loss": 0.9248, "step": 2270 }, { "epoch": 0.02772598198725214, "grad_norm": 4.022844310353356, "learning_rate": 2.7711430660492323e-06, "loss": 0.8519, "step": 2275 }, { "epoch": 0.02778691821139995, "grad_norm": 3.1676269365862857, "learning_rate": 2.7772361686570805e-06, "loss": 0.9033, "step": 2280 }, { "epoch": 0.027847854435547756, "grad_norm": 2.8729806872152706, "learning_rate": 2.7833292712649283e-06, "loss": 0.8489, "step": 2285 }, { "epoch": 0.02790879065969556, "grad_norm": 3.569376515759466, "learning_rate": 2.789422373872776e-06, "loss": 0.8662, "step": 2290 }, { "epoch": 0.02796972688384337, "grad_norm": 2.541901779193836, "learning_rate": 2.7955154764806243e-06, "loss": 0.8717, "step": 2295 }, { "epoch": 0.028030663107991176, "grad_norm": 2.9825087834823165, "learning_rate": 2.8016085790884725e-06, "loss": 0.8679, "step": 2300 }, { "epoch": 0.028091599332138985, "grad_norm": 2.9294014155331465, "learning_rate": 2.80770168169632e-06, "loss": 0.8284, "step": 2305 }, { "epoch": 0.02815253555628679, "grad_norm": 2.8294799447177836, "learning_rate": 2.813794784304168e-06, "loss": 0.8261, "step": 2310 }, { "epoch": 0.028213471780434596, "grad_norm": 3.0795368771013636, "learning_rate": 2.8198878869120162e-06, "loss": 0.8481, "step": 2315 }, { "epoch": 0.028274408004582405, "grad_norm": 4.015085383025379, "learning_rate": 2.8259809895198636e-06, "loss": 0.9197, "step": 2320 }, { "epoch": 0.02833534422873021, "grad_norm": 3.022550656576845, "learning_rate": 2.832074092127712e-06, "loss": 0.8528, "step": 2325 }, { "epoch": 0.02839628045287802, "grad_norm": 3.3963913769382903, "learning_rate": 2.838167194735559e-06, "loss": 0.9327, "step": 2330 }, { "epoch": 0.028457216677025825, "grad_norm": 3.5386051529779294, "learning_rate": 2.8442602973434074e-06, "loss": 1.0279, "step": 2335 }, { "epoch": 0.02851815290117363, "grad_norm": 2.8276352779963436, "learning_rate": 2.8503533999512556e-06, "loss": 0.8874, "step": 2340 }, { "epoch": 0.02857908912532144, "grad_norm": 3.4343725699834926, "learning_rate": 2.8564465025591034e-06, "loss": 0.9394, "step": 2345 }, { "epoch": 0.028640025349469245, "grad_norm": 2.7061310526625793, "learning_rate": 2.862539605166951e-06, "loss": 0.8867, "step": 2350 }, { "epoch": 0.028700961573617054, "grad_norm": 2.9858954346640445, "learning_rate": 2.8686327077747994e-06, "loss": 0.8923, "step": 2355 }, { "epoch": 0.02876189779776486, "grad_norm": 3.6749996217761014, "learning_rate": 2.874725810382647e-06, "loss": 0.9391, "step": 2360 }, { "epoch": 0.028822834021912665, "grad_norm": 3.0318453735695123, "learning_rate": 2.880818912990495e-06, "loss": 0.8953, "step": 2365 }, { "epoch": 0.028883770246060474, "grad_norm": 4.220639145950532, "learning_rate": 2.8869120155983427e-06, "loss": 0.9335, "step": 2370 }, { "epoch": 0.02894470647020828, "grad_norm": 3.9579970421276793, "learning_rate": 2.893005118206191e-06, "loss": 0.8268, "step": 2375 }, { "epoch": 0.029005642694356088, "grad_norm": 3.695412824887408, "learning_rate": 2.8990982208140387e-06, "loss": 0.8859, "step": 2380 }, { "epoch": 0.029066578918503894, "grad_norm": 2.713667119667507, "learning_rate": 2.9051913234218865e-06, "loss": 0.8684, "step": 2385 }, { "epoch": 0.0291275151426517, "grad_norm": 2.611128405939939, "learning_rate": 2.9112844260297347e-06, "loss": 0.9392, "step": 2390 }, { "epoch": 0.029188451366799508, "grad_norm": 2.8679930024395146, "learning_rate": 2.917377528637583e-06, "loss": 0.8996, "step": 2395 }, { "epoch": 0.029249387590947314, "grad_norm": 2.7824082240382464, "learning_rate": 2.9234706312454303e-06, "loss": 0.9087, "step": 2400 }, { "epoch": 0.029310323815095123, "grad_norm": 2.6484047433388285, "learning_rate": 2.9295637338532785e-06, "loss": 0.8464, "step": 2405 }, { "epoch": 0.029371260039242928, "grad_norm": 2.647759766369723, "learning_rate": 2.935656836461126e-06, "loss": 0.9183, "step": 2410 }, { "epoch": 0.029432196263390734, "grad_norm": 4.297942528338698, "learning_rate": 2.941749939068974e-06, "loss": 0.8679, "step": 2415 }, { "epoch": 0.029493132487538543, "grad_norm": 2.467262588281947, "learning_rate": 2.9478430416768222e-06, "loss": 0.8757, "step": 2420 }, { "epoch": 0.029554068711686348, "grad_norm": 2.9435190284900368, "learning_rate": 2.9539361442846696e-06, "loss": 0.8913, "step": 2425 }, { "epoch": 0.029615004935834157, "grad_norm": 2.9695802375302653, "learning_rate": 2.960029246892518e-06, "loss": 0.9559, "step": 2430 }, { "epoch": 0.029675941159981963, "grad_norm": 3.162334429183133, "learning_rate": 2.966122349500366e-06, "loss": 0.8617, "step": 2435 }, { "epoch": 0.02973687738412977, "grad_norm": 2.849089104812545, "learning_rate": 2.972215452108214e-06, "loss": 0.8513, "step": 2440 }, { "epoch": 0.029797813608277577, "grad_norm": 2.570734078469627, "learning_rate": 2.9783085547160616e-06, "loss": 0.8753, "step": 2445 }, { "epoch": 0.029858749832425382, "grad_norm": 3.302601255970864, "learning_rate": 2.9844016573239098e-06, "loss": 0.869, "step": 2450 }, { "epoch": 0.02991968605657319, "grad_norm": 2.521287717638087, "learning_rate": 2.9904947599317576e-06, "loss": 0.9234, "step": 2455 }, { "epoch": 0.029980622280720997, "grad_norm": 2.9890876787505163, "learning_rate": 2.9965878625396054e-06, "loss": 0.8826, "step": 2460 }, { "epoch": 0.030041558504868806, "grad_norm": 3.748559224197828, "learning_rate": 3.002680965147453e-06, "loss": 0.9718, "step": 2465 }, { "epoch": 0.03010249472901661, "grad_norm": 3.520094122992771, "learning_rate": 3.0087740677553013e-06, "loss": 0.9142, "step": 2470 }, { "epoch": 0.030163430953164417, "grad_norm": 2.5774895197370586, "learning_rate": 3.0148671703631495e-06, "loss": 0.8961, "step": 2475 }, { "epoch": 0.030224367177312226, "grad_norm": 2.8153944231591095, "learning_rate": 3.020960272970997e-06, "loss": 0.8922, "step": 2480 }, { "epoch": 0.03028530340146003, "grad_norm": 4.451010057446654, "learning_rate": 3.027053375578845e-06, "loss": 0.9029, "step": 2485 }, { "epoch": 0.03034623962560784, "grad_norm": 3.212730359190259, "learning_rate": 3.0331464781866933e-06, "loss": 0.9037, "step": 2490 }, { "epoch": 0.030407175849755646, "grad_norm": 3.7238309074221494, "learning_rate": 3.0392395807945407e-06, "loss": 0.8905, "step": 2495 }, { "epoch": 0.03046811207390345, "grad_norm": 2.782577543985592, "learning_rate": 3.045332683402389e-06, "loss": 0.8977, "step": 2500 }, { "epoch": 0.03052904829805126, "grad_norm": 3.5804342543149583, "learning_rate": 3.0514257860102362e-06, "loss": 0.864, "step": 2505 }, { "epoch": 0.030589984522199066, "grad_norm": 2.69327528875375, "learning_rate": 3.0575188886180845e-06, "loss": 0.8927, "step": 2510 }, { "epoch": 0.030650920746346875, "grad_norm": 3.2017886039371266, "learning_rate": 3.0636119912259327e-06, "loss": 0.9222, "step": 2515 }, { "epoch": 0.03071185697049468, "grad_norm": 3.2374838907571757, "learning_rate": 3.0697050938337804e-06, "loss": 0.958, "step": 2520 }, { "epoch": 0.030772793194642486, "grad_norm": 3.781902368380982, "learning_rate": 3.0757981964416282e-06, "loss": 0.8857, "step": 2525 }, { "epoch": 0.030833729418790295, "grad_norm": 2.661836518700419, "learning_rate": 3.0818912990494764e-06, "loss": 0.9812, "step": 2530 }, { "epoch": 0.0308946656429381, "grad_norm": 3.309649096386536, "learning_rate": 3.0879844016573242e-06, "loss": 1.013, "step": 2535 }, { "epoch": 0.03095560186708591, "grad_norm": 3.149592248640534, "learning_rate": 3.094077504265172e-06, "loss": 0.8876, "step": 2540 }, { "epoch": 0.031016538091233715, "grad_norm": 3.4139339934985227, "learning_rate": 3.1001706068730198e-06, "loss": 0.8645, "step": 2545 }, { "epoch": 0.03107747431538152, "grad_norm": 2.8935354059695215, "learning_rate": 3.106263709480868e-06, "loss": 0.8418, "step": 2550 }, { "epoch": 0.03113841053952933, "grad_norm": 2.7033320663418983, "learning_rate": 3.1123568120887158e-06, "loss": 0.924, "step": 2555 }, { "epoch": 0.031199346763677135, "grad_norm": 2.9060729413811, "learning_rate": 3.1184499146965636e-06, "loss": 0.8854, "step": 2560 }, { "epoch": 0.03126028298782494, "grad_norm": 2.926554152933258, "learning_rate": 3.1245430173044118e-06, "loss": 0.9523, "step": 2565 }, { "epoch": 0.03132121921197275, "grad_norm": 5.4343602509207845, "learning_rate": 3.13063611991226e-06, "loss": 0.8578, "step": 2570 }, { "epoch": 0.03138215543612056, "grad_norm": 2.675948314523667, "learning_rate": 3.1367292225201073e-06, "loss": 0.9267, "step": 2575 }, { "epoch": 0.03144309166026836, "grad_norm": 2.8482839421056463, "learning_rate": 3.1428223251279555e-06, "loss": 0.8887, "step": 2580 }, { "epoch": 0.03150402788441617, "grad_norm": 3.7953449530294967, "learning_rate": 3.148915427735803e-06, "loss": 0.8527, "step": 2585 }, { "epoch": 0.03156496410856398, "grad_norm": 3.382147311762555, "learning_rate": 3.155008530343651e-06, "loss": 0.9206, "step": 2590 }, { "epoch": 0.03162590033271179, "grad_norm": 2.751220312023275, "learning_rate": 3.1611016329514993e-06, "loss": 0.9127, "step": 2595 }, { "epoch": 0.03168683655685959, "grad_norm": 3.514793633738441, "learning_rate": 3.1671947355593467e-06, "loss": 0.8972, "step": 2600 }, { "epoch": 0.0317477727810074, "grad_norm": 3.592369421918894, "learning_rate": 3.173287838167195e-06, "loss": 0.9316, "step": 2605 }, { "epoch": 0.03180870900515521, "grad_norm": 2.967865661641282, "learning_rate": 3.179380940775043e-06, "loss": 0.8327, "step": 2610 }, { "epoch": 0.03186964522930301, "grad_norm": 2.8531271463967496, "learning_rate": 3.185474043382891e-06, "loss": 0.8591, "step": 2615 }, { "epoch": 0.03193058145345082, "grad_norm": 2.7799009463515088, "learning_rate": 3.1915671459907387e-06, "loss": 0.7973, "step": 2620 }, { "epoch": 0.03199151767759863, "grad_norm": 3.3334691117622524, "learning_rate": 3.197660248598587e-06, "loss": 0.9865, "step": 2625 }, { "epoch": 0.03205245390174643, "grad_norm": 4.59152065556489, "learning_rate": 3.2037533512064346e-06, "loss": 0.8983, "step": 2630 }, { "epoch": 0.03211339012589424, "grad_norm": 2.852787619432462, "learning_rate": 3.2098464538142824e-06, "loss": 0.969, "step": 2635 }, { "epoch": 0.03217432635004205, "grad_norm": 3.960558022060942, "learning_rate": 3.2159395564221302e-06, "loss": 0.8186, "step": 2640 }, { "epoch": 0.032235262574189856, "grad_norm": 3.11101078400435, "learning_rate": 3.2220326590299784e-06, "loss": 0.9928, "step": 2645 }, { "epoch": 0.03229619879833766, "grad_norm": 3.4457908391537178, "learning_rate": 3.2281257616378266e-06, "loss": 1.0106, "step": 2650 }, { "epoch": 0.03235713502248547, "grad_norm": 2.883284998028721, "learning_rate": 3.234218864245674e-06, "loss": 0.8465, "step": 2655 }, { "epoch": 0.032418071246633276, "grad_norm": 3.3322846843896077, "learning_rate": 3.240311966853522e-06, "loss": 0.9577, "step": 2660 }, { "epoch": 0.03247900747078108, "grad_norm": 2.9634509158425844, "learning_rate": 3.2464050694613704e-06, "loss": 0.9223, "step": 2665 }, { "epoch": 0.03253994369492889, "grad_norm": 3.495972398576219, "learning_rate": 3.2524981720692178e-06, "loss": 0.872, "step": 2670 }, { "epoch": 0.032600879919076696, "grad_norm": 2.482939897067202, "learning_rate": 3.258591274677066e-06, "loss": 0.9013, "step": 2675 }, { "epoch": 0.0326618161432245, "grad_norm": 5.483893195778087, "learning_rate": 3.2646843772849133e-06, "loss": 0.9482, "step": 2680 }, { "epoch": 0.03272275236737231, "grad_norm": 2.225663776142464, "learning_rate": 3.2707774798927615e-06, "loss": 0.8643, "step": 2685 }, { "epoch": 0.032783688591520116, "grad_norm": 2.5630486097493312, "learning_rate": 3.2768705825006097e-06, "loss": 0.8674, "step": 2690 }, { "epoch": 0.032844624815667925, "grad_norm": 2.17593909685018, "learning_rate": 3.282963685108457e-06, "loss": 0.8742, "step": 2695 }, { "epoch": 0.03290556103981573, "grad_norm": 3.2740660835303266, "learning_rate": 3.2890567877163053e-06, "loss": 0.9456, "step": 2700 }, { "epoch": 0.032966497263963536, "grad_norm": 2.8130166805019083, "learning_rate": 3.2951498903241535e-06, "loss": 0.9156, "step": 2705 }, { "epoch": 0.033027433488111345, "grad_norm": 2.9315051628568183, "learning_rate": 3.3012429929320013e-06, "loss": 0.878, "step": 2710 }, { "epoch": 0.03308836971225915, "grad_norm": 2.5316395403179857, "learning_rate": 3.307336095539849e-06, "loss": 0.9289, "step": 2715 }, { "epoch": 0.033149305936406956, "grad_norm": 2.4183595423627176, "learning_rate": 3.313429198147697e-06, "loss": 0.8295, "step": 2720 }, { "epoch": 0.033210242160554765, "grad_norm": 2.5515219358055625, "learning_rate": 3.319522300755545e-06, "loss": 0.8954, "step": 2725 }, { "epoch": 0.03327117838470257, "grad_norm": 4.09632396786243, "learning_rate": 3.325615403363393e-06, "loss": 0.8487, "step": 2730 }, { "epoch": 0.033332114608850376, "grad_norm": 3.0647328388251633, "learning_rate": 3.3317085059712406e-06, "loss": 0.8331, "step": 2735 }, { "epoch": 0.033393050832998185, "grad_norm": 2.5053551822823135, "learning_rate": 3.337801608579089e-06, "loss": 0.7579, "step": 2740 }, { "epoch": 0.033453987057145994, "grad_norm": 3.2760406700811195, "learning_rate": 3.343894711186937e-06, "loss": 0.9083, "step": 2745 }, { "epoch": 0.033514923281293796, "grad_norm": 4.567378548938428, "learning_rate": 3.3499878137947844e-06, "loss": 0.9699, "step": 2750 }, { "epoch": 0.033575859505441605, "grad_norm": 2.9454691154168526, "learning_rate": 3.3560809164026326e-06, "loss": 0.8675, "step": 2755 }, { "epoch": 0.033636795729589414, "grad_norm": 3.0031117288438414, "learning_rate": 3.362174019010481e-06, "loss": 0.9444, "step": 2760 }, { "epoch": 0.033697731953737216, "grad_norm": 3.470244168941455, "learning_rate": 3.368267121618328e-06, "loss": 0.9573, "step": 2765 }, { "epoch": 0.033758668177885025, "grad_norm": 2.5180787940432006, "learning_rate": 3.3743602242261764e-06, "loss": 1.0021, "step": 2770 }, { "epoch": 0.033819604402032834, "grad_norm": 2.4242183720271444, "learning_rate": 3.3804533268340238e-06, "loss": 0.8797, "step": 2775 }, { "epoch": 0.03388054062618064, "grad_norm": 2.409726338880536, "learning_rate": 3.386546429441872e-06, "loss": 0.8619, "step": 2780 }, { "epoch": 0.033941476850328445, "grad_norm": 2.4700000390491437, "learning_rate": 3.39263953204972e-06, "loss": 0.8377, "step": 2785 }, { "epoch": 0.034002413074476254, "grad_norm": 3.3581905703605512, "learning_rate": 3.398732634657568e-06, "loss": 0.8917, "step": 2790 }, { "epoch": 0.03406334929862406, "grad_norm": 3.1648701087241466, "learning_rate": 3.4048257372654157e-06, "loss": 0.8909, "step": 2795 }, { "epoch": 0.034124285522771865, "grad_norm": 2.4550729518208994, "learning_rate": 3.410918839873264e-06, "loss": 0.8838, "step": 2800 }, { "epoch": 0.034185221746919674, "grad_norm": 2.4748628469513716, "learning_rate": 3.4170119424811117e-06, "loss": 0.7873, "step": 2805 }, { "epoch": 0.03424615797106748, "grad_norm": 2.7034017014546845, "learning_rate": 3.4231050450889595e-06, "loss": 0.8902, "step": 2810 }, { "epoch": 0.034307094195215285, "grad_norm": 3.0427606778387024, "learning_rate": 3.4291981476968073e-06, "loss": 0.8906, "step": 2815 }, { "epoch": 0.034368030419363094, "grad_norm": 3.2525998485515406, "learning_rate": 3.4352912503046555e-06, "loss": 0.8937, "step": 2820 }, { "epoch": 0.0344289666435109, "grad_norm": 4.222022870136831, "learning_rate": 3.4413843529125037e-06, "loss": 0.8636, "step": 2825 }, { "epoch": 0.03448990286765871, "grad_norm": 3.123217131753902, "learning_rate": 3.447477455520351e-06, "loss": 0.8417, "step": 2830 }, { "epoch": 0.034550839091806514, "grad_norm": 3.9028659228462037, "learning_rate": 3.4535705581281993e-06, "loss": 0.7855, "step": 2835 }, { "epoch": 0.03461177531595432, "grad_norm": 3.0336066178291476, "learning_rate": 3.4596636607360475e-06, "loss": 0.9917, "step": 2840 }, { "epoch": 0.03467271154010213, "grad_norm": 3.533511902420011, "learning_rate": 3.465756763343895e-06, "loss": 0.8944, "step": 2845 }, { "epoch": 0.034733647764249934, "grad_norm": 2.905424976596759, "learning_rate": 3.471849865951743e-06, "loss": 0.949, "step": 2850 }, { "epoch": 0.03479458398839774, "grad_norm": 2.8030502832585955, "learning_rate": 3.4779429685595904e-06, "loss": 0.9289, "step": 2855 }, { "epoch": 0.03485552021254555, "grad_norm": 2.647791244013417, "learning_rate": 3.4840360711674386e-06, "loss": 0.7978, "step": 2860 }, { "epoch": 0.034916456436693354, "grad_norm": 3.8063184666901915, "learning_rate": 3.490129173775287e-06, "loss": 0.8522, "step": 2865 }, { "epoch": 0.03497739266084116, "grad_norm": 2.8266698352620976, "learning_rate": 3.496222276383134e-06, "loss": 0.9648, "step": 2870 }, { "epoch": 0.03503832888498897, "grad_norm": 2.533497616167194, "learning_rate": 3.5023153789909824e-06, "loss": 0.8596, "step": 2875 }, { "epoch": 0.03509926510913678, "grad_norm": 2.677635705829743, "learning_rate": 3.5084084815988306e-06, "loss": 0.9318, "step": 2880 }, { "epoch": 0.03516020133328458, "grad_norm": 2.985130552981491, "learning_rate": 3.5145015842066784e-06, "loss": 0.8783, "step": 2885 }, { "epoch": 0.03522113755743239, "grad_norm": 2.64604372150632, "learning_rate": 3.520594686814526e-06, "loss": 0.9298, "step": 2890 }, { "epoch": 0.0352820737815802, "grad_norm": 3.050745351957937, "learning_rate": 3.526687789422374e-06, "loss": 0.9582, "step": 2895 }, { "epoch": 0.035343010005728, "grad_norm": 2.686146638126703, "learning_rate": 3.532780892030222e-06, "loss": 0.8888, "step": 2900 }, { "epoch": 0.03540394622987581, "grad_norm": 2.440493182432543, "learning_rate": 3.53887399463807e-06, "loss": 0.8851, "step": 2905 }, { "epoch": 0.03546488245402362, "grad_norm": 3.186503701066206, "learning_rate": 3.5449670972459177e-06, "loss": 0.8785, "step": 2910 }, { "epoch": 0.03552581867817142, "grad_norm": 3.0997946156706098, "learning_rate": 3.551060199853766e-06, "loss": 0.9291, "step": 2915 }, { "epoch": 0.03558675490231923, "grad_norm": 2.5963073426846504, "learning_rate": 3.557153302461614e-06, "loss": 0.8555, "step": 2920 }, { "epoch": 0.03564769112646704, "grad_norm": 2.7316711978851496, "learning_rate": 3.5632464050694615e-06, "loss": 0.8891, "step": 2925 }, { "epoch": 0.03570862735061485, "grad_norm": 2.9981878478059487, "learning_rate": 3.5693395076773097e-06, "loss": 0.8785, "step": 2930 }, { "epoch": 0.03576956357476265, "grad_norm": 2.6840345088265933, "learning_rate": 3.575432610285158e-06, "loss": 0.9329, "step": 2935 }, { "epoch": 0.03583049979891046, "grad_norm": 3.7214581522702206, "learning_rate": 3.5815257128930053e-06, "loss": 0.8947, "step": 2940 }, { "epoch": 0.03589143602305827, "grad_norm": 2.8505567401291905, "learning_rate": 3.5876188155008535e-06, "loss": 0.9249, "step": 2945 }, { "epoch": 0.03595237224720607, "grad_norm": 3.0122628527798394, "learning_rate": 3.593711918108701e-06, "loss": 0.8845, "step": 2950 }, { "epoch": 0.03601330847135388, "grad_norm": 2.6710282827822995, "learning_rate": 3.599805020716549e-06, "loss": 0.8107, "step": 2955 }, { "epoch": 0.03607424469550169, "grad_norm": 2.4834995191385105, "learning_rate": 3.6058981233243972e-06, "loss": 0.8841, "step": 2960 }, { "epoch": 0.0361351809196495, "grad_norm": 2.662627368302022, "learning_rate": 3.611991225932245e-06, "loss": 0.9581, "step": 2965 }, { "epoch": 0.0361961171437973, "grad_norm": 2.9420517642240056, "learning_rate": 3.618084328540093e-06, "loss": 0.8149, "step": 2970 }, { "epoch": 0.03625705336794511, "grad_norm": 3.571529618278082, "learning_rate": 3.624177431147941e-06, "loss": 0.7901, "step": 2975 }, { "epoch": 0.03631798959209292, "grad_norm": 2.6706433784151895, "learning_rate": 3.630270533755789e-06, "loss": 0.9433, "step": 2980 }, { "epoch": 0.03637892581624072, "grad_norm": 3.241989960850275, "learning_rate": 3.6363636363636366e-06, "loss": 0.9089, "step": 2985 }, { "epoch": 0.03643986204038853, "grad_norm": 2.8676663034330305, "learning_rate": 3.6424567389714844e-06, "loss": 0.8474, "step": 2990 }, { "epoch": 0.03650079826453634, "grad_norm": 2.348899524234261, "learning_rate": 3.6485498415793326e-06, "loss": 0.8857, "step": 2995 }, { "epoch": 0.03656173448868414, "grad_norm": 2.770976638910068, "learning_rate": 3.6546429441871808e-06, "loss": 0.8151, "step": 3000 }, { "epoch": 0.03662267071283195, "grad_norm": 3.2452173664574206, "learning_rate": 3.660736046795028e-06, "loss": 0.8965, "step": 3005 }, { "epoch": 0.03668360693697976, "grad_norm": 3.5702870787414867, "learning_rate": 3.6668291494028763e-06, "loss": 0.815, "step": 3010 }, { "epoch": 0.03674454316112757, "grad_norm": 3.4883453302081353, "learning_rate": 3.6729222520107246e-06, "loss": 0.8761, "step": 3015 }, { "epoch": 0.03680547938527537, "grad_norm": 3.8801412775773882, "learning_rate": 3.679015354618572e-06, "loss": 0.8999, "step": 3020 }, { "epoch": 0.03686641560942318, "grad_norm": 3.3200831996241003, "learning_rate": 3.68510845722642e-06, "loss": 0.9109, "step": 3025 }, { "epoch": 0.03692735183357099, "grad_norm": 3.770523168078827, "learning_rate": 3.6912015598342675e-06, "loss": 0.8261, "step": 3030 }, { "epoch": 0.03698828805771879, "grad_norm": 3.2038108758560884, "learning_rate": 3.6972946624421157e-06, "loss": 0.8892, "step": 3035 }, { "epoch": 0.0370492242818666, "grad_norm": 2.8472668936505463, "learning_rate": 3.703387765049964e-06, "loss": 0.8313, "step": 3040 }, { "epoch": 0.03711016050601441, "grad_norm": 2.842649572917039, "learning_rate": 3.7094808676578113e-06, "loss": 0.8851, "step": 3045 }, { "epoch": 0.03717109673016221, "grad_norm": 2.658784797829515, "learning_rate": 3.7155739702656595e-06, "loss": 0.8194, "step": 3050 }, { "epoch": 0.03723203295431002, "grad_norm": 3.1112442115708308, "learning_rate": 3.7216670728735077e-06, "loss": 0.8616, "step": 3055 }, { "epoch": 0.03729296917845783, "grad_norm": 2.7751574055708415, "learning_rate": 3.7277601754813555e-06, "loss": 0.8974, "step": 3060 }, { "epoch": 0.037353905402605636, "grad_norm": 3.2118736876638767, "learning_rate": 3.7338532780892032e-06, "loss": 0.8992, "step": 3065 }, { "epoch": 0.03741484162675344, "grad_norm": 2.9801755498473246, "learning_rate": 3.7399463806970514e-06, "loss": 0.9187, "step": 3070 }, { "epoch": 0.03747577785090125, "grad_norm": 2.610250873295616, "learning_rate": 3.7460394833048992e-06, "loss": 0.8722, "step": 3075 }, { "epoch": 0.037536714075049056, "grad_norm": 2.494118502215657, "learning_rate": 3.752132585912747e-06, "loss": 0.874, "step": 3080 }, { "epoch": 0.03759765029919686, "grad_norm": 5.13156605993481, "learning_rate": 3.758225688520595e-06, "loss": 0.9782, "step": 3085 }, { "epoch": 0.03765858652334467, "grad_norm": 2.7658677784074883, "learning_rate": 3.764318791128443e-06, "loss": 0.8621, "step": 3090 }, { "epoch": 0.037719522747492476, "grad_norm": 3.4610437064908677, "learning_rate": 3.770411893736291e-06, "loss": 0.9183, "step": 3095 }, { "epoch": 0.03778045897164028, "grad_norm": 3.6344038784937744, "learning_rate": 3.7765049963441386e-06, "loss": 0.9255, "step": 3100 }, { "epoch": 0.03784139519578809, "grad_norm": 2.7277403428696005, "learning_rate": 3.7825980989519868e-06, "loss": 0.8628, "step": 3105 }, { "epoch": 0.037902331419935896, "grad_norm": 2.771852215761556, "learning_rate": 3.788691201559835e-06, "loss": 0.911, "step": 3110 }, { "epoch": 0.037963267644083705, "grad_norm": 2.3196614551040105, "learning_rate": 3.7947843041676823e-06, "loss": 0.8597, "step": 3115 }, { "epoch": 0.03802420386823151, "grad_norm": 3.0320655099947293, "learning_rate": 3.8008774067755305e-06, "loss": 0.9095, "step": 3120 }, { "epoch": 0.038085140092379316, "grad_norm": 3.5733517454346857, "learning_rate": 3.806970509383378e-06, "loss": 0.8805, "step": 3125 }, { "epoch": 0.038146076316527125, "grad_norm": 2.9385205733385704, "learning_rate": 3.813063611991226e-06, "loss": 0.9156, "step": 3130 }, { "epoch": 0.03820701254067493, "grad_norm": 2.558445569608755, "learning_rate": 3.819156714599074e-06, "loss": 0.8827, "step": 3135 }, { "epoch": 0.038267948764822736, "grad_norm": 2.3995684499190704, "learning_rate": 3.825249817206922e-06, "loss": 0.8703, "step": 3140 }, { "epoch": 0.038328884988970545, "grad_norm": 3.1227023939472693, "learning_rate": 3.83134291981477e-06, "loss": 0.8526, "step": 3145 }, { "epoch": 0.03838982121311835, "grad_norm": 2.5304639794370964, "learning_rate": 3.837436022422618e-06, "loss": 0.8766, "step": 3150 }, { "epoch": 0.038450757437266156, "grad_norm": 2.6983800501109885, "learning_rate": 3.8435291250304655e-06, "loss": 0.8777, "step": 3155 }, { "epoch": 0.038511693661413965, "grad_norm": 3.3573774842592585, "learning_rate": 3.849622227638314e-06, "loss": 0.9229, "step": 3160 }, { "epoch": 0.038572629885561774, "grad_norm": 7.584775337839192, "learning_rate": 3.855715330246161e-06, "loss": 0.9129, "step": 3165 }, { "epoch": 0.038633566109709576, "grad_norm": 3.1204641008747935, "learning_rate": 3.861808432854009e-06, "loss": 0.894, "step": 3170 }, { "epoch": 0.038694502333857385, "grad_norm": 2.497467378735605, "learning_rate": 3.8679015354618574e-06, "loss": 0.8289, "step": 3175 }, { "epoch": 0.038755438558005194, "grad_norm": 3.504454829168763, "learning_rate": 3.873994638069705e-06, "loss": 0.9844, "step": 3180 }, { "epoch": 0.038816374782152996, "grad_norm": 3.298626758094663, "learning_rate": 3.880087740677553e-06, "loss": 0.8531, "step": 3185 }, { "epoch": 0.038877311006300805, "grad_norm": 2.4759682240873544, "learning_rate": 3.886180843285401e-06, "loss": 0.9192, "step": 3190 }, { "epoch": 0.038938247230448614, "grad_norm": 4.820869910124849, "learning_rate": 3.892273945893249e-06, "loss": 0.89, "step": 3195 }, { "epoch": 0.03899918345459642, "grad_norm": 2.7976669126502998, "learning_rate": 3.898367048501097e-06, "loss": 0.9294, "step": 3200 }, { "epoch": 0.039060119678744225, "grad_norm": 2.9285350187145807, "learning_rate": 3.904460151108945e-06, "loss": 0.9266, "step": 3205 }, { "epoch": 0.039121055902892034, "grad_norm": 4.300465064597185, "learning_rate": 3.910553253716793e-06, "loss": 0.8092, "step": 3210 }, { "epoch": 0.03918199212703984, "grad_norm": 3.0898616015354596, "learning_rate": 3.9166463563246405e-06, "loss": 0.8607, "step": 3215 }, { "epoch": 0.039242928351187645, "grad_norm": 3.802928486785546, "learning_rate": 3.922739458932489e-06, "loss": 0.8958, "step": 3220 }, { "epoch": 0.039303864575335454, "grad_norm": 2.841705941254737, "learning_rate": 3.928832561540337e-06, "loss": 0.8227, "step": 3225 }, { "epoch": 0.03936480079948326, "grad_norm": 2.9791996019571223, "learning_rate": 3.934925664148185e-06, "loss": 0.8273, "step": 3230 }, { "epoch": 0.039425737023631065, "grad_norm": 2.8863915853962867, "learning_rate": 3.9410187667560325e-06, "loss": 0.8202, "step": 3235 }, { "epoch": 0.039486673247778874, "grad_norm": 6.563145637302977, "learning_rate": 3.947111869363881e-06, "loss": 0.8634, "step": 3240 }, { "epoch": 0.03954760947192668, "grad_norm": 3.5073686666037087, "learning_rate": 3.953204971971729e-06, "loss": 0.8837, "step": 3245 }, { "epoch": 0.03960854569607449, "grad_norm": 4.309104769368368, "learning_rate": 3.959298074579576e-06, "loss": 0.8741, "step": 3250 }, { "epoch": 0.039669481920222294, "grad_norm": 2.8098085953631102, "learning_rate": 3.9653911771874245e-06, "loss": 0.9462, "step": 3255 }, { "epoch": 0.0397304181443701, "grad_norm": 2.761873916399385, "learning_rate": 3.971484279795272e-06, "loss": 0.8597, "step": 3260 }, { "epoch": 0.03979135436851791, "grad_norm": 3.1828697544603597, "learning_rate": 3.97757738240312e-06, "loss": 0.9741, "step": 3265 }, { "epoch": 0.039852290592665714, "grad_norm": 2.9552793427880375, "learning_rate": 3.983670485010968e-06, "loss": 0.841, "step": 3270 }, { "epoch": 0.03991322681681352, "grad_norm": 2.865999142501733, "learning_rate": 3.989763587618816e-06, "loss": 0.8704, "step": 3275 }, { "epoch": 0.03997416304096133, "grad_norm": 3.69969301122723, "learning_rate": 3.995856690226664e-06, "loss": 0.8857, "step": 3280 }, { "epoch": 0.040035099265109134, "grad_norm": 4.217902981914763, "learning_rate": 4.001949792834512e-06, "loss": 0.9768, "step": 3285 }, { "epoch": 0.04009603548925694, "grad_norm": 3.4557931055936857, "learning_rate": 4.008042895442359e-06, "loss": 0.8538, "step": 3290 }, { "epoch": 0.04015697171340475, "grad_norm": 3.1964046155898735, "learning_rate": 4.014135998050208e-06, "loss": 0.9414, "step": 3295 }, { "epoch": 0.04021790793755256, "grad_norm": 2.8234589219570028, "learning_rate": 4.020229100658055e-06, "loss": 0.879, "step": 3300 }, { "epoch": 0.04027884416170036, "grad_norm": 2.672077656273274, "learning_rate": 4.026322203265903e-06, "loss": 0.8632, "step": 3305 }, { "epoch": 0.04033978038584817, "grad_norm": 2.8637479222857265, "learning_rate": 4.032415305873751e-06, "loss": 0.887, "step": 3310 }, { "epoch": 0.04040071660999598, "grad_norm": 3.035227832403076, "learning_rate": 4.038508408481599e-06, "loss": 0.8675, "step": 3315 }, { "epoch": 0.04046165283414378, "grad_norm": 3.017364174499845, "learning_rate": 4.044601511089447e-06, "loss": 0.8909, "step": 3320 }, { "epoch": 0.04052258905829159, "grad_norm": 3.097719772720629, "learning_rate": 4.050694613697295e-06, "loss": 0.8718, "step": 3325 }, { "epoch": 0.0405835252824394, "grad_norm": 2.425076671760029, "learning_rate": 4.0567877163051425e-06, "loss": 0.882, "step": 3330 }, { "epoch": 0.0406444615065872, "grad_norm": 3.1917516280116622, "learning_rate": 4.062880818912991e-06, "loss": 0.8855, "step": 3335 }, { "epoch": 0.04070539773073501, "grad_norm": 2.498840070865857, "learning_rate": 4.068973921520838e-06, "loss": 0.9151, "step": 3340 }, { "epoch": 0.04076633395488282, "grad_norm": 2.7470720939626325, "learning_rate": 4.075067024128686e-06, "loss": 0.869, "step": 3345 }, { "epoch": 0.04082727017903063, "grad_norm": 3.039496394593049, "learning_rate": 4.0811601267365345e-06, "loss": 0.8688, "step": 3350 }, { "epoch": 0.04088820640317843, "grad_norm": 2.739761772483928, "learning_rate": 4.087253229344382e-06, "loss": 0.8064, "step": 3355 }, { "epoch": 0.04094914262732624, "grad_norm": 2.864863205684917, "learning_rate": 4.09334633195223e-06, "loss": 0.893, "step": 3360 }, { "epoch": 0.04101007885147405, "grad_norm": 2.9911193405508585, "learning_rate": 4.099439434560078e-06, "loss": 0.8864, "step": 3365 }, { "epoch": 0.04107101507562185, "grad_norm": 2.874817794151049, "learning_rate": 4.1055325371679265e-06, "loss": 0.8156, "step": 3370 }, { "epoch": 0.04113195129976966, "grad_norm": 2.3052721162881307, "learning_rate": 4.111625639775774e-06, "loss": 0.9497, "step": 3375 }, { "epoch": 0.04119288752391747, "grad_norm": 2.7692269359271258, "learning_rate": 4.117718742383622e-06, "loss": 0.8164, "step": 3380 }, { "epoch": 0.04125382374806527, "grad_norm": 2.5559705008595768, "learning_rate": 4.12381184499147e-06, "loss": 0.8701, "step": 3385 }, { "epoch": 0.04131475997221308, "grad_norm": 2.9214245151573803, "learning_rate": 4.129904947599318e-06, "loss": 0.931, "step": 3390 }, { "epoch": 0.04137569619636089, "grad_norm": 3.2158538621154094, "learning_rate": 4.135998050207166e-06, "loss": 0.8623, "step": 3395 }, { "epoch": 0.0414366324205087, "grad_norm": 2.6437108306948605, "learning_rate": 4.142091152815014e-06, "loss": 0.8861, "step": 3400 }, { "epoch": 0.0414975686446565, "grad_norm": 2.674791811849309, "learning_rate": 4.148184255422862e-06, "loss": 0.8469, "step": 3405 }, { "epoch": 0.04155850486880431, "grad_norm": 2.57044345478914, "learning_rate": 4.15427735803071e-06, "loss": 0.8737, "step": 3410 }, { "epoch": 0.04161944109295212, "grad_norm": 2.511864122251487, "learning_rate": 4.160370460638558e-06, "loss": 0.9022, "step": 3415 }, { "epoch": 0.04168037731709992, "grad_norm": 2.518221656223307, "learning_rate": 4.166463563246406e-06, "loss": 0.8401, "step": 3420 }, { "epoch": 0.04174131354124773, "grad_norm": 2.539146337309433, "learning_rate": 4.172556665854253e-06, "loss": 0.8888, "step": 3425 }, { "epoch": 0.04180224976539554, "grad_norm": 3.0950920310924563, "learning_rate": 4.178649768462102e-06, "loss": 0.8566, "step": 3430 }, { "epoch": 0.04186318598954335, "grad_norm": 4.726983369263233, "learning_rate": 4.184742871069949e-06, "loss": 0.9146, "step": 3435 }, { "epoch": 0.04192412221369115, "grad_norm": 2.413494158261741, "learning_rate": 4.190835973677797e-06, "loss": 0.9137, "step": 3440 }, { "epoch": 0.04198505843783896, "grad_norm": 2.742482368352469, "learning_rate": 4.196929076285645e-06, "loss": 0.8529, "step": 3445 }, { "epoch": 0.04204599466198677, "grad_norm": 3.1895576761808, "learning_rate": 4.203022178893493e-06, "loss": 0.9034, "step": 3450 }, { "epoch": 0.04210693088613457, "grad_norm": 2.7688352266503466, "learning_rate": 4.209115281501341e-06, "loss": 0.8935, "step": 3455 }, { "epoch": 0.04216786711028238, "grad_norm": 3.0760904454517295, "learning_rate": 4.215208384109189e-06, "loss": 0.9052, "step": 3460 }, { "epoch": 0.04222880333443019, "grad_norm": 3.148467429090026, "learning_rate": 4.2213014867170365e-06, "loss": 0.8692, "step": 3465 }, { "epoch": 0.04228973955857799, "grad_norm": 3.0329577558023555, "learning_rate": 4.227394589324885e-06, "loss": 0.8198, "step": 3470 }, { "epoch": 0.0423506757827258, "grad_norm": 2.517163531045469, "learning_rate": 4.233487691932732e-06, "loss": 0.9172, "step": 3475 }, { "epoch": 0.04241161200687361, "grad_norm": 3.117579300735364, "learning_rate": 4.23958079454058e-06, "loss": 0.8108, "step": 3480 }, { "epoch": 0.042472548231021416, "grad_norm": 2.8625876831357853, "learning_rate": 4.2456738971484285e-06, "loss": 0.8246, "step": 3485 }, { "epoch": 0.04253348445516922, "grad_norm": 3.7122268485569836, "learning_rate": 4.251766999756276e-06, "loss": 0.9448, "step": 3490 }, { "epoch": 0.04259442067931703, "grad_norm": 3.0096187408200725, "learning_rate": 4.257860102364124e-06, "loss": 0.8811, "step": 3495 }, { "epoch": 0.042655356903464836, "grad_norm": 2.3109491143579626, "learning_rate": 4.263953204971972e-06, "loss": 0.7996, "step": 3500 }, { "epoch": 0.04271629312761264, "grad_norm": 3.3125585459376063, "learning_rate": 4.27004630757982e-06, "loss": 0.8082, "step": 3505 }, { "epoch": 0.04277722935176045, "grad_norm": 3.5929531157710466, "learning_rate": 4.276139410187668e-06, "loss": 0.9185, "step": 3510 }, { "epoch": 0.042838165575908256, "grad_norm": 3.644042767066514, "learning_rate": 4.282232512795515e-06, "loss": 0.8837, "step": 3515 }, { "epoch": 0.04289910180005606, "grad_norm": 3.292945911342641, "learning_rate": 4.288325615403363e-06, "loss": 0.9884, "step": 3520 }, { "epoch": 0.04296003802420387, "grad_norm": 2.7791180941785045, "learning_rate": 4.294418718011212e-06, "loss": 0.8728, "step": 3525 }, { "epoch": 0.043020974248351676, "grad_norm": 3.136057923895066, "learning_rate": 4.300511820619059e-06, "loss": 0.8735, "step": 3530 }, { "epoch": 0.043081910472499485, "grad_norm": 3.8047513019415895, "learning_rate": 4.306604923226907e-06, "loss": 0.9681, "step": 3535 }, { "epoch": 0.04314284669664729, "grad_norm": 3.721178807582475, "learning_rate": 4.312698025834755e-06, "loss": 0.8039, "step": 3540 }, { "epoch": 0.043203782920795096, "grad_norm": 2.634529386842268, "learning_rate": 4.3187911284426036e-06, "loss": 0.9688, "step": 3545 }, { "epoch": 0.043264719144942905, "grad_norm": 2.5796045458168737, "learning_rate": 4.324884231050451e-06, "loss": 0.9064, "step": 3550 }, { "epoch": 0.04332565536909071, "grad_norm": 2.377167281560882, "learning_rate": 4.330977333658299e-06, "loss": 0.8085, "step": 3555 }, { "epoch": 0.043386591593238516, "grad_norm": 3.4100371954312187, "learning_rate": 4.337070436266147e-06, "loss": 0.9084, "step": 3560 }, { "epoch": 0.043447527817386325, "grad_norm": 3.3661387717380378, "learning_rate": 4.343163538873995e-06, "loss": 0.9078, "step": 3565 }, { "epoch": 0.04350846404153413, "grad_norm": 4.291062426970656, "learning_rate": 4.349256641481843e-06, "loss": 0.8888, "step": 3570 }, { "epoch": 0.043569400265681936, "grad_norm": 2.8197609863181663, "learning_rate": 4.355349744089691e-06, "loss": 0.8344, "step": 3575 }, { "epoch": 0.043630336489829745, "grad_norm": 2.9564587080651323, "learning_rate": 4.361442846697539e-06, "loss": 0.8599, "step": 3580 }, { "epoch": 0.043691272713977554, "grad_norm": 2.6079131734408167, "learning_rate": 4.367535949305387e-06, "loss": 0.8866, "step": 3585 }, { "epoch": 0.043752208938125356, "grad_norm": 3.1794065908962095, "learning_rate": 4.373629051913235e-06, "loss": 0.8685, "step": 3590 }, { "epoch": 0.043813145162273165, "grad_norm": 2.493082518019306, "learning_rate": 4.379722154521083e-06, "loss": 0.9106, "step": 3595 }, { "epoch": 0.043874081386420974, "grad_norm": 3.317975827445305, "learning_rate": 4.3858152571289305e-06, "loss": 0.9012, "step": 3600 }, { "epoch": 0.043935017610568776, "grad_norm": 2.387493012705608, "learning_rate": 4.391908359736779e-06, "loss": 0.8418, "step": 3605 }, { "epoch": 0.043995953834716585, "grad_norm": 3.2800112608971186, "learning_rate": 4.398001462344626e-06, "loss": 0.8813, "step": 3610 }, { "epoch": 0.044056890058864394, "grad_norm": 2.622970925720891, "learning_rate": 4.404094564952474e-06, "loss": 0.9136, "step": 3615 }, { "epoch": 0.0441178262830122, "grad_norm": 2.8439672229494604, "learning_rate": 4.4101876675603224e-06, "loss": 0.8731, "step": 3620 }, { "epoch": 0.044178762507160005, "grad_norm": 3.145574845827749, "learning_rate": 4.41628077016817e-06, "loss": 0.9534, "step": 3625 }, { "epoch": 0.044239698731307814, "grad_norm": 3.321423062455449, "learning_rate": 4.422373872776018e-06, "loss": 0.9168, "step": 3630 }, { "epoch": 0.04430063495545562, "grad_norm": 2.784742460088556, "learning_rate": 4.428466975383866e-06, "loss": 0.8872, "step": 3635 }, { "epoch": 0.044361571179603425, "grad_norm": 2.8428787989391116, "learning_rate": 4.4345600779917136e-06, "loss": 0.8395, "step": 3640 }, { "epoch": 0.044422507403751234, "grad_norm": 2.828539387925095, "learning_rate": 4.440653180599562e-06, "loss": 0.8637, "step": 3645 }, { "epoch": 0.04448344362789904, "grad_norm": 3.1176652324652827, "learning_rate": 4.446746283207409e-06, "loss": 0.9205, "step": 3650 }, { "epoch": 0.044544379852046845, "grad_norm": 2.8131361150675196, "learning_rate": 4.452839385815257e-06, "loss": 0.8662, "step": 3655 }, { "epoch": 0.044605316076194654, "grad_norm": 4.220053652967579, "learning_rate": 4.4589324884231056e-06, "loss": 0.8609, "step": 3660 }, { "epoch": 0.04466625230034246, "grad_norm": 2.2285818458739666, "learning_rate": 4.465025591030953e-06, "loss": 0.8408, "step": 3665 }, { "epoch": 0.04472718852449027, "grad_norm": 2.878042800302432, "learning_rate": 4.471118693638801e-06, "loss": 0.9338, "step": 3670 }, { "epoch": 0.044788124748638074, "grad_norm": 3.1099585769749285, "learning_rate": 4.477211796246649e-06, "loss": 0.9469, "step": 3675 }, { "epoch": 0.04484906097278588, "grad_norm": 3.929046958984902, "learning_rate": 4.483304898854497e-06, "loss": 0.8583, "step": 3680 }, { "epoch": 0.04490999719693369, "grad_norm": 2.4923262120114167, "learning_rate": 4.489398001462345e-06, "loss": 0.8747, "step": 3685 }, { "epoch": 0.044970933421081494, "grad_norm": 2.8045593322963427, "learning_rate": 4.495491104070193e-06, "loss": 0.8842, "step": 3690 }, { "epoch": 0.0450318696452293, "grad_norm": 2.560704774375894, "learning_rate": 4.5015842066780405e-06, "loss": 0.8671, "step": 3695 }, { "epoch": 0.04509280586937711, "grad_norm": 3.2147581849720837, "learning_rate": 4.507677309285889e-06, "loss": 0.8412, "step": 3700 }, { "epoch": 0.045153742093524914, "grad_norm": 3.5041330864282627, "learning_rate": 4.513770411893736e-06, "loss": 0.8037, "step": 3705 }, { "epoch": 0.04521467831767272, "grad_norm": 2.4255504108882198, "learning_rate": 4.519863514501584e-06, "loss": 0.9083, "step": 3710 }, { "epoch": 0.04527561454182053, "grad_norm": 2.7080136604591214, "learning_rate": 4.5259566171094324e-06, "loss": 0.8457, "step": 3715 }, { "epoch": 0.04533655076596834, "grad_norm": 3.543185714335572, "learning_rate": 4.532049719717281e-06, "loss": 0.9658, "step": 3720 }, { "epoch": 0.04539748699011614, "grad_norm": 2.8348819454128673, "learning_rate": 4.538142822325128e-06, "loss": 0.8475, "step": 3725 }, { "epoch": 0.04545842321426395, "grad_norm": 2.405270162095749, "learning_rate": 4.544235924932976e-06, "loss": 0.9247, "step": 3730 }, { "epoch": 0.04551935943841176, "grad_norm": 2.940037460723577, "learning_rate": 4.550329027540824e-06, "loss": 0.8912, "step": 3735 }, { "epoch": 0.04558029566255956, "grad_norm": 3.5126097727560444, "learning_rate": 4.556422130148672e-06, "loss": 0.8817, "step": 3740 }, { "epoch": 0.04564123188670737, "grad_norm": 3.318307197358053, "learning_rate": 4.56251523275652e-06, "loss": 0.8397, "step": 3745 }, { "epoch": 0.04570216811085518, "grad_norm": 2.7552249403866407, "learning_rate": 4.568608335364368e-06, "loss": 0.8621, "step": 3750 }, { "epoch": 0.04576310433500298, "grad_norm": 2.692419132632163, "learning_rate": 4.574701437972216e-06, "loss": 0.86, "step": 3755 }, { "epoch": 0.04582404055915079, "grad_norm": 3.1507467431240195, "learning_rate": 4.580794540580064e-06, "loss": 0.9618, "step": 3760 }, { "epoch": 0.0458849767832986, "grad_norm": 2.7896849965539325, "learning_rate": 4.586887643187912e-06, "loss": 0.8844, "step": 3765 }, { "epoch": 0.04594591300744641, "grad_norm": 2.862219500513635, "learning_rate": 4.59298074579576e-06, "loss": 0.843, "step": 3770 }, { "epoch": 0.04600684923159421, "grad_norm": 2.9731977643724066, "learning_rate": 4.5990738484036075e-06, "loss": 0.9381, "step": 3775 }, { "epoch": 0.04606778545574202, "grad_norm": 3.0830309609229523, "learning_rate": 4.605166951011456e-06, "loss": 0.9023, "step": 3780 }, { "epoch": 0.04612872167988983, "grad_norm": 4.690095028456161, "learning_rate": 4.611260053619303e-06, "loss": 0.8599, "step": 3785 }, { "epoch": 0.04618965790403763, "grad_norm": 2.4481682372933724, "learning_rate": 4.617353156227151e-06, "loss": 0.9106, "step": 3790 }, { "epoch": 0.04625059412818544, "grad_norm": 3.8647403639012876, "learning_rate": 4.6234462588349995e-06, "loss": 0.8753, "step": 3795 }, { "epoch": 0.04631153035233325, "grad_norm": 4.442226335735473, "learning_rate": 4.629539361442847e-06, "loss": 0.92, "step": 3800 }, { "epoch": 0.04637246657648105, "grad_norm": 2.9572802327733294, "learning_rate": 4.635632464050695e-06, "loss": 0.8392, "step": 3805 }, { "epoch": 0.04643340280062886, "grad_norm": 2.3818959933432837, "learning_rate": 4.641725566658543e-06, "loss": 0.8043, "step": 3810 }, { "epoch": 0.04649433902477667, "grad_norm": 3.2946577276588087, "learning_rate": 4.647818669266391e-06, "loss": 0.9324, "step": 3815 }, { "epoch": 0.04655527524892448, "grad_norm": 2.606140779756161, "learning_rate": 4.653911771874239e-06, "loss": 0.9083, "step": 3820 }, { "epoch": 0.04661621147307228, "grad_norm": 3.4033433098676276, "learning_rate": 4.660004874482086e-06, "loss": 0.843, "step": 3825 }, { "epoch": 0.04667714769722009, "grad_norm": 2.7914702101100928, "learning_rate": 4.6660979770899344e-06, "loss": 0.8943, "step": 3830 }, { "epoch": 0.0467380839213679, "grad_norm": 2.3904628567368866, "learning_rate": 4.672191079697783e-06, "loss": 0.8791, "step": 3835 }, { "epoch": 0.0467990201455157, "grad_norm": 2.783560512062144, "learning_rate": 4.67828418230563e-06, "loss": 0.8969, "step": 3840 }, { "epoch": 0.04685995636966351, "grad_norm": 2.823081095741963, "learning_rate": 4.684377284913478e-06, "loss": 0.8771, "step": 3845 }, { "epoch": 0.04692089259381132, "grad_norm": 2.714721905506089, "learning_rate": 4.690470387521326e-06, "loss": 0.8986, "step": 3850 }, { "epoch": 0.04698182881795913, "grad_norm": 2.591528139063928, "learning_rate": 4.696563490129174e-06, "loss": 0.8917, "step": 3855 }, { "epoch": 0.04704276504210693, "grad_norm": 3.9521965076653367, "learning_rate": 4.702656592737022e-06, "loss": 0.9902, "step": 3860 }, { "epoch": 0.04710370126625474, "grad_norm": 2.8537721266326175, "learning_rate": 4.70874969534487e-06, "loss": 0.814, "step": 3865 }, { "epoch": 0.04716463749040255, "grad_norm": 2.7641792562820147, "learning_rate": 4.7148427979527175e-06, "loss": 0.9005, "step": 3870 }, { "epoch": 0.04722557371455035, "grad_norm": 2.8156016676662765, "learning_rate": 4.720935900560566e-06, "loss": 0.8889, "step": 3875 }, { "epoch": 0.04728650993869816, "grad_norm": 2.4729031558356245, "learning_rate": 4.727029003168413e-06, "loss": 0.8638, "step": 3880 }, { "epoch": 0.04734744616284597, "grad_norm": 2.6435358940674103, "learning_rate": 4.733122105776261e-06, "loss": 0.8724, "step": 3885 }, { "epoch": 0.04740838238699377, "grad_norm": 2.6645844838055432, "learning_rate": 4.7392152083841095e-06, "loss": 0.9322, "step": 3890 }, { "epoch": 0.04746931861114158, "grad_norm": 2.803863278672742, "learning_rate": 4.745308310991958e-06, "loss": 0.985, "step": 3895 }, { "epoch": 0.04753025483528939, "grad_norm": 2.906331892026681, "learning_rate": 4.751401413599805e-06, "loss": 0.9097, "step": 3900 }, { "epoch": 0.047591191059437196, "grad_norm": 2.664282961855364, "learning_rate": 4.757494516207653e-06, "loss": 0.8956, "step": 3905 }, { "epoch": 0.047652127283585, "grad_norm": 3.201927096782259, "learning_rate": 4.7635876188155015e-06, "loss": 0.8929, "step": 3910 }, { "epoch": 0.04771306350773281, "grad_norm": 4.203374881776084, "learning_rate": 4.769680721423349e-06, "loss": 0.8842, "step": 3915 }, { "epoch": 0.047773999731880616, "grad_norm": 2.417213543540723, "learning_rate": 4.775773824031197e-06, "loss": 0.7621, "step": 3920 }, { "epoch": 0.04783493595602842, "grad_norm": 3.117306997776223, "learning_rate": 4.781866926639045e-06, "loss": 0.8934, "step": 3925 }, { "epoch": 0.04789587218017623, "grad_norm": 3.221251901020262, "learning_rate": 4.7879600292468935e-06, "loss": 0.7798, "step": 3930 }, { "epoch": 0.047956808404324036, "grad_norm": 3.0798871457703285, "learning_rate": 4.794053131854741e-06, "loss": 0.9479, "step": 3935 }, { "epoch": 0.04801774462847184, "grad_norm": 3.3194781284177024, "learning_rate": 4.800146234462589e-06, "loss": 0.8213, "step": 3940 }, { "epoch": 0.04807868085261965, "grad_norm": 2.4009881978902574, "learning_rate": 4.806239337070437e-06, "loss": 0.9004, "step": 3945 }, { "epoch": 0.048139617076767456, "grad_norm": 3.8190322050254717, "learning_rate": 4.812332439678285e-06, "loss": 0.9159, "step": 3950 }, { "epoch": 0.048200553300915265, "grad_norm": 4.064050540701577, "learning_rate": 4.818425542286133e-06, "loss": 0.8463, "step": 3955 }, { "epoch": 0.04826148952506307, "grad_norm": 3.698946745132331, "learning_rate": 4.82451864489398e-06, "loss": 0.9073, "step": 3960 }, { "epoch": 0.048322425749210876, "grad_norm": 2.4223769101990853, "learning_rate": 4.830611747501828e-06, "loss": 0.874, "step": 3965 }, { "epoch": 0.048383361973358685, "grad_norm": 2.4266948708286633, "learning_rate": 4.836704850109677e-06, "loss": 0.9523, "step": 3970 }, { "epoch": 0.04844429819750649, "grad_norm": 2.5716800077480073, "learning_rate": 4.842797952717524e-06, "loss": 0.8405, "step": 3975 }, { "epoch": 0.048505234421654296, "grad_norm": 3.4141317417781285, "learning_rate": 4.848891055325372e-06, "loss": 0.9351, "step": 3980 }, { "epoch": 0.048566170645802105, "grad_norm": 2.5813481190405594, "learning_rate": 4.85498415793322e-06, "loss": 0.936, "step": 3985 }, { "epoch": 0.04862710686994991, "grad_norm": 2.466306016725611, "learning_rate": 4.861077260541068e-06, "loss": 0.854, "step": 3990 }, { "epoch": 0.048688043094097716, "grad_norm": 2.8249258761101945, "learning_rate": 4.867170363148916e-06, "loss": 0.9052, "step": 3995 }, { "epoch": 0.048748979318245525, "grad_norm": 3.7300763735003497, "learning_rate": 4.873263465756764e-06, "loss": 0.855, "step": 4000 }, { "epoch": 0.048809915542393334, "grad_norm": 2.762148404741347, "learning_rate": 4.8793565683646115e-06, "loss": 0.8283, "step": 4005 }, { "epoch": 0.048870851766541136, "grad_norm": 2.4473799821852795, "learning_rate": 4.88544967097246e-06, "loss": 0.834, "step": 4010 }, { "epoch": 0.048931787990688945, "grad_norm": 2.116327989054846, "learning_rate": 4.891542773580307e-06, "loss": 0.9078, "step": 4015 }, { "epoch": 0.048992724214836754, "grad_norm": 2.3669392967760676, "learning_rate": 4.897635876188155e-06, "loss": 0.8628, "step": 4020 }, { "epoch": 0.049053660438984556, "grad_norm": 3.0148874964880976, "learning_rate": 4.9037289787960035e-06, "loss": 0.8219, "step": 4025 }, { "epoch": 0.049114596663132365, "grad_norm": 2.197075991371314, "learning_rate": 4.909822081403851e-06, "loss": 0.8226, "step": 4030 }, { "epoch": 0.049175532887280174, "grad_norm": 2.63177975859519, "learning_rate": 4.915915184011699e-06, "loss": 0.9336, "step": 4035 }, { "epoch": 0.04923646911142798, "grad_norm": 2.7553319464607786, "learning_rate": 4.922008286619547e-06, "loss": 0.9013, "step": 4040 }, { "epoch": 0.049297405335575785, "grad_norm": 2.569179532096851, "learning_rate": 4.928101389227395e-06, "loss": 0.8409, "step": 4045 }, { "epoch": 0.049358341559723594, "grad_norm": 2.6045766917536075, "learning_rate": 4.934194491835243e-06, "loss": 0.9037, "step": 4050 }, { "epoch": 0.0494192777838714, "grad_norm": 2.8776628071148598, "learning_rate": 4.94028759444309e-06, "loss": 0.8813, "step": 4055 }, { "epoch": 0.049480214008019205, "grad_norm": 3.4023211021028663, "learning_rate": 4.946380697050938e-06, "loss": 0.89, "step": 4060 }, { "epoch": 0.049541150232167014, "grad_norm": 3.3544117445098256, "learning_rate": 4.952473799658787e-06, "loss": 0.8892, "step": 4065 }, { "epoch": 0.04960208645631482, "grad_norm": 3.3779767156069482, "learning_rate": 4.958566902266635e-06, "loss": 0.8812, "step": 4070 }, { "epoch": 0.049663022680462625, "grad_norm": 2.7963670222055983, "learning_rate": 4.964660004874482e-06, "loss": 0.8426, "step": 4075 }, { "epoch": 0.049723958904610434, "grad_norm": 3.401901550365826, "learning_rate": 4.97075310748233e-06, "loss": 0.9118, "step": 4080 }, { "epoch": 0.04978489512875824, "grad_norm": 2.590267060988608, "learning_rate": 4.976846210090179e-06, "loss": 0.8677, "step": 4085 }, { "epoch": 0.04984583135290605, "grad_norm": 2.188628531419569, "learning_rate": 4.982939312698026e-06, "loss": 0.8166, "step": 4090 }, { "epoch": 0.049906767577053854, "grad_norm": 2.5157442512267516, "learning_rate": 4.989032415305874e-06, "loss": 0.8082, "step": 4095 }, { "epoch": 0.04996770380120166, "grad_norm": 2.4924922554579445, "learning_rate": 4.995125517913722e-06, "loss": 0.9206, "step": 4100 }, { "epoch": 0.05002864002534947, "grad_norm": 2.6718737504891665, "learning_rate": 4.999935856318153e-06, "loss": 0.903, "step": 4105 }, { "epoch": 0.050089576249497274, "grad_norm": 3.2796483140240644, "learning_rate": 4.999615137908916e-06, "loss": 0.8501, "step": 4110 }, { "epoch": 0.05015051247364508, "grad_norm": 2.514532607488826, "learning_rate": 4.99929441949968e-06, "loss": 0.9036, "step": 4115 }, { "epoch": 0.05021144869779289, "grad_norm": 2.9436422583601147, "learning_rate": 4.998973701090443e-06, "loss": 0.9076, "step": 4120 }, { "epoch": 0.050272384921940694, "grad_norm": 2.748247638604848, "learning_rate": 4.998652982681207e-06, "loss": 0.8486, "step": 4125 }, { "epoch": 0.0503333211460885, "grad_norm": 2.7955454004267826, "learning_rate": 4.99833226427197e-06, "loss": 0.9219, "step": 4130 }, { "epoch": 0.05039425737023631, "grad_norm": 3.2977486273226555, "learning_rate": 4.998011545862733e-06, "loss": 0.8402, "step": 4135 }, { "epoch": 0.05045519359438412, "grad_norm": 2.7438119588223766, "learning_rate": 4.997690827453497e-06, "loss": 0.8841, "step": 4140 }, { "epoch": 0.05051612981853192, "grad_norm": 3.020903278358673, "learning_rate": 4.99737010904426e-06, "loss": 0.9498, "step": 4145 }, { "epoch": 0.05057706604267973, "grad_norm": 2.3381507516651525, "learning_rate": 4.997049390635023e-06, "loss": 0.7859, "step": 4150 }, { "epoch": 0.05063800226682754, "grad_norm": 2.990345333375744, "learning_rate": 4.9967286722257866e-06, "loss": 0.8458, "step": 4155 }, { "epoch": 0.05069893849097534, "grad_norm": 2.8919577341887015, "learning_rate": 4.99640795381655e-06, "loss": 0.8712, "step": 4160 }, { "epoch": 0.05075987471512315, "grad_norm": 2.6226010467429113, "learning_rate": 4.996087235407313e-06, "loss": 0.7713, "step": 4165 }, { "epoch": 0.05082081093927096, "grad_norm": 2.6159085135522298, "learning_rate": 4.9957665169980765e-06, "loss": 0.8137, "step": 4170 }, { "epoch": 0.05088174716341876, "grad_norm": 3.263400266843647, "learning_rate": 4.9954457985888395e-06, "loss": 0.8883, "step": 4175 }, { "epoch": 0.05094268338756657, "grad_norm": 2.6255687869025297, "learning_rate": 4.9951250801796025e-06, "loss": 0.8487, "step": 4180 }, { "epoch": 0.05100361961171438, "grad_norm": 2.9064377078378283, "learning_rate": 4.994804361770366e-06, "loss": 0.9368, "step": 4185 }, { "epoch": 0.05106455583586219, "grad_norm": 3.0376481294672466, "learning_rate": 4.994483643361129e-06, "loss": 0.845, "step": 4190 }, { "epoch": 0.05112549206000999, "grad_norm": 2.4400727989474484, "learning_rate": 4.994162924951892e-06, "loss": 0.8634, "step": 4195 }, { "epoch": 0.0511864282841578, "grad_norm": 3.111563187809676, "learning_rate": 4.993842206542656e-06, "loss": 0.8502, "step": 4200 }, { "epoch": 0.05124736450830561, "grad_norm": 2.9561846625819963, "learning_rate": 4.993521488133419e-06, "loss": 0.8418, "step": 4205 }, { "epoch": 0.05130830073245341, "grad_norm": 2.3793416938508543, "learning_rate": 4.993200769724182e-06, "loss": 0.8587, "step": 4210 }, { "epoch": 0.05136923695660122, "grad_norm": 5.101503117309126, "learning_rate": 4.992880051314946e-06, "loss": 0.8809, "step": 4215 }, { "epoch": 0.05143017318074903, "grad_norm": 3.0229016842639, "learning_rate": 4.992559332905709e-06, "loss": 0.8253, "step": 4220 }, { "epoch": 0.05149110940489683, "grad_norm": 2.3515407436065425, "learning_rate": 4.992238614496472e-06, "loss": 0.7855, "step": 4225 }, { "epoch": 0.05155204562904464, "grad_norm": 2.497059178002736, "learning_rate": 4.991917896087236e-06, "loss": 0.8747, "step": 4230 }, { "epoch": 0.05161298185319245, "grad_norm": 2.656987534603116, "learning_rate": 4.991597177677999e-06, "loss": 0.94, "step": 4235 }, { "epoch": 0.05167391807734026, "grad_norm": 2.897459682041592, "learning_rate": 4.991276459268762e-06, "loss": 0.8616, "step": 4240 }, { "epoch": 0.05173485430148806, "grad_norm": 2.585440606481318, "learning_rate": 4.990955740859525e-06, "loss": 0.8778, "step": 4245 }, { "epoch": 0.05179579052563587, "grad_norm": 2.6889411705607658, "learning_rate": 4.990635022450289e-06, "loss": 0.921, "step": 4250 }, { "epoch": 0.05185672674978368, "grad_norm": 2.945400698445776, "learning_rate": 4.990314304041052e-06, "loss": 0.9092, "step": 4255 }, { "epoch": 0.05191766297393148, "grad_norm": 3.105435040568432, "learning_rate": 4.989993585631816e-06, "loss": 0.9446, "step": 4260 }, { "epoch": 0.05197859919807929, "grad_norm": 2.6488806307530344, "learning_rate": 4.989672867222579e-06, "loss": 0.8717, "step": 4265 }, { "epoch": 0.0520395354222271, "grad_norm": 3.147528594701163, "learning_rate": 4.989352148813342e-06, "loss": 0.8952, "step": 4270 }, { "epoch": 0.05210047164637491, "grad_norm": 2.8196078971350693, "learning_rate": 4.989031430404106e-06, "loss": 0.8713, "step": 4275 }, { "epoch": 0.05216140787052271, "grad_norm": 2.6248346219739904, "learning_rate": 4.988710711994869e-06, "loss": 0.7961, "step": 4280 }, { "epoch": 0.05222234409467052, "grad_norm": 3.03532589696951, "learning_rate": 4.988389993585633e-06, "loss": 0.8484, "step": 4285 }, { "epoch": 0.05228328031881833, "grad_norm": 2.5838825930904235, "learning_rate": 4.988069275176396e-06, "loss": 0.8414, "step": 4290 }, { "epoch": 0.05234421654296613, "grad_norm": 3.253228377482759, "learning_rate": 4.987748556767159e-06, "loss": 0.8361, "step": 4295 }, { "epoch": 0.05240515276711394, "grad_norm": 2.823755153459167, "learning_rate": 4.9874278383579225e-06, "loss": 0.8968, "step": 4300 }, { "epoch": 0.05246608899126175, "grad_norm": 2.5650731651769636, "learning_rate": 4.9871071199486855e-06, "loss": 0.8379, "step": 4305 }, { "epoch": 0.05252702521540955, "grad_norm": 3.7928684118429383, "learning_rate": 4.986786401539449e-06, "loss": 0.8373, "step": 4310 }, { "epoch": 0.05258796143955736, "grad_norm": 3.047893205656645, "learning_rate": 4.986465683130212e-06, "loss": 0.865, "step": 4315 }, { "epoch": 0.05264889766370517, "grad_norm": 3.006513903470706, "learning_rate": 4.9861449647209754e-06, "loss": 0.9018, "step": 4320 }, { "epoch": 0.052709833887852976, "grad_norm": 2.64868913853425, "learning_rate": 4.9858242463117385e-06, "loss": 0.8572, "step": 4325 }, { "epoch": 0.05277077011200078, "grad_norm": 2.8674872772750564, "learning_rate": 4.985503527902502e-06, "loss": 0.8344, "step": 4330 }, { "epoch": 0.05283170633614859, "grad_norm": 3.4799323988446536, "learning_rate": 4.985182809493265e-06, "loss": 0.8785, "step": 4335 }, { "epoch": 0.052892642560296396, "grad_norm": 2.587672979567204, "learning_rate": 4.984862091084028e-06, "loss": 0.9157, "step": 4340 }, { "epoch": 0.0529535787844442, "grad_norm": 2.9325032806581, "learning_rate": 4.984541372674792e-06, "loss": 0.8818, "step": 4345 }, { "epoch": 0.05301451500859201, "grad_norm": 3.4021373824425756, "learning_rate": 4.984220654265555e-06, "loss": 0.8926, "step": 4350 }, { "epoch": 0.053075451232739816, "grad_norm": 2.9376513678039524, "learning_rate": 4.983899935856318e-06, "loss": 0.8671, "step": 4355 }, { "epoch": 0.05313638745688762, "grad_norm": 3.671625017644929, "learning_rate": 4.983579217447082e-06, "loss": 0.8628, "step": 4360 }, { "epoch": 0.05319732368103543, "grad_norm": 2.7135289849333835, "learning_rate": 4.983258499037845e-06, "loss": 0.8183, "step": 4365 }, { "epoch": 0.053258259905183236, "grad_norm": 4.39612587776408, "learning_rate": 4.982937780628608e-06, "loss": 0.9092, "step": 4370 }, { "epoch": 0.053319196129331045, "grad_norm": 3.184699919153222, "learning_rate": 4.982617062219372e-06, "loss": 0.9128, "step": 4375 }, { "epoch": 0.05338013235347885, "grad_norm": 2.941763587658692, "learning_rate": 4.982296343810135e-06, "loss": 0.8254, "step": 4380 }, { "epoch": 0.053441068577626656, "grad_norm": 3.1432106333282466, "learning_rate": 4.981975625400898e-06, "loss": 0.8407, "step": 4385 }, { "epoch": 0.053502004801774465, "grad_norm": 3.07316802154918, "learning_rate": 4.981654906991662e-06, "loss": 0.8941, "step": 4390 }, { "epoch": 0.05356294102592227, "grad_norm": 2.608337858104819, "learning_rate": 4.981334188582425e-06, "loss": 0.8594, "step": 4395 }, { "epoch": 0.053623877250070076, "grad_norm": 3.1333915948137863, "learning_rate": 4.981013470173188e-06, "loss": 0.9285, "step": 4400 }, { "epoch": 0.053684813474217885, "grad_norm": 3.324846047829983, "learning_rate": 4.980692751763952e-06, "loss": 0.8526, "step": 4405 }, { "epoch": 0.05374574969836569, "grad_norm": 2.407937411518678, "learning_rate": 4.980372033354715e-06, "loss": 0.938, "step": 4410 }, { "epoch": 0.053806685922513496, "grad_norm": 3.203305908951927, "learning_rate": 4.980051314945478e-06, "loss": 0.9176, "step": 4415 }, { "epoch": 0.053867622146661305, "grad_norm": 2.7185718937211854, "learning_rate": 4.979730596536242e-06, "loss": 0.8782, "step": 4420 }, { "epoch": 0.053928558370809114, "grad_norm": 3.5169560219348996, "learning_rate": 4.979409878127005e-06, "loss": 0.9088, "step": 4425 }, { "epoch": 0.053989494594956916, "grad_norm": 2.432148808387339, "learning_rate": 4.979089159717769e-06, "loss": 0.8476, "step": 4430 }, { "epoch": 0.054050430819104725, "grad_norm": 2.295984550525731, "learning_rate": 4.978768441308532e-06, "loss": 0.7991, "step": 4435 }, { "epoch": 0.054111367043252534, "grad_norm": 2.8335664675482204, "learning_rate": 4.978447722899295e-06, "loss": 0.792, "step": 4440 }, { "epoch": 0.054172303267400336, "grad_norm": 3.10779674680694, "learning_rate": 4.9781270044900585e-06, "loss": 0.8043, "step": 4445 }, { "epoch": 0.054233239491548145, "grad_norm": 2.585635676658635, "learning_rate": 4.9778062860808215e-06, "loss": 0.8433, "step": 4450 }, { "epoch": 0.054294175715695954, "grad_norm": 3.7452102005258565, "learning_rate": 4.977485567671585e-06, "loss": 0.8111, "step": 4455 }, { "epoch": 0.05435511193984376, "grad_norm": 2.4083060744269456, "learning_rate": 4.977164849262348e-06, "loss": 0.906, "step": 4460 }, { "epoch": 0.054416048163991565, "grad_norm": 2.9670331606881657, "learning_rate": 4.976844130853111e-06, "loss": 0.9638, "step": 4465 }, { "epoch": 0.054476984388139374, "grad_norm": 3.964815813585893, "learning_rate": 4.976523412443875e-06, "loss": 0.9054, "step": 4470 }, { "epoch": 0.05453792061228718, "grad_norm": 3.124022059530714, "learning_rate": 4.976202694034638e-06, "loss": 0.9114, "step": 4475 }, { "epoch": 0.054598856836434985, "grad_norm": 2.8360787221654986, "learning_rate": 4.975881975625401e-06, "loss": 0.8429, "step": 4480 }, { "epoch": 0.054659793060582794, "grad_norm": 2.890689567357029, "learning_rate": 4.975561257216164e-06, "loss": 0.9078, "step": 4485 }, { "epoch": 0.0547207292847306, "grad_norm": 3.113642286287127, "learning_rate": 4.975240538806928e-06, "loss": 0.8946, "step": 4490 }, { "epoch": 0.054781665508878405, "grad_norm": 2.6049619410244738, "learning_rate": 4.974919820397691e-06, "loss": 0.8871, "step": 4495 }, { "epoch": 0.054842601733026214, "grad_norm": 2.4299046567035463, "learning_rate": 4.974599101988454e-06, "loss": 0.8752, "step": 4500 }, { "epoch": 0.05490353795717402, "grad_norm": 2.4173067462136024, "learning_rate": 4.974278383579218e-06, "loss": 0.8386, "step": 4505 }, { "epoch": 0.05496447418132183, "grad_norm": 2.5806647843516832, "learning_rate": 4.973957665169981e-06, "loss": 0.91, "step": 4510 }, { "epoch": 0.055025410405469634, "grad_norm": 3.7301419106538987, "learning_rate": 4.973636946760744e-06, "loss": 0.9427, "step": 4515 }, { "epoch": 0.05508634662961744, "grad_norm": 2.3818846398501106, "learning_rate": 4.973316228351508e-06, "loss": 0.8114, "step": 4520 }, { "epoch": 0.05514728285376525, "grad_norm": 3.3052054668584656, "learning_rate": 4.972995509942271e-06, "loss": 0.8578, "step": 4525 }, { "epoch": 0.055208219077913054, "grad_norm": 2.931176299211362, "learning_rate": 4.972674791533034e-06, "loss": 0.9151, "step": 4530 }, { "epoch": 0.05526915530206086, "grad_norm": 2.4552299970514957, "learning_rate": 4.972354073123798e-06, "loss": 0.9042, "step": 4535 }, { "epoch": 0.05533009152620867, "grad_norm": 2.5695398444606736, "learning_rate": 4.972033354714561e-06, "loss": 0.9068, "step": 4540 }, { "epoch": 0.055391027750356474, "grad_norm": 3.313814463750054, "learning_rate": 4.971712636305324e-06, "loss": 0.8775, "step": 4545 }, { "epoch": 0.05545196397450428, "grad_norm": 4.167144233595884, "learning_rate": 4.971391917896088e-06, "loss": 0.8563, "step": 4550 }, { "epoch": 0.05551290019865209, "grad_norm": 2.97828085167141, "learning_rate": 4.971071199486851e-06, "loss": 0.8667, "step": 4555 }, { "epoch": 0.0555738364227999, "grad_norm": 2.9026304792166857, "learning_rate": 4.970750481077614e-06, "loss": 0.8885, "step": 4560 }, { "epoch": 0.0556347726469477, "grad_norm": 2.4506312448704013, "learning_rate": 4.970429762668378e-06, "loss": 0.9199, "step": 4565 }, { "epoch": 0.05569570887109551, "grad_norm": 3.8117460508014185, "learning_rate": 4.970109044259141e-06, "loss": 0.9272, "step": 4570 }, { "epoch": 0.05575664509524332, "grad_norm": 4.7810697674246905, "learning_rate": 4.969788325849904e-06, "loss": 0.8483, "step": 4575 }, { "epoch": 0.05581758131939112, "grad_norm": 2.550109462257968, "learning_rate": 4.9694676074406676e-06, "loss": 0.8829, "step": 4580 }, { "epoch": 0.05587851754353893, "grad_norm": 2.7076452246763942, "learning_rate": 4.9691468890314306e-06, "loss": 0.8846, "step": 4585 }, { "epoch": 0.05593945376768674, "grad_norm": 2.878859136021422, "learning_rate": 4.9688261706221944e-06, "loss": 0.9469, "step": 4590 }, { "epoch": 0.05600038999183454, "grad_norm": 2.6268912791649437, "learning_rate": 4.9685054522129575e-06, "loss": 0.8916, "step": 4595 }, { "epoch": 0.05606132621598235, "grad_norm": 2.7342718206976975, "learning_rate": 4.968184733803721e-06, "loss": 0.9278, "step": 4600 }, { "epoch": 0.05612226244013016, "grad_norm": 3.534728077041669, "learning_rate": 4.967864015394484e-06, "loss": 0.8552, "step": 4605 }, { "epoch": 0.05618319866427797, "grad_norm": 2.650532467316838, "learning_rate": 4.967543296985247e-06, "loss": 0.8972, "step": 4610 }, { "epoch": 0.05624413488842577, "grad_norm": 3.401368471659767, "learning_rate": 4.967222578576011e-06, "loss": 0.8851, "step": 4615 }, { "epoch": 0.05630507111257358, "grad_norm": 4.100484763642169, "learning_rate": 4.966901860166774e-06, "loss": 0.8894, "step": 4620 }, { "epoch": 0.05636600733672139, "grad_norm": 2.7626559460395486, "learning_rate": 4.966581141757537e-06, "loss": 0.8434, "step": 4625 }, { "epoch": 0.05642694356086919, "grad_norm": 3.060546575351353, "learning_rate": 4.966260423348301e-06, "loss": 0.8547, "step": 4630 }, { "epoch": 0.056487879785017, "grad_norm": 2.5360267241877676, "learning_rate": 4.965939704939064e-06, "loss": 0.8512, "step": 4635 }, { "epoch": 0.05654881600916481, "grad_norm": 4.1358632000016025, "learning_rate": 4.965618986529827e-06, "loss": 0.834, "step": 4640 }, { "epoch": 0.05660975223331261, "grad_norm": 3.605781143849183, "learning_rate": 4.965298268120591e-06, "loss": 0.9284, "step": 4645 }, { "epoch": 0.05667068845746042, "grad_norm": 2.8064164484594287, "learning_rate": 4.964977549711354e-06, "loss": 0.8968, "step": 4650 }, { "epoch": 0.05673162468160823, "grad_norm": 2.7947879067639985, "learning_rate": 4.964656831302117e-06, "loss": 0.8633, "step": 4655 }, { "epoch": 0.05679256090575604, "grad_norm": 2.854630734175736, "learning_rate": 4.96433611289288e-06, "loss": 0.8972, "step": 4660 }, { "epoch": 0.05685349712990384, "grad_norm": 3.115642997479381, "learning_rate": 4.964015394483644e-06, "loss": 0.802, "step": 4665 }, { "epoch": 0.05691443335405165, "grad_norm": 2.770164790499687, "learning_rate": 4.963694676074407e-06, "loss": 0.8463, "step": 4670 }, { "epoch": 0.05697536957819946, "grad_norm": 2.653317023470389, "learning_rate": 4.96337395766517e-06, "loss": 0.9141, "step": 4675 }, { "epoch": 0.05703630580234726, "grad_norm": 3.1049574477257775, "learning_rate": 4.963053239255934e-06, "loss": 0.8683, "step": 4680 }, { "epoch": 0.05709724202649507, "grad_norm": 2.9333262530998407, "learning_rate": 4.962732520846697e-06, "loss": 0.8594, "step": 4685 }, { "epoch": 0.05715817825064288, "grad_norm": 2.7502671557607967, "learning_rate": 4.96241180243746e-06, "loss": 0.7997, "step": 4690 }, { "epoch": 0.05721911447479069, "grad_norm": 2.8209367215015884, "learning_rate": 4.962091084028224e-06, "loss": 0.8693, "step": 4695 }, { "epoch": 0.05728005069893849, "grad_norm": 2.618392556499458, "learning_rate": 4.961770365618987e-06, "loss": 0.9362, "step": 4700 }, { "epoch": 0.0573409869230863, "grad_norm": 2.5156284565366906, "learning_rate": 4.96144964720975e-06, "loss": 0.9132, "step": 4705 }, { "epoch": 0.05740192314723411, "grad_norm": 3.3386452589433158, "learning_rate": 4.961128928800514e-06, "loss": 0.9003, "step": 4710 }, { "epoch": 0.05746285937138191, "grad_norm": 2.3210742493812226, "learning_rate": 4.960808210391277e-06, "loss": 0.845, "step": 4715 }, { "epoch": 0.05752379559552972, "grad_norm": 2.9920132533725723, "learning_rate": 4.96048749198204e-06, "loss": 0.877, "step": 4720 }, { "epoch": 0.05758473181967753, "grad_norm": 2.8460931157406892, "learning_rate": 4.9601667735728035e-06, "loss": 0.8664, "step": 4725 }, { "epoch": 0.05764566804382533, "grad_norm": 2.5469722170570477, "learning_rate": 4.9598460551635665e-06, "loss": 0.8663, "step": 4730 }, { "epoch": 0.05770660426797314, "grad_norm": 2.6111854272913595, "learning_rate": 4.95952533675433e-06, "loss": 0.9751, "step": 4735 }, { "epoch": 0.05776754049212095, "grad_norm": 2.38639065990386, "learning_rate": 4.959204618345093e-06, "loss": 0.864, "step": 4740 }, { "epoch": 0.057828476716268756, "grad_norm": 2.4736679780763406, "learning_rate": 4.9588838999358564e-06, "loss": 0.8885, "step": 4745 }, { "epoch": 0.05788941294041656, "grad_norm": 2.4147212526512614, "learning_rate": 4.95856318152662e-06, "loss": 0.9183, "step": 4750 }, { "epoch": 0.05795034916456437, "grad_norm": 2.6080309955140915, "learning_rate": 4.958242463117383e-06, "loss": 0.8471, "step": 4755 }, { "epoch": 0.058011285388712176, "grad_norm": 2.7240986502174556, "learning_rate": 4.957921744708147e-06, "loss": 0.901, "step": 4760 }, { "epoch": 0.05807222161285998, "grad_norm": 3.6752033408878875, "learning_rate": 4.95760102629891e-06, "loss": 0.8926, "step": 4765 }, { "epoch": 0.05813315783700779, "grad_norm": 3.2841768282489636, "learning_rate": 4.957280307889673e-06, "loss": 0.9291, "step": 4770 }, { "epoch": 0.058194094061155596, "grad_norm": 3.1384311235058133, "learning_rate": 4.956959589480437e-06, "loss": 0.7782, "step": 4775 }, { "epoch": 0.0582550302853034, "grad_norm": 2.48823883362724, "learning_rate": 4.9566388710712e-06, "loss": 0.7855, "step": 4780 }, { "epoch": 0.05831596650945121, "grad_norm": 2.647534012249943, "learning_rate": 4.956318152661963e-06, "loss": 0.8845, "step": 4785 }, { "epoch": 0.058376902733599016, "grad_norm": 2.4082826005880307, "learning_rate": 4.955997434252727e-06, "loss": 0.9473, "step": 4790 }, { "epoch": 0.058437838957746825, "grad_norm": 2.7274799386038726, "learning_rate": 4.95567671584349e-06, "loss": 0.9382, "step": 4795 }, { "epoch": 0.05849877518189463, "grad_norm": 2.509357743481677, "learning_rate": 4.955355997434253e-06, "loss": 0.8167, "step": 4800 }, { "epoch": 0.058559711406042436, "grad_norm": 2.723015754480411, "learning_rate": 4.955035279025017e-06, "loss": 0.8489, "step": 4805 }, { "epoch": 0.058620647630190245, "grad_norm": 2.295198152539672, "learning_rate": 4.95471456061578e-06, "loss": 0.9079, "step": 4810 }, { "epoch": 0.05868158385433805, "grad_norm": 2.424838027482766, "learning_rate": 4.954393842206543e-06, "loss": 0.8586, "step": 4815 }, { "epoch": 0.058742520078485856, "grad_norm": 2.783684034940123, "learning_rate": 4.954073123797306e-06, "loss": 0.9213, "step": 4820 }, { "epoch": 0.058803456302633665, "grad_norm": 3.2306503522899344, "learning_rate": 4.95375240538807e-06, "loss": 0.9356, "step": 4825 }, { "epoch": 0.05886439252678147, "grad_norm": 2.749660181541375, "learning_rate": 4.953431686978833e-06, "loss": 0.8761, "step": 4830 }, { "epoch": 0.058925328750929276, "grad_norm": 3.2786906290802493, "learning_rate": 4.953110968569596e-06, "loss": 0.8825, "step": 4835 }, { "epoch": 0.058986264975077085, "grad_norm": 2.63423897463124, "learning_rate": 4.95279025016036e-06, "loss": 0.911, "step": 4840 }, { "epoch": 0.059047201199224894, "grad_norm": 3.097315524020849, "learning_rate": 4.952469531751123e-06, "loss": 0.8859, "step": 4845 }, { "epoch": 0.059108137423372696, "grad_norm": 4.2202099713850085, "learning_rate": 4.952148813341886e-06, "loss": 0.9245, "step": 4850 }, { "epoch": 0.059169073647520505, "grad_norm": 2.925004949389726, "learning_rate": 4.9518280949326496e-06, "loss": 0.9102, "step": 4855 }, { "epoch": 0.059230009871668314, "grad_norm": 3.1852127477806924, "learning_rate": 4.951507376523413e-06, "loss": 0.8631, "step": 4860 }, { "epoch": 0.059290946095816116, "grad_norm": 2.2239908446459293, "learning_rate": 4.951186658114176e-06, "loss": 0.8667, "step": 4865 }, { "epoch": 0.059351882319963925, "grad_norm": 3.250238058138981, "learning_rate": 4.9508659397049395e-06, "loss": 0.8885, "step": 4870 }, { "epoch": 0.059412818544111734, "grad_norm": 2.9918788516620847, "learning_rate": 4.9505452212957025e-06, "loss": 0.9155, "step": 4875 }, { "epoch": 0.05947375476825954, "grad_norm": 2.4543927232341254, "learning_rate": 4.950224502886466e-06, "loss": 0.9127, "step": 4880 }, { "epoch": 0.059534690992407345, "grad_norm": 3.2880463776316557, "learning_rate": 4.949903784477229e-06, "loss": 0.927, "step": 4885 }, { "epoch": 0.059595627216555154, "grad_norm": 4.057709961964419, "learning_rate": 4.949583066067992e-06, "loss": 0.844, "step": 4890 }, { "epoch": 0.05965656344070296, "grad_norm": 2.4559693183235103, "learning_rate": 4.949262347658756e-06, "loss": 0.8124, "step": 4895 }, { "epoch": 0.059717499664850765, "grad_norm": 2.769137378934702, "learning_rate": 4.948941629249519e-06, "loss": 0.917, "step": 4900 }, { "epoch": 0.059778435888998574, "grad_norm": 2.8689012889082157, "learning_rate": 4.948620910840283e-06, "loss": 0.8512, "step": 4905 }, { "epoch": 0.05983937211314638, "grad_norm": 2.6929471679187955, "learning_rate": 4.948300192431046e-06, "loss": 0.9256, "step": 4910 }, { "epoch": 0.059900308337294185, "grad_norm": 3.7747485376399963, "learning_rate": 4.947979474021809e-06, "loss": 0.852, "step": 4915 }, { "epoch": 0.059961244561441994, "grad_norm": 2.8171171559791306, "learning_rate": 4.947658755612573e-06, "loss": 0.9418, "step": 4920 }, { "epoch": 0.0600221807855898, "grad_norm": 2.7846456500462744, "learning_rate": 4.947338037203336e-06, "loss": 0.8741, "step": 4925 }, { "epoch": 0.06008311700973761, "grad_norm": 3.0905037956013546, "learning_rate": 4.947017318794099e-06, "loss": 0.8975, "step": 4930 }, { "epoch": 0.060144053233885414, "grad_norm": 2.4870294117207283, "learning_rate": 4.946696600384863e-06, "loss": 0.8574, "step": 4935 }, { "epoch": 0.06020498945803322, "grad_norm": 2.787968848405539, "learning_rate": 4.946375881975626e-06, "loss": 0.8705, "step": 4940 }, { "epoch": 0.06026592568218103, "grad_norm": 2.939278330251377, "learning_rate": 4.946055163566389e-06, "loss": 0.8193, "step": 4945 }, { "epoch": 0.060326861906328834, "grad_norm": 3.026172026782189, "learning_rate": 4.945734445157153e-06, "loss": 0.9282, "step": 4950 }, { "epoch": 0.06038779813047664, "grad_norm": 2.8768667166101394, "learning_rate": 4.945413726747916e-06, "loss": 0.868, "step": 4955 }, { "epoch": 0.06044873435462445, "grad_norm": 3.047512546302642, "learning_rate": 4.945093008338679e-06, "loss": 0.9056, "step": 4960 }, { "epoch": 0.060509670578772254, "grad_norm": 2.3101904215700344, "learning_rate": 4.944772289929443e-06, "loss": 0.8598, "step": 4965 }, { "epoch": 0.06057060680292006, "grad_norm": 3.153531262311097, "learning_rate": 4.944451571520206e-06, "loss": 0.8688, "step": 4970 }, { "epoch": 0.06063154302706787, "grad_norm": 2.7762730754010354, "learning_rate": 4.944130853110969e-06, "loss": 0.9065, "step": 4975 }, { "epoch": 0.06069247925121568, "grad_norm": 2.305907862522277, "learning_rate": 4.943810134701733e-06, "loss": 0.8107, "step": 4980 }, { "epoch": 0.06075341547536348, "grad_norm": 2.6723473092504237, "learning_rate": 4.943489416292496e-06, "loss": 0.8561, "step": 4985 }, { "epoch": 0.06081435169951129, "grad_norm": 3.0594966093032983, "learning_rate": 4.943168697883259e-06, "loss": 0.8718, "step": 4990 }, { "epoch": 0.0608752879236591, "grad_norm": 2.351754363382407, "learning_rate": 4.942847979474022e-06, "loss": 0.8138, "step": 4995 }, { "epoch": 0.0609362241478069, "grad_norm": 2.5231941438693966, "learning_rate": 4.9425272610647855e-06, "loss": 0.9229, "step": 5000 }, { "epoch": 0.06099716037195471, "grad_norm": 2.559436511222845, "learning_rate": 4.9422065426555485e-06, "loss": 0.8055, "step": 5005 }, { "epoch": 0.06105809659610252, "grad_norm": 2.7618133164904277, "learning_rate": 4.9418858242463116e-06, "loss": 0.9639, "step": 5010 }, { "epoch": 0.06111903282025032, "grad_norm": 2.3088341970755084, "learning_rate": 4.9415651058370754e-06, "loss": 0.8822, "step": 5015 }, { "epoch": 0.06117996904439813, "grad_norm": 3.5586919483158654, "learning_rate": 4.9412443874278384e-06, "loss": 0.8775, "step": 5020 }, { "epoch": 0.06124090526854594, "grad_norm": 2.761993226629295, "learning_rate": 4.9409236690186015e-06, "loss": 0.7963, "step": 5025 }, { "epoch": 0.06130184149269375, "grad_norm": 2.3115251981013705, "learning_rate": 4.940602950609365e-06, "loss": 0.835, "step": 5030 }, { "epoch": 0.06136277771684155, "grad_norm": 2.9027240831551873, "learning_rate": 4.940282232200128e-06, "loss": 0.7849, "step": 5035 }, { "epoch": 0.06142371394098936, "grad_norm": 2.5259889726245532, "learning_rate": 4.939961513790892e-06, "loss": 0.9444, "step": 5040 }, { "epoch": 0.06148465016513717, "grad_norm": 4.038501777116391, "learning_rate": 4.939640795381655e-06, "loss": 0.8371, "step": 5045 }, { "epoch": 0.06154558638928497, "grad_norm": 3.80452365652981, "learning_rate": 4.939320076972419e-06, "loss": 1.0111, "step": 5050 }, { "epoch": 0.06160652261343278, "grad_norm": 2.870216934131181, "learning_rate": 4.938999358563182e-06, "loss": 0.824, "step": 5055 }, { "epoch": 0.06166745883758059, "grad_norm": 2.6880713218291907, "learning_rate": 4.938678640153945e-06, "loss": 0.8482, "step": 5060 }, { "epoch": 0.06172839506172839, "grad_norm": 2.461804571098925, "learning_rate": 4.938357921744709e-06, "loss": 0.8732, "step": 5065 }, { "epoch": 0.0617893312858762, "grad_norm": 2.271420974697857, "learning_rate": 4.938037203335472e-06, "loss": 0.846, "step": 5070 }, { "epoch": 0.06185026751002401, "grad_norm": 3.164639917872496, "learning_rate": 4.937716484926235e-06, "loss": 0.8329, "step": 5075 }, { "epoch": 0.06191120373417182, "grad_norm": 3.6798324507835383, "learning_rate": 4.937395766516999e-06, "loss": 0.9056, "step": 5080 }, { "epoch": 0.06197213995831962, "grad_norm": 3.3669812475106062, "learning_rate": 4.937075048107762e-06, "loss": 0.9238, "step": 5085 }, { "epoch": 0.06203307618246743, "grad_norm": 2.454276280050244, "learning_rate": 4.936754329698525e-06, "loss": 0.9199, "step": 5090 }, { "epoch": 0.06209401240661524, "grad_norm": 3.1611380596895247, "learning_rate": 4.936433611289289e-06, "loss": 0.8863, "step": 5095 }, { "epoch": 0.06215494863076304, "grad_norm": 2.518340101892241, "learning_rate": 4.936112892880052e-06, "loss": 0.8707, "step": 5100 }, { "epoch": 0.06221588485491085, "grad_norm": 3.3354214822161823, "learning_rate": 4.935792174470815e-06, "loss": 0.8414, "step": 5105 }, { "epoch": 0.06227682107905866, "grad_norm": 3.925366707818128, "learning_rate": 4.935471456061579e-06, "loss": 0.8992, "step": 5110 }, { "epoch": 0.06233775730320647, "grad_norm": 2.5394558246595516, "learning_rate": 4.935150737652342e-06, "loss": 0.8956, "step": 5115 }, { "epoch": 0.06239869352735427, "grad_norm": 2.3530701453669938, "learning_rate": 4.934830019243105e-06, "loss": 0.9385, "step": 5120 }, { "epoch": 0.06245962975150208, "grad_norm": 2.0534909579558995, "learning_rate": 4.9345093008338686e-06, "loss": 0.8412, "step": 5125 }, { "epoch": 0.06252056597564988, "grad_norm": 2.560324518378106, "learning_rate": 4.934188582424632e-06, "loss": 0.7971, "step": 5130 }, { "epoch": 0.06258150219979769, "grad_norm": 2.6083659690813947, "learning_rate": 4.933867864015395e-06, "loss": 0.8767, "step": 5135 }, { "epoch": 0.0626424384239455, "grad_norm": 2.7700592478461052, "learning_rate": 4.9335471456061585e-06, "loss": 0.9032, "step": 5140 }, { "epoch": 0.06270337464809331, "grad_norm": 2.9374964577208313, "learning_rate": 4.9332264271969215e-06, "loss": 0.9026, "step": 5145 }, { "epoch": 0.06276431087224112, "grad_norm": 2.711262322407839, "learning_rate": 4.9329057087876845e-06, "loss": 0.8985, "step": 5150 }, { "epoch": 0.06282524709638893, "grad_norm": 3.7666866416596387, "learning_rate": 4.932584990378448e-06, "loss": 0.9277, "step": 5155 }, { "epoch": 0.06288618332053672, "grad_norm": 2.2972866989469383, "learning_rate": 4.932264271969211e-06, "loss": 0.8771, "step": 5160 }, { "epoch": 0.06294711954468453, "grad_norm": 2.939692716977551, "learning_rate": 4.931943553559974e-06, "loss": 0.8321, "step": 5165 }, { "epoch": 0.06300805576883234, "grad_norm": 2.8905623819083863, "learning_rate": 4.931622835150737e-06, "loss": 0.8191, "step": 5170 }, { "epoch": 0.06306899199298015, "grad_norm": 2.729238648713164, "learning_rate": 4.931302116741501e-06, "loss": 0.9016, "step": 5175 }, { "epoch": 0.06312992821712796, "grad_norm": 4.770633891832591, "learning_rate": 4.930981398332264e-06, "loss": 0.9237, "step": 5180 }, { "epoch": 0.06319086444127577, "grad_norm": 2.4557264603208075, "learning_rate": 4.930660679923028e-06, "loss": 0.8291, "step": 5185 }, { "epoch": 0.06325180066542357, "grad_norm": 2.6135461186216298, "learning_rate": 4.930339961513791e-06, "loss": 0.8984, "step": 5190 }, { "epoch": 0.06331273688957137, "grad_norm": 2.932450539755879, "learning_rate": 4.930019243104554e-06, "loss": 0.9104, "step": 5195 }, { "epoch": 0.06337367311371918, "grad_norm": 2.556583705255972, "learning_rate": 4.929698524695318e-06, "loss": 0.8834, "step": 5200 }, { "epoch": 0.06343460933786699, "grad_norm": 2.510758348334057, "learning_rate": 4.929377806286081e-06, "loss": 0.8578, "step": 5205 }, { "epoch": 0.0634955455620148, "grad_norm": 2.3888468120339197, "learning_rate": 4.929057087876845e-06, "loss": 0.9296, "step": 5210 }, { "epoch": 0.0635564817861626, "grad_norm": 3.0105516989370082, "learning_rate": 4.928736369467608e-06, "loss": 0.925, "step": 5215 }, { "epoch": 0.06361741801031041, "grad_norm": 2.6710691478950244, "learning_rate": 4.928415651058371e-06, "loss": 0.7645, "step": 5220 }, { "epoch": 0.06367835423445821, "grad_norm": 2.697029584324094, "learning_rate": 4.928094932649135e-06, "loss": 0.7771, "step": 5225 }, { "epoch": 0.06373929045860602, "grad_norm": 3.192647797597726, "learning_rate": 4.927774214239898e-06, "loss": 0.9055, "step": 5230 }, { "epoch": 0.06380022668275383, "grad_norm": 3.387699228819277, "learning_rate": 4.927453495830662e-06, "loss": 0.9406, "step": 5235 }, { "epoch": 0.06386116290690164, "grad_norm": 2.3364654820567967, "learning_rate": 4.927132777421425e-06, "loss": 0.9026, "step": 5240 }, { "epoch": 0.06392209913104945, "grad_norm": 2.884869111703136, "learning_rate": 4.926812059012188e-06, "loss": 0.8918, "step": 5245 }, { "epoch": 0.06398303535519725, "grad_norm": 2.730622848836569, "learning_rate": 4.926491340602951e-06, "loss": 0.8564, "step": 5250 }, { "epoch": 0.06404397157934506, "grad_norm": 2.581194565902162, "learning_rate": 4.926170622193715e-06, "loss": 0.8525, "step": 5255 }, { "epoch": 0.06410490780349286, "grad_norm": 3.0621559435738384, "learning_rate": 4.925849903784478e-06, "loss": 0.939, "step": 5260 }, { "epoch": 0.06416584402764067, "grad_norm": 3.029328145330221, "learning_rate": 4.925529185375241e-06, "loss": 0.8965, "step": 5265 }, { "epoch": 0.06422678025178848, "grad_norm": 2.728778367372791, "learning_rate": 4.9252084669660045e-06, "loss": 0.8954, "step": 5270 }, { "epoch": 0.06428771647593629, "grad_norm": 2.9626951909019175, "learning_rate": 4.9248877485567675e-06, "loss": 0.9422, "step": 5275 }, { "epoch": 0.0643486527000841, "grad_norm": 2.7954700468229468, "learning_rate": 4.9245670301475306e-06, "loss": 0.9074, "step": 5280 }, { "epoch": 0.0644095889242319, "grad_norm": 2.4395538874520017, "learning_rate": 4.9242463117382944e-06, "loss": 0.9062, "step": 5285 }, { "epoch": 0.06447052514837971, "grad_norm": 3.375536555263763, "learning_rate": 4.9239255933290574e-06, "loss": 0.8772, "step": 5290 }, { "epoch": 0.06453146137252751, "grad_norm": 2.334595173238215, "learning_rate": 4.9236048749198205e-06, "loss": 0.8395, "step": 5295 }, { "epoch": 0.06459239759667532, "grad_norm": 3.029195737102442, "learning_rate": 4.923284156510584e-06, "loss": 0.8501, "step": 5300 }, { "epoch": 0.06465333382082313, "grad_norm": 4.759185020514222, "learning_rate": 4.922963438101347e-06, "loss": 0.8266, "step": 5305 }, { "epoch": 0.06471427004497093, "grad_norm": 2.947483019596026, "learning_rate": 4.92264271969211e-06, "loss": 0.8742, "step": 5310 }, { "epoch": 0.06477520626911874, "grad_norm": 2.4950087832816554, "learning_rate": 4.922322001282874e-06, "loss": 0.8939, "step": 5315 }, { "epoch": 0.06483614249326655, "grad_norm": 2.740045071490078, "learning_rate": 4.922001282873637e-06, "loss": 0.897, "step": 5320 }, { "epoch": 0.06489707871741436, "grad_norm": 3.7397030776871407, "learning_rate": 4.9216805644644e-06, "loss": 0.7898, "step": 5325 }, { "epoch": 0.06495801494156216, "grad_norm": 3.057691349951453, "learning_rate": 4.921359846055164e-06, "loss": 0.8345, "step": 5330 }, { "epoch": 0.06501895116570997, "grad_norm": 2.616419577027526, "learning_rate": 4.921039127645927e-06, "loss": 0.8653, "step": 5335 }, { "epoch": 0.06507988738985777, "grad_norm": 5.275771974871898, "learning_rate": 4.92071840923669e-06, "loss": 0.8809, "step": 5340 }, { "epoch": 0.06514082361400558, "grad_norm": 2.6874027587263867, "learning_rate": 4.920397690827454e-06, "loss": 0.8771, "step": 5345 }, { "epoch": 0.06520175983815339, "grad_norm": 2.4242991703539887, "learning_rate": 4.920076972418217e-06, "loss": 0.875, "step": 5350 }, { "epoch": 0.0652626960623012, "grad_norm": 2.4925231500684384, "learning_rate": 4.919756254008981e-06, "loss": 0.8473, "step": 5355 }, { "epoch": 0.065323632286449, "grad_norm": 2.5280515867022566, "learning_rate": 4.919435535599744e-06, "loss": 0.8596, "step": 5360 }, { "epoch": 0.0653845685105968, "grad_norm": 2.44651266447618, "learning_rate": 4.919114817190507e-06, "loss": 0.8594, "step": 5365 }, { "epoch": 0.06544550473474461, "grad_norm": 2.8148175934937725, "learning_rate": 4.918794098781271e-06, "loss": 0.9227, "step": 5370 }, { "epoch": 0.06550644095889242, "grad_norm": 2.704371595925388, "learning_rate": 4.918473380372034e-06, "loss": 0.8649, "step": 5375 }, { "epoch": 0.06556737718304023, "grad_norm": 2.856644535214019, "learning_rate": 4.918152661962798e-06, "loss": 0.9401, "step": 5380 }, { "epoch": 0.06562831340718804, "grad_norm": 3.429917639807311, "learning_rate": 4.917831943553561e-06, "loss": 0.9345, "step": 5385 }, { "epoch": 0.06568924963133585, "grad_norm": 2.8612849787055863, "learning_rate": 4.917511225144324e-06, "loss": 0.8742, "step": 5390 }, { "epoch": 0.06575018585548364, "grad_norm": 2.2847018906313354, "learning_rate": 4.9171905067350876e-06, "loss": 0.8503, "step": 5395 }, { "epoch": 0.06581112207963145, "grad_norm": 3.1319930425786766, "learning_rate": 4.916869788325851e-06, "loss": 0.8407, "step": 5400 }, { "epoch": 0.06587205830377926, "grad_norm": 2.6408547699342853, "learning_rate": 4.916549069916614e-06, "loss": 0.881, "step": 5405 }, { "epoch": 0.06593299452792707, "grad_norm": 3.466066922460106, "learning_rate": 4.916228351507377e-06, "loss": 0.827, "step": 5410 }, { "epoch": 0.06599393075207488, "grad_norm": 2.3533347459576452, "learning_rate": 4.9159076330981405e-06, "loss": 0.8199, "step": 5415 }, { "epoch": 0.06605486697622269, "grad_norm": 3.606721340896255, "learning_rate": 4.9155869146889035e-06, "loss": 0.8954, "step": 5420 }, { "epoch": 0.0661158032003705, "grad_norm": 3.140114000110389, "learning_rate": 4.9152661962796665e-06, "loss": 0.895, "step": 5425 }, { "epoch": 0.0661767394245183, "grad_norm": 3.066893475051485, "learning_rate": 4.91494547787043e-06, "loss": 0.9657, "step": 5430 }, { "epoch": 0.0662376756486661, "grad_norm": 2.5785416800724263, "learning_rate": 4.914624759461193e-06, "loss": 0.8724, "step": 5435 }, { "epoch": 0.06629861187281391, "grad_norm": 2.4824411767959655, "learning_rate": 4.914304041051956e-06, "loss": 0.7594, "step": 5440 }, { "epoch": 0.06635954809696172, "grad_norm": 2.734991266022788, "learning_rate": 4.91398332264272e-06, "loss": 0.8934, "step": 5445 }, { "epoch": 0.06642048432110953, "grad_norm": 2.9177273061196822, "learning_rate": 4.913662604233483e-06, "loss": 0.8772, "step": 5450 }, { "epoch": 0.06648142054525734, "grad_norm": 3.332351609726888, "learning_rate": 4.913341885824246e-06, "loss": 0.9116, "step": 5455 }, { "epoch": 0.06654235676940513, "grad_norm": 2.8218884592134263, "learning_rate": 4.91302116741501e-06, "loss": 0.8093, "step": 5460 }, { "epoch": 0.06660329299355294, "grad_norm": 2.770364249762085, "learning_rate": 4.912700449005773e-06, "loss": 0.8186, "step": 5465 }, { "epoch": 0.06666422921770075, "grad_norm": 2.226147116261329, "learning_rate": 4.912379730596536e-06, "loss": 0.8847, "step": 5470 }, { "epoch": 0.06672516544184856, "grad_norm": 3.0330884122329262, "learning_rate": 4.9120590121873e-06, "loss": 0.7338, "step": 5475 }, { "epoch": 0.06678610166599637, "grad_norm": 2.772029504065196, "learning_rate": 4.911738293778063e-06, "loss": 0.8404, "step": 5480 }, { "epoch": 0.06684703789014418, "grad_norm": 3.1199654348869705, "learning_rate": 4.911417575368826e-06, "loss": 0.8567, "step": 5485 }, { "epoch": 0.06690797411429199, "grad_norm": 2.8331843523839444, "learning_rate": 4.91109685695959e-06, "loss": 0.8385, "step": 5490 }, { "epoch": 0.06696891033843978, "grad_norm": 2.4476785869069158, "learning_rate": 4.910776138550353e-06, "loss": 0.8086, "step": 5495 }, { "epoch": 0.06702984656258759, "grad_norm": 2.6504431212850794, "learning_rate": 4.910455420141116e-06, "loss": 0.7861, "step": 5500 }, { "epoch": 0.0670907827867354, "grad_norm": 2.605466597996976, "learning_rate": 4.91013470173188e-06, "loss": 0.8737, "step": 5505 }, { "epoch": 0.06715171901088321, "grad_norm": 2.645674851909664, "learning_rate": 4.909813983322643e-06, "loss": 0.861, "step": 5510 }, { "epoch": 0.06721265523503102, "grad_norm": 3.059189941921467, "learning_rate": 4.909493264913407e-06, "loss": 0.8553, "step": 5515 }, { "epoch": 0.06727359145917883, "grad_norm": 2.524239416482313, "learning_rate": 4.90917254650417e-06, "loss": 0.9296, "step": 5520 }, { "epoch": 0.06733452768332664, "grad_norm": 2.589690946803285, "learning_rate": 4.908851828094934e-06, "loss": 0.8691, "step": 5525 }, { "epoch": 0.06739546390747443, "grad_norm": 2.3041724020775325, "learning_rate": 4.908531109685697e-06, "loss": 0.8892, "step": 5530 }, { "epoch": 0.06745640013162224, "grad_norm": 2.4578223648798647, "learning_rate": 4.90821039127646e-06, "loss": 0.845, "step": 5535 }, { "epoch": 0.06751733635577005, "grad_norm": 2.3621552032548063, "learning_rate": 4.9078896728672235e-06, "loss": 0.9407, "step": 5540 }, { "epoch": 0.06757827257991786, "grad_norm": 4.283218457038622, "learning_rate": 4.9075689544579865e-06, "loss": 0.841, "step": 5545 }, { "epoch": 0.06763920880406567, "grad_norm": 5.260979393234803, "learning_rate": 4.9072482360487496e-06, "loss": 0.8952, "step": 5550 }, { "epoch": 0.06770014502821348, "grad_norm": 2.7315323589165086, "learning_rate": 4.9069275176395134e-06, "loss": 0.8564, "step": 5555 }, { "epoch": 0.06776108125236129, "grad_norm": 2.6382068496050697, "learning_rate": 4.9066067992302764e-06, "loss": 0.831, "step": 5560 }, { "epoch": 0.06782201747650908, "grad_norm": 2.444343438602761, "learning_rate": 4.9062860808210395e-06, "loss": 0.8881, "step": 5565 }, { "epoch": 0.06788295370065689, "grad_norm": 2.7791128188058205, "learning_rate": 4.905965362411803e-06, "loss": 0.9381, "step": 5570 }, { "epoch": 0.0679438899248047, "grad_norm": 3.725546240479315, "learning_rate": 4.905644644002566e-06, "loss": 0.805, "step": 5575 }, { "epoch": 0.06800482614895251, "grad_norm": 2.719796391357469, "learning_rate": 4.905323925593329e-06, "loss": 0.8426, "step": 5580 }, { "epoch": 0.06806576237310032, "grad_norm": 2.935873318878117, "learning_rate": 4.905003207184092e-06, "loss": 0.9528, "step": 5585 }, { "epoch": 0.06812669859724813, "grad_norm": 2.766745746039371, "learning_rate": 4.904682488774856e-06, "loss": 0.8854, "step": 5590 }, { "epoch": 0.06818763482139592, "grad_norm": 2.8883922345491184, "learning_rate": 4.904361770365619e-06, "loss": 0.8526, "step": 5595 }, { "epoch": 0.06824857104554373, "grad_norm": 3.0697281841211663, "learning_rate": 4.904041051956382e-06, "loss": 0.8007, "step": 5600 }, { "epoch": 0.06830950726969154, "grad_norm": 2.449923300684263, "learning_rate": 4.903720333547146e-06, "loss": 0.8749, "step": 5605 }, { "epoch": 0.06837044349383935, "grad_norm": 2.7118070681856357, "learning_rate": 4.903399615137909e-06, "loss": 0.8548, "step": 5610 }, { "epoch": 0.06843137971798716, "grad_norm": 2.7389504038508448, "learning_rate": 4.903078896728672e-06, "loss": 0.8164, "step": 5615 }, { "epoch": 0.06849231594213497, "grad_norm": 3.026776117702652, "learning_rate": 4.902758178319436e-06, "loss": 0.7922, "step": 5620 }, { "epoch": 0.06855325216628277, "grad_norm": 2.834959073747205, "learning_rate": 4.902437459910199e-06, "loss": 0.7646, "step": 5625 }, { "epoch": 0.06861418839043057, "grad_norm": 2.9441206431157756, "learning_rate": 4.902116741500962e-06, "loss": 0.9082, "step": 5630 }, { "epoch": 0.06867512461457838, "grad_norm": 2.6192224395930803, "learning_rate": 4.901796023091726e-06, "loss": 0.9153, "step": 5635 }, { "epoch": 0.06873606083872619, "grad_norm": 2.734009448271947, "learning_rate": 4.901475304682489e-06, "loss": 0.8456, "step": 5640 }, { "epoch": 0.068796997062874, "grad_norm": 2.529394089314517, "learning_rate": 4.901154586273252e-06, "loss": 0.8611, "step": 5645 }, { "epoch": 0.0688579332870218, "grad_norm": 4.1841020518784395, "learning_rate": 4.900833867864016e-06, "loss": 0.8389, "step": 5650 }, { "epoch": 0.06891886951116961, "grad_norm": 2.6684884605696793, "learning_rate": 4.900513149454779e-06, "loss": 0.868, "step": 5655 }, { "epoch": 0.06897980573531742, "grad_norm": 3.207561584933704, "learning_rate": 4.900192431045543e-06, "loss": 0.8642, "step": 5660 }, { "epoch": 0.06904074195946522, "grad_norm": 2.841887485585454, "learning_rate": 4.899871712636306e-06, "loss": 0.9411, "step": 5665 }, { "epoch": 0.06910167818361303, "grad_norm": 3.0278005116482114, "learning_rate": 4.899550994227069e-06, "loss": 0.804, "step": 5670 }, { "epoch": 0.06916261440776084, "grad_norm": 3.4069359467326135, "learning_rate": 4.899230275817833e-06, "loss": 0.8832, "step": 5675 }, { "epoch": 0.06922355063190865, "grad_norm": 2.808393187836781, "learning_rate": 4.898909557408596e-06, "loss": 0.8842, "step": 5680 }, { "epoch": 0.06928448685605645, "grad_norm": 2.692744184844298, "learning_rate": 4.8985888389993595e-06, "loss": 0.8773, "step": 5685 }, { "epoch": 0.06934542308020426, "grad_norm": 2.769456033551442, "learning_rate": 4.8982681205901225e-06, "loss": 0.8765, "step": 5690 }, { "epoch": 0.06940635930435206, "grad_norm": 2.312641730921403, "learning_rate": 4.8979474021808855e-06, "loss": 0.9576, "step": 5695 }, { "epoch": 0.06946729552849987, "grad_norm": 2.6061226119341505, "learning_rate": 4.897626683771649e-06, "loss": 0.9083, "step": 5700 }, { "epoch": 0.06952823175264768, "grad_norm": 2.77967357603841, "learning_rate": 4.897305965362412e-06, "loss": 0.792, "step": 5705 }, { "epoch": 0.06958916797679549, "grad_norm": 2.402023266561998, "learning_rate": 4.896985246953175e-06, "loss": 0.9002, "step": 5710 }, { "epoch": 0.0696501042009433, "grad_norm": 2.975563561506904, "learning_rate": 4.896664528543939e-06, "loss": 0.8708, "step": 5715 }, { "epoch": 0.0697110404250911, "grad_norm": 2.2262362578249992, "learning_rate": 4.896343810134702e-06, "loss": 0.8034, "step": 5720 }, { "epoch": 0.06977197664923891, "grad_norm": 4.290519616858721, "learning_rate": 4.896023091725465e-06, "loss": 0.8427, "step": 5725 }, { "epoch": 0.06983291287338671, "grad_norm": 4.040395763637728, "learning_rate": 4.895702373316229e-06, "loss": 0.8746, "step": 5730 }, { "epoch": 0.06989384909753452, "grad_norm": 2.9253388409069374, "learning_rate": 4.895381654906992e-06, "loss": 0.8704, "step": 5735 }, { "epoch": 0.06995478532168233, "grad_norm": 2.6850840025310267, "learning_rate": 4.895060936497755e-06, "loss": 0.8761, "step": 5740 }, { "epoch": 0.07001572154583013, "grad_norm": 2.2180008667382523, "learning_rate": 4.894740218088519e-06, "loss": 0.825, "step": 5745 }, { "epoch": 0.07007665776997794, "grad_norm": 2.910374078120218, "learning_rate": 4.894419499679282e-06, "loss": 0.8124, "step": 5750 }, { "epoch": 0.07013759399412575, "grad_norm": 3.362426229461176, "learning_rate": 4.894098781270045e-06, "loss": 0.8166, "step": 5755 }, { "epoch": 0.07019853021827356, "grad_norm": 3.98995302293536, "learning_rate": 4.893778062860808e-06, "loss": 0.9225, "step": 5760 }, { "epoch": 0.07025946644242136, "grad_norm": 2.5676786747961127, "learning_rate": 4.893457344451572e-06, "loss": 0.7974, "step": 5765 }, { "epoch": 0.07032040266656917, "grad_norm": 2.205299080272891, "learning_rate": 4.893136626042335e-06, "loss": 0.8437, "step": 5770 }, { "epoch": 0.07038133889071697, "grad_norm": 2.8128945818567144, "learning_rate": 4.892815907633098e-06, "loss": 0.8525, "step": 5775 }, { "epoch": 0.07044227511486478, "grad_norm": 5.105193959122268, "learning_rate": 4.892495189223862e-06, "loss": 0.8931, "step": 5780 }, { "epoch": 0.07050321133901259, "grad_norm": 2.5655419556885946, "learning_rate": 4.892174470814625e-06, "loss": 0.9204, "step": 5785 }, { "epoch": 0.0705641475631604, "grad_norm": 2.568060141997831, "learning_rate": 4.891853752405388e-06, "loss": 0.838, "step": 5790 }, { "epoch": 0.07062508378730821, "grad_norm": 2.835589948063331, "learning_rate": 4.891533033996152e-06, "loss": 0.8718, "step": 5795 }, { "epoch": 0.070686020011456, "grad_norm": 2.807426627061723, "learning_rate": 4.891212315586915e-06, "loss": 0.8476, "step": 5800 }, { "epoch": 0.07074695623560381, "grad_norm": 2.512403776844728, "learning_rate": 4.890891597177679e-06, "loss": 0.9109, "step": 5805 }, { "epoch": 0.07080789245975162, "grad_norm": 2.600430781623416, "learning_rate": 4.890570878768442e-06, "loss": 0.7937, "step": 5810 }, { "epoch": 0.07086882868389943, "grad_norm": 2.5301840938747437, "learning_rate": 4.890250160359205e-06, "loss": 0.91, "step": 5815 }, { "epoch": 0.07092976490804724, "grad_norm": 2.6534945463271575, "learning_rate": 4.8899294419499686e-06, "loss": 0.9219, "step": 5820 }, { "epoch": 0.07099070113219505, "grad_norm": 3.1223425319499922, "learning_rate": 4.889608723540732e-06, "loss": 0.8784, "step": 5825 }, { "epoch": 0.07105163735634284, "grad_norm": 2.683165015436457, "learning_rate": 4.8892880051314954e-06, "loss": 0.8694, "step": 5830 }, { "epoch": 0.07111257358049065, "grad_norm": 2.5907627607719457, "learning_rate": 4.8889672867222585e-06, "loss": 0.8625, "step": 5835 }, { "epoch": 0.07117350980463846, "grad_norm": 2.690659505847811, "learning_rate": 4.8886465683130215e-06, "loss": 0.8801, "step": 5840 }, { "epoch": 0.07123444602878627, "grad_norm": 3.0967176828335807, "learning_rate": 4.888325849903785e-06, "loss": 0.8175, "step": 5845 }, { "epoch": 0.07129538225293408, "grad_norm": 4.060671083513535, "learning_rate": 4.888005131494548e-06, "loss": 0.856, "step": 5850 }, { "epoch": 0.07135631847708189, "grad_norm": 3.1720291232549767, "learning_rate": 4.887684413085311e-06, "loss": 0.908, "step": 5855 }, { "epoch": 0.0714172547012297, "grad_norm": 2.5691797556457514, "learning_rate": 4.887363694676075e-06, "loss": 0.9072, "step": 5860 }, { "epoch": 0.0714781909253775, "grad_norm": 2.3495680602338935, "learning_rate": 4.887042976266838e-06, "loss": 0.8324, "step": 5865 }, { "epoch": 0.0715391271495253, "grad_norm": 2.959966777684798, "learning_rate": 4.886722257857601e-06, "loss": 0.8859, "step": 5870 }, { "epoch": 0.07160006337367311, "grad_norm": 2.695929933320219, "learning_rate": 4.886401539448365e-06, "loss": 0.8591, "step": 5875 }, { "epoch": 0.07166099959782092, "grad_norm": 2.4707668896023858, "learning_rate": 4.886080821039128e-06, "loss": 0.8806, "step": 5880 }, { "epoch": 0.07172193582196873, "grad_norm": 2.3557051301509877, "learning_rate": 4.885760102629891e-06, "loss": 0.8444, "step": 5885 }, { "epoch": 0.07178287204611654, "grad_norm": 2.743272292247477, "learning_rate": 4.885439384220655e-06, "loss": 0.931, "step": 5890 }, { "epoch": 0.07184380827026435, "grad_norm": 3.38694272379509, "learning_rate": 4.885118665811418e-06, "loss": 0.8369, "step": 5895 }, { "epoch": 0.07190474449441214, "grad_norm": 2.6003141253487656, "learning_rate": 4.884797947402181e-06, "loss": 0.8796, "step": 5900 }, { "epoch": 0.07196568071855995, "grad_norm": 2.58696446934877, "learning_rate": 4.884477228992945e-06, "loss": 0.8824, "step": 5905 }, { "epoch": 0.07202661694270776, "grad_norm": 2.761634727320678, "learning_rate": 4.884156510583708e-06, "loss": 0.9102, "step": 5910 }, { "epoch": 0.07208755316685557, "grad_norm": 3.661474725346025, "learning_rate": 4.883835792174471e-06, "loss": 0.7734, "step": 5915 }, { "epoch": 0.07214848939100338, "grad_norm": 2.6458712679150027, "learning_rate": 4.883515073765234e-06, "loss": 0.8305, "step": 5920 }, { "epoch": 0.07220942561515119, "grad_norm": 3.508641729021613, "learning_rate": 4.883194355355998e-06, "loss": 0.894, "step": 5925 }, { "epoch": 0.072270361839299, "grad_norm": 2.5173823319002455, "learning_rate": 4.882873636946761e-06, "loss": 0.8847, "step": 5930 }, { "epoch": 0.07233129806344679, "grad_norm": 4.374922139130649, "learning_rate": 4.882552918537524e-06, "loss": 0.779, "step": 5935 }, { "epoch": 0.0723922342875946, "grad_norm": 2.4946117825855905, "learning_rate": 4.882232200128288e-06, "loss": 0.855, "step": 5940 }, { "epoch": 0.07245317051174241, "grad_norm": 2.271997582029705, "learning_rate": 4.881911481719051e-06, "loss": 0.7486, "step": 5945 }, { "epoch": 0.07251410673589022, "grad_norm": 2.7383845041972306, "learning_rate": 4.881590763309814e-06, "loss": 0.8962, "step": 5950 }, { "epoch": 0.07257504296003803, "grad_norm": 2.5998002446492263, "learning_rate": 4.881270044900578e-06, "loss": 0.75, "step": 5955 }, { "epoch": 0.07263597918418584, "grad_norm": 3.0703111030334385, "learning_rate": 4.880949326491341e-06, "loss": 0.7998, "step": 5960 }, { "epoch": 0.07269691540833363, "grad_norm": 3.203655512658267, "learning_rate": 4.8806286080821045e-06, "loss": 0.9571, "step": 5965 }, { "epoch": 0.07275785163248144, "grad_norm": 2.526522037896951, "learning_rate": 4.8803078896728675e-06, "loss": 0.8823, "step": 5970 }, { "epoch": 0.07281878785662925, "grad_norm": 2.8308642090059193, "learning_rate": 4.8799871712636306e-06, "loss": 0.8622, "step": 5975 }, { "epoch": 0.07287972408077706, "grad_norm": 2.3027124353378143, "learning_rate": 4.879666452854394e-06, "loss": 0.7938, "step": 5980 }, { "epoch": 0.07294066030492487, "grad_norm": 4.141232979574844, "learning_rate": 4.8793457344451574e-06, "loss": 0.8928, "step": 5985 }, { "epoch": 0.07300159652907268, "grad_norm": 2.900047159908736, "learning_rate": 4.879025016035921e-06, "loss": 0.8824, "step": 5990 }, { "epoch": 0.07306253275322049, "grad_norm": 2.501432378692932, "learning_rate": 4.878704297626684e-06, "loss": 0.8598, "step": 5995 }, { "epoch": 0.07312346897736828, "grad_norm": 2.771555847788162, "learning_rate": 4.878383579217447e-06, "loss": 0.7953, "step": 6000 }, { "epoch": 0.07318440520151609, "grad_norm": 2.9037828494905553, "learning_rate": 4.878062860808211e-06, "loss": 0.9277, "step": 6005 }, { "epoch": 0.0732453414256639, "grad_norm": 2.7291815536544375, "learning_rate": 4.877742142398974e-06, "loss": 0.9128, "step": 6010 }, { "epoch": 0.07330627764981171, "grad_norm": 2.483811096224595, "learning_rate": 4.877421423989737e-06, "loss": 0.8165, "step": 6015 }, { "epoch": 0.07336721387395952, "grad_norm": 3.023142668023918, "learning_rate": 4.877100705580501e-06, "loss": 0.8655, "step": 6020 }, { "epoch": 0.07342815009810733, "grad_norm": 2.3280470747138926, "learning_rate": 4.876779987171264e-06, "loss": 0.7953, "step": 6025 }, { "epoch": 0.07348908632225513, "grad_norm": 2.7997505549416033, "learning_rate": 4.876459268762027e-06, "loss": 0.8615, "step": 6030 }, { "epoch": 0.07355002254640293, "grad_norm": 2.988506319381698, "learning_rate": 4.876138550352791e-06, "loss": 0.9187, "step": 6035 }, { "epoch": 0.07361095877055074, "grad_norm": 2.9240178846918825, "learning_rate": 4.875817831943554e-06, "loss": 0.8815, "step": 6040 }, { "epoch": 0.07367189499469855, "grad_norm": 3.1304480573484805, "learning_rate": 4.875497113534317e-06, "loss": 0.7759, "step": 6045 }, { "epoch": 0.07373283121884636, "grad_norm": 2.84701108204781, "learning_rate": 4.875176395125081e-06, "loss": 0.8668, "step": 6050 }, { "epoch": 0.07379376744299417, "grad_norm": 2.809117411954213, "learning_rate": 4.874855676715844e-06, "loss": 0.8916, "step": 6055 }, { "epoch": 0.07385470366714197, "grad_norm": 3.02429483838506, "learning_rate": 4.874534958306607e-06, "loss": 0.7785, "step": 6060 }, { "epoch": 0.07391563989128977, "grad_norm": 3.421251672767865, "learning_rate": 4.874214239897371e-06, "loss": 0.7881, "step": 6065 }, { "epoch": 0.07397657611543758, "grad_norm": 2.5240905390228874, "learning_rate": 4.873893521488134e-06, "loss": 0.7939, "step": 6070 }, { "epoch": 0.07403751233958539, "grad_norm": 2.2013701968475834, "learning_rate": 4.873572803078897e-06, "loss": 0.8051, "step": 6075 }, { "epoch": 0.0740984485637332, "grad_norm": 3.590827874609413, "learning_rate": 4.873252084669661e-06, "loss": 0.8787, "step": 6080 }, { "epoch": 0.074159384787881, "grad_norm": 2.5778489557414153, "learning_rate": 4.872931366260424e-06, "loss": 0.7728, "step": 6085 }, { "epoch": 0.07422032101202881, "grad_norm": 3.0623723540801286, "learning_rate": 4.872610647851187e-06, "loss": 0.8764, "step": 6090 }, { "epoch": 0.07428125723617662, "grad_norm": 2.601275742440368, "learning_rate": 4.87228992944195e-06, "loss": 0.8609, "step": 6095 }, { "epoch": 0.07434219346032442, "grad_norm": 2.8065243952638306, "learning_rate": 4.871969211032714e-06, "loss": 0.8537, "step": 6100 }, { "epoch": 0.07440312968447223, "grad_norm": 2.932403183708498, "learning_rate": 4.871648492623477e-06, "loss": 0.8786, "step": 6105 }, { "epoch": 0.07446406590862004, "grad_norm": 3.193893262416536, "learning_rate": 4.8713277742142405e-06, "loss": 0.8573, "step": 6110 }, { "epoch": 0.07452500213276785, "grad_norm": 4.046276088860396, "learning_rate": 4.8710070558050035e-06, "loss": 0.9613, "step": 6115 }, { "epoch": 0.07458593835691565, "grad_norm": 2.684525564670616, "learning_rate": 4.8706863373957665e-06, "loss": 0.8178, "step": 6120 }, { "epoch": 0.07464687458106346, "grad_norm": 3.175950152460603, "learning_rate": 4.87036561898653e-06, "loss": 0.8461, "step": 6125 }, { "epoch": 0.07470781080521127, "grad_norm": 3.2219120167072997, "learning_rate": 4.870044900577293e-06, "loss": 0.7441, "step": 6130 }, { "epoch": 0.07476874702935907, "grad_norm": 3.033252053859501, "learning_rate": 4.869724182168057e-06, "loss": 0.8429, "step": 6135 }, { "epoch": 0.07482968325350688, "grad_norm": 2.3247027989832203, "learning_rate": 4.86940346375882e-06, "loss": 0.8921, "step": 6140 }, { "epoch": 0.07489061947765469, "grad_norm": 2.604027878844166, "learning_rate": 4.869082745349583e-06, "loss": 0.8247, "step": 6145 }, { "epoch": 0.0749515557018025, "grad_norm": 2.53404288773422, "learning_rate": 4.868762026940347e-06, "loss": 0.8325, "step": 6150 }, { "epoch": 0.0750124919259503, "grad_norm": 3.105487735209326, "learning_rate": 4.86844130853111e-06, "loss": 0.8261, "step": 6155 }, { "epoch": 0.07507342815009811, "grad_norm": 2.6785822967465958, "learning_rate": 4.868120590121874e-06, "loss": 0.8895, "step": 6160 }, { "epoch": 0.07513436437424592, "grad_norm": 2.885848739296841, "learning_rate": 4.867799871712637e-06, "loss": 0.8397, "step": 6165 }, { "epoch": 0.07519530059839372, "grad_norm": 2.878521581126106, "learning_rate": 4.8674791533034e-06, "loss": 0.8275, "step": 6170 }, { "epoch": 0.07525623682254153, "grad_norm": 2.62663201304314, "learning_rate": 4.867158434894163e-06, "loss": 0.8713, "step": 6175 }, { "epoch": 0.07531717304668933, "grad_norm": 2.5966925508053924, "learning_rate": 4.866837716484927e-06, "loss": 0.88, "step": 6180 }, { "epoch": 0.07537810927083714, "grad_norm": 2.7459894655557076, "learning_rate": 4.86651699807569e-06, "loss": 0.8171, "step": 6185 }, { "epoch": 0.07543904549498495, "grad_norm": 2.5841465354060005, "learning_rate": 4.866196279666453e-06, "loss": 0.8576, "step": 6190 }, { "epoch": 0.07549998171913276, "grad_norm": 2.7747365502845347, "learning_rate": 4.865875561257217e-06, "loss": 0.8264, "step": 6195 }, { "epoch": 0.07556091794328056, "grad_norm": 2.9016830614394533, "learning_rate": 4.86555484284798e-06, "loss": 0.8399, "step": 6200 }, { "epoch": 0.07562185416742837, "grad_norm": 2.4940728515381205, "learning_rate": 4.865234124438743e-06, "loss": 0.8905, "step": 6205 }, { "epoch": 0.07568279039157617, "grad_norm": 3.0318662382085617, "learning_rate": 4.864913406029507e-06, "loss": 0.7793, "step": 6210 }, { "epoch": 0.07574372661572398, "grad_norm": 2.516898348997984, "learning_rate": 4.86459268762027e-06, "loss": 0.7896, "step": 6215 }, { "epoch": 0.07580466283987179, "grad_norm": 2.4142543977337647, "learning_rate": 4.864271969211033e-06, "loss": 0.8667, "step": 6220 }, { "epoch": 0.0758655990640196, "grad_norm": 3.6225511320931965, "learning_rate": 4.863951250801797e-06, "loss": 0.8828, "step": 6225 }, { "epoch": 0.07592653528816741, "grad_norm": 3.322940265651165, "learning_rate": 4.86363053239256e-06, "loss": 0.8223, "step": 6230 }, { "epoch": 0.0759874715123152, "grad_norm": 4.46736013281453, "learning_rate": 4.863309813983323e-06, "loss": 0.8664, "step": 6235 }, { "epoch": 0.07604840773646301, "grad_norm": 2.5742307390765733, "learning_rate": 4.8629890955740865e-06, "loss": 0.8418, "step": 6240 }, { "epoch": 0.07610934396061082, "grad_norm": 2.598203493512729, "learning_rate": 4.8626683771648496e-06, "loss": 0.886, "step": 6245 }, { "epoch": 0.07617028018475863, "grad_norm": 3.9403214540336267, "learning_rate": 4.8623476587556126e-06, "loss": 0.8412, "step": 6250 }, { "epoch": 0.07623121640890644, "grad_norm": 3.1055754416899464, "learning_rate": 4.8620269403463764e-06, "loss": 0.8704, "step": 6255 }, { "epoch": 0.07629215263305425, "grad_norm": 3.0342813749916058, "learning_rate": 4.8617062219371395e-06, "loss": 0.8996, "step": 6260 }, { "epoch": 0.07635308885720206, "grad_norm": 2.642930714760876, "learning_rate": 4.8613855035279025e-06, "loss": 0.8491, "step": 6265 }, { "epoch": 0.07641402508134985, "grad_norm": 2.815398331443952, "learning_rate": 4.861064785118666e-06, "loss": 0.9099, "step": 6270 }, { "epoch": 0.07647496130549766, "grad_norm": 2.8092856734672833, "learning_rate": 4.860744066709429e-06, "loss": 0.7999, "step": 6275 }, { "epoch": 0.07653589752964547, "grad_norm": 2.4474986569358554, "learning_rate": 4.860423348300193e-06, "loss": 0.8327, "step": 6280 }, { "epoch": 0.07659683375379328, "grad_norm": 2.3148856601443146, "learning_rate": 4.860102629890956e-06, "loss": 0.84, "step": 6285 }, { "epoch": 0.07665776997794109, "grad_norm": 3.8834847122452985, "learning_rate": 4.859781911481719e-06, "loss": 0.8333, "step": 6290 }, { "epoch": 0.0767187062020889, "grad_norm": 3.325014935582837, "learning_rate": 4.859461193072483e-06, "loss": 0.9072, "step": 6295 }, { "epoch": 0.0767796424262367, "grad_norm": 3.379606600642676, "learning_rate": 4.859140474663246e-06, "loss": 0.7918, "step": 6300 }, { "epoch": 0.0768405786503845, "grad_norm": 2.6104614725155844, "learning_rate": 4.85881975625401e-06, "loss": 0.8684, "step": 6305 }, { "epoch": 0.07690151487453231, "grad_norm": 2.5798688688112654, "learning_rate": 4.858499037844773e-06, "loss": 0.8284, "step": 6310 }, { "epoch": 0.07696245109868012, "grad_norm": 2.2797069219039066, "learning_rate": 4.858178319435536e-06, "loss": 0.848, "step": 6315 }, { "epoch": 0.07702338732282793, "grad_norm": 2.5977151916688146, "learning_rate": 4.8578576010263e-06, "loss": 0.8562, "step": 6320 }, { "epoch": 0.07708432354697574, "grad_norm": 2.405959567220388, "learning_rate": 4.857536882617063e-06, "loss": 0.8275, "step": 6325 }, { "epoch": 0.07714525977112355, "grad_norm": 2.7002458929175774, "learning_rate": 4.857216164207826e-06, "loss": 0.8686, "step": 6330 }, { "epoch": 0.07720619599527134, "grad_norm": 2.9194905448197574, "learning_rate": 4.856895445798589e-06, "loss": 0.8, "step": 6335 }, { "epoch": 0.07726713221941915, "grad_norm": 3.067538173330147, "learning_rate": 4.856574727389353e-06, "loss": 0.8844, "step": 6340 }, { "epoch": 0.07732806844356696, "grad_norm": 2.9340960940928413, "learning_rate": 4.856254008980116e-06, "loss": 0.9168, "step": 6345 }, { "epoch": 0.07738900466771477, "grad_norm": 2.7753292091332247, "learning_rate": 4.855933290570879e-06, "loss": 0.9202, "step": 6350 }, { "epoch": 0.07744994089186258, "grad_norm": 2.275692359894826, "learning_rate": 4.855612572161643e-06, "loss": 0.7913, "step": 6355 }, { "epoch": 0.07751087711601039, "grad_norm": 2.6969648153888426, "learning_rate": 4.855291853752406e-06, "loss": 0.8993, "step": 6360 }, { "epoch": 0.0775718133401582, "grad_norm": 2.6498774245082877, "learning_rate": 4.854971135343169e-06, "loss": 0.8553, "step": 6365 }, { "epoch": 0.07763274956430599, "grad_norm": 3.994678907155242, "learning_rate": 4.854650416933933e-06, "loss": 0.9343, "step": 6370 }, { "epoch": 0.0776936857884538, "grad_norm": 2.5972321523681883, "learning_rate": 4.854329698524696e-06, "loss": 0.8312, "step": 6375 }, { "epoch": 0.07775462201260161, "grad_norm": 2.8971533238418465, "learning_rate": 4.854008980115459e-06, "loss": 0.835, "step": 6380 }, { "epoch": 0.07781555823674942, "grad_norm": 3.3019763604421724, "learning_rate": 4.8536882617062225e-06, "loss": 0.8032, "step": 6385 }, { "epoch": 0.07787649446089723, "grad_norm": 2.5073122498821983, "learning_rate": 4.8533675432969855e-06, "loss": 0.9215, "step": 6390 }, { "epoch": 0.07793743068504504, "grad_norm": 2.354886519983997, "learning_rate": 4.8530468248877485e-06, "loss": 0.882, "step": 6395 }, { "epoch": 0.07799836690919285, "grad_norm": 3.335735778801286, "learning_rate": 4.852726106478512e-06, "loss": 0.9221, "step": 6400 }, { "epoch": 0.07805930313334064, "grad_norm": 3.0414139657476507, "learning_rate": 4.852405388069275e-06, "loss": 0.8654, "step": 6405 }, { "epoch": 0.07812023935748845, "grad_norm": 2.5008194227150513, "learning_rate": 4.8520846696600384e-06, "loss": 0.8537, "step": 6410 }, { "epoch": 0.07818117558163626, "grad_norm": 2.82855699500421, "learning_rate": 4.851763951250802e-06, "loss": 0.9198, "step": 6415 }, { "epoch": 0.07824211180578407, "grad_norm": 2.4359580935025593, "learning_rate": 4.851443232841565e-06, "loss": 0.825, "step": 6420 }, { "epoch": 0.07830304802993188, "grad_norm": 2.7445561650838375, "learning_rate": 4.851122514432328e-06, "loss": 0.8694, "step": 6425 }, { "epoch": 0.07836398425407969, "grad_norm": 2.779263626435354, "learning_rate": 4.850801796023092e-06, "loss": 0.8735, "step": 6430 }, { "epoch": 0.07842492047822748, "grad_norm": 2.6436617548089716, "learning_rate": 4.850481077613855e-06, "loss": 0.7997, "step": 6435 }, { "epoch": 0.07848585670237529, "grad_norm": 2.642343757161658, "learning_rate": 4.850160359204619e-06, "loss": 0.8127, "step": 6440 }, { "epoch": 0.0785467929265231, "grad_norm": 2.508554394607273, "learning_rate": 4.849839640795382e-06, "loss": 0.8747, "step": 6445 }, { "epoch": 0.07860772915067091, "grad_norm": 3.0071597230483578, "learning_rate": 4.849518922386145e-06, "loss": 0.8342, "step": 6450 }, { "epoch": 0.07866866537481872, "grad_norm": 3.1958546097808767, "learning_rate": 4.849198203976909e-06, "loss": 0.849, "step": 6455 }, { "epoch": 0.07872960159896653, "grad_norm": 2.896529034980301, "learning_rate": 4.848877485567672e-06, "loss": 0.8707, "step": 6460 }, { "epoch": 0.07879053782311433, "grad_norm": 2.588329072042103, "learning_rate": 4.848556767158436e-06, "loss": 0.8676, "step": 6465 }, { "epoch": 0.07885147404726213, "grad_norm": 2.85869178186612, "learning_rate": 4.848236048749199e-06, "loss": 0.8655, "step": 6470 }, { "epoch": 0.07891241027140994, "grad_norm": 2.5597538770545083, "learning_rate": 4.847915330339962e-06, "loss": 0.8172, "step": 6475 }, { "epoch": 0.07897334649555775, "grad_norm": 6.316897516306535, "learning_rate": 4.847594611930726e-06, "loss": 0.8548, "step": 6480 }, { "epoch": 0.07903428271970556, "grad_norm": 2.503512641636353, "learning_rate": 4.847273893521489e-06, "loss": 0.7451, "step": 6485 }, { "epoch": 0.07909521894385337, "grad_norm": 2.531778357641113, "learning_rate": 4.846953175112252e-06, "loss": 0.8971, "step": 6490 }, { "epoch": 0.07915615516800117, "grad_norm": 2.328645548636664, "learning_rate": 4.846632456703016e-06, "loss": 0.8376, "step": 6495 }, { "epoch": 0.07921709139214898, "grad_norm": 2.7176389163004875, "learning_rate": 4.846311738293779e-06, "loss": 0.8147, "step": 6500 }, { "epoch": 0.07927802761629678, "grad_norm": 3.166818103640409, "learning_rate": 4.845991019884542e-06, "loss": 0.8753, "step": 6505 }, { "epoch": 0.07933896384044459, "grad_norm": 2.660029229765519, "learning_rate": 4.845670301475305e-06, "loss": 0.8713, "step": 6510 }, { "epoch": 0.0793999000645924, "grad_norm": 3.2274415167618122, "learning_rate": 4.8453495830660686e-06, "loss": 0.8289, "step": 6515 }, { "epoch": 0.0794608362887402, "grad_norm": 2.6896983427347925, "learning_rate": 4.8450288646568316e-06, "loss": 0.9028, "step": 6520 }, { "epoch": 0.07952177251288801, "grad_norm": 2.3366855830089315, "learning_rate": 4.844708146247595e-06, "loss": 0.8497, "step": 6525 }, { "epoch": 0.07958270873703582, "grad_norm": 2.583664394633953, "learning_rate": 4.8443874278383585e-06, "loss": 0.8179, "step": 6530 }, { "epoch": 0.07964364496118362, "grad_norm": 2.88155504656365, "learning_rate": 4.8440667094291215e-06, "loss": 0.8968, "step": 6535 }, { "epoch": 0.07970458118533143, "grad_norm": 4.581804024274248, "learning_rate": 4.8437459910198845e-06, "loss": 0.8851, "step": 6540 }, { "epoch": 0.07976551740947924, "grad_norm": 3.1791048488734193, "learning_rate": 4.843425272610648e-06, "loss": 0.8193, "step": 6545 }, { "epoch": 0.07982645363362705, "grad_norm": 3.056982407413356, "learning_rate": 4.843104554201411e-06, "loss": 0.8548, "step": 6550 }, { "epoch": 0.07988738985777485, "grad_norm": 3.257479367578229, "learning_rate": 4.842783835792174e-06, "loss": 0.8733, "step": 6555 }, { "epoch": 0.07994832608192266, "grad_norm": 2.4235599516448016, "learning_rate": 4.842463117382938e-06, "loss": 0.8205, "step": 6560 }, { "epoch": 0.08000926230607047, "grad_norm": 2.420083649148737, "learning_rate": 4.842142398973701e-06, "loss": 0.8743, "step": 6565 }, { "epoch": 0.08007019853021827, "grad_norm": 2.6620364586522984, "learning_rate": 4.841821680564464e-06, "loss": 0.8603, "step": 6570 }, { "epoch": 0.08013113475436608, "grad_norm": 2.5870092647005327, "learning_rate": 4.841500962155228e-06, "loss": 0.8851, "step": 6575 }, { "epoch": 0.08019207097851389, "grad_norm": 2.658356053927096, "learning_rate": 4.841180243745991e-06, "loss": 0.7815, "step": 6580 }, { "epoch": 0.0802530072026617, "grad_norm": 2.3187541469227013, "learning_rate": 4.840859525336755e-06, "loss": 0.8492, "step": 6585 }, { "epoch": 0.0803139434268095, "grad_norm": 2.529616702137905, "learning_rate": 4.840538806927518e-06, "loss": 0.9066, "step": 6590 }, { "epoch": 0.08037487965095731, "grad_norm": 3.437266366115022, "learning_rate": 4.840218088518281e-06, "loss": 0.8455, "step": 6595 }, { "epoch": 0.08043581587510512, "grad_norm": 2.683542034266557, "learning_rate": 4.839897370109045e-06, "loss": 0.8295, "step": 6600 }, { "epoch": 0.08049675209925292, "grad_norm": 2.5555603274148253, "learning_rate": 4.839576651699808e-06, "loss": 0.864, "step": 6605 }, { "epoch": 0.08055768832340073, "grad_norm": 2.2333366570094277, "learning_rate": 4.839255933290572e-06, "loss": 0.8339, "step": 6610 }, { "epoch": 0.08061862454754853, "grad_norm": 2.663390290821616, "learning_rate": 4.838935214881335e-06, "loss": 0.8583, "step": 6615 }, { "epoch": 0.08067956077169634, "grad_norm": 2.575308313557433, "learning_rate": 4.838614496472098e-06, "loss": 0.848, "step": 6620 }, { "epoch": 0.08074049699584415, "grad_norm": 2.370940467131572, "learning_rate": 4.838293778062862e-06, "loss": 0.7708, "step": 6625 }, { "epoch": 0.08080143321999196, "grad_norm": 2.4689180833640996, "learning_rate": 4.837973059653625e-06, "loss": 0.8173, "step": 6630 }, { "epoch": 0.08086236944413977, "grad_norm": 4.006615128741335, "learning_rate": 4.837652341244388e-06, "loss": 0.9091, "step": 6635 }, { "epoch": 0.08092330566828757, "grad_norm": 2.9728161188005156, "learning_rate": 4.837331622835152e-06, "loss": 0.8242, "step": 6640 }, { "epoch": 0.08098424189243537, "grad_norm": 2.6526388468734616, "learning_rate": 4.837010904425915e-06, "loss": 0.8466, "step": 6645 }, { "epoch": 0.08104517811658318, "grad_norm": 2.6642092671402637, "learning_rate": 4.836690186016678e-06, "loss": 0.9855, "step": 6650 }, { "epoch": 0.08110611434073099, "grad_norm": 2.529557092549904, "learning_rate": 4.8363694676074415e-06, "loss": 0.8354, "step": 6655 }, { "epoch": 0.0811670505648788, "grad_norm": 2.530220517439869, "learning_rate": 4.8360487491982045e-06, "loss": 0.7922, "step": 6660 }, { "epoch": 0.08122798678902661, "grad_norm": 2.8639394421510045, "learning_rate": 4.8357280307889675e-06, "loss": 0.9069, "step": 6665 }, { "epoch": 0.0812889230131744, "grad_norm": 2.746019884622609, "learning_rate": 4.835407312379731e-06, "loss": 0.967, "step": 6670 }, { "epoch": 0.08134985923732221, "grad_norm": 2.4822801581170655, "learning_rate": 4.835086593970494e-06, "loss": 0.8825, "step": 6675 }, { "epoch": 0.08141079546147002, "grad_norm": 3.4486794687523203, "learning_rate": 4.8347658755612574e-06, "loss": 0.9287, "step": 6680 }, { "epoch": 0.08147173168561783, "grad_norm": 3.0625485356370787, "learning_rate": 4.8344451571520204e-06, "loss": 0.8565, "step": 6685 }, { "epoch": 0.08153266790976564, "grad_norm": 2.5880533059477293, "learning_rate": 4.834124438742784e-06, "loss": 0.9119, "step": 6690 }, { "epoch": 0.08159360413391345, "grad_norm": 2.567714203438355, "learning_rate": 4.833803720333547e-06, "loss": 0.853, "step": 6695 }, { "epoch": 0.08165454035806126, "grad_norm": 2.7329664930615554, "learning_rate": 4.83348300192431e-06, "loss": 0.9057, "step": 6700 }, { "epoch": 0.08171547658220905, "grad_norm": 2.8352458052656506, "learning_rate": 4.833162283515074e-06, "loss": 0.8467, "step": 6705 }, { "epoch": 0.08177641280635686, "grad_norm": 3.1496065111109957, "learning_rate": 4.832841565105837e-06, "loss": 0.8492, "step": 6710 }, { "epoch": 0.08183734903050467, "grad_norm": 2.2488698669522416, "learning_rate": 4.8325208466966e-06, "loss": 0.8517, "step": 6715 }, { "epoch": 0.08189828525465248, "grad_norm": 2.6143742710317692, "learning_rate": 4.832200128287364e-06, "loss": 0.7523, "step": 6720 }, { "epoch": 0.08195922147880029, "grad_norm": 3.3658563529182737, "learning_rate": 4.831879409878127e-06, "loss": 0.869, "step": 6725 }, { "epoch": 0.0820201577029481, "grad_norm": 2.470364649328891, "learning_rate": 4.831558691468891e-06, "loss": 0.8285, "step": 6730 }, { "epoch": 0.08208109392709591, "grad_norm": 2.590263272623977, "learning_rate": 4.831237973059654e-06, "loss": 0.8017, "step": 6735 }, { "epoch": 0.0821420301512437, "grad_norm": 2.5707611459520545, "learning_rate": 4.830917254650417e-06, "loss": 0.8947, "step": 6740 }, { "epoch": 0.08220296637539151, "grad_norm": 2.5491021541364467, "learning_rate": 4.830596536241181e-06, "loss": 0.9103, "step": 6745 }, { "epoch": 0.08226390259953932, "grad_norm": 2.9284386436064924, "learning_rate": 4.830275817831944e-06, "loss": 0.9292, "step": 6750 }, { "epoch": 0.08232483882368713, "grad_norm": 2.8757244100490555, "learning_rate": 4.829955099422708e-06, "loss": 0.8802, "step": 6755 }, { "epoch": 0.08238577504783494, "grad_norm": 2.2259028726940593, "learning_rate": 4.829634381013471e-06, "loss": 0.8965, "step": 6760 }, { "epoch": 0.08244671127198275, "grad_norm": 3.351020319323359, "learning_rate": 4.829313662604234e-06, "loss": 0.8973, "step": 6765 }, { "epoch": 0.08250764749613054, "grad_norm": 2.922657299255136, "learning_rate": 4.828992944194998e-06, "loss": 0.8157, "step": 6770 }, { "epoch": 0.08256858372027835, "grad_norm": 2.702492033619328, "learning_rate": 4.828672225785761e-06, "loss": 0.8558, "step": 6775 }, { "epoch": 0.08262951994442616, "grad_norm": 2.3280803924124216, "learning_rate": 4.828351507376524e-06, "loss": 0.7868, "step": 6780 }, { "epoch": 0.08269045616857397, "grad_norm": 2.384140704425735, "learning_rate": 4.8280307889672876e-06, "loss": 0.8915, "step": 6785 }, { "epoch": 0.08275139239272178, "grad_norm": 2.9835035518686857, "learning_rate": 4.8277100705580506e-06, "loss": 0.8705, "step": 6790 }, { "epoch": 0.08281232861686959, "grad_norm": 3.5287297856658877, "learning_rate": 4.827389352148814e-06, "loss": 0.8835, "step": 6795 }, { "epoch": 0.0828732648410174, "grad_norm": 2.825955439059589, "learning_rate": 4.8270686337395774e-06, "loss": 0.8562, "step": 6800 }, { "epoch": 0.08293420106516519, "grad_norm": 2.765838701056871, "learning_rate": 4.8267479153303405e-06, "loss": 0.87, "step": 6805 }, { "epoch": 0.082995137289313, "grad_norm": 2.789939511608546, "learning_rate": 4.8264271969211035e-06, "loss": 0.8933, "step": 6810 }, { "epoch": 0.08305607351346081, "grad_norm": 3.0140550034290094, "learning_rate": 4.826106478511867e-06, "loss": 0.8415, "step": 6815 }, { "epoch": 0.08311700973760862, "grad_norm": 3.1936453809551884, "learning_rate": 4.82578576010263e-06, "loss": 0.8645, "step": 6820 }, { "epoch": 0.08317794596175643, "grad_norm": 2.3762812678921157, "learning_rate": 4.825465041693393e-06, "loss": 0.8589, "step": 6825 }, { "epoch": 0.08323888218590424, "grad_norm": 2.325022091840473, "learning_rate": 4.825144323284157e-06, "loss": 0.7801, "step": 6830 }, { "epoch": 0.08329981841005205, "grad_norm": 2.710482702827174, "learning_rate": 4.82482360487492e-06, "loss": 0.7909, "step": 6835 }, { "epoch": 0.08336075463419984, "grad_norm": 2.765848700227099, "learning_rate": 4.824502886465683e-06, "loss": 0.8041, "step": 6840 }, { "epoch": 0.08342169085834765, "grad_norm": 2.7632902426251493, "learning_rate": 4.824182168056446e-06, "loss": 0.8777, "step": 6845 }, { "epoch": 0.08348262708249546, "grad_norm": 3.214404797627722, "learning_rate": 4.82386144964721e-06, "loss": 0.8276, "step": 6850 }, { "epoch": 0.08354356330664327, "grad_norm": 2.716282983458209, "learning_rate": 4.823540731237973e-06, "loss": 0.8917, "step": 6855 }, { "epoch": 0.08360449953079108, "grad_norm": 3.1208788158763188, "learning_rate": 4.823220012828736e-06, "loss": 0.8102, "step": 6860 }, { "epoch": 0.08366543575493889, "grad_norm": 2.558408680778257, "learning_rate": 4.8228992944195e-06, "loss": 0.825, "step": 6865 }, { "epoch": 0.0837263719790867, "grad_norm": 4.572522091957608, "learning_rate": 4.822578576010263e-06, "loss": 0.8605, "step": 6870 }, { "epoch": 0.08378730820323449, "grad_norm": 2.232527562360925, "learning_rate": 4.822257857601026e-06, "loss": 0.8448, "step": 6875 }, { "epoch": 0.0838482444273823, "grad_norm": 3.2798043119127445, "learning_rate": 4.82193713919179e-06, "loss": 0.8928, "step": 6880 }, { "epoch": 0.08390918065153011, "grad_norm": 2.2925957345825125, "learning_rate": 4.821616420782553e-06, "loss": 0.8333, "step": 6885 }, { "epoch": 0.08397011687567792, "grad_norm": 3.4077966111285494, "learning_rate": 4.821295702373317e-06, "loss": 0.8827, "step": 6890 }, { "epoch": 0.08403105309982573, "grad_norm": 3.635400541480438, "learning_rate": 4.82097498396408e-06, "loss": 0.9306, "step": 6895 }, { "epoch": 0.08409198932397353, "grad_norm": 3.296204623362377, "learning_rate": 4.820654265554843e-06, "loss": 0.8173, "step": 6900 }, { "epoch": 0.08415292554812133, "grad_norm": 2.709232608629326, "learning_rate": 4.820333547145607e-06, "loss": 0.9292, "step": 6905 }, { "epoch": 0.08421386177226914, "grad_norm": 2.7306492612450564, "learning_rate": 4.82001282873637e-06, "loss": 0.9321, "step": 6910 }, { "epoch": 0.08427479799641695, "grad_norm": 2.410909144244358, "learning_rate": 4.819692110327134e-06, "loss": 0.9114, "step": 6915 }, { "epoch": 0.08433573422056476, "grad_norm": 2.7411955765179363, "learning_rate": 4.819371391917897e-06, "loss": 0.8585, "step": 6920 }, { "epoch": 0.08439667044471257, "grad_norm": 2.1431711652466374, "learning_rate": 4.81905067350866e-06, "loss": 0.9171, "step": 6925 }, { "epoch": 0.08445760666886037, "grad_norm": 2.552497722248388, "learning_rate": 4.8187299550994235e-06, "loss": 0.8779, "step": 6930 }, { "epoch": 0.08451854289300818, "grad_norm": 2.79627646133807, "learning_rate": 4.8184092366901865e-06, "loss": 0.8298, "step": 6935 }, { "epoch": 0.08457947911715598, "grad_norm": 2.8068998482459095, "learning_rate": 4.8180885182809495e-06, "loss": 0.8683, "step": 6940 }, { "epoch": 0.08464041534130379, "grad_norm": 2.384498037236954, "learning_rate": 4.817767799871713e-06, "loss": 0.8446, "step": 6945 }, { "epoch": 0.0847013515654516, "grad_norm": 2.7763368427132145, "learning_rate": 4.8174470814624764e-06, "loss": 0.8488, "step": 6950 }, { "epoch": 0.0847622877895994, "grad_norm": 2.8735273712875937, "learning_rate": 4.8171263630532394e-06, "loss": 0.766, "step": 6955 }, { "epoch": 0.08482322401374721, "grad_norm": 2.8429743583041445, "learning_rate": 4.816805644644003e-06, "loss": 0.7963, "step": 6960 }, { "epoch": 0.08488416023789502, "grad_norm": 3.834965710443235, "learning_rate": 4.816484926234766e-06, "loss": 0.8194, "step": 6965 }, { "epoch": 0.08494509646204283, "grad_norm": 2.86488460221493, "learning_rate": 4.816164207825529e-06, "loss": 0.922, "step": 6970 }, { "epoch": 0.08500603268619063, "grad_norm": 2.2434057657185726, "learning_rate": 4.815843489416293e-06, "loss": 0.821, "step": 6975 }, { "epoch": 0.08506696891033844, "grad_norm": 3.449648763179701, "learning_rate": 4.815522771007056e-06, "loss": 0.84, "step": 6980 }, { "epoch": 0.08512790513448625, "grad_norm": 2.5241786616162853, "learning_rate": 4.815202052597819e-06, "loss": 0.8834, "step": 6985 }, { "epoch": 0.08518884135863405, "grad_norm": 2.869274612540115, "learning_rate": 4.814881334188583e-06, "loss": 0.9027, "step": 6990 }, { "epoch": 0.08524977758278186, "grad_norm": 3.205579549795543, "learning_rate": 4.814560615779346e-06, "loss": 0.9192, "step": 6995 }, { "epoch": 0.08531071380692967, "grad_norm": 2.8040434188317587, "learning_rate": 4.814239897370109e-06, "loss": 0.819, "step": 7000 }, { "epoch": 0.08537165003107748, "grad_norm": 2.815337518436173, "learning_rate": 4.813919178960873e-06, "loss": 0.8735, "step": 7005 }, { "epoch": 0.08543258625522528, "grad_norm": 2.588880154628117, "learning_rate": 4.813598460551636e-06, "loss": 0.8978, "step": 7010 }, { "epoch": 0.08549352247937309, "grad_norm": 2.788275537668778, "learning_rate": 4.813277742142399e-06, "loss": 0.796, "step": 7015 }, { "epoch": 0.0855544587035209, "grad_norm": 2.4181017018064392, "learning_rate": 4.812957023733162e-06, "loss": 0.8254, "step": 7020 }, { "epoch": 0.0856153949276687, "grad_norm": 2.636963890781852, "learning_rate": 4.812636305323926e-06, "loss": 0.8257, "step": 7025 }, { "epoch": 0.08567633115181651, "grad_norm": 2.9330815337191884, "learning_rate": 4.812315586914689e-06, "loss": 0.8729, "step": 7030 }, { "epoch": 0.08573726737596432, "grad_norm": 3.3340656326725497, "learning_rate": 4.811994868505453e-06, "loss": 0.948, "step": 7035 }, { "epoch": 0.08579820360011212, "grad_norm": 2.760061820989868, "learning_rate": 4.811674150096216e-06, "loss": 0.8832, "step": 7040 }, { "epoch": 0.08585913982425993, "grad_norm": 2.479513482363077, "learning_rate": 4.811353431686979e-06, "loss": 0.8302, "step": 7045 }, { "epoch": 0.08592007604840773, "grad_norm": 2.9870004977726383, "learning_rate": 4.811032713277743e-06, "loss": 0.8915, "step": 7050 }, { "epoch": 0.08598101227255554, "grad_norm": 2.305287797808691, "learning_rate": 4.810711994868506e-06, "loss": 0.7676, "step": 7055 }, { "epoch": 0.08604194849670335, "grad_norm": 2.8042626271647833, "learning_rate": 4.8103912764592696e-06, "loss": 0.795, "step": 7060 }, { "epoch": 0.08610288472085116, "grad_norm": 3.286635325158722, "learning_rate": 4.810070558050033e-06, "loss": 0.7924, "step": 7065 }, { "epoch": 0.08616382094499897, "grad_norm": 3.068869536675906, "learning_rate": 4.809749839640796e-06, "loss": 0.8799, "step": 7070 }, { "epoch": 0.08622475716914677, "grad_norm": 2.6818454392339954, "learning_rate": 4.8094291212315595e-06, "loss": 0.8516, "step": 7075 }, { "epoch": 0.08628569339329457, "grad_norm": 2.671464923160576, "learning_rate": 4.8091084028223225e-06, "loss": 0.8721, "step": 7080 }, { "epoch": 0.08634662961744238, "grad_norm": 4.060100950517446, "learning_rate": 4.808787684413086e-06, "loss": 0.9045, "step": 7085 }, { "epoch": 0.08640756584159019, "grad_norm": 2.269664910953204, "learning_rate": 4.808466966003849e-06, "loss": 0.8674, "step": 7090 }, { "epoch": 0.086468502065738, "grad_norm": 2.528314514813041, "learning_rate": 4.808146247594612e-06, "loss": 0.855, "step": 7095 }, { "epoch": 0.08652943828988581, "grad_norm": 3.7256889446196877, "learning_rate": 4.807825529185375e-06, "loss": 0.8353, "step": 7100 }, { "epoch": 0.08659037451403362, "grad_norm": 3.4883574361782737, "learning_rate": 4.807504810776139e-06, "loss": 0.9002, "step": 7105 }, { "epoch": 0.08665131073818141, "grad_norm": 2.604621316543766, "learning_rate": 4.807184092366902e-06, "loss": 0.7935, "step": 7110 }, { "epoch": 0.08671224696232922, "grad_norm": 3.5322380054379714, "learning_rate": 4.806863373957665e-06, "loss": 0.8731, "step": 7115 }, { "epoch": 0.08677318318647703, "grad_norm": 2.7375898411539272, "learning_rate": 4.806542655548429e-06, "loss": 0.7723, "step": 7120 }, { "epoch": 0.08683411941062484, "grad_norm": 2.435864402110654, "learning_rate": 4.806221937139192e-06, "loss": 0.8309, "step": 7125 }, { "epoch": 0.08689505563477265, "grad_norm": 2.427234106946636, "learning_rate": 4.805901218729955e-06, "loss": 0.849, "step": 7130 }, { "epoch": 0.08695599185892046, "grad_norm": 3.150072980743759, "learning_rate": 4.805580500320719e-06, "loss": 0.9038, "step": 7135 }, { "epoch": 0.08701692808306825, "grad_norm": 2.6830461709357905, "learning_rate": 4.805259781911482e-06, "loss": 0.8792, "step": 7140 }, { "epoch": 0.08707786430721606, "grad_norm": 2.7047679571248837, "learning_rate": 4.804939063502245e-06, "loss": 0.8568, "step": 7145 }, { "epoch": 0.08713880053136387, "grad_norm": 3.052357628271182, "learning_rate": 4.804618345093009e-06, "loss": 0.8711, "step": 7150 }, { "epoch": 0.08719973675551168, "grad_norm": 3.1440543698352026, "learning_rate": 4.804297626683772e-06, "loss": 0.9024, "step": 7155 }, { "epoch": 0.08726067297965949, "grad_norm": 3.0891376107470174, "learning_rate": 4.803976908274535e-06, "loss": 0.9035, "step": 7160 }, { "epoch": 0.0873216092038073, "grad_norm": 3.161321533830179, "learning_rate": 4.803656189865299e-06, "loss": 0.8418, "step": 7165 }, { "epoch": 0.08738254542795511, "grad_norm": 2.824318145385574, "learning_rate": 4.803335471456062e-06, "loss": 0.7987, "step": 7170 }, { "epoch": 0.0874434816521029, "grad_norm": 2.9438926245861112, "learning_rate": 4.803014753046825e-06, "loss": 0.8573, "step": 7175 }, { "epoch": 0.08750441787625071, "grad_norm": 2.859773306600539, "learning_rate": 4.802694034637588e-06, "loss": 0.843, "step": 7180 }, { "epoch": 0.08756535410039852, "grad_norm": 2.5496601271052546, "learning_rate": 4.802373316228352e-06, "loss": 0.858, "step": 7185 }, { "epoch": 0.08762629032454633, "grad_norm": 2.687480730550482, "learning_rate": 4.802052597819115e-06, "loss": 0.8313, "step": 7190 }, { "epoch": 0.08768722654869414, "grad_norm": 3.1329504091902085, "learning_rate": 4.801731879409879e-06, "loss": 0.8722, "step": 7195 }, { "epoch": 0.08774816277284195, "grad_norm": 2.7553890134854186, "learning_rate": 4.801411161000642e-06, "loss": 0.858, "step": 7200 }, { "epoch": 0.08780909899698976, "grad_norm": 2.548701870473228, "learning_rate": 4.8010904425914055e-06, "loss": 0.8974, "step": 7205 }, { "epoch": 0.08787003522113755, "grad_norm": 2.984686131632753, "learning_rate": 4.8007697241821685e-06, "loss": 0.8918, "step": 7210 }, { "epoch": 0.08793097144528536, "grad_norm": 3.194540543261117, "learning_rate": 4.8004490057729316e-06, "loss": 0.7813, "step": 7215 }, { "epoch": 0.08799190766943317, "grad_norm": 2.348958906522067, "learning_rate": 4.800128287363695e-06, "loss": 0.7874, "step": 7220 }, { "epoch": 0.08805284389358098, "grad_norm": 2.662360360838027, "learning_rate": 4.7998075689544584e-06, "loss": 0.8502, "step": 7225 }, { "epoch": 0.08811378011772879, "grad_norm": 2.8668538720589276, "learning_rate": 4.799486850545222e-06, "loss": 0.8644, "step": 7230 }, { "epoch": 0.0881747163418766, "grad_norm": 3.1963523825457676, "learning_rate": 4.799166132135985e-06, "loss": 0.848, "step": 7235 }, { "epoch": 0.0882356525660244, "grad_norm": 3.2733928212845953, "learning_rate": 4.798845413726748e-06, "loss": 0.7767, "step": 7240 }, { "epoch": 0.0882965887901722, "grad_norm": 2.438470090063787, "learning_rate": 4.798524695317512e-06, "loss": 0.9017, "step": 7245 }, { "epoch": 0.08835752501432001, "grad_norm": 2.5988892117980855, "learning_rate": 4.798203976908275e-06, "loss": 0.9507, "step": 7250 }, { "epoch": 0.08841846123846782, "grad_norm": 3.0620277697318747, "learning_rate": 4.797883258499038e-06, "loss": 0.831, "step": 7255 }, { "epoch": 0.08847939746261563, "grad_norm": 3.4447785065271774, "learning_rate": 4.797562540089802e-06, "loss": 0.8496, "step": 7260 }, { "epoch": 0.08854033368676344, "grad_norm": 2.3014554496350175, "learning_rate": 4.797241821680565e-06, "loss": 0.8718, "step": 7265 }, { "epoch": 0.08860126991091125, "grad_norm": 2.835455756312699, "learning_rate": 4.796921103271328e-06, "loss": 0.8122, "step": 7270 }, { "epoch": 0.08866220613505904, "grad_norm": 2.8228429465548945, "learning_rate": 4.796600384862091e-06, "loss": 0.856, "step": 7275 }, { "epoch": 0.08872314235920685, "grad_norm": 2.9221496185273126, "learning_rate": 4.796279666452855e-06, "loss": 0.8429, "step": 7280 }, { "epoch": 0.08878407858335466, "grad_norm": 2.47867996399806, "learning_rate": 4.795958948043618e-06, "loss": 0.884, "step": 7285 }, { "epoch": 0.08884501480750247, "grad_norm": 2.1052110652800473, "learning_rate": 4.795638229634381e-06, "loss": 0.7688, "step": 7290 }, { "epoch": 0.08890595103165028, "grad_norm": 4.516409567577048, "learning_rate": 4.795317511225145e-06, "loss": 0.8733, "step": 7295 }, { "epoch": 0.08896688725579809, "grad_norm": 2.3871689220327363, "learning_rate": 4.794996792815908e-06, "loss": 0.8861, "step": 7300 }, { "epoch": 0.0890278234799459, "grad_norm": 2.9960521011333094, "learning_rate": 4.794676074406671e-06, "loss": 0.8606, "step": 7305 }, { "epoch": 0.08908875970409369, "grad_norm": 3.5716271484730684, "learning_rate": 4.794355355997435e-06, "loss": 0.8347, "step": 7310 }, { "epoch": 0.0891496959282415, "grad_norm": 2.5143690312104363, "learning_rate": 4.794034637588198e-06, "loss": 0.8357, "step": 7315 }, { "epoch": 0.08921063215238931, "grad_norm": 4.948915349818703, "learning_rate": 4.793713919178961e-06, "loss": 0.745, "step": 7320 }, { "epoch": 0.08927156837653712, "grad_norm": 2.2471256091541223, "learning_rate": 4.793393200769725e-06, "loss": 0.8131, "step": 7325 }, { "epoch": 0.08933250460068493, "grad_norm": 2.504799182423116, "learning_rate": 4.793072482360488e-06, "loss": 0.8271, "step": 7330 }, { "epoch": 0.08939344082483273, "grad_norm": 2.457611336164446, "learning_rate": 4.792751763951251e-06, "loss": 0.9405, "step": 7335 }, { "epoch": 0.08945437704898054, "grad_norm": 2.3003177861913087, "learning_rate": 4.792431045542015e-06, "loss": 0.8756, "step": 7340 }, { "epoch": 0.08951531327312834, "grad_norm": 2.7235234490412155, "learning_rate": 4.792110327132778e-06, "loss": 0.8805, "step": 7345 }, { "epoch": 0.08957624949727615, "grad_norm": 2.439737687903953, "learning_rate": 4.791789608723541e-06, "loss": 0.8627, "step": 7350 }, { "epoch": 0.08963718572142396, "grad_norm": 3.4675537789789135, "learning_rate": 4.7914688903143045e-06, "loss": 0.8598, "step": 7355 }, { "epoch": 0.08969812194557177, "grad_norm": 2.5272135342350004, "learning_rate": 4.7911481719050675e-06, "loss": 0.8386, "step": 7360 }, { "epoch": 0.08975905816971957, "grad_norm": 3.2717703668757934, "learning_rate": 4.790827453495831e-06, "loss": 0.8003, "step": 7365 }, { "epoch": 0.08981999439386738, "grad_norm": 2.4239346696361315, "learning_rate": 4.790506735086594e-06, "loss": 0.9167, "step": 7370 }, { "epoch": 0.08988093061801518, "grad_norm": 2.929378236275021, "learning_rate": 4.790186016677357e-06, "loss": 0.8586, "step": 7375 }, { "epoch": 0.08994186684216299, "grad_norm": 2.628782305332029, "learning_rate": 4.789865298268121e-06, "loss": 0.8232, "step": 7380 }, { "epoch": 0.0900028030663108, "grad_norm": 2.270155824277019, "learning_rate": 4.789544579858884e-06, "loss": 0.8524, "step": 7385 }, { "epoch": 0.0900637392904586, "grad_norm": 2.4351084878318203, "learning_rate": 4.789223861449648e-06, "loss": 0.831, "step": 7390 }, { "epoch": 0.09012467551460641, "grad_norm": 2.628526186659023, "learning_rate": 4.788903143040411e-06, "loss": 0.8068, "step": 7395 }, { "epoch": 0.09018561173875422, "grad_norm": 2.504190296061061, "learning_rate": 4.788582424631174e-06, "loss": 0.9379, "step": 7400 }, { "epoch": 0.09024654796290203, "grad_norm": 2.605124117788533, "learning_rate": 4.788261706221938e-06, "loss": 0.886, "step": 7405 }, { "epoch": 0.09030748418704983, "grad_norm": 4.391152301540347, "learning_rate": 4.787940987812701e-06, "loss": 0.8236, "step": 7410 }, { "epoch": 0.09036842041119764, "grad_norm": 2.1061051691539365, "learning_rate": 4.787620269403464e-06, "loss": 0.8411, "step": 7415 }, { "epoch": 0.09042935663534545, "grad_norm": 2.426180017485586, "learning_rate": 4.787299550994228e-06, "loss": 0.8985, "step": 7420 }, { "epoch": 0.09049029285949325, "grad_norm": 3.2145924642138066, "learning_rate": 4.786978832584991e-06, "loss": 0.8145, "step": 7425 }, { "epoch": 0.09055122908364106, "grad_norm": 2.9193012034082413, "learning_rate": 4.786658114175754e-06, "loss": 0.8513, "step": 7430 }, { "epoch": 0.09061216530778887, "grad_norm": 2.4560754161274923, "learning_rate": 4.786337395766517e-06, "loss": 0.8758, "step": 7435 }, { "epoch": 0.09067310153193668, "grad_norm": 2.282336111058483, "learning_rate": 4.786016677357281e-06, "loss": 0.9142, "step": 7440 }, { "epoch": 0.09073403775608448, "grad_norm": 2.7700449893249166, "learning_rate": 4.785695958948044e-06, "loss": 0.8179, "step": 7445 }, { "epoch": 0.09079497398023229, "grad_norm": 2.4385853386954723, "learning_rate": 4.785375240538807e-06, "loss": 0.8983, "step": 7450 }, { "epoch": 0.0908559102043801, "grad_norm": 2.8726268988528756, "learning_rate": 4.785054522129571e-06, "loss": 0.8427, "step": 7455 }, { "epoch": 0.0909168464285279, "grad_norm": 2.1568875796063782, "learning_rate": 4.784733803720334e-06, "loss": 0.8806, "step": 7460 }, { "epoch": 0.09097778265267571, "grad_norm": 2.8744746044486122, "learning_rate": 4.784413085311097e-06, "loss": 0.8587, "step": 7465 }, { "epoch": 0.09103871887682352, "grad_norm": 2.539246315290489, "learning_rate": 4.784092366901861e-06, "loss": 0.8875, "step": 7470 }, { "epoch": 0.09109965510097133, "grad_norm": 2.4858944625981416, "learning_rate": 4.783771648492624e-06, "loss": 0.8284, "step": 7475 }, { "epoch": 0.09116059132511913, "grad_norm": 3.080356734669538, "learning_rate": 4.783450930083387e-06, "loss": 0.8529, "step": 7480 }, { "epoch": 0.09122152754926693, "grad_norm": 2.2667903580524102, "learning_rate": 4.7831302116741506e-06, "loss": 0.9543, "step": 7485 }, { "epoch": 0.09128246377341474, "grad_norm": 3.650097305077686, "learning_rate": 4.7828094932649136e-06, "loss": 0.8373, "step": 7490 }, { "epoch": 0.09134339999756255, "grad_norm": 3.014789184940621, "learning_rate": 4.782488774855677e-06, "loss": 0.8604, "step": 7495 }, { "epoch": 0.09140433622171036, "grad_norm": 2.644945368547107, "learning_rate": 4.7821680564464405e-06, "loss": 0.7266, "step": 7500 }, { "epoch": 0.09146527244585817, "grad_norm": 2.1381617677881404, "learning_rate": 4.7818473380372035e-06, "loss": 0.8774, "step": 7505 }, { "epoch": 0.09152620867000597, "grad_norm": 2.351259068000418, "learning_rate": 4.781526619627967e-06, "loss": 0.8647, "step": 7510 }, { "epoch": 0.09158714489415377, "grad_norm": 2.3224990125857174, "learning_rate": 4.78120590121873e-06, "loss": 0.8803, "step": 7515 }, { "epoch": 0.09164808111830158, "grad_norm": 2.484830587429624, "learning_rate": 4.780885182809493e-06, "loss": 0.8181, "step": 7520 }, { "epoch": 0.09170901734244939, "grad_norm": 2.6826180759540854, "learning_rate": 4.780564464400257e-06, "loss": 0.9099, "step": 7525 }, { "epoch": 0.0917699535665972, "grad_norm": 2.778852035110871, "learning_rate": 4.78024374599102e-06, "loss": 0.8923, "step": 7530 }, { "epoch": 0.09183088979074501, "grad_norm": 2.2933100021626944, "learning_rate": 4.779923027581784e-06, "loss": 0.859, "step": 7535 }, { "epoch": 0.09189182601489282, "grad_norm": 2.929887055216616, "learning_rate": 4.779602309172547e-06, "loss": 0.8093, "step": 7540 }, { "epoch": 0.09195276223904061, "grad_norm": 3.401624288470503, "learning_rate": 4.77928159076331e-06, "loss": 0.9002, "step": 7545 }, { "epoch": 0.09201369846318842, "grad_norm": 3.026538925993065, "learning_rate": 4.778960872354074e-06, "loss": 0.7655, "step": 7550 }, { "epoch": 0.09207463468733623, "grad_norm": 3.0912513878147596, "learning_rate": 4.778640153944837e-06, "loss": 0.8652, "step": 7555 }, { "epoch": 0.09213557091148404, "grad_norm": 2.8619487338745233, "learning_rate": 4.7783194355356e-06, "loss": 0.8418, "step": 7560 }, { "epoch": 0.09219650713563185, "grad_norm": 3.3139419901351195, "learning_rate": 4.777998717126364e-06, "loss": 0.8865, "step": 7565 }, { "epoch": 0.09225744335977966, "grad_norm": 2.654485967793291, "learning_rate": 4.777677998717127e-06, "loss": 0.7661, "step": 7570 }, { "epoch": 0.09231837958392747, "grad_norm": 2.8798671723011857, "learning_rate": 4.77735728030789e-06, "loss": 0.8599, "step": 7575 }, { "epoch": 0.09237931580807526, "grad_norm": 4.290628005983056, "learning_rate": 4.777036561898654e-06, "loss": 0.8367, "step": 7580 }, { "epoch": 0.09244025203222307, "grad_norm": 2.448749375873463, "learning_rate": 4.776715843489417e-06, "loss": 0.8268, "step": 7585 }, { "epoch": 0.09250118825637088, "grad_norm": 2.8844867065641564, "learning_rate": 4.77639512508018e-06, "loss": 0.7597, "step": 7590 }, { "epoch": 0.09256212448051869, "grad_norm": 2.844749126583627, "learning_rate": 4.776074406670944e-06, "loss": 0.8181, "step": 7595 }, { "epoch": 0.0926230607046665, "grad_norm": 2.711589603801342, "learning_rate": 4.775753688261707e-06, "loss": 0.8085, "step": 7600 }, { "epoch": 0.09268399692881431, "grad_norm": 2.509314417491921, "learning_rate": 4.77543296985247e-06, "loss": 0.9047, "step": 7605 }, { "epoch": 0.0927449331529621, "grad_norm": 2.7585177014108293, "learning_rate": 4.775112251443233e-06, "loss": 0.8708, "step": 7610 }, { "epoch": 0.09280586937710991, "grad_norm": 2.5960197213235254, "learning_rate": 4.774791533033997e-06, "loss": 0.8798, "step": 7615 }, { "epoch": 0.09286680560125772, "grad_norm": 2.5192273217977594, "learning_rate": 4.77447081462476e-06, "loss": 0.8709, "step": 7620 }, { "epoch": 0.09292774182540553, "grad_norm": 2.9990144073242977, "learning_rate": 4.774150096215523e-06, "loss": 0.7967, "step": 7625 }, { "epoch": 0.09298867804955334, "grad_norm": 3.2460929592072847, "learning_rate": 4.7738293778062865e-06, "loss": 0.808, "step": 7630 }, { "epoch": 0.09304961427370115, "grad_norm": 2.835980120080118, "learning_rate": 4.7735086593970495e-06, "loss": 0.825, "step": 7635 }, { "epoch": 0.09311055049784896, "grad_norm": 3.3629180913331354, "learning_rate": 4.7731879409878125e-06, "loss": 1.0056, "step": 7640 }, { "epoch": 0.09317148672199675, "grad_norm": 2.72064699828704, "learning_rate": 4.772867222578576e-06, "loss": 0.8646, "step": 7645 }, { "epoch": 0.09323242294614456, "grad_norm": 3.090680684622464, "learning_rate": 4.7725465041693394e-06, "loss": 0.9322, "step": 7650 }, { "epoch": 0.09329335917029237, "grad_norm": 2.19077860796642, "learning_rate": 4.7722257857601024e-06, "loss": 0.8567, "step": 7655 }, { "epoch": 0.09335429539444018, "grad_norm": 2.5452897244804906, "learning_rate": 4.771905067350866e-06, "loss": 0.8594, "step": 7660 }, { "epoch": 0.09341523161858799, "grad_norm": 2.654725850736061, "learning_rate": 4.771584348941629e-06, "loss": 0.8277, "step": 7665 }, { "epoch": 0.0934761678427358, "grad_norm": 2.36482222095674, "learning_rate": 4.771263630532393e-06, "loss": 0.8924, "step": 7670 }, { "epoch": 0.0935371040668836, "grad_norm": 2.2543814671883577, "learning_rate": 4.770942912123156e-06, "loss": 0.8507, "step": 7675 }, { "epoch": 0.0935980402910314, "grad_norm": 2.728715035655238, "learning_rate": 4.77062219371392e-06, "loss": 0.8897, "step": 7680 }, { "epoch": 0.09365897651517921, "grad_norm": 2.6557632411070715, "learning_rate": 4.770301475304683e-06, "loss": 0.8495, "step": 7685 }, { "epoch": 0.09371991273932702, "grad_norm": 2.4885728151110733, "learning_rate": 4.769980756895446e-06, "loss": 0.8343, "step": 7690 }, { "epoch": 0.09378084896347483, "grad_norm": 2.420931800397961, "learning_rate": 4.76966003848621e-06, "loss": 0.8582, "step": 7695 }, { "epoch": 0.09384178518762264, "grad_norm": 2.463633726219958, "learning_rate": 4.769339320076973e-06, "loss": 0.8386, "step": 7700 }, { "epoch": 0.09390272141177045, "grad_norm": 2.1970933240800505, "learning_rate": 4.769018601667736e-06, "loss": 0.7667, "step": 7705 }, { "epoch": 0.09396365763591825, "grad_norm": 2.8516065270031223, "learning_rate": 4.7686978832585e-06, "loss": 0.9083, "step": 7710 }, { "epoch": 0.09402459386006605, "grad_norm": 2.5577231758804753, "learning_rate": 4.768377164849263e-06, "loss": 0.8335, "step": 7715 }, { "epoch": 0.09408553008421386, "grad_norm": 2.468249806743949, "learning_rate": 4.768056446440026e-06, "loss": 0.8408, "step": 7720 }, { "epoch": 0.09414646630836167, "grad_norm": 2.7899543817242263, "learning_rate": 4.76773572803079e-06, "loss": 0.8321, "step": 7725 }, { "epoch": 0.09420740253250948, "grad_norm": 2.853009503421813, "learning_rate": 4.767415009621553e-06, "loss": 0.8546, "step": 7730 }, { "epoch": 0.09426833875665729, "grad_norm": 2.3816843502114375, "learning_rate": 4.767094291212316e-06, "loss": 0.8946, "step": 7735 }, { "epoch": 0.0943292749808051, "grad_norm": 3.4396785966973, "learning_rate": 4.76677357280308e-06, "loss": 0.8581, "step": 7740 }, { "epoch": 0.09439021120495289, "grad_norm": 2.569322095975001, "learning_rate": 4.766452854393843e-06, "loss": 0.8913, "step": 7745 }, { "epoch": 0.0944511474291007, "grad_norm": 3.1721840104424106, "learning_rate": 4.766132135984606e-06, "loss": 0.7845, "step": 7750 }, { "epoch": 0.09451208365324851, "grad_norm": 2.3881317028331863, "learning_rate": 4.7658114175753696e-06, "loss": 0.8212, "step": 7755 }, { "epoch": 0.09457301987739632, "grad_norm": 2.961124593663435, "learning_rate": 4.7654906991661326e-06, "loss": 0.8295, "step": 7760 }, { "epoch": 0.09463395610154413, "grad_norm": 2.2812236096704743, "learning_rate": 4.765169980756896e-06, "loss": 0.8118, "step": 7765 }, { "epoch": 0.09469489232569193, "grad_norm": 2.585327338529876, "learning_rate": 4.764849262347659e-06, "loss": 0.7883, "step": 7770 }, { "epoch": 0.09475582854983974, "grad_norm": 2.6266545619021446, "learning_rate": 4.7645285439384225e-06, "loss": 0.7689, "step": 7775 }, { "epoch": 0.09481676477398754, "grad_norm": 2.3655802653712703, "learning_rate": 4.7642078255291855e-06, "loss": 0.869, "step": 7780 }, { "epoch": 0.09487770099813535, "grad_norm": 2.607451258024601, "learning_rate": 4.7638871071199485e-06, "loss": 0.8382, "step": 7785 }, { "epoch": 0.09493863722228316, "grad_norm": 2.6169898273351873, "learning_rate": 4.763566388710712e-06, "loss": 0.8488, "step": 7790 }, { "epoch": 0.09499957344643097, "grad_norm": 2.2920846640601473, "learning_rate": 4.763245670301475e-06, "loss": 0.8452, "step": 7795 }, { "epoch": 0.09506050967057877, "grad_norm": 2.6005824275393308, "learning_rate": 4.762924951892238e-06, "loss": 0.9133, "step": 7800 }, { "epoch": 0.09512144589472658, "grad_norm": 3.365404424660561, "learning_rate": 4.762604233483002e-06, "loss": 0.8977, "step": 7805 }, { "epoch": 0.09518238211887439, "grad_norm": 2.556048784632668, "learning_rate": 4.762283515073765e-06, "loss": 0.8264, "step": 7810 }, { "epoch": 0.09524331834302219, "grad_norm": 2.305757383562308, "learning_rate": 4.761962796664529e-06, "loss": 0.8791, "step": 7815 }, { "epoch": 0.09530425456717, "grad_norm": 2.96441625065145, "learning_rate": 4.761642078255292e-06, "loss": 0.8246, "step": 7820 }, { "epoch": 0.0953651907913178, "grad_norm": 4.615852601260201, "learning_rate": 4.761321359846055e-06, "loss": 0.8153, "step": 7825 }, { "epoch": 0.09542612701546561, "grad_norm": 2.9176701662416384, "learning_rate": 4.761000641436819e-06, "loss": 0.884, "step": 7830 }, { "epoch": 0.09548706323961342, "grad_norm": 2.2840616703928105, "learning_rate": 4.760679923027582e-06, "loss": 0.8231, "step": 7835 }, { "epoch": 0.09554799946376123, "grad_norm": 3.127229587853727, "learning_rate": 4.760359204618346e-06, "loss": 0.8208, "step": 7840 }, { "epoch": 0.09560893568790904, "grad_norm": 2.631313457799733, "learning_rate": 4.760038486209109e-06, "loss": 0.8609, "step": 7845 }, { "epoch": 0.09566987191205684, "grad_norm": 3.11136413438192, "learning_rate": 4.759717767799872e-06, "loss": 0.8482, "step": 7850 }, { "epoch": 0.09573080813620465, "grad_norm": 2.4586748914520826, "learning_rate": 4.759397049390636e-06, "loss": 0.8319, "step": 7855 }, { "epoch": 0.09579174436035245, "grad_norm": 2.74612821626372, "learning_rate": 4.759076330981399e-06, "loss": 0.8633, "step": 7860 }, { "epoch": 0.09585268058450026, "grad_norm": 2.8866213512462484, "learning_rate": 4.758755612572162e-06, "loss": 0.8574, "step": 7865 }, { "epoch": 0.09591361680864807, "grad_norm": 2.259184501386377, "learning_rate": 4.758434894162926e-06, "loss": 0.8234, "step": 7870 }, { "epoch": 0.09597455303279588, "grad_norm": 2.1980722214326636, "learning_rate": 4.758114175753689e-06, "loss": 0.8647, "step": 7875 }, { "epoch": 0.09603548925694368, "grad_norm": 2.779427216563622, "learning_rate": 4.757793457344452e-06, "loss": 0.8613, "step": 7880 }, { "epoch": 0.09609642548109149, "grad_norm": 2.9061205747307306, "learning_rate": 4.757472738935216e-06, "loss": 0.9281, "step": 7885 }, { "epoch": 0.0961573617052393, "grad_norm": 3.8043983472059493, "learning_rate": 4.757152020525979e-06, "loss": 0.8061, "step": 7890 }, { "epoch": 0.0962182979293871, "grad_norm": 2.9243991048522675, "learning_rate": 4.756831302116742e-06, "loss": 0.841, "step": 7895 }, { "epoch": 0.09627923415353491, "grad_norm": 2.4452745352309306, "learning_rate": 4.7565105837075055e-06, "loss": 0.8347, "step": 7900 }, { "epoch": 0.09634017037768272, "grad_norm": 2.2104894247722133, "learning_rate": 4.7561898652982685e-06, "loss": 0.8185, "step": 7905 }, { "epoch": 0.09640110660183053, "grad_norm": 2.7046272628258308, "learning_rate": 4.7558691468890315e-06, "loss": 0.8792, "step": 7910 }, { "epoch": 0.09646204282597833, "grad_norm": 2.917336402598289, "learning_rate": 4.755548428479795e-06, "loss": 0.8515, "step": 7915 }, { "epoch": 0.09652297905012613, "grad_norm": 2.5032880623435827, "learning_rate": 4.7552277100705584e-06, "loss": 0.767, "step": 7920 }, { "epoch": 0.09658391527427394, "grad_norm": 3.9377786499344305, "learning_rate": 4.7549069916613214e-06, "loss": 0.8039, "step": 7925 }, { "epoch": 0.09664485149842175, "grad_norm": 2.782901064396551, "learning_rate": 4.754586273252085e-06, "loss": 0.8195, "step": 7930 }, { "epoch": 0.09670578772256956, "grad_norm": 2.41606519865962, "learning_rate": 4.754265554842848e-06, "loss": 0.8366, "step": 7935 }, { "epoch": 0.09676672394671737, "grad_norm": 2.736000508944498, "learning_rate": 4.753944836433611e-06, "loss": 0.8956, "step": 7940 }, { "epoch": 0.09682766017086518, "grad_norm": 2.946014452666215, "learning_rate": 4.753624118024374e-06, "loss": 0.8607, "step": 7945 }, { "epoch": 0.09688859639501297, "grad_norm": 2.602726951721681, "learning_rate": 4.753303399615138e-06, "loss": 0.8836, "step": 7950 }, { "epoch": 0.09694953261916078, "grad_norm": 2.4604174931415574, "learning_rate": 4.752982681205901e-06, "loss": 0.8108, "step": 7955 }, { "epoch": 0.09701046884330859, "grad_norm": 4.185769234290961, "learning_rate": 4.752661962796665e-06, "loss": 0.9009, "step": 7960 }, { "epoch": 0.0970714050674564, "grad_norm": 2.836965979113239, "learning_rate": 4.752341244387428e-06, "loss": 0.797, "step": 7965 }, { "epoch": 0.09713234129160421, "grad_norm": 2.869520093952833, "learning_rate": 4.752020525978191e-06, "loss": 0.8431, "step": 7970 }, { "epoch": 0.09719327751575202, "grad_norm": 3.7215269243816573, "learning_rate": 4.751699807568955e-06, "loss": 0.9435, "step": 7975 }, { "epoch": 0.09725421373989981, "grad_norm": 2.404745927313566, "learning_rate": 4.751379089159718e-06, "loss": 0.8359, "step": 7980 }, { "epoch": 0.09731514996404762, "grad_norm": 2.771420321269201, "learning_rate": 4.751058370750482e-06, "loss": 0.8382, "step": 7985 }, { "epoch": 0.09737608618819543, "grad_norm": 3.193562665794275, "learning_rate": 4.750737652341245e-06, "loss": 0.8025, "step": 7990 }, { "epoch": 0.09743702241234324, "grad_norm": 2.7344615085858814, "learning_rate": 4.750416933932008e-06, "loss": 0.8631, "step": 7995 }, { "epoch": 0.09749795863649105, "grad_norm": 2.6802258951103415, "learning_rate": 4.750096215522772e-06, "loss": 0.7956, "step": 8000 }, { "epoch": 0.09755889486063886, "grad_norm": 2.7191591845757603, "learning_rate": 4.749775497113535e-06, "loss": 0.841, "step": 8005 }, { "epoch": 0.09761983108478667, "grad_norm": 2.6095509913989097, "learning_rate": 4.749454778704299e-06, "loss": 0.8424, "step": 8010 }, { "epoch": 0.09768076730893446, "grad_norm": 3.1111476632016593, "learning_rate": 4.749134060295062e-06, "loss": 0.8639, "step": 8015 }, { "epoch": 0.09774170353308227, "grad_norm": 2.1264972799535107, "learning_rate": 4.748813341885825e-06, "loss": 0.8615, "step": 8020 }, { "epoch": 0.09780263975723008, "grad_norm": 5.960054438006607, "learning_rate": 4.748492623476588e-06, "loss": 0.8678, "step": 8025 }, { "epoch": 0.09786357598137789, "grad_norm": 2.381773996898253, "learning_rate": 4.7481719050673516e-06, "loss": 0.8624, "step": 8030 }, { "epoch": 0.0979245122055257, "grad_norm": 3.243007611318737, "learning_rate": 4.747851186658115e-06, "loss": 0.8316, "step": 8035 }, { "epoch": 0.09798544842967351, "grad_norm": 2.5565290841870163, "learning_rate": 4.747530468248878e-06, "loss": 0.8291, "step": 8040 }, { "epoch": 0.09804638465382132, "grad_norm": 2.391526814066558, "learning_rate": 4.7472097498396415e-06, "loss": 0.8854, "step": 8045 }, { "epoch": 0.09810732087796911, "grad_norm": 2.655484931168656, "learning_rate": 4.7468890314304045e-06, "loss": 0.8232, "step": 8050 }, { "epoch": 0.09816825710211692, "grad_norm": 2.647322740004338, "learning_rate": 4.7465683130211675e-06, "loss": 0.9528, "step": 8055 }, { "epoch": 0.09822919332626473, "grad_norm": 2.4709906462833735, "learning_rate": 4.746247594611931e-06, "loss": 0.8389, "step": 8060 }, { "epoch": 0.09829012955041254, "grad_norm": 2.8078947334446474, "learning_rate": 4.745926876202694e-06, "loss": 0.8095, "step": 8065 }, { "epoch": 0.09835106577456035, "grad_norm": 4.434758213814205, "learning_rate": 4.745606157793457e-06, "loss": 0.7523, "step": 8070 }, { "epoch": 0.09841200199870816, "grad_norm": 2.440179752977626, "learning_rate": 4.745285439384221e-06, "loss": 0.8638, "step": 8075 }, { "epoch": 0.09847293822285597, "grad_norm": 2.4882808082559893, "learning_rate": 4.744964720974984e-06, "loss": 0.8083, "step": 8080 }, { "epoch": 0.09853387444700376, "grad_norm": 2.509145048018523, "learning_rate": 4.744644002565747e-06, "loss": 0.8986, "step": 8085 }, { "epoch": 0.09859481067115157, "grad_norm": 2.878969151637076, "learning_rate": 4.744323284156511e-06, "loss": 0.8421, "step": 8090 }, { "epoch": 0.09865574689529938, "grad_norm": 2.269726999912501, "learning_rate": 4.744002565747274e-06, "loss": 0.8642, "step": 8095 }, { "epoch": 0.09871668311944719, "grad_norm": 6.128907051034938, "learning_rate": 4.743681847338037e-06, "loss": 0.8724, "step": 8100 }, { "epoch": 0.098777619343595, "grad_norm": 2.477621023168912, "learning_rate": 4.7433611289288e-06, "loss": 0.8435, "step": 8105 }, { "epoch": 0.0988385555677428, "grad_norm": 2.8303473897208566, "learning_rate": 4.743040410519564e-06, "loss": 0.8155, "step": 8110 }, { "epoch": 0.0988994917918906, "grad_norm": 2.383355337215699, "learning_rate": 4.742719692110327e-06, "loss": 0.8171, "step": 8115 }, { "epoch": 0.09896042801603841, "grad_norm": 2.489130005941881, "learning_rate": 4.742398973701091e-06, "loss": 0.7948, "step": 8120 }, { "epoch": 0.09902136424018622, "grad_norm": 2.8763857938331263, "learning_rate": 4.742078255291854e-06, "loss": 0.8722, "step": 8125 }, { "epoch": 0.09908230046433403, "grad_norm": 2.7296209956356074, "learning_rate": 4.741757536882618e-06, "loss": 0.8237, "step": 8130 }, { "epoch": 0.09914323668848184, "grad_norm": 2.2768463796846543, "learning_rate": 4.741436818473381e-06, "loss": 0.7658, "step": 8135 }, { "epoch": 0.09920417291262965, "grad_norm": 2.761634157290273, "learning_rate": 4.741116100064144e-06, "loss": 0.8559, "step": 8140 }, { "epoch": 0.09926510913677745, "grad_norm": 2.408409327291253, "learning_rate": 4.740795381654908e-06, "loss": 0.8284, "step": 8145 }, { "epoch": 0.09932604536092525, "grad_norm": 3.1774226491914646, "learning_rate": 4.740474663245671e-06, "loss": 0.9301, "step": 8150 }, { "epoch": 0.09938698158507306, "grad_norm": 4.303921694598057, "learning_rate": 4.740153944836435e-06, "loss": 0.7919, "step": 8155 }, { "epoch": 0.09944791780922087, "grad_norm": 2.997778669617834, "learning_rate": 4.739833226427198e-06, "loss": 0.9288, "step": 8160 }, { "epoch": 0.09950885403336868, "grad_norm": 2.088728642544774, "learning_rate": 4.739512508017961e-06, "loss": 0.8099, "step": 8165 }, { "epoch": 0.09956979025751649, "grad_norm": 2.588697394525019, "learning_rate": 4.7391917896087245e-06, "loss": 0.9279, "step": 8170 }, { "epoch": 0.0996307264816643, "grad_norm": 3.847152501773407, "learning_rate": 4.7388710711994875e-06, "loss": 0.8886, "step": 8175 }, { "epoch": 0.0996916627058121, "grad_norm": 2.799206752368967, "learning_rate": 4.7385503527902505e-06, "loss": 0.8795, "step": 8180 }, { "epoch": 0.0997525989299599, "grad_norm": 2.786911602625894, "learning_rate": 4.738229634381014e-06, "loss": 0.8399, "step": 8185 }, { "epoch": 0.09981353515410771, "grad_norm": 2.651510801186984, "learning_rate": 4.7379089159717774e-06, "loss": 0.8299, "step": 8190 }, { "epoch": 0.09987447137825552, "grad_norm": 2.520180229871374, "learning_rate": 4.7375881975625404e-06, "loss": 0.7836, "step": 8195 }, { "epoch": 0.09993540760240333, "grad_norm": 2.2947272772173033, "learning_rate": 4.7372674791533035e-06, "loss": 0.8737, "step": 8200 }, { "epoch": 0.09999634382655113, "grad_norm": 2.1774190667435223, "learning_rate": 4.736946760744067e-06, "loss": 0.8194, "step": 8205 }, { "epoch": 0.10005728005069894, "grad_norm": 3.0376668367270048, "learning_rate": 4.73662604233483e-06, "loss": 0.7952, "step": 8210 }, { "epoch": 0.10011821627484674, "grad_norm": 2.4610150763293976, "learning_rate": 4.736305323925593e-06, "loss": 0.8571, "step": 8215 }, { "epoch": 0.10017915249899455, "grad_norm": 2.7260836765323315, "learning_rate": 4.735984605516357e-06, "loss": 0.9279, "step": 8220 }, { "epoch": 0.10024008872314236, "grad_norm": 3.7848751897367943, "learning_rate": 4.73566388710712e-06, "loss": 0.8961, "step": 8225 }, { "epoch": 0.10030102494729017, "grad_norm": 2.434655471332336, "learning_rate": 4.735343168697883e-06, "loss": 0.8649, "step": 8230 }, { "epoch": 0.10036196117143797, "grad_norm": 2.429911003635556, "learning_rate": 4.735022450288647e-06, "loss": 0.7206, "step": 8235 }, { "epoch": 0.10042289739558578, "grad_norm": 2.7394256926891773, "learning_rate": 4.73470173187941e-06, "loss": 0.8595, "step": 8240 }, { "epoch": 0.10048383361973359, "grad_norm": 2.3274972379195544, "learning_rate": 4.734381013470173e-06, "loss": 0.7889, "step": 8245 }, { "epoch": 0.10054476984388139, "grad_norm": 2.6245998213733017, "learning_rate": 4.734060295060937e-06, "loss": 0.874, "step": 8250 }, { "epoch": 0.1006057060680292, "grad_norm": 2.5043380373687376, "learning_rate": 4.7337395766517e-06, "loss": 0.8156, "step": 8255 }, { "epoch": 0.100666642292177, "grad_norm": 2.497284512315024, "learning_rate": 4.733418858242463e-06, "loss": 0.8257, "step": 8260 }, { "epoch": 0.10072757851632481, "grad_norm": 2.7600974533860057, "learning_rate": 4.733098139833227e-06, "loss": 0.8104, "step": 8265 }, { "epoch": 0.10078851474047262, "grad_norm": 2.774715560493593, "learning_rate": 4.73277742142399e-06, "loss": 0.8177, "step": 8270 }, { "epoch": 0.10084945096462043, "grad_norm": 3.297322246382584, "learning_rate": 4.732456703014753e-06, "loss": 0.8172, "step": 8275 }, { "epoch": 0.10091038718876824, "grad_norm": 2.2917734019404743, "learning_rate": 4.732135984605517e-06, "loss": 0.8259, "step": 8280 }, { "epoch": 0.10097132341291604, "grad_norm": 2.3617140353919126, "learning_rate": 4.73181526619628e-06, "loss": 0.8005, "step": 8285 }, { "epoch": 0.10103225963706385, "grad_norm": 3.070263945307317, "learning_rate": 4.731494547787044e-06, "loss": 0.9456, "step": 8290 }, { "epoch": 0.10109319586121165, "grad_norm": 2.6582417239259604, "learning_rate": 4.731173829377807e-06, "loss": 0.8509, "step": 8295 }, { "epoch": 0.10115413208535946, "grad_norm": 3.098889097233841, "learning_rate": 4.73085311096857e-06, "loss": 0.8048, "step": 8300 }, { "epoch": 0.10121506830950727, "grad_norm": 2.482076472362873, "learning_rate": 4.730532392559334e-06, "loss": 0.8133, "step": 8305 }, { "epoch": 0.10127600453365508, "grad_norm": 2.47900932965735, "learning_rate": 4.730211674150097e-06, "loss": 0.8175, "step": 8310 }, { "epoch": 0.10133694075780289, "grad_norm": 3.2620038730048933, "learning_rate": 4.7298909557408605e-06, "loss": 0.78, "step": 8315 }, { "epoch": 0.10139787698195069, "grad_norm": 2.154739536413719, "learning_rate": 4.7295702373316235e-06, "loss": 0.8072, "step": 8320 }, { "epoch": 0.1014588132060985, "grad_norm": 3.338083342813916, "learning_rate": 4.7292495189223865e-06, "loss": 0.7639, "step": 8325 }, { "epoch": 0.1015197494302463, "grad_norm": 2.8515597679912665, "learning_rate": 4.72892880051315e-06, "loss": 0.9009, "step": 8330 }, { "epoch": 0.10158068565439411, "grad_norm": 2.479458722677545, "learning_rate": 4.728608082103913e-06, "loss": 0.8004, "step": 8335 }, { "epoch": 0.10164162187854192, "grad_norm": 2.727833387599494, "learning_rate": 4.728287363694676e-06, "loss": 0.7734, "step": 8340 }, { "epoch": 0.10170255810268973, "grad_norm": 2.8054600535558403, "learning_rate": 4.72796664528544e-06, "loss": 0.7761, "step": 8345 }, { "epoch": 0.10176349432683753, "grad_norm": 2.581665354118104, "learning_rate": 4.727645926876203e-06, "loss": 0.7755, "step": 8350 }, { "epoch": 0.10182443055098533, "grad_norm": 2.739962361864636, "learning_rate": 4.727325208466966e-06, "loss": 0.8359, "step": 8355 }, { "epoch": 0.10188536677513314, "grad_norm": 2.693655107911649, "learning_rate": 4.727004490057729e-06, "loss": 0.8196, "step": 8360 }, { "epoch": 0.10194630299928095, "grad_norm": 2.6348814169630583, "learning_rate": 4.726683771648493e-06, "loss": 0.8485, "step": 8365 }, { "epoch": 0.10200723922342876, "grad_norm": 2.329759544058956, "learning_rate": 4.726363053239256e-06, "loss": 0.7648, "step": 8370 }, { "epoch": 0.10206817544757657, "grad_norm": 2.6290669017614654, "learning_rate": 4.726042334830019e-06, "loss": 0.8358, "step": 8375 }, { "epoch": 0.10212911167172438, "grad_norm": 2.7741636148566173, "learning_rate": 4.725721616420783e-06, "loss": 0.8095, "step": 8380 }, { "epoch": 0.10219004789587217, "grad_norm": 2.5003277085680784, "learning_rate": 4.725400898011546e-06, "loss": 0.9247, "step": 8385 }, { "epoch": 0.10225098412001998, "grad_norm": 2.7912307826775895, "learning_rate": 4.725080179602309e-06, "loss": 0.8285, "step": 8390 }, { "epoch": 0.10231192034416779, "grad_norm": 2.4649461268970914, "learning_rate": 4.724759461193073e-06, "loss": 0.8359, "step": 8395 }, { "epoch": 0.1023728565683156, "grad_norm": 2.2247634225071953, "learning_rate": 4.724438742783836e-06, "loss": 0.835, "step": 8400 }, { "epoch": 0.10243379279246341, "grad_norm": 3.204006489276389, "learning_rate": 4.724118024374599e-06, "loss": 0.9342, "step": 8405 }, { "epoch": 0.10249472901661122, "grad_norm": 3.500131743245725, "learning_rate": 4.723797305965363e-06, "loss": 0.8509, "step": 8410 }, { "epoch": 0.10255566524075903, "grad_norm": 2.2701134553460904, "learning_rate": 4.723476587556126e-06, "loss": 0.818, "step": 8415 }, { "epoch": 0.10261660146490682, "grad_norm": 2.7491658578159623, "learning_rate": 4.723155869146889e-06, "loss": 0.8573, "step": 8420 }, { "epoch": 0.10267753768905463, "grad_norm": 2.637241473569354, "learning_rate": 4.722835150737653e-06, "loss": 0.8247, "step": 8425 }, { "epoch": 0.10273847391320244, "grad_norm": 2.9354301703939503, "learning_rate": 4.722514432328416e-06, "loss": 0.8189, "step": 8430 }, { "epoch": 0.10279941013735025, "grad_norm": 2.8198637144642156, "learning_rate": 4.72219371391918e-06, "loss": 0.8261, "step": 8435 }, { "epoch": 0.10286034636149806, "grad_norm": 2.22430139539637, "learning_rate": 4.721872995509943e-06, "loss": 0.8724, "step": 8440 }, { "epoch": 0.10292128258564587, "grad_norm": 2.449853230987862, "learning_rate": 4.721552277100706e-06, "loss": 0.7652, "step": 8445 }, { "epoch": 0.10298221880979366, "grad_norm": 2.328939315360364, "learning_rate": 4.7212315586914695e-06, "loss": 0.8736, "step": 8450 }, { "epoch": 0.10304315503394147, "grad_norm": 2.8012454345494593, "learning_rate": 4.7209108402822326e-06, "loss": 0.8591, "step": 8455 }, { "epoch": 0.10310409125808928, "grad_norm": 2.0473222153455723, "learning_rate": 4.7205901218729964e-06, "loss": 0.8034, "step": 8460 }, { "epoch": 0.10316502748223709, "grad_norm": 2.7601564215215237, "learning_rate": 4.7202694034637594e-06, "loss": 0.8398, "step": 8465 }, { "epoch": 0.1032259637063849, "grad_norm": 2.4483017224212804, "learning_rate": 4.7199486850545225e-06, "loss": 0.834, "step": 8470 }, { "epoch": 0.10328689993053271, "grad_norm": 2.6564762927651886, "learning_rate": 4.719627966645286e-06, "loss": 0.8387, "step": 8475 }, { "epoch": 0.10334783615468052, "grad_norm": 2.276651843212709, "learning_rate": 4.719307248236049e-06, "loss": 0.7616, "step": 8480 }, { "epoch": 0.10340877237882831, "grad_norm": 3.0390836918115935, "learning_rate": 4.718986529826812e-06, "loss": 0.8916, "step": 8485 }, { "epoch": 0.10346970860297612, "grad_norm": 3.423111401407227, "learning_rate": 4.718665811417576e-06, "loss": 0.8573, "step": 8490 }, { "epoch": 0.10353064482712393, "grad_norm": 2.921892034000696, "learning_rate": 4.718345093008339e-06, "loss": 0.8689, "step": 8495 }, { "epoch": 0.10359158105127174, "grad_norm": 2.6907391837286445, "learning_rate": 4.718024374599102e-06, "loss": 0.8346, "step": 8500 }, { "epoch": 0.10365251727541955, "grad_norm": 2.803461591103307, "learning_rate": 4.717703656189866e-06, "loss": 0.9149, "step": 8505 }, { "epoch": 0.10371345349956736, "grad_norm": 4.091028006282986, "learning_rate": 4.717382937780629e-06, "loss": 0.8295, "step": 8510 }, { "epoch": 0.10377438972371517, "grad_norm": 2.632580601761895, "learning_rate": 4.717062219371392e-06, "loss": 0.8168, "step": 8515 }, { "epoch": 0.10383532594786296, "grad_norm": 2.4893987427439037, "learning_rate": 4.716741500962156e-06, "loss": 0.8817, "step": 8520 }, { "epoch": 0.10389626217201077, "grad_norm": 3.0182329421845973, "learning_rate": 4.716420782552919e-06, "loss": 0.8645, "step": 8525 }, { "epoch": 0.10395719839615858, "grad_norm": 3.1134715482364057, "learning_rate": 4.716100064143682e-06, "loss": 0.8659, "step": 8530 }, { "epoch": 0.10401813462030639, "grad_norm": 2.4666389430531876, "learning_rate": 4.715779345734445e-06, "loss": 0.8836, "step": 8535 }, { "epoch": 0.1040790708444542, "grad_norm": 3.461884364206578, "learning_rate": 4.715458627325209e-06, "loss": 0.8534, "step": 8540 }, { "epoch": 0.104140007068602, "grad_norm": 2.7477864419053177, "learning_rate": 4.715137908915972e-06, "loss": 0.8437, "step": 8545 }, { "epoch": 0.10420094329274981, "grad_norm": 2.467849199917634, "learning_rate": 4.714817190506735e-06, "loss": 0.8872, "step": 8550 }, { "epoch": 0.10426187951689761, "grad_norm": 5.0075405145767915, "learning_rate": 4.714496472097499e-06, "loss": 0.8253, "step": 8555 }, { "epoch": 0.10432281574104542, "grad_norm": 3.88697916686542, "learning_rate": 4.714175753688262e-06, "loss": 0.8343, "step": 8560 }, { "epoch": 0.10438375196519323, "grad_norm": 3.2160106495355842, "learning_rate": 4.713855035279025e-06, "loss": 0.8437, "step": 8565 }, { "epoch": 0.10444468818934104, "grad_norm": 2.2814205489847685, "learning_rate": 4.713534316869789e-06, "loss": 0.8919, "step": 8570 }, { "epoch": 0.10450562441348885, "grad_norm": 2.411425890294762, "learning_rate": 4.713213598460552e-06, "loss": 0.8998, "step": 8575 }, { "epoch": 0.10456656063763665, "grad_norm": 2.5713023760635014, "learning_rate": 4.712892880051315e-06, "loss": 0.7975, "step": 8580 }, { "epoch": 0.10462749686178445, "grad_norm": 2.5841337020600172, "learning_rate": 4.712572161642079e-06, "loss": 0.8336, "step": 8585 }, { "epoch": 0.10468843308593226, "grad_norm": 3.862977458913468, "learning_rate": 4.712251443232842e-06, "loss": 0.7937, "step": 8590 }, { "epoch": 0.10474936931008007, "grad_norm": 2.730394807085496, "learning_rate": 4.7119307248236055e-06, "loss": 0.8761, "step": 8595 }, { "epoch": 0.10481030553422788, "grad_norm": 2.696152389052144, "learning_rate": 4.7116100064143685e-06, "loss": 0.9106, "step": 8600 }, { "epoch": 0.10487124175837569, "grad_norm": 3.1949670815787092, "learning_rate": 4.711289288005132e-06, "loss": 0.8241, "step": 8605 }, { "epoch": 0.1049321779825235, "grad_norm": 2.440655426162255, "learning_rate": 4.710968569595895e-06, "loss": 0.7914, "step": 8610 }, { "epoch": 0.1049931142066713, "grad_norm": 2.792646057057905, "learning_rate": 4.710647851186658e-06, "loss": 0.8538, "step": 8615 }, { "epoch": 0.1050540504308191, "grad_norm": 2.455843017717229, "learning_rate": 4.710327132777422e-06, "loss": 0.7956, "step": 8620 }, { "epoch": 0.10511498665496691, "grad_norm": 2.9581195516736707, "learning_rate": 4.710006414368185e-06, "loss": 0.7661, "step": 8625 }, { "epoch": 0.10517592287911472, "grad_norm": 2.6786259792057514, "learning_rate": 4.709685695958948e-06, "loss": 0.832, "step": 8630 }, { "epoch": 0.10523685910326253, "grad_norm": 2.4212649318853368, "learning_rate": 4.709364977549712e-06, "loss": 0.9041, "step": 8635 }, { "epoch": 0.10529779532741033, "grad_norm": 2.9925133994608752, "learning_rate": 4.709044259140475e-06, "loss": 0.8254, "step": 8640 }, { "epoch": 0.10535873155155814, "grad_norm": 2.7705966934431916, "learning_rate": 4.708723540731238e-06, "loss": 0.9292, "step": 8645 }, { "epoch": 0.10541966777570595, "grad_norm": 2.3217612502167357, "learning_rate": 4.708402822322002e-06, "loss": 0.9088, "step": 8650 }, { "epoch": 0.10548060399985375, "grad_norm": 2.4851982541992688, "learning_rate": 4.708082103912765e-06, "loss": 0.8082, "step": 8655 }, { "epoch": 0.10554154022400156, "grad_norm": 2.3411643213053983, "learning_rate": 4.707761385503528e-06, "loss": 0.8405, "step": 8660 }, { "epoch": 0.10560247644814937, "grad_norm": 3.2330339729557074, "learning_rate": 4.707440667094292e-06, "loss": 0.8636, "step": 8665 }, { "epoch": 0.10566341267229717, "grad_norm": 2.7171798686603106, "learning_rate": 4.707119948685055e-06, "loss": 0.8932, "step": 8670 }, { "epoch": 0.10572434889644498, "grad_norm": 3.3040681018293157, "learning_rate": 4.706799230275818e-06, "loss": 0.8225, "step": 8675 }, { "epoch": 0.10578528512059279, "grad_norm": 3.8871413335631546, "learning_rate": 4.706478511866582e-06, "loss": 0.8115, "step": 8680 }, { "epoch": 0.1058462213447406, "grad_norm": 2.5798612123017612, "learning_rate": 4.706157793457345e-06, "loss": 0.876, "step": 8685 }, { "epoch": 0.1059071575688884, "grad_norm": 2.3523579414549003, "learning_rate": 4.705837075048108e-06, "loss": 0.8166, "step": 8690 }, { "epoch": 0.1059680937930362, "grad_norm": 2.8325307075906685, "learning_rate": 4.705516356638871e-06, "loss": 0.8213, "step": 8695 }, { "epoch": 0.10602903001718401, "grad_norm": 2.2480116286669523, "learning_rate": 4.705195638229635e-06, "loss": 0.8209, "step": 8700 }, { "epoch": 0.10608996624133182, "grad_norm": 2.154091090775952, "learning_rate": 4.704874919820398e-06, "loss": 0.8099, "step": 8705 }, { "epoch": 0.10615090246547963, "grad_norm": 2.480092036710685, "learning_rate": 4.704554201411161e-06, "loss": 0.7706, "step": 8710 }, { "epoch": 0.10621183868962744, "grad_norm": 3.085082013157658, "learning_rate": 4.704233483001925e-06, "loss": 0.8476, "step": 8715 }, { "epoch": 0.10627277491377524, "grad_norm": 3.265449961834223, "learning_rate": 4.703912764592688e-06, "loss": 0.8123, "step": 8720 }, { "epoch": 0.10633371113792305, "grad_norm": 2.3008744781071355, "learning_rate": 4.703592046183451e-06, "loss": 0.7776, "step": 8725 }, { "epoch": 0.10639464736207085, "grad_norm": 2.6461328639273574, "learning_rate": 4.703271327774215e-06, "loss": 0.8418, "step": 8730 }, { "epoch": 0.10645558358621866, "grad_norm": 2.568914255776438, "learning_rate": 4.702950609364978e-06, "loss": 0.9001, "step": 8735 }, { "epoch": 0.10651651981036647, "grad_norm": 2.8009367802376812, "learning_rate": 4.7026298909557415e-06, "loss": 0.7603, "step": 8740 }, { "epoch": 0.10657745603451428, "grad_norm": 2.6586515840658245, "learning_rate": 4.7023091725465045e-06, "loss": 0.8203, "step": 8745 }, { "epoch": 0.10663839225866209, "grad_norm": 2.728283057910764, "learning_rate": 4.7019884541372675e-06, "loss": 0.8661, "step": 8750 }, { "epoch": 0.10669932848280989, "grad_norm": 3.1304487023123735, "learning_rate": 4.701667735728031e-06, "loss": 0.8994, "step": 8755 }, { "epoch": 0.1067602647069577, "grad_norm": 2.4979402781196356, "learning_rate": 4.701347017318794e-06, "loss": 0.872, "step": 8760 }, { "epoch": 0.1068212009311055, "grad_norm": 3.1295682811858385, "learning_rate": 4.701026298909558e-06, "loss": 0.885, "step": 8765 }, { "epoch": 0.10688213715525331, "grad_norm": 4.018192589263609, "learning_rate": 4.700705580500321e-06, "loss": 0.907, "step": 8770 }, { "epoch": 0.10694307337940112, "grad_norm": 2.2117785290072147, "learning_rate": 4.700384862091084e-06, "loss": 0.8406, "step": 8775 }, { "epoch": 0.10700400960354893, "grad_norm": 2.5241546600840508, "learning_rate": 4.700064143681848e-06, "loss": 0.8382, "step": 8780 }, { "epoch": 0.10706494582769674, "grad_norm": 2.1242194931041234, "learning_rate": 4.699743425272611e-06, "loss": 0.8341, "step": 8785 }, { "epoch": 0.10712588205184453, "grad_norm": 2.797751241684311, "learning_rate": 4.699422706863374e-06, "loss": 0.8341, "step": 8790 }, { "epoch": 0.10718681827599234, "grad_norm": 2.604519670667714, "learning_rate": 4.699101988454138e-06, "loss": 0.8078, "step": 8795 }, { "epoch": 0.10724775450014015, "grad_norm": 2.655755782674357, "learning_rate": 4.698781270044901e-06, "loss": 0.821, "step": 8800 }, { "epoch": 0.10730869072428796, "grad_norm": 2.645441294774117, "learning_rate": 4.698460551635664e-06, "loss": 0.8201, "step": 8805 }, { "epoch": 0.10736962694843577, "grad_norm": 2.7022672144868003, "learning_rate": 4.698139833226428e-06, "loss": 0.8399, "step": 8810 }, { "epoch": 0.10743056317258358, "grad_norm": 3.096309234695005, "learning_rate": 4.697819114817191e-06, "loss": 0.8004, "step": 8815 }, { "epoch": 0.10749149939673137, "grad_norm": 2.891064547999768, "learning_rate": 4.697498396407954e-06, "loss": 0.8286, "step": 8820 }, { "epoch": 0.10755243562087918, "grad_norm": 2.692078631863523, "learning_rate": 4.697177677998718e-06, "loss": 0.9156, "step": 8825 }, { "epoch": 0.10761337184502699, "grad_norm": 2.5128594586647273, "learning_rate": 4.696856959589481e-06, "loss": 0.8011, "step": 8830 }, { "epoch": 0.1076743080691748, "grad_norm": 2.402176267001279, "learning_rate": 4.696536241180244e-06, "loss": 0.832, "step": 8835 }, { "epoch": 0.10773524429332261, "grad_norm": 3.798936245365384, "learning_rate": 4.696215522771008e-06, "loss": 0.8743, "step": 8840 }, { "epoch": 0.10779618051747042, "grad_norm": 2.851566677722446, "learning_rate": 4.695894804361771e-06, "loss": 0.8375, "step": 8845 }, { "epoch": 0.10785711674161823, "grad_norm": 2.596372345741828, "learning_rate": 4.695574085952534e-06, "loss": 0.9789, "step": 8850 }, { "epoch": 0.10791805296576602, "grad_norm": 2.416270460699255, "learning_rate": 4.695253367543298e-06, "loss": 0.8144, "step": 8855 }, { "epoch": 0.10797898918991383, "grad_norm": 3.3953831813660615, "learning_rate": 4.694932649134061e-06, "loss": 0.8787, "step": 8860 }, { "epoch": 0.10803992541406164, "grad_norm": 2.6336245073950724, "learning_rate": 4.694611930724824e-06, "loss": 0.9233, "step": 8865 }, { "epoch": 0.10810086163820945, "grad_norm": 2.1138094193634367, "learning_rate": 4.694291212315587e-06, "loss": 0.7928, "step": 8870 }, { "epoch": 0.10816179786235726, "grad_norm": 2.522325475580007, "learning_rate": 4.6939704939063505e-06, "loss": 0.8036, "step": 8875 }, { "epoch": 0.10822273408650507, "grad_norm": 2.482728495232376, "learning_rate": 4.6936497754971136e-06, "loss": 0.9203, "step": 8880 }, { "epoch": 0.10828367031065288, "grad_norm": 2.683073002831388, "learning_rate": 4.693329057087877e-06, "loss": 0.8485, "step": 8885 }, { "epoch": 0.10834460653480067, "grad_norm": 3.2310513756408596, "learning_rate": 4.6930083386786404e-06, "loss": 0.8862, "step": 8890 }, { "epoch": 0.10840554275894848, "grad_norm": 2.689233903082577, "learning_rate": 4.6926876202694035e-06, "loss": 0.9224, "step": 8895 }, { "epoch": 0.10846647898309629, "grad_norm": 2.8142128647969025, "learning_rate": 4.692366901860167e-06, "loss": 0.8575, "step": 8900 }, { "epoch": 0.1085274152072441, "grad_norm": 2.473939121964984, "learning_rate": 4.69204618345093e-06, "loss": 0.8641, "step": 8905 }, { "epoch": 0.10858835143139191, "grad_norm": 2.4761552757532135, "learning_rate": 4.691725465041694e-06, "loss": 0.905, "step": 8910 }, { "epoch": 0.10864928765553972, "grad_norm": 3.0039389236182816, "learning_rate": 4.691404746632457e-06, "loss": 0.859, "step": 8915 }, { "epoch": 0.10871022387968753, "grad_norm": 2.7138354998324594, "learning_rate": 4.69108402822322e-06, "loss": 0.8384, "step": 8920 }, { "epoch": 0.10877116010383532, "grad_norm": 3.1825538327054015, "learning_rate": 4.690763309813984e-06, "loss": 0.8586, "step": 8925 }, { "epoch": 0.10883209632798313, "grad_norm": 4.626878552738346, "learning_rate": 4.690442591404747e-06, "loss": 0.8577, "step": 8930 }, { "epoch": 0.10889303255213094, "grad_norm": 2.772320989593744, "learning_rate": 4.690121872995511e-06, "loss": 0.8444, "step": 8935 }, { "epoch": 0.10895396877627875, "grad_norm": 2.6966663018747616, "learning_rate": 4.689801154586274e-06, "loss": 0.8918, "step": 8940 }, { "epoch": 0.10901490500042656, "grad_norm": 2.443496120383345, "learning_rate": 4.689480436177037e-06, "loss": 0.9126, "step": 8945 }, { "epoch": 0.10907584122457437, "grad_norm": 2.214847867579048, "learning_rate": 4.6891597177678e-06, "loss": 0.8071, "step": 8950 }, { "epoch": 0.10913677744872216, "grad_norm": 2.560758840304402, "learning_rate": 4.688838999358564e-06, "loss": 0.8825, "step": 8955 }, { "epoch": 0.10919771367286997, "grad_norm": 3.3405848855462503, "learning_rate": 4.688518280949327e-06, "loss": 0.8675, "step": 8960 }, { "epoch": 0.10925864989701778, "grad_norm": 2.870630062031381, "learning_rate": 4.68819756254009e-06, "loss": 0.7969, "step": 8965 }, { "epoch": 0.10931958612116559, "grad_norm": 4.118771956973885, "learning_rate": 4.687876844130854e-06, "loss": 0.8077, "step": 8970 }, { "epoch": 0.1093805223453134, "grad_norm": 2.3523456108585528, "learning_rate": 4.687556125721617e-06, "loss": 0.7896, "step": 8975 }, { "epoch": 0.1094414585694612, "grad_norm": 2.172215248775857, "learning_rate": 4.68723540731238e-06, "loss": 0.7753, "step": 8980 }, { "epoch": 0.10950239479360901, "grad_norm": 3.098486197984045, "learning_rate": 4.686914688903144e-06, "loss": 0.8173, "step": 8985 }, { "epoch": 0.10956333101775681, "grad_norm": 2.708262715992569, "learning_rate": 4.686593970493907e-06, "loss": 0.8275, "step": 8990 }, { "epoch": 0.10962426724190462, "grad_norm": 2.6948383228691197, "learning_rate": 4.68627325208467e-06, "loss": 0.8573, "step": 8995 }, { "epoch": 0.10968520346605243, "grad_norm": 2.4300547166841113, "learning_rate": 4.685952533675434e-06, "loss": 0.8355, "step": 9000 }, { "epoch": 0.10974613969020024, "grad_norm": 2.67060680780725, "learning_rate": 4.685631815266197e-06, "loss": 0.8356, "step": 9005 }, { "epoch": 0.10980707591434805, "grad_norm": 3.2217996107443034, "learning_rate": 4.68531109685696e-06, "loss": 0.8698, "step": 9010 }, { "epoch": 0.10986801213849585, "grad_norm": 2.993068685711324, "learning_rate": 4.6849903784477235e-06, "loss": 0.8775, "step": 9015 }, { "epoch": 0.10992894836264366, "grad_norm": 2.8413489202773383, "learning_rate": 4.6846696600384865e-06, "loss": 0.7826, "step": 9020 }, { "epoch": 0.10998988458679146, "grad_norm": 3.095261179480442, "learning_rate": 4.6843489416292495e-06, "loss": 0.9381, "step": 9025 }, { "epoch": 0.11005082081093927, "grad_norm": 2.370989106410282, "learning_rate": 4.6840282232200125e-06, "loss": 0.8036, "step": 9030 }, { "epoch": 0.11011175703508708, "grad_norm": 2.710168760591295, "learning_rate": 4.683707504810776e-06, "loss": 0.8275, "step": 9035 }, { "epoch": 0.11017269325923489, "grad_norm": 2.2264680381946738, "learning_rate": 4.683386786401539e-06, "loss": 0.8562, "step": 9040 }, { "epoch": 0.1102336294833827, "grad_norm": 2.6992271246023245, "learning_rate": 4.683066067992303e-06, "loss": 0.8249, "step": 9045 }, { "epoch": 0.1102945657075305, "grad_norm": 3.0383802994363998, "learning_rate": 4.682745349583066e-06, "loss": 0.891, "step": 9050 }, { "epoch": 0.1103555019316783, "grad_norm": 2.7121871274599, "learning_rate": 4.682424631173829e-06, "loss": 0.8435, "step": 9055 }, { "epoch": 0.11041643815582611, "grad_norm": 3.1311665055079554, "learning_rate": 4.682103912764593e-06, "loss": 0.9402, "step": 9060 }, { "epoch": 0.11047737437997392, "grad_norm": 2.555519812192759, "learning_rate": 4.681783194355356e-06, "loss": 0.846, "step": 9065 }, { "epoch": 0.11053831060412173, "grad_norm": 2.5024847687154645, "learning_rate": 4.68146247594612e-06, "loss": 0.8658, "step": 9070 }, { "epoch": 0.11059924682826953, "grad_norm": 2.5256104713805643, "learning_rate": 4.681141757536883e-06, "loss": 0.8586, "step": 9075 }, { "epoch": 0.11066018305241734, "grad_norm": 2.807822770502616, "learning_rate": 4.680821039127647e-06, "loss": 0.8742, "step": 9080 }, { "epoch": 0.11072111927656515, "grad_norm": 2.656091713381647, "learning_rate": 4.68050032071841e-06, "loss": 0.8202, "step": 9085 }, { "epoch": 0.11078205550071295, "grad_norm": 2.9893839264403654, "learning_rate": 4.680179602309173e-06, "loss": 0.8631, "step": 9090 }, { "epoch": 0.11084299172486076, "grad_norm": 2.7850592447627345, "learning_rate": 4.679858883899937e-06, "loss": 0.7949, "step": 9095 }, { "epoch": 0.11090392794900857, "grad_norm": 3.2874467706179837, "learning_rate": 4.6795381654907e-06, "loss": 0.9004, "step": 9100 }, { "epoch": 0.11096486417315637, "grad_norm": 2.78436472123566, "learning_rate": 4.679217447081463e-06, "loss": 0.8616, "step": 9105 }, { "epoch": 0.11102580039730418, "grad_norm": 2.5239252014612883, "learning_rate": 4.678896728672227e-06, "loss": 0.8169, "step": 9110 }, { "epoch": 0.11108673662145199, "grad_norm": 3.1291975979287163, "learning_rate": 4.67857601026299e-06, "loss": 0.8683, "step": 9115 }, { "epoch": 0.1111476728455998, "grad_norm": 2.29971468562094, "learning_rate": 4.678255291853753e-06, "loss": 0.8438, "step": 9120 }, { "epoch": 0.1112086090697476, "grad_norm": 2.781326888934166, "learning_rate": 4.677934573444516e-06, "loss": 0.8686, "step": 9125 }, { "epoch": 0.1112695452938954, "grad_norm": 2.762359782870235, "learning_rate": 4.67761385503528e-06, "loss": 0.7278, "step": 9130 }, { "epoch": 0.11133048151804321, "grad_norm": 2.3144135219613178, "learning_rate": 4.677293136626043e-06, "loss": 0.7757, "step": 9135 }, { "epoch": 0.11139141774219102, "grad_norm": 2.35037225346413, "learning_rate": 4.676972418216806e-06, "loss": 0.844, "step": 9140 }, { "epoch": 0.11145235396633883, "grad_norm": 2.861669618248622, "learning_rate": 4.6766516998075695e-06, "loss": 0.8041, "step": 9145 }, { "epoch": 0.11151329019048664, "grad_norm": 2.773706683674752, "learning_rate": 4.6763309813983326e-06, "loss": 0.8162, "step": 9150 }, { "epoch": 0.11157422641463445, "grad_norm": 2.368314778434702, "learning_rate": 4.6760102629890956e-06, "loss": 0.963, "step": 9155 }, { "epoch": 0.11163516263878225, "grad_norm": 3.420127195589313, "learning_rate": 4.6756895445798594e-06, "loss": 0.8498, "step": 9160 }, { "epoch": 0.11169609886293005, "grad_norm": 2.534905881939748, "learning_rate": 4.6753688261706225e-06, "loss": 0.8443, "step": 9165 }, { "epoch": 0.11175703508707786, "grad_norm": 2.1847897861829475, "learning_rate": 4.6750481077613855e-06, "loss": 0.891, "step": 9170 }, { "epoch": 0.11181797131122567, "grad_norm": 2.5968144765000623, "learning_rate": 4.674727389352149e-06, "loss": 0.8628, "step": 9175 }, { "epoch": 0.11187890753537348, "grad_norm": 2.161795035928039, "learning_rate": 4.674406670942912e-06, "loss": 0.8563, "step": 9180 }, { "epoch": 0.11193984375952129, "grad_norm": 2.435634165233099, "learning_rate": 4.674085952533675e-06, "loss": 0.7962, "step": 9185 }, { "epoch": 0.11200077998366909, "grad_norm": 3.1109002021455514, "learning_rate": 4.673765234124439e-06, "loss": 0.8515, "step": 9190 }, { "epoch": 0.1120617162078169, "grad_norm": 2.511551182980243, "learning_rate": 4.673444515715202e-06, "loss": 0.8029, "step": 9195 }, { "epoch": 0.1121226524319647, "grad_norm": 4.122075136409341, "learning_rate": 4.673123797305965e-06, "loss": 0.8784, "step": 9200 }, { "epoch": 0.11218358865611251, "grad_norm": 2.913857292191816, "learning_rate": 4.672803078896729e-06, "loss": 0.8818, "step": 9205 }, { "epoch": 0.11224452488026032, "grad_norm": 2.958401121775661, "learning_rate": 4.672482360487492e-06, "loss": 0.8437, "step": 9210 }, { "epoch": 0.11230546110440813, "grad_norm": 2.38024578191384, "learning_rate": 4.672161642078256e-06, "loss": 0.8071, "step": 9215 }, { "epoch": 0.11236639732855594, "grad_norm": 2.790718977672933, "learning_rate": 4.671840923669019e-06, "loss": 0.7966, "step": 9220 }, { "epoch": 0.11242733355270373, "grad_norm": 2.3395488150189796, "learning_rate": 4.671520205259782e-06, "loss": 0.8743, "step": 9225 }, { "epoch": 0.11248826977685154, "grad_norm": 2.622069305071239, "learning_rate": 4.671199486850546e-06, "loss": 0.8775, "step": 9230 }, { "epoch": 0.11254920600099935, "grad_norm": 3.425681596000832, "learning_rate": 4.670878768441309e-06, "loss": 0.7985, "step": 9235 }, { "epoch": 0.11261014222514716, "grad_norm": 2.6033159587085004, "learning_rate": 4.670558050032073e-06, "loss": 0.8118, "step": 9240 }, { "epoch": 0.11267107844929497, "grad_norm": 2.741563871833394, "learning_rate": 4.670237331622836e-06, "loss": 0.8982, "step": 9245 }, { "epoch": 0.11273201467344278, "grad_norm": 2.387749490150414, "learning_rate": 4.669916613213599e-06, "loss": 0.7897, "step": 9250 }, { "epoch": 0.11279295089759059, "grad_norm": 2.472910601422677, "learning_rate": 4.669595894804363e-06, "loss": 0.8126, "step": 9255 }, { "epoch": 0.11285388712173838, "grad_norm": 2.8629943123036, "learning_rate": 4.669275176395126e-06, "loss": 0.8236, "step": 9260 }, { "epoch": 0.11291482334588619, "grad_norm": 2.4458531866086237, "learning_rate": 4.668954457985889e-06, "loss": 0.83, "step": 9265 }, { "epoch": 0.112975759570034, "grad_norm": 3.0696165363290153, "learning_rate": 4.668633739576653e-06, "loss": 0.8158, "step": 9270 }, { "epoch": 0.11303669579418181, "grad_norm": 2.600408216890128, "learning_rate": 4.668313021167416e-06, "loss": 0.8168, "step": 9275 }, { "epoch": 0.11309763201832962, "grad_norm": 2.5246830844874983, "learning_rate": 4.667992302758179e-06, "loss": 0.837, "step": 9280 }, { "epoch": 0.11315856824247743, "grad_norm": 2.3700822610054173, "learning_rate": 4.667671584348942e-06, "loss": 0.7578, "step": 9285 }, { "epoch": 0.11321950446662522, "grad_norm": 2.440378689401936, "learning_rate": 4.6673508659397055e-06, "loss": 0.813, "step": 9290 }, { "epoch": 0.11328044069077303, "grad_norm": 2.712890528311778, "learning_rate": 4.6670301475304685e-06, "loss": 0.8192, "step": 9295 }, { "epoch": 0.11334137691492084, "grad_norm": 2.485651518853747, "learning_rate": 4.6667094291212315e-06, "loss": 0.88, "step": 9300 }, { "epoch": 0.11340231313906865, "grad_norm": 2.7626395920096942, "learning_rate": 4.666388710711995e-06, "loss": 0.8407, "step": 9305 }, { "epoch": 0.11346324936321646, "grad_norm": 3.4483991431381265, "learning_rate": 4.666067992302758e-06, "loss": 0.8809, "step": 9310 }, { "epoch": 0.11352418558736427, "grad_norm": 2.700986478566484, "learning_rate": 4.6657472738935214e-06, "loss": 0.8128, "step": 9315 }, { "epoch": 0.11358512181151208, "grad_norm": 2.6529459970498883, "learning_rate": 4.665426555484285e-06, "loss": 0.7918, "step": 9320 }, { "epoch": 0.11364605803565987, "grad_norm": 2.733831567362919, "learning_rate": 4.665105837075048e-06, "loss": 0.9129, "step": 9325 }, { "epoch": 0.11370699425980768, "grad_norm": 2.2511193605434037, "learning_rate": 4.664785118665811e-06, "loss": 0.8669, "step": 9330 }, { "epoch": 0.11376793048395549, "grad_norm": 2.434167821193212, "learning_rate": 4.664464400256575e-06, "loss": 0.8418, "step": 9335 }, { "epoch": 0.1138288667081033, "grad_norm": 2.787011467534136, "learning_rate": 4.664143681847338e-06, "loss": 0.7848, "step": 9340 }, { "epoch": 0.11388980293225111, "grad_norm": 2.2430551654902833, "learning_rate": 4.663822963438101e-06, "loss": 0.8381, "step": 9345 }, { "epoch": 0.11395073915639892, "grad_norm": 2.5807336948404505, "learning_rate": 4.663502245028865e-06, "loss": 0.8551, "step": 9350 }, { "epoch": 0.11401167538054673, "grad_norm": 2.4595526599835327, "learning_rate": 4.663181526619628e-06, "loss": 0.8617, "step": 9355 }, { "epoch": 0.11407261160469452, "grad_norm": 2.275387098770469, "learning_rate": 4.662860808210392e-06, "loss": 0.7736, "step": 9360 }, { "epoch": 0.11413354782884233, "grad_norm": 2.8887699773236095, "learning_rate": 4.662540089801155e-06, "loss": 0.7522, "step": 9365 }, { "epoch": 0.11419448405299014, "grad_norm": 2.8055919809756045, "learning_rate": 4.662219371391918e-06, "loss": 0.8181, "step": 9370 }, { "epoch": 0.11425542027713795, "grad_norm": 2.2795163531256515, "learning_rate": 4.661898652982682e-06, "loss": 0.7891, "step": 9375 }, { "epoch": 0.11431635650128576, "grad_norm": 2.432159390197254, "learning_rate": 4.661577934573445e-06, "loss": 0.8274, "step": 9380 }, { "epoch": 0.11437729272543357, "grad_norm": 2.5587580107040417, "learning_rate": 4.661257216164209e-06, "loss": 0.838, "step": 9385 }, { "epoch": 0.11443822894958137, "grad_norm": 3.010437140579431, "learning_rate": 4.660936497754972e-06, "loss": 0.8265, "step": 9390 }, { "epoch": 0.11449916517372917, "grad_norm": 3.2951320864538527, "learning_rate": 4.660615779345735e-06, "loss": 0.8081, "step": 9395 }, { "epoch": 0.11456010139787698, "grad_norm": 2.630440677201849, "learning_rate": 4.660295060936499e-06, "loss": 0.9109, "step": 9400 }, { "epoch": 0.11462103762202479, "grad_norm": 2.551367451723947, "learning_rate": 4.659974342527262e-06, "loss": 0.8609, "step": 9405 }, { "epoch": 0.1146819738461726, "grad_norm": 4.516192262289279, "learning_rate": 4.659653624118025e-06, "loss": 0.8894, "step": 9410 }, { "epoch": 0.1147429100703204, "grad_norm": 2.052431395135729, "learning_rate": 4.6593329057087885e-06, "loss": 0.7922, "step": 9415 }, { "epoch": 0.11480384629446821, "grad_norm": 2.5737367111583507, "learning_rate": 4.6590121872995516e-06, "loss": 0.8124, "step": 9420 }, { "epoch": 0.11486478251861601, "grad_norm": 2.388542045001864, "learning_rate": 4.6586914688903146e-06, "loss": 0.8639, "step": 9425 }, { "epoch": 0.11492571874276382, "grad_norm": 3.1344007993026275, "learning_rate": 4.6583707504810784e-06, "loss": 0.881, "step": 9430 }, { "epoch": 0.11498665496691163, "grad_norm": 2.208596818567149, "learning_rate": 4.6580500320718415e-06, "loss": 0.8228, "step": 9435 }, { "epoch": 0.11504759119105944, "grad_norm": 3.96920563237054, "learning_rate": 4.6577293136626045e-06, "loss": 0.7592, "step": 9440 }, { "epoch": 0.11510852741520725, "grad_norm": 2.8085334035109977, "learning_rate": 4.657408595253368e-06, "loss": 0.8163, "step": 9445 }, { "epoch": 0.11516946363935505, "grad_norm": 2.3856024334198334, "learning_rate": 4.657087876844131e-06, "loss": 0.8735, "step": 9450 }, { "epoch": 0.11523039986350286, "grad_norm": 2.3082199141312674, "learning_rate": 4.656767158434894e-06, "loss": 0.8518, "step": 9455 }, { "epoch": 0.11529133608765066, "grad_norm": 2.168021387904763, "learning_rate": 4.656446440025657e-06, "loss": 0.8761, "step": 9460 }, { "epoch": 0.11535227231179847, "grad_norm": 2.7590092060151523, "learning_rate": 4.656125721616421e-06, "loss": 0.7296, "step": 9465 }, { "epoch": 0.11541320853594628, "grad_norm": 2.437946356864181, "learning_rate": 4.655805003207184e-06, "loss": 0.8021, "step": 9470 }, { "epoch": 0.11547414476009409, "grad_norm": 2.832899168688555, "learning_rate": 4.655484284797947e-06, "loss": 0.8274, "step": 9475 }, { "epoch": 0.1155350809842419, "grad_norm": 2.7686677071102186, "learning_rate": 4.655163566388711e-06, "loss": 0.8675, "step": 9480 }, { "epoch": 0.1155960172083897, "grad_norm": 2.6405657004713254, "learning_rate": 4.654842847979474e-06, "loss": 0.8772, "step": 9485 }, { "epoch": 0.11565695343253751, "grad_norm": 3.415691657426994, "learning_rate": 4.654522129570237e-06, "loss": 0.8261, "step": 9490 }, { "epoch": 0.11571788965668531, "grad_norm": 2.740615252411195, "learning_rate": 4.654201411161001e-06, "loss": 0.8696, "step": 9495 }, { "epoch": 0.11577882588083312, "grad_norm": 2.3410854863775197, "learning_rate": 4.653880692751764e-06, "loss": 0.837, "step": 9500 }, { "epoch": 0.11583976210498093, "grad_norm": 2.412916049795172, "learning_rate": 4.653559974342527e-06, "loss": 0.8639, "step": 9505 }, { "epoch": 0.11590069832912873, "grad_norm": 2.5261362582438083, "learning_rate": 4.653239255933291e-06, "loss": 0.8954, "step": 9510 }, { "epoch": 0.11596163455327654, "grad_norm": 2.628949566435798, "learning_rate": 4.652918537524054e-06, "loss": 0.8224, "step": 9515 }, { "epoch": 0.11602257077742435, "grad_norm": 2.271144699931716, "learning_rate": 4.652597819114818e-06, "loss": 0.7814, "step": 9520 }, { "epoch": 0.11608350700157216, "grad_norm": 2.7270901176319, "learning_rate": 4.652277100705581e-06, "loss": 0.8571, "step": 9525 }, { "epoch": 0.11614444322571996, "grad_norm": 2.04540042781121, "learning_rate": 4.651956382296344e-06, "loss": 0.7793, "step": 9530 }, { "epoch": 0.11620537944986777, "grad_norm": 2.567411331664875, "learning_rate": 4.651635663887108e-06, "loss": 0.8729, "step": 9535 }, { "epoch": 0.11626631567401557, "grad_norm": 2.5820440133149325, "learning_rate": 4.651314945477871e-06, "loss": 0.8644, "step": 9540 }, { "epoch": 0.11632725189816338, "grad_norm": 2.944563972470646, "learning_rate": 4.650994227068635e-06, "loss": 0.8297, "step": 9545 }, { "epoch": 0.11638818812231119, "grad_norm": 2.623358639816364, "learning_rate": 4.650673508659398e-06, "loss": 0.7944, "step": 9550 }, { "epoch": 0.116449124346459, "grad_norm": 2.690925961908765, "learning_rate": 4.650352790250161e-06, "loss": 0.796, "step": 9555 }, { "epoch": 0.1165100605706068, "grad_norm": 2.9930340824460644, "learning_rate": 4.6500320718409245e-06, "loss": 0.8488, "step": 9560 }, { "epoch": 0.1165709967947546, "grad_norm": 2.6530046502238065, "learning_rate": 4.6497113534316875e-06, "loss": 0.8954, "step": 9565 }, { "epoch": 0.11663193301890241, "grad_norm": 2.6798055056026016, "learning_rate": 4.6493906350224505e-06, "loss": 0.7886, "step": 9570 }, { "epoch": 0.11669286924305022, "grad_norm": 2.2546084182001223, "learning_rate": 4.649069916613214e-06, "loss": 0.8714, "step": 9575 }, { "epoch": 0.11675380546719803, "grad_norm": 2.3048579939476785, "learning_rate": 4.648749198203977e-06, "loss": 0.7775, "step": 9580 }, { "epoch": 0.11681474169134584, "grad_norm": 2.8773240214621136, "learning_rate": 4.6484284797947404e-06, "loss": 0.9223, "step": 9585 }, { "epoch": 0.11687567791549365, "grad_norm": 3.8366300720181075, "learning_rate": 4.648107761385504e-06, "loss": 0.7993, "step": 9590 }, { "epoch": 0.11693661413964145, "grad_norm": 2.5603606144969344, "learning_rate": 4.647787042976267e-06, "loss": 0.8855, "step": 9595 }, { "epoch": 0.11699755036378925, "grad_norm": 2.561834745157596, "learning_rate": 4.64746632456703e-06, "loss": 0.8156, "step": 9600 }, { "epoch": 0.11705848658793706, "grad_norm": 2.88621184457524, "learning_rate": 4.647145606157794e-06, "loss": 0.8546, "step": 9605 }, { "epoch": 0.11711942281208487, "grad_norm": 2.5908451416015197, "learning_rate": 4.646824887748557e-06, "loss": 0.8019, "step": 9610 }, { "epoch": 0.11718035903623268, "grad_norm": 2.1399598008121514, "learning_rate": 4.64650416933932e-06, "loss": 0.8859, "step": 9615 }, { "epoch": 0.11724129526038049, "grad_norm": 2.751243249011589, "learning_rate": 4.646183450930083e-06, "loss": 0.8797, "step": 9620 }, { "epoch": 0.1173022314845283, "grad_norm": 2.9198811412430583, "learning_rate": 4.645862732520847e-06, "loss": 0.9042, "step": 9625 }, { "epoch": 0.1173631677086761, "grad_norm": 2.579315075194812, "learning_rate": 4.64554201411161e-06, "loss": 0.9007, "step": 9630 }, { "epoch": 0.1174241039328239, "grad_norm": 3.3038700924126556, "learning_rate": 4.645221295702373e-06, "loss": 0.7832, "step": 9635 }, { "epoch": 0.11748504015697171, "grad_norm": 3.8431664613266685, "learning_rate": 4.644900577293137e-06, "loss": 0.8331, "step": 9640 }, { "epoch": 0.11754597638111952, "grad_norm": 2.3988207806549786, "learning_rate": 4.6445798588839e-06, "loss": 0.8434, "step": 9645 }, { "epoch": 0.11760691260526733, "grad_norm": 2.974021767556471, "learning_rate": 4.644259140474663e-06, "loss": 0.8036, "step": 9650 }, { "epoch": 0.11766784882941514, "grad_norm": 2.6601932587239614, "learning_rate": 4.643938422065427e-06, "loss": 0.8145, "step": 9655 }, { "epoch": 0.11772878505356293, "grad_norm": 2.583526820734343, "learning_rate": 4.64361770365619e-06, "loss": 0.8732, "step": 9660 }, { "epoch": 0.11778972127771074, "grad_norm": 3.109056799283622, "learning_rate": 4.643296985246954e-06, "loss": 0.8238, "step": 9665 }, { "epoch": 0.11785065750185855, "grad_norm": 2.5528947241690236, "learning_rate": 4.642976266837717e-06, "loss": 0.8808, "step": 9670 }, { "epoch": 0.11791159372600636, "grad_norm": 2.6809209546047628, "learning_rate": 4.64265554842848e-06, "loss": 0.8393, "step": 9675 }, { "epoch": 0.11797252995015417, "grad_norm": 2.359041187751163, "learning_rate": 4.642334830019244e-06, "loss": 0.8091, "step": 9680 }, { "epoch": 0.11803346617430198, "grad_norm": 2.4417535315530183, "learning_rate": 4.642014111610007e-06, "loss": 0.8444, "step": 9685 }, { "epoch": 0.11809440239844979, "grad_norm": 3.6901102226173768, "learning_rate": 4.6416933932007706e-06, "loss": 0.8704, "step": 9690 }, { "epoch": 0.11815533862259758, "grad_norm": 2.3003427047825014, "learning_rate": 4.6413726747915336e-06, "loss": 0.8172, "step": 9695 }, { "epoch": 0.11821627484674539, "grad_norm": 2.7997423726588497, "learning_rate": 4.641051956382297e-06, "loss": 0.8785, "step": 9700 }, { "epoch": 0.1182772110708932, "grad_norm": 2.2373475126257363, "learning_rate": 4.6407312379730605e-06, "loss": 0.7194, "step": 9705 }, { "epoch": 0.11833814729504101, "grad_norm": 3.0167097065241255, "learning_rate": 4.6404105195638235e-06, "loss": 0.8283, "step": 9710 }, { "epoch": 0.11839908351918882, "grad_norm": 2.1546649625845795, "learning_rate": 4.6400898011545865e-06, "loss": 0.818, "step": 9715 }, { "epoch": 0.11846001974333663, "grad_norm": 2.5139989848350535, "learning_rate": 4.63976908274535e-06, "loss": 0.7869, "step": 9720 }, { "epoch": 0.11852095596748444, "grad_norm": 3.05567409006331, "learning_rate": 4.639448364336113e-06, "loss": 0.8181, "step": 9725 }, { "epoch": 0.11858189219163223, "grad_norm": 4.463306631442775, "learning_rate": 4.639127645926876e-06, "loss": 0.7686, "step": 9730 }, { "epoch": 0.11864282841578004, "grad_norm": 2.595632142600226, "learning_rate": 4.63880692751764e-06, "loss": 0.8371, "step": 9735 }, { "epoch": 0.11870376463992785, "grad_norm": 2.4337500107570045, "learning_rate": 4.638486209108403e-06, "loss": 0.8816, "step": 9740 }, { "epoch": 0.11876470086407566, "grad_norm": 3.144135976739132, "learning_rate": 4.638165490699166e-06, "loss": 0.8859, "step": 9745 }, { "epoch": 0.11882563708822347, "grad_norm": 2.9964729423768226, "learning_rate": 4.63784477228993e-06, "loss": 0.8344, "step": 9750 }, { "epoch": 0.11888657331237128, "grad_norm": 2.945371068078929, "learning_rate": 4.637524053880693e-06, "loss": 0.8671, "step": 9755 }, { "epoch": 0.11894750953651909, "grad_norm": 2.393142898619731, "learning_rate": 4.637203335471456e-06, "loss": 0.829, "step": 9760 }, { "epoch": 0.11900844576066688, "grad_norm": 2.599191568573328, "learning_rate": 4.63688261706222e-06, "loss": 0.8442, "step": 9765 }, { "epoch": 0.11906938198481469, "grad_norm": 2.977268747862327, "learning_rate": 4.636561898652983e-06, "loss": 0.8871, "step": 9770 }, { "epoch": 0.1191303182089625, "grad_norm": 2.680343910015979, "learning_rate": 4.636241180243746e-06, "loss": 0.9239, "step": 9775 }, { "epoch": 0.11919125443311031, "grad_norm": 3.147472276022003, "learning_rate": 4.63592046183451e-06, "loss": 0.7969, "step": 9780 }, { "epoch": 0.11925219065725812, "grad_norm": 2.1938008303392986, "learning_rate": 4.635599743425273e-06, "loss": 0.8334, "step": 9785 }, { "epoch": 0.11931312688140593, "grad_norm": 2.925998368301547, "learning_rate": 4.635279025016036e-06, "loss": 0.831, "step": 9790 }, { "epoch": 0.11937406310555372, "grad_norm": 2.654619689370484, "learning_rate": 4.634958306606799e-06, "loss": 0.8819, "step": 9795 }, { "epoch": 0.11943499932970153, "grad_norm": 2.9976447052794373, "learning_rate": 4.634637588197563e-06, "loss": 0.8081, "step": 9800 }, { "epoch": 0.11949593555384934, "grad_norm": 2.712278801532607, "learning_rate": 4.634316869788326e-06, "loss": 0.9045, "step": 9805 }, { "epoch": 0.11955687177799715, "grad_norm": 2.4348491101715513, "learning_rate": 4.63399615137909e-06, "loss": 0.8342, "step": 9810 }, { "epoch": 0.11961780800214496, "grad_norm": 2.1846576284300503, "learning_rate": 4.633675432969853e-06, "loss": 0.8067, "step": 9815 }, { "epoch": 0.11967874422629277, "grad_norm": 2.737686009410329, "learning_rate": 4.633354714560616e-06, "loss": 0.7786, "step": 9820 }, { "epoch": 0.11973968045044057, "grad_norm": 3.1756666907641096, "learning_rate": 4.63303399615138e-06, "loss": 0.9018, "step": 9825 }, { "epoch": 0.11980061667458837, "grad_norm": 2.6508554812547267, "learning_rate": 4.632713277742143e-06, "loss": 0.8435, "step": 9830 }, { "epoch": 0.11986155289873618, "grad_norm": 2.362056358896513, "learning_rate": 4.6323925593329065e-06, "loss": 0.8403, "step": 9835 }, { "epoch": 0.11992248912288399, "grad_norm": 3.298676088959585, "learning_rate": 4.6320718409236695e-06, "loss": 0.8546, "step": 9840 }, { "epoch": 0.1199834253470318, "grad_norm": 1.9549724786894345, "learning_rate": 4.6317511225144325e-06, "loss": 0.8645, "step": 9845 }, { "epoch": 0.1200443615711796, "grad_norm": 2.777234359990965, "learning_rate": 4.631430404105196e-06, "loss": 0.7788, "step": 9850 }, { "epoch": 0.12010529779532741, "grad_norm": 2.180586962164478, "learning_rate": 4.6311096856959594e-06, "loss": 0.833, "step": 9855 }, { "epoch": 0.12016623401947522, "grad_norm": 2.327518321032544, "learning_rate": 4.630788967286723e-06, "loss": 0.8047, "step": 9860 }, { "epoch": 0.12022717024362302, "grad_norm": 2.956102171288704, "learning_rate": 4.630468248877486e-06, "loss": 0.7931, "step": 9865 }, { "epoch": 0.12028810646777083, "grad_norm": 2.70013613278913, "learning_rate": 4.630147530468249e-06, "loss": 0.8135, "step": 9870 }, { "epoch": 0.12034904269191864, "grad_norm": 2.8015874707911483, "learning_rate": 4.629826812059012e-06, "loss": 0.8799, "step": 9875 }, { "epoch": 0.12040997891606645, "grad_norm": 2.561501427811778, "learning_rate": 4.629506093649776e-06, "loss": 0.7917, "step": 9880 }, { "epoch": 0.12047091514021425, "grad_norm": 2.671270573581388, "learning_rate": 4.629185375240539e-06, "loss": 0.8487, "step": 9885 }, { "epoch": 0.12053185136436206, "grad_norm": 3.293800146844999, "learning_rate": 4.628864656831302e-06, "loss": 0.8663, "step": 9890 }, { "epoch": 0.12059278758850986, "grad_norm": 2.513895591620523, "learning_rate": 4.628543938422066e-06, "loss": 0.8886, "step": 9895 }, { "epoch": 0.12065372381265767, "grad_norm": 2.70363605770466, "learning_rate": 4.628223220012829e-06, "loss": 0.8433, "step": 9900 }, { "epoch": 0.12071466003680548, "grad_norm": 4.2291532898487585, "learning_rate": 4.627902501603592e-06, "loss": 0.853, "step": 9905 }, { "epoch": 0.12077559626095329, "grad_norm": 2.897960747723877, "learning_rate": 4.627581783194356e-06, "loss": 0.8267, "step": 9910 }, { "epoch": 0.1208365324851011, "grad_norm": 3.054378406738382, "learning_rate": 4.627261064785119e-06, "loss": 0.8634, "step": 9915 }, { "epoch": 0.1208974687092489, "grad_norm": 3.1554713597560937, "learning_rate": 4.626940346375882e-06, "loss": 0.7748, "step": 9920 }, { "epoch": 0.12095840493339671, "grad_norm": 2.3829551965809572, "learning_rate": 4.626619627966646e-06, "loss": 0.7923, "step": 9925 }, { "epoch": 0.12101934115754451, "grad_norm": 2.83778172183312, "learning_rate": 4.626298909557409e-06, "loss": 0.8973, "step": 9930 }, { "epoch": 0.12108027738169232, "grad_norm": 2.707105198765455, "learning_rate": 4.625978191148172e-06, "loss": 0.8702, "step": 9935 }, { "epoch": 0.12114121360584013, "grad_norm": 2.5679584334341015, "learning_rate": 4.625657472738936e-06, "loss": 0.8213, "step": 9940 }, { "epoch": 0.12120214982998793, "grad_norm": 3.0032963067666683, "learning_rate": 4.625336754329699e-06, "loss": 0.881, "step": 9945 }, { "epoch": 0.12126308605413574, "grad_norm": 2.3209852460431195, "learning_rate": 4.625016035920462e-06, "loss": 0.833, "step": 9950 }, { "epoch": 0.12132402227828355, "grad_norm": 2.1852746274214816, "learning_rate": 4.624695317511225e-06, "loss": 0.7375, "step": 9955 }, { "epoch": 0.12138495850243136, "grad_norm": 2.345532396382708, "learning_rate": 4.624374599101989e-06, "loss": 0.8637, "step": 9960 }, { "epoch": 0.12144589472657916, "grad_norm": 4.619293989019405, "learning_rate": 4.624053880692752e-06, "loss": 0.8032, "step": 9965 }, { "epoch": 0.12150683095072697, "grad_norm": 2.2163003107665737, "learning_rate": 4.623733162283516e-06, "loss": 0.7376, "step": 9970 }, { "epoch": 0.12156776717487477, "grad_norm": 2.6081051748458206, "learning_rate": 4.623412443874279e-06, "loss": 0.8479, "step": 9975 }, { "epoch": 0.12162870339902258, "grad_norm": 2.456896376273514, "learning_rate": 4.623091725465042e-06, "loss": 0.8857, "step": 9980 }, { "epoch": 0.12168963962317039, "grad_norm": 2.7054208028824815, "learning_rate": 4.6227710070558055e-06, "loss": 0.9206, "step": 9985 }, { "epoch": 0.1217505758473182, "grad_norm": 3.039591362245489, "learning_rate": 4.6224502886465685e-06, "loss": 0.7926, "step": 9990 }, { "epoch": 0.12181151207146601, "grad_norm": 2.8019524618828004, "learning_rate": 4.622129570237332e-06, "loss": 0.8317, "step": 9995 }, { "epoch": 0.1218724482956138, "grad_norm": 3.0724476782174897, "learning_rate": 4.621808851828095e-06, "loss": 0.846, "step": 10000 }, { "epoch": 0.12193338451976161, "grad_norm": 2.9099708262200616, "learning_rate": 4.621488133418859e-06, "loss": 0.9236, "step": 10005 }, { "epoch": 0.12199432074390942, "grad_norm": 2.8354818397187467, "learning_rate": 4.621167415009622e-06, "loss": 0.888, "step": 10010 }, { "epoch": 0.12205525696805723, "grad_norm": 2.964738009555753, "learning_rate": 4.620846696600385e-06, "loss": 0.8105, "step": 10015 }, { "epoch": 0.12211619319220504, "grad_norm": 2.3523435342872245, "learning_rate": 4.620525978191149e-06, "loss": 0.8096, "step": 10020 }, { "epoch": 0.12217712941635285, "grad_norm": 2.444637164142619, "learning_rate": 4.620205259781912e-06, "loss": 0.8114, "step": 10025 }, { "epoch": 0.12223806564050065, "grad_norm": 4.193951421165095, "learning_rate": 4.619884541372675e-06, "loss": 0.8737, "step": 10030 }, { "epoch": 0.12229900186464845, "grad_norm": 3.4315793975661246, "learning_rate": 4.619563822963439e-06, "loss": 0.893, "step": 10035 }, { "epoch": 0.12235993808879626, "grad_norm": 2.7845796957230164, "learning_rate": 4.619243104554202e-06, "loss": 0.8516, "step": 10040 }, { "epoch": 0.12242087431294407, "grad_norm": 3.2591219583840756, "learning_rate": 4.618922386144965e-06, "loss": 0.8849, "step": 10045 }, { "epoch": 0.12248181053709188, "grad_norm": 3.3615164280373717, "learning_rate": 4.618601667735728e-06, "loss": 0.8412, "step": 10050 }, { "epoch": 0.12254274676123969, "grad_norm": 2.607264831713631, "learning_rate": 4.618280949326492e-06, "loss": 0.9021, "step": 10055 }, { "epoch": 0.1226036829853875, "grad_norm": 2.3293492792889343, "learning_rate": 4.617960230917255e-06, "loss": 0.7659, "step": 10060 }, { "epoch": 0.1226646192095353, "grad_norm": 2.821970132881241, "learning_rate": 4.617639512508018e-06, "loss": 0.8564, "step": 10065 }, { "epoch": 0.1227255554336831, "grad_norm": 2.7722325340410214, "learning_rate": 4.617318794098782e-06, "loss": 0.9072, "step": 10070 }, { "epoch": 0.12278649165783091, "grad_norm": 2.311062474475224, "learning_rate": 4.616998075689545e-06, "loss": 0.8172, "step": 10075 }, { "epoch": 0.12284742788197872, "grad_norm": 2.905617287658309, "learning_rate": 4.616677357280308e-06, "loss": 0.8066, "step": 10080 }, { "epoch": 0.12290836410612653, "grad_norm": 2.4259852171455214, "learning_rate": 4.616356638871072e-06, "loss": 0.7585, "step": 10085 }, { "epoch": 0.12296930033027434, "grad_norm": 2.7422181287510643, "learning_rate": 4.616035920461835e-06, "loss": 0.7983, "step": 10090 }, { "epoch": 0.12303023655442215, "grad_norm": 2.554264783603265, "learning_rate": 4.615715202052598e-06, "loss": 0.8321, "step": 10095 }, { "epoch": 0.12309117277856994, "grad_norm": 3.2316379256130605, "learning_rate": 4.615394483643362e-06, "loss": 0.8225, "step": 10100 }, { "epoch": 0.12315210900271775, "grad_norm": 3.2973923154709674, "learning_rate": 4.615073765234125e-06, "loss": 0.864, "step": 10105 }, { "epoch": 0.12321304522686556, "grad_norm": 2.263971557266218, "learning_rate": 4.614753046824888e-06, "loss": 0.814, "step": 10110 }, { "epoch": 0.12327398145101337, "grad_norm": 2.4196898788469112, "learning_rate": 4.6144323284156515e-06, "loss": 0.8297, "step": 10115 }, { "epoch": 0.12333491767516118, "grad_norm": 2.6531658597333383, "learning_rate": 4.6141116100064146e-06, "loss": 0.7423, "step": 10120 }, { "epoch": 0.12339585389930899, "grad_norm": 2.5034359978106875, "learning_rate": 4.613790891597178e-06, "loss": 0.8581, "step": 10125 }, { "epoch": 0.12345679012345678, "grad_norm": 2.7636382575003493, "learning_rate": 4.6134701731879414e-06, "loss": 0.7699, "step": 10130 }, { "epoch": 0.12351772634760459, "grad_norm": 2.0561246789865533, "learning_rate": 4.6131494547787045e-06, "loss": 0.8235, "step": 10135 }, { "epoch": 0.1235786625717524, "grad_norm": 2.4031649825624126, "learning_rate": 4.612828736369468e-06, "loss": 0.8337, "step": 10140 }, { "epoch": 0.12363959879590021, "grad_norm": 2.0740082287376564, "learning_rate": 4.612508017960231e-06, "loss": 0.7967, "step": 10145 }, { "epoch": 0.12370053502004802, "grad_norm": 2.9833707955609015, "learning_rate": 4.612187299550994e-06, "loss": 0.799, "step": 10150 }, { "epoch": 0.12376147124419583, "grad_norm": 2.169301069046072, "learning_rate": 4.611866581141758e-06, "loss": 0.8663, "step": 10155 }, { "epoch": 0.12382240746834364, "grad_norm": 2.6085710503425283, "learning_rate": 4.611545862732521e-06, "loss": 0.8848, "step": 10160 }, { "epoch": 0.12388334369249143, "grad_norm": 2.55620078573397, "learning_rate": 4.611225144323285e-06, "loss": 0.8185, "step": 10165 }, { "epoch": 0.12394427991663924, "grad_norm": 3.4328777481126647, "learning_rate": 4.610904425914048e-06, "loss": 0.8589, "step": 10170 }, { "epoch": 0.12400521614078705, "grad_norm": 2.5962956444906977, "learning_rate": 4.610583707504811e-06, "loss": 0.8635, "step": 10175 }, { "epoch": 0.12406615236493486, "grad_norm": 2.0008748411840633, "learning_rate": 4.610262989095575e-06, "loss": 0.7944, "step": 10180 }, { "epoch": 0.12412708858908267, "grad_norm": 2.5099217857796554, "learning_rate": 4.609942270686338e-06, "loss": 0.8012, "step": 10185 }, { "epoch": 0.12418802481323048, "grad_norm": 4.5664882735356445, "learning_rate": 4.609621552277101e-06, "loss": 0.7486, "step": 10190 }, { "epoch": 0.12424896103737829, "grad_norm": 4.83736073851455, "learning_rate": 4.609300833867865e-06, "loss": 0.8461, "step": 10195 }, { "epoch": 0.12430989726152608, "grad_norm": 2.500232412106216, "learning_rate": 4.608980115458628e-06, "loss": 0.8472, "step": 10200 }, { "epoch": 0.12437083348567389, "grad_norm": 2.7645800491829666, "learning_rate": 4.608659397049391e-06, "loss": 0.8286, "step": 10205 }, { "epoch": 0.1244317697098217, "grad_norm": 2.60542841016891, "learning_rate": 4.608338678640154e-06, "loss": 0.7477, "step": 10210 }, { "epoch": 0.12449270593396951, "grad_norm": 2.5953578939509176, "learning_rate": 4.608017960230918e-06, "loss": 0.8127, "step": 10215 }, { "epoch": 0.12455364215811732, "grad_norm": 2.7130930644098137, "learning_rate": 4.607697241821681e-06, "loss": 0.9014, "step": 10220 }, { "epoch": 0.12461457838226513, "grad_norm": 3.2535183192091353, "learning_rate": 4.607376523412444e-06, "loss": 0.8789, "step": 10225 }, { "epoch": 0.12467551460641293, "grad_norm": 3.2907211412607715, "learning_rate": 4.607055805003208e-06, "loss": 0.8636, "step": 10230 }, { "epoch": 0.12473645083056073, "grad_norm": 2.424548866453788, "learning_rate": 4.606735086593971e-06, "loss": 0.8129, "step": 10235 }, { "epoch": 0.12479738705470854, "grad_norm": 2.9552758565826562, "learning_rate": 4.606414368184734e-06, "loss": 0.8801, "step": 10240 }, { "epoch": 0.12485832327885635, "grad_norm": 2.2977979213295128, "learning_rate": 4.606093649775498e-06, "loss": 0.8453, "step": 10245 }, { "epoch": 0.12491925950300416, "grad_norm": 3.0144056597926636, "learning_rate": 4.605772931366261e-06, "loss": 0.8475, "step": 10250 }, { "epoch": 0.12498019572715197, "grad_norm": 2.542438286544562, "learning_rate": 4.605452212957024e-06, "loss": 0.8447, "step": 10255 }, { "epoch": 0.12504113195129976, "grad_norm": 2.3628650771961888, "learning_rate": 4.6051314945477875e-06, "loss": 0.7998, "step": 10260 }, { "epoch": 0.12510206817544758, "grad_norm": 2.742989387837836, "learning_rate": 4.6048107761385505e-06, "loss": 0.8416, "step": 10265 }, { "epoch": 0.12516300439959538, "grad_norm": 2.144613786599207, "learning_rate": 4.6044900577293135e-06, "loss": 0.7969, "step": 10270 }, { "epoch": 0.1252239406237432, "grad_norm": 2.947238099552633, "learning_rate": 4.604169339320077e-06, "loss": 0.8541, "step": 10275 }, { "epoch": 0.125284876847891, "grad_norm": 2.1005407809752854, "learning_rate": 4.60384862091084e-06, "loss": 0.8002, "step": 10280 }, { "epoch": 0.1253458130720388, "grad_norm": 2.664597251863618, "learning_rate": 4.603527902501604e-06, "loss": 0.8105, "step": 10285 }, { "epoch": 0.12540674929618661, "grad_norm": 3.1988935749165965, "learning_rate": 4.603207184092367e-06, "loss": 0.9181, "step": 10290 }, { "epoch": 0.1254676855203344, "grad_norm": 2.3822654662870755, "learning_rate": 4.60288646568313e-06, "loss": 0.8473, "step": 10295 }, { "epoch": 0.12552862174448223, "grad_norm": 2.399432120266119, "learning_rate": 4.602565747273894e-06, "loss": 0.8194, "step": 10300 }, { "epoch": 0.12558955796863003, "grad_norm": 3.071213419489679, "learning_rate": 4.602245028864657e-06, "loss": 0.8518, "step": 10305 }, { "epoch": 0.12565049419277785, "grad_norm": 3.2665754108594744, "learning_rate": 4.601924310455421e-06, "loss": 0.7888, "step": 10310 }, { "epoch": 0.12571143041692565, "grad_norm": 2.664637310996576, "learning_rate": 4.601603592046184e-06, "loss": 0.8692, "step": 10315 }, { "epoch": 0.12577236664107344, "grad_norm": 2.3001065299697334, "learning_rate": 4.601282873636947e-06, "loss": 0.8562, "step": 10320 }, { "epoch": 0.12583330286522126, "grad_norm": 2.687346818243259, "learning_rate": 4.600962155227711e-06, "loss": 0.7808, "step": 10325 }, { "epoch": 0.12589423908936906, "grad_norm": 2.8749673595553396, "learning_rate": 4.600641436818474e-06, "loss": 0.885, "step": 10330 }, { "epoch": 0.12595517531351688, "grad_norm": 2.697764434542629, "learning_rate": 4.600320718409237e-06, "loss": 0.8633, "step": 10335 }, { "epoch": 0.12601611153766468, "grad_norm": 2.53648356257053, "learning_rate": 4.600000000000001e-06, "loss": 0.8843, "step": 10340 }, { "epoch": 0.1260770477618125, "grad_norm": 2.934633297856724, "learning_rate": 4.599679281590764e-06, "loss": 0.8714, "step": 10345 }, { "epoch": 0.1261379839859603, "grad_norm": 3.0826890453876374, "learning_rate": 4.599358563181527e-06, "loss": 0.8418, "step": 10350 }, { "epoch": 0.1261989202101081, "grad_norm": 3.150631264410657, "learning_rate": 4.599037844772291e-06, "loss": 0.8409, "step": 10355 }, { "epoch": 0.1262598564342559, "grad_norm": 2.7757313069142415, "learning_rate": 4.598717126363054e-06, "loss": 0.7676, "step": 10360 }, { "epoch": 0.1263207926584037, "grad_norm": 2.552278728865554, "learning_rate": 4.598396407953817e-06, "loss": 0.8362, "step": 10365 }, { "epoch": 0.12638172888255153, "grad_norm": 2.4770024315840717, "learning_rate": 4.598075689544581e-06, "loss": 0.8751, "step": 10370 }, { "epoch": 0.12644266510669933, "grad_norm": 2.6314665909001884, "learning_rate": 4.597754971135344e-06, "loss": 0.8096, "step": 10375 }, { "epoch": 0.12650360133084715, "grad_norm": 2.7075574201024857, "learning_rate": 4.597434252726107e-06, "loss": 0.8683, "step": 10380 }, { "epoch": 0.12656453755499494, "grad_norm": 3.8278162017185426, "learning_rate": 4.59711353431687e-06, "loss": 0.8151, "step": 10385 }, { "epoch": 0.12662547377914274, "grad_norm": 2.311352976784156, "learning_rate": 4.5967928159076336e-06, "loss": 0.865, "step": 10390 }, { "epoch": 0.12668641000329056, "grad_norm": 2.355921279533189, "learning_rate": 4.5964720974983966e-06, "loss": 0.9041, "step": 10395 }, { "epoch": 0.12674734622743836, "grad_norm": 2.430321421621109, "learning_rate": 4.59615137908916e-06, "loss": 0.7977, "step": 10400 }, { "epoch": 0.12680828245158618, "grad_norm": 2.243345686811902, "learning_rate": 4.5958306606799235e-06, "loss": 0.8089, "step": 10405 }, { "epoch": 0.12686921867573397, "grad_norm": 2.690297831756223, "learning_rate": 4.5955099422706865e-06, "loss": 0.7668, "step": 10410 }, { "epoch": 0.1269301548998818, "grad_norm": 2.799246367728642, "learning_rate": 4.5951892238614495e-06, "loss": 0.8856, "step": 10415 }, { "epoch": 0.1269910911240296, "grad_norm": 3.257195929585442, "learning_rate": 4.594868505452213e-06, "loss": 0.8454, "step": 10420 }, { "epoch": 0.1270520273481774, "grad_norm": 3.9704471186320625, "learning_rate": 4.594547787042976e-06, "loss": 0.8236, "step": 10425 }, { "epoch": 0.1271129635723252, "grad_norm": 2.3688833936034133, "learning_rate": 4.594227068633739e-06, "loss": 0.853, "step": 10430 }, { "epoch": 0.127173899796473, "grad_norm": 2.646512905135452, "learning_rate": 4.593906350224503e-06, "loss": 0.8563, "step": 10435 }, { "epoch": 0.12723483602062083, "grad_norm": 2.3481560375251833, "learning_rate": 4.593585631815266e-06, "loss": 0.825, "step": 10440 }, { "epoch": 0.12729577224476862, "grad_norm": 2.077407767557016, "learning_rate": 4.59326491340603e-06, "loss": 0.8298, "step": 10445 }, { "epoch": 0.12735670846891642, "grad_norm": 2.350628201384193, "learning_rate": 4.592944194996793e-06, "loss": 0.8587, "step": 10450 }, { "epoch": 0.12741764469306424, "grad_norm": 2.294113588618649, "learning_rate": 4.592623476587556e-06, "loss": 0.813, "step": 10455 }, { "epoch": 0.12747858091721204, "grad_norm": 4.363934754122648, "learning_rate": 4.59230275817832e-06, "loss": 0.8991, "step": 10460 }, { "epoch": 0.12753951714135986, "grad_norm": 3.163788078470705, "learning_rate": 4.591982039769083e-06, "loss": 0.8485, "step": 10465 }, { "epoch": 0.12760045336550765, "grad_norm": 3.3010328132885474, "learning_rate": 4.591661321359847e-06, "loss": 0.835, "step": 10470 }, { "epoch": 0.12766138958965548, "grad_norm": 2.926161209291865, "learning_rate": 4.59134060295061e-06, "loss": 0.8374, "step": 10475 }, { "epoch": 0.12772232581380327, "grad_norm": 2.8015255324077404, "learning_rate": 4.591019884541373e-06, "loss": 0.788, "step": 10480 }, { "epoch": 0.12778326203795107, "grad_norm": 2.3340413062432996, "learning_rate": 4.590699166132137e-06, "loss": 0.8319, "step": 10485 }, { "epoch": 0.1278441982620989, "grad_norm": 3.1008410303664915, "learning_rate": 4.5903784477229e-06, "loss": 0.7987, "step": 10490 }, { "epoch": 0.12790513448624669, "grad_norm": 2.2722451337471328, "learning_rate": 4.590057729313663e-06, "loss": 0.8549, "step": 10495 }, { "epoch": 0.1279660707103945, "grad_norm": 2.4928194849635186, "learning_rate": 4.589737010904427e-06, "loss": 0.8017, "step": 10500 }, { "epoch": 0.1280270069345423, "grad_norm": 2.2719449278272386, "learning_rate": 4.58941629249519e-06, "loss": 0.8173, "step": 10505 }, { "epoch": 0.12808794315869013, "grad_norm": 2.3690277266052275, "learning_rate": 4.589095574085953e-06, "loss": 0.7761, "step": 10510 }, { "epoch": 0.12814887938283792, "grad_norm": 2.310004406499513, "learning_rate": 4.588774855676717e-06, "loss": 0.8136, "step": 10515 }, { "epoch": 0.12820981560698572, "grad_norm": 2.2735725868262397, "learning_rate": 4.58845413726748e-06, "loss": 0.8738, "step": 10520 }, { "epoch": 0.12827075183113354, "grad_norm": 2.524427911456163, "learning_rate": 4.588133418858243e-06, "loss": 0.7605, "step": 10525 }, { "epoch": 0.12833168805528133, "grad_norm": 2.5228079003836794, "learning_rate": 4.5878127004490065e-06, "loss": 0.8434, "step": 10530 }, { "epoch": 0.12839262427942916, "grad_norm": 2.3708765817233624, "learning_rate": 4.5874919820397695e-06, "loss": 0.8571, "step": 10535 }, { "epoch": 0.12845356050357695, "grad_norm": 2.588347868864194, "learning_rate": 4.5871712636305325e-06, "loss": 0.8175, "step": 10540 }, { "epoch": 0.12851449672772478, "grad_norm": 2.4557701933168823, "learning_rate": 4.5868505452212955e-06, "loss": 0.8346, "step": 10545 }, { "epoch": 0.12857543295187257, "grad_norm": 2.6879289175633336, "learning_rate": 4.586529826812059e-06, "loss": 0.8195, "step": 10550 }, { "epoch": 0.12863636917602037, "grad_norm": 2.2017193007848466, "learning_rate": 4.5862091084028224e-06, "loss": 0.8339, "step": 10555 }, { "epoch": 0.1286973054001682, "grad_norm": 2.714046395079642, "learning_rate": 4.5858883899935854e-06, "loss": 0.7904, "step": 10560 }, { "epoch": 0.12875824162431598, "grad_norm": 3.3334551936420116, "learning_rate": 4.585567671584349e-06, "loss": 0.762, "step": 10565 }, { "epoch": 0.1288191778484638, "grad_norm": 2.376298488101415, "learning_rate": 4.585246953175112e-06, "loss": 0.8522, "step": 10570 }, { "epoch": 0.1288801140726116, "grad_norm": 2.643805340450024, "learning_rate": 4.584926234765875e-06, "loss": 0.8683, "step": 10575 }, { "epoch": 0.12894105029675942, "grad_norm": 2.264201819194009, "learning_rate": 4.584605516356639e-06, "loss": 0.7898, "step": 10580 }, { "epoch": 0.12900198652090722, "grad_norm": 2.505432505252659, "learning_rate": 4.584284797947402e-06, "loss": 0.8233, "step": 10585 }, { "epoch": 0.12906292274505501, "grad_norm": 2.8002362600445148, "learning_rate": 4.583964079538166e-06, "loss": 0.9563, "step": 10590 }, { "epoch": 0.12912385896920284, "grad_norm": 3.0563986762867024, "learning_rate": 4.583643361128929e-06, "loss": 0.8839, "step": 10595 }, { "epoch": 0.12918479519335063, "grad_norm": 2.6132499105535634, "learning_rate": 4.583322642719692e-06, "loss": 0.819, "step": 10600 }, { "epoch": 0.12924573141749846, "grad_norm": 2.379501773530409, "learning_rate": 4.583001924310456e-06, "loss": 0.8412, "step": 10605 }, { "epoch": 0.12930666764164625, "grad_norm": 2.4842422770611434, "learning_rate": 4.582681205901219e-06, "loss": 0.7936, "step": 10610 }, { "epoch": 0.12936760386579407, "grad_norm": 2.3890163733883725, "learning_rate": 4.582360487491983e-06, "loss": 0.8021, "step": 10615 }, { "epoch": 0.12942854008994187, "grad_norm": 2.479990294376371, "learning_rate": 4.582039769082746e-06, "loss": 0.9007, "step": 10620 }, { "epoch": 0.12948947631408966, "grad_norm": 2.4082785993132982, "learning_rate": 4.581719050673509e-06, "loss": 0.8417, "step": 10625 }, { "epoch": 0.12955041253823749, "grad_norm": 2.340384634131006, "learning_rate": 4.581398332264273e-06, "loss": 0.7789, "step": 10630 }, { "epoch": 0.12961134876238528, "grad_norm": 4.07070173283748, "learning_rate": 4.581077613855036e-06, "loss": 0.7877, "step": 10635 }, { "epoch": 0.1296722849865331, "grad_norm": 2.689765378629632, "learning_rate": 4.580756895445799e-06, "loss": 0.814, "step": 10640 }, { "epoch": 0.1297332212106809, "grad_norm": 2.6127639550348003, "learning_rate": 4.580436177036563e-06, "loss": 0.8052, "step": 10645 }, { "epoch": 0.12979415743482872, "grad_norm": 2.725782234509965, "learning_rate": 4.580115458627326e-06, "loss": 0.8191, "step": 10650 }, { "epoch": 0.12985509365897652, "grad_norm": 2.247342701918733, "learning_rate": 4.579794740218089e-06, "loss": 0.7976, "step": 10655 }, { "epoch": 0.1299160298831243, "grad_norm": 2.446702658598765, "learning_rate": 4.5794740218088526e-06, "loss": 0.8186, "step": 10660 }, { "epoch": 0.12997696610727213, "grad_norm": 2.7146099546902827, "learning_rate": 4.5791533033996156e-06, "loss": 0.8484, "step": 10665 }, { "epoch": 0.13003790233141993, "grad_norm": 2.695871009563973, "learning_rate": 4.578832584990379e-06, "loss": 0.7946, "step": 10670 }, { "epoch": 0.13009883855556775, "grad_norm": 3.1996951442318187, "learning_rate": 4.5785118665811425e-06, "loss": 0.8838, "step": 10675 }, { "epoch": 0.13015977477971555, "grad_norm": 2.5936841177929124, "learning_rate": 4.5781911481719055e-06, "loss": 0.854, "step": 10680 }, { "epoch": 0.13022071100386334, "grad_norm": 2.7907843920038298, "learning_rate": 4.5778704297626685e-06, "loss": 0.862, "step": 10685 }, { "epoch": 0.13028164722801117, "grad_norm": 2.4437007737700776, "learning_rate": 4.577549711353432e-06, "loss": 0.8136, "step": 10690 }, { "epoch": 0.13034258345215896, "grad_norm": 2.5029905196356688, "learning_rate": 4.577228992944195e-06, "loss": 0.8529, "step": 10695 }, { "epoch": 0.13040351967630678, "grad_norm": 3.095997172985068, "learning_rate": 4.576908274534958e-06, "loss": 0.8276, "step": 10700 }, { "epoch": 0.13046445590045458, "grad_norm": 2.154978285250455, "learning_rate": 4.576587556125722e-06, "loss": 0.724, "step": 10705 }, { "epoch": 0.1305253921246024, "grad_norm": 1.978250072153543, "learning_rate": 4.576266837716485e-06, "loss": 0.8255, "step": 10710 }, { "epoch": 0.1305863283487502, "grad_norm": 2.359113793272633, "learning_rate": 4.575946119307248e-06, "loss": 0.785, "step": 10715 }, { "epoch": 0.130647264572898, "grad_norm": 3.173246531671163, "learning_rate": 4.575625400898011e-06, "loss": 0.8302, "step": 10720 }, { "epoch": 0.13070820079704581, "grad_norm": 2.6620091325359927, "learning_rate": 4.575304682488775e-06, "loss": 0.778, "step": 10725 }, { "epoch": 0.1307691370211936, "grad_norm": 3.201144839760429, "learning_rate": 4.574983964079538e-06, "loss": 0.8036, "step": 10730 }, { "epoch": 0.13083007324534143, "grad_norm": 4.713269221545807, "learning_rate": 4.574663245670302e-06, "loss": 0.9021, "step": 10735 }, { "epoch": 0.13089100946948923, "grad_norm": 2.4354864703192747, "learning_rate": 4.574342527261065e-06, "loss": 0.8408, "step": 10740 }, { "epoch": 0.13095194569363705, "grad_norm": 2.64867430913356, "learning_rate": 4.574021808851828e-06, "loss": 0.8192, "step": 10745 }, { "epoch": 0.13101288191778485, "grad_norm": 2.3130867207552694, "learning_rate": 4.573701090442592e-06, "loss": 0.8044, "step": 10750 }, { "epoch": 0.13107381814193264, "grad_norm": 2.706260655272545, "learning_rate": 4.573380372033355e-06, "loss": 0.8269, "step": 10755 }, { "epoch": 0.13113475436608046, "grad_norm": 3.0973434001191054, "learning_rate": 4.573059653624119e-06, "loss": 0.9174, "step": 10760 }, { "epoch": 0.13119569059022826, "grad_norm": 2.322129154537646, "learning_rate": 4.572738935214882e-06, "loss": 0.8772, "step": 10765 }, { "epoch": 0.13125662681437608, "grad_norm": 2.430595409078014, "learning_rate": 4.572418216805645e-06, "loss": 0.8286, "step": 10770 }, { "epoch": 0.13131756303852388, "grad_norm": 2.178162517368979, "learning_rate": 4.572097498396409e-06, "loss": 0.7403, "step": 10775 }, { "epoch": 0.1313784992626717, "grad_norm": 2.841686881221622, "learning_rate": 4.571776779987172e-06, "loss": 0.8186, "step": 10780 }, { "epoch": 0.1314394354868195, "grad_norm": 2.914584511666546, "learning_rate": 4.571456061577936e-06, "loss": 0.8451, "step": 10785 }, { "epoch": 0.1315003717109673, "grad_norm": 2.705495192592007, "learning_rate": 4.571135343168699e-06, "loss": 0.7871, "step": 10790 }, { "epoch": 0.1315613079351151, "grad_norm": 3.4978089725992874, "learning_rate": 4.570814624759462e-06, "loss": 0.8005, "step": 10795 }, { "epoch": 0.1316222441592629, "grad_norm": 2.067172743171093, "learning_rate": 4.570493906350225e-06, "loss": 0.7564, "step": 10800 }, { "epoch": 0.13168318038341073, "grad_norm": 2.799969839979968, "learning_rate": 4.5701731879409885e-06, "loss": 0.7852, "step": 10805 }, { "epoch": 0.13174411660755853, "grad_norm": 2.536308021827387, "learning_rate": 4.5698524695317515e-06, "loss": 0.8712, "step": 10810 }, { "epoch": 0.13180505283170635, "grad_norm": 2.6427853505245182, "learning_rate": 4.5695317511225145e-06, "loss": 0.7941, "step": 10815 }, { "epoch": 0.13186598905585414, "grad_norm": 2.577754964829617, "learning_rate": 4.569211032713278e-06, "loss": 0.8236, "step": 10820 }, { "epoch": 0.13192692528000194, "grad_norm": 2.457555092398948, "learning_rate": 4.5688903143040414e-06, "loss": 0.8122, "step": 10825 }, { "epoch": 0.13198786150414976, "grad_norm": 4.043251080659957, "learning_rate": 4.5685695958948044e-06, "loss": 0.8505, "step": 10830 }, { "epoch": 0.13204879772829756, "grad_norm": 2.2871623660094316, "learning_rate": 4.568248877485568e-06, "loss": 0.8359, "step": 10835 }, { "epoch": 0.13210973395244538, "grad_norm": 2.955168480414197, "learning_rate": 4.567928159076331e-06, "loss": 0.8191, "step": 10840 }, { "epoch": 0.13217067017659317, "grad_norm": 2.812307910827023, "learning_rate": 4.567607440667094e-06, "loss": 0.8587, "step": 10845 }, { "epoch": 0.132231606400741, "grad_norm": 2.610895385236474, "learning_rate": 4.567286722257858e-06, "loss": 0.726, "step": 10850 }, { "epoch": 0.1322925426248888, "grad_norm": 2.579738386191212, "learning_rate": 4.566966003848621e-06, "loss": 0.7977, "step": 10855 }, { "epoch": 0.1323534788490366, "grad_norm": 2.7359353758304064, "learning_rate": 4.566645285439384e-06, "loss": 0.8302, "step": 10860 }, { "epoch": 0.1324144150731844, "grad_norm": 2.6626706361217747, "learning_rate": 4.566324567030148e-06, "loss": 0.8175, "step": 10865 }, { "epoch": 0.1324753512973322, "grad_norm": 2.940785089012482, "learning_rate": 4.566003848620911e-06, "loss": 0.8985, "step": 10870 }, { "epoch": 0.13253628752148003, "grad_norm": 2.705218625859163, "learning_rate": 4.565683130211674e-06, "loss": 0.8843, "step": 10875 }, { "epoch": 0.13259722374562782, "grad_norm": 2.4743258137484077, "learning_rate": 4.565362411802438e-06, "loss": 0.8645, "step": 10880 }, { "epoch": 0.13265815996977565, "grad_norm": 2.582761152465364, "learning_rate": 4.565041693393201e-06, "loss": 0.861, "step": 10885 }, { "epoch": 0.13271909619392344, "grad_norm": 2.8702780487650768, "learning_rate": 4.564720974983964e-06, "loss": 0.8152, "step": 10890 }, { "epoch": 0.13278003241807124, "grad_norm": 2.576852770834548, "learning_rate": 4.564400256574728e-06, "loss": 0.818, "step": 10895 }, { "epoch": 0.13284096864221906, "grad_norm": 3.2193236631250617, "learning_rate": 4.564079538165491e-06, "loss": 0.877, "step": 10900 }, { "epoch": 0.13290190486636685, "grad_norm": 6.295373450891764, "learning_rate": 4.563758819756254e-06, "loss": 0.8206, "step": 10905 }, { "epoch": 0.13296284109051468, "grad_norm": 2.4520795473664143, "learning_rate": 4.563438101347018e-06, "loss": 0.8111, "step": 10910 }, { "epoch": 0.13302377731466247, "grad_norm": 2.124734947501478, "learning_rate": 4.563117382937781e-06, "loss": 0.8283, "step": 10915 }, { "epoch": 0.13308471353881027, "grad_norm": 2.437683094876568, "learning_rate": 4.562796664528545e-06, "loss": 0.8755, "step": 10920 }, { "epoch": 0.1331456497629581, "grad_norm": 2.260530849620307, "learning_rate": 4.562475946119308e-06, "loss": 0.8191, "step": 10925 }, { "epoch": 0.13320658598710589, "grad_norm": 2.8344666501720166, "learning_rate": 4.562155227710071e-06, "loss": 0.8864, "step": 10930 }, { "epoch": 0.1332675222112537, "grad_norm": 2.3768189790703, "learning_rate": 4.5618345093008346e-06, "loss": 0.81, "step": 10935 }, { "epoch": 0.1333284584354015, "grad_norm": 2.389190786986883, "learning_rate": 4.561513790891598e-06, "loss": 0.9134, "step": 10940 }, { "epoch": 0.13338939465954933, "grad_norm": 2.4763977493020572, "learning_rate": 4.5611930724823615e-06, "loss": 0.9258, "step": 10945 }, { "epoch": 0.13345033088369712, "grad_norm": 3.016282874771636, "learning_rate": 4.5608723540731245e-06, "loss": 0.8375, "step": 10950 }, { "epoch": 0.13351126710784492, "grad_norm": 2.1661448986757312, "learning_rate": 4.5605516356638875e-06, "loss": 0.8625, "step": 10955 }, { "epoch": 0.13357220333199274, "grad_norm": 2.6207779326006904, "learning_rate": 4.560230917254651e-06, "loss": 0.7935, "step": 10960 }, { "epoch": 0.13363313955614053, "grad_norm": 2.1638227874314437, "learning_rate": 4.559910198845414e-06, "loss": 0.8778, "step": 10965 }, { "epoch": 0.13369407578028836, "grad_norm": 2.704876790607501, "learning_rate": 4.559589480436177e-06, "loss": 0.851, "step": 10970 }, { "epoch": 0.13375501200443615, "grad_norm": 2.735052390080688, "learning_rate": 4.55926876202694e-06, "loss": 0.8256, "step": 10975 }, { "epoch": 0.13381594822858398, "grad_norm": 2.4380953024723317, "learning_rate": 4.558948043617704e-06, "loss": 0.822, "step": 10980 }, { "epoch": 0.13387688445273177, "grad_norm": 2.371572461876744, "learning_rate": 4.558627325208467e-06, "loss": 0.8461, "step": 10985 }, { "epoch": 0.13393782067687957, "grad_norm": 2.9434268106016805, "learning_rate": 4.55830660679923e-06, "loss": 0.9561, "step": 10990 }, { "epoch": 0.1339987569010274, "grad_norm": 2.57880682027888, "learning_rate": 4.557985888389994e-06, "loss": 0.8363, "step": 10995 }, { "epoch": 0.13405969312517518, "grad_norm": 2.2936638032699266, "learning_rate": 4.557665169980757e-06, "loss": 0.7984, "step": 11000 }, { "epoch": 0.134120629349323, "grad_norm": 2.6038416480989004, "learning_rate": 4.55734445157152e-06, "loss": 0.8513, "step": 11005 }, { "epoch": 0.1341815655734708, "grad_norm": 2.6617096580205017, "learning_rate": 4.557023733162284e-06, "loss": 0.886, "step": 11010 }, { "epoch": 0.13424250179761862, "grad_norm": 3.2080056251178597, "learning_rate": 4.556703014753047e-06, "loss": 0.7993, "step": 11015 }, { "epoch": 0.13430343802176642, "grad_norm": 2.369739973686082, "learning_rate": 4.55638229634381e-06, "loss": 0.8937, "step": 11020 }, { "epoch": 0.13436437424591421, "grad_norm": 2.5586952355184183, "learning_rate": 4.556061577934574e-06, "loss": 0.8066, "step": 11025 }, { "epoch": 0.13442531047006204, "grad_norm": 2.228940616810025, "learning_rate": 4.555740859525337e-06, "loss": 0.8466, "step": 11030 }, { "epoch": 0.13448624669420983, "grad_norm": 4.022260934726613, "learning_rate": 4.5554201411161e-06, "loss": 0.9258, "step": 11035 }, { "epoch": 0.13454718291835766, "grad_norm": 3.294133494359404, "learning_rate": 4.555099422706864e-06, "loss": 0.8053, "step": 11040 }, { "epoch": 0.13460811914250545, "grad_norm": 2.4595278379639938, "learning_rate": 4.554778704297627e-06, "loss": 0.7588, "step": 11045 }, { "epoch": 0.13466905536665327, "grad_norm": 2.3223970545304073, "learning_rate": 4.55445798588839e-06, "loss": 0.8361, "step": 11050 }, { "epoch": 0.13472999159080107, "grad_norm": 2.4424243118965254, "learning_rate": 4.554137267479154e-06, "loss": 0.8002, "step": 11055 }, { "epoch": 0.13479092781494886, "grad_norm": 2.659627407897677, "learning_rate": 4.553816549069917e-06, "loss": 0.8628, "step": 11060 }, { "epoch": 0.13485186403909669, "grad_norm": 2.893652651293891, "learning_rate": 4.553495830660681e-06, "loss": 0.8074, "step": 11065 }, { "epoch": 0.13491280026324448, "grad_norm": 2.6503115406466122, "learning_rate": 4.553175112251444e-06, "loss": 0.764, "step": 11070 }, { "epoch": 0.1349737364873923, "grad_norm": 2.4285572203392127, "learning_rate": 4.552854393842207e-06, "loss": 0.7903, "step": 11075 }, { "epoch": 0.1350346727115401, "grad_norm": 2.0077364007392644, "learning_rate": 4.5525336754329705e-06, "loss": 0.8039, "step": 11080 }, { "epoch": 0.13509560893568792, "grad_norm": 2.6513062556733678, "learning_rate": 4.5522129570237335e-06, "loss": 0.8413, "step": 11085 }, { "epoch": 0.13515654515983572, "grad_norm": 2.451895156907674, "learning_rate": 4.551892238614497e-06, "loss": 0.819, "step": 11090 }, { "epoch": 0.1352174813839835, "grad_norm": 2.7185434344385726, "learning_rate": 4.5515715202052604e-06, "loss": 0.8047, "step": 11095 }, { "epoch": 0.13527841760813133, "grad_norm": 2.765328030780297, "learning_rate": 4.5512508017960234e-06, "loss": 0.834, "step": 11100 }, { "epoch": 0.13533935383227913, "grad_norm": 2.7704499756018954, "learning_rate": 4.550930083386787e-06, "loss": 0.7988, "step": 11105 }, { "epoch": 0.13540029005642695, "grad_norm": 2.311317590914321, "learning_rate": 4.55060936497755e-06, "loss": 0.8482, "step": 11110 }, { "epoch": 0.13546122628057475, "grad_norm": 2.457920027520665, "learning_rate": 4.550288646568313e-06, "loss": 0.8703, "step": 11115 }, { "epoch": 0.13552216250472257, "grad_norm": 2.7635865597720346, "learning_rate": 4.549967928159077e-06, "loss": 0.8362, "step": 11120 }, { "epoch": 0.13558309872887037, "grad_norm": 2.40391267233418, "learning_rate": 4.54964720974984e-06, "loss": 0.7982, "step": 11125 }, { "epoch": 0.13564403495301816, "grad_norm": 3.205894832635791, "learning_rate": 4.549326491340603e-06, "loss": 0.9333, "step": 11130 }, { "epoch": 0.13570497117716598, "grad_norm": 2.678622406862769, "learning_rate": 4.549005772931366e-06, "loss": 0.7855, "step": 11135 }, { "epoch": 0.13576590740131378, "grad_norm": 2.300814873962997, "learning_rate": 4.54868505452213e-06, "loss": 0.882, "step": 11140 }, { "epoch": 0.1358268436254616, "grad_norm": 2.4921609993266567, "learning_rate": 4.548364336112893e-06, "loss": 0.7834, "step": 11145 }, { "epoch": 0.1358877798496094, "grad_norm": 2.7630364180684555, "learning_rate": 4.548043617703656e-06, "loss": 0.8704, "step": 11150 }, { "epoch": 0.1359487160737572, "grad_norm": 2.0188789975788604, "learning_rate": 4.54772289929442e-06, "loss": 0.8218, "step": 11155 }, { "epoch": 0.13600965229790501, "grad_norm": 2.3337448924149027, "learning_rate": 4.547402180885183e-06, "loss": 0.9029, "step": 11160 }, { "epoch": 0.1360705885220528, "grad_norm": 2.8847322415078662, "learning_rate": 4.547081462475946e-06, "loss": 0.8894, "step": 11165 }, { "epoch": 0.13613152474620063, "grad_norm": 2.453371293990783, "learning_rate": 4.54676074406671e-06, "loss": 0.7913, "step": 11170 }, { "epoch": 0.13619246097034843, "grad_norm": 2.5932816284507, "learning_rate": 4.546440025657473e-06, "loss": 0.8595, "step": 11175 }, { "epoch": 0.13625339719449625, "grad_norm": 3.127709955986735, "learning_rate": 4.546119307248236e-06, "loss": 0.906, "step": 11180 }, { "epoch": 0.13631433341864405, "grad_norm": 2.417942160959735, "learning_rate": 4.545798588839e-06, "loss": 0.8428, "step": 11185 }, { "epoch": 0.13637526964279184, "grad_norm": 2.199734133258657, "learning_rate": 4.545477870429763e-06, "loss": 0.8197, "step": 11190 }, { "epoch": 0.13643620586693966, "grad_norm": 2.2217742988984517, "learning_rate": 4.545157152020526e-06, "loss": 0.8182, "step": 11195 }, { "epoch": 0.13649714209108746, "grad_norm": 2.633837822106439, "learning_rate": 4.54483643361129e-06, "loss": 0.8726, "step": 11200 }, { "epoch": 0.13655807831523528, "grad_norm": 2.6251798805578637, "learning_rate": 4.544515715202053e-06, "loss": 0.9195, "step": 11205 }, { "epoch": 0.13661901453938308, "grad_norm": 2.4152256667287495, "learning_rate": 4.544194996792817e-06, "loss": 0.8489, "step": 11210 }, { "epoch": 0.1366799507635309, "grad_norm": 2.3125997712829958, "learning_rate": 4.54387427838358e-06, "loss": 0.8169, "step": 11215 }, { "epoch": 0.1367408869876787, "grad_norm": 2.425611137665306, "learning_rate": 4.543553559974343e-06, "loss": 0.8534, "step": 11220 }, { "epoch": 0.1368018232118265, "grad_norm": 2.8328128029351176, "learning_rate": 4.5432328415651065e-06, "loss": 0.8666, "step": 11225 }, { "epoch": 0.1368627594359743, "grad_norm": 2.472506472856047, "learning_rate": 4.5429121231558695e-06, "loss": 0.8082, "step": 11230 }, { "epoch": 0.1369236956601221, "grad_norm": 2.1679584090592567, "learning_rate": 4.542591404746633e-06, "loss": 0.7824, "step": 11235 }, { "epoch": 0.13698463188426993, "grad_norm": 2.6891686921903273, "learning_rate": 4.542270686337396e-06, "loss": 0.8393, "step": 11240 }, { "epoch": 0.13704556810841773, "grad_norm": 2.740446445981198, "learning_rate": 4.541949967928159e-06, "loss": 0.7495, "step": 11245 }, { "epoch": 0.13710650433256555, "grad_norm": 2.755257595425299, "learning_rate": 4.541629249518923e-06, "loss": 0.862, "step": 11250 }, { "epoch": 0.13716744055671334, "grad_norm": 2.606215702806311, "learning_rate": 4.541308531109686e-06, "loss": 0.8773, "step": 11255 }, { "epoch": 0.13722837678086114, "grad_norm": 2.55311648097493, "learning_rate": 4.540987812700449e-06, "loss": 0.7745, "step": 11260 }, { "epoch": 0.13728931300500896, "grad_norm": 2.7269122652724005, "learning_rate": 4.540667094291213e-06, "loss": 0.7978, "step": 11265 }, { "epoch": 0.13735024922915676, "grad_norm": 2.7671297855715937, "learning_rate": 4.540346375881976e-06, "loss": 0.8089, "step": 11270 }, { "epoch": 0.13741118545330458, "grad_norm": 2.760176588612993, "learning_rate": 4.540025657472739e-06, "loss": 0.8774, "step": 11275 }, { "epoch": 0.13747212167745237, "grad_norm": 2.280951640233903, "learning_rate": 4.539704939063503e-06, "loss": 0.8276, "step": 11280 }, { "epoch": 0.1375330579016002, "grad_norm": 2.624285461298716, "learning_rate": 4.539384220654266e-06, "loss": 0.7908, "step": 11285 }, { "epoch": 0.137593994125748, "grad_norm": 2.309654196064651, "learning_rate": 4.539063502245029e-06, "loss": 0.8102, "step": 11290 }, { "epoch": 0.1376549303498958, "grad_norm": 2.71900996611388, "learning_rate": 4.538742783835793e-06, "loss": 0.814, "step": 11295 }, { "epoch": 0.1377158665740436, "grad_norm": 2.8876137020071293, "learning_rate": 4.538422065426556e-06, "loss": 0.7867, "step": 11300 }, { "epoch": 0.1377768027981914, "grad_norm": 2.590640029386098, "learning_rate": 4.538101347017319e-06, "loss": 0.8288, "step": 11305 }, { "epoch": 0.13783773902233923, "grad_norm": 2.8985841136261152, "learning_rate": 4.537780628608082e-06, "loss": 0.8733, "step": 11310 }, { "epoch": 0.13789867524648702, "grad_norm": 2.6222397554768233, "learning_rate": 4.537459910198846e-06, "loss": 0.8358, "step": 11315 }, { "epoch": 0.13795961147063485, "grad_norm": 2.1538369444308123, "learning_rate": 4.537139191789609e-06, "loss": 0.754, "step": 11320 }, { "epoch": 0.13802054769478264, "grad_norm": 2.737446030004279, "learning_rate": 4.536818473380372e-06, "loss": 0.8562, "step": 11325 }, { "epoch": 0.13808148391893044, "grad_norm": 2.6660797495579596, "learning_rate": 4.536497754971136e-06, "loss": 0.8806, "step": 11330 }, { "epoch": 0.13814242014307826, "grad_norm": 2.4887897012097016, "learning_rate": 4.536177036561899e-06, "loss": 0.7842, "step": 11335 }, { "epoch": 0.13820335636722605, "grad_norm": 2.7971300552970684, "learning_rate": 4.535856318152662e-06, "loss": 0.8021, "step": 11340 }, { "epoch": 0.13826429259137388, "grad_norm": 2.768387517219318, "learning_rate": 4.535535599743426e-06, "loss": 0.8744, "step": 11345 }, { "epoch": 0.13832522881552167, "grad_norm": 2.2838948983907374, "learning_rate": 4.535214881334189e-06, "loss": 0.8574, "step": 11350 }, { "epoch": 0.1383861650396695, "grad_norm": 3.7770016994911657, "learning_rate": 4.534894162924952e-06, "loss": 0.7684, "step": 11355 }, { "epoch": 0.1384471012638173, "grad_norm": 2.693084643266042, "learning_rate": 4.5345734445157156e-06, "loss": 0.8047, "step": 11360 }, { "epoch": 0.13850803748796509, "grad_norm": 3.1591551966511857, "learning_rate": 4.534252726106479e-06, "loss": 0.7944, "step": 11365 }, { "epoch": 0.1385689737121129, "grad_norm": 2.2448211549954737, "learning_rate": 4.5339320076972424e-06, "loss": 0.7766, "step": 11370 }, { "epoch": 0.1386299099362607, "grad_norm": 2.5912423572777845, "learning_rate": 4.5336112892880055e-06, "loss": 0.7829, "step": 11375 }, { "epoch": 0.13869084616040853, "grad_norm": 3.338636188737104, "learning_rate": 4.5332905708787685e-06, "loss": 0.806, "step": 11380 }, { "epoch": 0.13875178238455632, "grad_norm": 2.65856791164376, "learning_rate": 4.532969852469532e-06, "loss": 0.76, "step": 11385 }, { "epoch": 0.13881271860870412, "grad_norm": 2.8475653161064964, "learning_rate": 4.532649134060295e-06, "loss": 0.7866, "step": 11390 }, { "epoch": 0.13887365483285194, "grad_norm": 2.5168674062839855, "learning_rate": 4.532328415651059e-06, "loss": 0.7707, "step": 11395 }, { "epoch": 0.13893459105699973, "grad_norm": 2.49953147707061, "learning_rate": 4.532007697241822e-06, "loss": 0.8162, "step": 11400 }, { "epoch": 0.13899552728114756, "grad_norm": 3.330825279178977, "learning_rate": 4.531686978832585e-06, "loss": 0.8664, "step": 11405 }, { "epoch": 0.13905646350529535, "grad_norm": 2.7411250881809526, "learning_rate": 4.531366260423349e-06, "loss": 0.8307, "step": 11410 }, { "epoch": 0.13911739972944318, "grad_norm": 2.659926324004207, "learning_rate": 4.531045542014112e-06, "loss": 0.8285, "step": 11415 }, { "epoch": 0.13917833595359097, "grad_norm": 2.4217094011566345, "learning_rate": 4.530724823604875e-06, "loss": 0.8112, "step": 11420 }, { "epoch": 0.13923927217773877, "grad_norm": 2.8847794356035203, "learning_rate": 4.530404105195639e-06, "loss": 0.7095, "step": 11425 }, { "epoch": 0.1393002084018866, "grad_norm": 2.504593447841199, "learning_rate": 4.530083386786402e-06, "loss": 0.7429, "step": 11430 }, { "epoch": 0.13936114462603438, "grad_norm": 2.549537969770664, "learning_rate": 4.529762668377165e-06, "loss": 0.7866, "step": 11435 }, { "epoch": 0.1394220808501822, "grad_norm": 2.426513586371192, "learning_rate": 4.529441949967929e-06, "loss": 0.8648, "step": 11440 }, { "epoch": 0.13948301707433, "grad_norm": 2.620000513984028, "learning_rate": 4.529121231558692e-06, "loss": 0.8271, "step": 11445 }, { "epoch": 0.13954395329847782, "grad_norm": 2.7838641657406, "learning_rate": 4.528800513149455e-06, "loss": 0.7585, "step": 11450 }, { "epoch": 0.13960488952262562, "grad_norm": 2.7507594175924033, "learning_rate": 4.528479794740219e-06, "loss": 0.784, "step": 11455 }, { "epoch": 0.13966582574677341, "grad_norm": 2.9362375649252583, "learning_rate": 4.528159076330982e-06, "loss": 0.8039, "step": 11460 }, { "epoch": 0.13972676197092124, "grad_norm": 2.020937446307028, "learning_rate": 4.527838357921745e-06, "loss": 0.7636, "step": 11465 }, { "epoch": 0.13978769819506903, "grad_norm": 2.6261852793272533, "learning_rate": 4.527517639512508e-06, "loss": 0.8431, "step": 11470 }, { "epoch": 0.13984863441921686, "grad_norm": 2.4485329181384463, "learning_rate": 4.527196921103272e-06, "loss": 0.8214, "step": 11475 }, { "epoch": 0.13990957064336465, "grad_norm": 2.5702185151903327, "learning_rate": 4.526876202694035e-06, "loss": 0.8738, "step": 11480 }, { "epoch": 0.13997050686751247, "grad_norm": 2.605677189750642, "learning_rate": 4.526555484284798e-06, "loss": 0.8329, "step": 11485 }, { "epoch": 0.14003144309166027, "grad_norm": 2.5752486843048557, "learning_rate": 4.526234765875562e-06, "loss": 0.8198, "step": 11490 }, { "epoch": 0.14009237931580806, "grad_norm": 2.714076360847522, "learning_rate": 4.525914047466325e-06, "loss": 0.8575, "step": 11495 }, { "epoch": 0.14015331553995589, "grad_norm": 2.4982960734629067, "learning_rate": 4.525593329057088e-06, "loss": 0.8774, "step": 11500 }, { "epoch": 0.14021425176410368, "grad_norm": 2.4423440098579916, "learning_rate": 4.5252726106478515e-06, "loss": 0.845, "step": 11505 }, { "epoch": 0.1402751879882515, "grad_norm": 3.1665843694148754, "learning_rate": 4.5249518922386145e-06, "loss": 0.9267, "step": 11510 }, { "epoch": 0.1403361242123993, "grad_norm": 2.4220304215086514, "learning_rate": 4.524631173829378e-06, "loss": 0.8422, "step": 11515 }, { "epoch": 0.14039706043654712, "grad_norm": 2.3463799592546715, "learning_rate": 4.524310455420141e-06, "loss": 0.808, "step": 11520 }, { "epoch": 0.14045799666069492, "grad_norm": 2.5671914677726018, "learning_rate": 4.5239897370109044e-06, "loss": 0.8703, "step": 11525 }, { "epoch": 0.1405189328848427, "grad_norm": 3.0265567836402343, "learning_rate": 4.523669018601668e-06, "loss": 0.8308, "step": 11530 }, { "epoch": 0.14057986910899054, "grad_norm": 2.608367542528865, "learning_rate": 4.523348300192431e-06, "loss": 0.7962, "step": 11535 }, { "epoch": 0.14064080533313833, "grad_norm": 2.3416765965071953, "learning_rate": 4.523027581783195e-06, "loss": 0.7868, "step": 11540 }, { "epoch": 0.14070174155728615, "grad_norm": 5.437976846770925, "learning_rate": 4.522706863373958e-06, "loss": 0.8529, "step": 11545 }, { "epoch": 0.14076267778143395, "grad_norm": 2.1950990910220565, "learning_rate": 4.522386144964721e-06, "loss": 0.8212, "step": 11550 }, { "epoch": 0.14082361400558177, "grad_norm": 3.020437745780796, "learning_rate": 4.522065426555485e-06, "loss": 0.8488, "step": 11555 }, { "epoch": 0.14088455022972957, "grad_norm": 2.6748783115722854, "learning_rate": 4.521744708146248e-06, "loss": 0.7079, "step": 11560 }, { "epoch": 0.14094548645387736, "grad_norm": 2.2062432729571535, "learning_rate": 4.521423989737011e-06, "loss": 0.8396, "step": 11565 }, { "epoch": 0.14100642267802518, "grad_norm": 3.2405693753707028, "learning_rate": 4.521103271327775e-06, "loss": 0.8161, "step": 11570 }, { "epoch": 0.14106735890217298, "grad_norm": 2.3678055943238583, "learning_rate": 4.520782552918538e-06, "loss": 0.8374, "step": 11575 }, { "epoch": 0.1411282951263208, "grad_norm": 2.499791500695587, "learning_rate": 4.520461834509301e-06, "loss": 0.8412, "step": 11580 }, { "epoch": 0.1411892313504686, "grad_norm": 2.504652311097742, "learning_rate": 4.520141116100065e-06, "loss": 0.8854, "step": 11585 }, { "epoch": 0.14125016757461642, "grad_norm": 2.5350041440233375, "learning_rate": 4.519820397690828e-06, "loss": 0.893, "step": 11590 }, { "epoch": 0.14131110379876421, "grad_norm": 2.514199497661426, "learning_rate": 4.519499679281591e-06, "loss": 0.8344, "step": 11595 }, { "epoch": 0.141372040022912, "grad_norm": 3.125024876542771, "learning_rate": 4.519178960872355e-06, "loss": 0.7927, "step": 11600 }, { "epoch": 0.14143297624705983, "grad_norm": 2.364254154166402, "learning_rate": 4.518858242463118e-06, "loss": 0.7791, "step": 11605 }, { "epoch": 0.14149391247120763, "grad_norm": 3.221692495880939, "learning_rate": 4.518537524053881e-06, "loss": 0.9454, "step": 11610 }, { "epoch": 0.14155484869535545, "grad_norm": 2.1420217919010565, "learning_rate": 4.518216805644645e-06, "loss": 0.7641, "step": 11615 }, { "epoch": 0.14161578491950325, "grad_norm": 3.2372513166107684, "learning_rate": 4.517896087235408e-06, "loss": 0.7976, "step": 11620 }, { "epoch": 0.14167672114365104, "grad_norm": 2.5255348175801076, "learning_rate": 4.517575368826171e-06, "loss": 0.8342, "step": 11625 }, { "epoch": 0.14173765736779886, "grad_norm": 3.1078530434157248, "learning_rate": 4.5172546504169346e-06, "loss": 0.84, "step": 11630 }, { "epoch": 0.14179859359194666, "grad_norm": 2.245015961460696, "learning_rate": 4.516933932007698e-06, "loss": 0.7833, "step": 11635 }, { "epoch": 0.14185952981609448, "grad_norm": 2.4076481145215527, "learning_rate": 4.516613213598461e-06, "loss": 0.8982, "step": 11640 }, { "epoch": 0.14192046604024228, "grad_norm": 2.583945027491171, "learning_rate": 4.516292495189224e-06, "loss": 0.8669, "step": 11645 }, { "epoch": 0.1419814022643901, "grad_norm": 2.5763689474944584, "learning_rate": 4.5159717767799875e-06, "loss": 0.7907, "step": 11650 }, { "epoch": 0.1420423384885379, "grad_norm": 2.806324763355603, "learning_rate": 4.5156510583707505e-06, "loss": 0.812, "step": 11655 }, { "epoch": 0.1421032747126857, "grad_norm": 2.413566815229415, "learning_rate": 4.5153303399615135e-06, "loss": 0.7684, "step": 11660 }, { "epoch": 0.1421642109368335, "grad_norm": 2.447996283145635, "learning_rate": 4.515009621552277e-06, "loss": 0.7792, "step": 11665 }, { "epoch": 0.1422251471609813, "grad_norm": 2.4663363400026634, "learning_rate": 4.51468890314304e-06, "loss": 0.8538, "step": 11670 }, { "epoch": 0.14228608338512913, "grad_norm": 2.5024539556753402, "learning_rate": 4.514368184733804e-06, "loss": 0.8468, "step": 11675 }, { "epoch": 0.14234701960927693, "grad_norm": 2.1527220599919934, "learning_rate": 4.514047466324567e-06, "loss": 0.8089, "step": 11680 }, { "epoch": 0.14240795583342475, "grad_norm": 3.2630323097101837, "learning_rate": 4.513726747915331e-06, "loss": 0.8206, "step": 11685 }, { "epoch": 0.14246889205757254, "grad_norm": 2.2997598717084946, "learning_rate": 4.513406029506094e-06, "loss": 0.858, "step": 11690 }, { "epoch": 0.14252982828172034, "grad_norm": 2.4933581428083276, "learning_rate": 4.513085311096857e-06, "loss": 0.897, "step": 11695 }, { "epoch": 0.14259076450586816, "grad_norm": 2.520347958206229, "learning_rate": 4.512764592687621e-06, "loss": 0.8068, "step": 11700 }, { "epoch": 0.14265170073001596, "grad_norm": 2.810918347326647, "learning_rate": 4.512443874278384e-06, "loss": 0.7933, "step": 11705 }, { "epoch": 0.14271263695416378, "grad_norm": 3.0296463442229715, "learning_rate": 4.512123155869148e-06, "loss": 0.8316, "step": 11710 }, { "epoch": 0.14277357317831157, "grad_norm": 3.0756080901534952, "learning_rate": 4.511802437459911e-06, "loss": 0.8036, "step": 11715 }, { "epoch": 0.1428345094024594, "grad_norm": 2.306161970659242, "learning_rate": 4.511481719050674e-06, "loss": 0.7853, "step": 11720 }, { "epoch": 0.1428954456266072, "grad_norm": 2.1622604025999665, "learning_rate": 4.511161000641437e-06, "loss": 0.8355, "step": 11725 }, { "epoch": 0.142956381850755, "grad_norm": 2.216564676644916, "learning_rate": 4.510840282232201e-06, "loss": 0.8743, "step": 11730 }, { "epoch": 0.1430173180749028, "grad_norm": 4.346932140517125, "learning_rate": 4.510519563822964e-06, "loss": 0.9013, "step": 11735 }, { "epoch": 0.1430782542990506, "grad_norm": 2.37065553986765, "learning_rate": 4.510198845413727e-06, "loss": 0.9581, "step": 11740 }, { "epoch": 0.14313919052319843, "grad_norm": 3.2676852634576963, "learning_rate": 4.509878127004491e-06, "loss": 0.8619, "step": 11745 }, { "epoch": 0.14320012674734622, "grad_norm": 2.470256033285945, "learning_rate": 4.509557408595254e-06, "loss": 0.711, "step": 11750 }, { "epoch": 0.14326106297149405, "grad_norm": 2.3772752937712505, "learning_rate": 4.509236690186017e-06, "loss": 0.8979, "step": 11755 }, { "epoch": 0.14332199919564184, "grad_norm": 2.413938258507704, "learning_rate": 4.508915971776781e-06, "loss": 0.7977, "step": 11760 }, { "epoch": 0.14338293541978964, "grad_norm": 2.3022080111984153, "learning_rate": 4.508595253367544e-06, "loss": 0.7501, "step": 11765 }, { "epoch": 0.14344387164393746, "grad_norm": 2.901974773821197, "learning_rate": 4.508274534958307e-06, "loss": 0.8388, "step": 11770 }, { "epoch": 0.14350480786808525, "grad_norm": 3.4852675715424204, "learning_rate": 4.5079538165490705e-06, "loss": 0.779, "step": 11775 }, { "epoch": 0.14356574409223308, "grad_norm": 2.3518023499394554, "learning_rate": 4.5076330981398335e-06, "loss": 0.791, "step": 11780 }, { "epoch": 0.14362668031638087, "grad_norm": 2.3556346605063023, "learning_rate": 4.5073123797305966e-06, "loss": 0.8038, "step": 11785 }, { "epoch": 0.1436876165405287, "grad_norm": 2.1332698091439672, "learning_rate": 4.50699166132136e-06, "loss": 0.8603, "step": 11790 }, { "epoch": 0.1437485527646765, "grad_norm": 2.2894040524032047, "learning_rate": 4.5066709429121234e-06, "loss": 0.8141, "step": 11795 }, { "epoch": 0.14380948898882429, "grad_norm": 2.711274055890039, "learning_rate": 4.5063502245028865e-06, "loss": 0.8182, "step": 11800 }, { "epoch": 0.1438704252129721, "grad_norm": 3.0072384033143176, "learning_rate": 4.50602950609365e-06, "loss": 0.8149, "step": 11805 }, { "epoch": 0.1439313614371199, "grad_norm": 2.811419803688519, "learning_rate": 4.505708787684413e-06, "loss": 0.6587, "step": 11810 }, { "epoch": 0.14399229766126773, "grad_norm": 2.536030376243053, "learning_rate": 4.505388069275176e-06, "loss": 0.8155, "step": 11815 }, { "epoch": 0.14405323388541552, "grad_norm": 2.448844072540269, "learning_rate": 4.50506735086594e-06, "loss": 0.8238, "step": 11820 }, { "epoch": 0.14411417010956334, "grad_norm": 2.1903587207430277, "learning_rate": 4.504746632456703e-06, "loss": 0.8443, "step": 11825 }, { "epoch": 0.14417510633371114, "grad_norm": 3.408244137417174, "learning_rate": 4.504425914047466e-06, "loss": 0.8541, "step": 11830 }, { "epoch": 0.14423604255785893, "grad_norm": 2.7635460798566465, "learning_rate": 4.50410519563823e-06, "loss": 0.8001, "step": 11835 }, { "epoch": 0.14429697878200676, "grad_norm": 2.6441031551652032, "learning_rate": 4.503784477228993e-06, "loss": 0.842, "step": 11840 }, { "epoch": 0.14435791500615455, "grad_norm": 2.0743722986311464, "learning_rate": 4.503463758819757e-06, "loss": 0.856, "step": 11845 }, { "epoch": 0.14441885123030238, "grad_norm": 2.5110358409076605, "learning_rate": 4.50314304041052e-06, "loss": 0.816, "step": 11850 }, { "epoch": 0.14447978745445017, "grad_norm": 2.2886153978828236, "learning_rate": 4.502822322001283e-06, "loss": 0.8354, "step": 11855 }, { "epoch": 0.144540723678598, "grad_norm": 2.5486770486405446, "learning_rate": 4.502501603592047e-06, "loss": 0.7633, "step": 11860 }, { "epoch": 0.1446016599027458, "grad_norm": 2.7142393050685474, "learning_rate": 4.50218088518281e-06, "loss": 0.85, "step": 11865 }, { "epoch": 0.14466259612689358, "grad_norm": 2.3861135337507013, "learning_rate": 4.501860166773574e-06, "loss": 0.8805, "step": 11870 }, { "epoch": 0.1447235323510414, "grad_norm": 2.2325815352933436, "learning_rate": 4.501539448364337e-06, "loss": 0.89, "step": 11875 }, { "epoch": 0.1447844685751892, "grad_norm": 2.9097813532410095, "learning_rate": 4.5012187299551e-06, "loss": 0.8523, "step": 11880 }, { "epoch": 0.14484540479933702, "grad_norm": 2.064141771157856, "learning_rate": 4.500898011545864e-06, "loss": 0.7807, "step": 11885 }, { "epoch": 0.14490634102348482, "grad_norm": 3.353830699931521, "learning_rate": 4.500577293136627e-06, "loss": 0.8762, "step": 11890 }, { "epoch": 0.14496727724763261, "grad_norm": 2.351909316468043, "learning_rate": 4.50025657472739e-06, "loss": 0.8365, "step": 11895 }, { "epoch": 0.14502821347178044, "grad_norm": 2.198031313207028, "learning_rate": 4.499935856318153e-06, "loss": 0.8011, "step": 11900 }, { "epoch": 0.14508914969592823, "grad_norm": 2.7208796896479326, "learning_rate": 4.499615137908917e-06, "loss": 0.8064, "step": 11905 }, { "epoch": 0.14515008592007606, "grad_norm": 3.172278380611892, "learning_rate": 4.49929441949968e-06, "loss": 0.8211, "step": 11910 }, { "epoch": 0.14521102214422385, "grad_norm": 3.975501813260602, "learning_rate": 4.498973701090443e-06, "loss": 0.7718, "step": 11915 }, { "epoch": 0.14527195836837167, "grad_norm": 2.560636604419548, "learning_rate": 4.4986529826812065e-06, "loss": 0.798, "step": 11920 }, { "epoch": 0.14533289459251947, "grad_norm": 2.4677918678964588, "learning_rate": 4.4983322642719695e-06, "loss": 0.7864, "step": 11925 }, { "epoch": 0.14539383081666726, "grad_norm": 2.7463708225485632, "learning_rate": 4.4980115458627325e-06, "loss": 0.7718, "step": 11930 }, { "epoch": 0.14545476704081509, "grad_norm": 2.7640476657746413, "learning_rate": 4.497690827453496e-06, "loss": 0.8334, "step": 11935 }, { "epoch": 0.14551570326496288, "grad_norm": 2.5798371265467313, "learning_rate": 4.497370109044259e-06, "loss": 0.8398, "step": 11940 }, { "epoch": 0.1455766394891107, "grad_norm": 2.532947764475343, "learning_rate": 4.497049390635022e-06, "loss": 0.8466, "step": 11945 }, { "epoch": 0.1456375757132585, "grad_norm": 2.678941969689388, "learning_rate": 4.496728672225786e-06, "loss": 0.8715, "step": 11950 }, { "epoch": 0.14569851193740632, "grad_norm": 3.2564097549503317, "learning_rate": 4.496407953816549e-06, "loss": 0.8877, "step": 11955 }, { "epoch": 0.14575944816155412, "grad_norm": 2.925561935498262, "learning_rate": 4.496087235407312e-06, "loss": 0.8697, "step": 11960 }, { "epoch": 0.1458203843857019, "grad_norm": 2.741032283286981, "learning_rate": 4.495766516998076e-06, "loss": 0.9047, "step": 11965 }, { "epoch": 0.14588132060984974, "grad_norm": 2.0258352626189655, "learning_rate": 4.495445798588839e-06, "loss": 0.779, "step": 11970 }, { "epoch": 0.14594225683399753, "grad_norm": 3.07378857021625, "learning_rate": 4.495125080179602e-06, "loss": 0.8167, "step": 11975 }, { "epoch": 0.14600319305814535, "grad_norm": 2.866871210991669, "learning_rate": 4.494804361770366e-06, "loss": 1.0024, "step": 11980 }, { "epoch": 0.14606412928229315, "grad_norm": 2.5264740245822424, "learning_rate": 4.494483643361129e-06, "loss": 0.802, "step": 11985 }, { "epoch": 0.14612506550644097, "grad_norm": 2.2857699642348113, "learning_rate": 4.494162924951893e-06, "loss": 0.8287, "step": 11990 }, { "epoch": 0.14618600173058877, "grad_norm": 2.3372146733808057, "learning_rate": 4.493842206542656e-06, "loss": 0.8594, "step": 11995 }, { "epoch": 0.14624693795473656, "grad_norm": 2.8171523020853844, "learning_rate": 4.493521488133419e-06, "loss": 0.7786, "step": 12000 }, { "epoch": 0.14630787417888438, "grad_norm": 2.2962432573539533, "learning_rate": 4.493200769724183e-06, "loss": 0.8367, "step": 12005 }, { "epoch": 0.14636881040303218, "grad_norm": 2.379337546670301, "learning_rate": 4.492880051314946e-06, "loss": 0.8731, "step": 12010 }, { "epoch": 0.14642974662718, "grad_norm": 2.791912762797138, "learning_rate": 4.49255933290571e-06, "loss": 0.8413, "step": 12015 }, { "epoch": 0.1464906828513278, "grad_norm": 3.5530837880373527, "learning_rate": 4.492238614496473e-06, "loss": 0.7918, "step": 12020 }, { "epoch": 0.14655161907547562, "grad_norm": 2.871697403157752, "learning_rate": 4.491917896087236e-06, "loss": 0.7886, "step": 12025 }, { "epoch": 0.14661255529962341, "grad_norm": 2.7781915495465084, "learning_rate": 4.491597177678e-06, "loss": 0.8545, "step": 12030 }, { "epoch": 0.1466734915237712, "grad_norm": 2.3690072527377306, "learning_rate": 4.491276459268763e-06, "loss": 0.8375, "step": 12035 }, { "epoch": 0.14673442774791903, "grad_norm": 2.314164346616921, "learning_rate": 4.490955740859526e-06, "loss": 0.8487, "step": 12040 }, { "epoch": 0.14679536397206683, "grad_norm": 2.5903977355028567, "learning_rate": 4.4906350224502895e-06, "loss": 0.8303, "step": 12045 }, { "epoch": 0.14685630019621465, "grad_norm": 2.48710719044481, "learning_rate": 4.4903143040410525e-06, "loss": 0.8307, "step": 12050 }, { "epoch": 0.14691723642036245, "grad_norm": 2.5287261032234762, "learning_rate": 4.4899935856318156e-06, "loss": 0.8918, "step": 12055 }, { "epoch": 0.14697817264451027, "grad_norm": 3.0296360246027287, "learning_rate": 4.4896728672225786e-06, "loss": 0.822, "step": 12060 }, { "epoch": 0.14703910886865806, "grad_norm": 2.788200739228271, "learning_rate": 4.4893521488133424e-06, "loss": 0.8217, "step": 12065 }, { "epoch": 0.14710004509280586, "grad_norm": 2.3339705693371604, "learning_rate": 4.4890314304041055e-06, "loss": 0.7774, "step": 12070 }, { "epoch": 0.14716098131695368, "grad_norm": 2.6165325637813104, "learning_rate": 4.4887107119948685e-06, "loss": 0.8432, "step": 12075 }, { "epoch": 0.14722191754110148, "grad_norm": 2.5675205070020555, "learning_rate": 4.488389993585632e-06, "loss": 0.7877, "step": 12080 }, { "epoch": 0.1472828537652493, "grad_norm": 2.427306908808464, "learning_rate": 4.488069275176395e-06, "loss": 0.8396, "step": 12085 }, { "epoch": 0.1473437899893971, "grad_norm": 2.464910668966299, "learning_rate": 4.487748556767158e-06, "loss": 0.9064, "step": 12090 }, { "epoch": 0.14740472621354492, "grad_norm": 2.557191765746551, "learning_rate": 4.487427838357922e-06, "loss": 0.8701, "step": 12095 }, { "epoch": 0.1474656624376927, "grad_norm": 2.511376667724663, "learning_rate": 4.487107119948685e-06, "loss": 0.7785, "step": 12100 }, { "epoch": 0.1475265986618405, "grad_norm": 2.3787901607910453, "learning_rate": 4.486786401539448e-06, "loss": 0.8242, "step": 12105 }, { "epoch": 0.14758753488598833, "grad_norm": 2.3824457774491936, "learning_rate": 4.486465683130212e-06, "loss": 0.8131, "step": 12110 }, { "epoch": 0.14764847111013613, "grad_norm": 2.091341935129773, "learning_rate": 4.486144964720975e-06, "loss": 0.8568, "step": 12115 }, { "epoch": 0.14770940733428395, "grad_norm": 2.271184929357517, "learning_rate": 4.485824246311738e-06, "loss": 0.8094, "step": 12120 }, { "epoch": 0.14777034355843174, "grad_norm": 2.455397020403647, "learning_rate": 4.485503527902502e-06, "loss": 0.8159, "step": 12125 }, { "epoch": 0.14783127978257954, "grad_norm": 2.167323796671032, "learning_rate": 4.485182809493265e-06, "loss": 0.8501, "step": 12130 }, { "epoch": 0.14789221600672736, "grad_norm": 2.719217849786967, "learning_rate": 4.484862091084028e-06, "loss": 0.7459, "step": 12135 }, { "epoch": 0.14795315223087516, "grad_norm": 3.372142527852453, "learning_rate": 4.484541372674792e-06, "loss": 0.8671, "step": 12140 }, { "epoch": 0.14801408845502298, "grad_norm": 2.3268185467932225, "learning_rate": 4.484220654265555e-06, "loss": 0.7652, "step": 12145 }, { "epoch": 0.14807502467917077, "grad_norm": 2.7605292404784576, "learning_rate": 4.483899935856319e-06, "loss": 0.8501, "step": 12150 }, { "epoch": 0.1481359609033186, "grad_norm": 2.468325309126572, "learning_rate": 4.483579217447082e-06, "loss": 0.8247, "step": 12155 }, { "epoch": 0.1481968971274664, "grad_norm": 5.835925202726789, "learning_rate": 4.483258499037846e-06, "loss": 0.9343, "step": 12160 }, { "epoch": 0.1482578333516142, "grad_norm": 2.5075095846171123, "learning_rate": 4.482937780628609e-06, "loss": 0.7573, "step": 12165 }, { "epoch": 0.148318769575762, "grad_norm": 2.317727942463395, "learning_rate": 4.482617062219372e-06, "loss": 0.8528, "step": 12170 }, { "epoch": 0.1483797057999098, "grad_norm": 2.4978864151838027, "learning_rate": 4.482296343810136e-06, "loss": 0.827, "step": 12175 }, { "epoch": 0.14844064202405763, "grad_norm": 2.7698558709486485, "learning_rate": 4.481975625400899e-06, "loss": 0.8978, "step": 12180 }, { "epoch": 0.14850157824820542, "grad_norm": 2.163364026032009, "learning_rate": 4.481654906991662e-06, "loss": 0.8035, "step": 12185 }, { "epoch": 0.14856251447235325, "grad_norm": 2.4805987717387086, "learning_rate": 4.4813341885824255e-06, "loss": 0.864, "step": 12190 }, { "epoch": 0.14862345069650104, "grad_norm": 2.610771712776324, "learning_rate": 4.4810134701731885e-06, "loss": 0.8927, "step": 12195 }, { "epoch": 0.14868438692064884, "grad_norm": 2.386561980377411, "learning_rate": 4.4806927517639515e-06, "loss": 0.8441, "step": 12200 }, { "epoch": 0.14874532314479666, "grad_norm": 2.567478106638533, "learning_rate": 4.480372033354715e-06, "loss": 0.864, "step": 12205 }, { "epoch": 0.14880625936894445, "grad_norm": 2.2236776354484586, "learning_rate": 4.480051314945478e-06, "loss": 0.8063, "step": 12210 }, { "epoch": 0.14886719559309228, "grad_norm": 2.6442873453450333, "learning_rate": 4.479730596536241e-06, "loss": 0.7958, "step": 12215 }, { "epoch": 0.14892813181724007, "grad_norm": 2.544496884605308, "learning_rate": 4.479409878127005e-06, "loss": 0.7816, "step": 12220 }, { "epoch": 0.1489890680413879, "grad_norm": 2.0247732311940396, "learning_rate": 4.479089159717768e-06, "loss": 0.7959, "step": 12225 }, { "epoch": 0.1490500042655357, "grad_norm": 2.419342496379681, "learning_rate": 4.478768441308531e-06, "loss": 0.8195, "step": 12230 }, { "epoch": 0.14911094048968349, "grad_norm": 2.714845853919872, "learning_rate": 4.478447722899294e-06, "loss": 0.7707, "step": 12235 }, { "epoch": 0.1491718767138313, "grad_norm": 2.4975822735937347, "learning_rate": 4.478127004490058e-06, "loss": 0.7811, "step": 12240 }, { "epoch": 0.1492328129379791, "grad_norm": 2.3843317663712327, "learning_rate": 4.477806286080821e-06, "loss": 0.8102, "step": 12245 }, { "epoch": 0.14929374916212693, "grad_norm": 2.4727274217993425, "learning_rate": 4.477485567671584e-06, "loss": 0.8015, "step": 12250 }, { "epoch": 0.14935468538627472, "grad_norm": 3.141471335519782, "learning_rate": 4.477164849262348e-06, "loss": 0.8967, "step": 12255 }, { "epoch": 0.14941562161042254, "grad_norm": 3.1047175081620324, "learning_rate": 4.476844130853111e-06, "loss": 0.8744, "step": 12260 }, { "epoch": 0.14947655783457034, "grad_norm": 2.420494200758851, "learning_rate": 4.476523412443874e-06, "loss": 0.7898, "step": 12265 }, { "epoch": 0.14953749405871813, "grad_norm": 2.9047202956507565, "learning_rate": 4.476202694034638e-06, "loss": 0.9218, "step": 12270 }, { "epoch": 0.14959843028286596, "grad_norm": 2.7407672958638742, "learning_rate": 4.475881975625401e-06, "loss": 0.8903, "step": 12275 }, { "epoch": 0.14965936650701375, "grad_norm": 2.5946727696098164, "learning_rate": 4.475561257216164e-06, "loss": 0.8718, "step": 12280 }, { "epoch": 0.14972030273116158, "grad_norm": 2.3023613167949466, "learning_rate": 4.475240538806928e-06, "loss": 0.7988, "step": 12285 }, { "epoch": 0.14978123895530937, "grad_norm": 2.6661298779864797, "learning_rate": 4.474919820397691e-06, "loss": 0.7522, "step": 12290 }, { "epoch": 0.1498421751794572, "grad_norm": 2.8104450946217527, "learning_rate": 4.474599101988455e-06, "loss": 0.852, "step": 12295 }, { "epoch": 0.149903111403605, "grad_norm": 2.3548973078000874, "learning_rate": 4.474278383579218e-06, "loss": 0.8621, "step": 12300 }, { "epoch": 0.14996404762775278, "grad_norm": 2.5620336143364097, "learning_rate": 4.473957665169981e-06, "loss": 0.868, "step": 12305 }, { "epoch": 0.1500249838519006, "grad_norm": 2.711112341346696, "learning_rate": 4.473636946760745e-06, "loss": 0.8572, "step": 12310 }, { "epoch": 0.1500859200760484, "grad_norm": 2.7113541818047096, "learning_rate": 4.473316228351508e-06, "loss": 0.8193, "step": 12315 }, { "epoch": 0.15014685630019622, "grad_norm": 2.217130572135602, "learning_rate": 4.4729955099422715e-06, "loss": 0.7744, "step": 12320 }, { "epoch": 0.15020779252434402, "grad_norm": 2.399849357906141, "learning_rate": 4.4726747915330346e-06, "loss": 0.8407, "step": 12325 }, { "epoch": 0.15026872874849184, "grad_norm": 2.766429720000118, "learning_rate": 4.4723540731237976e-06, "loss": 0.8087, "step": 12330 }, { "epoch": 0.15032966497263964, "grad_norm": 2.195486809612489, "learning_rate": 4.4720333547145614e-06, "loss": 0.7634, "step": 12335 }, { "epoch": 0.15039060119678743, "grad_norm": 2.7572577891316734, "learning_rate": 4.4717126363053245e-06, "loss": 0.7479, "step": 12340 }, { "epoch": 0.15045153742093526, "grad_norm": 2.514286766912472, "learning_rate": 4.4713919178960875e-06, "loss": 0.8474, "step": 12345 }, { "epoch": 0.15051247364508305, "grad_norm": 2.8159392728477393, "learning_rate": 4.471071199486851e-06, "loss": 0.8645, "step": 12350 }, { "epoch": 0.15057340986923087, "grad_norm": 2.2782379640735746, "learning_rate": 4.470750481077614e-06, "loss": 0.8189, "step": 12355 }, { "epoch": 0.15063434609337867, "grad_norm": 2.8214500489779253, "learning_rate": 4.470429762668377e-06, "loss": 0.8318, "step": 12360 }, { "epoch": 0.15069528231752646, "grad_norm": 14.210443336499258, "learning_rate": 4.470109044259141e-06, "loss": 0.7894, "step": 12365 }, { "epoch": 0.1507562185416743, "grad_norm": 3.129069880370372, "learning_rate": 4.469788325849904e-06, "loss": 0.8049, "step": 12370 }, { "epoch": 0.15081715476582208, "grad_norm": 2.5689827846115123, "learning_rate": 4.469467607440667e-06, "loss": 0.8329, "step": 12375 }, { "epoch": 0.1508780909899699, "grad_norm": 2.5342641923235614, "learning_rate": 4.469146889031431e-06, "loss": 0.7882, "step": 12380 }, { "epoch": 0.1509390272141177, "grad_norm": 2.4420512831819288, "learning_rate": 4.468826170622194e-06, "loss": 0.7705, "step": 12385 }, { "epoch": 0.15099996343826552, "grad_norm": 2.77772299712558, "learning_rate": 4.468505452212957e-06, "loss": 0.8278, "step": 12390 }, { "epoch": 0.15106089966241332, "grad_norm": 4.121868363357606, "learning_rate": 4.468184733803721e-06, "loss": 0.8652, "step": 12395 }, { "epoch": 0.1511218358865611, "grad_norm": 2.3949467525447496, "learning_rate": 4.467864015394484e-06, "loss": 0.8491, "step": 12400 }, { "epoch": 0.15118277211070894, "grad_norm": 2.510991507448384, "learning_rate": 4.467543296985247e-06, "loss": 0.8, "step": 12405 }, { "epoch": 0.15124370833485673, "grad_norm": 2.4881333199679387, "learning_rate": 4.46722257857601e-06, "loss": 0.9428, "step": 12410 }, { "epoch": 0.15130464455900455, "grad_norm": 2.4918245677804816, "learning_rate": 4.466901860166774e-06, "loss": 0.8509, "step": 12415 }, { "epoch": 0.15136558078315235, "grad_norm": 5.004505646004331, "learning_rate": 4.466581141757537e-06, "loss": 0.8111, "step": 12420 }, { "epoch": 0.15142651700730017, "grad_norm": 2.4765239234426626, "learning_rate": 4.4662604233483e-06, "loss": 0.8339, "step": 12425 }, { "epoch": 0.15148745323144797, "grad_norm": 2.9736828650318032, "learning_rate": 4.465939704939064e-06, "loss": 0.8104, "step": 12430 }, { "epoch": 0.15154838945559576, "grad_norm": 1.9964223412875286, "learning_rate": 4.465618986529827e-06, "loss": 0.7492, "step": 12435 }, { "epoch": 0.15160932567974358, "grad_norm": 2.461678117550111, "learning_rate": 4.465298268120591e-06, "loss": 0.7482, "step": 12440 }, { "epoch": 0.15167026190389138, "grad_norm": 2.394406994827936, "learning_rate": 4.464977549711354e-06, "loss": 0.7705, "step": 12445 }, { "epoch": 0.1517311981280392, "grad_norm": 2.397423906533841, "learning_rate": 4.464656831302117e-06, "loss": 0.8816, "step": 12450 }, { "epoch": 0.151792134352187, "grad_norm": 3.3548689034790984, "learning_rate": 4.464336112892881e-06, "loss": 0.8549, "step": 12455 }, { "epoch": 0.15185307057633482, "grad_norm": 3.2760279664040746, "learning_rate": 4.464015394483644e-06, "loss": 0.8023, "step": 12460 }, { "epoch": 0.15191400680048261, "grad_norm": 2.69837144530562, "learning_rate": 4.4636946760744075e-06, "loss": 0.8159, "step": 12465 }, { "epoch": 0.1519749430246304, "grad_norm": 2.750986690949459, "learning_rate": 4.4633739576651705e-06, "loss": 0.7952, "step": 12470 }, { "epoch": 0.15203587924877823, "grad_norm": 2.6310247019382214, "learning_rate": 4.4630532392559335e-06, "loss": 0.7353, "step": 12475 }, { "epoch": 0.15209681547292603, "grad_norm": 2.475847476753581, "learning_rate": 4.462732520846697e-06, "loss": 0.8208, "step": 12480 }, { "epoch": 0.15215775169707385, "grad_norm": 2.6731691284785204, "learning_rate": 4.46241180243746e-06, "loss": 0.8376, "step": 12485 }, { "epoch": 0.15221868792122165, "grad_norm": 3.6468475985639413, "learning_rate": 4.4620910840282234e-06, "loss": 0.8136, "step": 12490 }, { "epoch": 0.15227962414536947, "grad_norm": 2.2377965592344045, "learning_rate": 4.461770365618987e-06, "loss": 0.7324, "step": 12495 }, { "epoch": 0.15234056036951726, "grad_norm": 2.4079065725239746, "learning_rate": 4.46144964720975e-06, "loss": 0.7547, "step": 12500 }, { "epoch": 0.15240149659366506, "grad_norm": 2.6650494577992525, "learning_rate": 4.461128928800513e-06, "loss": 0.8355, "step": 12505 }, { "epoch": 0.15246243281781288, "grad_norm": 3.836688100152824, "learning_rate": 4.460808210391277e-06, "loss": 0.7905, "step": 12510 }, { "epoch": 0.15252336904196068, "grad_norm": 2.4925051130183156, "learning_rate": 4.46048749198204e-06, "loss": 0.7361, "step": 12515 }, { "epoch": 0.1525843052661085, "grad_norm": 2.9485515125615778, "learning_rate": 4.460166773572803e-06, "loss": 0.7855, "step": 12520 }, { "epoch": 0.1526452414902563, "grad_norm": 2.1730184935904773, "learning_rate": 4.459846055163567e-06, "loss": 0.7893, "step": 12525 }, { "epoch": 0.15270617771440412, "grad_norm": 3.233637845563128, "learning_rate": 4.45952533675433e-06, "loss": 0.856, "step": 12530 }, { "epoch": 0.1527671139385519, "grad_norm": 2.8676296844700735, "learning_rate": 4.459204618345093e-06, "loss": 0.8468, "step": 12535 }, { "epoch": 0.1528280501626997, "grad_norm": 2.7210899786453915, "learning_rate": 4.458883899935857e-06, "loss": 0.8266, "step": 12540 }, { "epoch": 0.15288898638684753, "grad_norm": 2.519105889870656, "learning_rate": 4.45856318152662e-06, "loss": 0.8996, "step": 12545 }, { "epoch": 0.15294992261099533, "grad_norm": 3.1326334925818813, "learning_rate": 4.458242463117383e-06, "loss": 0.9755, "step": 12550 }, { "epoch": 0.15301085883514315, "grad_norm": 2.2772857837492624, "learning_rate": 4.457921744708147e-06, "loss": 0.7815, "step": 12555 }, { "epoch": 0.15307179505929094, "grad_norm": 2.7448180355831697, "learning_rate": 4.45760102629891e-06, "loss": 0.8319, "step": 12560 }, { "epoch": 0.15313273128343877, "grad_norm": 3.5771014436667596, "learning_rate": 4.457280307889673e-06, "loss": 0.7782, "step": 12565 }, { "epoch": 0.15319366750758656, "grad_norm": 2.7972802932106435, "learning_rate": 4.456959589480436e-06, "loss": 0.8311, "step": 12570 }, { "epoch": 0.15325460373173436, "grad_norm": 2.5066377076929447, "learning_rate": 4.4566388710712e-06, "loss": 0.8045, "step": 12575 }, { "epoch": 0.15331553995588218, "grad_norm": 2.272443866331798, "learning_rate": 4.456318152661963e-06, "loss": 0.8047, "step": 12580 }, { "epoch": 0.15337647618002997, "grad_norm": 2.2318112737483675, "learning_rate": 4.455997434252726e-06, "loss": 0.7902, "step": 12585 }, { "epoch": 0.1534374124041778, "grad_norm": 2.6403395577147206, "learning_rate": 4.45567671584349e-06, "loss": 0.7309, "step": 12590 }, { "epoch": 0.1534983486283256, "grad_norm": 2.24741288138426, "learning_rate": 4.455355997434253e-06, "loss": 0.8428, "step": 12595 }, { "epoch": 0.1535592848524734, "grad_norm": 2.4716770808453967, "learning_rate": 4.4550352790250166e-06, "loss": 0.9089, "step": 12600 }, { "epoch": 0.1536202210766212, "grad_norm": 2.789640527810272, "learning_rate": 4.45471456061578e-06, "loss": 0.8103, "step": 12605 }, { "epoch": 0.153681157300769, "grad_norm": 2.2333307949272383, "learning_rate": 4.4543938422065435e-06, "loss": 0.8719, "step": 12610 }, { "epoch": 0.15374209352491683, "grad_norm": 2.572983622639778, "learning_rate": 4.4540731237973065e-06, "loss": 0.7661, "step": 12615 }, { "epoch": 0.15380302974906462, "grad_norm": 3.426572980666649, "learning_rate": 4.4537524053880695e-06, "loss": 0.8842, "step": 12620 }, { "epoch": 0.15386396597321245, "grad_norm": 2.5008374136199474, "learning_rate": 4.453431686978833e-06, "loss": 0.8875, "step": 12625 }, { "epoch": 0.15392490219736024, "grad_norm": 2.4565368675174426, "learning_rate": 4.453110968569596e-06, "loss": 0.7816, "step": 12630 }, { "epoch": 0.15398583842150804, "grad_norm": 2.5224573024726196, "learning_rate": 4.45279025016036e-06, "loss": 0.7418, "step": 12635 }, { "epoch": 0.15404677464565586, "grad_norm": 3.085572619649971, "learning_rate": 4.452469531751123e-06, "loss": 0.837, "step": 12640 }, { "epoch": 0.15410771086980365, "grad_norm": 2.478301877896995, "learning_rate": 4.452148813341886e-06, "loss": 0.7635, "step": 12645 }, { "epoch": 0.15416864709395148, "grad_norm": 2.338189440565043, "learning_rate": 4.451828094932649e-06, "loss": 0.8495, "step": 12650 }, { "epoch": 0.15422958331809927, "grad_norm": 2.454469011082537, "learning_rate": 4.451507376523413e-06, "loss": 0.8319, "step": 12655 }, { "epoch": 0.1542905195422471, "grad_norm": 2.1958089493527804, "learning_rate": 4.451186658114176e-06, "loss": 0.825, "step": 12660 }, { "epoch": 0.1543514557663949, "grad_norm": 2.7540386182050764, "learning_rate": 4.450865939704939e-06, "loss": 0.7952, "step": 12665 }, { "epoch": 0.15441239199054269, "grad_norm": 2.283029039600286, "learning_rate": 4.450545221295703e-06, "loss": 0.8974, "step": 12670 }, { "epoch": 0.1544733282146905, "grad_norm": 2.409838281353191, "learning_rate": 4.450224502886466e-06, "loss": 0.7274, "step": 12675 }, { "epoch": 0.1545342644388383, "grad_norm": 2.218508657038975, "learning_rate": 4.449903784477229e-06, "loss": 0.8872, "step": 12680 }, { "epoch": 0.15459520066298613, "grad_norm": 2.7095333511046884, "learning_rate": 4.449583066067993e-06, "loss": 0.9329, "step": 12685 }, { "epoch": 0.15465613688713392, "grad_norm": 2.893352786663448, "learning_rate": 4.449262347658756e-06, "loss": 0.8604, "step": 12690 }, { "epoch": 0.15471707311128174, "grad_norm": 2.7279526095539524, "learning_rate": 4.448941629249519e-06, "loss": 0.805, "step": 12695 }, { "epoch": 0.15477800933542954, "grad_norm": 2.6760378408992285, "learning_rate": 4.448620910840283e-06, "loss": 0.815, "step": 12700 }, { "epoch": 0.15483894555957733, "grad_norm": 4.262661923368921, "learning_rate": 4.448300192431046e-06, "loss": 0.8355, "step": 12705 }, { "epoch": 0.15489988178372516, "grad_norm": 4.873685552348894, "learning_rate": 4.447979474021809e-06, "loss": 0.8166, "step": 12710 }, { "epoch": 0.15496081800787295, "grad_norm": 2.162458845115357, "learning_rate": 4.447658755612573e-06, "loss": 0.8199, "step": 12715 }, { "epoch": 0.15502175423202078, "grad_norm": 1.9686941276733994, "learning_rate": 4.447338037203336e-06, "loss": 0.8237, "step": 12720 }, { "epoch": 0.15508269045616857, "grad_norm": 2.7909880207048534, "learning_rate": 4.447017318794099e-06, "loss": 0.8838, "step": 12725 }, { "epoch": 0.1551436266803164, "grad_norm": 2.3495699128293346, "learning_rate": 4.446696600384863e-06, "loss": 0.7723, "step": 12730 }, { "epoch": 0.1552045629044642, "grad_norm": 2.3317818010160853, "learning_rate": 4.446375881975626e-06, "loss": 0.8117, "step": 12735 }, { "epoch": 0.15526549912861198, "grad_norm": 3.1394174927367793, "learning_rate": 4.446055163566389e-06, "loss": 0.8828, "step": 12740 }, { "epoch": 0.1553264353527598, "grad_norm": 3.007754684351662, "learning_rate": 4.4457344451571525e-06, "loss": 0.8281, "step": 12745 }, { "epoch": 0.1553873715769076, "grad_norm": 2.2721186604875836, "learning_rate": 4.4454137267479155e-06, "loss": 0.8144, "step": 12750 }, { "epoch": 0.15544830780105542, "grad_norm": 2.2980337349247058, "learning_rate": 4.4450930083386786e-06, "loss": 0.8712, "step": 12755 }, { "epoch": 0.15550924402520322, "grad_norm": 2.5681638582881305, "learning_rate": 4.4447722899294424e-06, "loss": 0.801, "step": 12760 }, { "epoch": 0.15557018024935104, "grad_norm": 2.35871661977661, "learning_rate": 4.4444515715202054e-06, "loss": 0.8404, "step": 12765 }, { "epoch": 0.15563111647349884, "grad_norm": 2.3834954804837136, "learning_rate": 4.444130853110969e-06, "loss": 0.7228, "step": 12770 }, { "epoch": 0.15569205269764663, "grad_norm": 2.105341592147178, "learning_rate": 4.443810134701732e-06, "loss": 0.8179, "step": 12775 }, { "epoch": 0.15575298892179446, "grad_norm": 2.7704407074186794, "learning_rate": 4.443489416292495e-06, "loss": 0.8153, "step": 12780 }, { "epoch": 0.15581392514594225, "grad_norm": 2.6995603492518447, "learning_rate": 4.443168697883259e-06, "loss": 0.7862, "step": 12785 }, { "epoch": 0.15587486137009007, "grad_norm": 2.75037439078628, "learning_rate": 4.442847979474022e-06, "loss": 0.838, "step": 12790 }, { "epoch": 0.15593579759423787, "grad_norm": 3.9208816613513986, "learning_rate": 4.442527261064786e-06, "loss": 0.8132, "step": 12795 }, { "epoch": 0.1559967338183857, "grad_norm": 2.4421935158587234, "learning_rate": 4.442206542655549e-06, "loss": 0.7973, "step": 12800 }, { "epoch": 0.1560576700425335, "grad_norm": 2.099990193984415, "learning_rate": 4.441885824246312e-06, "loss": 0.8609, "step": 12805 }, { "epoch": 0.15611860626668128, "grad_norm": 2.4267111317151344, "learning_rate": 4.441565105837076e-06, "loss": 0.8387, "step": 12810 }, { "epoch": 0.1561795424908291, "grad_norm": 2.6653926011090086, "learning_rate": 4.441244387427839e-06, "loss": 0.8064, "step": 12815 }, { "epoch": 0.1562404787149769, "grad_norm": 2.601584560043188, "learning_rate": 4.440923669018602e-06, "loss": 0.7816, "step": 12820 }, { "epoch": 0.15630141493912472, "grad_norm": 2.312921520823042, "learning_rate": 4.440602950609365e-06, "loss": 0.8007, "step": 12825 }, { "epoch": 0.15636235116327252, "grad_norm": 2.9431976362625556, "learning_rate": 4.440282232200129e-06, "loss": 0.788, "step": 12830 }, { "epoch": 0.1564232873874203, "grad_norm": 2.775698085064821, "learning_rate": 4.439961513790892e-06, "loss": 0.8323, "step": 12835 }, { "epoch": 0.15648422361156814, "grad_norm": 3.530548406619051, "learning_rate": 4.439640795381655e-06, "loss": 0.8191, "step": 12840 }, { "epoch": 0.15654515983571593, "grad_norm": 2.165565950936691, "learning_rate": 4.439320076972419e-06, "loss": 0.7959, "step": 12845 }, { "epoch": 0.15660609605986375, "grad_norm": 2.4514361614084126, "learning_rate": 4.438999358563182e-06, "loss": 0.7864, "step": 12850 }, { "epoch": 0.15666703228401155, "grad_norm": 2.889886753418441, "learning_rate": 4.438678640153945e-06, "loss": 0.8121, "step": 12855 }, { "epoch": 0.15672796850815937, "grad_norm": 4.4239053059607745, "learning_rate": 4.438357921744709e-06, "loss": 0.8108, "step": 12860 }, { "epoch": 0.15678890473230717, "grad_norm": 3.050916968163432, "learning_rate": 4.438037203335472e-06, "loss": 0.8459, "step": 12865 }, { "epoch": 0.15684984095645496, "grad_norm": 2.5185908435184445, "learning_rate": 4.437716484926235e-06, "loss": 0.7743, "step": 12870 }, { "epoch": 0.15691077718060278, "grad_norm": 2.7711554804460956, "learning_rate": 4.437395766516999e-06, "loss": 0.795, "step": 12875 }, { "epoch": 0.15697171340475058, "grad_norm": 3.3534442016127106, "learning_rate": 4.437075048107762e-06, "loss": 0.8139, "step": 12880 }, { "epoch": 0.1570326496288984, "grad_norm": 3.1155482926991986, "learning_rate": 4.436754329698525e-06, "loss": 0.7952, "step": 12885 }, { "epoch": 0.1570935858530462, "grad_norm": 2.7566836059612982, "learning_rate": 4.4364336112892885e-06, "loss": 0.8296, "step": 12890 }, { "epoch": 0.15715452207719402, "grad_norm": 2.4048478085199405, "learning_rate": 4.4361128928800515e-06, "loss": 0.8519, "step": 12895 }, { "epoch": 0.15721545830134181, "grad_norm": 2.71531171368877, "learning_rate": 4.4357921744708145e-06, "loss": 0.7961, "step": 12900 }, { "epoch": 0.1572763945254896, "grad_norm": 3.400187596591222, "learning_rate": 4.435471456061578e-06, "loss": 0.8846, "step": 12905 }, { "epoch": 0.15733733074963743, "grad_norm": 2.4086769885266364, "learning_rate": 4.435150737652341e-06, "loss": 0.8391, "step": 12910 }, { "epoch": 0.15739826697378523, "grad_norm": 2.503904901497808, "learning_rate": 4.434830019243105e-06, "loss": 0.8026, "step": 12915 }, { "epoch": 0.15745920319793305, "grad_norm": 2.512187085309515, "learning_rate": 4.434509300833868e-06, "loss": 0.861, "step": 12920 }, { "epoch": 0.15752013942208085, "grad_norm": 2.4785785344790927, "learning_rate": 4.434188582424631e-06, "loss": 0.8926, "step": 12925 }, { "epoch": 0.15758107564622867, "grad_norm": 2.654299489784575, "learning_rate": 4.433867864015395e-06, "loss": 0.7152, "step": 12930 }, { "epoch": 0.15764201187037646, "grad_norm": 2.697644512859286, "learning_rate": 4.433547145606158e-06, "loss": 0.781, "step": 12935 }, { "epoch": 0.15770294809452426, "grad_norm": 2.3855991727741936, "learning_rate": 4.433226427196922e-06, "loss": 0.7828, "step": 12940 }, { "epoch": 0.15776388431867208, "grad_norm": 2.3340276588974005, "learning_rate": 4.432905708787685e-06, "loss": 0.8543, "step": 12945 }, { "epoch": 0.15782482054281988, "grad_norm": 2.487894113092017, "learning_rate": 4.432584990378448e-06, "loss": 0.8582, "step": 12950 }, { "epoch": 0.1578857567669677, "grad_norm": 2.4378651564746097, "learning_rate": 4.432264271969212e-06, "loss": 0.8024, "step": 12955 }, { "epoch": 0.1579466929911155, "grad_norm": 2.9286179809052384, "learning_rate": 4.431943553559975e-06, "loss": 0.8101, "step": 12960 }, { "epoch": 0.15800762921526332, "grad_norm": 2.9947926464070402, "learning_rate": 4.431622835150738e-06, "loss": 0.7992, "step": 12965 }, { "epoch": 0.1580685654394111, "grad_norm": 2.641454934153168, "learning_rate": 4.431302116741502e-06, "loss": 0.8194, "step": 12970 }, { "epoch": 0.1581295016635589, "grad_norm": 2.3601594851647305, "learning_rate": 4.430981398332265e-06, "loss": 0.8716, "step": 12975 }, { "epoch": 0.15819043788770673, "grad_norm": 3.183916153552578, "learning_rate": 4.430660679923028e-06, "loss": 0.8142, "step": 12980 }, { "epoch": 0.15825137411185453, "grad_norm": 2.4808559575098466, "learning_rate": 4.430339961513791e-06, "loss": 0.7918, "step": 12985 }, { "epoch": 0.15831231033600235, "grad_norm": 2.632264135154112, "learning_rate": 4.430019243104555e-06, "loss": 0.84, "step": 12990 }, { "epoch": 0.15837324656015014, "grad_norm": 3.256562902100702, "learning_rate": 4.429698524695318e-06, "loss": 0.8792, "step": 12995 }, { "epoch": 0.15843418278429797, "grad_norm": 2.253863192444847, "learning_rate": 4.429377806286081e-06, "loss": 0.7538, "step": 13000 }, { "epoch": 0.15849511900844576, "grad_norm": 2.536817356723006, "learning_rate": 4.429057087876845e-06, "loss": 0.8555, "step": 13005 }, { "epoch": 0.15855605523259356, "grad_norm": 3.038661245029888, "learning_rate": 4.428736369467608e-06, "loss": 0.8024, "step": 13010 }, { "epoch": 0.15861699145674138, "grad_norm": 2.7740383419612957, "learning_rate": 4.428415651058371e-06, "loss": 0.7589, "step": 13015 }, { "epoch": 0.15867792768088917, "grad_norm": 1.9073027874241555, "learning_rate": 4.4280949326491345e-06, "loss": 0.8524, "step": 13020 }, { "epoch": 0.158738863905037, "grad_norm": 2.736845697125748, "learning_rate": 4.4277742142398976e-06, "loss": 0.9135, "step": 13025 }, { "epoch": 0.1587998001291848, "grad_norm": 2.44043564035735, "learning_rate": 4.427453495830661e-06, "loss": 0.7878, "step": 13030 }, { "epoch": 0.15886073635333262, "grad_norm": 2.641954418307011, "learning_rate": 4.4271327774214244e-06, "loss": 0.7941, "step": 13035 }, { "epoch": 0.1589216725774804, "grad_norm": 2.3996487217539464, "learning_rate": 4.4268120590121875e-06, "loss": 0.8165, "step": 13040 }, { "epoch": 0.1589826088016282, "grad_norm": 3.232997265387044, "learning_rate": 4.4264913406029505e-06, "loss": 0.7965, "step": 13045 }, { "epoch": 0.15904354502577603, "grad_norm": 2.314320036136368, "learning_rate": 4.426170622193714e-06, "loss": 0.8937, "step": 13050 }, { "epoch": 0.15910448124992382, "grad_norm": 2.318170386931763, "learning_rate": 4.425849903784477e-06, "loss": 0.8611, "step": 13055 }, { "epoch": 0.15916541747407165, "grad_norm": 2.739972286555444, "learning_rate": 4.42552918537524e-06, "loss": 0.7598, "step": 13060 }, { "epoch": 0.15922635369821944, "grad_norm": 2.56364248990887, "learning_rate": 4.425208466966004e-06, "loss": 0.8375, "step": 13065 }, { "epoch": 0.15928728992236724, "grad_norm": 2.9213976317060104, "learning_rate": 4.424887748556767e-06, "loss": 0.8099, "step": 13070 }, { "epoch": 0.15934822614651506, "grad_norm": 3.9668167008645163, "learning_rate": 4.424567030147531e-06, "loss": 0.8829, "step": 13075 }, { "epoch": 0.15940916237066285, "grad_norm": 2.202562410525413, "learning_rate": 4.424246311738294e-06, "loss": 0.7708, "step": 13080 }, { "epoch": 0.15947009859481068, "grad_norm": 2.796557116897562, "learning_rate": 4.423925593329058e-06, "loss": 0.873, "step": 13085 }, { "epoch": 0.15953103481895847, "grad_norm": 2.5393025987164153, "learning_rate": 4.423604874919821e-06, "loss": 0.8816, "step": 13090 }, { "epoch": 0.1595919710431063, "grad_norm": 2.864286396885171, "learning_rate": 4.423284156510584e-06, "loss": 0.845, "step": 13095 }, { "epoch": 0.1596529072672541, "grad_norm": 2.332135520713914, "learning_rate": 4.422963438101348e-06, "loss": 0.8052, "step": 13100 }, { "epoch": 0.15971384349140189, "grad_norm": 2.5901918151459142, "learning_rate": 4.422642719692111e-06, "loss": 0.8795, "step": 13105 }, { "epoch": 0.1597747797155497, "grad_norm": 2.5425594004341905, "learning_rate": 4.422322001282874e-06, "loss": 0.9179, "step": 13110 }, { "epoch": 0.1598357159396975, "grad_norm": 2.324709046470209, "learning_rate": 4.422001282873638e-06, "loss": 0.8557, "step": 13115 }, { "epoch": 0.15989665216384533, "grad_norm": 3.2804272868894864, "learning_rate": 4.421680564464401e-06, "loss": 0.9061, "step": 13120 }, { "epoch": 0.15995758838799312, "grad_norm": 2.4806954489447937, "learning_rate": 4.421359846055164e-06, "loss": 0.8251, "step": 13125 }, { "epoch": 0.16001852461214094, "grad_norm": 3.0109455839536587, "learning_rate": 4.421039127645928e-06, "loss": 0.752, "step": 13130 }, { "epoch": 0.16007946083628874, "grad_norm": 2.6479851480776446, "learning_rate": 4.420718409236691e-06, "loss": 0.8281, "step": 13135 }, { "epoch": 0.16014039706043653, "grad_norm": 2.338423248422472, "learning_rate": 4.420397690827454e-06, "loss": 0.8449, "step": 13140 }, { "epoch": 0.16020133328458436, "grad_norm": 3.1296177759852535, "learning_rate": 4.420076972418218e-06, "loss": 0.7979, "step": 13145 }, { "epoch": 0.16026226950873215, "grad_norm": 2.4568350638526217, "learning_rate": 4.419756254008981e-06, "loss": 0.885, "step": 13150 }, { "epoch": 0.16032320573287998, "grad_norm": 2.7171825522469732, "learning_rate": 4.419435535599744e-06, "loss": 0.8732, "step": 13155 }, { "epoch": 0.16038414195702777, "grad_norm": 3.199450373424602, "learning_rate": 4.419114817190507e-06, "loss": 0.8054, "step": 13160 }, { "epoch": 0.1604450781811756, "grad_norm": 2.217038634610919, "learning_rate": 4.4187940987812705e-06, "loss": 0.9314, "step": 13165 }, { "epoch": 0.1605060144053234, "grad_norm": 2.3133206436410303, "learning_rate": 4.4184733803720335e-06, "loss": 0.8106, "step": 13170 }, { "epoch": 0.16056695062947118, "grad_norm": 2.718196382227275, "learning_rate": 4.4181526619627965e-06, "loss": 0.8363, "step": 13175 }, { "epoch": 0.160627886853619, "grad_norm": 3.1847848508351535, "learning_rate": 4.41783194355356e-06, "loss": 0.8707, "step": 13180 }, { "epoch": 0.1606888230777668, "grad_norm": 3.912646817477765, "learning_rate": 4.417511225144323e-06, "loss": 0.8292, "step": 13185 }, { "epoch": 0.16074975930191462, "grad_norm": 2.1978140508127715, "learning_rate": 4.4171905067350864e-06, "loss": 0.8194, "step": 13190 }, { "epoch": 0.16081069552606242, "grad_norm": 2.7801957195121925, "learning_rate": 4.41686978832585e-06, "loss": 0.8165, "step": 13195 }, { "epoch": 0.16087163175021024, "grad_norm": 2.6770132139064176, "learning_rate": 4.416549069916613e-06, "loss": 0.8294, "step": 13200 }, { "epoch": 0.16093256797435804, "grad_norm": 2.1473976989316648, "learning_rate": 4.416228351507376e-06, "loss": 0.783, "step": 13205 }, { "epoch": 0.16099350419850583, "grad_norm": 3.0243508387776723, "learning_rate": 4.41590763309814e-06, "loss": 0.8276, "step": 13210 }, { "epoch": 0.16105444042265366, "grad_norm": 2.367842425254505, "learning_rate": 4.415586914688903e-06, "loss": 0.7894, "step": 13215 }, { "epoch": 0.16111537664680145, "grad_norm": 4.105536562755503, "learning_rate": 4.415266196279667e-06, "loss": 0.864, "step": 13220 }, { "epoch": 0.16117631287094927, "grad_norm": 2.695864642596959, "learning_rate": 4.41494547787043e-06, "loss": 0.8133, "step": 13225 }, { "epoch": 0.16123724909509707, "grad_norm": 3.775461658123507, "learning_rate": 4.414624759461193e-06, "loss": 0.7864, "step": 13230 }, { "epoch": 0.1612981853192449, "grad_norm": 2.5705418406144678, "learning_rate": 4.414304041051957e-06, "loss": 0.8313, "step": 13235 }, { "epoch": 0.1613591215433927, "grad_norm": 2.9087765125119054, "learning_rate": 4.41398332264272e-06, "loss": 0.8349, "step": 13240 }, { "epoch": 0.16142005776754048, "grad_norm": 2.370104723453178, "learning_rate": 4.413662604233484e-06, "loss": 0.8077, "step": 13245 }, { "epoch": 0.1614809939916883, "grad_norm": 2.7733008977470184, "learning_rate": 4.413341885824247e-06, "loss": 0.8724, "step": 13250 }, { "epoch": 0.1615419302158361, "grad_norm": 3.1040617281120695, "learning_rate": 4.41302116741501e-06, "loss": 0.8237, "step": 13255 }, { "epoch": 0.16160286643998392, "grad_norm": 3.029679351917366, "learning_rate": 4.412700449005774e-06, "loss": 0.8612, "step": 13260 }, { "epoch": 0.16166380266413172, "grad_norm": 2.847388670777644, "learning_rate": 4.412379730596537e-06, "loss": 0.801, "step": 13265 }, { "epoch": 0.16172473888827954, "grad_norm": 2.78960503473119, "learning_rate": 4.4120590121873e-06, "loss": 0.7804, "step": 13270 }, { "epoch": 0.16178567511242734, "grad_norm": 2.5488733965458126, "learning_rate": 4.411738293778064e-06, "loss": 0.8275, "step": 13275 }, { "epoch": 0.16184661133657513, "grad_norm": 2.720271179268038, "learning_rate": 4.411417575368827e-06, "loss": 0.8225, "step": 13280 }, { "epoch": 0.16190754756072295, "grad_norm": 3.236665894530376, "learning_rate": 4.41109685695959e-06, "loss": 0.9033, "step": 13285 }, { "epoch": 0.16196848378487075, "grad_norm": 4.755591465162074, "learning_rate": 4.4107761385503535e-06, "loss": 0.7517, "step": 13290 }, { "epoch": 0.16202942000901857, "grad_norm": 2.273548325478277, "learning_rate": 4.4104554201411166e-06, "loss": 0.8123, "step": 13295 }, { "epoch": 0.16209035623316637, "grad_norm": 2.4026593345089693, "learning_rate": 4.41013470173188e-06, "loss": 0.8262, "step": 13300 }, { "epoch": 0.16215129245731416, "grad_norm": 2.273641155398537, "learning_rate": 4.4098139833226434e-06, "loss": 0.8474, "step": 13305 }, { "epoch": 0.16221222868146198, "grad_norm": 3.256090010609184, "learning_rate": 4.4094932649134065e-06, "loss": 0.862, "step": 13310 }, { "epoch": 0.16227316490560978, "grad_norm": 3.1802677849913876, "learning_rate": 4.4091725465041695e-06, "loss": 0.8557, "step": 13315 }, { "epoch": 0.1623341011297576, "grad_norm": 3.0635056686963025, "learning_rate": 4.408851828094933e-06, "loss": 0.9077, "step": 13320 }, { "epoch": 0.1623950373539054, "grad_norm": 2.2064002714453816, "learning_rate": 4.408531109685696e-06, "loss": 0.8354, "step": 13325 }, { "epoch": 0.16245597357805322, "grad_norm": 2.6787828388175363, "learning_rate": 4.408210391276459e-06, "loss": 0.8875, "step": 13330 }, { "epoch": 0.16251690980220102, "grad_norm": 2.6028408946510027, "learning_rate": 4.407889672867222e-06, "loss": 0.8148, "step": 13335 }, { "epoch": 0.1625778460263488, "grad_norm": 2.2534683284621524, "learning_rate": 4.407568954457986e-06, "loss": 0.8075, "step": 13340 }, { "epoch": 0.16263878225049663, "grad_norm": 2.62470564623886, "learning_rate": 4.407248236048749e-06, "loss": 0.8483, "step": 13345 }, { "epoch": 0.16269971847464443, "grad_norm": 3.1240794001627754, "learning_rate": 4.406927517639512e-06, "loss": 0.7722, "step": 13350 }, { "epoch": 0.16276065469879225, "grad_norm": 2.9015131354250006, "learning_rate": 4.406606799230276e-06, "loss": 0.7754, "step": 13355 }, { "epoch": 0.16282159092294005, "grad_norm": 2.483260800996564, "learning_rate": 4.406286080821039e-06, "loss": 0.856, "step": 13360 }, { "epoch": 0.16288252714708787, "grad_norm": 2.3873682890512096, "learning_rate": 4.405965362411803e-06, "loss": 0.8037, "step": 13365 }, { "epoch": 0.16294346337123566, "grad_norm": 2.2936460980502225, "learning_rate": 4.405644644002566e-06, "loss": 0.8141, "step": 13370 }, { "epoch": 0.16300439959538346, "grad_norm": 2.2198716225602997, "learning_rate": 4.405323925593329e-06, "loss": 0.8035, "step": 13375 }, { "epoch": 0.16306533581953128, "grad_norm": 2.387063337316443, "learning_rate": 4.405003207184093e-06, "loss": 0.8484, "step": 13380 }, { "epoch": 0.16312627204367908, "grad_norm": 2.3595996870428615, "learning_rate": 4.404682488774856e-06, "loss": 0.7996, "step": 13385 }, { "epoch": 0.1631872082678269, "grad_norm": 2.8909210831451997, "learning_rate": 4.40436177036562e-06, "loss": 0.7764, "step": 13390 }, { "epoch": 0.1632481444919747, "grad_norm": 2.511691948952491, "learning_rate": 4.404041051956383e-06, "loss": 0.8117, "step": 13395 }, { "epoch": 0.16330908071612252, "grad_norm": 3.2896862617687592, "learning_rate": 4.403720333547146e-06, "loss": 0.7953, "step": 13400 }, { "epoch": 0.1633700169402703, "grad_norm": 2.7709682447125012, "learning_rate": 4.40339961513791e-06, "loss": 0.8362, "step": 13405 }, { "epoch": 0.1634309531644181, "grad_norm": 2.532626575895731, "learning_rate": 4.403078896728673e-06, "loss": 0.8217, "step": 13410 }, { "epoch": 0.16349188938856593, "grad_norm": 2.2009295496868853, "learning_rate": 4.402758178319436e-06, "loss": 0.8782, "step": 13415 }, { "epoch": 0.16355282561271373, "grad_norm": 2.4920338204139663, "learning_rate": 4.4024374599102e-06, "loss": 0.7933, "step": 13420 }, { "epoch": 0.16361376183686155, "grad_norm": 2.464502786370415, "learning_rate": 4.402116741500963e-06, "loss": 0.8787, "step": 13425 }, { "epoch": 0.16367469806100934, "grad_norm": 2.0740415408913786, "learning_rate": 4.401796023091726e-06, "loss": 0.826, "step": 13430 }, { "epoch": 0.16373563428515717, "grad_norm": 2.7581681016307216, "learning_rate": 4.4014753046824895e-06, "loss": 0.8666, "step": 13435 }, { "epoch": 0.16379657050930496, "grad_norm": 3.209510175722561, "learning_rate": 4.4011545862732525e-06, "loss": 0.7627, "step": 13440 }, { "epoch": 0.16385750673345276, "grad_norm": 2.6294239934982273, "learning_rate": 4.4008338678640155e-06, "loss": 0.8045, "step": 13445 }, { "epoch": 0.16391844295760058, "grad_norm": 1.9604456310092258, "learning_rate": 4.400513149454779e-06, "loss": 0.8365, "step": 13450 }, { "epoch": 0.16397937918174837, "grad_norm": 2.486844144273644, "learning_rate": 4.400192431045542e-06, "loss": 0.8764, "step": 13455 }, { "epoch": 0.1640403154058962, "grad_norm": 2.2562264968982393, "learning_rate": 4.3998717126363054e-06, "loss": 0.7613, "step": 13460 }, { "epoch": 0.164101251630044, "grad_norm": 2.333537212069734, "learning_rate": 4.399550994227069e-06, "loss": 0.8159, "step": 13465 }, { "epoch": 0.16416218785419182, "grad_norm": 2.387111326417385, "learning_rate": 4.399230275817832e-06, "loss": 0.8379, "step": 13470 }, { "epoch": 0.1642231240783396, "grad_norm": 3.0004771270181, "learning_rate": 4.398909557408595e-06, "loss": 0.7783, "step": 13475 }, { "epoch": 0.1642840603024874, "grad_norm": 2.1596495377844516, "learning_rate": 4.398588838999359e-06, "loss": 0.8576, "step": 13480 }, { "epoch": 0.16434499652663523, "grad_norm": 2.7439137707258734, "learning_rate": 4.398268120590122e-06, "loss": 0.8103, "step": 13485 }, { "epoch": 0.16440593275078302, "grad_norm": 2.4598611147155824, "learning_rate": 4.397947402180885e-06, "loss": 0.8492, "step": 13490 }, { "epoch": 0.16446686897493085, "grad_norm": 2.3242365796521622, "learning_rate": 4.397626683771648e-06, "loss": 0.7604, "step": 13495 }, { "epoch": 0.16452780519907864, "grad_norm": 2.374613047990915, "learning_rate": 4.397305965362412e-06, "loss": 0.8433, "step": 13500 }, { "epoch": 0.16458874142322646, "grad_norm": 2.410422647125352, "learning_rate": 4.396985246953175e-06, "loss": 0.8374, "step": 13505 }, { "epoch": 0.16464967764737426, "grad_norm": 2.43073514252389, "learning_rate": 4.396664528543938e-06, "loss": 0.8346, "step": 13510 }, { "epoch": 0.16471061387152205, "grad_norm": 2.782833771889508, "learning_rate": 4.396343810134702e-06, "loss": 0.7274, "step": 13515 }, { "epoch": 0.16477155009566988, "grad_norm": 2.8913620698826175, "learning_rate": 4.396023091725465e-06, "loss": 0.8307, "step": 13520 }, { "epoch": 0.16483248631981767, "grad_norm": 2.713540965137213, "learning_rate": 4.395702373316229e-06, "loss": 0.7878, "step": 13525 }, { "epoch": 0.1648934225439655, "grad_norm": 2.598993833365645, "learning_rate": 4.395381654906992e-06, "loss": 0.8311, "step": 13530 }, { "epoch": 0.1649543587681133, "grad_norm": 2.3204106725558455, "learning_rate": 4.395060936497755e-06, "loss": 0.7699, "step": 13535 }, { "epoch": 0.16501529499226109, "grad_norm": 2.190887141681019, "learning_rate": 4.394740218088519e-06, "loss": 0.7848, "step": 13540 }, { "epoch": 0.1650762312164089, "grad_norm": 3.3556934882093836, "learning_rate": 4.394419499679282e-06, "loss": 0.9254, "step": 13545 }, { "epoch": 0.1651371674405567, "grad_norm": 2.8894128194517372, "learning_rate": 4.394098781270046e-06, "loss": 0.8185, "step": 13550 }, { "epoch": 0.16519810366470453, "grad_norm": 2.3070697590219154, "learning_rate": 4.393778062860809e-06, "loss": 0.8013, "step": 13555 }, { "epoch": 0.16525903988885232, "grad_norm": 4.464537657339907, "learning_rate": 4.3934573444515725e-06, "loss": 0.7868, "step": 13560 }, { "epoch": 0.16531997611300014, "grad_norm": 2.9629991203102333, "learning_rate": 4.3931366260423356e-06, "loss": 0.7603, "step": 13565 }, { "epoch": 0.16538091233714794, "grad_norm": 2.605868528314545, "learning_rate": 4.3928159076330986e-06, "loss": 0.8621, "step": 13570 }, { "epoch": 0.16544184856129573, "grad_norm": 3.5010368286085356, "learning_rate": 4.392495189223862e-06, "loss": 0.7867, "step": 13575 }, { "epoch": 0.16550278478544356, "grad_norm": 4.169705579614332, "learning_rate": 4.3921744708146255e-06, "loss": 0.8717, "step": 13580 }, { "epoch": 0.16556372100959135, "grad_norm": 2.3772949704374318, "learning_rate": 4.3918537524053885e-06, "loss": 0.7963, "step": 13585 }, { "epoch": 0.16562465723373918, "grad_norm": 2.6289264083029367, "learning_rate": 4.3915330339961515e-06, "loss": 0.9811, "step": 13590 }, { "epoch": 0.16568559345788697, "grad_norm": 2.0400548874474866, "learning_rate": 4.391212315586915e-06, "loss": 0.8335, "step": 13595 }, { "epoch": 0.1657465296820348, "grad_norm": 2.3953775277795586, "learning_rate": 4.390891597177678e-06, "loss": 0.8367, "step": 13600 }, { "epoch": 0.1658074659061826, "grad_norm": 3.4839408542969563, "learning_rate": 4.390570878768441e-06, "loss": 0.8449, "step": 13605 }, { "epoch": 0.16586840213033038, "grad_norm": 2.954663756889377, "learning_rate": 4.390250160359205e-06, "loss": 0.8394, "step": 13610 }, { "epoch": 0.1659293383544782, "grad_norm": 3.0080805591766873, "learning_rate": 4.389929441949968e-06, "loss": 0.9162, "step": 13615 }, { "epoch": 0.165990274578626, "grad_norm": 2.2804830643386147, "learning_rate": 4.389608723540731e-06, "loss": 0.8025, "step": 13620 }, { "epoch": 0.16605121080277382, "grad_norm": 2.235189091498331, "learning_rate": 4.389288005131495e-06, "loss": 0.8546, "step": 13625 }, { "epoch": 0.16611214702692162, "grad_norm": 2.74984837569289, "learning_rate": 4.388967286722258e-06, "loss": 0.8347, "step": 13630 }, { "epoch": 0.16617308325106944, "grad_norm": 4.889439353574321, "learning_rate": 4.388646568313021e-06, "loss": 0.8449, "step": 13635 }, { "epoch": 0.16623401947521724, "grad_norm": 2.406779147638753, "learning_rate": 4.388325849903785e-06, "loss": 0.8554, "step": 13640 }, { "epoch": 0.16629495569936503, "grad_norm": 2.159671829941558, "learning_rate": 4.388005131494548e-06, "loss": 0.8867, "step": 13645 }, { "epoch": 0.16635589192351286, "grad_norm": 2.681578015044193, "learning_rate": 4.387684413085311e-06, "loss": 0.7843, "step": 13650 }, { "epoch": 0.16641682814766065, "grad_norm": 3.364864436015627, "learning_rate": 4.387363694676075e-06, "loss": 0.7981, "step": 13655 }, { "epoch": 0.16647776437180847, "grad_norm": 2.7747626000760808, "learning_rate": 4.387042976266838e-06, "loss": 0.857, "step": 13660 }, { "epoch": 0.16653870059595627, "grad_norm": 2.5834532091363944, "learning_rate": 4.386722257857601e-06, "loss": 0.8104, "step": 13665 }, { "epoch": 0.1665996368201041, "grad_norm": 2.0975403192642257, "learning_rate": 4.386401539448365e-06, "loss": 0.8093, "step": 13670 }, { "epoch": 0.1666605730442519, "grad_norm": 3.6139912063724, "learning_rate": 4.386080821039128e-06, "loss": 0.8262, "step": 13675 }, { "epoch": 0.16672150926839968, "grad_norm": 2.4112509010430383, "learning_rate": 4.385760102629891e-06, "loss": 0.818, "step": 13680 }, { "epoch": 0.1667824454925475, "grad_norm": 2.321691742597194, "learning_rate": 4.385439384220655e-06, "loss": 0.7959, "step": 13685 }, { "epoch": 0.1668433817166953, "grad_norm": 2.7406863876857077, "learning_rate": 4.385118665811418e-06, "loss": 0.8241, "step": 13690 }, { "epoch": 0.16690431794084312, "grad_norm": 2.6622628744398193, "learning_rate": 4.384797947402182e-06, "loss": 0.8125, "step": 13695 }, { "epoch": 0.16696525416499092, "grad_norm": 2.3069997262605986, "learning_rate": 4.384477228992945e-06, "loss": 0.9305, "step": 13700 }, { "epoch": 0.16702619038913874, "grad_norm": 4.67992591302255, "learning_rate": 4.384156510583708e-06, "loss": 0.902, "step": 13705 }, { "epoch": 0.16708712661328654, "grad_norm": 2.806755827547964, "learning_rate": 4.3838357921744715e-06, "loss": 0.8845, "step": 13710 }, { "epoch": 0.16714806283743433, "grad_norm": 3.1062767035091117, "learning_rate": 4.3835150737652345e-06, "loss": 0.8931, "step": 13715 }, { "epoch": 0.16720899906158215, "grad_norm": 2.323879885473426, "learning_rate": 4.383194355355998e-06, "loss": 0.8335, "step": 13720 }, { "epoch": 0.16726993528572995, "grad_norm": 2.4170603434545077, "learning_rate": 4.382873636946761e-06, "loss": 0.8174, "step": 13725 }, { "epoch": 0.16733087150987777, "grad_norm": 2.7089070840019085, "learning_rate": 4.3825529185375244e-06, "loss": 0.8168, "step": 13730 }, { "epoch": 0.16739180773402557, "grad_norm": 2.3817828879757483, "learning_rate": 4.382232200128288e-06, "loss": 0.8633, "step": 13735 }, { "epoch": 0.1674527439581734, "grad_norm": 2.5502833199019133, "learning_rate": 4.381911481719051e-06, "loss": 0.8225, "step": 13740 }, { "epoch": 0.16751368018232118, "grad_norm": 2.63554756249381, "learning_rate": 4.381590763309814e-06, "loss": 0.8305, "step": 13745 }, { "epoch": 0.16757461640646898, "grad_norm": 2.2427905573177336, "learning_rate": 4.381270044900577e-06, "loss": 0.774, "step": 13750 }, { "epoch": 0.1676355526306168, "grad_norm": 3.1031663640233713, "learning_rate": 4.380949326491341e-06, "loss": 0.7521, "step": 13755 }, { "epoch": 0.1676964888547646, "grad_norm": 3.3958992950855578, "learning_rate": 4.380628608082104e-06, "loss": 0.8627, "step": 13760 }, { "epoch": 0.16775742507891242, "grad_norm": 3.244664969203454, "learning_rate": 4.380307889672867e-06, "loss": 0.9085, "step": 13765 }, { "epoch": 0.16781836130306022, "grad_norm": 2.550994752019137, "learning_rate": 4.379987171263631e-06, "loss": 0.8492, "step": 13770 }, { "epoch": 0.16787929752720804, "grad_norm": 2.4927807794275085, "learning_rate": 4.379666452854394e-06, "loss": 0.8518, "step": 13775 }, { "epoch": 0.16794023375135583, "grad_norm": 2.7509050237762964, "learning_rate": 4.379345734445157e-06, "loss": 0.89, "step": 13780 }, { "epoch": 0.16800116997550363, "grad_norm": 2.431161263925845, "learning_rate": 4.379025016035921e-06, "loss": 0.7746, "step": 13785 }, { "epoch": 0.16806210619965145, "grad_norm": 2.589786702650192, "learning_rate": 4.378704297626684e-06, "loss": 0.887, "step": 13790 }, { "epoch": 0.16812304242379925, "grad_norm": 2.5466506944610963, "learning_rate": 4.378383579217447e-06, "loss": 0.7676, "step": 13795 }, { "epoch": 0.16818397864794707, "grad_norm": 2.466048100872762, "learning_rate": 4.378062860808211e-06, "loss": 0.7928, "step": 13800 }, { "epoch": 0.16824491487209486, "grad_norm": 2.6570529405416146, "learning_rate": 4.377742142398974e-06, "loss": 0.8146, "step": 13805 }, { "epoch": 0.16830585109624266, "grad_norm": 2.567785770626387, "learning_rate": 4.377421423989737e-06, "loss": 0.8361, "step": 13810 }, { "epoch": 0.16836678732039048, "grad_norm": 2.316188208525241, "learning_rate": 4.377100705580501e-06, "loss": 0.8625, "step": 13815 }, { "epoch": 0.16842772354453828, "grad_norm": 2.6729119241464256, "learning_rate": 4.376779987171264e-06, "loss": 0.8306, "step": 13820 }, { "epoch": 0.1684886597686861, "grad_norm": 2.111344551886294, "learning_rate": 4.376459268762027e-06, "loss": 0.7829, "step": 13825 }, { "epoch": 0.1685495959928339, "grad_norm": 2.611249688473021, "learning_rate": 4.376138550352791e-06, "loss": 0.8311, "step": 13830 }, { "epoch": 0.16861053221698172, "grad_norm": 2.452420454016818, "learning_rate": 4.375817831943554e-06, "loss": 0.835, "step": 13835 }, { "epoch": 0.1686714684411295, "grad_norm": 2.6527514377021886, "learning_rate": 4.3754971135343176e-06, "loss": 0.8422, "step": 13840 }, { "epoch": 0.1687324046652773, "grad_norm": 2.6582636556145838, "learning_rate": 4.375176395125081e-06, "loss": 0.815, "step": 13845 }, { "epoch": 0.16879334088942513, "grad_norm": 3.0666798725955497, "learning_rate": 4.374855676715844e-06, "loss": 0.7913, "step": 13850 }, { "epoch": 0.16885427711357293, "grad_norm": 2.3355307486272903, "learning_rate": 4.3745349583066075e-06, "loss": 0.7481, "step": 13855 }, { "epoch": 0.16891521333772075, "grad_norm": 2.675609565668435, "learning_rate": 4.3742142398973705e-06, "loss": 0.8546, "step": 13860 }, { "epoch": 0.16897614956186854, "grad_norm": 2.9086756768635134, "learning_rate": 4.373893521488134e-06, "loss": 0.7201, "step": 13865 }, { "epoch": 0.16903708578601637, "grad_norm": 2.5740916351408902, "learning_rate": 4.373572803078897e-06, "loss": 0.855, "step": 13870 }, { "epoch": 0.16909802201016416, "grad_norm": 2.997405310044171, "learning_rate": 4.37325208466966e-06, "loss": 0.7677, "step": 13875 }, { "epoch": 0.16915895823431196, "grad_norm": 3.0376397979237133, "learning_rate": 4.372931366260424e-06, "loss": 0.8636, "step": 13880 }, { "epoch": 0.16921989445845978, "grad_norm": 37.839422435464236, "learning_rate": 4.372610647851187e-06, "loss": 0.9628, "step": 13885 }, { "epoch": 0.16928083068260757, "grad_norm": 2.5167844410436944, "learning_rate": 4.37228992944195e-06, "loss": 0.8397, "step": 13890 }, { "epoch": 0.1693417669067554, "grad_norm": 2.529877533786642, "learning_rate": 4.371969211032714e-06, "loss": 0.8238, "step": 13895 }, { "epoch": 0.1694027031309032, "grad_norm": 2.3952212732965585, "learning_rate": 4.371648492623477e-06, "loss": 0.7547, "step": 13900 }, { "epoch": 0.16946363935505102, "grad_norm": 3.8175168984951293, "learning_rate": 4.37132777421424e-06, "loss": 0.8818, "step": 13905 }, { "epoch": 0.1695245755791988, "grad_norm": 2.3265915225844265, "learning_rate": 4.371007055805004e-06, "loss": 0.8423, "step": 13910 }, { "epoch": 0.1695855118033466, "grad_norm": 6.166329526310238, "learning_rate": 4.370686337395767e-06, "loss": 0.7464, "step": 13915 }, { "epoch": 0.16964644802749443, "grad_norm": 2.321824483258232, "learning_rate": 4.37036561898653e-06, "loss": 0.8191, "step": 13920 }, { "epoch": 0.16970738425164222, "grad_norm": 2.7167042584212573, "learning_rate": 4.370044900577293e-06, "loss": 0.8188, "step": 13925 }, { "epoch": 0.16976832047579005, "grad_norm": 2.7022792518216305, "learning_rate": 4.369724182168057e-06, "loss": 0.8344, "step": 13930 }, { "epoch": 0.16982925669993784, "grad_norm": 3.1457972285770635, "learning_rate": 4.36940346375882e-06, "loss": 0.8142, "step": 13935 }, { "epoch": 0.16989019292408566, "grad_norm": 2.93021937459129, "learning_rate": 4.369082745349583e-06, "loss": 0.874, "step": 13940 }, { "epoch": 0.16995112914823346, "grad_norm": 2.7162372023848427, "learning_rate": 4.368762026940347e-06, "loss": 0.8139, "step": 13945 }, { "epoch": 0.17001206537238125, "grad_norm": 2.2477504538501276, "learning_rate": 4.36844130853111e-06, "loss": 0.8568, "step": 13950 }, { "epoch": 0.17007300159652908, "grad_norm": 2.2879439220756694, "learning_rate": 4.368120590121873e-06, "loss": 0.7249, "step": 13955 }, { "epoch": 0.17013393782067687, "grad_norm": 2.3055368295970484, "learning_rate": 4.367799871712637e-06, "loss": 0.8088, "step": 13960 }, { "epoch": 0.1701948740448247, "grad_norm": 2.471353634467188, "learning_rate": 4.3674791533034e-06, "loss": 0.8125, "step": 13965 }, { "epoch": 0.1702558102689725, "grad_norm": 2.63994467389613, "learning_rate": 4.367158434894163e-06, "loss": 0.7874, "step": 13970 }, { "epoch": 0.1703167464931203, "grad_norm": 2.76820284820381, "learning_rate": 4.366837716484927e-06, "loss": 0.9091, "step": 13975 }, { "epoch": 0.1703776827172681, "grad_norm": 2.6810162663235433, "learning_rate": 4.36651699807569e-06, "loss": 0.8117, "step": 13980 }, { "epoch": 0.1704386189414159, "grad_norm": 3.1021994542345017, "learning_rate": 4.366196279666453e-06, "loss": 0.7461, "step": 13985 }, { "epoch": 0.17049955516556373, "grad_norm": 3.1334322397142196, "learning_rate": 4.3658755612572165e-06, "loss": 0.84, "step": 13990 }, { "epoch": 0.17056049138971152, "grad_norm": 2.4947286327691174, "learning_rate": 4.3655548428479796e-06, "loss": 0.8639, "step": 13995 }, { "epoch": 0.17062142761385934, "grad_norm": 4.146283946564717, "learning_rate": 4.3652341244387434e-06, "loss": 0.8027, "step": 14000 }, { "epoch": 0.17068236383800714, "grad_norm": 2.5789887956843294, "learning_rate": 4.3649134060295064e-06, "loss": 0.7543, "step": 14005 }, { "epoch": 0.17074330006215496, "grad_norm": 3.2679498660482023, "learning_rate": 4.3645926876202695e-06, "loss": 0.7659, "step": 14010 }, { "epoch": 0.17080423628630276, "grad_norm": 2.3313589970021567, "learning_rate": 4.364271969211033e-06, "loss": 0.7857, "step": 14015 }, { "epoch": 0.17086517251045055, "grad_norm": 3.241190822206523, "learning_rate": 4.363951250801796e-06, "loss": 0.7768, "step": 14020 }, { "epoch": 0.17092610873459838, "grad_norm": 1.9621469729126102, "learning_rate": 4.36363053239256e-06, "loss": 0.7912, "step": 14025 }, { "epoch": 0.17098704495874617, "grad_norm": 2.503183519098635, "learning_rate": 4.363309813983323e-06, "loss": 0.8103, "step": 14030 }, { "epoch": 0.171047981182894, "grad_norm": 2.3918231448255662, "learning_rate": 4.362989095574086e-06, "loss": 0.7552, "step": 14035 }, { "epoch": 0.1711089174070418, "grad_norm": 2.3657257276096595, "learning_rate": 4.36266837716485e-06, "loss": 0.864, "step": 14040 }, { "epoch": 0.17116985363118958, "grad_norm": 1.9407215674964307, "learning_rate": 4.362347658755613e-06, "loss": 0.8222, "step": 14045 }, { "epoch": 0.1712307898553374, "grad_norm": 2.5922212710427632, "learning_rate": 4.362026940346376e-06, "loss": 0.8034, "step": 14050 }, { "epoch": 0.1712917260794852, "grad_norm": 2.0980645393587416, "learning_rate": 4.36170622193714e-06, "loss": 0.8692, "step": 14055 }, { "epoch": 0.17135266230363302, "grad_norm": 2.5125213468708294, "learning_rate": 4.361385503527903e-06, "loss": 0.8363, "step": 14060 }, { "epoch": 0.17141359852778082, "grad_norm": 2.3949989602732886, "learning_rate": 4.361064785118666e-06, "loss": 0.7547, "step": 14065 }, { "epoch": 0.17147453475192864, "grad_norm": 3.068308818601299, "learning_rate": 4.36074406670943e-06, "loss": 0.789, "step": 14070 }, { "epoch": 0.17153547097607644, "grad_norm": 2.638183310663367, "learning_rate": 4.360423348300193e-06, "loss": 0.7776, "step": 14075 }, { "epoch": 0.17159640720022423, "grad_norm": 2.365585809447112, "learning_rate": 4.360102629890956e-06, "loss": 0.7875, "step": 14080 }, { "epoch": 0.17165734342437206, "grad_norm": 2.699844279010565, "learning_rate": 4.359781911481719e-06, "loss": 0.8188, "step": 14085 }, { "epoch": 0.17171827964851985, "grad_norm": 2.4991572583730184, "learning_rate": 4.359461193072483e-06, "loss": 0.8438, "step": 14090 }, { "epoch": 0.17177921587266767, "grad_norm": 2.597320023579527, "learning_rate": 4.359140474663246e-06, "loss": 0.7654, "step": 14095 }, { "epoch": 0.17184015209681547, "grad_norm": 2.419420971305752, "learning_rate": 4.358819756254009e-06, "loss": 0.8277, "step": 14100 }, { "epoch": 0.1719010883209633, "grad_norm": 2.4268975041201926, "learning_rate": 4.358499037844773e-06, "loss": 0.8186, "step": 14105 }, { "epoch": 0.1719620245451111, "grad_norm": 2.5315936647146944, "learning_rate": 4.358178319435536e-06, "loss": 0.7402, "step": 14110 }, { "epoch": 0.17202296076925888, "grad_norm": 2.2404766253960235, "learning_rate": 4.357857601026299e-06, "loss": 0.8366, "step": 14115 }, { "epoch": 0.1720838969934067, "grad_norm": 2.7606148337925185, "learning_rate": 4.357536882617063e-06, "loss": 0.7587, "step": 14120 }, { "epoch": 0.1721448332175545, "grad_norm": 2.539104129758799, "learning_rate": 4.357216164207826e-06, "loss": 0.8267, "step": 14125 }, { "epoch": 0.17220576944170232, "grad_norm": 2.1460866977165622, "learning_rate": 4.356895445798589e-06, "loss": 0.8196, "step": 14130 }, { "epoch": 0.17226670566585012, "grad_norm": 2.5384228966913036, "learning_rate": 4.3565747273893525e-06, "loss": 0.8773, "step": 14135 }, { "epoch": 0.17232764188999794, "grad_norm": 3.427370636005628, "learning_rate": 4.3562540089801155e-06, "loss": 0.8582, "step": 14140 }, { "epoch": 0.17238857811414574, "grad_norm": 2.3136078036666903, "learning_rate": 4.355933290570879e-06, "loss": 0.8517, "step": 14145 }, { "epoch": 0.17244951433829353, "grad_norm": 2.278570990155896, "learning_rate": 4.355612572161642e-06, "loss": 0.7927, "step": 14150 }, { "epoch": 0.17251045056244135, "grad_norm": 3.53913195293507, "learning_rate": 4.355291853752405e-06, "loss": 0.8684, "step": 14155 }, { "epoch": 0.17257138678658915, "grad_norm": 2.472958811654536, "learning_rate": 4.354971135343169e-06, "loss": 0.827, "step": 14160 }, { "epoch": 0.17263232301073697, "grad_norm": 2.1939946615726758, "learning_rate": 4.354650416933932e-06, "loss": 0.8461, "step": 14165 }, { "epoch": 0.17269325923488477, "grad_norm": 2.932327782697273, "learning_rate": 4.354329698524696e-06, "loss": 0.7693, "step": 14170 }, { "epoch": 0.1727541954590326, "grad_norm": 2.417872685134013, "learning_rate": 4.354008980115459e-06, "loss": 0.819, "step": 14175 }, { "epoch": 0.17281513168318038, "grad_norm": 2.742124622597748, "learning_rate": 4.353688261706222e-06, "loss": 0.9031, "step": 14180 }, { "epoch": 0.17287606790732818, "grad_norm": 2.5133765968858537, "learning_rate": 4.353367543296986e-06, "loss": 0.8006, "step": 14185 }, { "epoch": 0.172937004131476, "grad_norm": 3.193596978752012, "learning_rate": 4.353046824887749e-06, "loss": 0.8635, "step": 14190 }, { "epoch": 0.1729979403556238, "grad_norm": 2.8796077178178483, "learning_rate": 4.352726106478512e-06, "loss": 0.842, "step": 14195 }, { "epoch": 0.17305887657977162, "grad_norm": 2.098797782116719, "learning_rate": 4.352405388069276e-06, "loss": 0.7889, "step": 14200 }, { "epoch": 0.17311981280391942, "grad_norm": 2.8924857476443213, "learning_rate": 4.352084669660039e-06, "loss": 0.8137, "step": 14205 }, { "epoch": 0.17318074902806724, "grad_norm": 3.909697204751309, "learning_rate": 4.351763951250802e-06, "loss": 0.8747, "step": 14210 }, { "epoch": 0.17324168525221503, "grad_norm": 2.530222037726758, "learning_rate": 4.351443232841566e-06, "loss": 0.8383, "step": 14215 }, { "epoch": 0.17330262147636283, "grad_norm": 2.509347442145136, "learning_rate": 4.351122514432329e-06, "loss": 0.8707, "step": 14220 }, { "epoch": 0.17336355770051065, "grad_norm": 2.178546005936529, "learning_rate": 4.350801796023092e-06, "loss": 0.7943, "step": 14225 }, { "epoch": 0.17342449392465845, "grad_norm": 2.821209920765655, "learning_rate": 4.350481077613856e-06, "loss": 0.84, "step": 14230 }, { "epoch": 0.17348543014880627, "grad_norm": 2.2385565063691355, "learning_rate": 4.350160359204619e-06, "loss": 0.745, "step": 14235 }, { "epoch": 0.17354636637295406, "grad_norm": 2.5698464689157245, "learning_rate": 4.349839640795382e-06, "loss": 0.8639, "step": 14240 }, { "epoch": 0.1736073025971019, "grad_norm": 2.984902317977823, "learning_rate": 4.349518922386146e-06, "loss": 0.8322, "step": 14245 }, { "epoch": 0.17366823882124968, "grad_norm": 2.595490560847595, "learning_rate": 4.349198203976909e-06, "loss": 0.804, "step": 14250 }, { "epoch": 0.17372917504539748, "grad_norm": 2.769944474430749, "learning_rate": 4.348877485567672e-06, "loss": 0.7855, "step": 14255 }, { "epoch": 0.1737901112695453, "grad_norm": 2.6889318486026936, "learning_rate": 4.348556767158435e-06, "loss": 0.8796, "step": 14260 }, { "epoch": 0.1738510474936931, "grad_norm": 2.5916516569421497, "learning_rate": 4.3482360487491986e-06, "loss": 0.84, "step": 14265 }, { "epoch": 0.17391198371784092, "grad_norm": 2.4193813114120615, "learning_rate": 4.347915330339962e-06, "loss": 0.8497, "step": 14270 }, { "epoch": 0.1739729199419887, "grad_norm": 2.478250995933822, "learning_rate": 4.347594611930725e-06, "loss": 0.807, "step": 14275 }, { "epoch": 0.1740338561661365, "grad_norm": 2.142996204268109, "learning_rate": 4.3472738935214885e-06, "loss": 0.8295, "step": 14280 }, { "epoch": 0.17409479239028433, "grad_norm": 2.593690816016696, "learning_rate": 4.3469531751122515e-06, "loss": 0.7954, "step": 14285 }, { "epoch": 0.17415572861443213, "grad_norm": 2.650525051548979, "learning_rate": 4.346632456703015e-06, "loss": 0.7998, "step": 14290 }, { "epoch": 0.17421666483857995, "grad_norm": 3.2186200631030157, "learning_rate": 4.346311738293778e-06, "loss": 0.8556, "step": 14295 }, { "epoch": 0.17427760106272774, "grad_norm": 2.8299100408492044, "learning_rate": 4.345991019884541e-06, "loss": 0.8111, "step": 14300 }, { "epoch": 0.17433853728687557, "grad_norm": 2.106712330318189, "learning_rate": 4.345670301475305e-06, "loss": 0.8885, "step": 14305 }, { "epoch": 0.17439947351102336, "grad_norm": 2.5449071769853844, "learning_rate": 4.345349583066068e-06, "loss": 0.8397, "step": 14310 }, { "epoch": 0.17446040973517116, "grad_norm": 2.4816320597624957, "learning_rate": 4.345028864656832e-06, "loss": 0.7388, "step": 14315 }, { "epoch": 0.17452134595931898, "grad_norm": 2.4711803544748885, "learning_rate": 4.344708146247595e-06, "loss": 0.8492, "step": 14320 }, { "epoch": 0.17458228218346677, "grad_norm": 2.347493882832955, "learning_rate": 4.344387427838358e-06, "loss": 0.8174, "step": 14325 }, { "epoch": 0.1746432184076146, "grad_norm": 4.437818967587673, "learning_rate": 4.344066709429122e-06, "loss": 0.7682, "step": 14330 }, { "epoch": 0.1747041546317624, "grad_norm": 2.2918157589051322, "learning_rate": 4.343745991019885e-06, "loss": 0.7883, "step": 14335 }, { "epoch": 0.17476509085591022, "grad_norm": 2.2033368124736503, "learning_rate": 4.343425272610648e-06, "loss": 0.8149, "step": 14340 }, { "epoch": 0.174826027080058, "grad_norm": 2.7357903006239574, "learning_rate": 4.343104554201412e-06, "loss": 0.9023, "step": 14345 }, { "epoch": 0.1748869633042058, "grad_norm": 2.07800487877536, "learning_rate": 4.342783835792175e-06, "loss": 0.7511, "step": 14350 }, { "epoch": 0.17494789952835363, "grad_norm": 2.4507509915388774, "learning_rate": 4.342463117382938e-06, "loss": 0.8423, "step": 14355 }, { "epoch": 0.17500883575250142, "grad_norm": 3.103525643768962, "learning_rate": 4.342142398973702e-06, "loss": 0.8077, "step": 14360 }, { "epoch": 0.17506977197664925, "grad_norm": 2.4397766189641095, "learning_rate": 4.341821680564465e-06, "loss": 0.8266, "step": 14365 }, { "epoch": 0.17513070820079704, "grad_norm": 2.284744221283015, "learning_rate": 4.341500962155228e-06, "loss": 0.7986, "step": 14370 }, { "epoch": 0.17519164442494486, "grad_norm": 2.4610855047838283, "learning_rate": 4.341180243745992e-06, "loss": 0.8443, "step": 14375 }, { "epoch": 0.17525258064909266, "grad_norm": 2.1620111518688776, "learning_rate": 4.340859525336755e-06, "loss": 0.8129, "step": 14380 }, { "epoch": 0.17531351687324045, "grad_norm": 2.805868393076045, "learning_rate": 4.340538806927518e-06, "loss": 0.8599, "step": 14385 }, { "epoch": 0.17537445309738828, "grad_norm": 2.326194291408666, "learning_rate": 4.340218088518282e-06, "loss": 0.8542, "step": 14390 }, { "epoch": 0.17543538932153607, "grad_norm": 2.7006449297781594, "learning_rate": 4.339897370109045e-06, "loss": 0.7761, "step": 14395 }, { "epoch": 0.1754963255456839, "grad_norm": 2.7783360097951744, "learning_rate": 4.339576651699808e-06, "loss": 0.8742, "step": 14400 }, { "epoch": 0.1755572617698317, "grad_norm": 2.9470173841163243, "learning_rate": 4.3392559332905715e-06, "loss": 0.7353, "step": 14405 }, { "epoch": 0.1756181979939795, "grad_norm": 2.5667591831091645, "learning_rate": 4.3389352148813345e-06, "loss": 0.7969, "step": 14410 }, { "epoch": 0.1756791342181273, "grad_norm": 2.7646168587776696, "learning_rate": 4.3386144964720975e-06, "loss": 0.838, "step": 14415 }, { "epoch": 0.1757400704422751, "grad_norm": 2.633446211981135, "learning_rate": 4.3382937780628606e-06, "loss": 0.7636, "step": 14420 }, { "epoch": 0.17580100666642293, "grad_norm": 2.4046517673750527, "learning_rate": 4.337973059653624e-06, "loss": 0.8436, "step": 14425 }, { "epoch": 0.17586194289057072, "grad_norm": 2.556381972398049, "learning_rate": 4.3376523412443874e-06, "loss": 0.8083, "step": 14430 }, { "epoch": 0.17592287911471854, "grad_norm": 2.4091902340404237, "learning_rate": 4.3373316228351505e-06, "loss": 0.7106, "step": 14435 }, { "epoch": 0.17598381533886634, "grad_norm": 3.3291859774323647, "learning_rate": 4.337010904425914e-06, "loss": 0.8551, "step": 14440 }, { "epoch": 0.17604475156301416, "grad_norm": 2.742458205015005, "learning_rate": 4.336690186016677e-06, "loss": 0.8494, "step": 14445 }, { "epoch": 0.17610568778716196, "grad_norm": 2.176013889530715, "learning_rate": 4.336369467607441e-06, "loss": 0.7821, "step": 14450 }, { "epoch": 0.17616662401130975, "grad_norm": 2.421623440132819, "learning_rate": 4.336048749198204e-06, "loss": 0.7877, "step": 14455 }, { "epoch": 0.17622756023545758, "grad_norm": 2.338264591173946, "learning_rate": 4.335728030788967e-06, "loss": 0.8312, "step": 14460 }, { "epoch": 0.17628849645960537, "grad_norm": 2.5066388233077914, "learning_rate": 4.335407312379731e-06, "loss": 0.8353, "step": 14465 }, { "epoch": 0.1763494326837532, "grad_norm": 2.4548754654282092, "learning_rate": 4.335086593970494e-06, "loss": 0.7784, "step": 14470 }, { "epoch": 0.176410368907901, "grad_norm": 3.425294845101292, "learning_rate": 4.334765875561258e-06, "loss": 0.8263, "step": 14475 }, { "epoch": 0.1764713051320488, "grad_norm": 2.3377389459148574, "learning_rate": 4.334445157152021e-06, "loss": 0.7851, "step": 14480 }, { "epoch": 0.1765322413561966, "grad_norm": 2.5915501573984763, "learning_rate": 4.334124438742785e-06, "loss": 0.8054, "step": 14485 }, { "epoch": 0.1765931775803444, "grad_norm": 2.251769183124685, "learning_rate": 4.333803720333548e-06, "loss": 0.8642, "step": 14490 }, { "epoch": 0.17665411380449222, "grad_norm": 2.5091530884290574, "learning_rate": 4.333483001924311e-06, "loss": 0.8185, "step": 14495 }, { "epoch": 0.17671505002864002, "grad_norm": 2.4676652064607456, "learning_rate": 4.333162283515075e-06, "loss": 0.8235, "step": 14500 }, { "epoch": 0.17677598625278784, "grad_norm": 2.279586685127262, "learning_rate": 4.332841565105838e-06, "loss": 0.8347, "step": 14505 }, { "epoch": 0.17683692247693564, "grad_norm": 2.8987632178330216, "learning_rate": 4.332520846696601e-06, "loss": 0.8385, "step": 14510 }, { "epoch": 0.17689785870108343, "grad_norm": 3.5392018913055194, "learning_rate": 4.332200128287364e-06, "loss": 0.7928, "step": 14515 }, { "epoch": 0.17695879492523126, "grad_norm": 2.1384506206243743, "learning_rate": 4.331879409878128e-06, "loss": 0.8313, "step": 14520 }, { "epoch": 0.17701973114937905, "grad_norm": 2.6972529047751737, "learning_rate": 4.331558691468891e-06, "loss": 0.8147, "step": 14525 }, { "epoch": 0.17708066737352687, "grad_norm": 2.229615381231739, "learning_rate": 4.331237973059654e-06, "loss": 0.7657, "step": 14530 }, { "epoch": 0.17714160359767467, "grad_norm": 2.978435194112832, "learning_rate": 4.3309172546504176e-06, "loss": 0.8338, "step": 14535 }, { "epoch": 0.1772025398218225, "grad_norm": 2.71649414768874, "learning_rate": 4.330596536241181e-06, "loss": 0.8342, "step": 14540 }, { "epoch": 0.1772634760459703, "grad_norm": 2.560058559656059, "learning_rate": 4.330275817831944e-06, "loss": 0.8918, "step": 14545 }, { "epoch": 0.17732441227011808, "grad_norm": 2.18439634678091, "learning_rate": 4.3299550994227075e-06, "loss": 0.7789, "step": 14550 }, { "epoch": 0.1773853484942659, "grad_norm": 2.6616764475168515, "learning_rate": 4.3296343810134705e-06, "loss": 0.8243, "step": 14555 }, { "epoch": 0.1774462847184137, "grad_norm": 2.301614346905958, "learning_rate": 4.3293136626042335e-06, "loss": 0.8237, "step": 14560 }, { "epoch": 0.17750722094256152, "grad_norm": 4.439364954012152, "learning_rate": 4.328992944194997e-06, "loss": 0.8416, "step": 14565 }, { "epoch": 0.17756815716670932, "grad_norm": 2.4293106638895328, "learning_rate": 4.32867222578576e-06, "loss": 0.8108, "step": 14570 }, { "epoch": 0.17762909339085714, "grad_norm": 2.6100767847922595, "learning_rate": 4.328351507376523e-06, "loss": 0.844, "step": 14575 }, { "epoch": 0.17769002961500494, "grad_norm": 2.9467807268735036, "learning_rate": 4.328030788967287e-06, "loss": 0.8316, "step": 14580 }, { "epoch": 0.17775096583915273, "grad_norm": 2.2551369615139345, "learning_rate": 4.32771007055805e-06, "loss": 0.7911, "step": 14585 }, { "epoch": 0.17781190206330055, "grad_norm": 2.3280170282466375, "learning_rate": 4.327389352148813e-06, "loss": 0.8243, "step": 14590 }, { "epoch": 0.17787283828744835, "grad_norm": 2.5933056470905953, "learning_rate": 4.327068633739577e-06, "loss": 0.7198, "step": 14595 }, { "epoch": 0.17793377451159617, "grad_norm": 2.7585152542209928, "learning_rate": 4.32674791533034e-06, "loss": 0.8493, "step": 14600 }, { "epoch": 0.17799471073574397, "grad_norm": 2.4393158133542516, "learning_rate": 4.326427196921103e-06, "loss": 0.8198, "step": 14605 }, { "epoch": 0.1780556469598918, "grad_norm": 2.6167972565085122, "learning_rate": 4.326106478511867e-06, "loss": 0.8683, "step": 14610 }, { "epoch": 0.17811658318403958, "grad_norm": 2.6040376783859753, "learning_rate": 4.32578576010263e-06, "loss": 0.8129, "step": 14615 }, { "epoch": 0.17817751940818738, "grad_norm": 3.2406324335993935, "learning_rate": 4.325465041693394e-06, "loss": 0.8345, "step": 14620 }, { "epoch": 0.1782384556323352, "grad_norm": 2.114976409741658, "learning_rate": 4.325144323284157e-06, "loss": 0.7651, "step": 14625 }, { "epoch": 0.178299391856483, "grad_norm": 2.5593472864044555, "learning_rate": 4.32482360487492e-06, "loss": 0.8408, "step": 14630 }, { "epoch": 0.17836032808063082, "grad_norm": 2.674685985193136, "learning_rate": 4.324502886465684e-06, "loss": 0.8197, "step": 14635 }, { "epoch": 0.17842126430477862, "grad_norm": 2.33389010129982, "learning_rate": 4.324182168056447e-06, "loss": 0.801, "step": 14640 }, { "epoch": 0.17848220052892644, "grad_norm": 2.4586607435097214, "learning_rate": 4.323861449647211e-06, "loss": 0.7911, "step": 14645 }, { "epoch": 0.17854313675307423, "grad_norm": 2.872413862186376, "learning_rate": 4.323540731237974e-06, "loss": 0.8493, "step": 14650 }, { "epoch": 0.17860407297722203, "grad_norm": 2.228248387904611, "learning_rate": 4.323220012828737e-06, "loss": 0.813, "step": 14655 }, { "epoch": 0.17866500920136985, "grad_norm": 2.168346214396712, "learning_rate": 4.322899294419501e-06, "loss": 0.825, "step": 14660 }, { "epoch": 0.17872594542551765, "grad_norm": 3.1821588716914215, "learning_rate": 4.322578576010264e-06, "loss": 0.7972, "step": 14665 }, { "epoch": 0.17878688164966547, "grad_norm": 2.216499946780645, "learning_rate": 4.322257857601027e-06, "loss": 0.7595, "step": 14670 }, { "epoch": 0.17884781787381326, "grad_norm": 2.3903214123555996, "learning_rate": 4.32193713919179e-06, "loss": 0.8196, "step": 14675 }, { "epoch": 0.1789087540979611, "grad_norm": 2.0045564653061456, "learning_rate": 4.3216164207825535e-06, "loss": 0.8235, "step": 14680 }, { "epoch": 0.17896969032210888, "grad_norm": 3.1582447632180997, "learning_rate": 4.3212957023733165e-06, "loss": 0.8552, "step": 14685 }, { "epoch": 0.17903062654625668, "grad_norm": 3.363409222472913, "learning_rate": 4.3209749839640796e-06, "loss": 0.8151, "step": 14690 }, { "epoch": 0.1790915627704045, "grad_norm": 2.8267546756301947, "learning_rate": 4.320654265554843e-06, "loss": 0.7379, "step": 14695 }, { "epoch": 0.1791524989945523, "grad_norm": 2.690812816951875, "learning_rate": 4.3203335471456064e-06, "loss": 0.8444, "step": 14700 }, { "epoch": 0.17921343521870012, "grad_norm": 2.4659057202578465, "learning_rate": 4.3200128287363695e-06, "loss": 0.8511, "step": 14705 }, { "epoch": 0.1792743714428479, "grad_norm": 2.33700854920545, "learning_rate": 4.319692110327133e-06, "loss": 0.8173, "step": 14710 }, { "epoch": 0.17933530766699574, "grad_norm": 2.6042272541305302, "learning_rate": 4.319371391917896e-06, "loss": 0.8, "step": 14715 }, { "epoch": 0.17939624389114353, "grad_norm": 2.9115049438031266, "learning_rate": 4.319050673508659e-06, "loss": 0.7828, "step": 14720 }, { "epoch": 0.17945718011529133, "grad_norm": 2.464192083596995, "learning_rate": 4.318729955099423e-06, "loss": 0.8014, "step": 14725 }, { "epoch": 0.17951811633943915, "grad_norm": 2.44418938083978, "learning_rate": 4.318409236690186e-06, "loss": 0.8272, "step": 14730 }, { "epoch": 0.17957905256358694, "grad_norm": 4.535095170537161, "learning_rate": 4.318088518280949e-06, "loss": 0.8391, "step": 14735 }, { "epoch": 0.17963998878773477, "grad_norm": 3.226340404130251, "learning_rate": 4.317767799871713e-06, "loss": 0.7609, "step": 14740 }, { "epoch": 0.17970092501188256, "grad_norm": 2.9619460921627954, "learning_rate": 4.317447081462476e-06, "loss": 0.8031, "step": 14745 }, { "epoch": 0.17976186123603036, "grad_norm": 1.9497380558685837, "learning_rate": 4.317126363053239e-06, "loss": 0.7874, "step": 14750 }, { "epoch": 0.17982279746017818, "grad_norm": 2.6151545604102275, "learning_rate": 4.316805644644003e-06, "loss": 0.8812, "step": 14755 }, { "epoch": 0.17988373368432597, "grad_norm": 2.798979916697666, "learning_rate": 4.316484926234766e-06, "loss": 0.7666, "step": 14760 }, { "epoch": 0.1799446699084738, "grad_norm": 2.5515717384854475, "learning_rate": 4.31616420782553e-06, "loss": 0.8821, "step": 14765 }, { "epoch": 0.1800056061326216, "grad_norm": 2.8965349996244556, "learning_rate": 4.315843489416293e-06, "loss": 0.824, "step": 14770 }, { "epoch": 0.18006654235676942, "grad_norm": 2.4979044622154207, "learning_rate": 4.315522771007056e-06, "loss": 0.7899, "step": 14775 }, { "epoch": 0.1801274785809172, "grad_norm": 3.3247868356078665, "learning_rate": 4.31520205259782e-06, "loss": 0.7995, "step": 14780 }, { "epoch": 0.180188414805065, "grad_norm": 2.6526441720817275, "learning_rate": 4.314881334188583e-06, "loss": 0.8782, "step": 14785 }, { "epoch": 0.18024935102921283, "grad_norm": 4.394631642020108, "learning_rate": 4.314560615779347e-06, "loss": 0.8312, "step": 14790 }, { "epoch": 0.18031028725336062, "grad_norm": 2.523573418981286, "learning_rate": 4.31423989737011e-06, "loss": 0.7623, "step": 14795 }, { "epoch": 0.18037122347750845, "grad_norm": 2.6043067838310643, "learning_rate": 4.313919178960873e-06, "loss": 0.761, "step": 14800 }, { "epoch": 0.18043215970165624, "grad_norm": 2.742715719985171, "learning_rate": 4.3135984605516366e-06, "loss": 0.7926, "step": 14805 }, { "epoch": 0.18049309592580406, "grad_norm": 2.4187077349939328, "learning_rate": 4.3132777421424e-06, "loss": 0.831, "step": 14810 }, { "epoch": 0.18055403214995186, "grad_norm": 2.4540060757059936, "learning_rate": 4.312957023733163e-06, "loss": 0.7892, "step": 14815 }, { "epoch": 0.18061496837409965, "grad_norm": 2.70568994258826, "learning_rate": 4.3126363053239265e-06, "loss": 0.7957, "step": 14820 }, { "epoch": 0.18067590459824748, "grad_norm": 2.5434173824579154, "learning_rate": 4.3123155869146895e-06, "loss": 0.7467, "step": 14825 }, { "epoch": 0.18073684082239527, "grad_norm": 4.790024187607177, "learning_rate": 4.3119948685054525e-06, "loss": 0.7462, "step": 14830 }, { "epoch": 0.1807977770465431, "grad_norm": 2.7802651733618107, "learning_rate": 4.311674150096216e-06, "loss": 0.8142, "step": 14835 }, { "epoch": 0.1808587132706909, "grad_norm": 3.4559537699860168, "learning_rate": 4.311353431686979e-06, "loss": 0.8743, "step": 14840 }, { "epoch": 0.1809196494948387, "grad_norm": 2.594893695433867, "learning_rate": 4.311032713277742e-06, "loss": 0.8797, "step": 14845 }, { "epoch": 0.1809805857189865, "grad_norm": 4.2999598994711805, "learning_rate": 4.310711994868505e-06, "loss": 0.8674, "step": 14850 }, { "epoch": 0.1810415219431343, "grad_norm": 2.544703160749262, "learning_rate": 4.310391276459269e-06, "loss": 0.7956, "step": 14855 }, { "epoch": 0.18110245816728213, "grad_norm": 2.639565758610178, "learning_rate": 4.310070558050032e-06, "loss": 0.877, "step": 14860 }, { "epoch": 0.18116339439142992, "grad_norm": 2.8341987334493273, "learning_rate": 4.309749839640795e-06, "loss": 0.8261, "step": 14865 }, { "epoch": 0.18122433061557774, "grad_norm": 2.8388745189906412, "learning_rate": 4.309429121231559e-06, "loss": 0.8679, "step": 14870 }, { "epoch": 0.18128526683972554, "grad_norm": 2.9416804188459653, "learning_rate": 4.309108402822322e-06, "loss": 0.8359, "step": 14875 }, { "epoch": 0.18134620306387336, "grad_norm": 2.1258201170292876, "learning_rate": 4.308787684413085e-06, "loss": 0.7514, "step": 14880 }, { "epoch": 0.18140713928802116, "grad_norm": 2.4685744113351897, "learning_rate": 4.308466966003849e-06, "loss": 0.8503, "step": 14885 }, { "epoch": 0.18146807551216895, "grad_norm": 2.8719387364116207, "learning_rate": 4.308146247594612e-06, "loss": 0.7696, "step": 14890 }, { "epoch": 0.18152901173631678, "grad_norm": 2.965019739093924, "learning_rate": 4.307825529185375e-06, "loss": 0.8607, "step": 14895 }, { "epoch": 0.18158994796046457, "grad_norm": 3.5829775882335375, "learning_rate": 4.307504810776139e-06, "loss": 0.8245, "step": 14900 }, { "epoch": 0.1816508841846124, "grad_norm": 2.6635393922334982, "learning_rate": 4.307184092366902e-06, "loss": 0.7939, "step": 14905 }, { "epoch": 0.1817118204087602, "grad_norm": 2.073072689866906, "learning_rate": 4.306863373957665e-06, "loss": 0.788, "step": 14910 }, { "epoch": 0.181772756632908, "grad_norm": 3.363205716446053, "learning_rate": 4.306542655548429e-06, "loss": 0.8121, "step": 14915 }, { "epoch": 0.1818336928570558, "grad_norm": 2.764349914668055, "learning_rate": 4.306221937139192e-06, "loss": 0.8108, "step": 14920 }, { "epoch": 0.1818946290812036, "grad_norm": 2.1991837804646495, "learning_rate": 4.305901218729956e-06, "loss": 0.8161, "step": 14925 }, { "epoch": 0.18195556530535142, "grad_norm": 3.3177268845898844, "learning_rate": 4.305580500320719e-06, "loss": 0.8327, "step": 14930 }, { "epoch": 0.18201650152949922, "grad_norm": 3.4235027938454006, "learning_rate": 4.305259781911482e-06, "loss": 0.8425, "step": 14935 }, { "epoch": 0.18207743775364704, "grad_norm": 3.3775158537640175, "learning_rate": 4.304939063502246e-06, "loss": 0.8796, "step": 14940 }, { "epoch": 0.18213837397779484, "grad_norm": 2.393648020463527, "learning_rate": 4.304618345093009e-06, "loss": 0.8138, "step": 14945 }, { "epoch": 0.18219931020194266, "grad_norm": 2.666074887472769, "learning_rate": 4.3042976266837725e-06, "loss": 0.7969, "step": 14950 }, { "epoch": 0.18226024642609046, "grad_norm": 3.2854068212777108, "learning_rate": 4.3039769082745355e-06, "loss": 0.8629, "step": 14955 }, { "epoch": 0.18232118265023825, "grad_norm": 2.3926315520405708, "learning_rate": 4.3036561898652986e-06, "loss": 0.8462, "step": 14960 }, { "epoch": 0.18238211887438607, "grad_norm": 2.1493996718110284, "learning_rate": 4.303335471456062e-06, "loss": 0.8847, "step": 14965 }, { "epoch": 0.18244305509853387, "grad_norm": 3.1324735915035182, "learning_rate": 4.3030147530468254e-06, "loss": 0.7902, "step": 14970 }, { "epoch": 0.1825039913226817, "grad_norm": 2.813852547502117, "learning_rate": 4.3026940346375885e-06, "loss": 0.7853, "step": 14975 }, { "epoch": 0.1825649275468295, "grad_norm": 3.156168745173519, "learning_rate": 4.302373316228352e-06, "loss": 0.8307, "step": 14980 }, { "epoch": 0.18262586377097728, "grad_norm": 2.8609248779361307, "learning_rate": 4.302052597819115e-06, "loss": 0.8472, "step": 14985 }, { "epoch": 0.1826867999951251, "grad_norm": 2.3343350856198573, "learning_rate": 4.301731879409878e-06, "loss": 0.7962, "step": 14990 }, { "epoch": 0.1827477362192729, "grad_norm": 2.7045816032281818, "learning_rate": 4.301411161000642e-06, "loss": 0.7958, "step": 14995 }, { "epoch": 0.18280867244342072, "grad_norm": 3.7376364316831228, "learning_rate": 4.301090442591405e-06, "loss": 0.7521, "step": 15000 }, { "epoch": 0.18286960866756852, "grad_norm": 2.5885219817925935, "learning_rate": 4.300769724182168e-06, "loss": 0.7862, "step": 15005 }, { "epoch": 0.18293054489171634, "grad_norm": 3.2614273964730565, "learning_rate": 4.300449005772931e-06, "loss": 0.8053, "step": 15010 }, { "epoch": 0.18299148111586414, "grad_norm": 2.7460466459563193, "learning_rate": 4.300128287363695e-06, "loss": 0.9217, "step": 15015 }, { "epoch": 0.18305241734001193, "grad_norm": 2.3874859500346277, "learning_rate": 4.299807568954458e-06, "loss": 0.8281, "step": 15020 }, { "epoch": 0.18311335356415975, "grad_norm": 2.5468178722753385, "learning_rate": 4.299486850545221e-06, "loss": 0.8156, "step": 15025 }, { "epoch": 0.18317428978830755, "grad_norm": 2.3954075139650373, "learning_rate": 4.299166132135985e-06, "loss": 0.7953, "step": 15030 }, { "epoch": 0.18323522601245537, "grad_norm": 3.0562462405989033, "learning_rate": 4.298845413726748e-06, "loss": 0.7284, "step": 15035 }, { "epoch": 0.18329616223660317, "grad_norm": 2.3676081301801184, "learning_rate": 4.298524695317511e-06, "loss": 0.8178, "step": 15040 }, { "epoch": 0.183357098460751, "grad_norm": 2.90215032939397, "learning_rate": 4.298203976908275e-06, "loss": 0.8196, "step": 15045 }, { "epoch": 0.18341803468489878, "grad_norm": 2.132105955347871, "learning_rate": 4.297883258499038e-06, "loss": 0.8519, "step": 15050 }, { "epoch": 0.18347897090904658, "grad_norm": 2.5321686016686122, "learning_rate": 4.297562540089801e-06, "loss": 0.804, "step": 15055 }, { "epoch": 0.1835399071331944, "grad_norm": 2.7975544656013684, "learning_rate": 4.297241821680565e-06, "loss": 0.8506, "step": 15060 }, { "epoch": 0.1836008433573422, "grad_norm": 2.8338834588662425, "learning_rate": 4.296921103271328e-06, "loss": 0.8472, "step": 15065 }, { "epoch": 0.18366177958149002, "grad_norm": 2.419287938662134, "learning_rate": 4.296600384862092e-06, "loss": 0.7939, "step": 15070 }, { "epoch": 0.18372271580563782, "grad_norm": 2.3779808524430455, "learning_rate": 4.296279666452855e-06, "loss": 0.8533, "step": 15075 }, { "epoch": 0.18378365202978564, "grad_norm": 2.5754257891996852, "learning_rate": 4.295958948043618e-06, "loss": 0.88, "step": 15080 }, { "epoch": 0.18384458825393343, "grad_norm": 2.4207740009262984, "learning_rate": 4.295638229634382e-06, "loss": 0.7978, "step": 15085 }, { "epoch": 0.18390552447808123, "grad_norm": 2.757455693381202, "learning_rate": 4.295317511225145e-06, "loss": 0.8451, "step": 15090 }, { "epoch": 0.18396646070222905, "grad_norm": 2.1834343207448113, "learning_rate": 4.2949967928159085e-06, "loss": 0.6907, "step": 15095 }, { "epoch": 0.18402739692637685, "grad_norm": 3.6816694239592396, "learning_rate": 4.2946760744066715e-06, "loss": 0.8209, "step": 15100 }, { "epoch": 0.18408833315052467, "grad_norm": 2.4535723100285445, "learning_rate": 4.2943553559974345e-06, "loss": 0.813, "step": 15105 }, { "epoch": 0.18414926937467246, "grad_norm": 2.505036525850394, "learning_rate": 4.294034637588198e-06, "loss": 0.8187, "step": 15110 }, { "epoch": 0.1842102055988203, "grad_norm": 2.8189659458934457, "learning_rate": 4.293713919178961e-06, "loss": 0.804, "step": 15115 }, { "epoch": 0.18427114182296808, "grad_norm": 2.5797633117615697, "learning_rate": 4.293393200769724e-06, "loss": 0.8752, "step": 15120 }, { "epoch": 0.18433207804711588, "grad_norm": 3.054363753630027, "learning_rate": 4.293072482360488e-06, "loss": 0.7842, "step": 15125 }, { "epoch": 0.1843930142712637, "grad_norm": 2.88250089009054, "learning_rate": 4.292751763951251e-06, "loss": 0.8472, "step": 15130 }, { "epoch": 0.1844539504954115, "grad_norm": 2.1557474158725265, "learning_rate": 4.292431045542014e-06, "loss": 0.8029, "step": 15135 }, { "epoch": 0.18451488671955932, "grad_norm": 2.4716595352693056, "learning_rate": 4.292110327132778e-06, "loss": 0.7904, "step": 15140 }, { "epoch": 0.1845758229437071, "grad_norm": 2.3734851182418084, "learning_rate": 4.291789608723541e-06, "loss": 0.8047, "step": 15145 }, { "epoch": 0.18463675916785494, "grad_norm": 2.5882055732964955, "learning_rate": 4.291468890314304e-06, "loss": 0.921, "step": 15150 }, { "epoch": 0.18469769539200273, "grad_norm": 2.859086234022615, "learning_rate": 4.291148171905068e-06, "loss": 0.7884, "step": 15155 }, { "epoch": 0.18475863161615053, "grad_norm": 2.7409589947491906, "learning_rate": 4.290827453495831e-06, "loss": 0.8708, "step": 15160 }, { "epoch": 0.18481956784029835, "grad_norm": 3.2408645925282333, "learning_rate": 4.290506735086594e-06, "loss": 0.8102, "step": 15165 }, { "epoch": 0.18488050406444614, "grad_norm": 3.2920149879690435, "learning_rate": 4.290186016677358e-06, "loss": 0.7879, "step": 15170 }, { "epoch": 0.18494144028859397, "grad_norm": 2.9195293893600613, "learning_rate": 4.289865298268121e-06, "loss": 0.8024, "step": 15175 }, { "epoch": 0.18500237651274176, "grad_norm": 2.313981026492822, "learning_rate": 4.289544579858884e-06, "loss": 0.816, "step": 15180 }, { "epoch": 0.18506331273688958, "grad_norm": 3.4747602790914356, "learning_rate": 4.289223861449647e-06, "loss": 0.8295, "step": 15185 }, { "epoch": 0.18512424896103738, "grad_norm": 2.72890340294029, "learning_rate": 4.288903143040411e-06, "loss": 0.8303, "step": 15190 }, { "epoch": 0.18518518518518517, "grad_norm": 2.4047428006095837, "learning_rate": 4.288582424631174e-06, "loss": 0.8344, "step": 15195 }, { "epoch": 0.185246121409333, "grad_norm": 3.8938507205149997, "learning_rate": 4.288261706221937e-06, "loss": 0.8614, "step": 15200 }, { "epoch": 0.1853070576334808, "grad_norm": 2.6256982079237807, "learning_rate": 4.287940987812701e-06, "loss": 0.7493, "step": 15205 }, { "epoch": 0.18536799385762862, "grad_norm": 2.8527849075104155, "learning_rate": 4.287620269403464e-06, "loss": 0.7746, "step": 15210 }, { "epoch": 0.1854289300817764, "grad_norm": 2.0753088364133627, "learning_rate": 4.287299550994227e-06, "loss": 0.7649, "step": 15215 }, { "epoch": 0.1854898663059242, "grad_norm": 4.316378149876947, "learning_rate": 4.286978832584991e-06, "loss": 0.8044, "step": 15220 }, { "epoch": 0.18555080253007203, "grad_norm": 2.7641292393064543, "learning_rate": 4.286658114175754e-06, "loss": 0.8307, "step": 15225 }, { "epoch": 0.18561173875421982, "grad_norm": 2.673231239191506, "learning_rate": 4.2863373957665176e-06, "loss": 0.8144, "step": 15230 }, { "epoch": 0.18567267497836765, "grad_norm": 2.472295007622169, "learning_rate": 4.2860166773572806e-06, "loss": 0.8351, "step": 15235 }, { "epoch": 0.18573361120251544, "grad_norm": 2.326075391584306, "learning_rate": 4.2856959589480444e-06, "loss": 0.7738, "step": 15240 }, { "epoch": 0.18579454742666326, "grad_norm": 4.572027802044097, "learning_rate": 4.2853752405388075e-06, "loss": 0.8019, "step": 15245 }, { "epoch": 0.18585548365081106, "grad_norm": 2.7757845262592364, "learning_rate": 4.2850545221295705e-06, "loss": 0.8149, "step": 15250 }, { "epoch": 0.18591641987495885, "grad_norm": 3.097656152214308, "learning_rate": 4.284733803720334e-06, "loss": 0.9104, "step": 15255 }, { "epoch": 0.18597735609910668, "grad_norm": 2.6386915078798485, "learning_rate": 4.284413085311097e-06, "loss": 0.7318, "step": 15260 }, { "epoch": 0.18603829232325447, "grad_norm": 2.9095840970948474, "learning_rate": 4.28409236690186e-06, "loss": 0.8603, "step": 15265 }, { "epoch": 0.1860992285474023, "grad_norm": 2.758238201481725, "learning_rate": 4.283771648492624e-06, "loss": 0.8117, "step": 15270 }, { "epoch": 0.1861601647715501, "grad_norm": 2.2372654826308613, "learning_rate": 4.283450930083387e-06, "loss": 0.8186, "step": 15275 }, { "epoch": 0.1862211009956979, "grad_norm": 2.7460658391534762, "learning_rate": 4.28313021167415e-06, "loss": 0.8772, "step": 15280 }, { "epoch": 0.1862820372198457, "grad_norm": 2.4661964099746214, "learning_rate": 4.282809493264914e-06, "loss": 0.8478, "step": 15285 }, { "epoch": 0.1863429734439935, "grad_norm": 3.4222740506029363, "learning_rate": 4.282488774855677e-06, "loss": 0.7406, "step": 15290 }, { "epoch": 0.18640390966814133, "grad_norm": 2.8227666231201796, "learning_rate": 4.28216805644644e-06, "loss": 0.8197, "step": 15295 }, { "epoch": 0.18646484589228912, "grad_norm": 2.4346606532397312, "learning_rate": 4.281847338037204e-06, "loss": 0.7637, "step": 15300 }, { "epoch": 0.18652578211643694, "grad_norm": 3.583866701387017, "learning_rate": 4.281526619627967e-06, "loss": 0.8912, "step": 15305 }, { "epoch": 0.18658671834058474, "grad_norm": 2.5024012833802853, "learning_rate": 4.28120590121873e-06, "loss": 0.894, "step": 15310 }, { "epoch": 0.18664765456473256, "grad_norm": 2.8490218896805337, "learning_rate": 4.280885182809494e-06, "loss": 0.7786, "step": 15315 }, { "epoch": 0.18670859078888036, "grad_norm": 2.7389503660207186, "learning_rate": 4.280564464400257e-06, "loss": 0.7932, "step": 15320 }, { "epoch": 0.18676952701302815, "grad_norm": 2.2071067342983075, "learning_rate": 4.28024374599102e-06, "loss": 0.8168, "step": 15325 }, { "epoch": 0.18683046323717598, "grad_norm": 2.364071907864032, "learning_rate": 4.279923027581784e-06, "loss": 0.7977, "step": 15330 }, { "epoch": 0.18689139946132377, "grad_norm": 2.4641436883913066, "learning_rate": 4.279602309172547e-06, "loss": 0.8247, "step": 15335 }, { "epoch": 0.1869523356854716, "grad_norm": 2.828139811311176, "learning_rate": 4.27928159076331e-06, "loss": 0.8479, "step": 15340 }, { "epoch": 0.1870132719096194, "grad_norm": 3.4196448536289252, "learning_rate": 4.278960872354073e-06, "loss": 0.8675, "step": 15345 }, { "epoch": 0.1870742081337672, "grad_norm": 2.4580678595811447, "learning_rate": 4.278640153944837e-06, "loss": 0.8273, "step": 15350 }, { "epoch": 0.187135144357915, "grad_norm": 2.679051966598302, "learning_rate": 4.2783194355356e-06, "loss": 0.8083, "step": 15355 }, { "epoch": 0.1871960805820628, "grad_norm": 2.692700029437895, "learning_rate": 4.277998717126363e-06, "loss": 0.8028, "step": 15360 }, { "epoch": 0.18725701680621062, "grad_norm": 2.877173638620109, "learning_rate": 4.277677998717127e-06, "loss": 0.7086, "step": 15365 }, { "epoch": 0.18731795303035842, "grad_norm": 2.5455315026306664, "learning_rate": 4.27735728030789e-06, "loss": 0.7709, "step": 15370 }, { "epoch": 0.18737888925450624, "grad_norm": 3.227446509055206, "learning_rate": 4.2770365618986535e-06, "loss": 0.8752, "step": 15375 }, { "epoch": 0.18743982547865404, "grad_norm": 2.4581624746606314, "learning_rate": 4.2767158434894165e-06, "loss": 0.8421, "step": 15380 }, { "epoch": 0.18750076170280186, "grad_norm": 3.427920866863407, "learning_rate": 4.2763951250801795e-06, "loss": 0.9027, "step": 15385 }, { "epoch": 0.18756169792694966, "grad_norm": 2.872178454169183, "learning_rate": 4.276074406670943e-06, "loss": 0.8379, "step": 15390 }, { "epoch": 0.18762263415109745, "grad_norm": 2.112668317510055, "learning_rate": 4.2757536882617064e-06, "loss": 0.8002, "step": 15395 }, { "epoch": 0.18768357037524527, "grad_norm": 2.9971526928572962, "learning_rate": 4.27543296985247e-06, "loss": 0.9013, "step": 15400 }, { "epoch": 0.18774450659939307, "grad_norm": 2.807421459608953, "learning_rate": 4.275112251443233e-06, "loss": 0.8436, "step": 15405 }, { "epoch": 0.1878054428235409, "grad_norm": 2.8201305002170196, "learning_rate": 4.274791533033996e-06, "loss": 0.8107, "step": 15410 }, { "epoch": 0.1878663790476887, "grad_norm": 2.8349751667576775, "learning_rate": 4.27447081462476e-06, "loss": 0.8181, "step": 15415 }, { "epoch": 0.1879273152718365, "grad_norm": 2.4682366202974824, "learning_rate": 4.274150096215523e-06, "loss": 0.7917, "step": 15420 }, { "epoch": 0.1879882514959843, "grad_norm": 2.758202493845624, "learning_rate": 4.273829377806287e-06, "loss": 0.8413, "step": 15425 }, { "epoch": 0.1880491877201321, "grad_norm": 2.254588761074366, "learning_rate": 4.27350865939705e-06, "loss": 0.8559, "step": 15430 }, { "epoch": 0.18811012394427992, "grad_norm": 2.1399215694865408, "learning_rate": 4.273187940987813e-06, "loss": 0.8601, "step": 15435 }, { "epoch": 0.18817106016842772, "grad_norm": 2.9236359961308174, "learning_rate": 4.272867222578576e-06, "loss": 0.8477, "step": 15440 }, { "epoch": 0.18823199639257554, "grad_norm": 2.714708058634138, "learning_rate": 4.27254650416934e-06, "loss": 0.8767, "step": 15445 }, { "epoch": 0.18829293261672334, "grad_norm": 2.4084351364204886, "learning_rate": 4.272225785760103e-06, "loss": 0.8256, "step": 15450 }, { "epoch": 0.18835386884087116, "grad_norm": 2.715756079623808, "learning_rate": 4.271905067350866e-06, "loss": 0.8289, "step": 15455 }, { "epoch": 0.18841480506501895, "grad_norm": 2.7911738174841823, "learning_rate": 4.27158434894163e-06, "loss": 0.8058, "step": 15460 }, { "epoch": 0.18847574128916675, "grad_norm": 2.5578629577089456, "learning_rate": 4.271263630532393e-06, "loss": 0.7983, "step": 15465 }, { "epoch": 0.18853667751331457, "grad_norm": 6.746413453765723, "learning_rate": 4.270942912123156e-06, "loss": 0.8016, "step": 15470 }, { "epoch": 0.18859761373746237, "grad_norm": 2.6159088083236077, "learning_rate": 4.27062219371392e-06, "loss": 0.8343, "step": 15475 }, { "epoch": 0.1886585499616102, "grad_norm": 2.2940786636780053, "learning_rate": 4.270301475304683e-06, "loss": 0.8287, "step": 15480 }, { "epoch": 0.18871948618575798, "grad_norm": 2.8695325354171413, "learning_rate": 4.269980756895446e-06, "loss": 0.8518, "step": 15485 }, { "epoch": 0.18878042240990578, "grad_norm": 2.3760798255020434, "learning_rate": 4.26966003848621e-06, "loss": 0.7733, "step": 15490 }, { "epoch": 0.1888413586340536, "grad_norm": 2.3808351201925504, "learning_rate": 4.269339320076973e-06, "loss": 0.7871, "step": 15495 }, { "epoch": 0.1889022948582014, "grad_norm": 2.627627400282136, "learning_rate": 4.269018601667736e-06, "loss": 0.8459, "step": 15500 }, { "epoch": 0.18896323108234922, "grad_norm": 2.8489191451761378, "learning_rate": 4.2686978832584996e-06, "loss": 0.8355, "step": 15505 }, { "epoch": 0.18902416730649702, "grad_norm": 2.2626144512791746, "learning_rate": 4.268377164849263e-06, "loss": 0.7627, "step": 15510 }, { "epoch": 0.18908510353064484, "grad_norm": 2.8087436161143113, "learning_rate": 4.268056446440026e-06, "loss": 0.8185, "step": 15515 }, { "epoch": 0.18914603975479263, "grad_norm": 2.347590264999584, "learning_rate": 4.2677357280307895e-06, "loss": 0.734, "step": 15520 }, { "epoch": 0.18920697597894043, "grad_norm": 4.199196404496875, "learning_rate": 4.2674150096215525e-06, "loss": 0.7982, "step": 15525 }, { "epoch": 0.18926791220308825, "grad_norm": 2.535212681842002, "learning_rate": 4.2670942912123155e-06, "loss": 0.8658, "step": 15530 }, { "epoch": 0.18932884842723605, "grad_norm": 2.6404336440197786, "learning_rate": 4.266773572803079e-06, "loss": 0.7535, "step": 15535 }, { "epoch": 0.18938978465138387, "grad_norm": 3.3716947015056857, "learning_rate": 4.266452854393842e-06, "loss": 0.8075, "step": 15540 }, { "epoch": 0.18945072087553166, "grad_norm": 2.5819759319201014, "learning_rate": 4.266132135984606e-06, "loss": 0.8344, "step": 15545 }, { "epoch": 0.1895116570996795, "grad_norm": 3.062636788000424, "learning_rate": 4.265811417575369e-06, "loss": 0.9211, "step": 15550 }, { "epoch": 0.18957259332382728, "grad_norm": 3.210999147204217, "learning_rate": 4.265490699166132e-06, "loss": 0.8334, "step": 15555 }, { "epoch": 0.18963352954797508, "grad_norm": 3.600916013534467, "learning_rate": 4.265169980756896e-06, "loss": 0.8238, "step": 15560 }, { "epoch": 0.1896944657721229, "grad_norm": 6.888942660328385, "learning_rate": 4.264849262347659e-06, "loss": 0.7698, "step": 15565 }, { "epoch": 0.1897554019962707, "grad_norm": 3.092906115433652, "learning_rate": 4.264528543938423e-06, "loss": 0.8254, "step": 15570 }, { "epoch": 0.18981633822041852, "grad_norm": 2.727000719497872, "learning_rate": 4.264207825529186e-06, "loss": 0.7623, "step": 15575 }, { "epoch": 0.1898772744445663, "grad_norm": 2.8315864342488486, "learning_rate": 4.263887107119949e-06, "loss": 0.867, "step": 15580 }, { "epoch": 0.18993821066871414, "grad_norm": 2.26995359113283, "learning_rate": 4.263566388710713e-06, "loss": 0.8231, "step": 15585 }, { "epoch": 0.18999914689286193, "grad_norm": 3.041955872876288, "learning_rate": 4.263245670301476e-06, "loss": 0.8499, "step": 15590 }, { "epoch": 0.19006008311700973, "grad_norm": 2.0642246051023085, "learning_rate": 4.262924951892239e-06, "loss": 0.7503, "step": 15595 }, { "epoch": 0.19012101934115755, "grad_norm": 2.7355629259071303, "learning_rate": 4.262604233483002e-06, "loss": 0.8352, "step": 15600 }, { "epoch": 0.19018195556530534, "grad_norm": 2.544560352113982, "learning_rate": 4.262283515073766e-06, "loss": 0.8019, "step": 15605 }, { "epoch": 0.19024289178945317, "grad_norm": 2.722647035575082, "learning_rate": 4.261962796664529e-06, "loss": 0.8117, "step": 15610 }, { "epoch": 0.19030382801360096, "grad_norm": 2.5426429778520743, "learning_rate": 4.261642078255292e-06, "loss": 0.9042, "step": 15615 }, { "epoch": 0.19036476423774878, "grad_norm": 2.605298706175902, "learning_rate": 4.261321359846056e-06, "loss": 0.7248, "step": 15620 }, { "epoch": 0.19042570046189658, "grad_norm": 2.3013418240006747, "learning_rate": 4.261000641436819e-06, "loss": 0.8204, "step": 15625 }, { "epoch": 0.19048663668604437, "grad_norm": 2.603331332886921, "learning_rate": 4.260679923027582e-06, "loss": 0.7712, "step": 15630 }, { "epoch": 0.1905475729101922, "grad_norm": 2.7776785025755473, "learning_rate": 4.260359204618346e-06, "loss": 0.7799, "step": 15635 }, { "epoch": 0.19060850913434, "grad_norm": 2.760907421851427, "learning_rate": 4.260038486209109e-06, "loss": 0.797, "step": 15640 }, { "epoch": 0.19066944535848782, "grad_norm": 2.5508577338460974, "learning_rate": 4.259717767799872e-06, "loss": 0.791, "step": 15645 }, { "epoch": 0.1907303815826356, "grad_norm": 2.4617300620617835, "learning_rate": 4.2593970493906355e-06, "loss": 0.7713, "step": 15650 }, { "epoch": 0.19079131780678343, "grad_norm": 2.5325670056742844, "learning_rate": 4.2590763309813985e-06, "loss": 0.8714, "step": 15655 }, { "epoch": 0.19085225403093123, "grad_norm": 2.332943697253937, "learning_rate": 4.2587556125721616e-06, "loss": 0.7918, "step": 15660 }, { "epoch": 0.19091319025507902, "grad_norm": 2.7355157370247762, "learning_rate": 4.2584348941629254e-06, "loss": 0.857, "step": 15665 }, { "epoch": 0.19097412647922685, "grad_norm": 2.301322270581886, "learning_rate": 4.2581141757536884e-06, "loss": 0.8227, "step": 15670 }, { "epoch": 0.19103506270337464, "grad_norm": 2.5684531770853742, "learning_rate": 4.2577934573444515e-06, "loss": 0.7844, "step": 15675 }, { "epoch": 0.19109599892752246, "grad_norm": 3.173530984383203, "learning_rate": 4.257472738935215e-06, "loss": 0.8379, "step": 15680 }, { "epoch": 0.19115693515167026, "grad_norm": 2.174785780681385, "learning_rate": 4.257152020525978e-06, "loss": 0.8033, "step": 15685 }, { "epoch": 0.19121787137581808, "grad_norm": 2.572360568546852, "learning_rate": 4.256831302116742e-06, "loss": 0.8512, "step": 15690 }, { "epoch": 0.19127880759996588, "grad_norm": 2.2208983675011846, "learning_rate": 4.256510583707505e-06, "loss": 0.7981, "step": 15695 }, { "epoch": 0.19133974382411367, "grad_norm": 2.4456080993861313, "learning_rate": 4.256189865298268e-06, "loss": 0.8226, "step": 15700 }, { "epoch": 0.1914006800482615, "grad_norm": 2.9286555407086468, "learning_rate": 4.255869146889032e-06, "loss": 0.8516, "step": 15705 }, { "epoch": 0.1914616162724093, "grad_norm": 2.338086231377474, "learning_rate": 4.255548428479795e-06, "loss": 0.8256, "step": 15710 }, { "epoch": 0.1915225524965571, "grad_norm": 2.5272411529988625, "learning_rate": 4.255227710070559e-06, "loss": 0.759, "step": 15715 }, { "epoch": 0.1915834887207049, "grad_norm": 3.2930638968704433, "learning_rate": 4.254906991661322e-06, "loss": 0.8363, "step": 15720 }, { "epoch": 0.1916444249448527, "grad_norm": 2.532074394336008, "learning_rate": 4.254586273252085e-06, "loss": 0.7333, "step": 15725 }, { "epoch": 0.19170536116900053, "grad_norm": 2.742014248676192, "learning_rate": 4.254265554842849e-06, "loss": 0.8121, "step": 15730 }, { "epoch": 0.19176629739314832, "grad_norm": 3.6740815092956747, "learning_rate": 4.253944836433612e-06, "loss": 0.6834, "step": 15735 }, { "epoch": 0.19182723361729614, "grad_norm": 2.281023088713783, "learning_rate": 4.253624118024375e-06, "loss": 0.7346, "step": 15740 }, { "epoch": 0.19188816984144394, "grad_norm": 2.754270816674074, "learning_rate": 4.253303399615139e-06, "loss": 0.8837, "step": 15745 }, { "epoch": 0.19194910606559176, "grad_norm": 2.532056909023041, "learning_rate": 4.252982681205902e-06, "loss": 0.7877, "step": 15750 }, { "epoch": 0.19201004228973956, "grad_norm": 2.4235292681990694, "learning_rate": 4.252661962796665e-06, "loss": 0.823, "step": 15755 }, { "epoch": 0.19207097851388735, "grad_norm": 2.291196259233068, "learning_rate": 4.252341244387429e-06, "loss": 0.7829, "step": 15760 }, { "epoch": 0.19213191473803518, "grad_norm": 2.7552521775724443, "learning_rate": 4.252020525978192e-06, "loss": 0.8249, "step": 15765 }, { "epoch": 0.19219285096218297, "grad_norm": 2.5600353858398415, "learning_rate": 4.251699807568955e-06, "loss": 0.8345, "step": 15770 }, { "epoch": 0.1922537871863308, "grad_norm": 2.4601154164854666, "learning_rate": 4.251379089159718e-06, "loss": 0.7918, "step": 15775 }, { "epoch": 0.1923147234104786, "grad_norm": 2.7572264129893465, "learning_rate": 4.251058370750482e-06, "loss": 0.7038, "step": 15780 }, { "epoch": 0.1923756596346264, "grad_norm": 2.314103385485637, "learning_rate": 4.250737652341245e-06, "loss": 0.8667, "step": 15785 }, { "epoch": 0.1924365958587742, "grad_norm": 2.6429228032241845, "learning_rate": 4.250416933932008e-06, "loss": 0.7898, "step": 15790 }, { "epoch": 0.192497532082922, "grad_norm": 2.9630201456397636, "learning_rate": 4.2500962155227715e-06, "loss": 0.8644, "step": 15795 }, { "epoch": 0.19255846830706982, "grad_norm": 2.485133858797359, "learning_rate": 4.2497754971135345e-06, "loss": 0.805, "step": 15800 }, { "epoch": 0.19261940453121762, "grad_norm": 2.741999860456906, "learning_rate": 4.2494547787042975e-06, "loss": 0.7883, "step": 15805 }, { "epoch": 0.19268034075536544, "grad_norm": 2.198944244793579, "learning_rate": 4.249134060295061e-06, "loss": 0.8708, "step": 15810 }, { "epoch": 0.19274127697951324, "grad_norm": 2.564510347466662, "learning_rate": 4.248813341885824e-06, "loss": 0.7853, "step": 15815 }, { "epoch": 0.19280221320366106, "grad_norm": 2.687281455590482, "learning_rate": 4.248492623476587e-06, "loss": 0.8484, "step": 15820 }, { "epoch": 0.19286314942780886, "grad_norm": 3.024103681938331, "learning_rate": 4.248171905067351e-06, "loss": 0.7727, "step": 15825 }, { "epoch": 0.19292408565195665, "grad_norm": 2.6864169141945378, "learning_rate": 4.247851186658114e-06, "loss": 0.8315, "step": 15830 }, { "epoch": 0.19298502187610447, "grad_norm": 2.2850015570765905, "learning_rate": 4.247530468248877e-06, "loss": 0.7551, "step": 15835 }, { "epoch": 0.19304595810025227, "grad_norm": 2.3925096579266842, "learning_rate": 4.247209749839641e-06, "loss": 0.8281, "step": 15840 }, { "epoch": 0.1931068943244001, "grad_norm": 2.2086367457532043, "learning_rate": 4.246889031430404e-06, "loss": 0.7134, "step": 15845 }, { "epoch": 0.1931678305485479, "grad_norm": 2.620486068813879, "learning_rate": 4.246568313021168e-06, "loss": 0.8437, "step": 15850 }, { "epoch": 0.1932287667726957, "grad_norm": 2.4456054403149343, "learning_rate": 4.246247594611931e-06, "loss": 0.7972, "step": 15855 }, { "epoch": 0.1932897029968435, "grad_norm": 2.4240733190721206, "learning_rate": 4.245926876202694e-06, "loss": 0.872, "step": 15860 }, { "epoch": 0.1933506392209913, "grad_norm": 3.129418081600166, "learning_rate": 4.245606157793458e-06, "loss": 0.9116, "step": 15865 }, { "epoch": 0.19341157544513912, "grad_norm": 2.5903075912101903, "learning_rate": 4.245285439384221e-06, "loss": 0.7787, "step": 15870 }, { "epoch": 0.19347251166928692, "grad_norm": 2.5186121589634083, "learning_rate": 4.244964720974985e-06, "loss": 0.8237, "step": 15875 }, { "epoch": 0.19353344789343474, "grad_norm": 2.1321787539873687, "learning_rate": 4.244644002565748e-06, "loss": 0.7726, "step": 15880 }, { "epoch": 0.19359438411758254, "grad_norm": 2.64919864216287, "learning_rate": 4.244323284156511e-06, "loss": 0.7643, "step": 15885 }, { "epoch": 0.19365532034173036, "grad_norm": 2.4903551070057492, "learning_rate": 4.244002565747275e-06, "loss": 0.8502, "step": 15890 }, { "epoch": 0.19371625656587815, "grad_norm": 2.6094447494360953, "learning_rate": 4.243681847338038e-06, "loss": 0.8711, "step": 15895 }, { "epoch": 0.19377719279002595, "grad_norm": 3.7600969173581436, "learning_rate": 4.243361128928801e-06, "loss": 0.8359, "step": 15900 }, { "epoch": 0.19383812901417377, "grad_norm": 2.338529508869509, "learning_rate": 4.243040410519565e-06, "loss": 0.8166, "step": 15905 }, { "epoch": 0.19389906523832157, "grad_norm": 2.469552459344897, "learning_rate": 4.242719692110328e-06, "loss": 0.7473, "step": 15910 }, { "epoch": 0.1939600014624694, "grad_norm": 2.6715687120410214, "learning_rate": 4.242398973701091e-06, "loss": 0.8611, "step": 15915 }, { "epoch": 0.19402093768661718, "grad_norm": 2.541617768871225, "learning_rate": 4.2420782552918545e-06, "loss": 0.8125, "step": 15920 }, { "epoch": 0.194081873910765, "grad_norm": 2.7734747108708646, "learning_rate": 4.2417575368826175e-06, "loss": 0.884, "step": 15925 }, { "epoch": 0.1941428101349128, "grad_norm": 2.5432581007695236, "learning_rate": 4.2414368184733806e-06, "loss": 0.813, "step": 15930 }, { "epoch": 0.1942037463590606, "grad_norm": 2.505846843261633, "learning_rate": 4.241116100064144e-06, "loss": 0.8574, "step": 15935 }, { "epoch": 0.19426468258320842, "grad_norm": 3.627050081842283, "learning_rate": 4.2407953816549074e-06, "loss": 0.8253, "step": 15940 }, { "epoch": 0.19432561880735622, "grad_norm": 2.6434497040572236, "learning_rate": 4.2404746632456705e-06, "loss": 0.8687, "step": 15945 }, { "epoch": 0.19438655503150404, "grad_norm": 2.296822267446635, "learning_rate": 4.2401539448364335e-06, "loss": 0.7874, "step": 15950 }, { "epoch": 0.19444749125565183, "grad_norm": 2.6292640436551267, "learning_rate": 4.239833226427197e-06, "loss": 0.7746, "step": 15955 }, { "epoch": 0.19450842747979963, "grad_norm": 2.59076959435189, "learning_rate": 4.23951250801796e-06, "loss": 0.8064, "step": 15960 }, { "epoch": 0.19456936370394745, "grad_norm": 2.4311127778275914, "learning_rate": 4.239191789608723e-06, "loss": 0.7895, "step": 15965 }, { "epoch": 0.19463029992809525, "grad_norm": 2.3982007790424444, "learning_rate": 4.238871071199487e-06, "loss": 0.8167, "step": 15970 }, { "epoch": 0.19469123615224307, "grad_norm": 2.33675093166489, "learning_rate": 4.23855035279025e-06, "loss": 0.779, "step": 15975 }, { "epoch": 0.19475217237639086, "grad_norm": 2.4174035326674437, "learning_rate": 4.238229634381013e-06, "loss": 0.8547, "step": 15980 }, { "epoch": 0.1948131086005387, "grad_norm": 2.459320872131381, "learning_rate": 4.237908915971777e-06, "loss": 0.8354, "step": 15985 }, { "epoch": 0.19487404482468648, "grad_norm": 2.037634553513837, "learning_rate": 4.23758819756254e-06, "loss": 0.7359, "step": 15990 }, { "epoch": 0.19493498104883428, "grad_norm": 3.283298642925068, "learning_rate": 4.237267479153304e-06, "loss": 0.8081, "step": 15995 }, { "epoch": 0.1949959172729821, "grad_norm": 3.974412235351684, "learning_rate": 4.236946760744067e-06, "loss": 0.8509, "step": 16000 }, { "epoch": 0.1950568534971299, "grad_norm": 3.213372459077361, "learning_rate": 4.23662604233483e-06, "loss": 0.7732, "step": 16005 }, { "epoch": 0.19511778972127772, "grad_norm": 2.4588401571572023, "learning_rate": 4.236305323925594e-06, "loss": 0.8592, "step": 16010 }, { "epoch": 0.1951787259454255, "grad_norm": 2.422482331190248, "learning_rate": 4.235984605516357e-06, "loss": 0.7469, "step": 16015 }, { "epoch": 0.19523966216957334, "grad_norm": 2.5447327249487817, "learning_rate": 4.235663887107121e-06, "loss": 0.7731, "step": 16020 }, { "epoch": 0.19530059839372113, "grad_norm": 2.6566645385048493, "learning_rate": 4.235343168697884e-06, "loss": 0.7981, "step": 16025 }, { "epoch": 0.19536153461786893, "grad_norm": 2.5316204036399803, "learning_rate": 4.235022450288647e-06, "loss": 0.7752, "step": 16030 }, { "epoch": 0.19542247084201675, "grad_norm": 2.3303079641745543, "learning_rate": 4.234701731879411e-06, "loss": 0.8569, "step": 16035 }, { "epoch": 0.19548340706616454, "grad_norm": 2.672372674256919, "learning_rate": 4.234381013470174e-06, "loss": 0.7498, "step": 16040 }, { "epoch": 0.19554434329031237, "grad_norm": 2.5373768174373756, "learning_rate": 4.234060295060937e-06, "loss": 0.8051, "step": 16045 }, { "epoch": 0.19560527951446016, "grad_norm": 2.3055467614720913, "learning_rate": 4.233739576651701e-06, "loss": 0.8572, "step": 16050 }, { "epoch": 0.19566621573860798, "grad_norm": 2.6583478480801648, "learning_rate": 4.233418858242464e-06, "loss": 0.8533, "step": 16055 }, { "epoch": 0.19572715196275578, "grad_norm": 2.5518108474439, "learning_rate": 4.233098139833227e-06, "loss": 0.8072, "step": 16060 }, { "epoch": 0.19578808818690357, "grad_norm": 2.3868123714435785, "learning_rate": 4.2327774214239905e-06, "loss": 0.7859, "step": 16065 }, { "epoch": 0.1958490244110514, "grad_norm": 2.759237149576442, "learning_rate": 4.2324567030147535e-06, "loss": 0.8356, "step": 16070 }, { "epoch": 0.1959099606351992, "grad_norm": 2.2582322963923085, "learning_rate": 4.2321359846055165e-06, "loss": 0.8, "step": 16075 }, { "epoch": 0.19597089685934702, "grad_norm": 2.7177175335118497, "learning_rate": 4.23181526619628e-06, "loss": 0.8077, "step": 16080 }, { "epoch": 0.1960318330834948, "grad_norm": 3.1529789514196316, "learning_rate": 4.231494547787043e-06, "loss": 0.8284, "step": 16085 }, { "epoch": 0.19609276930764263, "grad_norm": 2.327761614523199, "learning_rate": 4.231173829377806e-06, "loss": 0.7927, "step": 16090 }, { "epoch": 0.19615370553179043, "grad_norm": 2.231841402298584, "learning_rate": 4.23085311096857e-06, "loss": 0.8197, "step": 16095 }, { "epoch": 0.19621464175593822, "grad_norm": 2.203676582573319, "learning_rate": 4.230532392559333e-06, "loss": 0.7582, "step": 16100 }, { "epoch": 0.19627557798008605, "grad_norm": 2.423935480872519, "learning_rate": 4.230211674150096e-06, "loss": 0.8124, "step": 16105 }, { "epoch": 0.19633651420423384, "grad_norm": 2.5731466486939363, "learning_rate": 4.229890955740859e-06, "loss": 0.7891, "step": 16110 }, { "epoch": 0.19639745042838166, "grad_norm": 3.0625323803103925, "learning_rate": 4.229570237331623e-06, "loss": 0.851, "step": 16115 }, { "epoch": 0.19645838665252946, "grad_norm": 2.4451691990076094, "learning_rate": 4.229249518922386e-06, "loss": 0.8304, "step": 16120 }, { "epoch": 0.19651932287667728, "grad_norm": 2.6836202396630715, "learning_rate": 4.228928800513149e-06, "loss": 0.7849, "step": 16125 }, { "epoch": 0.19658025910082508, "grad_norm": 2.6425418118343105, "learning_rate": 4.228608082103913e-06, "loss": 0.7861, "step": 16130 }, { "epoch": 0.19664119532497287, "grad_norm": 2.5537599246103877, "learning_rate": 4.228287363694676e-06, "loss": 0.7902, "step": 16135 }, { "epoch": 0.1967021315491207, "grad_norm": 2.2806087319189654, "learning_rate": 4.227966645285439e-06, "loss": 0.8114, "step": 16140 }, { "epoch": 0.1967630677732685, "grad_norm": 2.461096872764485, "learning_rate": 4.227645926876203e-06, "loss": 0.7542, "step": 16145 }, { "epoch": 0.1968240039974163, "grad_norm": 2.6676817758692195, "learning_rate": 4.227325208466966e-06, "loss": 0.7522, "step": 16150 }, { "epoch": 0.1968849402215641, "grad_norm": 2.5054270777047023, "learning_rate": 4.22700449005773e-06, "loss": 0.8379, "step": 16155 }, { "epoch": 0.19694587644571193, "grad_norm": 3.1927507343359873, "learning_rate": 4.226683771648493e-06, "loss": 0.8043, "step": 16160 }, { "epoch": 0.19700681266985973, "grad_norm": 2.1852888191171287, "learning_rate": 4.226363053239257e-06, "loss": 0.9237, "step": 16165 }, { "epoch": 0.19706774889400752, "grad_norm": 1.9843489603277458, "learning_rate": 4.22604233483002e-06, "loss": 0.8134, "step": 16170 }, { "epoch": 0.19712868511815534, "grad_norm": 2.8073227736852986, "learning_rate": 4.225721616420783e-06, "loss": 0.8249, "step": 16175 }, { "epoch": 0.19718962134230314, "grad_norm": 2.4978734859502674, "learning_rate": 4.225400898011547e-06, "loss": 0.8411, "step": 16180 }, { "epoch": 0.19725055756645096, "grad_norm": 2.726066516679269, "learning_rate": 4.22508017960231e-06, "loss": 0.9244, "step": 16185 }, { "epoch": 0.19731149379059876, "grad_norm": 2.301817140543679, "learning_rate": 4.224759461193073e-06, "loss": 0.8256, "step": 16190 }, { "epoch": 0.19737243001474655, "grad_norm": 2.4906871974789797, "learning_rate": 4.2244387427838365e-06, "loss": 0.873, "step": 16195 }, { "epoch": 0.19743336623889438, "grad_norm": 3.0145053395369534, "learning_rate": 4.2241180243745996e-06, "loss": 0.7999, "step": 16200 }, { "epoch": 0.19749430246304217, "grad_norm": 2.26047621727106, "learning_rate": 4.223797305965363e-06, "loss": 0.7652, "step": 16205 }, { "epoch": 0.19755523868719, "grad_norm": 2.982287656804249, "learning_rate": 4.2234765875561264e-06, "loss": 0.8202, "step": 16210 }, { "epoch": 0.1976161749113378, "grad_norm": 3.186137766789023, "learning_rate": 4.2231558691468895e-06, "loss": 0.7765, "step": 16215 }, { "epoch": 0.1976771111354856, "grad_norm": 1.97801016007544, "learning_rate": 4.2228351507376525e-06, "loss": 0.8139, "step": 16220 }, { "epoch": 0.1977380473596334, "grad_norm": 2.316374517864735, "learning_rate": 4.222514432328416e-06, "loss": 0.8025, "step": 16225 }, { "epoch": 0.1977989835837812, "grad_norm": 2.2440176412459802, "learning_rate": 4.222193713919179e-06, "loss": 0.7214, "step": 16230 }, { "epoch": 0.19785991980792902, "grad_norm": 2.30476692673018, "learning_rate": 4.221872995509942e-06, "loss": 0.8191, "step": 16235 }, { "epoch": 0.19792085603207682, "grad_norm": 2.4800811873660455, "learning_rate": 4.221552277100706e-06, "loss": 0.7843, "step": 16240 }, { "epoch": 0.19798179225622464, "grad_norm": 2.2453044141756466, "learning_rate": 4.221231558691469e-06, "loss": 0.802, "step": 16245 }, { "epoch": 0.19804272848037244, "grad_norm": 2.152296768678005, "learning_rate": 4.220910840282232e-06, "loss": 0.7685, "step": 16250 }, { "epoch": 0.19810366470452026, "grad_norm": 2.304812384631705, "learning_rate": 4.220590121872996e-06, "loss": 0.8326, "step": 16255 }, { "epoch": 0.19816460092866806, "grad_norm": 2.4941067998858464, "learning_rate": 4.220269403463759e-06, "loss": 0.8104, "step": 16260 }, { "epoch": 0.19822553715281585, "grad_norm": 2.82639745777362, "learning_rate": 4.219948685054522e-06, "loss": 0.8644, "step": 16265 }, { "epoch": 0.19828647337696367, "grad_norm": 2.1017217304190114, "learning_rate": 4.219627966645285e-06, "loss": 0.7644, "step": 16270 }, { "epoch": 0.19834740960111147, "grad_norm": 2.9808911934996294, "learning_rate": 4.219307248236049e-06, "loss": 0.8554, "step": 16275 }, { "epoch": 0.1984083458252593, "grad_norm": 2.2276592556942068, "learning_rate": 4.218986529826812e-06, "loss": 0.7886, "step": 16280 }, { "epoch": 0.1984692820494071, "grad_norm": 2.495232710450291, "learning_rate": 4.218665811417575e-06, "loss": 0.8302, "step": 16285 }, { "epoch": 0.1985302182735549, "grad_norm": 2.948028036281596, "learning_rate": 4.218345093008339e-06, "loss": 0.859, "step": 16290 }, { "epoch": 0.1985911544977027, "grad_norm": 2.9441951065375345, "learning_rate": 4.218024374599102e-06, "loss": 0.8797, "step": 16295 }, { "epoch": 0.1986520907218505, "grad_norm": 2.438108607948034, "learning_rate": 4.217703656189866e-06, "loss": 0.8302, "step": 16300 }, { "epoch": 0.19871302694599832, "grad_norm": 2.481579687319748, "learning_rate": 4.217382937780629e-06, "loss": 0.7533, "step": 16305 }, { "epoch": 0.19877396317014612, "grad_norm": 2.260203872687831, "learning_rate": 4.217062219371392e-06, "loss": 0.8258, "step": 16310 }, { "epoch": 0.19883489939429394, "grad_norm": 2.674079332539364, "learning_rate": 4.216741500962156e-06, "loss": 0.774, "step": 16315 }, { "epoch": 0.19889583561844174, "grad_norm": 2.5023591720112752, "learning_rate": 4.216420782552919e-06, "loss": 0.7883, "step": 16320 }, { "epoch": 0.19895677184258956, "grad_norm": 2.6146847024693196, "learning_rate": 4.216100064143683e-06, "loss": 0.7324, "step": 16325 }, { "epoch": 0.19901770806673735, "grad_norm": 3.1551330012710004, "learning_rate": 4.215779345734446e-06, "loss": 0.8734, "step": 16330 }, { "epoch": 0.19907864429088515, "grad_norm": 2.906472642167845, "learning_rate": 4.215458627325209e-06, "loss": 0.7858, "step": 16335 }, { "epoch": 0.19913958051503297, "grad_norm": 2.1811612463812593, "learning_rate": 4.2151379089159725e-06, "loss": 0.7911, "step": 16340 }, { "epoch": 0.19920051673918077, "grad_norm": 3.1219168343906616, "learning_rate": 4.2148171905067355e-06, "loss": 0.8805, "step": 16345 }, { "epoch": 0.1992614529633286, "grad_norm": 3.2409464532737497, "learning_rate": 4.214496472097499e-06, "loss": 0.8505, "step": 16350 }, { "epoch": 0.19932238918747638, "grad_norm": 2.2979809609129282, "learning_rate": 4.214175753688262e-06, "loss": 0.8287, "step": 16355 }, { "epoch": 0.1993833254116242, "grad_norm": 3.0469501242706962, "learning_rate": 4.213855035279025e-06, "loss": 0.9229, "step": 16360 }, { "epoch": 0.199444261635772, "grad_norm": 2.8136402226999833, "learning_rate": 4.2135343168697884e-06, "loss": 0.8272, "step": 16365 }, { "epoch": 0.1995051978599198, "grad_norm": 2.316908303841752, "learning_rate": 4.213213598460552e-06, "loss": 0.7926, "step": 16370 }, { "epoch": 0.19956613408406762, "grad_norm": 2.698875789579822, "learning_rate": 4.212892880051315e-06, "loss": 0.753, "step": 16375 }, { "epoch": 0.19962707030821542, "grad_norm": 2.5750807288604025, "learning_rate": 4.212572161642078e-06, "loss": 0.8362, "step": 16380 }, { "epoch": 0.19968800653236324, "grad_norm": 2.7153046184848635, "learning_rate": 4.212251443232842e-06, "loss": 0.86, "step": 16385 }, { "epoch": 0.19974894275651103, "grad_norm": 4.106015875492547, "learning_rate": 4.211930724823605e-06, "loss": 0.8024, "step": 16390 }, { "epoch": 0.19980987898065886, "grad_norm": 2.2050181273237257, "learning_rate": 4.211610006414368e-06, "loss": 0.8034, "step": 16395 }, { "epoch": 0.19987081520480665, "grad_norm": 2.373665961890377, "learning_rate": 4.211289288005132e-06, "loss": 0.7121, "step": 16400 }, { "epoch": 0.19993175142895445, "grad_norm": 2.2107888548519328, "learning_rate": 4.210968569595895e-06, "loss": 0.8314, "step": 16405 }, { "epoch": 0.19999268765310227, "grad_norm": 2.545304085068842, "learning_rate": 4.210647851186658e-06, "loss": 0.7933, "step": 16410 }, { "epoch": 0.20005362387725006, "grad_norm": 2.2824897233834958, "learning_rate": 4.210327132777422e-06, "loss": 0.8262, "step": 16415 }, { "epoch": 0.2001145601013979, "grad_norm": 2.157445270096657, "learning_rate": 4.210006414368185e-06, "loss": 0.8453, "step": 16420 }, { "epoch": 0.20017549632554568, "grad_norm": 2.4385651312323535, "learning_rate": 4.209685695958948e-06, "loss": 0.8718, "step": 16425 }, { "epoch": 0.20023643254969348, "grad_norm": 2.3971766722918635, "learning_rate": 4.209364977549712e-06, "loss": 0.8328, "step": 16430 }, { "epoch": 0.2002973687738413, "grad_norm": 4.186305005928242, "learning_rate": 4.209044259140475e-06, "loss": 0.7708, "step": 16435 }, { "epoch": 0.2003583049979891, "grad_norm": 2.3523517554308904, "learning_rate": 4.208723540731238e-06, "loss": 0.8164, "step": 16440 }, { "epoch": 0.20041924122213692, "grad_norm": 2.326838416772218, "learning_rate": 4.208402822322002e-06, "loss": 0.8162, "step": 16445 }, { "epoch": 0.2004801774462847, "grad_norm": 2.6103246678662817, "learning_rate": 4.208082103912765e-06, "loss": 0.9005, "step": 16450 }, { "epoch": 0.20054111367043254, "grad_norm": 2.8790221875869726, "learning_rate": 4.207761385503528e-06, "loss": 0.765, "step": 16455 }, { "epoch": 0.20060204989458033, "grad_norm": 2.7682696649202954, "learning_rate": 4.207440667094292e-06, "loss": 0.7903, "step": 16460 }, { "epoch": 0.20066298611872813, "grad_norm": 2.406271273419196, "learning_rate": 4.207119948685055e-06, "loss": 0.8181, "step": 16465 }, { "epoch": 0.20072392234287595, "grad_norm": 1.9776380446031643, "learning_rate": 4.2067992302758186e-06, "loss": 0.7653, "step": 16470 }, { "epoch": 0.20078485856702374, "grad_norm": 2.9749076603006626, "learning_rate": 4.206478511866582e-06, "loss": 0.7955, "step": 16475 }, { "epoch": 0.20084579479117157, "grad_norm": 2.454757990665022, "learning_rate": 4.206157793457345e-06, "loss": 0.8287, "step": 16480 }, { "epoch": 0.20090673101531936, "grad_norm": 2.7855963402596844, "learning_rate": 4.2058370750481085e-06, "loss": 0.9293, "step": 16485 }, { "epoch": 0.20096766723946718, "grad_norm": 2.1626345708097383, "learning_rate": 4.2055163566388715e-06, "loss": 0.8232, "step": 16490 }, { "epoch": 0.20102860346361498, "grad_norm": 4.380020167130986, "learning_rate": 4.205195638229635e-06, "loss": 0.8541, "step": 16495 }, { "epoch": 0.20108953968776277, "grad_norm": 3.281218607366078, "learning_rate": 4.204874919820398e-06, "loss": 0.8939, "step": 16500 }, { "epoch": 0.2011504759119106, "grad_norm": 2.797832288841159, "learning_rate": 4.204554201411161e-06, "loss": 0.8445, "step": 16505 }, { "epoch": 0.2012114121360584, "grad_norm": 3.3401150404158253, "learning_rate": 4.204233483001925e-06, "loss": 0.8338, "step": 16510 }, { "epoch": 0.20127234836020622, "grad_norm": 2.722474210114128, "learning_rate": 4.203912764592688e-06, "loss": 0.8655, "step": 16515 }, { "epoch": 0.201333284584354, "grad_norm": 2.515845477278942, "learning_rate": 4.203592046183451e-06, "loss": 0.8126, "step": 16520 }, { "epoch": 0.20139422080850183, "grad_norm": 3.2422519137353856, "learning_rate": 4.203271327774214e-06, "loss": 0.8259, "step": 16525 }, { "epoch": 0.20145515703264963, "grad_norm": 3.5988993243112333, "learning_rate": 4.202950609364978e-06, "loss": 0.7478, "step": 16530 }, { "epoch": 0.20151609325679742, "grad_norm": 3.9903238945156922, "learning_rate": 4.202629890955741e-06, "loss": 0.7388, "step": 16535 }, { "epoch": 0.20157702948094525, "grad_norm": 2.573097613106827, "learning_rate": 4.202309172546504e-06, "loss": 0.8031, "step": 16540 }, { "epoch": 0.20163796570509304, "grad_norm": 3.035023716181565, "learning_rate": 4.201988454137268e-06, "loss": 0.825, "step": 16545 }, { "epoch": 0.20169890192924086, "grad_norm": 2.1787732799682806, "learning_rate": 4.201667735728031e-06, "loss": 0.75, "step": 16550 }, { "epoch": 0.20175983815338866, "grad_norm": 2.589993203591742, "learning_rate": 4.201347017318794e-06, "loss": 0.8591, "step": 16555 }, { "epoch": 0.20182077437753648, "grad_norm": 2.356064455208509, "learning_rate": 4.201026298909558e-06, "loss": 0.796, "step": 16560 }, { "epoch": 0.20188171060168428, "grad_norm": 2.9616836407347007, "learning_rate": 4.200705580500321e-06, "loss": 0.8001, "step": 16565 }, { "epoch": 0.20194264682583207, "grad_norm": 2.773811399927535, "learning_rate": 4.200384862091084e-06, "loss": 0.821, "step": 16570 }, { "epoch": 0.2020035830499799, "grad_norm": 3.883817286945125, "learning_rate": 4.200064143681848e-06, "loss": 0.7967, "step": 16575 }, { "epoch": 0.2020645192741277, "grad_norm": 2.3420706268787748, "learning_rate": 4.199743425272611e-06, "loss": 0.81, "step": 16580 }, { "epoch": 0.2021254554982755, "grad_norm": 5.870043622400904, "learning_rate": 4.199422706863374e-06, "loss": 0.7999, "step": 16585 }, { "epoch": 0.2021863917224233, "grad_norm": 2.7867369698657276, "learning_rate": 4.199101988454138e-06, "loss": 0.7745, "step": 16590 }, { "epoch": 0.20224732794657113, "grad_norm": 2.474911743117282, "learning_rate": 4.198781270044901e-06, "loss": 0.8162, "step": 16595 }, { "epoch": 0.20230826417071893, "grad_norm": 2.651072861510352, "learning_rate": 4.198460551635664e-06, "loss": 0.8834, "step": 16600 }, { "epoch": 0.20236920039486672, "grad_norm": 2.6677214485034044, "learning_rate": 4.198139833226428e-06, "loss": 0.8616, "step": 16605 }, { "epoch": 0.20243013661901454, "grad_norm": 2.4552149029820405, "learning_rate": 4.197819114817191e-06, "loss": 0.8068, "step": 16610 }, { "epoch": 0.20249107284316234, "grad_norm": 3.6495550471975204, "learning_rate": 4.197498396407954e-06, "loss": 0.8081, "step": 16615 }, { "epoch": 0.20255200906731016, "grad_norm": 2.3461293027303767, "learning_rate": 4.1971776779987175e-06, "loss": 0.8123, "step": 16620 }, { "epoch": 0.20261294529145796, "grad_norm": 2.3783891077127453, "learning_rate": 4.1968569595894806e-06, "loss": 0.8051, "step": 16625 }, { "epoch": 0.20267388151560578, "grad_norm": 2.322087893420533, "learning_rate": 4.196536241180244e-06, "loss": 0.7877, "step": 16630 }, { "epoch": 0.20273481773975358, "grad_norm": 3.5192229334738947, "learning_rate": 4.1962155227710074e-06, "loss": 0.7892, "step": 16635 }, { "epoch": 0.20279575396390137, "grad_norm": 3.0654481995145217, "learning_rate": 4.195894804361771e-06, "loss": 0.7528, "step": 16640 }, { "epoch": 0.2028566901880492, "grad_norm": 2.726952155482881, "learning_rate": 4.195574085952534e-06, "loss": 0.8203, "step": 16645 }, { "epoch": 0.202917626412197, "grad_norm": 3.137521279867999, "learning_rate": 4.195253367543297e-06, "loss": 0.8403, "step": 16650 }, { "epoch": 0.2029785626363448, "grad_norm": 2.3244393180784253, "learning_rate": 4.194932649134061e-06, "loss": 0.8526, "step": 16655 }, { "epoch": 0.2030394988604926, "grad_norm": 3.855911764312113, "learning_rate": 4.194611930724824e-06, "loss": 0.7851, "step": 16660 }, { "epoch": 0.2031004350846404, "grad_norm": 2.545554205144709, "learning_rate": 4.194291212315587e-06, "loss": 0.8389, "step": 16665 }, { "epoch": 0.20316137130878822, "grad_norm": 2.3677704480505906, "learning_rate": 4.193970493906351e-06, "loss": 0.873, "step": 16670 }, { "epoch": 0.20322230753293602, "grad_norm": 3.0521158305463927, "learning_rate": 4.193649775497114e-06, "loss": 0.8854, "step": 16675 }, { "epoch": 0.20328324375708384, "grad_norm": 2.1967615259081925, "learning_rate": 4.193329057087877e-06, "loss": 0.7338, "step": 16680 }, { "epoch": 0.20334417998123164, "grad_norm": 2.2418790014831504, "learning_rate": 4.193008338678641e-06, "loss": 0.7533, "step": 16685 }, { "epoch": 0.20340511620537946, "grad_norm": 3.094917072587923, "learning_rate": 4.192687620269404e-06, "loss": 0.8535, "step": 16690 }, { "epoch": 0.20346605242952726, "grad_norm": 2.595401474080579, "learning_rate": 4.192366901860167e-06, "loss": 0.7656, "step": 16695 }, { "epoch": 0.20352698865367505, "grad_norm": 3.180917328541424, "learning_rate": 4.19204618345093e-06, "loss": 0.8059, "step": 16700 }, { "epoch": 0.20358792487782287, "grad_norm": 2.361640433904933, "learning_rate": 4.191725465041694e-06, "loss": 0.7801, "step": 16705 }, { "epoch": 0.20364886110197067, "grad_norm": 2.8058533354754096, "learning_rate": 4.191404746632457e-06, "loss": 0.79, "step": 16710 }, { "epoch": 0.2037097973261185, "grad_norm": 2.3302316734746817, "learning_rate": 4.19108402822322e-06, "loss": 0.7302, "step": 16715 }, { "epoch": 0.2037707335502663, "grad_norm": 2.583101530697828, "learning_rate": 4.190763309813984e-06, "loss": 0.7621, "step": 16720 }, { "epoch": 0.2038316697744141, "grad_norm": 2.7220760741101837, "learning_rate": 4.190442591404747e-06, "loss": 0.7685, "step": 16725 }, { "epoch": 0.2038926059985619, "grad_norm": 3.7985546685803255, "learning_rate": 4.19012187299551e-06, "loss": 0.8119, "step": 16730 }, { "epoch": 0.2039535422227097, "grad_norm": 3.4059360169824853, "learning_rate": 4.189801154586274e-06, "loss": 0.7789, "step": 16735 }, { "epoch": 0.20401447844685752, "grad_norm": 2.407380726649681, "learning_rate": 4.189480436177037e-06, "loss": 0.8451, "step": 16740 }, { "epoch": 0.20407541467100532, "grad_norm": 2.49393723611366, "learning_rate": 4.1891597177678e-06, "loss": 0.7922, "step": 16745 }, { "epoch": 0.20413635089515314, "grad_norm": 2.4501278453806283, "learning_rate": 4.188838999358564e-06, "loss": 0.8185, "step": 16750 }, { "epoch": 0.20419728711930094, "grad_norm": 2.8582222590408084, "learning_rate": 4.188518280949327e-06, "loss": 0.9326, "step": 16755 }, { "epoch": 0.20425822334344876, "grad_norm": 2.4194792092926303, "learning_rate": 4.18819756254009e-06, "loss": 0.8139, "step": 16760 }, { "epoch": 0.20431915956759655, "grad_norm": 2.3794405067502966, "learning_rate": 4.1878768441308535e-06, "loss": 0.7853, "step": 16765 }, { "epoch": 0.20438009579174435, "grad_norm": 2.117681719006398, "learning_rate": 4.1875561257216165e-06, "loss": 0.8025, "step": 16770 }, { "epoch": 0.20444103201589217, "grad_norm": 2.2404312443803382, "learning_rate": 4.18723540731238e-06, "loss": 0.7489, "step": 16775 }, { "epoch": 0.20450196824003997, "grad_norm": 2.442717365780896, "learning_rate": 4.186914688903143e-06, "loss": 0.7541, "step": 16780 }, { "epoch": 0.2045629044641878, "grad_norm": 2.2847609946611986, "learning_rate": 4.186593970493906e-06, "loss": 0.7937, "step": 16785 }, { "epoch": 0.20462384068833558, "grad_norm": 2.3657887747781183, "learning_rate": 4.18627325208467e-06, "loss": 0.8126, "step": 16790 }, { "epoch": 0.2046847769124834, "grad_norm": 3.194536990622242, "learning_rate": 4.185952533675433e-06, "loss": 0.7883, "step": 16795 }, { "epoch": 0.2047457131366312, "grad_norm": 2.6170515084853414, "learning_rate": 4.185631815266197e-06, "loss": 0.7521, "step": 16800 }, { "epoch": 0.204806649360779, "grad_norm": 2.8422039247058795, "learning_rate": 4.18531109685696e-06, "loss": 0.889, "step": 16805 }, { "epoch": 0.20486758558492682, "grad_norm": 2.407226677735587, "learning_rate": 4.184990378447723e-06, "loss": 0.847, "step": 16810 }, { "epoch": 0.20492852180907462, "grad_norm": 2.6315999862599684, "learning_rate": 4.184669660038487e-06, "loss": 0.715, "step": 16815 }, { "epoch": 0.20498945803322244, "grad_norm": 2.5563268504957333, "learning_rate": 4.18434894162925e-06, "loss": 0.814, "step": 16820 }, { "epoch": 0.20505039425737023, "grad_norm": 2.6316869503984233, "learning_rate": 4.184028223220013e-06, "loss": 0.8654, "step": 16825 }, { "epoch": 0.20511133048151806, "grad_norm": 2.2229577342257265, "learning_rate": 4.183707504810777e-06, "loss": 0.8152, "step": 16830 }, { "epoch": 0.20517226670566585, "grad_norm": 2.5252807581665397, "learning_rate": 4.18338678640154e-06, "loss": 0.8293, "step": 16835 }, { "epoch": 0.20523320292981365, "grad_norm": 3.5873473553814055, "learning_rate": 4.183066067992303e-06, "loss": 0.8956, "step": 16840 }, { "epoch": 0.20529413915396147, "grad_norm": 2.4765233711168575, "learning_rate": 4.182745349583067e-06, "loss": 0.7213, "step": 16845 }, { "epoch": 0.20535507537810926, "grad_norm": 2.209696226478323, "learning_rate": 4.18242463117383e-06, "loss": 0.7468, "step": 16850 }, { "epoch": 0.2054160116022571, "grad_norm": 4.008275432255284, "learning_rate": 4.182103912764593e-06, "loss": 0.8456, "step": 16855 }, { "epoch": 0.20547694782640488, "grad_norm": 2.3037027399206824, "learning_rate": 4.181783194355356e-06, "loss": 0.8586, "step": 16860 }, { "epoch": 0.2055378840505527, "grad_norm": 2.4664756060253676, "learning_rate": 4.18146247594612e-06, "loss": 0.8321, "step": 16865 }, { "epoch": 0.2055988202747005, "grad_norm": 2.08938717332664, "learning_rate": 4.181141757536883e-06, "loss": 0.7606, "step": 16870 }, { "epoch": 0.2056597564988483, "grad_norm": 2.5125606114937455, "learning_rate": 4.180821039127646e-06, "loss": 0.8666, "step": 16875 }, { "epoch": 0.20572069272299612, "grad_norm": 2.214287592282616, "learning_rate": 4.18050032071841e-06, "loss": 0.8883, "step": 16880 }, { "epoch": 0.2057816289471439, "grad_norm": 2.3795078582804274, "learning_rate": 4.180179602309173e-06, "loss": 0.8195, "step": 16885 }, { "epoch": 0.20584256517129174, "grad_norm": 2.548592453343297, "learning_rate": 4.179858883899936e-06, "loss": 0.7898, "step": 16890 }, { "epoch": 0.20590350139543953, "grad_norm": 2.6590708885979173, "learning_rate": 4.1795381654906996e-06, "loss": 0.8284, "step": 16895 }, { "epoch": 0.20596443761958733, "grad_norm": 2.7830352567570356, "learning_rate": 4.1792174470814626e-06, "loss": 0.8229, "step": 16900 }, { "epoch": 0.20602537384373515, "grad_norm": 3.0358704290523275, "learning_rate": 4.178896728672226e-06, "loss": 0.7769, "step": 16905 }, { "epoch": 0.20608631006788294, "grad_norm": 2.8650678627061072, "learning_rate": 4.1785760102629894e-06, "loss": 0.8658, "step": 16910 }, { "epoch": 0.20614724629203077, "grad_norm": 1.9592127051466413, "learning_rate": 4.1782552918537525e-06, "loss": 0.807, "step": 16915 }, { "epoch": 0.20620818251617856, "grad_norm": 2.4780585761401155, "learning_rate": 4.177934573444516e-06, "loss": 0.7639, "step": 16920 }, { "epoch": 0.20626911874032638, "grad_norm": 2.29700446501766, "learning_rate": 4.177613855035279e-06, "loss": 0.7606, "step": 16925 }, { "epoch": 0.20633005496447418, "grad_norm": 2.322868173912785, "learning_rate": 4.177293136626042e-06, "loss": 0.8036, "step": 16930 }, { "epoch": 0.20639099118862198, "grad_norm": 2.7228238136742604, "learning_rate": 4.176972418216806e-06, "loss": 0.8731, "step": 16935 }, { "epoch": 0.2064519274127698, "grad_norm": 2.1805813398885294, "learning_rate": 4.176651699807569e-06, "loss": 0.7998, "step": 16940 }, { "epoch": 0.2065128636369176, "grad_norm": 2.3696360096690756, "learning_rate": 4.176330981398333e-06, "loss": 0.8331, "step": 16945 }, { "epoch": 0.20657379986106542, "grad_norm": 2.367248689091741, "learning_rate": 4.176010262989096e-06, "loss": 0.8337, "step": 16950 }, { "epoch": 0.2066347360852132, "grad_norm": 3.010744024355826, "learning_rate": 4.175689544579859e-06, "loss": 0.834, "step": 16955 }, { "epoch": 0.20669567230936103, "grad_norm": 2.5217750628406645, "learning_rate": 4.175368826170623e-06, "loss": 0.818, "step": 16960 }, { "epoch": 0.20675660853350883, "grad_norm": 2.591885023843673, "learning_rate": 4.175048107761386e-06, "loss": 0.8432, "step": 16965 }, { "epoch": 0.20681754475765662, "grad_norm": 2.324926438478561, "learning_rate": 4.174727389352149e-06, "loss": 0.7774, "step": 16970 }, { "epoch": 0.20687848098180445, "grad_norm": 2.328210952159431, "learning_rate": 4.174406670942913e-06, "loss": 0.783, "step": 16975 }, { "epoch": 0.20693941720595224, "grad_norm": 6.992437949745421, "learning_rate": 4.174085952533676e-06, "loss": 0.8272, "step": 16980 }, { "epoch": 0.20700035343010006, "grad_norm": 2.8551048450447154, "learning_rate": 4.173765234124439e-06, "loss": 0.8104, "step": 16985 }, { "epoch": 0.20706128965424786, "grad_norm": 2.2159432413220044, "learning_rate": 4.173444515715203e-06, "loss": 0.8687, "step": 16990 }, { "epoch": 0.20712222587839568, "grad_norm": 3.13232594362764, "learning_rate": 4.173123797305966e-06, "loss": 0.921, "step": 16995 }, { "epoch": 0.20718316210254348, "grad_norm": 2.633422087908539, "learning_rate": 4.172803078896729e-06, "loss": 0.7458, "step": 17000 }, { "epoch": 0.20724409832669127, "grad_norm": 4.789867631986185, "learning_rate": 4.172482360487493e-06, "loss": 0.8409, "step": 17005 }, { "epoch": 0.2073050345508391, "grad_norm": 2.6608612521182198, "learning_rate": 4.172161642078256e-06, "loss": 0.7663, "step": 17010 }, { "epoch": 0.2073659707749869, "grad_norm": 2.9365165367301223, "learning_rate": 4.171840923669019e-06, "loss": 0.8091, "step": 17015 }, { "epoch": 0.2074269069991347, "grad_norm": 3.1457529464677854, "learning_rate": 4.171520205259783e-06, "loss": 0.8803, "step": 17020 }, { "epoch": 0.2074878432232825, "grad_norm": 2.854827768456537, "learning_rate": 4.171199486850546e-06, "loss": 0.7864, "step": 17025 }, { "epoch": 0.20754877944743033, "grad_norm": 4.2432514726089465, "learning_rate": 4.170878768441309e-06, "loss": 0.9269, "step": 17030 }, { "epoch": 0.20760971567157813, "grad_norm": 2.7591205822441447, "learning_rate": 4.170558050032072e-06, "loss": 0.7918, "step": 17035 }, { "epoch": 0.20767065189572592, "grad_norm": 3.229286693370043, "learning_rate": 4.1702373316228355e-06, "loss": 0.7587, "step": 17040 }, { "epoch": 0.20773158811987374, "grad_norm": 2.543143299969961, "learning_rate": 4.1699166132135985e-06, "loss": 0.8512, "step": 17045 }, { "epoch": 0.20779252434402154, "grad_norm": 1.9174396203795487, "learning_rate": 4.1695958948043615e-06, "loss": 0.8202, "step": 17050 }, { "epoch": 0.20785346056816936, "grad_norm": 2.944630649168242, "learning_rate": 4.169275176395125e-06, "loss": 0.8225, "step": 17055 }, { "epoch": 0.20791439679231716, "grad_norm": 2.922012218820017, "learning_rate": 4.168954457985888e-06, "loss": 0.8455, "step": 17060 }, { "epoch": 0.20797533301646498, "grad_norm": 1.9595114183350297, "learning_rate": 4.1686337395766514e-06, "loss": 0.7998, "step": 17065 }, { "epoch": 0.20803626924061278, "grad_norm": 3.05490603581683, "learning_rate": 4.168313021167415e-06, "loss": 0.8731, "step": 17070 }, { "epoch": 0.20809720546476057, "grad_norm": 2.456450295263709, "learning_rate": 4.167992302758178e-06, "loss": 0.8332, "step": 17075 }, { "epoch": 0.2081581416889084, "grad_norm": 2.957850440294652, "learning_rate": 4.167671584348942e-06, "loss": 0.8197, "step": 17080 }, { "epoch": 0.2082190779130562, "grad_norm": 2.125128502514182, "learning_rate": 4.167350865939705e-06, "loss": 0.7368, "step": 17085 }, { "epoch": 0.208280014137204, "grad_norm": 2.60533357795387, "learning_rate": 4.167030147530468e-06, "loss": 0.7456, "step": 17090 }, { "epoch": 0.2083409503613518, "grad_norm": 2.1533796754078427, "learning_rate": 4.166709429121232e-06, "loss": 0.732, "step": 17095 }, { "epoch": 0.20840188658549963, "grad_norm": 2.8294093662692643, "learning_rate": 4.166388710711995e-06, "loss": 0.8412, "step": 17100 }, { "epoch": 0.20846282280964742, "grad_norm": 2.3411680699186634, "learning_rate": 4.166067992302759e-06, "loss": 0.7576, "step": 17105 }, { "epoch": 0.20852375903379522, "grad_norm": 2.352512947266822, "learning_rate": 4.165747273893522e-06, "loss": 0.8356, "step": 17110 }, { "epoch": 0.20858469525794304, "grad_norm": 2.0395650341714755, "learning_rate": 4.165426555484285e-06, "loss": 0.8257, "step": 17115 }, { "epoch": 0.20864563148209084, "grad_norm": 2.8640460023972936, "learning_rate": 4.165105837075049e-06, "loss": 0.802, "step": 17120 }, { "epoch": 0.20870656770623866, "grad_norm": 2.484061908068094, "learning_rate": 4.164785118665812e-06, "loss": 0.8669, "step": 17125 }, { "epoch": 0.20876750393038646, "grad_norm": 2.62977576685203, "learning_rate": 4.164464400256575e-06, "loss": 0.847, "step": 17130 }, { "epoch": 0.20882844015453425, "grad_norm": 2.671752700487501, "learning_rate": 4.164143681847339e-06, "loss": 0.7318, "step": 17135 }, { "epoch": 0.20888937637868207, "grad_norm": 3.3066032497475755, "learning_rate": 4.163822963438102e-06, "loss": 0.7234, "step": 17140 }, { "epoch": 0.20895031260282987, "grad_norm": 2.670143700482445, "learning_rate": 4.163502245028865e-06, "loss": 0.8182, "step": 17145 }, { "epoch": 0.2090112488269777, "grad_norm": 3.0238201554590063, "learning_rate": 4.163181526619629e-06, "loss": 0.8019, "step": 17150 }, { "epoch": 0.2090721850511255, "grad_norm": 2.8244892751070583, "learning_rate": 4.162860808210392e-06, "loss": 0.786, "step": 17155 }, { "epoch": 0.2091331212752733, "grad_norm": 3.166607730859401, "learning_rate": 4.162540089801155e-06, "loss": 0.7575, "step": 17160 }, { "epoch": 0.2091940574994211, "grad_norm": 2.6708354601300734, "learning_rate": 4.1622193713919185e-06, "loss": 0.7294, "step": 17165 }, { "epoch": 0.2092549937235689, "grad_norm": 2.160913073822679, "learning_rate": 4.1618986529826816e-06, "loss": 0.7347, "step": 17170 }, { "epoch": 0.20931592994771672, "grad_norm": 2.647188875610811, "learning_rate": 4.161577934573445e-06, "loss": 0.8704, "step": 17175 }, { "epoch": 0.20937686617186452, "grad_norm": 2.7257660061865567, "learning_rate": 4.1612572161642084e-06, "loss": 0.7865, "step": 17180 }, { "epoch": 0.20943780239601234, "grad_norm": 1.9714439491702351, "learning_rate": 4.1609364977549715e-06, "loss": 0.7812, "step": 17185 }, { "epoch": 0.20949873862016014, "grad_norm": 2.916039464126211, "learning_rate": 4.1606157793457345e-06, "loss": 0.7918, "step": 17190 }, { "epoch": 0.20955967484430796, "grad_norm": 5.136575456748296, "learning_rate": 4.1602950609364975e-06, "loss": 0.781, "step": 17195 }, { "epoch": 0.20962061106845575, "grad_norm": 2.3098234924595777, "learning_rate": 4.159974342527261e-06, "loss": 0.8055, "step": 17200 }, { "epoch": 0.20968154729260355, "grad_norm": 2.525923154376747, "learning_rate": 4.159653624118024e-06, "loss": 0.793, "step": 17205 }, { "epoch": 0.20974248351675137, "grad_norm": 2.913230193071562, "learning_rate": 4.159332905708787e-06, "loss": 0.9528, "step": 17210 }, { "epoch": 0.20980341974089917, "grad_norm": 2.540228910888029, "learning_rate": 4.159012187299551e-06, "loss": 0.8849, "step": 17215 }, { "epoch": 0.209864355965047, "grad_norm": 2.713538691363308, "learning_rate": 4.158691468890314e-06, "loss": 0.8275, "step": 17220 }, { "epoch": 0.20992529218919478, "grad_norm": 2.6302864220479405, "learning_rate": 4.158370750481078e-06, "loss": 0.8515, "step": 17225 }, { "epoch": 0.2099862284133426, "grad_norm": 2.350350186525078, "learning_rate": 4.158050032071841e-06, "loss": 0.815, "step": 17230 }, { "epoch": 0.2100471646374904, "grad_norm": 2.075082990726309, "learning_rate": 4.157729313662604e-06, "loss": 0.831, "step": 17235 }, { "epoch": 0.2101081008616382, "grad_norm": 2.682441773654495, "learning_rate": 4.157408595253368e-06, "loss": 0.869, "step": 17240 }, { "epoch": 0.21016903708578602, "grad_norm": 2.896330361254195, "learning_rate": 4.157087876844131e-06, "loss": 0.9016, "step": 17245 }, { "epoch": 0.21022997330993382, "grad_norm": 2.559612650795352, "learning_rate": 4.156767158434895e-06, "loss": 0.8715, "step": 17250 }, { "epoch": 0.21029090953408164, "grad_norm": 2.8252273573196938, "learning_rate": 4.156446440025658e-06, "loss": 0.8312, "step": 17255 }, { "epoch": 0.21035184575822943, "grad_norm": 3.3306585988008, "learning_rate": 4.156125721616421e-06, "loss": 0.9015, "step": 17260 }, { "epoch": 0.21041278198237726, "grad_norm": 2.0564828398435226, "learning_rate": 4.155805003207185e-06, "loss": 0.7489, "step": 17265 }, { "epoch": 0.21047371820652505, "grad_norm": 2.6047013730440742, "learning_rate": 4.155484284797948e-06, "loss": 0.7975, "step": 17270 }, { "epoch": 0.21053465443067285, "grad_norm": 2.6100605382406497, "learning_rate": 4.155163566388712e-06, "loss": 0.7991, "step": 17275 }, { "epoch": 0.21059559065482067, "grad_norm": 2.396400984099455, "learning_rate": 4.154842847979475e-06, "loss": 0.8526, "step": 17280 }, { "epoch": 0.21065652687896846, "grad_norm": 3.8914506203555344, "learning_rate": 4.154522129570238e-06, "loss": 0.8216, "step": 17285 }, { "epoch": 0.2107174631031163, "grad_norm": 2.777087431296513, "learning_rate": 4.154201411161001e-06, "loss": 0.8333, "step": 17290 }, { "epoch": 0.21077839932726408, "grad_norm": 2.4756163249903116, "learning_rate": 4.153880692751765e-06, "loss": 0.8107, "step": 17295 }, { "epoch": 0.2108393355514119, "grad_norm": 1.8383906140634843, "learning_rate": 4.153559974342528e-06, "loss": 0.7967, "step": 17300 }, { "epoch": 0.2109002717755597, "grad_norm": 2.876732037174024, "learning_rate": 4.153239255933291e-06, "loss": 0.8038, "step": 17305 }, { "epoch": 0.2109612079997075, "grad_norm": 2.2247770190712592, "learning_rate": 4.1529185375240545e-06, "loss": 0.7916, "step": 17310 }, { "epoch": 0.21102214422385532, "grad_norm": 2.3592491086526355, "learning_rate": 4.1525978191148175e-06, "loss": 0.7897, "step": 17315 }, { "epoch": 0.2110830804480031, "grad_norm": 2.4283832429229038, "learning_rate": 4.1522771007055805e-06, "loss": 0.8649, "step": 17320 }, { "epoch": 0.21114401667215094, "grad_norm": 2.439809910913531, "learning_rate": 4.151956382296344e-06, "loss": 0.8236, "step": 17325 }, { "epoch": 0.21120495289629873, "grad_norm": 2.215375076814853, "learning_rate": 4.151635663887107e-06, "loss": 0.8429, "step": 17330 }, { "epoch": 0.21126588912044655, "grad_norm": 2.184777388760867, "learning_rate": 4.1513149454778704e-06, "loss": 0.8548, "step": 17335 }, { "epoch": 0.21132682534459435, "grad_norm": 2.749765284682635, "learning_rate": 4.150994227068634e-06, "loss": 0.753, "step": 17340 }, { "epoch": 0.21138776156874214, "grad_norm": 2.9837421555107717, "learning_rate": 4.150673508659397e-06, "loss": 0.9304, "step": 17345 }, { "epoch": 0.21144869779288997, "grad_norm": 2.5450999669430536, "learning_rate": 4.15035279025016e-06, "loss": 0.8068, "step": 17350 }, { "epoch": 0.21150963401703776, "grad_norm": 2.194561815002663, "learning_rate": 4.150032071840924e-06, "loss": 0.7829, "step": 17355 }, { "epoch": 0.21157057024118558, "grad_norm": 2.8716356055650327, "learning_rate": 4.149711353431687e-06, "loss": 0.8275, "step": 17360 }, { "epoch": 0.21163150646533338, "grad_norm": 2.9688100802689297, "learning_rate": 4.14939063502245e-06, "loss": 0.8476, "step": 17365 }, { "epoch": 0.2116924426894812, "grad_norm": 4.092542852976362, "learning_rate": 4.149069916613214e-06, "loss": 0.8051, "step": 17370 }, { "epoch": 0.211753378913629, "grad_norm": 2.462969432426367, "learning_rate": 4.148749198203977e-06, "loss": 0.7542, "step": 17375 }, { "epoch": 0.2118143151377768, "grad_norm": 4.714817122162352, "learning_rate": 4.14842847979474e-06, "loss": 0.8301, "step": 17380 }, { "epoch": 0.21187525136192462, "grad_norm": 2.654669234419754, "learning_rate": 4.148107761385504e-06, "loss": 0.8253, "step": 17385 }, { "epoch": 0.2119361875860724, "grad_norm": 2.3850195204860487, "learning_rate": 4.147787042976267e-06, "loss": 0.79, "step": 17390 }, { "epoch": 0.21199712381022023, "grad_norm": 2.535643786619077, "learning_rate": 4.147466324567031e-06, "loss": 0.8563, "step": 17395 }, { "epoch": 0.21205806003436803, "grad_norm": 2.5971560601992416, "learning_rate": 4.147145606157794e-06, "loss": 0.8306, "step": 17400 }, { "epoch": 0.21211899625851582, "grad_norm": 2.987267024765732, "learning_rate": 4.146824887748557e-06, "loss": 0.9431, "step": 17405 }, { "epoch": 0.21217993248266365, "grad_norm": 2.197422363664815, "learning_rate": 4.146504169339321e-06, "loss": 0.6951, "step": 17410 }, { "epoch": 0.21224086870681144, "grad_norm": 2.713771643093416, "learning_rate": 4.146183450930084e-06, "loss": 0.7623, "step": 17415 }, { "epoch": 0.21230180493095926, "grad_norm": 2.453520996144487, "learning_rate": 4.145862732520848e-06, "loss": 0.7794, "step": 17420 }, { "epoch": 0.21236274115510706, "grad_norm": 2.6810674851308938, "learning_rate": 4.145542014111611e-06, "loss": 0.8508, "step": 17425 }, { "epoch": 0.21242367737925488, "grad_norm": 3.256528905768252, "learning_rate": 4.145221295702374e-06, "loss": 0.7846, "step": 17430 }, { "epoch": 0.21248461360340268, "grad_norm": 3.075252385436257, "learning_rate": 4.1449005772931375e-06, "loss": 0.8672, "step": 17435 }, { "epoch": 0.21254554982755047, "grad_norm": 3.462441433325625, "learning_rate": 4.1445798588839006e-06, "loss": 0.764, "step": 17440 }, { "epoch": 0.2126064860516983, "grad_norm": 2.354798089774542, "learning_rate": 4.144259140474664e-06, "loss": 0.8261, "step": 17445 }, { "epoch": 0.2126674222758461, "grad_norm": 2.694858228540868, "learning_rate": 4.143938422065427e-06, "loss": 0.808, "step": 17450 }, { "epoch": 0.2127283584999939, "grad_norm": 2.0798486929141684, "learning_rate": 4.1436177036561905e-06, "loss": 0.8506, "step": 17455 }, { "epoch": 0.2127892947241417, "grad_norm": 4.033953363604117, "learning_rate": 4.1432969852469535e-06, "loss": 0.7902, "step": 17460 }, { "epoch": 0.21285023094828953, "grad_norm": 2.8918495044971295, "learning_rate": 4.1429762668377165e-06, "loss": 0.795, "step": 17465 }, { "epoch": 0.21291116717243733, "grad_norm": 2.8874484988012847, "learning_rate": 4.14265554842848e-06, "loss": 0.7626, "step": 17470 }, { "epoch": 0.21297210339658512, "grad_norm": 2.647480226392493, "learning_rate": 4.142334830019243e-06, "loss": 0.8196, "step": 17475 }, { "epoch": 0.21303303962073294, "grad_norm": 3.1676416649966272, "learning_rate": 4.142014111610006e-06, "loss": 0.8187, "step": 17480 }, { "epoch": 0.21309397584488074, "grad_norm": 2.6389601714938338, "learning_rate": 4.14169339320077e-06, "loss": 0.7412, "step": 17485 }, { "epoch": 0.21315491206902856, "grad_norm": 2.6982695895053155, "learning_rate": 4.141372674791533e-06, "loss": 0.9108, "step": 17490 }, { "epoch": 0.21321584829317636, "grad_norm": 2.6995024525551994, "learning_rate": 4.141051956382296e-06, "loss": 0.9216, "step": 17495 }, { "epoch": 0.21327678451732418, "grad_norm": 2.734113440183965, "learning_rate": 4.14073123797306e-06, "loss": 0.8484, "step": 17500 }, { "epoch": 0.21333772074147198, "grad_norm": 2.625272193883513, "learning_rate": 4.140410519563823e-06, "loss": 0.7512, "step": 17505 }, { "epoch": 0.21339865696561977, "grad_norm": 2.9220287567693086, "learning_rate": 4.140089801154586e-06, "loss": 0.7821, "step": 17510 }, { "epoch": 0.2134595931897676, "grad_norm": 2.5331880889163414, "learning_rate": 4.13976908274535e-06, "loss": 0.7648, "step": 17515 }, { "epoch": 0.2135205294139154, "grad_norm": 2.3502308112723314, "learning_rate": 4.139448364336113e-06, "loss": 0.8345, "step": 17520 }, { "epoch": 0.2135814656380632, "grad_norm": 2.3516602543423804, "learning_rate": 4.139127645926876e-06, "loss": 0.7448, "step": 17525 }, { "epoch": 0.213642401862211, "grad_norm": 2.775465914878336, "learning_rate": 4.13880692751764e-06, "loss": 0.8543, "step": 17530 }, { "epoch": 0.21370333808635883, "grad_norm": 2.401861249813763, "learning_rate": 4.138486209108403e-06, "loss": 0.824, "step": 17535 }, { "epoch": 0.21376427431050662, "grad_norm": 2.630348481954369, "learning_rate": 4.138165490699166e-06, "loss": 0.7791, "step": 17540 }, { "epoch": 0.21382521053465442, "grad_norm": 3.463394266680614, "learning_rate": 4.13784477228993e-06, "loss": 0.8006, "step": 17545 }, { "epoch": 0.21388614675880224, "grad_norm": 2.481006988278923, "learning_rate": 4.137524053880693e-06, "loss": 0.7913, "step": 17550 }, { "epoch": 0.21394708298295004, "grad_norm": 2.9395043769207403, "learning_rate": 4.137203335471457e-06, "loss": 0.8228, "step": 17555 }, { "epoch": 0.21400801920709786, "grad_norm": 2.2325455100981566, "learning_rate": 4.13688261706222e-06, "loss": 0.8409, "step": 17560 }, { "epoch": 0.21406895543124566, "grad_norm": 2.310544480287628, "learning_rate": 4.136561898652984e-06, "loss": 0.8169, "step": 17565 }, { "epoch": 0.21412989165539348, "grad_norm": 2.5192125206676246, "learning_rate": 4.136241180243747e-06, "loss": 0.7679, "step": 17570 }, { "epoch": 0.21419082787954127, "grad_norm": 2.937794173228548, "learning_rate": 4.13592046183451e-06, "loss": 0.7896, "step": 17575 }, { "epoch": 0.21425176410368907, "grad_norm": 2.7144428918511845, "learning_rate": 4.1355997434252735e-06, "loss": 0.8223, "step": 17580 }, { "epoch": 0.2143127003278369, "grad_norm": 2.3174340322542757, "learning_rate": 4.1352790250160365e-06, "loss": 0.7755, "step": 17585 }, { "epoch": 0.2143736365519847, "grad_norm": 2.394501680816304, "learning_rate": 4.1349583066067995e-06, "loss": 0.8393, "step": 17590 }, { "epoch": 0.2144345727761325, "grad_norm": 3.4790569642172584, "learning_rate": 4.134637588197563e-06, "loss": 0.8005, "step": 17595 }, { "epoch": 0.2144955090002803, "grad_norm": 3.006337891625088, "learning_rate": 4.134316869788326e-06, "loss": 0.8066, "step": 17600 }, { "epoch": 0.21455644522442813, "grad_norm": 2.730088595235043, "learning_rate": 4.1339961513790894e-06, "loss": 0.8208, "step": 17605 }, { "epoch": 0.21461738144857592, "grad_norm": 2.2301314874033062, "learning_rate": 4.133675432969853e-06, "loss": 0.8061, "step": 17610 }, { "epoch": 0.21467831767272372, "grad_norm": 2.1935477808130135, "learning_rate": 4.133354714560616e-06, "loss": 0.7827, "step": 17615 }, { "epoch": 0.21473925389687154, "grad_norm": 2.6297722224414475, "learning_rate": 4.133033996151379e-06, "loss": 0.8218, "step": 17620 }, { "epoch": 0.21480019012101934, "grad_norm": 3.0272209662401566, "learning_rate": 4.132713277742142e-06, "loss": 0.8982, "step": 17625 }, { "epoch": 0.21486112634516716, "grad_norm": 2.7477098451244126, "learning_rate": 4.132392559332906e-06, "loss": 0.8617, "step": 17630 }, { "epoch": 0.21492206256931495, "grad_norm": 2.898192909151332, "learning_rate": 4.132071840923669e-06, "loss": 0.7927, "step": 17635 }, { "epoch": 0.21498299879346275, "grad_norm": 2.458245775081552, "learning_rate": 4.131751122514432e-06, "loss": 0.846, "step": 17640 }, { "epoch": 0.21504393501761057, "grad_norm": 2.513213822547066, "learning_rate": 4.131430404105196e-06, "loss": 0.8554, "step": 17645 }, { "epoch": 0.21510487124175837, "grad_norm": 2.7550347979947643, "learning_rate": 4.131109685695959e-06, "loss": 0.8138, "step": 17650 }, { "epoch": 0.2151658074659062, "grad_norm": 2.560037826852595, "learning_rate": 4.130788967286722e-06, "loss": 0.7229, "step": 17655 }, { "epoch": 0.21522674369005398, "grad_norm": 3.7220020853760976, "learning_rate": 4.130468248877486e-06, "loss": 0.817, "step": 17660 }, { "epoch": 0.2152876799142018, "grad_norm": 2.7182749596754507, "learning_rate": 4.130147530468249e-06, "loss": 0.8164, "step": 17665 }, { "epoch": 0.2153486161383496, "grad_norm": 2.4294735337529723, "learning_rate": 4.129826812059012e-06, "loss": 0.7716, "step": 17670 }, { "epoch": 0.2154095523624974, "grad_norm": 2.398911924880864, "learning_rate": 4.129506093649776e-06, "loss": 0.8233, "step": 17675 }, { "epoch": 0.21547048858664522, "grad_norm": 2.8232831221819557, "learning_rate": 4.129185375240539e-06, "loss": 0.9439, "step": 17680 }, { "epoch": 0.21553142481079302, "grad_norm": 2.0287841380227243, "learning_rate": 4.128864656831302e-06, "loss": 0.8235, "step": 17685 }, { "epoch": 0.21559236103494084, "grad_norm": 3.7702782493622653, "learning_rate": 4.128543938422066e-06, "loss": 0.8401, "step": 17690 }, { "epoch": 0.21565329725908863, "grad_norm": 2.3813910769881703, "learning_rate": 4.128223220012829e-06, "loss": 0.7782, "step": 17695 }, { "epoch": 0.21571423348323646, "grad_norm": 2.3514401492757, "learning_rate": 4.127902501603593e-06, "loss": 0.7462, "step": 17700 }, { "epoch": 0.21577516970738425, "grad_norm": 2.33198712082723, "learning_rate": 4.127581783194356e-06, "loss": 0.8297, "step": 17705 }, { "epoch": 0.21583610593153205, "grad_norm": 2.352327837976083, "learning_rate": 4.127261064785119e-06, "loss": 0.7953, "step": 17710 }, { "epoch": 0.21589704215567987, "grad_norm": 2.4334507750182413, "learning_rate": 4.126940346375883e-06, "loss": 0.8897, "step": 17715 }, { "epoch": 0.21595797837982766, "grad_norm": 2.354472167322169, "learning_rate": 4.126619627966646e-06, "loss": 0.8433, "step": 17720 }, { "epoch": 0.2160189146039755, "grad_norm": 2.5879887888088673, "learning_rate": 4.1262989095574095e-06, "loss": 0.8284, "step": 17725 }, { "epoch": 0.21607985082812328, "grad_norm": 2.522175615322144, "learning_rate": 4.1259781911481725e-06, "loss": 0.8118, "step": 17730 }, { "epoch": 0.2161407870522711, "grad_norm": 2.799562082553949, "learning_rate": 4.1256574727389355e-06, "loss": 0.8051, "step": 17735 }, { "epoch": 0.2162017232764189, "grad_norm": 2.5229181376793313, "learning_rate": 4.125336754329699e-06, "loss": 0.8114, "step": 17740 }, { "epoch": 0.2162626595005667, "grad_norm": 2.9432120656346186, "learning_rate": 4.125016035920462e-06, "loss": 0.9107, "step": 17745 }, { "epoch": 0.21632359572471452, "grad_norm": 2.4136572373988323, "learning_rate": 4.124695317511225e-06, "loss": 0.7884, "step": 17750 }, { "epoch": 0.2163845319488623, "grad_norm": 2.527716024423328, "learning_rate": 4.124374599101989e-06, "loss": 0.8478, "step": 17755 }, { "epoch": 0.21644546817301014, "grad_norm": 2.1785015060897086, "learning_rate": 4.124053880692752e-06, "loss": 0.7932, "step": 17760 }, { "epoch": 0.21650640439715793, "grad_norm": 2.09146196970205, "learning_rate": 4.123733162283515e-06, "loss": 0.7567, "step": 17765 }, { "epoch": 0.21656734062130575, "grad_norm": 2.4782420739533535, "learning_rate": 4.123412443874279e-06, "loss": 0.7561, "step": 17770 }, { "epoch": 0.21662827684545355, "grad_norm": 3.562484873254949, "learning_rate": 4.123091725465042e-06, "loss": 0.8172, "step": 17775 }, { "epoch": 0.21668921306960134, "grad_norm": 2.312245703378561, "learning_rate": 4.122771007055805e-06, "loss": 0.8247, "step": 17780 }, { "epoch": 0.21675014929374917, "grad_norm": 2.397897644180359, "learning_rate": 4.122450288646568e-06, "loss": 0.8463, "step": 17785 }, { "epoch": 0.21681108551789696, "grad_norm": 2.7320500937234318, "learning_rate": 4.122129570237332e-06, "loss": 0.8089, "step": 17790 }, { "epoch": 0.21687202174204478, "grad_norm": 2.530800517193086, "learning_rate": 4.121808851828095e-06, "loss": 0.9006, "step": 17795 }, { "epoch": 0.21693295796619258, "grad_norm": 2.3860311725935124, "learning_rate": 4.121488133418858e-06, "loss": 0.8226, "step": 17800 }, { "epoch": 0.2169938941903404, "grad_norm": 2.7492077579891223, "learning_rate": 4.121167415009622e-06, "loss": 0.8934, "step": 17805 }, { "epoch": 0.2170548304144882, "grad_norm": 2.3825230799129518, "learning_rate": 4.120846696600385e-06, "loss": 0.8095, "step": 17810 }, { "epoch": 0.217115766638636, "grad_norm": 3.224844883849033, "learning_rate": 4.120525978191148e-06, "loss": 0.8135, "step": 17815 }, { "epoch": 0.21717670286278382, "grad_norm": 2.8131464087242803, "learning_rate": 4.120205259781912e-06, "loss": 0.7928, "step": 17820 }, { "epoch": 0.2172376390869316, "grad_norm": 2.6977457373335607, "learning_rate": 4.119884541372675e-06, "loss": 0.8726, "step": 17825 }, { "epoch": 0.21729857531107943, "grad_norm": 4.242245046020889, "learning_rate": 4.119563822963438e-06, "loss": 0.8656, "step": 17830 }, { "epoch": 0.21735951153522723, "grad_norm": 2.1645659971750164, "learning_rate": 4.119243104554202e-06, "loss": 0.8143, "step": 17835 }, { "epoch": 0.21742044775937505, "grad_norm": 3.794922546235325, "learning_rate": 4.118922386144965e-06, "loss": 0.8368, "step": 17840 }, { "epoch": 0.21748138398352285, "grad_norm": 2.4875531523629704, "learning_rate": 4.118601667735729e-06, "loss": 0.8652, "step": 17845 }, { "epoch": 0.21754232020767064, "grad_norm": 2.1046860981602618, "learning_rate": 4.118280949326492e-06, "loss": 0.8371, "step": 17850 }, { "epoch": 0.21760325643181846, "grad_norm": 2.761718147871823, "learning_rate": 4.117960230917255e-06, "loss": 0.8054, "step": 17855 }, { "epoch": 0.21766419265596626, "grad_norm": 2.3738195125984873, "learning_rate": 4.1176395125080185e-06, "loss": 0.8061, "step": 17860 }, { "epoch": 0.21772512888011408, "grad_norm": 2.5216954396009834, "learning_rate": 4.1173187940987816e-06, "loss": 0.8135, "step": 17865 }, { "epoch": 0.21778606510426188, "grad_norm": 2.323411626860716, "learning_rate": 4.116998075689545e-06, "loss": 0.7918, "step": 17870 }, { "epoch": 0.21784700132840967, "grad_norm": 2.5840216951219124, "learning_rate": 4.1166773572803084e-06, "loss": 0.7835, "step": 17875 }, { "epoch": 0.2179079375525575, "grad_norm": 3.0095380815135786, "learning_rate": 4.1163566388710715e-06, "loss": 0.8558, "step": 17880 }, { "epoch": 0.2179688737767053, "grad_norm": 2.611323713758553, "learning_rate": 4.116035920461835e-06, "loss": 0.8062, "step": 17885 }, { "epoch": 0.2180298100008531, "grad_norm": 2.2357643251621706, "learning_rate": 4.115715202052598e-06, "loss": 0.7599, "step": 17890 }, { "epoch": 0.2180907462250009, "grad_norm": 2.2626570847439167, "learning_rate": 4.115394483643361e-06, "loss": 0.8199, "step": 17895 }, { "epoch": 0.21815168244914873, "grad_norm": 2.6388275360228377, "learning_rate": 4.115073765234125e-06, "loss": 0.9215, "step": 17900 }, { "epoch": 0.21821261867329653, "grad_norm": 2.9717009624367203, "learning_rate": 4.114753046824888e-06, "loss": 0.7656, "step": 17905 }, { "epoch": 0.21827355489744432, "grad_norm": 2.308437174615003, "learning_rate": 4.114432328415651e-06, "loss": 0.8783, "step": 17910 }, { "epoch": 0.21833449112159214, "grad_norm": 3.366996875098473, "learning_rate": 4.114111610006415e-06, "loss": 0.822, "step": 17915 }, { "epoch": 0.21839542734573994, "grad_norm": 2.5731487947977763, "learning_rate": 4.113790891597178e-06, "loss": 0.8772, "step": 17920 }, { "epoch": 0.21845636356988776, "grad_norm": 2.4963498945880764, "learning_rate": 4.113470173187941e-06, "loss": 0.8203, "step": 17925 }, { "epoch": 0.21851729979403556, "grad_norm": 3.016466900716168, "learning_rate": 4.113149454778705e-06, "loss": 0.9049, "step": 17930 }, { "epoch": 0.21857823601818338, "grad_norm": 2.7487064992748613, "learning_rate": 4.112828736369468e-06, "loss": 0.8124, "step": 17935 }, { "epoch": 0.21863917224233118, "grad_norm": 2.9301275657402113, "learning_rate": 4.112508017960231e-06, "loss": 0.8518, "step": 17940 }, { "epoch": 0.21870010846647897, "grad_norm": 2.516479640548111, "learning_rate": 4.112187299550995e-06, "loss": 0.8741, "step": 17945 }, { "epoch": 0.2187610446906268, "grad_norm": 3.1207233466787687, "learning_rate": 4.111866581141758e-06, "loss": 0.8613, "step": 17950 }, { "epoch": 0.2188219809147746, "grad_norm": 2.0215706411712153, "learning_rate": 4.111545862732521e-06, "loss": 0.7521, "step": 17955 }, { "epoch": 0.2188829171389224, "grad_norm": 2.65520229634116, "learning_rate": 4.111225144323284e-06, "loss": 0.8418, "step": 17960 }, { "epoch": 0.2189438533630702, "grad_norm": 4.1214785185522596, "learning_rate": 4.110904425914048e-06, "loss": 0.7912, "step": 17965 }, { "epoch": 0.21900478958721803, "grad_norm": 2.5984094623813334, "learning_rate": 4.110583707504811e-06, "loss": 0.8402, "step": 17970 }, { "epoch": 0.21906572581136582, "grad_norm": 2.330870039706067, "learning_rate": 4.110262989095574e-06, "loss": 0.7169, "step": 17975 }, { "epoch": 0.21912666203551362, "grad_norm": 4.2861835257700145, "learning_rate": 4.109942270686338e-06, "loss": 0.7952, "step": 17980 }, { "epoch": 0.21918759825966144, "grad_norm": 2.1971754163983324, "learning_rate": 4.109621552277101e-06, "loss": 0.8625, "step": 17985 }, { "epoch": 0.21924853448380924, "grad_norm": 2.5561088684357904, "learning_rate": 4.109300833867864e-06, "loss": 0.7676, "step": 17990 }, { "epoch": 0.21930947070795706, "grad_norm": 2.4378156465978797, "learning_rate": 4.108980115458628e-06, "loss": 0.8194, "step": 17995 }, { "epoch": 0.21937040693210486, "grad_norm": 3.1970800847594196, "learning_rate": 4.108659397049391e-06, "loss": 0.782, "step": 18000 }, { "epoch": 0.21943134315625268, "grad_norm": 3.5037742811389387, "learning_rate": 4.1083386786401545e-06, "loss": 0.7485, "step": 18005 }, { "epoch": 0.21949227938040047, "grad_norm": 2.4329164271691144, "learning_rate": 4.1080179602309175e-06, "loss": 0.8938, "step": 18010 }, { "epoch": 0.21955321560454827, "grad_norm": 2.8873198857999323, "learning_rate": 4.1076972418216805e-06, "loss": 0.8023, "step": 18015 }, { "epoch": 0.2196141518286961, "grad_norm": 2.5723012512417003, "learning_rate": 4.107376523412444e-06, "loss": 0.7841, "step": 18020 }, { "epoch": 0.2196750880528439, "grad_norm": 2.4679597515766236, "learning_rate": 4.107055805003207e-06, "loss": 0.8216, "step": 18025 }, { "epoch": 0.2197360242769917, "grad_norm": 2.8127227598030617, "learning_rate": 4.106735086593971e-06, "loss": 0.812, "step": 18030 }, { "epoch": 0.2197969605011395, "grad_norm": 2.2724462491508417, "learning_rate": 4.106414368184734e-06, "loss": 0.8546, "step": 18035 }, { "epoch": 0.21985789672528733, "grad_norm": 2.7094864368200327, "learning_rate": 4.106093649775497e-06, "loss": 0.8056, "step": 18040 }, { "epoch": 0.21991883294943512, "grad_norm": 2.7810201949144453, "learning_rate": 4.105772931366261e-06, "loss": 0.9103, "step": 18045 }, { "epoch": 0.21997976917358292, "grad_norm": 2.6461184189840026, "learning_rate": 4.105452212957024e-06, "loss": 0.8562, "step": 18050 }, { "epoch": 0.22004070539773074, "grad_norm": 2.5207212771802765, "learning_rate": 4.105131494547787e-06, "loss": 0.8028, "step": 18055 }, { "epoch": 0.22010164162187854, "grad_norm": 2.028039963166844, "learning_rate": 4.104810776138551e-06, "loss": 0.8717, "step": 18060 }, { "epoch": 0.22016257784602636, "grad_norm": 1.832262279425719, "learning_rate": 4.104490057729314e-06, "loss": 0.7489, "step": 18065 }, { "epoch": 0.22022351407017415, "grad_norm": 3.2370317035908918, "learning_rate": 4.104169339320077e-06, "loss": 0.8045, "step": 18070 }, { "epoch": 0.22028445029432198, "grad_norm": 2.3329875363826433, "learning_rate": 4.103848620910841e-06, "loss": 0.8034, "step": 18075 }, { "epoch": 0.22034538651846977, "grad_norm": 2.4884820945971007, "learning_rate": 4.103527902501604e-06, "loss": 0.8332, "step": 18080 }, { "epoch": 0.22040632274261757, "grad_norm": 2.6993015915654484, "learning_rate": 4.103207184092367e-06, "loss": 0.762, "step": 18085 }, { "epoch": 0.2204672589667654, "grad_norm": 2.216915161285217, "learning_rate": 4.102886465683131e-06, "loss": 0.8085, "step": 18090 }, { "epoch": 0.22052819519091318, "grad_norm": 2.3534824850446014, "learning_rate": 4.102565747273894e-06, "loss": 0.7885, "step": 18095 }, { "epoch": 0.220589131415061, "grad_norm": 3.122638077907853, "learning_rate": 4.102245028864657e-06, "loss": 0.8104, "step": 18100 }, { "epoch": 0.2206500676392088, "grad_norm": 2.151956304437177, "learning_rate": 4.101924310455421e-06, "loss": 0.8553, "step": 18105 }, { "epoch": 0.2207110038633566, "grad_norm": 3.146629061639849, "learning_rate": 4.101603592046184e-06, "loss": 0.7713, "step": 18110 }, { "epoch": 0.22077194008750442, "grad_norm": 2.259161347114758, "learning_rate": 4.101282873636947e-06, "loss": 0.8004, "step": 18115 }, { "epoch": 0.22083287631165222, "grad_norm": 2.1154900729530492, "learning_rate": 4.10096215522771e-06, "loss": 0.8444, "step": 18120 }, { "epoch": 0.22089381253580004, "grad_norm": 2.4388272332871344, "learning_rate": 4.100641436818474e-06, "loss": 0.8342, "step": 18125 }, { "epoch": 0.22095474875994783, "grad_norm": 2.952427104690228, "learning_rate": 4.100320718409237e-06, "loss": 0.8333, "step": 18130 }, { "epoch": 0.22101568498409566, "grad_norm": 2.41853482523395, "learning_rate": 4.1e-06, "loss": 0.8067, "step": 18135 }, { "epoch": 0.22107662120824345, "grad_norm": 4.217364369933473, "learning_rate": 4.0996792815907636e-06, "loss": 0.8655, "step": 18140 }, { "epoch": 0.22113755743239125, "grad_norm": 2.6557107559699995, "learning_rate": 4.099358563181527e-06, "loss": 0.8266, "step": 18145 }, { "epoch": 0.22119849365653907, "grad_norm": 2.7341577916150976, "learning_rate": 4.0990378447722905e-06, "loss": 0.8658, "step": 18150 }, { "epoch": 0.22125942988068686, "grad_norm": 2.633615289785402, "learning_rate": 4.0987171263630535e-06, "loss": 0.7884, "step": 18155 }, { "epoch": 0.2213203661048347, "grad_norm": 3.4715572928677596, "learning_rate": 4.0983964079538165e-06, "loss": 0.7684, "step": 18160 }, { "epoch": 0.22138130232898248, "grad_norm": 2.5230636992220252, "learning_rate": 4.09807568954458e-06, "loss": 0.8482, "step": 18165 }, { "epoch": 0.2214422385531303, "grad_norm": 2.592324037684483, "learning_rate": 4.097754971135343e-06, "loss": 0.8752, "step": 18170 }, { "epoch": 0.2215031747772781, "grad_norm": 2.997852947512907, "learning_rate": 4.097434252726107e-06, "loss": 0.805, "step": 18175 }, { "epoch": 0.2215641110014259, "grad_norm": 2.7895578422651015, "learning_rate": 4.09711353431687e-06, "loss": 0.802, "step": 18180 }, { "epoch": 0.22162504722557372, "grad_norm": 2.7038568711780355, "learning_rate": 4.096792815907633e-06, "loss": 0.8877, "step": 18185 }, { "epoch": 0.2216859834497215, "grad_norm": 2.231114792242392, "learning_rate": 4.096472097498397e-06, "loss": 0.8086, "step": 18190 }, { "epoch": 0.22174691967386934, "grad_norm": 2.230322352613395, "learning_rate": 4.09615137908916e-06, "loss": 0.8114, "step": 18195 }, { "epoch": 0.22180785589801713, "grad_norm": 4.224143638078149, "learning_rate": 4.095830660679924e-06, "loss": 0.7388, "step": 18200 }, { "epoch": 0.22186879212216495, "grad_norm": 2.348044984464851, "learning_rate": 4.095509942270687e-06, "loss": 0.8353, "step": 18205 }, { "epoch": 0.22192972834631275, "grad_norm": 2.3342934730781164, "learning_rate": 4.09518922386145e-06, "loss": 0.6857, "step": 18210 }, { "epoch": 0.22199066457046054, "grad_norm": 2.689941619173221, "learning_rate": 4.094868505452213e-06, "loss": 0.805, "step": 18215 }, { "epoch": 0.22205160079460837, "grad_norm": 3.1146013600489466, "learning_rate": 4.094547787042977e-06, "loss": 0.7833, "step": 18220 }, { "epoch": 0.22211253701875616, "grad_norm": 3.9279293864694056, "learning_rate": 4.09422706863374e-06, "loss": 0.8985, "step": 18225 }, { "epoch": 0.22217347324290398, "grad_norm": 2.162768863543211, "learning_rate": 4.093906350224503e-06, "loss": 0.7331, "step": 18230 }, { "epoch": 0.22223440946705178, "grad_norm": 3.084252366231732, "learning_rate": 4.093585631815267e-06, "loss": 0.7509, "step": 18235 }, { "epoch": 0.2222953456911996, "grad_norm": 2.2492898877271736, "learning_rate": 4.09326491340603e-06, "loss": 0.8026, "step": 18240 }, { "epoch": 0.2223562819153474, "grad_norm": 2.6687793236227333, "learning_rate": 4.092944194996793e-06, "loss": 0.862, "step": 18245 }, { "epoch": 0.2224172181394952, "grad_norm": 2.0782846885646604, "learning_rate": 4.092623476587557e-06, "loss": 0.8411, "step": 18250 }, { "epoch": 0.22247815436364302, "grad_norm": 2.3476344024417357, "learning_rate": 4.09230275817832e-06, "loss": 0.7396, "step": 18255 }, { "epoch": 0.2225390905877908, "grad_norm": 2.2419751281627778, "learning_rate": 4.091982039769083e-06, "loss": 0.8034, "step": 18260 }, { "epoch": 0.22260002681193863, "grad_norm": 2.9459923881604433, "learning_rate": 4.091661321359847e-06, "loss": 0.8546, "step": 18265 }, { "epoch": 0.22266096303608643, "grad_norm": 3.6451622370354126, "learning_rate": 4.09134060295061e-06, "loss": 0.8091, "step": 18270 }, { "epoch": 0.22272189926023425, "grad_norm": 2.881668031008612, "learning_rate": 4.091019884541373e-06, "loss": 0.8609, "step": 18275 }, { "epoch": 0.22278283548438205, "grad_norm": 2.854515196749019, "learning_rate": 4.0906991661321365e-06, "loss": 0.8453, "step": 18280 }, { "epoch": 0.22284377170852984, "grad_norm": 3.7959095449446276, "learning_rate": 4.0903784477228995e-06, "loss": 0.8932, "step": 18285 }, { "epoch": 0.22290470793267766, "grad_norm": 2.444700122315024, "learning_rate": 4.0900577293136625e-06, "loss": 0.8231, "step": 18290 }, { "epoch": 0.22296564415682546, "grad_norm": 2.2722277468066485, "learning_rate": 4.0897370109044256e-06, "loss": 0.8288, "step": 18295 }, { "epoch": 0.22302658038097328, "grad_norm": 2.6971178344338607, "learning_rate": 4.0894162924951894e-06, "loss": 0.8209, "step": 18300 }, { "epoch": 0.22308751660512108, "grad_norm": 2.1923249086057752, "learning_rate": 4.0890955740859524e-06, "loss": 0.8565, "step": 18305 }, { "epoch": 0.2231484528292689, "grad_norm": 4.389019400573741, "learning_rate": 4.088774855676716e-06, "loss": 0.7986, "step": 18310 }, { "epoch": 0.2232093890534167, "grad_norm": 2.187117910905374, "learning_rate": 4.088454137267479e-06, "loss": 0.7994, "step": 18315 }, { "epoch": 0.2232703252775645, "grad_norm": 2.488515450275126, "learning_rate": 4.088133418858243e-06, "loss": 0.8222, "step": 18320 }, { "epoch": 0.2233312615017123, "grad_norm": 2.480565158249754, "learning_rate": 4.087812700449006e-06, "loss": 0.7899, "step": 18325 }, { "epoch": 0.2233921977258601, "grad_norm": 2.097771353782602, "learning_rate": 4.087491982039769e-06, "loss": 0.812, "step": 18330 }, { "epoch": 0.22345313395000793, "grad_norm": 3.773052093184026, "learning_rate": 4.087171263630533e-06, "loss": 0.7463, "step": 18335 }, { "epoch": 0.22351407017415573, "grad_norm": 3.614316514283819, "learning_rate": 4.086850545221296e-06, "loss": 0.819, "step": 18340 }, { "epoch": 0.22357500639830352, "grad_norm": 2.5653624171435614, "learning_rate": 4.08652982681206e-06, "loss": 0.8092, "step": 18345 }, { "epoch": 0.22363594262245134, "grad_norm": 2.8184637442521874, "learning_rate": 4.086209108402823e-06, "loss": 0.8356, "step": 18350 }, { "epoch": 0.22369687884659914, "grad_norm": 2.255980688044725, "learning_rate": 4.085888389993586e-06, "loss": 0.7618, "step": 18355 }, { "epoch": 0.22375781507074696, "grad_norm": 2.85218802166111, "learning_rate": 4.08556767158435e-06, "loss": 0.804, "step": 18360 }, { "epoch": 0.22381875129489476, "grad_norm": 2.847907454128724, "learning_rate": 4.085246953175113e-06, "loss": 0.8095, "step": 18365 }, { "epoch": 0.22387968751904258, "grad_norm": 2.4166843751507443, "learning_rate": 4.084926234765876e-06, "loss": 0.8671, "step": 18370 }, { "epoch": 0.22394062374319038, "grad_norm": 2.3069868691434214, "learning_rate": 4.084605516356639e-06, "loss": 0.7613, "step": 18375 }, { "epoch": 0.22400155996733817, "grad_norm": 2.6323409493818164, "learning_rate": 4.084284797947403e-06, "loss": 0.831, "step": 18380 }, { "epoch": 0.224062496191486, "grad_norm": 2.013725777652388, "learning_rate": 4.083964079538166e-06, "loss": 0.8742, "step": 18385 }, { "epoch": 0.2241234324156338, "grad_norm": 2.322449149642942, "learning_rate": 4.083643361128929e-06, "loss": 0.8393, "step": 18390 }, { "epoch": 0.2241843686397816, "grad_norm": 2.3296879781248205, "learning_rate": 4.083322642719693e-06, "loss": 0.7943, "step": 18395 }, { "epoch": 0.2242453048639294, "grad_norm": 2.4854786936564603, "learning_rate": 4.083001924310456e-06, "loss": 0.7885, "step": 18400 }, { "epoch": 0.22430624108807723, "grad_norm": 3.1711596146710885, "learning_rate": 4.082681205901219e-06, "loss": 0.8769, "step": 18405 }, { "epoch": 0.22436717731222502, "grad_norm": 2.6462698318481386, "learning_rate": 4.0823604874919826e-06, "loss": 0.796, "step": 18410 }, { "epoch": 0.22442811353637282, "grad_norm": 2.3763161604124057, "learning_rate": 4.082039769082746e-06, "loss": 0.7648, "step": 18415 }, { "epoch": 0.22448904976052064, "grad_norm": 4.064554411574056, "learning_rate": 4.081719050673509e-06, "loss": 0.7851, "step": 18420 }, { "epoch": 0.22454998598466844, "grad_norm": 2.5080223285154895, "learning_rate": 4.0813983322642725e-06, "loss": 0.7726, "step": 18425 }, { "epoch": 0.22461092220881626, "grad_norm": 2.5030523206253155, "learning_rate": 4.0810776138550355e-06, "loss": 0.7395, "step": 18430 }, { "epoch": 0.22467185843296406, "grad_norm": 2.661818161186452, "learning_rate": 4.0807568954457985e-06, "loss": 0.8098, "step": 18435 }, { "epoch": 0.22473279465711188, "grad_norm": 2.3281455668043174, "learning_rate": 4.080436177036562e-06, "loss": 0.7838, "step": 18440 }, { "epoch": 0.22479373088125967, "grad_norm": 2.190727724049778, "learning_rate": 4.080115458627325e-06, "loss": 0.8647, "step": 18445 }, { "epoch": 0.22485466710540747, "grad_norm": 4.827611344964269, "learning_rate": 4.079794740218088e-06, "loss": 0.8434, "step": 18450 }, { "epoch": 0.2249156033295553, "grad_norm": 2.429511295458162, "learning_rate": 4.079474021808852e-06, "loss": 0.7515, "step": 18455 }, { "epoch": 0.2249765395537031, "grad_norm": 2.822774397409981, "learning_rate": 4.079153303399615e-06, "loss": 0.7654, "step": 18460 }, { "epoch": 0.2250374757778509, "grad_norm": 2.343856059854339, "learning_rate": 4.078832584990378e-06, "loss": 0.8198, "step": 18465 }, { "epoch": 0.2250984120019987, "grad_norm": 2.6255309167781906, "learning_rate": 4.078511866581142e-06, "loss": 0.8094, "step": 18470 }, { "epoch": 0.22515934822614653, "grad_norm": 4.499587338617334, "learning_rate": 4.078191148171905e-06, "loss": 0.839, "step": 18475 }, { "epoch": 0.22522028445029432, "grad_norm": 2.634837510427818, "learning_rate": 4.077870429762669e-06, "loss": 0.9574, "step": 18480 }, { "epoch": 0.22528122067444212, "grad_norm": 2.6152678134070246, "learning_rate": 4.077549711353432e-06, "loss": 0.8029, "step": 18485 }, { "epoch": 0.22534215689858994, "grad_norm": 2.5535729553370365, "learning_rate": 4.077228992944195e-06, "loss": 0.86, "step": 18490 }, { "epoch": 0.22540309312273774, "grad_norm": 2.7484590761821566, "learning_rate": 4.076908274534959e-06, "loss": 0.8981, "step": 18495 }, { "epoch": 0.22546402934688556, "grad_norm": 2.2969988432792166, "learning_rate": 4.076587556125722e-06, "loss": 0.7606, "step": 18500 }, { "epoch": 0.22552496557103335, "grad_norm": 2.9709349950065516, "learning_rate": 4.076266837716486e-06, "loss": 0.7947, "step": 18505 }, { "epoch": 0.22558590179518118, "grad_norm": 2.3153753298725355, "learning_rate": 4.075946119307249e-06, "loss": 0.7649, "step": 18510 }, { "epoch": 0.22564683801932897, "grad_norm": 2.4337775418250995, "learning_rate": 4.075625400898012e-06, "loss": 0.7948, "step": 18515 }, { "epoch": 0.22570777424347677, "grad_norm": 2.6075901913618673, "learning_rate": 4.075304682488776e-06, "loss": 0.9068, "step": 18520 }, { "epoch": 0.2257687104676246, "grad_norm": 2.698203304580025, "learning_rate": 4.074983964079539e-06, "loss": 0.8489, "step": 18525 }, { "epoch": 0.22582964669177238, "grad_norm": 2.2925067789000297, "learning_rate": 4.074663245670302e-06, "loss": 0.8324, "step": 18530 }, { "epoch": 0.2258905829159202, "grad_norm": 2.9613355627758677, "learning_rate": 4.074342527261066e-06, "loss": 0.7456, "step": 18535 }, { "epoch": 0.225951519140068, "grad_norm": 2.560549073963876, "learning_rate": 4.074021808851829e-06, "loss": 0.8245, "step": 18540 }, { "epoch": 0.22601245536421583, "grad_norm": 3.0528184063982895, "learning_rate": 4.073701090442592e-06, "loss": 0.7677, "step": 18545 }, { "epoch": 0.22607339158836362, "grad_norm": 2.8709679787968074, "learning_rate": 4.073380372033355e-06, "loss": 0.7808, "step": 18550 }, { "epoch": 0.22613432781251142, "grad_norm": 2.10679471365178, "learning_rate": 4.0730596536241185e-06, "loss": 0.8149, "step": 18555 }, { "epoch": 0.22619526403665924, "grad_norm": 2.230853420861976, "learning_rate": 4.0727389352148815e-06, "loss": 0.778, "step": 18560 }, { "epoch": 0.22625620026080703, "grad_norm": 2.566868420858164, "learning_rate": 4.0724182168056446e-06, "loss": 0.8478, "step": 18565 }, { "epoch": 0.22631713648495486, "grad_norm": 2.5561586960592217, "learning_rate": 4.0720974983964084e-06, "loss": 0.7877, "step": 18570 }, { "epoch": 0.22637807270910265, "grad_norm": 2.7103022026133567, "learning_rate": 4.0717767799871714e-06, "loss": 0.7184, "step": 18575 }, { "epoch": 0.22643900893325045, "grad_norm": 2.439721735222994, "learning_rate": 4.0714560615779345e-06, "loss": 0.8449, "step": 18580 }, { "epoch": 0.22649994515739827, "grad_norm": 2.293489789670148, "learning_rate": 4.071135343168698e-06, "loss": 0.805, "step": 18585 }, { "epoch": 0.22656088138154606, "grad_norm": 2.5469702256874247, "learning_rate": 4.070814624759461e-06, "loss": 0.8588, "step": 18590 }, { "epoch": 0.2266218176056939, "grad_norm": 2.488130601698423, "learning_rate": 4.070493906350224e-06, "loss": 0.6959, "step": 18595 }, { "epoch": 0.22668275382984168, "grad_norm": 2.2338593877382293, "learning_rate": 4.070173187940988e-06, "loss": 0.8106, "step": 18600 }, { "epoch": 0.2267436900539895, "grad_norm": 2.147365981123933, "learning_rate": 4.069852469531751e-06, "loss": 0.7823, "step": 18605 }, { "epoch": 0.2268046262781373, "grad_norm": 1.9858625992989682, "learning_rate": 4.069531751122514e-06, "loss": 0.805, "step": 18610 }, { "epoch": 0.2268655625022851, "grad_norm": 2.6431714751288973, "learning_rate": 4.069211032713278e-06, "loss": 0.805, "step": 18615 }, { "epoch": 0.22692649872643292, "grad_norm": 2.2135485092487843, "learning_rate": 4.068890314304041e-06, "loss": 0.8576, "step": 18620 }, { "epoch": 0.2269874349505807, "grad_norm": 2.6004220969673715, "learning_rate": 4.068569595894805e-06, "loss": 0.8586, "step": 18625 }, { "epoch": 0.22704837117472854, "grad_norm": 4.453161219411458, "learning_rate": 4.068248877485568e-06, "loss": 0.8388, "step": 18630 }, { "epoch": 0.22710930739887633, "grad_norm": 2.358259601556452, "learning_rate": 4.067928159076331e-06, "loss": 0.696, "step": 18635 }, { "epoch": 0.22717024362302415, "grad_norm": 2.082329215975053, "learning_rate": 4.067607440667095e-06, "loss": 0.722, "step": 18640 }, { "epoch": 0.22723117984717195, "grad_norm": 2.3929257245638453, "learning_rate": 4.067286722257858e-06, "loss": 0.7894, "step": 18645 }, { "epoch": 0.22729211607131974, "grad_norm": 2.665449064551019, "learning_rate": 4.066966003848622e-06, "loss": 0.7299, "step": 18650 }, { "epoch": 0.22735305229546757, "grad_norm": 2.6492423591634595, "learning_rate": 4.066645285439385e-06, "loss": 0.7962, "step": 18655 }, { "epoch": 0.22741398851961536, "grad_norm": 2.498255723573009, "learning_rate": 4.066324567030148e-06, "loss": 0.7657, "step": 18660 }, { "epoch": 0.22747492474376318, "grad_norm": 2.9226283274197717, "learning_rate": 4.066003848620912e-06, "loss": 0.7719, "step": 18665 }, { "epoch": 0.22753586096791098, "grad_norm": 2.0136201674416867, "learning_rate": 4.065683130211675e-06, "loss": 0.8222, "step": 18670 }, { "epoch": 0.2275967971920588, "grad_norm": 2.380464145632817, "learning_rate": 4.065362411802438e-06, "loss": 0.857, "step": 18675 }, { "epoch": 0.2276577334162066, "grad_norm": 2.8457340295156386, "learning_rate": 4.0650416933932016e-06, "loss": 0.7822, "step": 18680 }, { "epoch": 0.2277186696403544, "grad_norm": 2.498735707257184, "learning_rate": 4.064720974983965e-06, "loss": 0.8273, "step": 18685 }, { "epoch": 0.22777960586450222, "grad_norm": 3.1688051068935237, "learning_rate": 4.064400256574728e-06, "loss": 0.7842, "step": 18690 }, { "epoch": 0.22784054208865, "grad_norm": 2.9130915932303147, "learning_rate": 4.0640795381654915e-06, "loss": 0.793, "step": 18695 }, { "epoch": 0.22790147831279783, "grad_norm": 3.2617216765651924, "learning_rate": 4.0637588197562545e-06, "loss": 0.8072, "step": 18700 }, { "epoch": 0.22796241453694563, "grad_norm": 2.5336428922416907, "learning_rate": 4.0634381013470175e-06, "loss": 0.8134, "step": 18705 }, { "epoch": 0.22802335076109345, "grad_norm": 2.360595120141422, "learning_rate": 4.0631173829377805e-06, "loss": 0.8499, "step": 18710 }, { "epoch": 0.22808428698524125, "grad_norm": 2.6069531383099767, "learning_rate": 4.062796664528544e-06, "loss": 0.8287, "step": 18715 }, { "epoch": 0.22814522320938904, "grad_norm": 3.911270912250534, "learning_rate": 4.062475946119307e-06, "loss": 0.7829, "step": 18720 }, { "epoch": 0.22820615943353686, "grad_norm": 2.4375594298673247, "learning_rate": 4.06215522771007e-06, "loss": 0.8119, "step": 18725 }, { "epoch": 0.22826709565768466, "grad_norm": 3.024527021552274, "learning_rate": 4.061834509300834e-06, "loss": 0.829, "step": 18730 }, { "epoch": 0.22832803188183248, "grad_norm": 2.9373737996879785, "learning_rate": 4.061513790891597e-06, "loss": 0.8774, "step": 18735 }, { "epoch": 0.22838896810598028, "grad_norm": 2.6047403169466787, "learning_rate": 4.06119307248236e-06, "loss": 0.8243, "step": 18740 }, { "epoch": 0.2284499043301281, "grad_norm": 2.6404711409263744, "learning_rate": 4.060872354073124e-06, "loss": 0.8131, "step": 18745 }, { "epoch": 0.2285108405542759, "grad_norm": 2.9621693961528304, "learning_rate": 4.060551635663887e-06, "loss": 0.8024, "step": 18750 }, { "epoch": 0.2285717767784237, "grad_norm": 2.047651274311135, "learning_rate": 4.06023091725465e-06, "loss": 0.8085, "step": 18755 }, { "epoch": 0.2286327130025715, "grad_norm": 4.036804918489285, "learning_rate": 4.059910198845414e-06, "loss": 0.7778, "step": 18760 }, { "epoch": 0.2286936492267193, "grad_norm": 2.6459466630000352, "learning_rate": 4.059589480436177e-06, "loss": 0.8439, "step": 18765 }, { "epoch": 0.22875458545086713, "grad_norm": 2.289686731084695, "learning_rate": 4.059268762026941e-06, "loss": 0.8682, "step": 18770 }, { "epoch": 0.22881552167501493, "grad_norm": 2.690393059972656, "learning_rate": 4.058948043617704e-06, "loss": 0.8595, "step": 18775 }, { "epoch": 0.22887645789916275, "grad_norm": 2.4601046985487356, "learning_rate": 4.058627325208467e-06, "loss": 0.7579, "step": 18780 }, { "epoch": 0.22893739412331054, "grad_norm": 2.8719285965721917, "learning_rate": 4.058306606799231e-06, "loss": 0.8542, "step": 18785 }, { "epoch": 0.22899833034745834, "grad_norm": 2.9150699130763114, "learning_rate": 4.057985888389994e-06, "loss": 0.8108, "step": 18790 }, { "epoch": 0.22905926657160616, "grad_norm": 2.3267720266318035, "learning_rate": 4.057665169980758e-06, "loss": 0.7491, "step": 18795 }, { "epoch": 0.22912020279575396, "grad_norm": 2.1970818063532533, "learning_rate": 4.057344451571521e-06, "loss": 0.7885, "step": 18800 }, { "epoch": 0.22918113901990178, "grad_norm": 2.3543023625745585, "learning_rate": 4.057023733162284e-06, "loss": 0.8279, "step": 18805 }, { "epoch": 0.22924207524404958, "grad_norm": 2.3224328324988606, "learning_rate": 4.056703014753048e-06, "loss": 0.7743, "step": 18810 }, { "epoch": 0.22930301146819737, "grad_norm": 2.2671983237381284, "learning_rate": 4.056382296343811e-06, "loss": 0.7539, "step": 18815 }, { "epoch": 0.2293639476923452, "grad_norm": 2.3647139037633296, "learning_rate": 4.056061577934574e-06, "loss": 0.815, "step": 18820 }, { "epoch": 0.229424883916493, "grad_norm": 2.0772061522473977, "learning_rate": 4.0557408595253375e-06, "loss": 0.7357, "step": 18825 }, { "epoch": 0.2294858201406408, "grad_norm": 2.2649520472695914, "learning_rate": 4.0554201411161005e-06, "loss": 0.831, "step": 18830 }, { "epoch": 0.2295467563647886, "grad_norm": 2.4941708214469904, "learning_rate": 4.0550994227068636e-06, "loss": 0.8198, "step": 18835 }, { "epoch": 0.22960769258893643, "grad_norm": 2.6934106629033767, "learning_rate": 4.0547787042976274e-06, "loss": 0.8244, "step": 18840 }, { "epoch": 0.22966862881308422, "grad_norm": 2.774782774525028, "learning_rate": 4.0544579858883904e-06, "loss": 0.8224, "step": 18845 }, { "epoch": 0.22972956503723202, "grad_norm": 2.5123959767756436, "learning_rate": 4.0541372674791535e-06, "loss": 0.8747, "step": 18850 }, { "epoch": 0.22979050126137984, "grad_norm": 2.441393699251173, "learning_rate": 4.053816549069917e-06, "loss": 0.8073, "step": 18855 }, { "epoch": 0.22985143748552764, "grad_norm": 3.1871491282172513, "learning_rate": 4.05349583066068e-06, "loss": 0.8015, "step": 18860 }, { "epoch": 0.22991237370967546, "grad_norm": 2.2773931112022874, "learning_rate": 4.053175112251443e-06, "loss": 0.8583, "step": 18865 }, { "epoch": 0.22997330993382326, "grad_norm": 2.510548458734944, "learning_rate": 4.052854393842207e-06, "loss": 0.8479, "step": 18870 }, { "epoch": 0.23003424615797108, "grad_norm": 2.626463091960324, "learning_rate": 4.05253367543297e-06, "loss": 0.7371, "step": 18875 }, { "epoch": 0.23009518238211887, "grad_norm": 2.3310921211159115, "learning_rate": 4.052212957023733e-06, "loss": 0.847, "step": 18880 }, { "epoch": 0.23015611860626667, "grad_norm": 2.5574896664510876, "learning_rate": 4.051892238614496e-06, "loss": 0.8244, "step": 18885 }, { "epoch": 0.2302170548304145, "grad_norm": 2.6201989746851595, "learning_rate": 4.05157152020526e-06, "loss": 0.7678, "step": 18890 }, { "epoch": 0.2302779910545623, "grad_norm": 2.8802854251896273, "learning_rate": 4.051250801796023e-06, "loss": 0.8392, "step": 18895 }, { "epoch": 0.2303389272787101, "grad_norm": 2.5271659730042555, "learning_rate": 4.050930083386786e-06, "loss": 0.7905, "step": 18900 }, { "epoch": 0.2303998635028579, "grad_norm": 2.297628274192448, "learning_rate": 4.05060936497755e-06, "loss": 0.713, "step": 18905 }, { "epoch": 0.23046079972700573, "grad_norm": 2.2157284024497907, "learning_rate": 4.050288646568313e-06, "loss": 0.798, "step": 18910 }, { "epoch": 0.23052173595115352, "grad_norm": 2.267240216996835, "learning_rate": 4.049967928159076e-06, "loss": 0.8511, "step": 18915 }, { "epoch": 0.23058267217530132, "grad_norm": 3.4577636953927606, "learning_rate": 4.04964720974984e-06, "loss": 0.8625, "step": 18920 }, { "epoch": 0.23064360839944914, "grad_norm": 3.538528109712986, "learning_rate": 4.049326491340603e-06, "loss": 0.7987, "step": 18925 }, { "epoch": 0.23070454462359694, "grad_norm": 2.700533329142815, "learning_rate": 4.049005772931367e-06, "loss": 0.8281, "step": 18930 }, { "epoch": 0.23076548084774476, "grad_norm": 2.4355596161150905, "learning_rate": 4.04868505452213e-06, "loss": 0.8528, "step": 18935 }, { "epoch": 0.23082641707189255, "grad_norm": 3.2950754834657294, "learning_rate": 4.048364336112893e-06, "loss": 0.9255, "step": 18940 }, { "epoch": 0.23088735329604038, "grad_norm": 2.8459656469709786, "learning_rate": 4.048043617703657e-06, "loss": 0.7419, "step": 18945 }, { "epoch": 0.23094828952018817, "grad_norm": 2.698187057173033, "learning_rate": 4.04772289929442e-06, "loss": 0.7904, "step": 18950 }, { "epoch": 0.23100922574433597, "grad_norm": 2.601668944105857, "learning_rate": 4.047402180885184e-06, "loss": 0.839, "step": 18955 }, { "epoch": 0.2310701619684838, "grad_norm": 2.8742519817796057, "learning_rate": 4.047081462475947e-06, "loss": 0.8412, "step": 18960 }, { "epoch": 0.23113109819263158, "grad_norm": 2.6251688471927164, "learning_rate": 4.04676074406671e-06, "loss": 0.7843, "step": 18965 }, { "epoch": 0.2311920344167794, "grad_norm": 2.504258698956891, "learning_rate": 4.0464400256574735e-06, "loss": 0.733, "step": 18970 }, { "epoch": 0.2312529706409272, "grad_norm": 3.168117119905577, "learning_rate": 4.0461193072482365e-06, "loss": 0.8594, "step": 18975 }, { "epoch": 0.23131390686507503, "grad_norm": 2.4767043438762397, "learning_rate": 4.0457985888389995e-06, "loss": 0.7968, "step": 18980 }, { "epoch": 0.23137484308922282, "grad_norm": 3.0541476439830437, "learning_rate": 4.045477870429763e-06, "loss": 0.7535, "step": 18985 }, { "epoch": 0.23143577931337062, "grad_norm": 2.624575563199193, "learning_rate": 4.045157152020526e-06, "loss": 0.76, "step": 18990 }, { "epoch": 0.23149671553751844, "grad_norm": 2.398179987438419, "learning_rate": 4.044836433611289e-06, "loss": 0.8417, "step": 18995 }, { "epoch": 0.23155765176166623, "grad_norm": 2.7211376192236343, "learning_rate": 4.044515715202053e-06, "loss": 0.7436, "step": 19000 }, { "epoch": 0.23161858798581406, "grad_norm": 3.1899801761597244, "learning_rate": 4.044194996792816e-06, "loss": 0.7876, "step": 19005 }, { "epoch": 0.23167952420996185, "grad_norm": 2.4625333363173825, "learning_rate": 4.043874278383579e-06, "loss": 0.7584, "step": 19010 }, { "epoch": 0.23174046043410967, "grad_norm": 4.267604526463293, "learning_rate": 4.043553559974343e-06, "loss": 0.7802, "step": 19015 }, { "epoch": 0.23180139665825747, "grad_norm": 2.1828476392881675, "learning_rate": 4.043232841565106e-06, "loss": 0.7191, "step": 19020 }, { "epoch": 0.23186233288240526, "grad_norm": 2.5510718907021532, "learning_rate": 4.042912123155869e-06, "loss": 0.8385, "step": 19025 }, { "epoch": 0.2319232691065531, "grad_norm": 2.5111880804403968, "learning_rate": 4.042591404746633e-06, "loss": 0.8191, "step": 19030 }, { "epoch": 0.23198420533070088, "grad_norm": 2.793164042737185, "learning_rate": 4.042270686337396e-06, "loss": 0.8256, "step": 19035 }, { "epoch": 0.2320451415548487, "grad_norm": 2.770152486142192, "learning_rate": 4.041949967928159e-06, "loss": 0.8356, "step": 19040 }, { "epoch": 0.2321060777789965, "grad_norm": 2.87868076425482, "learning_rate": 4.041629249518923e-06, "loss": 0.9018, "step": 19045 }, { "epoch": 0.23216701400314432, "grad_norm": 3.0014173910645603, "learning_rate": 4.041308531109686e-06, "loss": 0.8629, "step": 19050 }, { "epoch": 0.23222795022729212, "grad_norm": 2.7850251408351308, "learning_rate": 4.040987812700449e-06, "loss": 0.8638, "step": 19055 }, { "epoch": 0.2322888864514399, "grad_norm": 2.761543021883016, "learning_rate": 4.040667094291212e-06, "loss": 0.9478, "step": 19060 }, { "epoch": 0.23234982267558774, "grad_norm": 2.5032552866558886, "learning_rate": 4.040346375881976e-06, "loss": 0.8011, "step": 19065 }, { "epoch": 0.23241075889973553, "grad_norm": 2.5688830537840355, "learning_rate": 4.040025657472739e-06, "loss": 0.8131, "step": 19070 }, { "epoch": 0.23247169512388335, "grad_norm": 2.9738607490681663, "learning_rate": 4.039704939063503e-06, "loss": 0.7609, "step": 19075 }, { "epoch": 0.23253263134803115, "grad_norm": 2.6360319309586044, "learning_rate": 4.039384220654266e-06, "loss": 0.8098, "step": 19080 }, { "epoch": 0.23259356757217894, "grad_norm": 2.8587619385732284, "learning_rate": 4.039063502245029e-06, "loss": 0.7967, "step": 19085 }, { "epoch": 0.23265450379632677, "grad_norm": 2.5385166777999553, "learning_rate": 4.038742783835793e-06, "loss": 0.742, "step": 19090 }, { "epoch": 0.23271544002047456, "grad_norm": 2.567368691136843, "learning_rate": 4.038422065426556e-06, "loss": 0.8564, "step": 19095 }, { "epoch": 0.23277637624462238, "grad_norm": 2.2193992344721223, "learning_rate": 4.0381013470173195e-06, "loss": 0.8824, "step": 19100 }, { "epoch": 0.23283731246877018, "grad_norm": 2.526920116115757, "learning_rate": 4.0377806286080826e-06, "loss": 0.816, "step": 19105 }, { "epoch": 0.232898248692918, "grad_norm": 2.4019695707682875, "learning_rate": 4.037459910198846e-06, "loss": 0.7452, "step": 19110 }, { "epoch": 0.2329591849170658, "grad_norm": 2.466447323636906, "learning_rate": 4.0371391917896094e-06, "loss": 0.7338, "step": 19115 }, { "epoch": 0.2330201211412136, "grad_norm": 2.224558379001198, "learning_rate": 4.0368184733803725e-06, "loss": 0.8495, "step": 19120 }, { "epoch": 0.23308105736536142, "grad_norm": 2.948307982228322, "learning_rate": 4.036497754971136e-06, "loss": 0.853, "step": 19125 }, { "epoch": 0.2331419935895092, "grad_norm": 2.360819538932814, "learning_rate": 4.036177036561899e-06, "loss": 0.7118, "step": 19130 }, { "epoch": 0.23320292981365703, "grad_norm": 2.174079459054712, "learning_rate": 4.035856318152662e-06, "loss": 0.808, "step": 19135 }, { "epoch": 0.23326386603780483, "grad_norm": 2.37133124076865, "learning_rate": 4.035535599743425e-06, "loss": 0.7631, "step": 19140 }, { "epoch": 0.23332480226195265, "grad_norm": 2.542025724112476, "learning_rate": 4.035214881334189e-06, "loss": 0.8151, "step": 19145 }, { "epoch": 0.23338573848610045, "grad_norm": 2.2024573102962575, "learning_rate": 4.034894162924952e-06, "loss": 0.8357, "step": 19150 }, { "epoch": 0.23344667471024824, "grad_norm": 2.515306373814416, "learning_rate": 4.034573444515715e-06, "loss": 0.7997, "step": 19155 }, { "epoch": 0.23350761093439606, "grad_norm": 2.6064414671075213, "learning_rate": 4.034252726106479e-06, "loss": 0.8064, "step": 19160 }, { "epoch": 0.23356854715854386, "grad_norm": 2.4970563183932666, "learning_rate": 4.033932007697242e-06, "loss": 0.736, "step": 19165 }, { "epoch": 0.23362948338269168, "grad_norm": 3.5492815038690564, "learning_rate": 4.033611289288005e-06, "loss": 0.8904, "step": 19170 }, { "epoch": 0.23369041960683948, "grad_norm": 2.7185404749590814, "learning_rate": 4.033290570878769e-06, "loss": 0.8472, "step": 19175 }, { "epoch": 0.2337513558309873, "grad_norm": 2.3454572786658088, "learning_rate": 4.032969852469532e-06, "loss": 0.8506, "step": 19180 }, { "epoch": 0.2338122920551351, "grad_norm": 2.8576191524004537, "learning_rate": 4.032649134060295e-06, "loss": 0.7839, "step": 19185 }, { "epoch": 0.2338732282792829, "grad_norm": 2.950957872826438, "learning_rate": 4.032328415651059e-06, "loss": 0.7495, "step": 19190 }, { "epoch": 0.2339341645034307, "grad_norm": 2.290494949873018, "learning_rate": 4.032007697241822e-06, "loss": 0.8028, "step": 19195 }, { "epoch": 0.2339951007275785, "grad_norm": 2.6772145991979026, "learning_rate": 4.031686978832585e-06, "loss": 0.8205, "step": 19200 }, { "epoch": 0.23405603695172633, "grad_norm": 2.5588925056160763, "learning_rate": 4.031366260423349e-06, "loss": 0.804, "step": 19205 }, { "epoch": 0.23411697317587413, "grad_norm": 2.3992282763460935, "learning_rate": 4.031045542014112e-06, "loss": 0.8057, "step": 19210 }, { "epoch": 0.23417790940002195, "grad_norm": 2.59634368247731, "learning_rate": 4.030724823604875e-06, "loss": 0.78, "step": 19215 }, { "epoch": 0.23423884562416974, "grad_norm": 2.7478147304957354, "learning_rate": 4.030404105195638e-06, "loss": 0.8586, "step": 19220 }, { "epoch": 0.23429978184831754, "grad_norm": 2.6298216131289203, "learning_rate": 4.030083386786402e-06, "loss": 0.7806, "step": 19225 }, { "epoch": 0.23436071807246536, "grad_norm": 2.1823067712566124, "learning_rate": 4.029762668377165e-06, "loss": 0.7857, "step": 19230 }, { "epoch": 0.23442165429661316, "grad_norm": 1.9525046874862049, "learning_rate": 4.029441949967929e-06, "loss": 0.7512, "step": 19235 }, { "epoch": 0.23448259052076098, "grad_norm": 2.576092187139755, "learning_rate": 4.029121231558692e-06, "loss": 0.8408, "step": 19240 }, { "epoch": 0.23454352674490878, "grad_norm": 2.007966142188226, "learning_rate": 4.0288005131494555e-06, "loss": 0.7925, "step": 19245 }, { "epoch": 0.2346044629690566, "grad_norm": 3.2531778169689827, "learning_rate": 4.0284797947402185e-06, "loss": 0.8337, "step": 19250 }, { "epoch": 0.2346653991932044, "grad_norm": 2.377154501401322, "learning_rate": 4.0281590763309815e-06, "loss": 0.7374, "step": 19255 }, { "epoch": 0.2347263354173522, "grad_norm": 2.1712783756846967, "learning_rate": 4.027838357921745e-06, "loss": 0.7738, "step": 19260 }, { "epoch": 0.2347872716415, "grad_norm": 3.9379851381255784, "learning_rate": 4.027517639512508e-06, "loss": 0.8309, "step": 19265 }, { "epoch": 0.2348482078656478, "grad_norm": 3.5166441115409075, "learning_rate": 4.027196921103272e-06, "loss": 0.8091, "step": 19270 }, { "epoch": 0.23490914408979563, "grad_norm": 3.599209123715691, "learning_rate": 4.026876202694035e-06, "loss": 0.8149, "step": 19275 }, { "epoch": 0.23497008031394342, "grad_norm": 2.43748702540158, "learning_rate": 4.026555484284798e-06, "loss": 0.7734, "step": 19280 }, { "epoch": 0.23503101653809125, "grad_norm": 3.2517645622956337, "learning_rate": 4.026234765875562e-06, "loss": 0.8253, "step": 19285 }, { "epoch": 0.23509195276223904, "grad_norm": 3.777551266007837, "learning_rate": 4.025914047466325e-06, "loss": 0.7397, "step": 19290 }, { "epoch": 0.23515288898638684, "grad_norm": 2.3719898507110546, "learning_rate": 4.025593329057088e-06, "loss": 0.8805, "step": 19295 }, { "epoch": 0.23521382521053466, "grad_norm": 3.7301586946570016, "learning_rate": 4.025272610647851e-06, "loss": 0.7975, "step": 19300 }, { "epoch": 0.23527476143468246, "grad_norm": 2.2875041752828182, "learning_rate": 4.024951892238615e-06, "loss": 0.7181, "step": 19305 }, { "epoch": 0.23533569765883028, "grad_norm": 2.0769065840043845, "learning_rate": 4.024631173829378e-06, "loss": 0.7553, "step": 19310 }, { "epoch": 0.23539663388297807, "grad_norm": 2.6088137167115124, "learning_rate": 4.024310455420141e-06, "loss": 0.8365, "step": 19315 }, { "epoch": 0.23545757010712587, "grad_norm": 3.0028282100550547, "learning_rate": 4.023989737010905e-06, "loss": 0.8178, "step": 19320 }, { "epoch": 0.2355185063312737, "grad_norm": 2.6748203365754417, "learning_rate": 4.023669018601668e-06, "loss": 0.8119, "step": 19325 }, { "epoch": 0.2355794425554215, "grad_norm": 2.462948751386872, "learning_rate": 4.023348300192431e-06, "loss": 0.7652, "step": 19330 }, { "epoch": 0.2356403787795693, "grad_norm": 2.514654894042605, "learning_rate": 4.023027581783195e-06, "loss": 0.7837, "step": 19335 }, { "epoch": 0.2357013150037171, "grad_norm": 2.2994594779929542, "learning_rate": 4.022706863373958e-06, "loss": 0.8231, "step": 19340 }, { "epoch": 0.23576225122786493, "grad_norm": 2.016803589734924, "learning_rate": 4.022386144964721e-06, "loss": 0.81, "step": 19345 }, { "epoch": 0.23582318745201272, "grad_norm": 2.490298742137112, "learning_rate": 4.022065426555485e-06, "loss": 0.8236, "step": 19350 }, { "epoch": 0.23588412367616052, "grad_norm": 2.7708484661765755, "learning_rate": 4.021744708146248e-06, "loss": 0.8384, "step": 19355 }, { "epoch": 0.23594505990030834, "grad_norm": 2.3524599937347803, "learning_rate": 4.021423989737011e-06, "loss": 0.7828, "step": 19360 }, { "epoch": 0.23600599612445614, "grad_norm": 2.684981722372244, "learning_rate": 4.021103271327775e-06, "loss": 0.811, "step": 19365 }, { "epoch": 0.23606693234860396, "grad_norm": 2.897321275510482, "learning_rate": 4.020782552918538e-06, "loss": 0.8509, "step": 19370 }, { "epoch": 0.23612786857275175, "grad_norm": 2.2624271296558716, "learning_rate": 4.020461834509301e-06, "loss": 0.8602, "step": 19375 }, { "epoch": 0.23618880479689958, "grad_norm": 2.8940877105346794, "learning_rate": 4.020141116100065e-06, "loss": 0.8041, "step": 19380 }, { "epoch": 0.23624974102104737, "grad_norm": 2.42749624227191, "learning_rate": 4.019820397690828e-06, "loss": 0.837, "step": 19385 }, { "epoch": 0.23631067724519517, "grad_norm": 2.0125376863258757, "learning_rate": 4.019499679281591e-06, "loss": 0.8074, "step": 19390 }, { "epoch": 0.236371613469343, "grad_norm": 3.010130687627199, "learning_rate": 4.0191789608723545e-06, "loss": 0.7153, "step": 19395 }, { "epoch": 0.23643254969349078, "grad_norm": 2.474734033639747, "learning_rate": 4.0188582424631175e-06, "loss": 0.8435, "step": 19400 }, { "epoch": 0.2364934859176386, "grad_norm": 2.394722239911721, "learning_rate": 4.018537524053881e-06, "loss": 0.7841, "step": 19405 }, { "epoch": 0.2365544221417864, "grad_norm": 2.5337060006896372, "learning_rate": 4.018216805644644e-06, "loss": 0.7587, "step": 19410 }, { "epoch": 0.23661535836593423, "grad_norm": 2.2038207729447143, "learning_rate": 4.017896087235407e-06, "loss": 0.8362, "step": 19415 }, { "epoch": 0.23667629459008202, "grad_norm": 2.757941332255622, "learning_rate": 4.017575368826171e-06, "loss": 0.7758, "step": 19420 }, { "epoch": 0.23673723081422982, "grad_norm": 2.2830499830794557, "learning_rate": 4.017254650416934e-06, "loss": 0.8058, "step": 19425 }, { "epoch": 0.23679816703837764, "grad_norm": 2.529616899456153, "learning_rate": 4.016933932007698e-06, "loss": 0.7926, "step": 19430 }, { "epoch": 0.23685910326252543, "grad_norm": 3.4206358743705976, "learning_rate": 4.016613213598461e-06, "loss": 0.8947, "step": 19435 }, { "epoch": 0.23692003948667326, "grad_norm": 2.666816500845431, "learning_rate": 4.016292495189224e-06, "loss": 0.7995, "step": 19440 }, { "epoch": 0.23698097571082105, "grad_norm": 2.4205863373356813, "learning_rate": 4.015971776779988e-06, "loss": 0.8241, "step": 19445 }, { "epoch": 0.23704191193496887, "grad_norm": 2.502459122835189, "learning_rate": 4.015651058370751e-06, "loss": 0.82, "step": 19450 }, { "epoch": 0.23710284815911667, "grad_norm": 2.194934438729732, "learning_rate": 4.015330339961514e-06, "loss": 0.7944, "step": 19455 }, { "epoch": 0.23716378438326446, "grad_norm": 2.4698875908086846, "learning_rate": 4.015009621552278e-06, "loss": 0.8617, "step": 19460 }, { "epoch": 0.2372247206074123, "grad_norm": 2.602844295062105, "learning_rate": 4.014688903143041e-06, "loss": 0.8292, "step": 19465 }, { "epoch": 0.23728565683156008, "grad_norm": 2.131322004225199, "learning_rate": 4.014368184733804e-06, "loss": 0.7897, "step": 19470 }, { "epoch": 0.2373465930557079, "grad_norm": 2.496700428804466, "learning_rate": 4.014047466324567e-06, "loss": 0.8381, "step": 19475 }, { "epoch": 0.2374075292798557, "grad_norm": 2.2949797605617257, "learning_rate": 4.013726747915331e-06, "loss": 0.7377, "step": 19480 }, { "epoch": 0.23746846550400352, "grad_norm": 2.0793479628288836, "learning_rate": 4.013406029506094e-06, "loss": 0.8297, "step": 19485 }, { "epoch": 0.23752940172815132, "grad_norm": 2.7807851440782043, "learning_rate": 4.013085311096857e-06, "loss": 0.7958, "step": 19490 }, { "epoch": 0.2375903379522991, "grad_norm": 2.6372575161432197, "learning_rate": 4.012764592687621e-06, "loss": 0.8008, "step": 19495 }, { "epoch": 0.23765127417644694, "grad_norm": 2.187546390104816, "learning_rate": 4.012443874278384e-06, "loss": 0.7733, "step": 19500 }, { "epoch": 0.23771221040059473, "grad_norm": 2.225845471956329, "learning_rate": 4.012123155869147e-06, "loss": 0.8368, "step": 19505 }, { "epoch": 0.23777314662474255, "grad_norm": 2.7763757171059753, "learning_rate": 4.011802437459911e-06, "loss": 0.765, "step": 19510 }, { "epoch": 0.23783408284889035, "grad_norm": 2.2645814097850536, "learning_rate": 4.011481719050674e-06, "loss": 0.7765, "step": 19515 }, { "epoch": 0.23789501907303817, "grad_norm": 2.2351238227091588, "learning_rate": 4.011161000641437e-06, "loss": 0.7985, "step": 19520 }, { "epoch": 0.23795595529718597, "grad_norm": 2.562852503537302, "learning_rate": 4.0108402822322005e-06, "loss": 0.7992, "step": 19525 }, { "epoch": 0.23801689152133376, "grad_norm": 2.7739804619670174, "learning_rate": 4.0105195638229636e-06, "loss": 0.8036, "step": 19530 }, { "epoch": 0.23807782774548158, "grad_norm": 2.9717822694256504, "learning_rate": 4.0101988454137266e-06, "loss": 0.8538, "step": 19535 }, { "epoch": 0.23813876396962938, "grad_norm": 2.03566234315105, "learning_rate": 4.0098781270044904e-06, "loss": 0.7994, "step": 19540 }, { "epoch": 0.2381997001937772, "grad_norm": 2.742658999247678, "learning_rate": 4.0095574085952534e-06, "loss": 0.7791, "step": 19545 }, { "epoch": 0.238260636417925, "grad_norm": 2.3200500548140055, "learning_rate": 4.009236690186017e-06, "loss": 0.8565, "step": 19550 }, { "epoch": 0.2383215726420728, "grad_norm": 2.3432354703209133, "learning_rate": 4.00891597177678e-06, "loss": 0.83, "step": 19555 }, { "epoch": 0.23838250886622062, "grad_norm": 2.3574547751551433, "learning_rate": 4.008595253367543e-06, "loss": 0.8465, "step": 19560 }, { "epoch": 0.2384434450903684, "grad_norm": 2.2898647324789625, "learning_rate": 4.008274534958307e-06, "loss": 0.8871, "step": 19565 }, { "epoch": 0.23850438131451623, "grad_norm": 2.4119109287902276, "learning_rate": 4.00795381654907e-06, "loss": 0.7957, "step": 19570 }, { "epoch": 0.23856531753866403, "grad_norm": 2.332151355270292, "learning_rate": 4.007633098139834e-06, "loss": 0.8059, "step": 19575 }, { "epoch": 0.23862625376281185, "grad_norm": 2.169198664651881, "learning_rate": 4.007312379730597e-06, "loss": 0.8529, "step": 19580 }, { "epoch": 0.23868718998695965, "grad_norm": 2.4823302417658364, "learning_rate": 4.00699166132136e-06, "loss": 0.8173, "step": 19585 }, { "epoch": 0.23874812621110744, "grad_norm": 2.1646172476225187, "learning_rate": 4.006670942912124e-06, "loss": 0.759, "step": 19590 }, { "epoch": 0.23880906243525526, "grad_norm": 2.5695487696708907, "learning_rate": 4.006350224502887e-06, "loss": 0.7902, "step": 19595 }, { "epoch": 0.23886999865940306, "grad_norm": 2.6686541922552283, "learning_rate": 4.00602950609365e-06, "loss": 0.8468, "step": 19600 }, { "epoch": 0.23893093488355088, "grad_norm": 2.480263268031272, "learning_rate": 4.005708787684414e-06, "loss": 0.8102, "step": 19605 }, { "epoch": 0.23899187110769868, "grad_norm": 2.5276020982421996, "learning_rate": 4.005388069275177e-06, "loss": 0.7622, "step": 19610 }, { "epoch": 0.2390528073318465, "grad_norm": 2.1974608443838646, "learning_rate": 4.00506735086594e-06, "loss": 0.7973, "step": 19615 }, { "epoch": 0.2391137435559943, "grad_norm": 2.430057818798686, "learning_rate": 4.004746632456704e-06, "loss": 0.7409, "step": 19620 }, { "epoch": 0.2391746797801421, "grad_norm": 2.5341347648972836, "learning_rate": 4.004425914047467e-06, "loss": 0.8427, "step": 19625 }, { "epoch": 0.2392356160042899, "grad_norm": 3.0254382679934255, "learning_rate": 4.00410519563823e-06, "loss": 0.8205, "step": 19630 }, { "epoch": 0.2392965522284377, "grad_norm": 2.4589432867562997, "learning_rate": 4.003784477228994e-06, "loss": 0.7411, "step": 19635 }, { "epoch": 0.23935748845258553, "grad_norm": 3.045783402638936, "learning_rate": 4.003463758819757e-06, "loss": 0.8091, "step": 19640 }, { "epoch": 0.23941842467673333, "grad_norm": 2.8355397020351183, "learning_rate": 4.00314304041052e-06, "loss": 0.8179, "step": 19645 }, { "epoch": 0.23947936090088115, "grad_norm": 3.035435918250518, "learning_rate": 4.002822322001283e-06, "loss": 0.7596, "step": 19650 }, { "epoch": 0.23954029712502894, "grad_norm": 2.5602439894595492, "learning_rate": 4.002501603592047e-06, "loss": 0.8375, "step": 19655 }, { "epoch": 0.23960123334917674, "grad_norm": 2.2771344112552305, "learning_rate": 4.00218088518281e-06, "loss": 0.8614, "step": 19660 }, { "epoch": 0.23966216957332456, "grad_norm": 4.794154159741414, "learning_rate": 4.001860166773573e-06, "loss": 0.859, "step": 19665 }, { "epoch": 0.23972310579747236, "grad_norm": 2.7924661719551254, "learning_rate": 4.0015394483643365e-06, "loss": 0.7924, "step": 19670 }, { "epoch": 0.23978404202162018, "grad_norm": 2.3631126564830245, "learning_rate": 4.0012187299550995e-06, "loss": 0.8144, "step": 19675 }, { "epoch": 0.23984497824576798, "grad_norm": 2.462346506750877, "learning_rate": 4.0008980115458625e-06, "loss": 0.7854, "step": 19680 }, { "epoch": 0.2399059144699158, "grad_norm": 2.7679679622433997, "learning_rate": 4.000577293136626e-06, "loss": 0.8161, "step": 19685 }, { "epoch": 0.2399668506940636, "grad_norm": 2.7109040068034562, "learning_rate": 4.000256574727389e-06, "loss": 0.8342, "step": 19690 }, { "epoch": 0.2400277869182114, "grad_norm": 2.716138482459134, "learning_rate": 3.999935856318152e-06, "loss": 0.7492, "step": 19695 }, { "epoch": 0.2400887231423592, "grad_norm": 2.7277358596442682, "learning_rate": 3.999615137908916e-06, "loss": 0.7596, "step": 19700 }, { "epoch": 0.240149659366507, "grad_norm": 3.4666686080322076, "learning_rate": 3.999294419499679e-06, "loss": 0.8661, "step": 19705 }, { "epoch": 0.24021059559065483, "grad_norm": 2.1491552753185714, "learning_rate": 3.998973701090443e-06, "loss": 0.7563, "step": 19710 }, { "epoch": 0.24027153181480262, "grad_norm": 2.4947965753803447, "learning_rate": 3.998652982681206e-06, "loss": 0.8845, "step": 19715 }, { "epoch": 0.24033246803895045, "grad_norm": 2.007552651729979, "learning_rate": 3.99833226427197e-06, "loss": 0.769, "step": 19720 }, { "epoch": 0.24039340426309824, "grad_norm": 2.5759013579709333, "learning_rate": 3.998011545862733e-06, "loss": 0.7945, "step": 19725 }, { "epoch": 0.24045434048724604, "grad_norm": 2.359858960714545, "learning_rate": 3.997690827453496e-06, "loss": 0.7888, "step": 19730 }, { "epoch": 0.24051527671139386, "grad_norm": 3.0384825017464605, "learning_rate": 3.99737010904426e-06, "loss": 0.8454, "step": 19735 }, { "epoch": 0.24057621293554166, "grad_norm": 3.0608387742485914, "learning_rate": 3.997049390635023e-06, "loss": 0.814, "step": 19740 }, { "epoch": 0.24063714915968948, "grad_norm": 2.2403508705823856, "learning_rate": 3.996728672225786e-06, "loss": 0.7123, "step": 19745 }, { "epoch": 0.24069808538383727, "grad_norm": 2.2145547107720915, "learning_rate": 3.99640795381655e-06, "loss": 0.6925, "step": 19750 }, { "epoch": 0.2407590216079851, "grad_norm": 2.701352636594727, "learning_rate": 3.996087235407313e-06, "loss": 0.8697, "step": 19755 }, { "epoch": 0.2408199578321329, "grad_norm": 3.354061611544648, "learning_rate": 3.995766516998076e-06, "loss": 0.7906, "step": 19760 }, { "epoch": 0.2408808940562807, "grad_norm": 2.518473630521438, "learning_rate": 3.99544579858884e-06, "loss": 0.7403, "step": 19765 }, { "epoch": 0.2409418302804285, "grad_norm": 2.6301428949345627, "learning_rate": 3.995125080179603e-06, "loss": 0.8125, "step": 19770 }, { "epoch": 0.2410027665045763, "grad_norm": 2.971290221443768, "learning_rate": 3.994804361770366e-06, "loss": 0.8034, "step": 19775 }, { "epoch": 0.24106370272872413, "grad_norm": 2.807542724690376, "learning_rate": 3.99448364336113e-06, "loss": 0.7845, "step": 19780 }, { "epoch": 0.24112463895287192, "grad_norm": 2.8147552560120905, "learning_rate": 3.994162924951893e-06, "loss": 0.8075, "step": 19785 }, { "epoch": 0.24118557517701972, "grad_norm": 2.0905104217183386, "learning_rate": 3.993842206542656e-06, "loss": 0.8283, "step": 19790 }, { "epoch": 0.24124651140116754, "grad_norm": 2.732569520350251, "learning_rate": 3.9935214881334195e-06, "loss": 0.7926, "step": 19795 }, { "epoch": 0.24130744762531534, "grad_norm": 3.6916990183824874, "learning_rate": 3.9932007697241826e-06, "loss": 0.7649, "step": 19800 }, { "epoch": 0.24136838384946316, "grad_norm": 2.3193933764469907, "learning_rate": 3.9928800513149456e-06, "loss": 0.8531, "step": 19805 }, { "epoch": 0.24142932007361095, "grad_norm": 2.7914217683666944, "learning_rate": 3.992559332905709e-06, "loss": 0.8119, "step": 19810 }, { "epoch": 0.24149025629775878, "grad_norm": 3.016403038671884, "learning_rate": 3.9922386144964724e-06, "loss": 0.8123, "step": 19815 }, { "epoch": 0.24155119252190657, "grad_norm": 3.0361539183911703, "learning_rate": 3.9919178960872355e-06, "loss": 0.7758, "step": 19820 }, { "epoch": 0.24161212874605437, "grad_norm": 2.8235881106916834, "learning_rate": 3.9915971776779985e-06, "loss": 0.7566, "step": 19825 }, { "epoch": 0.2416730649702022, "grad_norm": 3.391178990662964, "learning_rate": 3.991276459268762e-06, "loss": 0.8474, "step": 19830 }, { "epoch": 0.24173400119434998, "grad_norm": 2.1543838239047166, "learning_rate": 3.990955740859525e-06, "loss": 0.7265, "step": 19835 }, { "epoch": 0.2417949374184978, "grad_norm": 2.106561984312957, "learning_rate": 3.990635022450288e-06, "loss": 0.7965, "step": 19840 }, { "epoch": 0.2418558736426456, "grad_norm": 2.6807767160709925, "learning_rate": 3.990314304041052e-06, "loss": 0.8404, "step": 19845 }, { "epoch": 0.24191680986679343, "grad_norm": 2.7505708672984754, "learning_rate": 3.989993585631815e-06, "loss": 0.7409, "step": 19850 }, { "epoch": 0.24197774609094122, "grad_norm": 3.0245282475258963, "learning_rate": 3.989672867222579e-06, "loss": 0.7971, "step": 19855 }, { "epoch": 0.24203868231508902, "grad_norm": 2.822036572597339, "learning_rate": 3.989352148813342e-06, "loss": 0.7741, "step": 19860 }, { "epoch": 0.24209961853923684, "grad_norm": 3.2691224650162196, "learning_rate": 3.989031430404105e-06, "loss": 0.7922, "step": 19865 }, { "epoch": 0.24216055476338463, "grad_norm": 3.0694249770053332, "learning_rate": 3.988710711994869e-06, "loss": 0.8489, "step": 19870 }, { "epoch": 0.24222149098753246, "grad_norm": 3.834636052959086, "learning_rate": 3.988389993585632e-06, "loss": 0.7819, "step": 19875 }, { "epoch": 0.24228242721168025, "grad_norm": 2.3192927127095544, "learning_rate": 3.988069275176396e-06, "loss": 0.7866, "step": 19880 }, { "epoch": 0.24234336343582807, "grad_norm": 3.3805577828218363, "learning_rate": 3.987748556767159e-06, "loss": 0.8395, "step": 19885 }, { "epoch": 0.24240429965997587, "grad_norm": 2.385045232329229, "learning_rate": 3.987427838357922e-06, "loss": 0.8645, "step": 19890 }, { "epoch": 0.24246523588412366, "grad_norm": 2.522148365164916, "learning_rate": 3.987107119948686e-06, "loss": 0.7796, "step": 19895 }, { "epoch": 0.2425261721082715, "grad_norm": 2.8323208356798553, "learning_rate": 3.986786401539449e-06, "loss": 0.8346, "step": 19900 }, { "epoch": 0.24258710833241928, "grad_norm": 2.408125866149086, "learning_rate": 3.986465683130212e-06, "loss": 0.8231, "step": 19905 }, { "epoch": 0.2426480445565671, "grad_norm": 2.5250826041722005, "learning_rate": 3.986144964720976e-06, "loss": 0.8543, "step": 19910 }, { "epoch": 0.2427089807807149, "grad_norm": 2.4911117025289413, "learning_rate": 3.985824246311739e-06, "loss": 0.8855, "step": 19915 }, { "epoch": 0.24276991700486272, "grad_norm": 2.245268681822658, "learning_rate": 3.985503527902502e-06, "loss": 0.7966, "step": 19920 }, { "epoch": 0.24283085322901052, "grad_norm": 2.4569356958990376, "learning_rate": 3.985182809493266e-06, "loss": 0.7931, "step": 19925 }, { "epoch": 0.2428917894531583, "grad_norm": 2.6211986315173967, "learning_rate": 3.984862091084029e-06, "loss": 0.8515, "step": 19930 }, { "epoch": 0.24295272567730614, "grad_norm": 2.8382459510158666, "learning_rate": 3.984541372674792e-06, "loss": 0.7735, "step": 19935 }, { "epoch": 0.24301366190145393, "grad_norm": 2.7523672331797697, "learning_rate": 3.9842206542655555e-06, "loss": 0.7928, "step": 19940 }, { "epoch": 0.24307459812560175, "grad_norm": 2.432334939932047, "learning_rate": 3.9838999358563185e-06, "loss": 0.7818, "step": 19945 }, { "epoch": 0.24313553434974955, "grad_norm": 3.1461319302551125, "learning_rate": 3.9835792174470815e-06, "loss": 0.7746, "step": 19950 }, { "epoch": 0.24319647057389737, "grad_norm": 2.145069317449062, "learning_rate": 3.983258499037845e-06, "loss": 0.802, "step": 19955 }, { "epoch": 0.24325740679804517, "grad_norm": 3.519948454253214, "learning_rate": 3.982937780628608e-06, "loss": 0.817, "step": 19960 }, { "epoch": 0.24331834302219296, "grad_norm": 2.583513203327948, "learning_rate": 3.982617062219371e-06, "loss": 0.8125, "step": 19965 }, { "epoch": 0.24337927924634079, "grad_norm": 2.472081055242854, "learning_rate": 3.982296343810135e-06, "loss": 0.8132, "step": 19970 }, { "epoch": 0.24344021547048858, "grad_norm": 2.8689711046859165, "learning_rate": 3.981975625400898e-06, "loss": 0.7991, "step": 19975 }, { "epoch": 0.2435011516946364, "grad_norm": 2.5666952693466727, "learning_rate": 3.981654906991661e-06, "loss": 0.7628, "step": 19980 }, { "epoch": 0.2435620879187842, "grad_norm": 2.4736487504458995, "learning_rate": 3.981334188582424e-06, "loss": 0.821, "step": 19985 }, { "epoch": 0.24362302414293202, "grad_norm": 2.64491656143583, "learning_rate": 3.981013470173188e-06, "loss": 0.7985, "step": 19990 }, { "epoch": 0.24368396036707982, "grad_norm": 2.6601398080005643, "learning_rate": 3.980692751763951e-06, "loss": 0.7786, "step": 19995 }, { "epoch": 0.2437448965912276, "grad_norm": 2.251398209558364, "learning_rate": 3.980372033354715e-06, "loss": 0.869, "step": 20000 }, { "epoch": 0.24380583281537543, "grad_norm": 2.204186705194341, "learning_rate": 3.980051314945478e-06, "loss": 0.8073, "step": 20005 }, { "epoch": 0.24386676903952323, "grad_norm": 3.057958755131592, "learning_rate": 3.979730596536241e-06, "loss": 0.8562, "step": 20010 }, { "epoch": 0.24392770526367105, "grad_norm": 2.2419217935794205, "learning_rate": 3.979409878127005e-06, "loss": 0.7826, "step": 20015 }, { "epoch": 0.24398864148781885, "grad_norm": 2.9014234571690953, "learning_rate": 3.979089159717768e-06, "loss": 0.7766, "step": 20020 }, { "epoch": 0.24404957771196664, "grad_norm": 2.858659496409016, "learning_rate": 3.978768441308532e-06, "loss": 0.8137, "step": 20025 }, { "epoch": 0.24411051393611446, "grad_norm": 2.8915853753008025, "learning_rate": 3.978447722899295e-06, "loss": 0.825, "step": 20030 }, { "epoch": 0.24417145016026226, "grad_norm": 2.432707901436571, "learning_rate": 3.978127004490058e-06, "loss": 0.7034, "step": 20035 }, { "epoch": 0.24423238638441008, "grad_norm": 2.200642163405643, "learning_rate": 3.977806286080822e-06, "loss": 0.8196, "step": 20040 }, { "epoch": 0.24429332260855788, "grad_norm": 2.3124979841255513, "learning_rate": 3.977485567671585e-06, "loss": 0.8016, "step": 20045 }, { "epoch": 0.2443542588327057, "grad_norm": 3.3952534541040884, "learning_rate": 3.977164849262349e-06, "loss": 0.7221, "step": 20050 }, { "epoch": 0.2444151950568535, "grad_norm": 2.466912647749775, "learning_rate": 3.976844130853112e-06, "loss": 0.7405, "step": 20055 }, { "epoch": 0.2444761312810013, "grad_norm": 2.56396703530949, "learning_rate": 3.976523412443875e-06, "loss": 0.7997, "step": 20060 }, { "epoch": 0.2445370675051491, "grad_norm": 2.961883039094653, "learning_rate": 3.976202694034638e-06, "loss": 0.751, "step": 20065 }, { "epoch": 0.2445980037292969, "grad_norm": 2.2048958716007676, "learning_rate": 3.9758819756254016e-06, "loss": 0.8236, "step": 20070 }, { "epoch": 0.24465893995344473, "grad_norm": 2.0793122038136294, "learning_rate": 3.9755612572161646e-06, "loss": 0.8386, "step": 20075 }, { "epoch": 0.24471987617759253, "grad_norm": 2.656398441409582, "learning_rate": 3.975240538806928e-06, "loss": 0.7486, "step": 20080 }, { "epoch": 0.24478081240174035, "grad_norm": 2.77446943369274, "learning_rate": 3.9749198203976914e-06, "loss": 0.8353, "step": 20085 }, { "epoch": 0.24484174862588814, "grad_norm": 2.7960665171405665, "learning_rate": 3.9745991019884545e-06, "loss": 0.7543, "step": 20090 }, { "epoch": 0.24490268485003594, "grad_norm": 2.3114957276474613, "learning_rate": 3.9742783835792175e-06, "loss": 0.7744, "step": 20095 }, { "epoch": 0.24496362107418376, "grad_norm": 2.5190261279324972, "learning_rate": 3.973957665169981e-06, "loss": 0.7649, "step": 20100 }, { "epoch": 0.24502455729833156, "grad_norm": 2.706657268922746, "learning_rate": 3.973636946760744e-06, "loss": 0.7919, "step": 20105 }, { "epoch": 0.24508549352247938, "grad_norm": 1.804522889203391, "learning_rate": 3.973316228351507e-06, "loss": 0.7474, "step": 20110 }, { "epoch": 0.24514642974662718, "grad_norm": 2.2665465697689293, "learning_rate": 3.972995509942271e-06, "loss": 0.7182, "step": 20115 }, { "epoch": 0.245207365970775, "grad_norm": 2.642060394998278, "learning_rate": 3.972674791533034e-06, "loss": 0.7927, "step": 20120 }, { "epoch": 0.2452683021949228, "grad_norm": 2.1026425406129414, "learning_rate": 3.972354073123797e-06, "loss": 0.7795, "step": 20125 }, { "epoch": 0.2453292384190706, "grad_norm": 2.4914036837459452, "learning_rate": 3.972033354714561e-06, "loss": 0.8297, "step": 20130 }, { "epoch": 0.2453901746432184, "grad_norm": 2.4697642830212923, "learning_rate": 3.971712636305324e-06, "loss": 0.7974, "step": 20135 }, { "epoch": 0.2454511108673662, "grad_norm": 3.149188704594975, "learning_rate": 3.971391917896087e-06, "loss": 0.8659, "step": 20140 }, { "epoch": 0.24551204709151403, "grad_norm": 2.2417502519048895, "learning_rate": 3.97107119948685e-06, "loss": 0.7834, "step": 20145 }, { "epoch": 0.24557298331566182, "grad_norm": 2.3864016507959667, "learning_rate": 3.970750481077614e-06, "loss": 0.7924, "step": 20150 }, { "epoch": 0.24563391953980965, "grad_norm": 2.6755028751424734, "learning_rate": 3.970429762668377e-06, "loss": 0.8058, "step": 20155 }, { "epoch": 0.24569485576395744, "grad_norm": 2.1998165305571185, "learning_rate": 3.970109044259141e-06, "loss": 0.8065, "step": 20160 }, { "epoch": 0.24575579198810524, "grad_norm": 2.68264954168057, "learning_rate": 3.969788325849904e-06, "loss": 0.8172, "step": 20165 }, { "epoch": 0.24581672821225306, "grad_norm": 2.7276519898876264, "learning_rate": 3.969467607440668e-06, "loss": 0.8224, "step": 20170 }, { "epoch": 0.24587766443640086, "grad_norm": 2.5135265839084755, "learning_rate": 3.969146889031431e-06, "loss": 0.7698, "step": 20175 }, { "epoch": 0.24593860066054868, "grad_norm": 2.422271034610516, "learning_rate": 3.968826170622194e-06, "loss": 0.8116, "step": 20180 }, { "epoch": 0.24599953688469647, "grad_norm": 2.2431799708946674, "learning_rate": 3.968505452212958e-06, "loss": 0.8076, "step": 20185 }, { "epoch": 0.2460604731088443, "grad_norm": 2.7334986834894406, "learning_rate": 3.968184733803721e-06, "loss": 0.8097, "step": 20190 }, { "epoch": 0.2461214093329921, "grad_norm": 2.8316034143261586, "learning_rate": 3.967864015394485e-06, "loss": 0.8182, "step": 20195 }, { "epoch": 0.2461823455571399, "grad_norm": 2.3129157355531005, "learning_rate": 3.967543296985248e-06, "loss": 0.7613, "step": 20200 }, { "epoch": 0.2462432817812877, "grad_norm": 2.3168437777401376, "learning_rate": 3.967222578576011e-06, "loss": 0.7512, "step": 20205 }, { "epoch": 0.2463042180054355, "grad_norm": 3.682195543103755, "learning_rate": 3.9669018601667745e-06, "loss": 0.8475, "step": 20210 }, { "epoch": 0.24636515422958333, "grad_norm": 2.272029673011724, "learning_rate": 3.9665811417575375e-06, "loss": 0.8283, "step": 20215 }, { "epoch": 0.24642609045373112, "grad_norm": 3.110851715243513, "learning_rate": 3.9662604233483005e-06, "loss": 0.8342, "step": 20220 }, { "epoch": 0.24648702667787895, "grad_norm": 2.9459686340457916, "learning_rate": 3.9659397049390635e-06, "loss": 0.743, "step": 20225 }, { "epoch": 0.24654796290202674, "grad_norm": 2.463013913568064, "learning_rate": 3.965618986529827e-06, "loss": 0.7974, "step": 20230 }, { "epoch": 0.24660889912617454, "grad_norm": 2.7126315550207316, "learning_rate": 3.96529826812059e-06, "loss": 0.788, "step": 20235 }, { "epoch": 0.24666983535032236, "grad_norm": 2.8471991478572196, "learning_rate": 3.9649775497113534e-06, "loss": 0.85, "step": 20240 }, { "epoch": 0.24673077157447015, "grad_norm": 2.2982864533161416, "learning_rate": 3.964656831302117e-06, "loss": 0.7125, "step": 20245 }, { "epoch": 0.24679170779861798, "grad_norm": 2.463532348532444, "learning_rate": 3.96433611289288e-06, "loss": 0.8249, "step": 20250 }, { "epoch": 0.24685264402276577, "grad_norm": 2.662758555864386, "learning_rate": 3.964015394483643e-06, "loss": 0.7612, "step": 20255 }, { "epoch": 0.24691358024691357, "grad_norm": 2.519884287111111, "learning_rate": 3.963694676074407e-06, "loss": 0.7723, "step": 20260 }, { "epoch": 0.2469745164710614, "grad_norm": 2.116746807591162, "learning_rate": 3.96337395766517e-06, "loss": 0.7494, "step": 20265 }, { "epoch": 0.24703545269520918, "grad_norm": 3.2242526920804693, "learning_rate": 3.963053239255933e-06, "loss": 0.7832, "step": 20270 }, { "epoch": 0.247096388919357, "grad_norm": 4.062909770355136, "learning_rate": 3.962732520846697e-06, "loss": 0.7327, "step": 20275 }, { "epoch": 0.2471573251435048, "grad_norm": 2.625115649172671, "learning_rate": 3.96241180243746e-06, "loss": 0.7447, "step": 20280 }, { "epoch": 0.24721826136765263, "grad_norm": 2.0915280944138646, "learning_rate": 3.962091084028223e-06, "loss": 0.7743, "step": 20285 }, { "epoch": 0.24727919759180042, "grad_norm": 2.7704605862776623, "learning_rate": 3.961770365618987e-06, "loss": 0.8586, "step": 20290 }, { "epoch": 0.24734013381594822, "grad_norm": 2.5484964881127556, "learning_rate": 3.96144964720975e-06, "loss": 0.8561, "step": 20295 }, { "epoch": 0.24740107004009604, "grad_norm": 2.4124502425600145, "learning_rate": 3.961128928800513e-06, "loss": 0.7905, "step": 20300 }, { "epoch": 0.24746200626424383, "grad_norm": 5.237352644968336, "learning_rate": 3.960808210391277e-06, "loss": 0.7609, "step": 20305 }, { "epoch": 0.24752294248839166, "grad_norm": 2.455352936368243, "learning_rate": 3.96048749198204e-06, "loss": 0.8331, "step": 20310 }, { "epoch": 0.24758387871253945, "grad_norm": 2.0975204176237505, "learning_rate": 3.960166773572803e-06, "loss": 0.7994, "step": 20315 }, { "epoch": 0.24764481493668727, "grad_norm": 2.502235135361707, "learning_rate": 3.959846055163567e-06, "loss": 0.8287, "step": 20320 }, { "epoch": 0.24770575116083507, "grad_norm": 2.5180781826678413, "learning_rate": 3.95952533675433e-06, "loss": 0.8191, "step": 20325 }, { "epoch": 0.24776668738498286, "grad_norm": 2.4839565472513536, "learning_rate": 3.959204618345094e-06, "loss": 0.8098, "step": 20330 }, { "epoch": 0.2478276236091307, "grad_norm": 2.245348847174481, "learning_rate": 3.958883899935857e-06, "loss": 0.7998, "step": 20335 }, { "epoch": 0.24788855983327848, "grad_norm": 2.8554685306347642, "learning_rate": 3.95856318152662e-06, "loss": 0.7965, "step": 20340 }, { "epoch": 0.2479494960574263, "grad_norm": 2.6013583467942136, "learning_rate": 3.9582424631173836e-06, "loss": 0.6932, "step": 20345 }, { "epoch": 0.2480104322815741, "grad_norm": 1.9678475910008404, "learning_rate": 3.957921744708147e-06, "loss": 0.7462, "step": 20350 }, { "epoch": 0.24807136850572192, "grad_norm": 3.2485298712784245, "learning_rate": 3.9576010262989104e-06, "loss": 0.8037, "step": 20355 }, { "epoch": 0.24813230472986972, "grad_norm": 3.0766781707917055, "learning_rate": 3.9572803078896735e-06, "loss": 0.7854, "step": 20360 }, { "epoch": 0.2481932409540175, "grad_norm": 2.7994216049987424, "learning_rate": 3.9569595894804365e-06, "loss": 0.7796, "step": 20365 }, { "epoch": 0.24825417717816534, "grad_norm": 2.539445316153097, "learning_rate": 3.9566388710712e-06, "loss": 0.8325, "step": 20370 }, { "epoch": 0.24831511340231313, "grad_norm": 3.062694321746974, "learning_rate": 3.956318152661963e-06, "loss": 0.8204, "step": 20375 }, { "epoch": 0.24837604962646095, "grad_norm": 2.723372537670909, "learning_rate": 3.955997434252726e-06, "loss": 0.7713, "step": 20380 }, { "epoch": 0.24843698585060875, "grad_norm": 2.4081569414053967, "learning_rate": 3.95567671584349e-06, "loss": 0.8229, "step": 20385 }, { "epoch": 0.24849792207475657, "grad_norm": 2.2236507569047337, "learning_rate": 3.955355997434253e-06, "loss": 0.8527, "step": 20390 }, { "epoch": 0.24855885829890437, "grad_norm": 2.021902445122906, "learning_rate": 3.955035279025016e-06, "loss": 0.7, "step": 20395 }, { "epoch": 0.24861979452305216, "grad_norm": 2.0699341428303595, "learning_rate": 3.954714560615779e-06, "loss": 0.7662, "step": 20400 }, { "epoch": 0.24868073074719999, "grad_norm": 2.2834747110834375, "learning_rate": 3.954393842206543e-06, "loss": 0.8038, "step": 20405 }, { "epoch": 0.24874166697134778, "grad_norm": 6.024163392412209, "learning_rate": 3.954073123797306e-06, "loss": 0.7714, "step": 20410 }, { "epoch": 0.2488026031954956, "grad_norm": 2.449130337944904, "learning_rate": 3.953752405388069e-06, "loss": 0.7764, "step": 20415 }, { "epoch": 0.2488635394196434, "grad_norm": 2.5885201431319307, "learning_rate": 3.953431686978833e-06, "loss": 0.8301, "step": 20420 }, { "epoch": 0.24892447564379122, "grad_norm": 3.729742031638099, "learning_rate": 3.953110968569596e-06, "loss": 0.805, "step": 20425 }, { "epoch": 0.24898541186793902, "grad_norm": 2.0991969797955123, "learning_rate": 3.952790250160359e-06, "loss": 0.847, "step": 20430 }, { "epoch": 0.2490463480920868, "grad_norm": 2.4586508729962873, "learning_rate": 3.952469531751123e-06, "loss": 0.8036, "step": 20435 }, { "epoch": 0.24910728431623463, "grad_norm": 3.247513044159095, "learning_rate": 3.952148813341886e-06, "loss": 0.7851, "step": 20440 }, { "epoch": 0.24916822054038243, "grad_norm": 2.619448644139294, "learning_rate": 3.951828094932649e-06, "loss": 0.822, "step": 20445 }, { "epoch": 0.24922915676453025, "grad_norm": 2.5232479467872926, "learning_rate": 3.951507376523413e-06, "loss": 0.8467, "step": 20450 }, { "epoch": 0.24929009298867805, "grad_norm": 2.7183535708727153, "learning_rate": 3.951186658114176e-06, "loss": 0.8061, "step": 20455 }, { "epoch": 0.24935102921282587, "grad_norm": 2.5774818948350884, "learning_rate": 3.950865939704939e-06, "loss": 0.7376, "step": 20460 }, { "epoch": 0.24941196543697366, "grad_norm": 3.006309303775127, "learning_rate": 3.950545221295703e-06, "loss": 0.7601, "step": 20465 }, { "epoch": 0.24947290166112146, "grad_norm": 2.5851507650688417, "learning_rate": 3.950224502886466e-06, "loss": 0.8773, "step": 20470 }, { "epoch": 0.24953383788526928, "grad_norm": 2.260297186807659, "learning_rate": 3.94990378447723e-06, "loss": 0.8188, "step": 20475 }, { "epoch": 0.24959477410941708, "grad_norm": 2.106217519068278, "learning_rate": 3.949583066067993e-06, "loss": 0.8219, "step": 20480 }, { "epoch": 0.2496557103335649, "grad_norm": 2.4429333945703706, "learning_rate": 3.949262347658756e-06, "loss": 0.8141, "step": 20485 }, { "epoch": 0.2497166465577127, "grad_norm": 2.4787677564335495, "learning_rate": 3.9489416292495195e-06, "loss": 0.7855, "step": 20490 }, { "epoch": 0.2497775827818605, "grad_norm": 3.3114905710883153, "learning_rate": 3.9486209108402825e-06, "loss": 0.7652, "step": 20495 }, { "epoch": 0.2498385190060083, "grad_norm": 2.7440637448892025, "learning_rate": 3.948300192431046e-06, "loss": 0.8006, "step": 20500 }, { "epoch": 0.2498994552301561, "grad_norm": 2.3173288070802944, "learning_rate": 3.947979474021809e-06, "loss": 0.8291, "step": 20505 }, { "epoch": 0.24996039145430393, "grad_norm": 2.2991936454189354, "learning_rate": 3.9476587556125724e-06, "loss": 0.7775, "step": 20510 }, { "epoch": 0.25002132767845175, "grad_norm": 3.0186155490249678, "learning_rate": 3.947338037203336e-06, "loss": 0.825, "step": 20515 }, { "epoch": 0.2500822639025995, "grad_norm": 3.1490959601164437, "learning_rate": 3.947017318794099e-06, "loss": 0.8375, "step": 20520 }, { "epoch": 0.25014320012674734, "grad_norm": 2.710827053110915, "learning_rate": 3.946696600384862e-06, "loss": 0.8755, "step": 20525 }, { "epoch": 0.25020413635089517, "grad_norm": 2.5732001858421145, "learning_rate": 3.946375881975626e-06, "loss": 0.7916, "step": 20530 }, { "epoch": 0.25026507257504294, "grad_norm": 3.0731585581251744, "learning_rate": 3.946055163566389e-06, "loss": 0.8121, "step": 20535 }, { "epoch": 0.25032600879919076, "grad_norm": 2.4717324834742462, "learning_rate": 3.945734445157152e-06, "loss": 0.7935, "step": 20540 }, { "epoch": 0.2503869450233386, "grad_norm": 2.1071813809760953, "learning_rate": 3.945413726747916e-06, "loss": 0.7685, "step": 20545 }, { "epoch": 0.2504478812474864, "grad_norm": 2.2940918034062614, "learning_rate": 3.945093008338679e-06, "loss": 0.7727, "step": 20550 }, { "epoch": 0.25050881747163417, "grad_norm": 4.499945011656237, "learning_rate": 3.944772289929442e-06, "loss": 0.7803, "step": 20555 }, { "epoch": 0.250569753695782, "grad_norm": 3.2370565024268707, "learning_rate": 3.944451571520206e-06, "loss": 0.8329, "step": 20560 }, { "epoch": 0.2506306899199298, "grad_norm": 3.386719946710499, "learning_rate": 3.944130853110969e-06, "loss": 0.8383, "step": 20565 }, { "epoch": 0.2506916261440776, "grad_norm": 2.8092488250161605, "learning_rate": 3.943810134701732e-06, "loss": 0.7608, "step": 20570 }, { "epoch": 0.2507525623682254, "grad_norm": 2.2132694600000478, "learning_rate": 3.943489416292495e-06, "loss": 0.7945, "step": 20575 }, { "epoch": 0.25081349859237323, "grad_norm": 2.192057702546031, "learning_rate": 3.943168697883259e-06, "loss": 0.8013, "step": 20580 }, { "epoch": 0.25087443481652105, "grad_norm": 2.9865391904498835, "learning_rate": 3.942847979474022e-06, "loss": 0.8015, "step": 20585 }, { "epoch": 0.2509353710406688, "grad_norm": 2.816753892835236, "learning_rate": 3.942527261064785e-06, "loss": 0.7786, "step": 20590 }, { "epoch": 0.25099630726481664, "grad_norm": 2.5163041420323533, "learning_rate": 3.942206542655549e-06, "loss": 0.8143, "step": 20595 }, { "epoch": 0.25105724348896447, "grad_norm": 2.236182982637496, "learning_rate": 3.941885824246312e-06, "loss": 0.7778, "step": 20600 }, { "epoch": 0.25111817971311223, "grad_norm": 2.8456284458248864, "learning_rate": 3.941565105837075e-06, "loss": 0.882, "step": 20605 }, { "epoch": 0.25117911593726006, "grad_norm": 2.5675517477828893, "learning_rate": 3.941244387427839e-06, "loss": 0.783, "step": 20610 }, { "epoch": 0.2512400521614079, "grad_norm": 3.1245983406519864, "learning_rate": 3.940923669018602e-06, "loss": 0.7861, "step": 20615 }, { "epoch": 0.2513009883855557, "grad_norm": 2.655798231416198, "learning_rate": 3.940602950609365e-06, "loss": 0.8339, "step": 20620 }, { "epoch": 0.25136192460970347, "grad_norm": 14.30079566014474, "learning_rate": 3.940282232200129e-06, "loss": 0.7934, "step": 20625 }, { "epoch": 0.2514228608338513, "grad_norm": 2.464207391320279, "learning_rate": 3.939961513790892e-06, "loss": 0.7807, "step": 20630 }, { "epoch": 0.2514837970579991, "grad_norm": 2.4137840844394804, "learning_rate": 3.9396407953816555e-06, "loss": 0.755, "step": 20635 }, { "epoch": 0.2515447332821469, "grad_norm": 2.0657146803441613, "learning_rate": 3.9393200769724185e-06, "loss": 0.821, "step": 20640 }, { "epoch": 0.2516056695062947, "grad_norm": 2.4715526356235515, "learning_rate": 3.938999358563182e-06, "loss": 0.83, "step": 20645 }, { "epoch": 0.2516666057304425, "grad_norm": 2.8364565218116105, "learning_rate": 3.938678640153945e-06, "loss": 0.7853, "step": 20650 }, { "epoch": 0.25172754195459035, "grad_norm": 2.3291108043901634, "learning_rate": 3.938357921744708e-06, "loss": 0.7839, "step": 20655 }, { "epoch": 0.2517884781787381, "grad_norm": 2.6654428863027286, "learning_rate": 3.938037203335472e-06, "loss": 0.8428, "step": 20660 }, { "epoch": 0.25184941440288594, "grad_norm": 2.2919260171669307, "learning_rate": 3.937716484926235e-06, "loss": 0.703, "step": 20665 }, { "epoch": 0.25191035062703376, "grad_norm": 2.292457772764277, "learning_rate": 3.937395766516998e-06, "loss": 0.7194, "step": 20670 }, { "epoch": 0.25197128685118153, "grad_norm": 2.2504703897719085, "learning_rate": 3.937075048107762e-06, "loss": 0.7732, "step": 20675 }, { "epoch": 0.25203222307532935, "grad_norm": 3.161293177132859, "learning_rate": 3.936754329698525e-06, "loss": 0.8128, "step": 20680 }, { "epoch": 0.2520931592994772, "grad_norm": 1.9190787732122372, "learning_rate": 3.936433611289288e-06, "loss": 0.7654, "step": 20685 }, { "epoch": 0.252154095523625, "grad_norm": 2.594096610243809, "learning_rate": 3.936112892880052e-06, "loss": 0.8006, "step": 20690 }, { "epoch": 0.25221503174777277, "grad_norm": 2.7647959868751957, "learning_rate": 3.935792174470815e-06, "loss": 0.8205, "step": 20695 }, { "epoch": 0.2522759679719206, "grad_norm": 2.396074203160668, "learning_rate": 3.935471456061578e-06, "loss": 0.8524, "step": 20700 }, { "epoch": 0.2523369041960684, "grad_norm": 2.472254328927975, "learning_rate": 3.935150737652342e-06, "loss": 0.8208, "step": 20705 }, { "epoch": 0.2523978404202162, "grad_norm": 3.382435588471781, "learning_rate": 3.934830019243105e-06, "loss": 0.8365, "step": 20710 }, { "epoch": 0.252458776644364, "grad_norm": 2.372047598362367, "learning_rate": 3.934509300833868e-06, "loss": 0.7501, "step": 20715 }, { "epoch": 0.2525197128685118, "grad_norm": 2.1279994013512313, "learning_rate": 3.934188582424632e-06, "loss": 0.8838, "step": 20720 }, { "epoch": 0.25258064909265965, "grad_norm": 2.4510815158651242, "learning_rate": 3.933867864015395e-06, "loss": 0.7938, "step": 20725 }, { "epoch": 0.2526415853168074, "grad_norm": 2.767311364483028, "learning_rate": 3.933547145606158e-06, "loss": 0.7935, "step": 20730 }, { "epoch": 0.25270252154095524, "grad_norm": 2.801375227653777, "learning_rate": 3.933226427196921e-06, "loss": 0.7987, "step": 20735 }, { "epoch": 0.25276345776510306, "grad_norm": 2.3967845401418892, "learning_rate": 3.932905708787685e-06, "loss": 0.772, "step": 20740 }, { "epoch": 0.25282439398925083, "grad_norm": 2.4812789883790836, "learning_rate": 3.932584990378448e-06, "loss": 0.866, "step": 20745 }, { "epoch": 0.25288533021339865, "grad_norm": 2.274098792245197, "learning_rate": 3.932264271969211e-06, "loss": 0.7041, "step": 20750 }, { "epoch": 0.2529462664375465, "grad_norm": 3.0606022189508177, "learning_rate": 3.931943553559975e-06, "loss": 0.8633, "step": 20755 }, { "epoch": 0.2530072026616943, "grad_norm": 2.327075775086555, "learning_rate": 3.931622835150738e-06, "loss": 0.7769, "step": 20760 }, { "epoch": 0.25306813888584206, "grad_norm": 3.0185256517596866, "learning_rate": 3.931302116741501e-06, "loss": 0.7847, "step": 20765 }, { "epoch": 0.2531290751099899, "grad_norm": 2.649886853236115, "learning_rate": 3.9309813983322646e-06, "loss": 0.8693, "step": 20770 }, { "epoch": 0.2531900113341377, "grad_norm": 2.199285801439584, "learning_rate": 3.9306606799230276e-06, "loss": 0.7347, "step": 20775 }, { "epoch": 0.2532509475582855, "grad_norm": 2.4436468629069377, "learning_rate": 3.9303399615137914e-06, "loss": 0.7727, "step": 20780 }, { "epoch": 0.2533118837824333, "grad_norm": 2.414092722918712, "learning_rate": 3.9300192431045545e-06, "loss": 0.7934, "step": 20785 }, { "epoch": 0.2533728200065811, "grad_norm": 3.0132887788435023, "learning_rate": 3.9296985246953175e-06, "loss": 0.828, "step": 20790 }, { "epoch": 0.25343375623072895, "grad_norm": 3.0253375525954476, "learning_rate": 3.929377806286081e-06, "loss": 0.8435, "step": 20795 }, { "epoch": 0.2534946924548767, "grad_norm": 3.0850820678876096, "learning_rate": 3.929057087876844e-06, "loss": 0.811, "step": 20800 }, { "epoch": 0.25355562867902454, "grad_norm": 2.3716107811741507, "learning_rate": 3.928736369467608e-06, "loss": 0.7965, "step": 20805 }, { "epoch": 0.25361656490317236, "grad_norm": 3.1589682034756628, "learning_rate": 3.928415651058371e-06, "loss": 0.7638, "step": 20810 }, { "epoch": 0.2536775011273201, "grad_norm": 2.324265336614555, "learning_rate": 3.928094932649134e-06, "loss": 0.8466, "step": 20815 }, { "epoch": 0.25373843735146795, "grad_norm": 2.2176537525333955, "learning_rate": 3.927774214239898e-06, "loss": 0.7814, "step": 20820 }, { "epoch": 0.25379937357561577, "grad_norm": 2.9494298083879515, "learning_rate": 3.927453495830661e-06, "loss": 0.774, "step": 20825 }, { "epoch": 0.2538603097997636, "grad_norm": 3.2282783976332436, "learning_rate": 3.927132777421424e-06, "loss": 0.9108, "step": 20830 }, { "epoch": 0.25392124602391136, "grad_norm": 2.259584924532896, "learning_rate": 3.926812059012188e-06, "loss": 0.7929, "step": 20835 }, { "epoch": 0.2539821822480592, "grad_norm": 2.5689043962674347, "learning_rate": 3.926491340602951e-06, "loss": 0.8679, "step": 20840 }, { "epoch": 0.254043118472207, "grad_norm": 2.494165962692826, "learning_rate": 3.926170622193714e-06, "loss": 0.7496, "step": 20845 }, { "epoch": 0.2541040546963548, "grad_norm": 2.0946695061252174, "learning_rate": 3.925849903784478e-06, "loss": 0.8452, "step": 20850 }, { "epoch": 0.2541649909205026, "grad_norm": 1.795050580655689, "learning_rate": 3.925529185375241e-06, "loss": 0.7557, "step": 20855 }, { "epoch": 0.2542259271446504, "grad_norm": 2.470984929160852, "learning_rate": 3.925208466966004e-06, "loss": 0.85, "step": 20860 }, { "epoch": 0.2542868633687982, "grad_norm": 2.057723749306403, "learning_rate": 3.924887748556768e-06, "loss": 0.7821, "step": 20865 }, { "epoch": 0.254347799592946, "grad_norm": 2.7927896146732527, "learning_rate": 3.924567030147531e-06, "loss": 0.8023, "step": 20870 }, { "epoch": 0.25440873581709383, "grad_norm": 2.420282433927807, "learning_rate": 3.924246311738294e-06, "loss": 0.8978, "step": 20875 }, { "epoch": 0.25446967204124166, "grad_norm": 2.4803123294205536, "learning_rate": 3.923925593329058e-06, "loss": 0.8397, "step": 20880 }, { "epoch": 0.2545306082653894, "grad_norm": 2.6442544104383257, "learning_rate": 3.923604874919821e-06, "loss": 0.7795, "step": 20885 }, { "epoch": 0.25459154448953725, "grad_norm": 2.6555351648419165, "learning_rate": 3.923284156510584e-06, "loss": 0.8413, "step": 20890 }, { "epoch": 0.25465248071368507, "grad_norm": 2.1833953902679757, "learning_rate": 3.922963438101348e-06, "loss": 0.7565, "step": 20895 }, { "epoch": 0.25471341693783284, "grad_norm": 2.4650499371848245, "learning_rate": 3.922642719692111e-06, "loss": 0.8458, "step": 20900 }, { "epoch": 0.25477435316198066, "grad_norm": 2.5516347680357834, "learning_rate": 3.922322001282874e-06, "loss": 0.796, "step": 20905 }, { "epoch": 0.2548352893861285, "grad_norm": 2.8204498739319535, "learning_rate": 3.922001282873637e-06, "loss": 0.7797, "step": 20910 }, { "epoch": 0.2548962256102763, "grad_norm": 2.28962759254275, "learning_rate": 3.9216805644644005e-06, "loss": 0.7656, "step": 20915 }, { "epoch": 0.2549571618344241, "grad_norm": 2.375327140988328, "learning_rate": 3.9213598460551635e-06, "loss": 0.7419, "step": 20920 }, { "epoch": 0.2550180980585719, "grad_norm": 2.186061409652883, "learning_rate": 3.921039127645927e-06, "loss": 0.8495, "step": 20925 }, { "epoch": 0.2550790342827197, "grad_norm": 2.2547611327972845, "learning_rate": 3.92071840923669e-06, "loss": 0.8124, "step": 20930 }, { "epoch": 0.2551399705068675, "grad_norm": 3.264027853135335, "learning_rate": 3.9203976908274534e-06, "loss": 0.7902, "step": 20935 }, { "epoch": 0.2552009067310153, "grad_norm": 2.1348045485925238, "learning_rate": 3.920076972418217e-06, "loss": 0.8736, "step": 20940 }, { "epoch": 0.25526184295516313, "grad_norm": 4.124865275340642, "learning_rate": 3.91975625400898e-06, "loss": 0.8674, "step": 20945 }, { "epoch": 0.25532277917931095, "grad_norm": 2.9306413470856847, "learning_rate": 3.919435535599744e-06, "loss": 0.7887, "step": 20950 }, { "epoch": 0.2553837154034587, "grad_norm": 2.680984260735789, "learning_rate": 3.919114817190507e-06, "loss": 0.876, "step": 20955 }, { "epoch": 0.25544465162760654, "grad_norm": 2.9504166640823475, "learning_rate": 3.91879409878127e-06, "loss": 0.8677, "step": 20960 }, { "epoch": 0.25550558785175437, "grad_norm": 2.330595349520742, "learning_rate": 3.918473380372034e-06, "loss": 0.8184, "step": 20965 }, { "epoch": 0.25556652407590214, "grad_norm": 2.660308515370511, "learning_rate": 3.918152661962797e-06, "loss": 0.8061, "step": 20970 }, { "epoch": 0.25562746030004996, "grad_norm": 2.315094249171166, "learning_rate": 3.917831943553561e-06, "loss": 0.8712, "step": 20975 }, { "epoch": 0.2556883965241978, "grad_norm": 2.6867904888674388, "learning_rate": 3.917511225144324e-06, "loss": 0.8762, "step": 20980 }, { "epoch": 0.2557493327483456, "grad_norm": 2.3029687852373137, "learning_rate": 3.917190506735087e-06, "loss": 0.8116, "step": 20985 }, { "epoch": 0.25581026897249337, "grad_norm": 2.854720010734203, "learning_rate": 3.91686978832585e-06, "loss": 0.8013, "step": 20990 }, { "epoch": 0.2558712051966412, "grad_norm": 3.1102066860778885, "learning_rate": 3.916549069916614e-06, "loss": 0.75, "step": 20995 }, { "epoch": 0.255932141420789, "grad_norm": 2.7965576568757786, "learning_rate": 3.916228351507377e-06, "loss": 0.8591, "step": 21000 }, { "epoch": 0.2559930776449368, "grad_norm": 2.738030662535416, "learning_rate": 3.91590763309814e-06, "loss": 0.851, "step": 21005 }, { "epoch": 0.2560540138690846, "grad_norm": 3.290493208445107, "learning_rate": 3.915586914688904e-06, "loss": 0.9253, "step": 21010 }, { "epoch": 0.25611495009323243, "grad_norm": 2.686514650107786, "learning_rate": 3.915266196279667e-06, "loss": 0.8344, "step": 21015 }, { "epoch": 0.25617588631738025, "grad_norm": 3.177877970248613, "learning_rate": 3.91494547787043e-06, "loss": 0.8142, "step": 21020 }, { "epoch": 0.256236822541528, "grad_norm": 1.9720955151869737, "learning_rate": 3.914624759461194e-06, "loss": 0.7949, "step": 21025 }, { "epoch": 0.25629775876567584, "grad_norm": 2.567974043581349, "learning_rate": 3.914304041051957e-06, "loss": 0.7516, "step": 21030 }, { "epoch": 0.25635869498982367, "grad_norm": 2.9978855296704485, "learning_rate": 3.91398332264272e-06, "loss": 0.9065, "step": 21035 }, { "epoch": 0.25641963121397143, "grad_norm": 2.175466558249937, "learning_rate": 3.9136626042334836e-06, "loss": 0.7269, "step": 21040 }, { "epoch": 0.25648056743811926, "grad_norm": 2.352059291214077, "learning_rate": 3.9133418858242466e-06, "loss": 0.7465, "step": 21045 }, { "epoch": 0.2565415036622671, "grad_norm": 2.641747984945525, "learning_rate": 3.91302116741501e-06, "loss": 0.7649, "step": 21050 }, { "epoch": 0.2566024398864149, "grad_norm": 3.4193291108967565, "learning_rate": 3.9127004490057735e-06, "loss": 0.8515, "step": 21055 }, { "epoch": 0.25666337611056267, "grad_norm": 2.557464436330652, "learning_rate": 3.9123797305965365e-06, "loss": 0.8656, "step": 21060 }, { "epoch": 0.2567243123347105, "grad_norm": 3.303564716815219, "learning_rate": 3.9120590121872995e-06, "loss": 0.8403, "step": 21065 }, { "epoch": 0.2567852485588583, "grad_norm": 2.6108864945094292, "learning_rate": 3.9117382937780625e-06, "loss": 0.7884, "step": 21070 }, { "epoch": 0.2568461847830061, "grad_norm": 2.499174299430942, "learning_rate": 3.911417575368826e-06, "loss": 0.7308, "step": 21075 }, { "epoch": 0.2569071210071539, "grad_norm": 2.629056219294094, "learning_rate": 3.911096856959589e-06, "loss": 0.8534, "step": 21080 }, { "epoch": 0.2569680572313017, "grad_norm": 2.2530273084298464, "learning_rate": 3.910776138550353e-06, "loss": 0.8083, "step": 21085 }, { "epoch": 0.25702899345544955, "grad_norm": 2.2505185292620125, "learning_rate": 3.910455420141116e-06, "loss": 0.8076, "step": 21090 }, { "epoch": 0.2570899296795973, "grad_norm": 4.295798940891858, "learning_rate": 3.910134701731879e-06, "loss": 0.7816, "step": 21095 }, { "epoch": 0.25715086590374514, "grad_norm": 2.8728156851418225, "learning_rate": 3.909813983322643e-06, "loss": 0.7958, "step": 21100 }, { "epoch": 0.25721180212789296, "grad_norm": 2.8268144378602846, "learning_rate": 3.909493264913406e-06, "loss": 0.7875, "step": 21105 }, { "epoch": 0.25727273835204073, "grad_norm": 2.1898424887143504, "learning_rate": 3.90917254650417e-06, "loss": 0.7616, "step": 21110 }, { "epoch": 0.25733367457618855, "grad_norm": 2.65080162638402, "learning_rate": 3.908851828094933e-06, "loss": 0.7762, "step": 21115 }, { "epoch": 0.2573946108003364, "grad_norm": 2.695309309422186, "learning_rate": 3.908531109685697e-06, "loss": 0.7523, "step": 21120 }, { "epoch": 0.2574555470244842, "grad_norm": 2.2095191967727423, "learning_rate": 3.90821039127646e-06, "loss": 0.8345, "step": 21125 }, { "epoch": 0.25751648324863197, "grad_norm": 2.2008885361093906, "learning_rate": 3.907889672867223e-06, "loss": 0.7975, "step": 21130 }, { "epoch": 0.2575774194727798, "grad_norm": 2.1412484003094585, "learning_rate": 3.907568954457987e-06, "loss": 0.766, "step": 21135 }, { "epoch": 0.2576383556969276, "grad_norm": 2.6059332785674045, "learning_rate": 3.90724823604875e-06, "loss": 0.8134, "step": 21140 }, { "epoch": 0.2576992919210754, "grad_norm": 3.241043038935712, "learning_rate": 3.906927517639513e-06, "loss": 0.8603, "step": 21145 }, { "epoch": 0.2577602281452232, "grad_norm": 3.764234701344585, "learning_rate": 3.906606799230277e-06, "loss": 0.8005, "step": 21150 }, { "epoch": 0.257821164369371, "grad_norm": 2.707669915533596, "learning_rate": 3.90628608082104e-06, "loss": 0.8217, "step": 21155 }, { "epoch": 0.25788210059351885, "grad_norm": 2.480795969620893, "learning_rate": 3.905965362411803e-06, "loss": 0.7534, "step": 21160 }, { "epoch": 0.2579430368176666, "grad_norm": 2.6513166109305177, "learning_rate": 3.905644644002566e-06, "loss": 0.761, "step": 21165 }, { "epoch": 0.25800397304181444, "grad_norm": 2.0353807876556904, "learning_rate": 3.90532392559333e-06, "loss": 0.8025, "step": 21170 }, { "epoch": 0.25806490926596226, "grad_norm": 2.18479729900699, "learning_rate": 3.905003207184093e-06, "loss": 0.6668, "step": 21175 }, { "epoch": 0.25812584549011003, "grad_norm": 2.3016258887559458, "learning_rate": 3.904682488774856e-06, "loss": 0.733, "step": 21180 }, { "epoch": 0.25818678171425785, "grad_norm": 2.226621305365339, "learning_rate": 3.9043617703656195e-06, "loss": 0.7341, "step": 21185 }, { "epoch": 0.2582477179384057, "grad_norm": 2.1636252027305605, "learning_rate": 3.9040410519563825e-06, "loss": 0.7652, "step": 21190 }, { "epoch": 0.2583086541625535, "grad_norm": 2.65125349788888, "learning_rate": 3.9037203335471455e-06, "loss": 0.7723, "step": 21195 }, { "epoch": 0.25836959038670126, "grad_norm": 2.392019247573646, "learning_rate": 3.903399615137909e-06, "loss": 0.7857, "step": 21200 }, { "epoch": 0.2584305266108491, "grad_norm": 2.244349358761298, "learning_rate": 3.9030788967286724e-06, "loss": 0.7763, "step": 21205 }, { "epoch": 0.2584914628349969, "grad_norm": 2.3255636762252268, "learning_rate": 3.9027581783194354e-06, "loss": 0.7837, "step": 21210 }, { "epoch": 0.2585523990591447, "grad_norm": 2.803185279156316, "learning_rate": 3.902437459910199e-06, "loss": 0.7385, "step": 21215 }, { "epoch": 0.2586133352832925, "grad_norm": 2.2502093967859707, "learning_rate": 3.902116741500962e-06, "loss": 0.7904, "step": 21220 }, { "epoch": 0.2586742715074403, "grad_norm": 3.0028030250088618, "learning_rate": 3.901796023091725e-06, "loss": 0.7521, "step": 21225 }, { "epoch": 0.25873520773158815, "grad_norm": 2.2248583503115027, "learning_rate": 3.901475304682489e-06, "loss": 0.8325, "step": 21230 }, { "epoch": 0.2587961439557359, "grad_norm": 2.4489045744703573, "learning_rate": 3.901154586273252e-06, "loss": 0.8286, "step": 21235 }, { "epoch": 0.25885708017988374, "grad_norm": 2.747090392739879, "learning_rate": 3.900833867864015e-06, "loss": 0.8112, "step": 21240 }, { "epoch": 0.25891801640403156, "grad_norm": 3.1281960955306216, "learning_rate": 3.900513149454779e-06, "loss": 0.7543, "step": 21245 }, { "epoch": 0.2589789526281793, "grad_norm": 2.8823246216736713, "learning_rate": 3.900192431045542e-06, "loss": 0.8424, "step": 21250 }, { "epoch": 0.25903988885232715, "grad_norm": 3.532165220055151, "learning_rate": 3.899871712636306e-06, "loss": 0.8247, "step": 21255 }, { "epoch": 0.25910082507647497, "grad_norm": 2.8193546305898254, "learning_rate": 3.899550994227069e-06, "loss": 0.8516, "step": 21260 }, { "epoch": 0.2591617613006228, "grad_norm": 2.6530128998507463, "learning_rate": 3.899230275817832e-06, "loss": 0.8058, "step": 21265 }, { "epoch": 0.25922269752477056, "grad_norm": 2.78008091978892, "learning_rate": 3.898909557408596e-06, "loss": 0.8575, "step": 21270 }, { "epoch": 0.2592836337489184, "grad_norm": 2.6315001911421207, "learning_rate": 3.898588838999359e-06, "loss": 0.8113, "step": 21275 }, { "epoch": 0.2593445699730662, "grad_norm": 2.4135107969007996, "learning_rate": 3.898268120590123e-06, "loss": 0.8312, "step": 21280 }, { "epoch": 0.259405506197214, "grad_norm": 2.4742996011472456, "learning_rate": 3.897947402180886e-06, "loss": 0.7979, "step": 21285 }, { "epoch": 0.2594664424213618, "grad_norm": 2.868070634314652, "learning_rate": 3.897626683771649e-06, "loss": 0.8416, "step": 21290 }, { "epoch": 0.2595273786455096, "grad_norm": 2.4254895740835765, "learning_rate": 3.897305965362413e-06, "loss": 0.7987, "step": 21295 }, { "epoch": 0.25958831486965744, "grad_norm": 2.6427995854162742, "learning_rate": 3.896985246953176e-06, "loss": 0.7868, "step": 21300 }, { "epoch": 0.2596492510938052, "grad_norm": 2.438272817281824, "learning_rate": 3.896664528543939e-06, "loss": 0.7217, "step": 21305 }, { "epoch": 0.25971018731795303, "grad_norm": 2.390823511499925, "learning_rate": 3.8963438101347026e-06, "loss": 0.7644, "step": 21310 }, { "epoch": 0.25977112354210086, "grad_norm": 2.499956052098615, "learning_rate": 3.8960230917254656e-06, "loss": 0.7746, "step": 21315 }, { "epoch": 0.2598320597662486, "grad_norm": 3.333659060409637, "learning_rate": 3.895702373316229e-06, "loss": 0.7509, "step": 21320 }, { "epoch": 0.25989299599039645, "grad_norm": 2.3236146927858816, "learning_rate": 3.895381654906992e-06, "loss": 0.7699, "step": 21325 }, { "epoch": 0.25995393221454427, "grad_norm": 2.5924004730699033, "learning_rate": 3.8950609364977555e-06, "loss": 0.862, "step": 21330 }, { "epoch": 0.26001486843869204, "grad_norm": 2.8312928375114343, "learning_rate": 3.8947402180885185e-06, "loss": 0.8651, "step": 21335 }, { "epoch": 0.26007580466283986, "grad_norm": 3.345993197843464, "learning_rate": 3.8944194996792815e-06, "loss": 0.7981, "step": 21340 }, { "epoch": 0.2601367408869877, "grad_norm": 2.861827123569188, "learning_rate": 3.894098781270045e-06, "loss": 0.8017, "step": 21345 }, { "epoch": 0.2601976771111355, "grad_norm": 2.2585005524046835, "learning_rate": 3.893778062860808e-06, "loss": 0.7529, "step": 21350 }, { "epoch": 0.2602586133352833, "grad_norm": 2.6545058698812802, "learning_rate": 3.893457344451571e-06, "loss": 0.8103, "step": 21355 }, { "epoch": 0.2603195495594311, "grad_norm": 2.484643690087101, "learning_rate": 3.893136626042335e-06, "loss": 0.7204, "step": 21360 }, { "epoch": 0.2603804857835789, "grad_norm": 2.161115418247262, "learning_rate": 3.892815907633098e-06, "loss": 0.8173, "step": 21365 }, { "epoch": 0.2604414220077267, "grad_norm": 2.186454556176188, "learning_rate": 3.892495189223861e-06, "loss": 0.8053, "step": 21370 }, { "epoch": 0.2605023582318745, "grad_norm": 2.6132841639754494, "learning_rate": 3.892174470814625e-06, "loss": 0.8087, "step": 21375 }, { "epoch": 0.26056329445602233, "grad_norm": 3.0846498214040285, "learning_rate": 3.891853752405388e-06, "loss": 0.8427, "step": 21380 }, { "epoch": 0.26062423068017015, "grad_norm": 2.410016719551074, "learning_rate": 3.891533033996151e-06, "loss": 0.807, "step": 21385 }, { "epoch": 0.2606851669043179, "grad_norm": 2.430170480884077, "learning_rate": 3.891212315586915e-06, "loss": 0.8424, "step": 21390 }, { "epoch": 0.26074610312846574, "grad_norm": 3.1714393152398097, "learning_rate": 3.890891597177678e-06, "loss": 0.7257, "step": 21395 }, { "epoch": 0.26080703935261357, "grad_norm": 2.8161152732267882, "learning_rate": 3.890570878768442e-06, "loss": 0.7959, "step": 21400 }, { "epoch": 0.26086797557676134, "grad_norm": 3.4298445944538454, "learning_rate": 3.890250160359205e-06, "loss": 0.8778, "step": 21405 }, { "epoch": 0.26092891180090916, "grad_norm": 3.1266813521704147, "learning_rate": 3.889929441949968e-06, "loss": 0.7648, "step": 21410 }, { "epoch": 0.260989848025057, "grad_norm": 2.1706453680852147, "learning_rate": 3.889608723540732e-06, "loss": 0.8243, "step": 21415 }, { "epoch": 0.2610507842492048, "grad_norm": 2.1734495474614746, "learning_rate": 3.889288005131495e-06, "loss": 0.6956, "step": 21420 }, { "epoch": 0.26111172047335257, "grad_norm": 2.325624405053858, "learning_rate": 3.888967286722259e-06, "loss": 0.7532, "step": 21425 }, { "epoch": 0.2611726566975004, "grad_norm": 2.2249045646890506, "learning_rate": 3.888646568313022e-06, "loss": 0.8236, "step": 21430 }, { "epoch": 0.2612335929216482, "grad_norm": 2.056611083120303, "learning_rate": 3.888325849903785e-06, "loss": 0.7991, "step": 21435 }, { "epoch": 0.261294529145796, "grad_norm": 2.498821175543575, "learning_rate": 3.888005131494549e-06, "loss": 0.7921, "step": 21440 }, { "epoch": 0.2613554653699438, "grad_norm": 2.307484764858277, "learning_rate": 3.887684413085312e-06, "loss": 0.7562, "step": 21445 }, { "epoch": 0.26141640159409163, "grad_norm": 2.6339292513086616, "learning_rate": 3.887363694676075e-06, "loss": 0.8212, "step": 21450 }, { "epoch": 0.26147733781823945, "grad_norm": 2.6371279586720084, "learning_rate": 3.8870429762668385e-06, "loss": 0.7901, "step": 21455 }, { "epoch": 0.2615382740423872, "grad_norm": 2.356256249923502, "learning_rate": 3.8867222578576015e-06, "loss": 0.7855, "step": 21460 }, { "epoch": 0.26159921026653504, "grad_norm": 2.5852055693778917, "learning_rate": 3.8864015394483645e-06, "loss": 0.7883, "step": 21465 }, { "epoch": 0.26166014649068287, "grad_norm": 2.4285028174995897, "learning_rate": 3.886080821039128e-06, "loss": 0.8352, "step": 21470 }, { "epoch": 0.26172108271483063, "grad_norm": 2.1322171013587954, "learning_rate": 3.8857601026298914e-06, "loss": 0.7705, "step": 21475 }, { "epoch": 0.26178201893897846, "grad_norm": 2.7722040319884624, "learning_rate": 3.8854393842206544e-06, "loss": 0.827, "step": 21480 }, { "epoch": 0.2618429551631263, "grad_norm": 2.851171246246609, "learning_rate": 3.885118665811418e-06, "loss": 0.7803, "step": 21485 }, { "epoch": 0.2619038913872741, "grad_norm": 2.772938164708684, "learning_rate": 3.884797947402181e-06, "loss": 0.8083, "step": 21490 }, { "epoch": 0.26196482761142187, "grad_norm": 2.3715866986367606, "learning_rate": 3.884477228992944e-06, "loss": 0.895, "step": 21495 }, { "epoch": 0.2620257638355697, "grad_norm": 2.186897806458742, "learning_rate": 3.884156510583707e-06, "loss": 0.7342, "step": 21500 }, { "epoch": 0.2620867000597175, "grad_norm": 2.7251376969913514, "learning_rate": 3.883835792174471e-06, "loss": 0.7963, "step": 21505 }, { "epoch": 0.2621476362838653, "grad_norm": 2.2685637442598567, "learning_rate": 3.883515073765234e-06, "loss": 0.709, "step": 21510 }, { "epoch": 0.2622085725080131, "grad_norm": 2.9951019071981326, "learning_rate": 3.883194355355997e-06, "loss": 0.7791, "step": 21515 }, { "epoch": 0.2622695087321609, "grad_norm": 2.4699464279177668, "learning_rate": 3.882873636946761e-06, "loss": 0.7337, "step": 21520 }, { "epoch": 0.26233044495630875, "grad_norm": 2.1799370842510326, "learning_rate": 3.882552918537524e-06, "loss": 0.8009, "step": 21525 }, { "epoch": 0.2623913811804565, "grad_norm": 3.10770549244442, "learning_rate": 3.882232200128287e-06, "loss": 0.8066, "step": 21530 }, { "epoch": 0.26245231740460434, "grad_norm": 2.7597262148526904, "learning_rate": 3.881911481719051e-06, "loss": 0.8127, "step": 21535 }, { "epoch": 0.26251325362875216, "grad_norm": 2.7729936434429323, "learning_rate": 3.881590763309814e-06, "loss": 0.8032, "step": 21540 }, { "epoch": 0.26257418985289993, "grad_norm": 2.7794180525484142, "learning_rate": 3.881270044900577e-06, "loss": 0.8569, "step": 21545 }, { "epoch": 0.26263512607704775, "grad_norm": 2.6789709641163992, "learning_rate": 3.880949326491341e-06, "loss": 0.8215, "step": 21550 }, { "epoch": 0.2626960623011956, "grad_norm": 2.474030104597557, "learning_rate": 3.880628608082104e-06, "loss": 0.8118, "step": 21555 }, { "epoch": 0.2627569985253434, "grad_norm": 2.177066066987592, "learning_rate": 3.880307889672868e-06, "loss": 0.8518, "step": 21560 }, { "epoch": 0.26281793474949117, "grad_norm": 3.13248519347151, "learning_rate": 3.879987171263631e-06, "loss": 0.8429, "step": 21565 }, { "epoch": 0.262878870973639, "grad_norm": 3.026010578301877, "learning_rate": 3.879666452854394e-06, "loss": 0.8732, "step": 21570 }, { "epoch": 0.2629398071977868, "grad_norm": 2.768556969355474, "learning_rate": 3.879345734445158e-06, "loss": 0.8032, "step": 21575 }, { "epoch": 0.2630007434219346, "grad_norm": 2.001530665475593, "learning_rate": 3.879025016035921e-06, "loss": 0.7917, "step": 21580 }, { "epoch": 0.2630616796460824, "grad_norm": 2.3764447865038036, "learning_rate": 3.8787042976266846e-06, "loss": 0.8391, "step": 21585 }, { "epoch": 0.2631226158702302, "grad_norm": 2.7344161559450346, "learning_rate": 3.878383579217448e-06, "loss": 0.7498, "step": 21590 }, { "epoch": 0.26318355209437805, "grad_norm": 2.5611000513308997, "learning_rate": 3.878062860808211e-06, "loss": 0.779, "step": 21595 }, { "epoch": 0.2632444883185258, "grad_norm": 2.6206798965613567, "learning_rate": 3.8777421423989745e-06, "loss": 0.8029, "step": 21600 }, { "epoch": 0.26330542454267364, "grad_norm": 3.544259808138337, "learning_rate": 3.8774214239897375e-06, "loss": 0.8316, "step": 21605 }, { "epoch": 0.26336636076682146, "grad_norm": 2.57060960412981, "learning_rate": 3.8771007055805005e-06, "loss": 0.7001, "step": 21610 }, { "epoch": 0.26342729699096923, "grad_norm": 2.660914312105452, "learning_rate": 3.876779987171264e-06, "loss": 0.8191, "step": 21615 }, { "epoch": 0.26348823321511705, "grad_norm": 2.377271857870618, "learning_rate": 3.876459268762027e-06, "loss": 0.8958, "step": 21620 }, { "epoch": 0.2635491694392649, "grad_norm": 2.0214031354540882, "learning_rate": 3.87613855035279e-06, "loss": 0.7351, "step": 21625 }, { "epoch": 0.2636101056634127, "grad_norm": 2.962906634264143, "learning_rate": 3.875817831943554e-06, "loss": 0.9012, "step": 21630 }, { "epoch": 0.26367104188756046, "grad_norm": 2.024872150065436, "learning_rate": 3.875497113534317e-06, "loss": 0.8433, "step": 21635 }, { "epoch": 0.2637319781117083, "grad_norm": 2.7033706076177184, "learning_rate": 3.87517639512508e-06, "loss": 0.779, "step": 21640 }, { "epoch": 0.2637929143358561, "grad_norm": 4.910902830183906, "learning_rate": 3.874855676715844e-06, "loss": 0.855, "step": 21645 }, { "epoch": 0.2638538505600039, "grad_norm": 2.179169478445868, "learning_rate": 3.874534958306607e-06, "loss": 0.7969, "step": 21650 }, { "epoch": 0.2639147867841517, "grad_norm": 2.174726114445365, "learning_rate": 3.87421423989737e-06, "loss": 0.7109, "step": 21655 }, { "epoch": 0.2639757230082995, "grad_norm": 2.5341640394971874, "learning_rate": 3.873893521488133e-06, "loss": 0.789, "step": 21660 }, { "epoch": 0.26403665923244735, "grad_norm": 2.4790284029029173, "learning_rate": 3.873572803078897e-06, "loss": 0.8469, "step": 21665 }, { "epoch": 0.2640975954565951, "grad_norm": 2.353009727384318, "learning_rate": 3.87325208466966e-06, "loss": 0.6979, "step": 21670 }, { "epoch": 0.26415853168074294, "grad_norm": 2.8445105183320796, "learning_rate": 3.872931366260423e-06, "loss": 0.8444, "step": 21675 }, { "epoch": 0.26421946790489076, "grad_norm": 2.4533297169920325, "learning_rate": 3.872610647851187e-06, "loss": 0.8041, "step": 21680 }, { "epoch": 0.2642804041290385, "grad_norm": 2.9456686651072768, "learning_rate": 3.87228992944195e-06, "loss": 0.7619, "step": 21685 }, { "epoch": 0.26434134035318635, "grad_norm": 2.4898227234636354, "learning_rate": 3.871969211032713e-06, "loss": 0.8104, "step": 21690 }, { "epoch": 0.26440227657733417, "grad_norm": 2.4136542030369244, "learning_rate": 3.871648492623477e-06, "loss": 0.791, "step": 21695 }, { "epoch": 0.264463212801482, "grad_norm": 2.012538572175343, "learning_rate": 3.87132777421424e-06, "loss": 0.7115, "step": 21700 }, { "epoch": 0.26452414902562976, "grad_norm": 2.4221160022053536, "learning_rate": 3.871007055805004e-06, "loss": 0.8469, "step": 21705 }, { "epoch": 0.2645850852497776, "grad_norm": 5.074272275964162, "learning_rate": 3.870686337395767e-06, "loss": 0.8319, "step": 21710 }, { "epoch": 0.2646460214739254, "grad_norm": 3.1099364854455436, "learning_rate": 3.87036561898653e-06, "loss": 0.827, "step": 21715 }, { "epoch": 0.2647069576980732, "grad_norm": 2.1081852415600637, "learning_rate": 3.870044900577294e-06, "loss": 0.6999, "step": 21720 }, { "epoch": 0.264767893922221, "grad_norm": 2.459222896925642, "learning_rate": 3.869724182168057e-06, "loss": 0.8808, "step": 21725 }, { "epoch": 0.2648288301463688, "grad_norm": 3.0091011551283766, "learning_rate": 3.8694034637588205e-06, "loss": 0.836, "step": 21730 }, { "epoch": 0.26488976637051664, "grad_norm": 2.3080407948797803, "learning_rate": 3.8690827453495835e-06, "loss": 0.7713, "step": 21735 }, { "epoch": 0.2649507025946644, "grad_norm": 2.5268733410292152, "learning_rate": 3.8687620269403466e-06, "loss": 0.8099, "step": 21740 }, { "epoch": 0.26501163881881223, "grad_norm": 2.9636873148902207, "learning_rate": 3.8684413085311104e-06, "loss": 0.8388, "step": 21745 }, { "epoch": 0.26507257504296006, "grad_norm": 2.4068068006481167, "learning_rate": 3.8681205901218734e-06, "loss": 0.8026, "step": 21750 }, { "epoch": 0.2651335112671078, "grad_norm": 2.609671676480033, "learning_rate": 3.8677998717126365e-06, "loss": 0.7705, "step": 21755 }, { "epoch": 0.26519444749125565, "grad_norm": 2.2220133080667535, "learning_rate": 3.8674791533034e-06, "loss": 0.7592, "step": 21760 }, { "epoch": 0.26525538371540347, "grad_norm": 3.737382767549086, "learning_rate": 3.867158434894163e-06, "loss": 0.7323, "step": 21765 }, { "epoch": 0.2653163199395513, "grad_norm": 2.2041557944323267, "learning_rate": 3.866837716484926e-06, "loss": 0.8063, "step": 21770 }, { "epoch": 0.26537725616369906, "grad_norm": 2.2330633097240282, "learning_rate": 3.86651699807569e-06, "loss": 0.7839, "step": 21775 }, { "epoch": 0.2654381923878469, "grad_norm": 2.3884430668255625, "learning_rate": 3.866196279666453e-06, "loss": 0.741, "step": 21780 }, { "epoch": 0.2654991286119947, "grad_norm": 2.688281176339731, "learning_rate": 3.865875561257216e-06, "loss": 0.8373, "step": 21785 }, { "epoch": 0.2655600648361425, "grad_norm": 2.4374931172307295, "learning_rate": 3.86555484284798e-06, "loss": 0.7883, "step": 21790 }, { "epoch": 0.2656210010602903, "grad_norm": 2.636701751175713, "learning_rate": 3.865234124438743e-06, "loss": 0.7638, "step": 21795 }, { "epoch": 0.2656819372844381, "grad_norm": 2.444277845233089, "learning_rate": 3.864913406029506e-06, "loss": 0.7613, "step": 21800 }, { "epoch": 0.26574287350858594, "grad_norm": 3.016728810485744, "learning_rate": 3.86459268762027e-06, "loss": 0.7397, "step": 21805 }, { "epoch": 0.2658038097327337, "grad_norm": 3.861689686354738, "learning_rate": 3.864271969211033e-06, "loss": 0.8233, "step": 21810 }, { "epoch": 0.26586474595688153, "grad_norm": 2.2181804893113495, "learning_rate": 3.863951250801796e-06, "loss": 0.8499, "step": 21815 }, { "epoch": 0.26592568218102935, "grad_norm": 2.280349665069425, "learning_rate": 3.86363053239256e-06, "loss": 0.8384, "step": 21820 }, { "epoch": 0.2659866184051771, "grad_norm": 3.3780803190305075, "learning_rate": 3.863309813983323e-06, "loss": 0.7395, "step": 21825 }, { "epoch": 0.26604755462932494, "grad_norm": 2.5200205328202236, "learning_rate": 3.862989095574086e-06, "loss": 0.8088, "step": 21830 }, { "epoch": 0.26610849085347277, "grad_norm": 2.7171274639698235, "learning_rate": 3.862668377164849e-06, "loss": 0.758, "step": 21835 }, { "epoch": 0.26616942707762054, "grad_norm": 2.8134369628740306, "learning_rate": 3.862347658755613e-06, "loss": 0.7981, "step": 21840 }, { "epoch": 0.26623036330176836, "grad_norm": 3.3943954953904485, "learning_rate": 3.862026940346376e-06, "loss": 0.816, "step": 21845 }, { "epoch": 0.2662912995259162, "grad_norm": 3.408517425224496, "learning_rate": 3.86170622193714e-06, "loss": 0.755, "step": 21850 }, { "epoch": 0.266352235750064, "grad_norm": 2.1684436156658697, "learning_rate": 3.861385503527903e-06, "loss": 0.7747, "step": 21855 }, { "epoch": 0.26641317197421177, "grad_norm": 2.4000268142116994, "learning_rate": 3.861064785118666e-06, "loss": 0.8053, "step": 21860 }, { "epoch": 0.2664741081983596, "grad_norm": 2.694269688217431, "learning_rate": 3.86074406670943e-06, "loss": 0.7777, "step": 21865 }, { "epoch": 0.2665350444225074, "grad_norm": 2.6341283677395606, "learning_rate": 3.860423348300193e-06, "loss": 0.7336, "step": 21870 }, { "epoch": 0.2665959806466552, "grad_norm": 2.366289736822697, "learning_rate": 3.8601026298909565e-06, "loss": 0.771, "step": 21875 }, { "epoch": 0.266656916870803, "grad_norm": 2.220847608550225, "learning_rate": 3.8597819114817195e-06, "loss": 0.8024, "step": 21880 }, { "epoch": 0.26671785309495083, "grad_norm": 2.1765854007175873, "learning_rate": 3.8594611930724825e-06, "loss": 0.672, "step": 21885 }, { "epoch": 0.26677878931909865, "grad_norm": 2.144313733430268, "learning_rate": 3.859140474663246e-06, "loss": 0.7673, "step": 21890 }, { "epoch": 0.2668397255432464, "grad_norm": 2.0685744039426943, "learning_rate": 3.858819756254009e-06, "loss": 0.7372, "step": 21895 }, { "epoch": 0.26690066176739424, "grad_norm": 2.873310351615447, "learning_rate": 3.858499037844773e-06, "loss": 0.7867, "step": 21900 }, { "epoch": 0.26696159799154207, "grad_norm": 2.304915824545241, "learning_rate": 3.858178319435536e-06, "loss": 0.8177, "step": 21905 }, { "epoch": 0.26702253421568983, "grad_norm": 3.3487596754529028, "learning_rate": 3.857857601026299e-06, "loss": 0.7709, "step": 21910 }, { "epoch": 0.26708347043983766, "grad_norm": 3.1685629054195372, "learning_rate": 3.857536882617062e-06, "loss": 0.766, "step": 21915 }, { "epoch": 0.2671444066639855, "grad_norm": 2.1470651882949428, "learning_rate": 3.857216164207826e-06, "loss": 0.8597, "step": 21920 }, { "epoch": 0.2672053428881333, "grad_norm": 2.511994873770223, "learning_rate": 3.856895445798589e-06, "loss": 0.8459, "step": 21925 }, { "epoch": 0.26726627911228107, "grad_norm": 2.5166281764234406, "learning_rate": 3.856574727389352e-06, "loss": 0.7603, "step": 21930 }, { "epoch": 0.2673272153364289, "grad_norm": 2.698859484720524, "learning_rate": 3.856254008980116e-06, "loss": 0.7908, "step": 21935 }, { "epoch": 0.2673881515605767, "grad_norm": 2.270927133797604, "learning_rate": 3.855933290570879e-06, "loss": 0.7527, "step": 21940 }, { "epoch": 0.2674490877847245, "grad_norm": 2.5954178201702733, "learning_rate": 3.855612572161642e-06, "loss": 0.7414, "step": 21945 }, { "epoch": 0.2675100240088723, "grad_norm": 2.543908362182132, "learning_rate": 3.855291853752406e-06, "loss": 0.7512, "step": 21950 }, { "epoch": 0.2675709602330201, "grad_norm": 2.5491965947315944, "learning_rate": 3.854971135343169e-06, "loss": 0.7821, "step": 21955 }, { "epoch": 0.26763189645716795, "grad_norm": 2.3797438885461064, "learning_rate": 3.854650416933932e-06, "loss": 0.8269, "step": 21960 }, { "epoch": 0.2676928326813157, "grad_norm": 2.9755699138906047, "learning_rate": 3.854329698524696e-06, "loss": 0.8308, "step": 21965 }, { "epoch": 0.26775376890546354, "grad_norm": 2.338805824574647, "learning_rate": 3.854008980115459e-06, "loss": 0.7999, "step": 21970 }, { "epoch": 0.26781470512961136, "grad_norm": 2.672269089422876, "learning_rate": 3.853688261706222e-06, "loss": 0.7911, "step": 21975 }, { "epoch": 0.26787564135375913, "grad_norm": 2.418445533219837, "learning_rate": 3.853367543296986e-06, "loss": 0.7581, "step": 21980 }, { "epoch": 0.26793657757790695, "grad_norm": 2.2739320996079266, "learning_rate": 3.853046824887749e-06, "loss": 0.7682, "step": 21985 }, { "epoch": 0.2679975138020548, "grad_norm": 2.571968315452112, "learning_rate": 3.852726106478512e-06, "loss": 0.8703, "step": 21990 }, { "epoch": 0.2680584500262026, "grad_norm": 2.3389300010266036, "learning_rate": 3.852405388069275e-06, "loss": 0.8634, "step": 21995 }, { "epoch": 0.26811938625035037, "grad_norm": 2.4597935110214313, "learning_rate": 3.852084669660039e-06, "loss": 0.8117, "step": 22000 }, { "epoch": 0.2681803224744982, "grad_norm": 2.552159065865116, "learning_rate": 3.851763951250802e-06, "loss": 0.8278, "step": 22005 }, { "epoch": 0.268241258698646, "grad_norm": 2.8183988753254474, "learning_rate": 3.8514432328415656e-06, "loss": 0.7925, "step": 22010 }, { "epoch": 0.2683021949227938, "grad_norm": 2.5969548256373227, "learning_rate": 3.851122514432329e-06, "loss": 0.8276, "step": 22015 }, { "epoch": 0.2683631311469416, "grad_norm": 2.673386330637138, "learning_rate": 3.850801796023092e-06, "loss": 0.8763, "step": 22020 }, { "epoch": 0.2684240673710894, "grad_norm": 2.472164169758023, "learning_rate": 3.8504810776138555e-06, "loss": 0.7568, "step": 22025 }, { "epoch": 0.26848500359523725, "grad_norm": 3.3651479131763034, "learning_rate": 3.8501603592046185e-06, "loss": 0.9165, "step": 22030 }, { "epoch": 0.268545939819385, "grad_norm": 2.4157083170659885, "learning_rate": 3.849839640795382e-06, "loss": 0.7487, "step": 22035 }, { "epoch": 0.26860687604353284, "grad_norm": 2.647376380859605, "learning_rate": 3.849518922386145e-06, "loss": 0.7689, "step": 22040 }, { "epoch": 0.26866781226768066, "grad_norm": 2.4302646679699915, "learning_rate": 3.849198203976909e-06, "loss": 0.8396, "step": 22045 }, { "epoch": 0.26872874849182843, "grad_norm": 2.6464275688148557, "learning_rate": 3.848877485567672e-06, "loss": 0.8186, "step": 22050 }, { "epoch": 0.26878968471597625, "grad_norm": 2.157189426818874, "learning_rate": 3.848556767158435e-06, "loss": 0.8063, "step": 22055 }, { "epoch": 0.2688506209401241, "grad_norm": 2.66142217894457, "learning_rate": 3.848236048749199e-06, "loss": 0.8176, "step": 22060 }, { "epoch": 0.2689115571642719, "grad_norm": 3.1693652310498206, "learning_rate": 3.847915330339962e-06, "loss": 0.8211, "step": 22065 }, { "epoch": 0.26897249338841966, "grad_norm": 2.8617778194877497, "learning_rate": 3.847594611930725e-06, "loss": 0.7964, "step": 22070 }, { "epoch": 0.2690334296125675, "grad_norm": 2.326828811868148, "learning_rate": 3.847273893521489e-06, "loss": 0.895, "step": 22075 }, { "epoch": 0.2690943658367153, "grad_norm": 2.6845632953803507, "learning_rate": 3.846953175112252e-06, "loss": 0.7412, "step": 22080 }, { "epoch": 0.2691553020608631, "grad_norm": 2.3865421321861433, "learning_rate": 3.846632456703015e-06, "loss": 0.8045, "step": 22085 }, { "epoch": 0.2692162382850109, "grad_norm": 2.50181945742999, "learning_rate": 3.846311738293778e-06, "loss": 0.8344, "step": 22090 }, { "epoch": 0.2692771745091587, "grad_norm": 2.667095901847292, "learning_rate": 3.845991019884542e-06, "loss": 0.8689, "step": 22095 }, { "epoch": 0.26933811073330655, "grad_norm": 2.348372717058885, "learning_rate": 3.845670301475305e-06, "loss": 0.9249, "step": 22100 }, { "epoch": 0.2693990469574543, "grad_norm": 2.414120471807413, "learning_rate": 3.845349583066068e-06, "loss": 0.8134, "step": 22105 }, { "epoch": 0.26945998318160214, "grad_norm": 2.947495482096071, "learning_rate": 3.845028864656832e-06, "loss": 0.8461, "step": 22110 }, { "epoch": 0.26952091940574996, "grad_norm": 2.8200463163589795, "learning_rate": 3.844708146247595e-06, "loss": 0.8583, "step": 22115 }, { "epoch": 0.2695818556298977, "grad_norm": 2.021832138208013, "learning_rate": 3.844387427838358e-06, "loss": 0.834, "step": 22120 }, { "epoch": 0.26964279185404555, "grad_norm": 2.7198143316413312, "learning_rate": 3.844066709429122e-06, "loss": 0.7601, "step": 22125 }, { "epoch": 0.26970372807819337, "grad_norm": 2.6174132840195927, "learning_rate": 3.843745991019885e-06, "loss": 0.8133, "step": 22130 }, { "epoch": 0.2697646643023412, "grad_norm": 2.4692797153729624, "learning_rate": 3.843425272610648e-06, "loss": 0.793, "step": 22135 }, { "epoch": 0.26982560052648896, "grad_norm": 2.6080917366714194, "learning_rate": 3.843104554201412e-06, "loss": 0.7791, "step": 22140 }, { "epoch": 0.2698865367506368, "grad_norm": 2.5817757943854076, "learning_rate": 3.842783835792175e-06, "loss": 0.7631, "step": 22145 }, { "epoch": 0.2699474729747846, "grad_norm": 2.649469285087237, "learning_rate": 3.842463117382938e-06, "loss": 0.8075, "step": 22150 }, { "epoch": 0.2700084091989324, "grad_norm": 2.6251102955066647, "learning_rate": 3.8421423989737015e-06, "loss": 0.7836, "step": 22155 }, { "epoch": 0.2700693454230802, "grad_norm": 2.814679189683621, "learning_rate": 3.8418216805644645e-06, "loss": 0.7671, "step": 22160 }, { "epoch": 0.270130281647228, "grad_norm": 2.8341574038394226, "learning_rate": 3.8415009621552276e-06, "loss": 0.7629, "step": 22165 }, { "epoch": 0.27019121787137584, "grad_norm": 2.1383601793913236, "learning_rate": 3.841180243745991e-06, "loss": 0.8149, "step": 22170 }, { "epoch": 0.2702521540955236, "grad_norm": 2.4955396355526993, "learning_rate": 3.8408595253367544e-06, "loss": 0.8126, "step": 22175 }, { "epoch": 0.27031309031967143, "grad_norm": 3.3518596437713173, "learning_rate": 3.840538806927518e-06, "loss": 0.7853, "step": 22180 }, { "epoch": 0.27037402654381926, "grad_norm": 2.2765875683605072, "learning_rate": 3.840218088518281e-06, "loss": 0.7162, "step": 22185 }, { "epoch": 0.270434962767967, "grad_norm": 2.5436278468889277, "learning_rate": 3.839897370109044e-06, "loss": 0.8262, "step": 22190 }, { "epoch": 0.27049589899211485, "grad_norm": 2.081089357934547, "learning_rate": 3.839576651699808e-06, "loss": 0.742, "step": 22195 }, { "epoch": 0.27055683521626267, "grad_norm": 2.9424805391385833, "learning_rate": 3.839255933290571e-06, "loss": 0.827, "step": 22200 }, { "epoch": 0.2706177714404105, "grad_norm": 1.8964508779448979, "learning_rate": 3.838935214881335e-06, "loss": 0.7558, "step": 22205 }, { "epoch": 0.27067870766455826, "grad_norm": 2.4777518713559767, "learning_rate": 3.838614496472098e-06, "loss": 0.7687, "step": 22210 }, { "epoch": 0.2707396438887061, "grad_norm": 2.3275506341172347, "learning_rate": 3.838293778062861e-06, "loss": 0.7962, "step": 22215 }, { "epoch": 0.2708005801128539, "grad_norm": 2.7008637522486207, "learning_rate": 3.837973059653625e-06, "loss": 0.8373, "step": 22220 }, { "epoch": 0.2708615163370017, "grad_norm": 2.576109809801201, "learning_rate": 3.837652341244388e-06, "loss": 0.8011, "step": 22225 }, { "epoch": 0.2709224525611495, "grad_norm": 2.452872274163908, "learning_rate": 3.837331622835151e-06, "loss": 0.7818, "step": 22230 }, { "epoch": 0.2709833887852973, "grad_norm": 2.688005378433989, "learning_rate": 3.837010904425915e-06, "loss": 0.8401, "step": 22235 }, { "epoch": 0.27104432500944514, "grad_norm": 2.842843863608547, "learning_rate": 3.836690186016678e-06, "loss": 0.8782, "step": 22240 }, { "epoch": 0.2711052612335929, "grad_norm": 2.6019259303562694, "learning_rate": 3.836369467607441e-06, "loss": 0.8443, "step": 22245 }, { "epoch": 0.27116619745774073, "grad_norm": 3.3367391203908827, "learning_rate": 3.836048749198204e-06, "loss": 0.7681, "step": 22250 }, { "epoch": 0.27122713368188855, "grad_norm": 2.379922058029851, "learning_rate": 3.835728030788968e-06, "loss": 0.7577, "step": 22255 }, { "epoch": 0.2712880699060363, "grad_norm": 2.4905634179144305, "learning_rate": 3.835407312379731e-06, "loss": 0.8379, "step": 22260 }, { "epoch": 0.27134900613018414, "grad_norm": 2.107172407636229, "learning_rate": 3.835086593970494e-06, "loss": 0.8312, "step": 22265 }, { "epoch": 0.27140994235433197, "grad_norm": 2.1668327016222064, "learning_rate": 3.834765875561258e-06, "loss": 0.8431, "step": 22270 }, { "epoch": 0.2714708785784798, "grad_norm": 2.6862549039820363, "learning_rate": 3.834445157152021e-06, "loss": 0.6985, "step": 22275 }, { "epoch": 0.27153181480262756, "grad_norm": 1.9984441300511655, "learning_rate": 3.834124438742784e-06, "loss": 0.786, "step": 22280 }, { "epoch": 0.2715927510267754, "grad_norm": 2.093986246628672, "learning_rate": 3.833803720333548e-06, "loss": 0.7814, "step": 22285 }, { "epoch": 0.2716536872509232, "grad_norm": 2.348466888642965, "learning_rate": 3.833483001924311e-06, "loss": 0.7953, "step": 22290 }, { "epoch": 0.27171462347507097, "grad_norm": 3.0504084719640328, "learning_rate": 3.833162283515074e-06, "loss": 0.7476, "step": 22295 }, { "epoch": 0.2717755596992188, "grad_norm": 2.5271834902310752, "learning_rate": 3.8328415651058375e-06, "loss": 0.805, "step": 22300 }, { "epoch": 0.2718364959233666, "grad_norm": 2.722450078320124, "learning_rate": 3.8325208466966005e-06, "loss": 0.8094, "step": 22305 }, { "epoch": 0.2718974321475144, "grad_norm": 2.5390437777618087, "learning_rate": 3.8322001282873635e-06, "loss": 0.7954, "step": 22310 }, { "epoch": 0.2719583683716622, "grad_norm": 2.2539799389904283, "learning_rate": 3.831879409878127e-06, "loss": 0.8804, "step": 22315 }, { "epoch": 0.27201930459581003, "grad_norm": 2.4523256434581007, "learning_rate": 3.83155869146889e-06, "loss": 0.6872, "step": 22320 }, { "epoch": 0.27208024081995785, "grad_norm": 1.98844712204248, "learning_rate": 3.831237973059654e-06, "loss": 0.7545, "step": 22325 }, { "epoch": 0.2721411770441056, "grad_norm": 2.4198240880886286, "learning_rate": 3.830917254650417e-06, "loss": 0.7888, "step": 22330 }, { "epoch": 0.27220211326825344, "grad_norm": 3.381144936873507, "learning_rate": 3.83059653624118e-06, "loss": 0.8438, "step": 22335 }, { "epoch": 0.27226304949240127, "grad_norm": 2.356199097264592, "learning_rate": 3.830275817831944e-06, "loss": 0.8443, "step": 22340 }, { "epoch": 0.27232398571654903, "grad_norm": 2.6781471108333106, "learning_rate": 3.829955099422707e-06, "loss": 0.7295, "step": 22345 }, { "epoch": 0.27238492194069686, "grad_norm": 2.4486767323589267, "learning_rate": 3.829634381013471e-06, "loss": 0.8116, "step": 22350 }, { "epoch": 0.2724458581648447, "grad_norm": 2.4298948683842974, "learning_rate": 3.829313662604234e-06, "loss": 0.8617, "step": 22355 }, { "epoch": 0.2725067943889925, "grad_norm": 2.584664916208336, "learning_rate": 3.828992944194997e-06, "loss": 0.7573, "step": 22360 }, { "epoch": 0.27256773061314027, "grad_norm": 2.3195010641204417, "learning_rate": 3.828672225785761e-06, "loss": 0.6967, "step": 22365 }, { "epoch": 0.2726286668372881, "grad_norm": 2.6726331651985027, "learning_rate": 3.828351507376524e-06, "loss": 0.6962, "step": 22370 }, { "epoch": 0.2726896030614359, "grad_norm": 2.6163300762093016, "learning_rate": 3.828030788967287e-06, "loss": 0.7711, "step": 22375 }, { "epoch": 0.2727505392855837, "grad_norm": 2.2738797838205618, "learning_rate": 3.827710070558051e-06, "loss": 0.8012, "step": 22380 }, { "epoch": 0.2728114755097315, "grad_norm": 3.012888734791863, "learning_rate": 3.827389352148814e-06, "loss": 0.8649, "step": 22385 }, { "epoch": 0.2728724117338793, "grad_norm": 1.9557960032665276, "learning_rate": 3.827068633739577e-06, "loss": 0.7923, "step": 22390 }, { "epoch": 0.27293334795802715, "grad_norm": 2.0952790524031113, "learning_rate": 3.826747915330341e-06, "loss": 0.7276, "step": 22395 }, { "epoch": 0.2729942841821749, "grad_norm": 2.272906373828099, "learning_rate": 3.826427196921104e-06, "loss": 0.7582, "step": 22400 }, { "epoch": 0.27305522040632274, "grad_norm": 2.158268602835746, "learning_rate": 3.826106478511867e-06, "loss": 0.7908, "step": 22405 }, { "epoch": 0.27311615663047056, "grad_norm": 3.0736940715543786, "learning_rate": 3.825785760102631e-06, "loss": 0.8302, "step": 22410 }, { "epoch": 0.27317709285461833, "grad_norm": 4.683299803278103, "learning_rate": 3.825465041693394e-06, "loss": 0.8163, "step": 22415 }, { "epoch": 0.27323802907876615, "grad_norm": 2.8864251708838684, "learning_rate": 3.825144323284157e-06, "loss": 0.8202, "step": 22420 }, { "epoch": 0.273298965302914, "grad_norm": 2.319056944529791, "learning_rate": 3.82482360487492e-06, "loss": 0.832, "step": 22425 }, { "epoch": 0.2733599015270618, "grad_norm": 2.12112446075055, "learning_rate": 3.8245028864656835e-06, "loss": 0.7724, "step": 22430 }, { "epoch": 0.27342083775120957, "grad_norm": 2.2859621105634065, "learning_rate": 3.8241821680564466e-06, "loss": 0.7368, "step": 22435 }, { "epoch": 0.2734817739753574, "grad_norm": 2.2776921093629365, "learning_rate": 3.8238614496472096e-06, "loss": 0.7402, "step": 22440 }, { "epoch": 0.2735427101995052, "grad_norm": 3.8337880252444028, "learning_rate": 3.8235407312379734e-06, "loss": 0.8196, "step": 22445 }, { "epoch": 0.273603646423653, "grad_norm": 2.9282338301501403, "learning_rate": 3.8232200128287364e-06, "loss": 0.8451, "step": 22450 }, { "epoch": 0.2736645826478008, "grad_norm": 2.9400012809170017, "learning_rate": 3.8228992944194995e-06, "loss": 0.8344, "step": 22455 }, { "epoch": 0.2737255188719486, "grad_norm": 2.7667103327239846, "learning_rate": 3.822578576010263e-06, "loss": 0.9075, "step": 22460 }, { "epoch": 0.27378645509609645, "grad_norm": 2.5799771486945233, "learning_rate": 3.822257857601026e-06, "loss": 0.7787, "step": 22465 }, { "epoch": 0.2738473913202442, "grad_norm": 2.2256869589104618, "learning_rate": 3.821937139191789e-06, "loss": 0.7409, "step": 22470 }, { "epoch": 0.27390832754439204, "grad_norm": 2.5380735695794145, "learning_rate": 3.821616420782553e-06, "loss": 0.6907, "step": 22475 }, { "epoch": 0.27396926376853986, "grad_norm": 2.170792526405764, "learning_rate": 3.821295702373316e-06, "loss": 0.7778, "step": 22480 }, { "epoch": 0.27403019999268763, "grad_norm": 2.53171806686253, "learning_rate": 3.82097498396408e-06, "loss": 0.7868, "step": 22485 }, { "epoch": 0.27409113621683545, "grad_norm": 2.586760316245553, "learning_rate": 3.820654265554843e-06, "loss": 0.8249, "step": 22490 }, { "epoch": 0.2741520724409833, "grad_norm": 2.8792081916610797, "learning_rate": 3.820333547145606e-06, "loss": 0.8438, "step": 22495 }, { "epoch": 0.2742130086651311, "grad_norm": 2.5556361137387067, "learning_rate": 3.82001282873637e-06, "loss": 0.8009, "step": 22500 }, { "epoch": 0.27427394488927886, "grad_norm": 3.1905373861253277, "learning_rate": 3.819692110327133e-06, "loss": 0.7371, "step": 22505 }, { "epoch": 0.2743348811134267, "grad_norm": 2.103491291221736, "learning_rate": 3.819371391917897e-06, "loss": 0.7875, "step": 22510 }, { "epoch": 0.2743958173375745, "grad_norm": 2.1642088580015724, "learning_rate": 3.81905067350866e-06, "loss": 0.7176, "step": 22515 }, { "epoch": 0.2744567535617223, "grad_norm": 2.581013036149419, "learning_rate": 3.818729955099423e-06, "loss": 0.7398, "step": 22520 }, { "epoch": 0.2745176897858701, "grad_norm": 2.988006327763918, "learning_rate": 3.818409236690187e-06, "loss": 0.7463, "step": 22525 }, { "epoch": 0.2745786260100179, "grad_norm": 2.5516778669565485, "learning_rate": 3.81808851828095e-06, "loss": 0.7235, "step": 22530 }, { "epoch": 0.27463956223416575, "grad_norm": 2.038375283150194, "learning_rate": 3.817767799871713e-06, "loss": 0.7322, "step": 22535 }, { "epoch": 0.2747004984583135, "grad_norm": 2.1951497317827453, "learning_rate": 3.817447081462477e-06, "loss": 0.7083, "step": 22540 }, { "epoch": 0.27476143468246134, "grad_norm": 2.2941973718718476, "learning_rate": 3.81712636305324e-06, "loss": 0.7877, "step": 22545 }, { "epoch": 0.27482237090660916, "grad_norm": 2.5538712560877572, "learning_rate": 3.816805644644003e-06, "loss": 0.7801, "step": 22550 }, { "epoch": 0.2748833071307569, "grad_norm": 2.441634315594998, "learning_rate": 3.816484926234767e-06, "loss": 0.8853, "step": 22555 }, { "epoch": 0.27494424335490475, "grad_norm": 2.240688623062423, "learning_rate": 3.81616420782553e-06, "loss": 0.8036, "step": 22560 }, { "epoch": 0.27500517957905257, "grad_norm": 1.8832538356333903, "learning_rate": 3.815843489416293e-06, "loss": 0.7933, "step": 22565 }, { "epoch": 0.2750661158032004, "grad_norm": 4.172560745227434, "learning_rate": 3.8155227710070565e-06, "loss": 0.7868, "step": 22570 }, { "epoch": 0.27512705202734816, "grad_norm": 2.6626101626791914, "learning_rate": 3.8152020525978195e-06, "loss": 0.7656, "step": 22575 }, { "epoch": 0.275187988251496, "grad_norm": 2.9600376990131307, "learning_rate": 3.8148813341885825e-06, "loss": 0.7752, "step": 22580 }, { "epoch": 0.2752489244756438, "grad_norm": 4.958832118353078, "learning_rate": 3.814560615779346e-06, "loss": 0.819, "step": 22585 }, { "epoch": 0.2753098606997916, "grad_norm": 2.5225411515367266, "learning_rate": 3.8142398973701094e-06, "loss": 0.7058, "step": 22590 }, { "epoch": 0.2753707969239394, "grad_norm": 2.5020197219428533, "learning_rate": 3.8139191789608724e-06, "loss": 0.8196, "step": 22595 }, { "epoch": 0.2754317331480872, "grad_norm": 2.3250241041120825, "learning_rate": 3.813598460551636e-06, "loss": 0.8129, "step": 22600 }, { "epoch": 0.27549266937223504, "grad_norm": 2.576418608540503, "learning_rate": 3.8132777421423993e-06, "loss": 0.8013, "step": 22605 }, { "epoch": 0.2755536055963828, "grad_norm": 3.2437868656868285, "learning_rate": 3.8129570237331627e-06, "loss": 0.7924, "step": 22610 }, { "epoch": 0.27561454182053063, "grad_norm": 2.973218905888109, "learning_rate": 3.8126363053239257e-06, "loss": 0.9182, "step": 22615 }, { "epoch": 0.27567547804467846, "grad_norm": 2.4774550713687393, "learning_rate": 3.812315586914689e-06, "loss": 0.7642, "step": 22620 }, { "epoch": 0.2757364142688262, "grad_norm": 2.7803767801447, "learning_rate": 3.8119948685054526e-06, "loss": 0.8289, "step": 22625 }, { "epoch": 0.27579735049297405, "grad_norm": 2.579170811102859, "learning_rate": 3.8116741500962156e-06, "loss": 0.8222, "step": 22630 }, { "epoch": 0.27585828671712187, "grad_norm": 3.7479697806120407, "learning_rate": 3.8113534316869795e-06, "loss": 0.7483, "step": 22635 }, { "epoch": 0.2759192229412697, "grad_norm": 2.5607587201081037, "learning_rate": 3.8110327132777425e-06, "loss": 0.7638, "step": 22640 }, { "epoch": 0.27598015916541746, "grad_norm": 2.3425053166184333, "learning_rate": 3.8107119948685055e-06, "loss": 0.8434, "step": 22645 }, { "epoch": 0.2760410953895653, "grad_norm": 2.4776339453716276, "learning_rate": 3.8103912764592694e-06, "loss": 0.6983, "step": 22650 }, { "epoch": 0.2761020316137131, "grad_norm": 2.393652793762377, "learning_rate": 3.8100705580500324e-06, "loss": 0.8931, "step": 22655 }, { "epoch": 0.2761629678378609, "grad_norm": 3.6578286322873432, "learning_rate": 3.8097498396407954e-06, "loss": 0.823, "step": 22660 }, { "epoch": 0.2762239040620087, "grad_norm": 3.5900809883931393, "learning_rate": 3.8094291212315593e-06, "loss": 0.8029, "step": 22665 }, { "epoch": 0.2762848402861565, "grad_norm": 2.378965653036895, "learning_rate": 3.8091084028223223e-06, "loss": 0.7493, "step": 22670 }, { "epoch": 0.27634577651030434, "grad_norm": 2.3276246311771684, "learning_rate": 3.8087876844130858e-06, "loss": 0.8003, "step": 22675 }, { "epoch": 0.2764067127344521, "grad_norm": 2.5940083972149157, "learning_rate": 3.8084669660038488e-06, "loss": 0.7921, "step": 22680 }, { "epoch": 0.27646764895859993, "grad_norm": 2.6198246641243537, "learning_rate": 3.8081462475946122e-06, "loss": 0.7693, "step": 22685 }, { "epoch": 0.27652858518274775, "grad_norm": 2.37592639304492, "learning_rate": 3.8078255291853757e-06, "loss": 0.7258, "step": 22690 }, { "epoch": 0.2765895214068955, "grad_norm": 2.6913700816058506, "learning_rate": 3.8075048107761387e-06, "loss": 0.8311, "step": 22695 }, { "epoch": 0.27665045763104334, "grad_norm": 2.4057408104263285, "learning_rate": 3.8071840923669025e-06, "loss": 0.7512, "step": 22700 }, { "epoch": 0.27671139385519117, "grad_norm": 2.615296726955149, "learning_rate": 3.8068633739576656e-06, "loss": 0.8399, "step": 22705 }, { "epoch": 0.276772330079339, "grad_norm": 2.184744482208722, "learning_rate": 3.8065426555484286e-06, "loss": 0.7228, "step": 22710 }, { "epoch": 0.27683326630348676, "grad_norm": 4.366009445765789, "learning_rate": 3.8062219371391924e-06, "loss": 0.8233, "step": 22715 }, { "epoch": 0.2768942025276346, "grad_norm": 2.2396335475994307, "learning_rate": 3.8059012187299554e-06, "loss": 0.7258, "step": 22720 }, { "epoch": 0.2769551387517824, "grad_norm": 2.439205100335051, "learning_rate": 3.8055805003207185e-06, "loss": 0.8094, "step": 22725 }, { "epoch": 0.27701607497593017, "grad_norm": 2.354731202185913, "learning_rate": 3.8052597819114823e-06, "loss": 0.7308, "step": 22730 }, { "epoch": 0.277077011200078, "grad_norm": 2.5778261508195786, "learning_rate": 3.8049390635022453e-06, "loss": 0.7857, "step": 22735 }, { "epoch": 0.2771379474242258, "grad_norm": 2.886411459007944, "learning_rate": 3.8046183450930084e-06, "loss": 0.8173, "step": 22740 }, { "epoch": 0.27719888364837364, "grad_norm": 2.541031641329215, "learning_rate": 3.8042976266837722e-06, "loss": 0.78, "step": 22745 }, { "epoch": 0.2772598198725214, "grad_norm": 2.6319911360745305, "learning_rate": 3.8039769082745352e-06, "loss": 0.8111, "step": 22750 }, { "epoch": 0.27732075609666923, "grad_norm": 3.2448516087473394, "learning_rate": 3.8036561898652987e-06, "loss": 0.7559, "step": 22755 }, { "epoch": 0.27738169232081705, "grad_norm": 2.42618927185568, "learning_rate": 3.8033354714560617e-06, "loss": 0.8374, "step": 22760 }, { "epoch": 0.2774426285449648, "grad_norm": 2.6253349357142985, "learning_rate": 3.803014753046825e-06, "loss": 0.7974, "step": 22765 }, { "epoch": 0.27750356476911264, "grad_norm": 2.3516917738870124, "learning_rate": 3.8026940346375886e-06, "loss": 0.7868, "step": 22770 }, { "epoch": 0.27756450099326047, "grad_norm": 2.553401025624843, "learning_rate": 3.8023733162283516e-06, "loss": 0.84, "step": 22775 }, { "epoch": 0.27762543721740823, "grad_norm": 3.7805764300511773, "learning_rate": 3.8020525978191155e-06, "loss": 0.8006, "step": 22780 }, { "epoch": 0.27768637344155606, "grad_norm": 2.654476603251456, "learning_rate": 3.8017318794098785e-06, "loss": 0.8025, "step": 22785 }, { "epoch": 0.2777473096657039, "grad_norm": 2.045160419183974, "learning_rate": 3.8014111610006415e-06, "loss": 0.8008, "step": 22790 }, { "epoch": 0.2778082458898517, "grad_norm": 2.1599718690964544, "learning_rate": 3.8010904425914054e-06, "loss": 0.8163, "step": 22795 }, { "epoch": 0.27786918211399947, "grad_norm": 3.1366754571484297, "learning_rate": 3.8007697241821684e-06, "loss": 0.8065, "step": 22800 }, { "epoch": 0.2779301183381473, "grad_norm": 2.323987859445046, "learning_rate": 3.8004490057729314e-06, "loss": 0.7099, "step": 22805 }, { "epoch": 0.2779910545622951, "grad_norm": 2.959543998413689, "learning_rate": 3.8001282873636953e-06, "loss": 0.7577, "step": 22810 }, { "epoch": 0.2780519907864429, "grad_norm": 2.2640748916704303, "learning_rate": 3.7998075689544583e-06, "loss": 0.751, "step": 22815 }, { "epoch": 0.2781129270105907, "grad_norm": 2.7601458744865606, "learning_rate": 3.7994868505452213e-06, "loss": 0.7036, "step": 22820 }, { "epoch": 0.2781738632347385, "grad_norm": 2.8386243774425535, "learning_rate": 3.799166132135985e-06, "loss": 0.7745, "step": 22825 }, { "epoch": 0.27823479945888635, "grad_norm": 2.0834817977762663, "learning_rate": 3.798845413726748e-06, "loss": 0.7621, "step": 22830 }, { "epoch": 0.2782957356830341, "grad_norm": 2.2787375034874024, "learning_rate": 3.7985246953175116e-06, "loss": 0.7217, "step": 22835 }, { "epoch": 0.27835667190718194, "grad_norm": 2.8761461196959273, "learning_rate": 3.7982039769082746e-06, "loss": 0.7967, "step": 22840 }, { "epoch": 0.27841760813132976, "grad_norm": 2.539999548885889, "learning_rate": 3.797883258499038e-06, "loss": 0.8035, "step": 22845 }, { "epoch": 0.27847854435547753, "grad_norm": 2.3580151671671654, "learning_rate": 3.7975625400898015e-06, "loss": 0.7864, "step": 22850 }, { "epoch": 0.27853948057962535, "grad_norm": 1.9974966083776662, "learning_rate": 3.7972418216805645e-06, "loss": 0.9072, "step": 22855 }, { "epoch": 0.2786004168037732, "grad_norm": 2.268738149238152, "learning_rate": 3.7969211032713284e-06, "loss": 0.8101, "step": 22860 }, { "epoch": 0.278661353027921, "grad_norm": 2.6993486495906964, "learning_rate": 3.7966003848620914e-06, "loss": 0.782, "step": 22865 }, { "epoch": 0.27872228925206877, "grad_norm": 3.396206578130067, "learning_rate": 3.7962796664528544e-06, "loss": 0.8538, "step": 22870 }, { "epoch": 0.2787832254762166, "grad_norm": 2.1002043046078667, "learning_rate": 3.7959589480436183e-06, "loss": 0.7464, "step": 22875 }, { "epoch": 0.2788441617003644, "grad_norm": 2.53300515995119, "learning_rate": 3.7956382296343813e-06, "loss": 0.7736, "step": 22880 }, { "epoch": 0.2789050979245122, "grad_norm": 2.717833504491963, "learning_rate": 3.7953175112251443e-06, "loss": 0.7872, "step": 22885 }, { "epoch": 0.27896603414866, "grad_norm": 2.2678374788852, "learning_rate": 3.794996792815908e-06, "loss": 0.8149, "step": 22890 }, { "epoch": 0.2790269703728078, "grad_norm": 2.1690624954380318, "learning_rate": 3.794676074406671e-06, "loss": 0.7919, "step": 22895 }, { "epoch": 0.27908790659695565, "grad_norm": 2.3513154312067086, "learning_rate": 3.7943553559974346e-06, "loss": 0.8065, "step": 22900 }, { "epoch": 0.2791488428211034, "grad_norm": 2.383331846235066, "learning_rate": 3.794034637588198e-06, "loss": 0.7915, "step": 22905 }, { "epoch": 0.27920977904525124, "grad_norm": 2.508493001282834, "learning_rate": 3.793713919178961e-06, "loss": 0.7849, "step": 22910 }, { "epoch": 0.27927071526939906, "grad_norm": 2.824171625742389, "learning_rate": 3.7933932007697245e-06, "loss": 0.7673, "step": 22915 }, { "epoch": 0.27933165149354683, "grad_norm": 2.397639768591638, "learning_rate": 3.7930724823604876e-06, "loss": 0.8096, "step": 22920 }, { "epoch": 0.27939258771769465, "grad_norm": 2.336033475206515, "learning_rate": 3.7927517639512514e-06, "loss": 0.7871, "step": 22925 }, { "epoch": 0.2794535239418425, "grad_norm": 2.0474461514468723, "learning_rate": 3.7924310455420144e-06, "loss": 0.7916, "step": 22930 }, { "epoch": 0.2795144601659903, "grad_norm": 2.6839618335709114, "learning_rate": 3.7921103271327775e-06, "loss": 0.8597, "step": 22935 }, { "epoch": 0.27957539639013806, "grad_norm": 2.171676714715684, "learning_rate": 3.7917896087235413e-06, "loss": 0.771, "step": 22940 }, { "epoch": 0.2796363326142859, "grad_norm": 2.1284576434006603, "learning_rate": 3.7914688903143043e-06, "loss": 0.7533, "step": 22945 }, { "epoch": 0.2796972688384337, "grad_norm": 2.8084320921060795, "learning_rate": 3.7911481719050673e-06, "loss": 0.8846, "step": 22950 }, { "epoch": 0.2797582050625815, "grad_norm": 2.3536524611434597, "learning_rate": 3.7908274534958312e-06, "loss": 0.8011, "step": 22955 }, { "epoch": 0.2798191412867293, "grad_norm": 2.4705172006426044, "learning_rate": 3.7905067350865942e-06, "loss": 0.7639, "step": 22960 }, { "epoch": 0.2798800775108771, "grad_norm": 2.637439231136627, "learning_rate": 3.7901860166773572e-06, "loss": 0.7774, "step": 22965 }, { "epoch": 0.27994101373502495, "grad_norm": 2.3597157972433402, "learning_rate": 3.789865298268121e-06, "loss": 0.8185, "step": 22970 }, { "epoch": 0.2800019499591727, "grad_norm": 2.69223715739802, "learning_rate": 3.789544579858884e-06, "loss": 0.7906, "step": 22975 }, { "epoch": 0.28006288618332054, "grad_norm": 2.2880668403114997, "learning_rate": 3.7892238614496476e-06, "loss": 0.8084, "step": 22980 }, { "epoch": 0.28012382240746836, "grad_norm": 2.2263096343800286, "learning_rate": 3.788903143040411e-06, "loss": 0.8038, "step": 22985 }, { "epoch": 0.2801847586316161, "grad_norm": 2.3192770644720158, "learning_rate": 3.788582424631174e-06, "loss": 0.7528, "step": 22990 }, { "epoch": 0.28024569485576395, "grad_norm": 2.778456564145669, "learning_rate": 3.7882617062219375e-06, "loss": 0.7639, "step": 22995 }, { "epoch": 0.28030663107991177, "grad_norm": 2.8850275164725803, "learning_rate": 3.787940987812701e-06, "loss": 0.7896, "step": 23000 }, { "epoch": 0.2803675673040596, "grad_norm": 2.1688484035816344, "learning_rate": 3.7876202694034643e-06, "loss": 0.8416, "step": 23005 }, { "epoch": 0.28042850352820736, "grad_norm": 3.012624692587084, "learning_rate": 3.7872995509942274e-06, "loss": 0.7291, "step": 23010 }, { "epoch": 0.2804894397523552, "grad_norm": 2.766944576695647, "learning_rate": 3.7869788325849904e-06, "loss": 0.8003, "step": 23015 }, { "epoch": 0.280550375976503, "grad_norm": 2.501242137676716, "learning_rate": 3.7866581141757542e-06, "loss": 0.7536, "step": 23020 }, { "epoch": 0.2806113122006508, "grad_norm": 2.0434572768652086, "learning_rate": 3.7863373957665173e-06, "loss": 0.776, "step": 23025 }, { "epoch": 0.2806722484247986, "grad_norm": 2.4624809543092385, "learning_rate": 3.7860166773572803e-06, "loss": 0.8131, "step": 23030 }, { "epoch": 0.2807331846489464, "grad_norm": 2.0953517276423668, "learning_rate": 3.785695958948044e-06, "loss": 0.7534, "step": 23035 }, { "epoch": 0.28079412087309424, "grad_norm": 2.351163048597541, "learning_rate": 3.785375240538807e-06, "loss": 0.9115, "step": 23040 }, { "epoch": 0.280855057097242, "grad_norm": 1.9810045295640581, "learning_rate": 3.78505452212957e-06, "loss": 0.7612, "step": 23045 }, { "epoch": 0.28091599332138983, "grad_norm": 3.1281230854263424, "learning_rate": 3.784733803720334e-06, "loss": 0.7559, "step": 23050 }, { "epoch": 0.28097692954553766, "grad_norm": 3.841231506040563, "learning_rate": 3.784413085311097e-06, "loss": 0.8924, "step": 23055 }, { "epoch": 0.2810378657696854, "grad_norm": 2.483349209809272, "learning_rate": 3.7840923669018605e-06, "loss": 0.746, "step": 23060 }, { "epoch": 0.28109880199383325, "grad_norm": 2.202739932919492, "learning_rate": 3.783771648492624e-06, "loss": 0.7698, "step": 23065 }, { "epoch": 0.28115973821798107, "grad_norm": 2.715422537485399, "learning_rate": 3.783450930083387e-06, "loss": 0.7401, "step": 23070 }, { "epoch": 0.2812206744421289, "grad_norm": 2.379127577634563, "learning_rate": 3.7831302116741504e-06, "loss": 0.8193, "step": 23075 }, { "epoch": 0.28128161066627666, "grad_norm": 2.4348619627083052, "learning_rate": 3.782809493264914e-06, "loss": 0.7833, "step": 23080 }, { "epoch": 0.2813425468904245, "grad_norm": 2.753725237689858, "learning_rate": 3.7824887748556773e-06, "loss": 0.8269, "step": 23085 }, { "epoch": 0.2814034831145723, "grad_norm": 2.537289235411602, "learning_rate": 3.7821680564464403e-06, "loss": 0.8017, "step": 23090 }, { "epoch": 0.2814644193387201, "grad_norm": 2.4213835299983795, "learning_rate": 3.7818473380372033e-06, "loss": 0.7284, "step": 23095 }, { "epoch": 0.2815253555628679, "grad_norm": 2.161190369443365, "learning_rate": 3.781526619627967e-06, "loss": 0.8203, "step": 23100 }, { "epoch": 0.2815862917870157, "grad_norm": 2.297547849295905, "learning_rate": 3.78120590121873e-06, "loss": 0.8517, "step": 23105 }, { "epoch": 0.28164722801116354, "grad_norm": 2.6180585453930583, "learning_rate": 3.780885182809493e-06, "loss": 0.8077, "step": 23110 }, { "epoch": 0.2817081642353113, "grad_norm": 2.6370303053863284, "learning_rate": 3.780564464400257e-06, "loss": 0.7727, "step": 23115 }, { "epoch": 0.28176910045945913, "grad_norm": 2.600993406765947, "learning_rate": 3.78024374599102e-06, "loss": 0.7697, "step": 23120 }, { "epoch": 0.28183003668360695, "grad_norm": 2.7854147500655118, "learning_rate": 3.779923027581783e-06, "loss": 0.763, "step": 23125 }, { "epoch": 0.2818909729077547, "grad_norm": 2.6591732550485205, "learning_rate": 3.779602309172547e-06, "loss": 0.8111, "step": 23130 }, { "epoch": 0.28195190913190255, "grad_norm": 2.2317621952489315, "learning_rate": 3.77928159076331e-06, "loss": 0.8861, "step": 23135 }, { "epoch": 0.28201284535605037, "grad_norm": 2.3740665464560364, "learning_rate": 3.7789608723540734e-06, "loss": 0.8082, "step": 23140 }, { "epoch": 0.2820737815801982, "grad_norm": 2.794277337209476, "learning_rate": 3.778640153944837e-06, "loss": 0.868, "step": 23145 }, { "epoch": 0.28213471780434596, "grad_norm": 2.542482098930802, "learning_rate": 3.7783194355356003e-06, "loss": 0.7485, "step": 23150 }, { "epoch": 0.2821956540284938, "grad_norm": 2.5144519897917728, "learning_rate": 3.7779987171263633e-06, "loss": 0.7342, "step": 23155 }, { "epoch": 0.2822565902526416, "grad_norm": 3.1273111570459244, "learning_rate": 3.7776779987171268e-06, "loss": 0.7734, "step": 23160 }, { "epoch": 0.28231752647678937, "grad_norm": 2.231847470321779, "learning_rate": 3.77735728030789e-06, "loss": 0.8106, "step": 23165 }, { "epoch": 0.2823784627009372, "grad_norm": 2.7864033758714126, "learning_rate": 3.7770365618986532e-06, "loss": 0.7577, "step": 23170 }, { "epoch": 0.282439398925085, "grad_norm": 2.4149448170928514, "learning_rate": 3.7767158434894162e-06, "loss": 0.7849, "step": 23175 }, { "epoch": 0.28250033514923284, "grad_norm": 2.7764328004608245, "learning_rate": 3.77639512508018e-06, "loss": 0.7845, "step": 23180 }, { "epoch": 0.2825612713733806, "grad_norm": 2.3120255347773675, "learning_rate": 3.776074406670943e-06, "loss": 0.7861, "step": 23185 }, { "epoch": 0.28262220759752843, "grad_norm": 2.7751504947026433, "learning_rate": 3.775753688261706e-06, "loss": 0.7701, "step": 23190 }, { "epoch": 0.28268314382167625, "grad_norm": 2.2246118336807226, "learning_rate": 3.77543296985247e-06, "loss": 0.7407, "step": 23195 }, { "epoch": 0.282744080045824, "grad_norm": 2.5704331552364796, "learning_rate": 3.775112251443233e-06, "loss": 0.8416, "step": 23200 }, { "epoch": 0.28280501626997184, "grad_norm": 2.495674706745655, "learning_rate": 3.7747915330339965e-06, "loss": 0.7824, "step": 23205 }, { "epoch": 0.28286595249411967, "grad_norm": 2.716059154287301, "learning_rate": 3.77447081462476e-06, "loss": 0.8124, "step": 23210 }, { "epoch": 0.2829268887182675, "grad_norm": 2.6259334130682284, "learning_rate": 3.774150096215523e-06, "loss": 0.8206, "step": 23215 }, { "epoch": 0.28298782494241526, "grad_norm": 2.4663352370039604, "learning_rate": 3.7738293778062863e-06, "loss": 0.7467, "step": 23220 }, { "epoch": 0.2830487611665631, "grad_norm": 2.2661948188248178, "learning_rate": 3.77350865939705e-06, "loss": 0.7475, "step": 23225 }, { "epoch": 0.2831096973907109, "grad_norm": 3.190936102871296, "learning_rate": 3.7731879409878132e-06, "loss": 0.7704, "step": 23230 }, { "epoch": 0.28317063361485867, "grad_norm": 2.5856391515395423, "learning_rate": 3.7728672225785762e-06, "loss": 0.8684, "step": 23235 }, { "epoch": 0.2832315698390065, "grad_norm": 2.827506060487509, "learning_rate": 3.7725465041693397e-06, "loss": 0.7564, "step": 23240 }, { "epoch": 0.2832925060631543, "grad_norm": 2.5526219819044704, "learning_rate": 3.772225785760103e-06, "loss": 0.8238, "step": 23245 }, { "epoch": 0.2833534422873021, "grad_norm": 2.552370052044267, "learning_rate": 3.771905067350866e-06, "loss": 0.7915, "step": 23250 }, { "epoch": 0.2834143785114499, "grad_norm": 2.673190793109055, "learning_rate": 3.771584348941629e-06, "loss": 0.8214, "step": 23255 }, { "epoch": 0.2834753147355977, "grad_norm": 2.31925524708692, "learning_rate": 3.771263630532393e-06, "loss": 0.7965, "step": 23260 }, { "epoch": 0.28353625095974555, "grad_norm": 2.181099382224293, "learning_rate": 3.770942912123156e-06, "loss": 0.771, "step": 23265 }, { "epoch": 0.2835971871838933, "grad_norm": 2.858477268338768, "learning_rate": 3.770622193713919e-06, "loss": 0.8122, "step": 23270 }, { "epoch": 0.28365812340804114, "grad_norm": 2.486449486925934, "learning_rate": 3.770301475304683e-06, "loss": 0.8284, "step": 23275 }, { "epoch": 0.28371905963218896, "grad_norm": 2.4990110613106293, "learning_rate": 3.769980756895446e-06, "loss": 0.8183, "step": 23280 }, { "epoch": 0.28377999585633673, "grad_norm": 2.87979732317349, "learning_rate": 3.7696600384862094e-06, "loss": 0.8148, "step": 23285 }, { "epoch": 0.28384093208048455, "grad_norm": 3.6588236951916686, "learning_rate": 3.769339320076973e-06, "loss": 0.8315, "step": 23290 }, { "epoch": 0.2839018683046324, "grad_norm": 3.6899129524701504, "learning_rate": 3.769018601667736e-06, "loss": 0.843, "step": 23295 }, { "epoch": 0.2839628045287802, "grad_norm": 2.0939921340258634, "learning_rate": 3.7686978832584993e-06, "loss": 0.7392, "step": 23300 }, { "epoch": 0.28402374075292797, "grad_norm": 2.6911918452601484, "learning_rate": 3.7683771648492627e-06, "loss": 0.8288, "step": 23305 }, { "epoch": 0.2840846769770758, "grad_norm": 3.036806893487541, "learning_rate": 3.768056446440026e-06, "loss": 0.8014, "step": 23310 }, { "epoch": 0.2841456132012236, "grad_norm": 2.236388169433395, "learning_rate": 3.767735728030789e-06, "loss": 0.7198, "step": 23315 }, { "epoch": 0.2842065494253714, "grad_norm": 2.244351706896842, "learning_rate": 3.7674150096215526e-06, "loss": 0.7914, "step": 23320 }, { "epoch": 0.2842674856495192, "grad_norm": 2.120214544639751, "learning_rate": 3.767094291212316e-06, "loss": 0.8198, "step": 23325 }, { "epoch": 0.284328421873667, "grad_norm": 2.2358776934570117, "learning_rate": 3.766773572803079e-06, "loss": 0.7544, "step": 23330 }, { "epoch": 0.28438935809781485, "grad_norm": 3.1929607253013588, "learning_rate": 3.766452854393843e-06, "loss": 0.86, "step": 23335 }, { "epoch": 0.2844502943219626, "grad_norm": 2.7551617231329324, "learning_rate": 3.766132135984606e-06, "loss": 0.7526, "step": 23340 }, { "epoch": 0.28451123054611044, "grad_norm": 2.055887520932828, "learning_rate": 3.765811417575369e-06, "loss": 0.7884, "step": 23345 }, { "epoch": 0.28457216677025826, "grad_norm": 2.5312656966780986, "learning_rate": 3.765490699166132e-06, "loss": 0.792, "step": 23350 }, { "epoch": 0.28463310299440603, "grad_norm": 2.784909814984572, "learning_rate": 3.765169980756896e-06, "loss": 0.8812, "step": 23355 }, { "epoch": 0.28469403921855385, "grad_norm": 2.323172162374101, "learning_rate": 3.764849262347659e-06, "loss": 0.7554, "step": 23360 }, { "epoch": 0.2847549754427017, "grad_norm": 2.761396359820642, "learning_rate": 3.7645285439384223e-06, "loss": 0.7416, "step": 23365 }, { "epoch": 0.2848159116668495, "grad_norm": 2.4595167489066037, "learning_rate": 3.7642078255291857e-06, "loss": 0.8352, "step": 23370 }, { "epoch": 0.28487684789099726, "grad_norm": 2.8606667492393, "learning_rate": 3.763887107119949e-06, "loss": 0.8762, "step": 23375 }, { "epoch": 0.2849377841151451, "grad_norm": 3.003957971231454, "learning_rate": 3.763566388710712e-06, "loss": 0.8679, "step": 23380 }, { "epoch": 0.2849987203392929, "grad_norm": 2.501460993456085, "learning_rate": 3.7632456703014756e-06, "loss": 0.7717, "step": 23385 }, { "epoch": 0.2850596565634407, "grad_norm": 2.1964324490262714, "learning_rate": 3.762924951892239e-06, "loss": 0.7935, "step": 23390 }, { "epoch": 0.2851205927875885, "grad_norm": 2.367503229675815, "learning_rate": 3.762604233483002e-06, "loss": 0.8223, "step": 23395 }, { "epoch": 0.2851815290117363, "grad_norm": 5.402540943590538, "learning_rate": 3.762283515073766e-06, "loss": 0.825, "step": 23400 }, { "epoch": 0.28524246523588415, "grad_norm": 2.387943631757795, "learning_rate": 3.761962796664529e-06, "loss": 0.6923, "step": 23405 }, { "epoch": 0.2853034014600319, "grad_norm": 2.5317471216433804, "learning_rate": 3.761642078255292e-06, "loss": 0.7633, "step": 23410 }, { "epoch": 0.28536433768417974, "grad_norm": 2.3530137026180324, "learning_rate": 3.761321359846056e-06, "loss": 0.8245, "step": 23415 }, { "epoch": 0.28542527390832756, "grad_norm": 2.6276339691843247, "learning_rate": 3.761000641436819e-06, "loss": 0.7517, "step": 23420 }, { "epoch": 0.2854862101324753, "grad_norm": 2.314778187657208, "learning_rate": 3.760679923027582e-06, "loss": 0.7819, "step": 23425 }, { "epoch": 0.28554714635662315, "grad_norm": 2.1843638066692215, "learning_rate": 3.7603592046183453e-06, "loss": 0.7248, "step": 23430 }, { "epoch": 0.28560808258077097, "grad_norm": 2.4397126531138187, "learning_rate": 3.7600384862091088e-06, "loss": 0.8518, "step": 23435 }, { "epoch": 0.2856690188049188, "grad_norm": 2.0126666487105678, "learning_rate": 3.759717767799872e-06, "loss": 0.8543, "step": 23440 }, { "epoch": 0.28572995502906656, "grad_norm": 2.717912220824062, "learning_rate": 3.7593970493906352e-06, "loss": 0.8073, "step": 23445 }, { "epoch": 0.2857908912532144, "grad_norm": 2.260468770960298, "learning_rate": 3.7590763309813987e-06, "loss": 0.7427, "step": 23450 }, { "epoch": 0.2858518274773622, "grad_norm": 2.608187396530756, "learning_rate": 3.758755612572162e-06, "loss": 0.782, "step": 23455 }, { "epoch": 0.28591276370151, "grad_norm": 2.1951595236787043, "learning_rate": 3.758434894162925e-06, "loss": 0.8, "step": 23460 }, { "epoch": 0.2859736999256578, "grad_norm": 3.0865350010801524, "learning_rate": 3.7581141757536886e-06, "loss": 0.7517, "step": 23465 }, { "epoch": 0.2860346361498056, "grad_norm": 2.7062762143808645, "learning_rate": 3.757793457344452e-06, "loss": 0.7094, "step": 23470 }, { "epoch": 0.28609557237395344, "grad_norm": 2.2418256063491033, "learning_rate": 3.757472738935215e-06, "loss": 0.7277, "step": 23475 }, { "epoch": 0.2861565085981012, "grad_norm": 2.3904776571107496, "learning_rate": 3.757152020525979e-06, "loss": 0.7438, "step": 23480 }, { "epoch": 0.28621744482224903, "grad_norm": 2.0297298936018886, "learning_rate": 3.756831302116742e-06, "loss": 0.7003, "step": 23485 }, { "epoch": 0.28627838104639686, "grad_norm": 2.170502405822902, "learning_rate": 3.756510583707505e-06, "loss": 0.7234, "step": 23490 }, { "epoch": 0.2863393172705446, "grad_norm": 3.1231227252927933, "learning_rate": 3.756189865298269e-06, "loss": 0.7735, "step": 23495 }, { "epoch": 0.28640025349469245, "grad_norm": 2.5386264983137137, "learning_rate": 3.755869146889032e-06, "loss": 0.8018, "step": 23500 }, { "epoch": 0.28646118971884027, "grad_norm": 2.5525014461793303, "learning_rate": 3.755548428479795e-06, "loss": 0.7665, "step": 23505 }, { "epoch": 0.2865221259429881, "grad_norm": 2.953185536253654, "learning_rate": 3.7552277100705583e-06, "loss": 0.7649, "step": 23510 }, { "epoch": 0.28658306216713586, "grad_norm": 2.2890023687409102, "learning_rate": 3.7549069916613217e-06, "loss": 0.8955, "step": 23515 }, { "epoch": 0.2866439983912837, "grad_norm": 2.3628481971315263, "learning_rate": 3.7545862732520847e-06, "loss": 0.7839, "step": 23520 }, { "epoch": 0.2867049346154315, "grad_norm": 2.3256396051902013, "learning_rate": 3.754265554842848e-06, "loss": 0.7548, "step": 23525 }, { "epoch": 0.2867658708395793, "grad_norm": 3.274140585668758, "learning_rate": 3.7539448364336116e-06, "loss": 0.9194, "step": 23530 }, { "epoch": 0.2868268070637271, "grad_norm": 2.4894922277956266, "learning_rate": 3.753624118024375e-06, "loss": 0.7523, "step": 23535 }, { "epoch": 0.2868877432878749, "grad_norm": 2.6162993078389256, "learning_rate": 3.753303399615138e-06, "loss": 0.8137, "step": 23540 }, { "epoch": 0.28694867951202274, "grad_norm": 2.315380950418846, "learning_rate": 3.7529826812059015e-06, "loss": 0.8151, "step": 23545 }, { "epoch": 0.2870096157361705, "grad_norm": 2.800442079716511, "learning_rate": 3.752661962796665e-06, "loss": 0.8003, "step": 23550 }, { "epoch": 0.28707055196031833, "grad_norm": 2.0739793547905108, "learning_rate": 3.752341244387428e-06, "loss": 0.774, "step": 23555 }, { "epoch": 0.28713148818446615, "grad_norm": 2.3555075061618767, "learning_rate": 3.752020525978192e-06, "loss": 0.8237, "step": 23560 }, { "epoch": 0.2871924244086139, "grad_norm": 2.519763727015213, "learning_rate": 3.751699807568955e-06, "loss": 0.7333, "step": 23565 }, { "epoch": 0.28725336063276175, "grad_norm": 2.3052631818901217, "learning_rate": 3.751379089159718e-06, "loss": 0.8317, "step": 23570 }, { "epoch": 0.28731429685690957, "grad_norm": 1.9515240915857257, "learning_rate": 3.7510583707504817e-06, "loss": 0.7986, "step": 23575 }, { "epoch": 0.2873752330810574, "grad_norm": 2.822858472669397, "learning_rate": 3.7507376523412447e-06, "loss": 0.7677, "step": 23580 }, { "epoch": 0.28743616930520516, "grad_norm": 2.2269147966123692, "learning_rate": 3.7504169339320077e-06, "loss": 0.7681, "step": 23585 }, { "epoch": 0.287497105529353, "grad_norm": 2.731751717490605, "learning_rate": 3.7500962155227716e-06, "loss": 0.7901, "step": 23590 }, { "epoch": 0.2875580417535008, "grad_norm": 2.423218495490564, "learning_rate": 3.7497754971135346e-06, "loss": 0.7622, "step": 23595 }, { "epoch": 0.28761897797764857, "grad_norm": 3.139775920802617, "learning_rate": 3.749454778704298e-06, "loss": 0.7904, "step": 23600 }, { "epoch": 0.2876799142017964, "grad_norm": 2.6322161617383264, "learning_rate": 3.749134060295061e-06, "loss": 0.7781, "step": 23605 }, { "epoch": 0.2877408504259442, "grad_norm": 2.1951875001455896, "learning_rate": 3.7488133418858245e-06, "loss": 0.7869, "step": 23610 }, { "epoch": 0.28780178665009204, "grad_norm": 2.7648188504696987, "learning_rate": 3.748492623476588e-06, "loss": 0.7753, "step": 23615 }, { "epoch": 0.2878627228742398, "grad_norm": 2.903191810596473, "learning_rate": 3.748171905067351e-06, "loss": 0.7193, "step": 23620 }, { "epoch": 0.28792365909838763, "grad_norm": 3.4088741077619757, "learning_rate": 3.747851186658115e-06, "loss": 0.8087, "step": 23625 }, { "epoch": 0.28798459532253545, "grad_norm": 2.2417628847535704, "learning_rate": 3.747530468248878e-06, "loss": 0.8251, "step": 23630 }, { "epoch": 0.2880455315466832, "grad_norm": 2.3077875585130325, "learning_rate": 3.747209749839641e-06, "loss": 0.8025, "step": 23635 }, { "epoch": 0.28810646777083104, "grad_norm": 2.3894546300975423, "learning_rate": 3.7468890314304047e-06, "loss": 0.8073, "step": 23640 }, { "epoch": 0.28816740399497887, "grad_norm": 2.373297552358332, "learning_rate": 3.7465683130211678e-06, "loss": 0.8068, "step": 23645 }, { "epoch": 0.2882283402191267, "grad_norm": 1.8077506202489169, "learning_rate": 3.7462475946119308e-06, "loss": 0.6873, "step": 23650 }, { "epoch": 0.28828927644327446, "grad_norm": 2.638667451214959, "learning_rate": 3.7459268762026946e-06, "loss": 0.7464, "step": 23655 }, { "epoch": 0.2883502126674223, "grad_norm": 3.5495503694433377, "learning_rate": 3.7456061577934577e-06, "loss": 0.7739, "step": 23660 }, { "epoch": 0.2884111488915701, "grad_norm": 3.0562888894303835, "learning_rate": 3.7452854393842207e-06, "loss": 0.8214, "step": 23665 }, { "epoch": 0.28847208511571787, "grad_norm": 2.4506428900382553, "learning_rate": 3.7449647209749845e-06, "loss": 0.8115, "step": 23670 }, { "epoch": 0.2885330213398657, "grad_norm": 2.536811817065727, "learning_rate": 3.7446440025657476e-06, "loss": 0.8164, "step": 23675 }, { "epoch": 0.2885939575640135, "grad_norm": 3.4360801829627166, "learning_rate": 3.744323284156511e-06, "loss": 0.8078, "step": 23680 }, { "epoch": 0.28865489378816134, "grad_norm": 2.428829636951061, "learning_rate": 3.744002565747274e-06, "loss": 0.7865, "step": 23685 }, { "epoch": 0.2887158300123091, "grad_norm": 2.474007955743832, "learning_rate": 3.7436818473380375e-06, "loss": 0.7814, "step": 23690 }, { "epoch": 0.2887767662364569, "grad_norm": 3.1869346644333114, "learning_rate": 3.743361128928801e-06, "loss": 0.7674, "step": 23695 }, { "epoch": 0.28883770246060475, "grad_norm": 4.587668729332756, "learning_rate": 3.743040410519564e-06, "loss": 0.8508, "step": 23700 }, { "epoch": 0.2888986386847525, "grad_norm": 3.7832136449778253, "learning_rate": 3.7427196921103278e-06, "loss": 0.8384, "step": 23705 }, { "epoch": 0.28895957490890034, "grad_norm": 2.4001721547887556, "learning_rate": 3.742398973701091e-06, "loss": 0.8094, "step": 23710 }, { "epoch": 0.28902051113304816, "grad_norm": 2.7262405481388177, "learning_rate": 3.742078255291854e-06, "loss": 0.8227, "step": 23715 }, { "epoch": 0.289081447357196, "grad_norm": 2.3864567313869425, "learning_rate": 3.7417575368826177e-06, "loss": 0.8074, "step": 23720 }, { "epoch": 0.28914238358134375, "grad_norm": 2.7476616021295244, "learning_rate": 3.7414368184733807e-06, "loss": 0.7478, "step": 23725 }, { "epoch": 0.2892033198054916, "grad_norm": 2.3935318558075496, "learning_rate": 3.7411161000641437e-06, "loss": 0.7771, "step": 23730 }, { "epoch": 0.2892642560296394, "grad_norm": 2.9450417048638404, "learning_rate": 3.7407953816549076e-06, "loss": 0.784, "step": 23735 }, { "epoch": 0.28932519225378717, "grad_norm": 2.385890403827362, "learning_rate": 3.7404746632456706e-06, "loss": 0.7828, "step": 23740 }, { "epoch": 0.289386128477935, "grad_norm": 2.2932906268795463, "learning_rate": 3.7401539448364336e-06, "loss": 0.7837, "step": 23745 }, { "epoch": 0.2894470647020828, "grad_norm": 2.3450740157601584, "learning_rate": 3.7398332264271975e-06, "loss": 0.752, "step": 23750 }, { "epoch": 0.2895080009262306, "grad_norm": 4.1316975239978415, "learning_rate": 3.7395125080179605e-06, "loss": 0.7733, "step": 23755 }, { "epoch": 0.2895689371503784, "grad_norm": 3.9588917351975077, "learning_rate": 3.739191789608724e-06, "loss": 0.7124, "step": 23760 }, { "epoch": 0.2896298733745262, "grad_norm": 2.6137468336381544, "learning_rate": 3.738871071199487e-06, "loss": 0.7935, "step": 23765 }, { "epoch": 0.28969080959867405, "grad_norm": 2.318721093801305, "learning_rate": 3.7385503527902504e-06, "loss": 0.7713, "step": 23770 }, { "epoch": 0.2897517458228218, "grad_norm": 2.1358460810623323, "learning_rate": 3.738229634381014e-06, "loss": 0.8088, "step": 23775 }, { "epoch": 0.28981268204696964, "grad_norm": 2.345923253213723, "learning_rate": 3.737908915971777e-06, "loss": 0.7904, "step": 23780 }, { "epoch": 0.28987361827111746, "grad_norm": 2.6557015231556615, "learning_rate": 3.7375881975625407e-06, "loss": 0.8391, "step": 23785 }, { "epoch": 0.28993455449526523, "grad_norm": 2.7813643336866676, "learning_rate": 3.7372674791533037e-06, "loss": 0.812, "step": 23790 }, { "epoch": 0.28999549071941305, "grad_norm": 2.453789362477919, "learning_rate": 3.7369467607440667e-06, "loss": 0.7985, "step": 23795 }, { "epoch": 0.2900564269435609, "grad_norm": 2.188560474761597, "learning_rate": 3.7366260423348306e-06, "loss": 0.8034, "step": 23800 }, { "epoch": 0.2901173631677087, "grad_norm": 2.813123031121262, "learning_rate": 3.7363053239255936e-06, "loss": 0.7532, "step": 23805 }, { "epoch": 0.29017829939185646, "grad_norm": 2.6323487763531612, "learning_rate": 3.7359846055163566e-06, "loss": 0.8109, "step": 23810 }, { "epoch": 0.2902392356160043, "grad_norm": 2.9352728109137187, "learning_rate": 3.7356638871071205e-06, "loss": 0.8242, "step": 23815 }, { "epoch": 0.2903001718401521, "grad_norm": 3.2076619935817803, "learning_rate": 3.7353431686978835e-06, "loss": 0.786, "step": 23820 }, { "epoch": 0.2903611080642999, "grad_norm": 2.1346068406330847, "learning_rate": 3.7350224502886465e-06, "loss": 0.7942, "step": 23825 }, { "epoch": 0.2904220442884477, "grad_norm": 3.5307858307415296, "learning_rate": 3.7347017318794104e-06, "loss": 0.8305, "step": 23830 }, { "epoch": 0.2904829805125955, "grad_norm": 2.9720554229103953, "learning_rate": 3.7343810134701734e-06, "loss": 0.7909, "step": 23835 }, { "epoch": 0.29054391673674335, "grad_norm": 2.115170191295117, "learning_rate": 3.734060295060937e-06, "loss": 0.7465, "step": 23840 }, { "epoch": 0.2906048529608911, "grad_norm": 2.468288207816008, "learning_rate": 3.7337395766517e-06, "loss": 0.832, "step": 23845 }, { "epoch": 0.29066578918503894, "grad_norm": 2.8428124664240877, "learning_rate": 3.7334188582424637e-06, "loss": 0.7559, "step": 23850 }, { "epoch": 0.29072672540918676, "grad_norm": 3.319817525378792, "learning_rate": 3.7330981398332267e-06, "loss": 0.7965, "step": 23855 }, { "epoch": 0.2907876616333345, "grad_norm": 2.2845571372693856, "learning_rate": 3.7327774214239898e-06, "loss": 0.7895, "step": 23860 }, { "epoch": 0.29084859785748235, "grad_norm": 2.40253107629292, "learning_rate": 3.7324567030147536e-06, "loss": 0.7894, "step": 23865 }, { "epoch": 0.29090953408163017, "grad_norm": 2.509002205141692, "learning_rate": 3.7321359846055166e-06, "loss": 0.7721, "step": 23870 }, { "epoch": 0.290970470305778, "grad_norm": 5.382883179584198, "learning_rate": 3.7318152661962797e-06, "loss": 0.8303, "step": 23875 }, { "epoch": 0.29103140652992576, "grad_norm": 2.243741945567149, "learning_rate": 3.7314945477870435e-06, "loss": 0.7881, "step": 23880 }, { "epoch": 0.2910923427540736, "grad_norm": 2.443794804278568, "learning_rate": 3.7311738293778065e-06, "loss": 0.7985, "step": 23885 }, { "epoch": 0.2911532789782214, "grad_norm": 2.5129174031107686, "learning_rate": 3.7308531109685696e-06, "loss": 0.7683, "step": 23890 }, { "epoch": 0.2912142152023692, "grad_norm": 3.1439014412817636, "learning_rate": 3.7305323925593334e-06, "loss": 0.8525, "step": 23895 }, { "epoch": 0.291275151426517, "grad_norm": 2.464509885232806, "learning_rate": 3.7302116741500964e-06, "loss": 0.8117, "step": 23900 }, { "epoch": 0.2913360876506648, "grad_norm": 2.6645168909251957, "learning_rate": 3.72989095574086e-06, "loss": 0.793, "step": 23905 }, { "epoch": 0.29139702387481264, "grad_norm": 2.1634263437888364, "learning_rate": 3.7295702373316233e-06, "loss": 0.8313, "step": 23910 }, { "epoch": 0.2914579600989604, "grad_norm": 2.1941275712620305, "learning_rate": 3.7292495189223863e-06, "loss": 0.8111, "step": 23915 }, { "epoch": 0.29151889632310823, "grad_norm": 2.1850750741390144, "learning_rate": 3.7289288005131498e-06, "loss": 0.792, "step": 23920 }, { "epoch": 0.29157983254725606, "grad_norm": 2.32461984033202, "learning_rate": 3.7286080821039132e-06, "loss": 0.7938, "step": 23925 }, { "epoch": 0.2916407687714038, "grad_norm": 2.4938919227008003, "learning_rate": 3.7282873636946767e-06, "loss": 0.8715, "step": 23930 }, { "epoch": 0.29170170499555165, "grad_norm": 2.401895716184021, "learning_rate": 3.7279666452854397e-06, "loss": 0.8121, "step": 23935 }, { "epoch": 0.29176264121969947, "grad_norm": 2.3632670293121367, "learning_rate": 3.7276459268762027e-06, "loss": 0.7648, "step": 23940 }, { "epoch": 0.2918235774438473, "grad_norm": 3.4789140160598593, "learning_rate": 3.7273252084669666e-06, "loss": 0.8374, "step": 23945 }, { "epoch": 0.29188451366799506, "grad_norm": 2.5424758540772956, "learning_rate": 3.7270044900577296e-06, "loss": 0.8035, "step": 23950 }, { "epoch": 0.2919454498921429, "grad_norm": 2.682859020938417, "learning_rate": 3.7266837716484926e-06, "loss": 0.8233, "step": 23955 }, { "epoch": 0.2920063861162907, "grad_norm": 3.1067464049900533, "learning_rate": 3.7263630532392565e-06, "loss": 0.8278, "step": 23960 }, { "epoch": 0.2920673223404385, "grad_norm": 2.3447863224359984, "learning_rate": 3.7260423348300195e-06, "loss": 0.8373, "step": 23965 }, { "epoch": 0.2921282585645863, "grad_norm": 2.4380762733058017, "learning_rate": 3.7257216164207825e-06, "loss": 0.7766, "step": 23970 }, { "epoch": 0.2921891947887341, "grad_norm": 2.8985886219950947, "learning_rate": 3.7254008980115464e-06, "loss": 0.7865, "step": 23975 }, { "epoch": 0.29225013101288194, "grad_norm": 2.0865835152181247, "learning_rate": 3.7250801796023094e-06, "loss": 0.8265, "step": 23980 }, { "epoch": 0.2923110672370297, "grad_norm": 2.6062984728590384, "learning_rate": 3.724759461193073e-06, "loss": 0.8255, "step": 23985 }, { "epoch": 0.29237200346117753, "grad_norm": 2.629555667648172, "learning_rate": 3.7244387427838362e-06, "loss": 0.7709, "step": 23990 }, { "epoch": 0.29243293968532535, "grad_norm": 2.520168184539418, "learning_rate": 3.7241180243745993e-06, "loss": 0.8055, "step": 23995 }, { "epoch": 0.2924938759094731, "grad_norm": 2.645916667566697, "learning_rate": 3.7237973059653627e-06, "loss": 0.7418, "step": 24000 }, { "epoch": 0.29255481213362095, "grad_norm": 3.9258950004333886, "learning_rate": 3.723476587556126e-06, "loss": 0.7762, "step": 24005 }, { "epoch": 0.29261574835776877, "grad_norm": 2.026457693039898, "learning_rate": 3.7231558691468896e-06, "loss": 0.7099, "step": 24010 }, { "epoch": 0.2926766845819166, "grad_norm": 1.9518061963378117, "learning_rate": 3.7228351507376526e-06, "loss": 0.7765, "step": 24015 }, { "epoch": 0.29273762080606436, "grad_norm": 2.5933937360517767, "learning_rate": 3.7225144323284156e-06, "loss": 0.8134, "step": 24020 }, { "epoch": 0.2927985570302122, "grad_norm": 3.2682015987516384, "learning_rate": 3.7221937139191795e-06, "loss": 0.7669, "step": 24025 }, { "epoch": 0.29285949325436, "grad_norm": 3.1892286826459775, "learning_rate": 3.7218729955099425e-06, "loss": 0.8434, "step": 24030 }, { "epoch": 0.29292042947850777, "grad_norm": 2.969825081846552, "learning_rate": 3.7215522771007055e-06, "loss": 0.8201, "step": 24035 }, { "epoch": 0.2929813657026556, "grad_norm": 2.355317290161163, "learning_rate": 3.7212315586914694e-06, "loss": 0.7293, "step": 24040 }, { "epoch": 0.2930423019268034, "grad_norm": 2.5939018921007353, "learning_rate": 3.7209108402822324e-06, "loss": 0.7724, "step": 24045 }, { "epoch": 0.29310323815095124, "grad_norm": 2.7428284338175777, "learning_rate": 3.7205901218729954e-06, "loss": 0.7853, "step": 24050 }, { "epoch": 0.293164174375099, "grad_norm": 2.603339831250658, "learning_rate": 3.7202694034637593e-06, "loss": 0.7224, "step": 24055 }, { "epoch": 0.29322511059924683, "grad_norm": 2.4529540762586315, "learning_rate": 3.7199486850545223e-06, "loss": 0.7726, "step": 24060 }, { "epoch": 0.29328604682339465, "grad_norm": 2.661051860563343, "learning_rate": 3.7196279666452857e-06, "loss": 0.7895, "step": 24065 }, { "epoch": 0.2933469830475424, "grad_norm": 4.866019248355621, "learning_rate": 3.719307248236049e-06, "loss": 0.8016, "step": 24070 }, { "epoch": 0.29340791927169024, "grad_norm": 2.58656739201851, "learning_rate": 3.7189865298268126e-06, "loss": 0.8108, "step": 24075 }, { "epoch": 0.29346885549583807, "grad_norm": 2.3943123886777022, "learning_rate": 3.7186658114175756e-06, "loss": 0.8754, "step": 24080 }, { "epoch": 0.2935297917199859, "grad_norm": 3.4549463805673772, "learning_rate": 3.718345093008339e-06, "loss": 0.7063, "step": 24085 }, { "epoch": 0.29359072794413366, "grad_norm": 2.498994738200179, "learning_rate": 3.7180243745991025e-06, "loss": 0.7562, "step": 24090 }, { "epoch": 0.2936516641682815, "grad_norm": 2.3340043692665247, "learning_rate": 3.7177036561898655e-06, "loss": 0.8336, "step": 24095 }, { "epoch": 0.2937126003924293, "grad_norm": 3.5070603661847675, "learning_rate": 3.7173829377806285e-06, "loss": 0.823, "step": 24100 }, { "epoch": 0.29377353661657707, "grad_norm": 3.1057022010964848, "learning_rate": 3.7170622193713924e-06, "loss": 0.7581, "step": 24105 }, { "epoch": 0.2938344728407249, "grad_norm": 2.2961166295527304, "learning_rate": 3.7167415009621554e-06, "loss": 0.7505, "step": 24110 }, { "epoch": 0.2938954090648727, "grad_norm": 3.1899302729734575, "learning_rate": 3.7164207825529184e-06, "loss": 0.8237, "step": 24115 }, { "epoch": 0.29395634528902054, "grad_norm": 2.244241413024477, "learning_rate": 3.7161000641436823e-06, "loss": 0.8121, "step": 24120 }, { "epoch": 0.2940172815131683, "grad_norm": 2.4490476552785054, "learning_rate": 3.7157793457344453e-06, "loss": 0.8306, "step": 24125 }, { "epoch": 0.2940782177373161, "grad_norm": 2.2723071405105952, "learning_rate": 3.7154586273252088e-06, "loss": 0.7744, "step": 24130 }, { "epoch": 0.29413915396146395, "grad_norm": 2.5727145095767217, "learning_rate": 3.715137908915972e-06, "loss": 0.7491, "step": 24135 }, { "epoch": 0.2942000901856117, "grad_norm": 2.8506295588533304, "learning_rate": 3.7148171905067352e-06, "loss": 0.7643, "step": 24140 }, { "epoch": 0.29426102640975954, "grad_norm": 2.414835423989739, "learning_rate": 3.7144964720974987e-06, "loss": 0.7768, "step": 24145 }, { "epoch": 0.29432196263390736, "grad_norm": 2.89989008800219, "learning_rate": 3.714175753688262e-06, "loss": 0.7901, "step": 24150 }, { "epoch": 0.2943828988580552, "grad_norm": 3.0244757484369376, "learning_rate": 3.7138550352790255e-06, "loss": 0.8164, "step": 24155 }, { "epoch": 0.29444383508220295, "grad_norm": 2.84017092175221, "learning_rate": 3.7135343168697886e-06, "loss": 0.8357, "step": 24160 }, { "epoch": 0.2945047713063508, "grad_norm": 2.411877922496409, "learning_rate": 3.713213598460552e-06, "loss": 0.8631, "step": 24165 }, { "epoch": 0.2945657075304986, "grad_norm": 2.620274449122814, "learning_rate": 3.7128928800513154e-06, "loss": 0.8158, "step": 24170 }, { "epoch": 0.29462664375464637, "grad_norm": 2.283459439568247, "learning_rate": 3.7125721616420785e-06, "loss": 0.6987, "step": 24175 }, { "epoch": 0.2946875799787942, "grad_norm": 2.8720508553370094, "learning_rate": 3.7122514432328423e-06, "loss": 0.8996, "step": 24180 }, { "epoch": 0.294748516202942, "grad_norm": 2.5752355431508906, "learning_rate": 3.7119307248236053e-06, "loss": 0.7328, "step": 24185 }, { "epoch": 0.29480945242708984, "grad_norm": 2.330558587804573, "learning_rate": 3.7116100064143684e-06, "loss": 0.7786, "step": 24190 }, { "epoch": 0.2948703886512376, "grad_norm": 2.639999655768854, "learning_rate": 3.7112892880051314e-06, "loss": 0.9005, "step": 24195 }, { "epoch": 0.2949313248753854, "grad_norm": 2.6359962351534247, "learning_rate": 3.7109685695958952e-06, "loss": 0.8085, "step": 24200 }, { "epoch": 0.29499226109953325, "grad_norm": 2.6240829353750006, "learning_rate": 3.7106478511866583e-06, "loss": 0.7937, "step": 24205 }, { "epoch": 0.295053197323681, "grad_norm": 2.0108849957031047, "learning_rate": 3.7103271327774217e-06, "loss": 0.7248, "step": 24210 }, { "epoch": 0.29511413354782884, "grad_norm": 2.3156259411429514, "learning_rate": 3.710006414368185e-06, "loss": 0.7729, "step": 24215 }, { "epoch": 0.29517506977197666, "grad_norm": 2.363467463832556, "learning_rate": 3.709685695958948e-06, "loss": 0.8375, "step": 24220 }, { "epoch": 0.29523600599612443, "grad_norm": 2.0755483202895864, "learning_rate": 3.7093649775497116e-06, "loss": 0.7484, "step": 24225 }, { "epoch": 0.29529694222027225, "grad_norm": 2.363663845046638, "learning_rate": 3.709044259140475e-06, "loss": 0.7857, "step": 24230 }, { "epoch": 0.2953578784444201, "grad_norm": 2.464005638516505, "learning_rate": 3.7087235407312385e-06, "loss": 0.7427, "step": 24235 }, { "epoch": 0.2954188146685679, "grad_norm": 3.023987799561688, "learning_rate": 3.7084028223220015e-06, "loss": 0.7785, "step": 24240 }, { "epoch": 0.29547975089271566, "grad_norm": 2.517254021217381, "learning_rate": 3.708082103912765e-06, "loss": 0.8462, "step": 24245 }, { "epoch": 0.2955406871168635, "grad_norm": 3.8616980531290284, "learning_rate": 3.7077613855035284e-06, "loss": 0.7385, "step": 24250 }, { "epoch": 0.2956016233410113, "grad_norm": 3.396936365258448, "learning_rate": 3.7074406670942914e-06, "loss": 0.7431, "step": 24255 }, { "epoch": 0.2956625595651591, "grad_norm": 2.5162877777081785, "learning_rate": 3.7071199486850552e-06, "loss": 0.799, "step": 24260 }, { "epoch": 0.2957234957893069, "grad_norm": 2.3808789697264285, "learning_rate": 3.7067992302758183e-06, "loss": 0.7312, "step": 24265 }, { "epoch": 0.2957844320134547, "grad_norm": 2.3656902056826756, "learning_rate": 3.7064785118665813e-06, "loss": 0.8018, "step": 24270 }, { "epoch": 0.29584536823760255, "grad_norm": 4.275652563238264, "learning_rate": 3.7061577934573443e-06, "loss": 0.7273, "step": 24275 }, { "epoch": 0.2959063044617503, "grad_norm": 2.421182641594776, "learning_rate": 3.705837075048108e-06, "loss": 0.7986, "step": 24280 }, { "epoch": 0.29596724068589814, "grad_norm": 2.6293446319939644, "learning_rate": 3.705516356638871e-06, "loss": 0.7836, "step": 24285 }, { "epoch": 0.29602817691004596, "grad_norm": 2.8365214361012634, "learning_rate": 3.7051956382296346e-06, "loss": 0.7335, "step": 24290 }, { "epoch": 0.2960891131341937, "grad_norm": 2.6687443380311477, "learning_rate": 3.704874919820398e-06, "loss": 0.8017, "step": 24295 }, { "epoch": 0.29615004935834155, "grad_norm": 2.6409481616926387, "learning_rate": 3.704554201411161e-06, "loss": 0.8175, "step": 24300 }, { "epoch": 0.2962109855824894, "grad_norm": 1.9133130568771435, "learning_rate": 3.7042334830019245e-06, "loss": 0.8079, "step": 24305 }, { "epoch": 0.2962719218066372, "grad_norm": 2.4454351944818593, "learning_rate": 3.703912764592688e-06, "loss": 0.8435, "step": 24310 }, { "epoch": 0.29633285803078496, "grad_norm": 2.622387041753203, "learning_rate": 3.7035920461834514e-06, "loss": 0.7928, "step": 24315 }, { "epoch": 0.2963937942549328, "grad_norm": 2.6290955685581556, "learning_rate": 3.7032713277742144e-06, "loss": 0.7225, "step": 24320 }, { "epoch": 0.2964547304790806, "grad_norm": 2.7316364860138775, "learning_rate": 3.7029506093649783e-06, "loss": 0.8556, "step": 24325 }, { "epoch": 0.2965156667032284, "grad_norm": 2.1740995056544015, "learning_rate": 3.7026298909557413e-06, "loss": 0.8765, "step": 24330 }, { "epoch": 0.2965766029273762, "grad_norm": 2.4129017439083027, "learning_rate": 3.7023091725465043e-06, "loss": 0.7819, "step": 24335 }, { "epoch": 0.296637539151524, "grad_norm": 2.8122244357703887, "learning_rate": 3.701988454137268e-06, "loss": 0.838, "step": 24340 }, { "epoch": 0.29669847537567184, "grad_norm": 2.6175927361278406, "learning_rate": 3.701667735728031e-06, "loss": 0.8207, "step": 24345 }, { "epoch": 0.2967594115998196, "grad_norm": 2.741169277396062, "learning_rate": 3.701347017318794e-06, "loss": 0.7432, "step": 24350 }, { "epoch": 0.29682034782396743, "grad_norm": 2.483487292690692, "learning_rate": 3.7010262989095576e-06, "loss": 0.778, "step": 24355 }, { "epoch": 0.29688128404811526, "grad_norm": 2.406160638908815, "learning_rate": 3.700705580500321e-06, "loss": 0.7142, "step": 24360 }, { "epoch": 0.296942220272263, "grad_norm": 2.435497139790917, "learning_rate": 3.700384862091084e-06, "loss": 0.8041, "step": 24365 }, { "epoch": 0.29700315649641085, "grad_norm": 2.577950580033555, "learning_rate": 3.7000641436818475e-06, "loss": 0.8389, "step": 24370 }, { "epoch": 0.29706409272055867, "grad_norm": 2.31616281002819, "learning_rate": 3.699743425272611e-06, "loss": 0.7518, "step": 24375 }, { "epoch": 0.2971250289447065, "grad_norm": 2.31367854742868, "learning_rate": 3.6994227068633744e-06, "loss": 0.8183, "step": 24380 }, { "epoch": 0.29718596516885426, "grad_norm": 2.79597569770001, "learning_rate": 3.6991019884541374e-06, "loss": 0.7684, "step": 24385 }, { "epoch": 0.2972469013930021, "grad_norm": 2.4428744306406998, "learning_rate": 3.698781270044901e-06, "loss": 0.7564, "step": 24390 }, { "epoch": 0.2973078376171499, "grad_norm": 2.241112071706865, "learning_rate": 3.6984605516356643e-06, "loss": 0.791, "step": 24395 }, { "epoch": 0.2973687738412977, "grad_norm": 2.6645241666326975, "learning_rate": 3.6981398332264273e-06, "loss": 0.7717, "step": 24400 }, { "epoch": 0.2974297100654455, "grad_norm": 3.109196517430789, "learning_rate": 3.697819114817191e-06, "loss": 0.8265, "step": 24405 }, { "epoch": 0.2974906462895933, "grad_norm": 2.718301353380589, "learning_rate": 3.6974983964079542e-06, "loss": 0.7792, "step": 24410 }, { "epoch": 0.29755158251374114, "grad_norm": 2.8615972154725458, "learning_rate": 3.6971776779987172e-06, "loss": 0.8231, "step": 24415 }, { "epoch": 0.2976125187378889, "grad_norm": 2.323607973040742, "learning_rate": 3.696856959589481e-06, "loss": 0.707, "step": 24420 }, { "epoch": 0.29767345496203673, "grad_norm": 2.4853833021785188, "learning_rate": 3.696536241180244e-06, "loss": 0.7849, "step": 24425 }, { "epoch": 0.29773439118618455, "grad_norm": 3.048153240208894, "learning_rate": 3.696215522771007e-06, "loss": 0.8195, "step": 24430 }, { "epoch": 0.2977953274103323, "grad_norm": 2.628898533602989, "learning_rate": 3.6958948043617706e-06, "loss": 0.7663, "step": 24435 }, { "epoch": 0.29785626363448015, "grad_norm": 2.850612971221949, "learning_rate": 3.695574085952534e-06, "loss": 0.7558, "step": 24440 }, { "epoch": 0.29791719985862797, "grad_norm": 2.4463713118774852, "learning_rate": 3.695253367543297e-06, "loss": 0.7878, "step": 24445 }, { "epoch": 0.2979781360827758, "grad_norm": 2.8084170727789326, "learning_rate": 3.6949326491340605e-06, "loss": 0.7404, "step": 24450 }, { "epoch": 0.29803907230692356, "grad_norm": 2.292226223786321, "learning_rate": 3.694611930724824e-06, "loss": 0.755, "step": 24455 }, { "epoch": 0.2981000085310714, "grad_norm": 2.1877978276560173, "learning_rate": 3.6942912123155874e-06, "loss": 0.8335, "step": 24460 }, { "epoch": 0.2981609447552192, "grad_norm": 2.914600063304247, "learning_rate": 3.6939704939063504e-06, "loss": 0.8592, "step": 24465 }, { "epoch": 0.29822188097936697, "grad_norm": 2.273140145615525, "learning_rate": 3.693649775497114e-06, "loss": 0.7411, "step": 24470 }, { "epoch": 0.2982828172035148, "grad_norm": 2.411061633888538, "learning_rate": 3.6933290570878773e-06, "loss": 0.7611, "step": 24475 }, { "epoch": 0.2983437534276626, "grad_norm": 2.4578692855971798, "learning_rate": 3.6930083386786403e-06, "loss": 0.7587, "step": 24480 }, { "epoch": 0.29840468965181044, "grad_norm": 2.2264785700731204, "learning_rate": 3.692687620269404e-06, "loss": 0.7715, "step": 24485 }, { "epoch": 0.2984656258759582, "grad_norm": 2.578157446673014, "learning_rate": 3.692366901860167e-06, "loss": 0.7811, "step": 24490 }, { "epoch": 0.29852656210010603, "grad_norm": 2.4544279762806447, "learning_rate": 3.69204618345093e-06, "loss": 0.7306, "step": 24495 }, { "epoch": 0.29858749832425385, "grad_norm": 2.299974317141006, "learning_rate": 3.691725465041694e-06, "loss": 0.8117, "step": 24500 }, { "epoch": 0.2986484345484016, "grad_norm": 2.7902660837070026, "learning_rate": 3.691404746632457e-06, "loss": 0.8486, "step": 24505 }, { "epoch": 0.29870937077254944, "grad_norm": 2.402383307068838, "learning_rate": 3.69108402822322e-06, "loss": 0.7305, "step": 24510 }, { "epoch": 0.29877030699669727, "grad_norm": 2.548341524179812, "learning_rate": 3.690763309813984e-06, "loss": 0.8293, "step": 24515 }, { "epoch": 0.2988312432208451, "grad_norm": 2.3914904548406035, "learning_rate": 3.690442591404747e-06, "loss": 0.748, "step": 24520 }, { "epoch": 0.29889217944499286, "grad_norm": 2.40104649410167, "learning_rate": 3.69012187299551e-06, "loss": 0.8269, "step": 24525 }, { "epoch": 0.2989531156691407, "grad_norm": 2.2605864112788536, "learning_rate": 3.6898011545862734e-06, "loss": 0.8421, "step": 24530 }, { "epoch": 0.2990140518932885, "grad_norm": 3.940763530469769, "learning_rate": 3.689480436177037e-06, "loss": 0.8175, "step": 24535 }, { "epoch": 0.29907498811743627, "grad_norm": 2.1895805236690027, "learning_rate": 3.6891597177678003e-06, "loss": 0.7921, "step": 24540 }, { "epoch": 0.2991359243415841, "grad_norm": 2.363904164485273, "learning_rate": 3.6888389993585633e-06, "loss": 0.7419, "step": 24545 }, { "epoch": 0.2991968605657319, "grad_norm": 2.376718960173416, "learning_rate": 3.688518280949327e-06, "loss": 0.8571, "step": 24550 }, { "epoch": 0.29925779678987974, "grad_norm": 2.8730615721075186, "learning_rate": 3.68819756254009e-06, "loss": 0.7289, "step": 24555 }, { "epoch": 0.2993187330140275, "grad_norm": 2.731026640079381, "learning_rate": 3.687876844130853e-06, "loss": 0.71, "step": 24560 }, { "epoch": 0.2993796692381753, "grad_norm": 2.6965153203351475, "learning_rate": 3.687556125721617e-06, "loss": 0.8024, "step": 24565 }, { "epoch": 0.29944060546232315, "grad_norm": 2.7443300464495235, "learning_rate": 3.68723540731238e-06, "loss": 0.7932, "step": 24570 }, { "epoch": 0.2995015416864709, "grad_norm": 2.511035418798763, "learning_rate": 3.686914688903143e-06, "loss": 0.7185, "step": 24575 }, { "epoch": 0.29956247791061874, "grad_norm": 3.2990038505127854, "learning_rate": 3.686593970493907e-06, "loss": 0.8476, "step": 24580 }, { "epoch": 0.29962341413476656, "grad_norm": 2.4324504124016344, "learning_rate": 3.68627325208467e-06, "loss": 0.8364, "step": 24585 }, { "epoch": 0.2996843503589144, "grad_norm": 2.5822247874235287, "learning_rate": 3.685952533675433e-06, "loss": 0.7361, "step": 24590 }, { "epoch": 0.29974528658306215, "grad_norm": 3.0637177431411153, "learning_rate": 3.685631815266197e-06, "loss": 0.9165, "step": 24595 }, { "epoch": 0.29980622280721, "grad_norm": 2.4890517335701823, "learning_rate": 3.68531109685696e-06, "loss": 0.7787, "step": 24600 }, { "epoch": 0.2998671590313578, "grad_norm": 4.307764013489707, "learning_rate": 3.6849903784477233e-06, "loss": 0.8026, "step": 24605 }, { "epoch": 0.29992809525550557, "grad_norm": 3.0363052361893677, "learning_rate": 3.6846696600384863e-06, "loss": 0.8458, "step": 24610 }, { "epoch": 0.2999890314796534, "grad_norm": 2.722352372939126, "learning_rate": 3.6843489416292498e-06, "loss": 0.8042, "step": 24615 }, { "epoch": 0.3000499677038012, "grad_norm": 2.493426348611991, "learning_rate": 3.684028223220013e-06, "loss": 0.7857, "step": 24620 }, { "epoch": 0.30011090392794904, "grad_norm": 2.661902731300313, "learning_rate": 3.6837075048107762e-06, "loss": 0.784, "step": 24625 }, { "epoch": 0.3001718401520968, "grad_norm": 3.5520213199114337, "learning_rate": 3.68338678640154e-06, "loss": 0.864, "step": 24630 }, { "epoch": 0.3002327763762446, "grad_norm": 2.3756838251753654, "learning_rate": 3.683066067992303e-06, "loss": 0.8016, "step": 24635 }, { "epoch": 0.30029371260039245, "grad_norm": 2.7263274245093188, "learning_rate": 3.682745349583066e-06, "loss": 0.7707, "step": 24640 }, { "epoch": 0.3003546488245402, "grad_norm": 2.2959503631587523, "learning_rate": 3.68242463117383e-06, "loss": 0.7426, "step": 24645 }, { "epoch": 0.30041558504868804, "grad_norm": 2.9234166708590723, "learning_rate": 3.682103912764593e-06, "loss": 0.9004, "step": 24650 }, { "epoch": 0.30047652127283586, "grad_norm": 1.7001222379912027, "learning_rate": 3.681783194355356e-06, "loss": 0.7183, "step": 24655 }, { "epoch": 0.3005374574969837, "grad_norm": 2.6834931853871153, "learning_rate": 3.68146247594612e-06, "loss": 0.8559, "step": 24660 }, { "epoch": 0.30059839372113145, "grad_norm": 2.3273637563984595, "learning_rate": 3.681141757536883e-06, "loss": 0.8185, "step": 24665 }, { "epoch": 0.3006593299452793, "grad_norm": 2.8756273222035578, "learning_rate": 3.680821039127646e-06, "loss": 0.8116, "step": 24670 }, { "epoch": 0.3007202661694271, "grad_norm": 2.407209659308882, "learning_rate": 3.6805003207184098e-06, "loss": 0.6707, "step": 24675 }, { "epoch": 0.30078120239357486, "grad_norm": 2.1453281661505526, "learning_rate": 3.680179602309173e-06, "loss": 0.7729, "step": 24680 }, { "epoch": 0.3008421386177227, "grad_norm": 2.983411210498301, "learning_rate": 3.6798588838999362e-06, "loss": 0.8725, "step": 24685 }, { "epoch": 0.3009030748418705, "grad_norm": 2.1605644431930133, "learning_rate": 3.6795381654906993e-06, "loss": 0.7536, "step": 24690 }, { "epoch": 0.3009640110660183, "grad_norm": 2.0888243480428765, "learning_rate": 3.6792174470814627e-06, "loss": 0.8711, "step": 24695 }, { "epoch": 0.3010249472901661, "grad_norm": 2.6920594127609516, "learning_rate": 3.678896728672226e-06, "loss": 0.7387, "step": 24700 }, { "epoch": 0.3010858835143139, "grad_norm": 2.4103020637741763, "learning_rate": 3.678576010262989e-06, "loss": 0.7297, "step": 24705 }, { "epoch": 0.30114681973846175, "grad_norm": 2.6467445761458146, "learning_rate": 3.678255291853753e-06, "loss": 0.7521, "step": 24710 }, { "epoch": 0.3012077559626095, "grad_norm": 1.92348000934365, "learning_rate": 3.677934573444516e-06, "loss": 0.7578, "step": 24715 }, { "epoch": 0.30126869218675734, "grad_norm": 2.4325343314655044, "learning_rate": 3.677613855035279e-06, "loss": 0.7304, "step": 24720 }, { "epoch": 0.30132962841090516, "grad_norm": 2.5744426353703314, "learning_rate": 3.677293136626043e-06, "loss": 0.7892, "step": 24725 }, { "epoch": 0.3013905646350529, "grad_norm": 3.757420503522773, "learning_rate": 3.676972418216806e-06, "loss": 0.8009, "step": 24730 }, { "epoch": 0.30145150085920075, "grad_norm": 2.4371240224950705, "learning_rate": 3.676651699807569e-06, "loss": 0.9474, "step": 24735 }, { "epoch": 0.3015124370833486, "grad_norm": 2.506922099271524, "learning_rate": 3.676330981398333e-06, "loss": 0.7638, "step": 24740 }, { "epoch": 0.3015733733074964, "grad_norm": 2.621682992005862, "learning_rate": 3.676010262989096e-06, "loss": 0.8879, "step": 24745 }, { "epoch": 0.30163430953164416, "grad_norm": 3.2178507037127333, "learning_rate": 3.675689544579859e-06, "loss": 0.7565, "step": 24750 }, { "epoch": 0.301695245755792, "grad_norm": 3.902215027052942, "learning_rate": 3.6753688261706227e-06, "loss": 0.7875, "step": 24755 }, { "epoch": 0.3017561819799398, "grad_norm": 3.4316437398391284, "learning_rate": 3.6750481077613857e-06, "loss": 0.7818, "step": 24760 }, { "epoch": 0.3018171182040876, "grad_norm": 2.308306017181254, "learning_rate": 3.674727389352149e-06, "loss": 0.7175, "step": 24765 }, { "epoch": 0.3018780544282354, "grad_norm": 2.086483719318684, "learning_rate": 3.6744066709429126e-06, "loss": 0.7555, "step": 24770 }, { "epoch": 0.3019389906523832, "grad_norm": 2.460617415779816, "learning_rate": 3.674085952533676e-06, "loss": 0.7772, "step": 24775 }, { "epoch": 0.30199992687653104, "grad_norm": 2.3448790080984634, "learning_rate": 3.673765234124439e-06, "loss": 0.8495, "step": 24780 }, { "epoch": 0.3020608631006788, "grad_norm": 2.2263792242481775, "learning_rate": 3.673444515715202e-06, "loss": 0.7795, "step": 24785 }, { "epoch": 0.30212179932482663, "grad_norm": 2.3222162524858385, "learning_rate": 3.673123797305966e-06, "loss": 0.7758, "step": 24790 }, { "epoch": 0.30218273554897446, "grad_norm": 2.730580970146934, "learning_rate": 3.672803078896729e-06, "loss": 0.8419, "step": 24795 }, { "epoch": 0.3022436717731222, "grad_norm": 3.133580795036917, "learning_rate": 3.672482360487492e-06, "loss": 0.7902, "step": 24800 }, { "epoch": 0.30230460799727005, "grad_norm": 2.4965548348178923, "learning_rate": 3.672161642078256e-06, "loss": 0.8224, "step": 24805 }, { "epoch": 0.30236554422141787, "grad_norm": 2.014606431806101, "learning_rate": 3.671840923669019e-06, "loss": 0.6727, "step": 24810 }, { "epoch": 0.3024264804455657, "grad_norm": 2.3521859812461314, "learning_rate": 3.671520205259782e-06, "loss": 0.7644, "step": 24815 }, { "epoch": 0.30248741666971346, "grad_norm": 2.7612022601543034, "learning_rate": 3.6711994868505457e-06, "loss": 0.7967, "step": 24820 }, { "epoch": 0.3025483528938613, "grad_norm": 2.7453751547313803, "learning_rate": 3.6708787684413088e-06, "loss": 0.8102, "step": 24825 }, { "epoch": 0.3026092891180091, "grad_norm": 2.612242012770178, "learning_rate": 3.670558050032072e-06, "loss": 0.7684, "step": 24830 }, { "epoch": 0.3026702253421569, "grad_norm": 2.702195675039849, "learning_rate": 3.6702373316228356e-06, "loss": 0.8171, "step": 24835 }, { "epoch": 0.3027311615663047, "grad_norm": 2.345301280059041, "learning_rate": 3.6699166132135987e-06, "loss": 0.8391, "step": 24840 }, { "epoch": 0.3027920977904525, "grad_norm": 2.394772514132336, "learning_rate": 3.669595894804362e-06, "loss": 0.7766, "step": 24845 }, { "epoch": 0.30285303401460034, "grad_norm": 2.662611050231238, "learning_rate": 3.6692751763951255e-06, "loss": 0.8277, "step": 24850 }, { "epoch": 0.3029139702387481, "grad_norm": 2.464709175425767, "learning_rate": 3.668954457985889e-06, "loss": 0.6801, "step": 24855 }, { "epoch": 0.30297490646289593, "grad_norm": 2.5533867580742755, "learning_rate": 3.668633739576652e-06, "loss": 0.7525, "step": 24860 }, { "epoch": 0.30303584268704375, "grad_norm": 2.5748374828124145, "learning_rate": 3.668313021167415e-06, "loss": 0.7982, "step": 24865 }, { "epoch": 0.3030967789111915, "grad_norm": 2.7847307518176394, "learning_rate": 3.667992302758179e-06, "loss": 0.8024, "step": 24870 }, { "epoch": 0.30315771513533935, "grad_norm": 2.503206730170944, "learning_rate": 3.667671584348942e-06, "loss": 0.7802, "step": 24875 }, { "epoch": 0.30321865135948717, "grad_norm": 2.035738221044822, "learning_rate": 3.667350865939705e-06, "loss": 0.7508, "step": 24880 }, { "epoch": 0.303279587583635, "grad_norm": 2.111517875078113, "learning_rate": 3.6670301475304688e-06, "loss": 0.7706, "step": 24885 }, { "epoch": 0.30334052380778276, "grad_norm": 2.5272033546493304, "learning_rate": 3.6667094291212318e-06, "loss": 0.8056, "step": 24890 }, { "epoch": 0.3034014600319306, "grad_norm": 2.6983266888308504, "learning_rate": 3.666388710711995e-06, "loss": 0.7265, "step": 24895 }, { "epoch": 0.3034623962560784, "grad_norm": 2.389281677631572, "learning_rate": 3.6660679923027587e-06, "loss": 0.8046, "step": 24900 }, { "epoch": 0.30352333248022617, "grad_norm": 2.5129983884045637, "learning_rate": 3.6657472738935217e-06, "loss": 0.7886, "step": 24905 }, { "epoch": 0.303584268704374, "grad_norm": 2.731553625545085, "learning_rate": 3.665426555484285e-06, "loss": 0.8259, "step": 24910 }, { "epoch": 0.3036452049285218, "grad_norm": 2.8130014981911344, "learning_rate": 3.6651058370750486e-06, "loss": 0.8431, "step": 24915 }, { "epoch": 0.30370614115266964, "grad_norm": 2.760679555894389, "learning_rate": 3.6647851186658116e-06, "loss": 0.7646, "step": 24920 }, { "epoch": 0.3037670773768174, "grad_norm": 3.0505732567156243, "learning_rate": 3.664464400256575e-06, "loss": 0.7711, "step": 24925 }, { "epoch": 0.30382801360096523, "grad_norm": 2.4147235499013617, "learning_rate": 3.6641436818473385e-06, "loss": 0.7442, "step": 24930 }, { "epoch": 0.30388894982511305, "grad_norm": 2.5079370304088378, "learning_rate": 3.663822963438102e-06, "loss": 0.7343, "step": 24935 }, { "epoch": 0.3039498860492608, "grad_norm": 2.4597618148823175, "learning_rate": 3.663502245028865e-06, "loss": 0.851, "step": 24940 }, { "epoch": 0.30401082227340864, "grad_norm": 2.2820408306114794, "learning_rate": 3.663181526619628e-06, "loss": 0.8062, "step": 24945 }, { "epoch": 0.30407175849755647, "grad_norm": 2.5989353567028184, "learning_rate": 3.662860808210392e-06, "loss": 0.8226, "step": 24950 }, { "epoch": 0.3041326947217043, "grad_norm": 3.0954055313071733, "learning_rate": 3.662540089801155e-06, "loss": 0.7782, "step": 24955 }, { "epoch": 0.30419363094585206, "grad_norm": 2.349124092994581, "learning_rate": 3.662219371391918e-06, "loss": 0.7598, "step": 24960 }, { "epoch": 0.3042545671699999, "grad_norm": 2.5416935012682993, "learning_rate": 3.6618986529826817e-06, "loss": 0.7628, "step": 24965 }, { "epoch": 0.3043155033941477, "grad_norm": 2.368862490894289, "learning_rate": 3.6615779345734447e-06, "loss": 0.751, "step": 24970 }, { "epoch": 0.30437643961829547, "grad_norm": 2.2440862217927253, "learning_rate": 3.6612572161642077e-06, "loss": 0.7275, "step": 24975 }, { "epoch": 0.3044373758424433, "grad_norm": 2.8567079857150404, "learning_rate": 3.6609364977549716e-06, "loss": 0.7624, "step": 24980 }, { "epoch": 0.3044983120665911, "grad_norm": 2.3665966005124632, "learning_rate": 3.6606157793457346e-06, "loss": 0.8255, "step": 24985 }, { "epoch": 0.30455924829073894, "grad_norm": 2.7188882528162317, "learning_rate": 3.660295060936498e-06, "loss": 0.7865, "step": 24990 }, { "epoch": 0.3046201845148867, "grad_norm": 3.0625502199468, "learning_rate": 3.6599743425272615e-06, "loss": 0.7423, "step": 24995 }, { "epoch": 0.3046811207390345, "grad_norm": 2.4114970012888906, "learning_rate": 3.6596536241180245e-06, "loss": 0.7745, "step": 25000 }, { "epoch": 0.30474205696318235, "grad_norm": 2.5352071969048433, "learning_rate": 3.659332905708788e-06, "loss": 0.7878, "step": 25005 }, { "epoch": 0.3048029931873301, "grad_norm": 2.2958933804825503, "learning_rate": 3.6590121872995514e-06, "loss": 0.7785, "step": 25010 }, { "epoch": 0.30486392941147794, "grad_norm": 4.4187609298243045, "learning_rate": 3.658691468890315e-06, "loss": 0.7663, "step": 25015 }, { "epoch": 0.30492486563562576, "grad_norm": 2.1263633279864083, "learning_rate": 3.658370750481078e-06, "loss": 0.8436, "step": 25020 }, { "epoch": 0.3049858018597736, "grad_norm": 2.505638717679922, "learning_rate": 3.658050032071841e-06, "loss": 0.8, "step": 25025 }, { "epoch": 0.30504673808392135, "grad_norm": 2.385483467888805, "learning_rate": 3.6577293136626047e-06, "loss": 0.7878, "step": 25030 }, { "epoch": 0.3051076743080692, "grad_norm": 2.591981909141721, "learning_rate": 3.6574085952533677e-06, "loss": 0.754, "step": 25035 }, { "epoch": 0.305168610532217, "grad_norm": 2.6474978413895043, "learning_rate": 3.6570878768441308e-06, "loss": 0.8459, "step": 25040 }, { "epoch": 0.30522954675636477, "grad_norm": 2.2359580756731603, "learning_rate": 3.6567671584348946e-06, "loss": 0.8294, "step": 25045 }, { "epoch": 0.3052904829805126, "grad_norm": 2.107695620762974, "learning_rate": 3.6564464400256576e-06, "loss": 0.7641, "step": 25050 }, { "epoch": 0.3053514192046604, "grad_norm": 2.6374813144512164, "learning_rate": 3.656125721616421e-06, "loss": 0.7807, "step": 25055 }, { "epoch": 0.30541235542880824, "grad_norm": 2.6792261722071804, "learning_rate": 3.6558050032071845e-06, "loss": 0.7855, "step": 25060 }, { "epoch": 0.305473291652956, "grad_norm": 2.937137310534942, "learning_rate": 3.6554842847979475e-06, "loss": 0.8605, "step": 25065 }, { "epoch": 0.3055342278771038, "grad_norm": 2.59613255420631, "learning_rate": 3.655163566388711e-06, "loss": 0.7002, "step": 25070 }, { "epoch": 0.30559516410125165, "grad_norm": 2.4431166991537605, "learning_rate": 3.6548428479794744e-06, "loss": 0.777, "step": 25075 }, { "epoch": 0.3056561003253994, "grad_norm": 2.2857188470822436, "learning_rate": 3.654522129570238e-06, "loss": 0.8119, "step": 25080 }, { "epoch": 0.30571703654954724, "grad_norm": 2.772653739905403, "learning_rate": 3.654201411161001e-06, "loss": 0.7652, "step": 25085 }, { "epoch": 0.30577797277369506, "grad_norm": 2.8002386587314345, "learning_rate": 3.6538806927517643e-06, "loss": 0.8111, "step": 25090 }, { "epoch": 0.3058389089978429, "grad_norm": 2.3868000784347303, "learning_rate": 3.6535599743425278e-06, "loss": 0.7574, "step": 25095 }, { "epoch": 0.30589984522199065, "grad_norm": 1.8317703426253726, "learning_rate": 3.6532392559332908e-06, "loss": 0.7609, "step": 25100 }, { "epoch": 0.3059607814461385, "grad_norm": 2.949138660405429, "learning_rate": 3.6529185375240546e-06, "loss": 0.818, "step": 25105 }, { "epoch": 0.3060217176702863, "grad_norm": 3.6118483866120656, "learning_rate": 3.6525978191148177e-06, "loss": 0.798, "step": 25110 }, { "epoch": 0.30608265389443406, "grad_norm": 3.423821897336604, "learning_rate": 3.6522771007055807e-06, "loss": 0.7855, "step": 25115 }, { "epoch": 0.3061435901185819, "grad_norm": 2.591526685186101, "learning_rate": 3.6519563822963437e-06, "loss": 0.7733, "step": 25120 }, { "epoch": 0.3062045263427297, "grad_norm": 3.524949886407684, "learning_rate": 3.6516356638871075e-06, "loss": 0.8073, "step": 25125 }, { "epoch": 0.30626546256687753, "grad_norm": 2.531872676585572, "learning_rate": 3.6513149454778706e-06, "loss": 0.6988, "step": 25130 }, { "epoch": 0.3063263987910253, "grad_norm": 2.503837132815184, "learning_rate": 3.650994227068634e-06, "loss": 0.7756, "step": 25135 }, { "epoch": 0.3063873350151731, "grad_norm": 2.375142094971121, "learning_rate": 3.6506735086593974e-06, "loss": 0.7447, "step": 25140 }, { "epoch": 0.30644827123932095, "grad_norm": 2.2922633646781185, "learning_rate": 3.6503527902501605e-06, "loss": 0.8284, "step": 25145 }, { "epoch": 0.3065092074634687, "grad_norm": 2.2371599014027472, "learning_rate": 3.650032071840924e-06, "loss": 0.8118, "step": 25150 }, { "epoch": 0.30657014368761654, "grad_norm": 2.959074987317082, "learning_rate": 3.6497113534316873e-06, "loss": 0.7483, "step": 25155 }, { "epoch": 0.30663107991176436, "grad_norm": 2.689695174267813, "learning_rate": 3.6493906350224508e-06, "loss": 0.7662, "step": 25160 }, { "epoch": 0.3066920161359121, "grad_norm": 2.3150804097537487, "learning_rate": 3.649069916613214e-06, "loss": 0.767, "step": 25165 }, { "epoch": 0.30675295236005995, "grad_norm": 2.534149183248411, "learning_rate": 3.6487491982039772e-06, "loss": 0.7846, "step": 25170 }, { "epoch": 0.3068138885842078, "grad_norm": 2.0758453910922836, "learning_rate": 3.6484284797947407e-06, "loss": 0.7575, "step": 25175 }, { "epoch": 0.3068748248083556, "grad_norm": 2.687244204239704, "learning_rate": 3.6481077613855037e-06, "loss": 0.8312, "step": 25180 }, { "epoch": 0.30693576103250336, "grad_norm": 2.719424110485127, "learning_rate": 3.6477870429762676e-06, "loss": 0.875, "step": 25185 }, { "epoch": 0.3069966972566512, "grad_norm": 2.758458939371929, "learning_rate": 3.6474663245670306e-06, "loss": 0.7447, "step": 25190 }, { "epoch": 0.307057633480799, "grad_norm": 2.149603747708448, "learning_rate": 3.6471456061577936e-06, "loss": 0.7443, "step": 25195 }, { "epoch": 0.3071185697049468, "grad_norm": 2.5264612401503825, "learning_rate": 3.6468248877485566e-06, "loss": 0.7885, "step": 25200 }, { "epoch": 0.3071795059290946, "grad_norm": 2.000727555705217, "learning_rate": 3.6465041693393205e-06, "loss": 0.7683, "step": 25205 }, { "epoch": 0.3072404421532424, "grad_norm": 4.151998898812123, "learning_rate": 3.6461834509300835e-06, "loss": 0.8047, "step": 25210 }, { "epoch": 0.30730137837739024, "grad_norm": 2.5641695848447705, "learning_rate": 3.645862732520847e-06, "loss": 0.8041, "step": 25215 }, { "epoch": 0.307362314601538, "grad_norm": 2.568535551948746, "learning_rate": 3.6455420141116104e-06, "loss": 0.7824, "step": 25220 }, { "epoch": 0.30742325082568583, "grad_norm": 2.570325024228811, "learning_rate": 3.6452212957023734e-06, "loss": 0.7911, "step": 25225 }, { "epoch": 0.30748418704983366, "grad_norm": 2.482213197047683, "learning_rate": 3.644900577293137e-06, "loss": 0.8216, "step": 25230 }, { "epoch": 0.3075451232739814, "grad_norm": 2.4611803954392135, "learning_rate": 3.6445798588839003e-06, "loss": 0.8144, "step": 25235 }, { "epoch": 0.30760605949812925, "grad_norm": 2.4280077632738744, "learning_rate": 3.6442591404746637e-06, "loss": 0.8334, "step": 25240 }, { "epoch": 0.30766699572227707, "grad_norm": 2.5743538865524025, "learning_rate": 3.6439384220654267e-06, "loss": 0.8627, "step": 25245 }, { "epoch": 0.3077279319464249, "grad_norm": 2.4265378285659263, "learning_rate": 3.6436177036561906e-06, "loss": 0.8125, "step": 25250 }, { "epoch": 0.30778886817057266, "grad_norm": 2.304522385910861, "learning_rate": 3.6432969852469536e-06, "loss": 0.8161, "step": 25255 }, { "epoch": 0.3078498043947205, "grad_norm": 2.0713846772102773, "learning_rate": 3.6429762668377166e-06, "loss": 0.8223, "step": 25260 }, { "epoch": 0.3079107406188683, "grad_norm": 3.143946811561394, "learning_rate": 3.6426555484284805e-06, "loss": 0.7635, "step": 25265 }, { "epoch": 0.3079716768430161, "grad_norm": 2.079024301120297, "learning_rate": 3.6423348300192435e-06, "loss": 0.7848, "step": 25270 }, { "epoch": 0.3080326130671639, "grad_norm": 2.5763689403296115, "learning_rate": 3.6420141116100065e-06, "loss": 0.6641, "step": 25275 }, { "epoch": 0.3080935492913117, "grad_norm": 2.3232882716779955, "learning_rate": 3.64169339320077e-06, "loss": 0.658, "step": 25280 }, { "epoch": 0.30815448551545954, "grad_norm": 4.046837187198487, "learning_rate": 3.6413726747915334e-06, "loss": 0.7911, "step": 25285 }, { "epoch": 0.3082154217396073, "grad_norm": 2.640094509716398, "learning_rate": 3.6410519563822964e-06, "loss": 0.8048, "step": 25290 }, { "epoch": 0.30827635796375513, "grad_norm": 2.3210589478275754, "learning_rate": 3.64073123797306e-06, "loss": 0.8046, "step": 25295 }, { "epoch": 0.30833729418790295, "grad_norm": 2.8357144819450886, "learning_rate": 3.6404105195638233e-06, "loss": 0.7392, "step": 25300 }, { "epoch": 0.3083982304120507, "grad_norm": 2.580440838104411, "learning_rate": 3.6400898011545867e-06, "loss": 0.7835, "step": 25305 }, { "epoch": 0.30845916663619855, "grad_norm": 2.389617692128025, "learning_rate": 3.6397690827453498e-06, "loss": 0.8219, "step": 25310 }, { "epoch": 0.30852010286034637, "grad_norm": 2.1312067767825615, "learning_rate": 3.639448364336113e-06, "loss": 0.849, "step": 25315 }, { "epoch": 0.3085810390844942, "grad_norm": 2.947528126483718, "learning_rate": 3.6391276459268766e-06, "loss": 0.6858, "step": 25320 }, { "epoch": 0.30864197530864196, "grad_norm": 2.8987949678699723, "learning_rate": 3.6388069275176397e-06, "loss": 0.8613, "step": 25325 }, { "epoch": 0.3087029115327898, "grad_norm": 4.1887709061853915, "learning_rate": 3.6384862091084035e-06, "loss": 0.7923, "step": 25330 }, { "epoch": 0.3087638477569376, "grad_norm": 3.141240500091646, "learning_rate": 3.6381654906991665e-06, "loss": 0.8227, "step": 25335 }, { "epoch": 0.30882478398108537, "grad_norm": 2.4141283861238185, "learning_rate": 3.6378447722899296e-06, "loss": 0.8089, "step": 25340 }, { "epoch": 0.3088857202052332, "grad_norm": 2.7420111401127616, "learning_rate": 3.6375240538806934e-06, "loss": 0.8271, "step": 25345 }, { "epoch": 0.308946656429381, "grad_norm": 3.4808048406999847, "learning_rate": 3.6372033354714564e-06, "loss": 0.7797, "step": 25350 }, { "epoch": 0.30900759265352884, "grad_norm": 2.284427236942231, "learning_rate": 3.6368826170622195e-06, "loss": 0.7751, "step": 25355 }, { "epoch": 0.3090685288776766, "grad_norm": 2.5212828436275756, "learning_rate": 3.636561898652983e-06, "loss": 0.7797, "step": 25360 }, { "epoch": 0.30912946510182443, "grad_norm": 2.296632259479761, "learning_rate": 3.6362411802437463e-06, "loss": 0.7634, "step": 25365 }, { "epoch": 0.30919040132597225, "grad_norm": 2.425216906176298, "learning_rate": 3.6359204618345093e-06, "loss": 0.7753, "step": 25370 }, { "epoch": 0.30925133755012, "grad_norm": 2.858537719999363, "learning_rate": 3.6355997434252728e-06, "loss": 0.8823, "step": 25375 }, { "epoch": 0.30931227377426784, "grad_norm": 2.4038990804573825, "learning_rate": 3.6352790250160362e-06, "loss": 0.7569, "step": 25380 }, { "epoch": 0.30937320999841567, "grad_norm": 1.7634309055310067, "learning_rate": 3.6349583066067997e-06, "loss": 0.7733, "step": 25385 }, { "epoch": 0.3094341462225635, "grad_norm": 2.7448625385222805, "learning_rate": 3.6346375881975627e-06, "loss": 0.6957, "step": 25390 }, { "epoch": 0.30949508244671126, "grad_norm": 2.753873689112692, "learning_rate": 3.634316869788326e-06, "loss": 0.7648, "step": 25395 }, { "epoch": 0.3095560186708591, "grad_norm": 2.718156819761232, "learning_rate": 3.6339961513790896e-06, "loss": 0.7945, "step": 25400 }, { "epoch": 0.3096169548950069, "grad_norm": 2.383240459278622, "learning_rate": 3.6336754329698526e-06, "loss": 0.8062, "step": 25405 }, { "epoch": 0.30967789111915467, "grad_norm": 2.4874461190848742, "learning_rate": 3.6333547145606164e-06, "loss": 0.7958, "step": 25410 }, { "epoch": 0.3097388273433025, "grad_norm": 2.3942916099982687, "learning_rate": 3.6330339961513795e-06, "loss": 0.7853, "step": 25415 }, { "epoch": 0.3097997635674503, "grad_norm": 2.3633025276026594, "learning_rate": 3.6327132777421425e-06, "loss": 0.7397, "step": 25420 }, { "epoch": 0.30986069979159814, "grad_norm": 2.428598599121947, "learning_rate": 3.6323925593329063e-06, "loss": 0.8292, "step": 25425 }, { "epoch": 0.3099216360157459, "grad_norm": 2.3252972077291902, "learning_rate": 3.6320718409236694e-06, "loss": 0.8151, "step": 25430 }, { "epoch": 0.30998257223989373, "grad_norm": 2.1138686498951684, "learning_rate": 3.6317511225144324e-06, "loss": 0.7833, "step": 25435 }, { "epoch": 0.31004350846404155, "grad_norm": 2.4422550913230108, "learning_rate": 3.6314304041051962e-06, "loss": 0.8073, "step": 25440 }, { "epoch": 0.3101044446881893, "grad_norm": 3.2864528402573225, "learning_rate": 3.6311096856959593e-06, "loss": 0.7775, "step": 25445 }, { "epoch": 0.31016538091233714, "grad_norm": 2.2619231610575716, "learning_rate": 3.6307889672867223e-06, "loss": 0.7521, "step": 25450 }, { "epoch": 0.31022631713648496, "grad_norm": 2.949503035347107, "learning_rate": 3.6304682488774857e-06, "loss": 0.7926, "step": 25455 }, { "epoch": 0.3102872533606328, "grad_norm": 2.4485387302168253, "learning_rate": 3.630147530468249e-06, "loss": 0.8151, "step": 25460 }, { "epoch": 0.31034818958478055, "grad_norm": 2.5015185055628244, "learning_rate": 3.6298268120590126e-06, "loss": 0.7878, "step": 25465 }, { "epoch": 0.3104091258089284, "grad_norm": 2.3109783855611714, "learning_rate": 3.6295060936497756e-06, "loss": 0.7937, "step": 25470 }, { "epoch": 0.3104700620330762, "grad_norm": 2.468124256316179, "learning_rate": 3.6291853752405395e-06, "loss": 0.7398, "step": 25475 }, { "epoch": 0.31053099825722397, "grad_norm": 2.9030491572542503, "learning_rate": 3.6288646568313025e-06, "loss": 0.7728, "step": 25480 }, { "epoch": 0.3105919344813718, "grad_norm": 2.291674017205179, "learning_rate": 3.6285439384220655e-06, "loss": 0.8482, "step": 25485 }, { "epoch": 0.3106528707055196, "grad_norm": 3.0934013311473234, "learning_rate": 3.6282232200128294e-06, "loss": 0.7557, "step": 25490 }, { "epoch": 0.31071380692966744, "grad_norm": 2.194801709757153, "learning_rate": 3.6279025016035924e-06, "loss": 0.7968, "step": 25495 }, { "epoch": 0.3107747431538152, "grad_norm": 2.9219342256782204, "learning_rate": 3.6275817831943554e-06, "loss": 0.8216, "step": 25500 }, { "epoch": 0.310835679377963, "grad_norm": 2.631361295627925, "learning_rate": 3.6272610647851193e-06, "loss": 0.7921, "step": 25505 }, { "epoch": 0.31089661560211085, "grad_norm": 2.191215592614459, "learning_rate": 3.6269403463758823e-06, "loss": 0.7283, "step": 25510 }, { "epoch": 0.3109575518262586, "grad_norm": 2.6217863260559815, "learning_rate": 3.6266196279666453e-06, "loss": 0.7556, "step": 25515 }, { "epoch": 0.31101848805040644, "grad_norm": 2.5657129300561743, "learning_rate": 3.626298909557409e-06, "loss": 0.8358, "step": 25520 }, { "epoch": 0.31107942427455426, "grad_norm": 3.276789433709153, "learning_rate": 3.625978191148172e-06, "loss": 0.842, "step": 25525 }, { "epoch": 0.3111403604987021, "grad_norm": 2.65122944932397, "learning_rate": 3.6256574727389356e-06, "loss": 0.8024, "step": 25530 }, { "epoch": 0.31120129672284985, "grad_norm": 2.5434831020778206, "learning_rate": 3.6253367543296986e-06, "loss": 0.7772, "step": 25535 }, { "epoch": 0.3112622329469977, "grad_norm": 2.7589808640157805, "learning_rate": 3.625016035920462e-06, "loss": 0.7839, "step": 25540 }, { "epoch": 0.3113231691711455, "grad_norm": 2.5859586462061728, "learning_rate": 3.6246953175112255e-06, "loss": 0.7552, "step": 25545 }, { "epoch": 0.31138410539529326, "grad_norm": 2.2366260592751956, "learning_rate": 3.6243745991019885e-06, "loss": 0.7598, "step": 25550 }, { "epoch": 0.3114450416194411, "grad_norm": 2.37951092822846, "learning_rate": 3.6240538806927524e-06, "loss": 0.7604, "step": 25555 }, { "epoch": 0.3115059778435889, "grad_norm": 2.689108507237751, "learning_rate": 3.6237331622835154e-06, "loss": 0.8225, "step": 25560 }, { "epoch": 0.31156691406773673, "grad_norm": 3.090347094229633, "learning_rate": 3.6234124438742784e-06, "loss": 0.7571, "step": 25565 }, { "epoch": 0.3116278502918845, "grad_norm": 2.605972281922873, "learning_rate": 3.6230917254650423e-06, "loss": 0.827, "step": 25570 }, { "epoch": 0.3116887865160323, "grad_norm": 2.1075053166207875, "learning_rate": 3.6227710070558053e-06, "loss": 0.7816, "step": 25575 }, { "epoch": 0.31174972274018015, "grad_norm": 2.3871030638936435, "learning_rate": 3.6224502886465683e-06, "loss": 0.7354, "step": 25580 }, { "epoch": 0.3118106589643279, "grad_norm": 2.7448836633770313, "learning_rate": 3.622129570237332e-06, "loss": 0.8155, "step": 25585 }, { "epoch": 0.31187159518847574, "grad_norm": 2.523061504030166, "learning_rate": 3.6218088518280952e-06, "loss": 0.7695, "step": 25590 }, { "epoch": 0.31193253141262356, "grad_norm": 2.878917579232786, "learning_rate": 3.6214881334188582e-06, "loss": 0.8436, "step": 25595 }, { "epoch": 0.3119934676367714, "grad_norm": 1.8699535458122385, "learning_rate": 3.621167415009622e-06, "loss": 0.807, "step": 25600 }, { "epoch": 0.31205440386091915, "grad_norm": 2.723584355680768, "learning_rate": 3.620846696600385e-06, "loss": 0.8114, "step": 25605 }, { "epoch": 0.312115340085067, "grad_norm": 2.641968514594393, "learning_rate": 3.6205259781911486e-06, "loss": 0.7346, "step": 25610 }, { "epoch": 0.3121762763092148, "grad_norm": 2.4443398867309316, "learning_rate": 3.6202052597819116e-06, "loss": 0.8277, "step": 25615 }, { "epoch": 0.31223721253336256, "grad_norm": 2.3332166724711523, "learning_rate": 3.619884541372675e-06, "loss": 0.8193, "step": 25620 }, { "epoch": 0.3122981487575104, "grad_norm": 2.341655714315896, "learning_rate": 3.6195638229634384e-06, "loss": 0.7941, "step": 25625 }, { "epoch": 0.3123590849816582, "grad_norm": 2.9325018262526226, "learning_rate": 3.6192431045542015e-06, "loss": 0.7646, "step": 25630 }, { "epoch": 0.31242002120580603, "grad_norm": 2.533985437767123, "learning_rate": 3.6189223861449653e-06, "loss": 0.7641, "step": 25635 }, { "epoch": 0.3124809574299538, "grad_norm": 2.242308571708756, "learning_rate": 3.6186016677357283e-06, "loss": 0.8129, "step": 25640 }, { "epoch": 0.3125418936541016, "grad_norm": 2.5532677972014888, "learning_rate": 3.6182809493264914e-06, "loss": 0.8555, "step": 25645 }, { "epoch": 0.31260282987824944, "grad_norm": 2.366323638534609, "learning_rate": 3.6179602309172552e-06, "loss": 0.7992, "step": 25650 }, { "epoch": 0.3126637661023972, "grad_norm": 2.4970356698058804, "learning_rate": 3.6176395125080182e-06, "loss": 0.7699, "step": 25655 }, { "epoch": 0.31272470232654503, "grad_norm": 2.1075629543287024, "learning_rate": 3.6173187940987813e-06, "loss": 0.8814, "step": 25660 }, { "epoch": 0.31278563855069286, "grad_norm": 3.473779460427936, "learning_rate": 3.616998075689545e-06, "loss": 0.7968, "step": 25665 }, { "epoch": 0.3128465747748406, "grad_norm": 2.3368071879604426, "learning_rate": 3.616677357280308e-06, "loss": 0.7097, "step": 25670 }, { "epoch": 0.31290751099898845, "grad_norm": 2.580736152892714, "learning_rate": 3.616356638871071e-06, "loss": 0.8153, "step": 25675 }, { "epoch": 0.31296844722313627, "grad_norm": 2.595750756455247, "learning_rate": 3.616035920461835e-06, "loss": 0.7831, "step": 25680 }, { "epoch": 0.3130293834472841, "grad_norm": 3.242729521375483, "learning_rate": 3.615715202052598e-06, "loss": 0.9087, "step": 25685 }, { "epoch": 0.31309031967143186, "grad_norm": 2.1425503032810362, "learning_rate": 3.6153944836433615e-06, "loss": 0.7503, "step": 25690 }, { "epoch": 0.3131512558955797, "grad_norm": 2.1227576254148732, "learning_rate": 3.615073765234125e-06, "loss": 0.7357, "step": 25695 }, { "epoch": 0.3132121921197275, "grad_norm": 2.273997214914158, "learning_rate": 3.614753046824888e-06, "loss": 0.8304, "step": 25700 }, { "epoch": 0.3132731283438753, "grad_norm": 4.10679752994813, "learning_rate": 3.6144323284156514e-06, "loss": 0.7687, "step": 25705 }, { "epoch": 0.3133340645680231, "grad_norm": 2.4303851580339355, "learning_rate": 3.6141116100064144e-06, "loss": 0.8311, "step": 25710 }, { "epoch": 0.3133950007921709, "grad_norm": 2.5281902915028285, "learning_rate": 3.6137908915971783e-06, "loss": 0.8229, "step": 25715 }, { "epoch": 0.31345593701631874, "grad_norm": 3.6912756078899007, "learning_rate": 3.6134701731879413e-06, "loss": 0.7849, "step": 25720 }, { "epoch": 0.3135168732404665, "grad_norm": 2.4753901304856334, "learning_rate": 3.6131494547787043e-06, "loss": 0.721, "step": 25725 }, { "epoch": 0.31357780946461433, "grad_norm": 3.0682880564357005, "learning_rate": 3.612828736369468e-06, "loss": 0.708, "step": 25730 }, { "epoch": 0.31363874568876215, "grad_norm": 2.684101863560887, "learning_rate": 3.612508017960231e-06, "loss": 0.7494, "step": 25735 }, { "epoch": 0.3136996819129099, "grad_norm": 2.2326822659508467, "learning_rate": 3.612187299550994e-06, "loss": 0.7797, "step": 25740 }, { "epoch": 0.31376061813705775, "grad_norm": 2.4279475057794184, "learning_rate": 3.611866581141758e-06, "loss": 0.8213, "step": 25745 }, { "epoch": 0.31382155436120557, "grad_norm": 3.0545826625939045, "learning_rate": 3.611545862732521e-06, "loss": 0.8273, "step": 25750 }, { "epoch": 0.3138824905853534, "grad_norm": 2.1716488523935267, "learning_rate": 3.6112251443232845e-06, "loss": 0.7289, "step": 25755 }, { "epoch": 0.31394342680950116, "grad_norm": 2.2439023889623653, "learning_rate": 3.610904425914048e-06, "loss": 0.7941, "step": 25760 }, { "epoch": 0.314004363033649, "grad_norm": 2.471367076854103, "learning_rate": 3.610583707504811e-06, "loss": 0.7729, "step": 25765 }, { "epoch": 0.3140652992577968, "grad_norm": 2.0938020706916767, "learning_rate": 3.6102629890955744e-06, "loss": 0.7883, "step": 25770 }, { "epoch": 0.31412623548194457, "grad_norm": 2.5064779043805445, "learning_rate": 3.609942270686338e-06, "loss": 0.8263, "step": 25775 }, { "epoch": 0.3141871717060924, "grad_norm": 2.3290057081644844, "learning_rate": 3.6096215522771013e-06, "loss": 0.7938, "step": 25780 }, { "epoch": 0.3142481079302402, "grad_norm": 2.4025498326662453, "learning_rate": 3.6093008338678643e-06, "loss": 0.7948, "step": 25785 }, { "epoch": 0.31430904415438804, "grad_norm": 2.122523047632996, "learning_rate": 3.6089801154586273e-06, "loss": 0.748, "step": 25790 }, { "epoch": 0.3143699803785358, "grad_norm": 2.423898913653329, "learning_rate": 3.608659397049391e-06, "loss": 0.7332, "step": 25795 }, { "epoch": 0.31443091660268363, "grad_norm": 2.503628947947295, "learning_rate": 3.608338678640154e-06, "loss": 0.8123, "step": 25800 }, { "epoch": 0.31449185282683145, "grad_norm": 2.4850364000269742, "learning_rate": 3.6080179602309172e-06, "loss": 0.8189, "step": 25805 }, { "epoch": 0.3145527890509792, "grad_norm": 2.6159715723503254, "learning_rate": 3.607697241821681e-06, "loss": 0.7587, "step": 25810 }, { "epoch": 0.31461372527512704, "grad_norm": 2.1474013914254737, "learning_rate": 3.607376523412444e-06, "loss": 0.8188, "step": 25815 }, { "epoch": 0.31467466149927487, "grad_norm": 2.496276052514402, "learning_rate": 3.607055805003207e-06, "loss": 0.7886, "step": 25820 }, { "epoch": 0.3147355977234227, "grad_norm": 2.6213439066882454, "learning_rate": 3.606735086593971e-06, "loss": 0.8044, "step": 25825 }, { "epoch": 0.31479653394757046, "grad_norm": 2.575720845052156, "learning_rate": 3.606414368184734e-06, "loss": 0.8317, "step": 25830 }, { "epoch": 0.3148574701717183, "grad_norm": 2.413727361406992, "learning_rate": 3.6060936497754974e-06, "loss": 0.7566, "step": 25835 }, { "epoch": 0.3149184063958661, "grad_norm": 2.298578553565037, "learning_rate": 3.605772931366261e-06, "loss": 0.7923, "step": 25840 }, { "epoch": 0.31497934262001387, "grad_norm": 1.9010983359464058, "learning_rate": 3.605452212957024e-06, "loss": 0.7431, "step": 25845 }, { "epoch": 0.3150402788441617, "grad_norm": 2.4587113600979196, "learning_rate": 3.6051314945477873e-06, "loss": 0.799, "step": 25850 }, { "epoch": 0.3151012150683095, "grad_norm": 2.686904039429391, "learning_rate": 3.6048107761385508e-06, "loss": 0.7351, "step": 25855 }, { "epoch": 0.31516215129245734, "grad_norm": 2.8613440420245295, "learning_rate": 3.6044900577293142e-06, "loss": 0.8484, "step": 25860 }, { "epoch": 0.3152230875166051, "grad_norm": 2.5434301753353714, "learning_rate": 3.6041693393200772e-06, "loss": 0.7812, "step": 25865 }, { "epoch": 0.31528402374075293, "grad_norm": 2.3903404952339824, "learning_rate": 3.6038486209108402e-06, "loss": 0.8454, "step": 25870 }, { "epoch": 0.31534495996490075, "grad_norm": 2.0095424976249787, "learning_rate": 3.603527902501604e-06, "loss": 0.7707, "step": 25875 }, { "epoch": 0.3154058961890485, "grad_norm": 2.391219370445016, "learning_rate": 3.603207184092367e-06, "loss": 0.7615, "step": 25880 }, { "epoch": 0.31546683241319634, "grad_norm": 2.5949002714044402, "learning_rate": 3.60288646568313e-06, "loss": 0.7263, "step": 25885 }, { "epoch": 0.31552776863734416, "grad_norm": 2.740417105400637, "learning_rate": 3.602565747273894e-06, "loss": 0.7992, "step": 25890 }, { "epoch": 0.315588704861492, "grad_norm": 2.2309407066487985, "learning_rate": 3.602245028864657e-06, "loss": 0.7522, "step": 25895 }, { "epoch": 0.31564964108563975, "grad_norm": 2.2591228353109454, "learning_rate": 3.60192431045542e-06, "loss": 0.7334, "step": 25900 }, { "epoch": 0.3157105773097876, "grad_norm": 2.508577144970683, "learning_rate": 3.601603592046184e-06, "loss": 0.8624, "step": 25905 }, { "epoch": 0.3157715135339354, "grad_norm": 2.4621778986113334, "learning_rate": 3.601282873636947e-06, "loss": 0.7225, "step": 25910 }, { "epoch": 0.31583244975808317, "grad_norm": 2.808998161603917, "learning_rate": 3.6009621552277104e-06, "loss": 0.786, "step": 25915 }, { "epoch": 0.315893385982231, "grad_norm": 2.450309930427547, "learning_rate": 3.600641436818474e-06, "loss": 0.7685, "step": 25920 }, { "epoch": 0.3159543222063788, "grad_norm": 2.9197427240207396, "learning_rate": 3.600320718409237e-06, "loss": 0.7012, "step": 25925 }, { "epoch": 0.31601525843052664, "grad_norm": 2.6068317827556062, "learning_rate": 3.6000000000000003e-06, "loss": 0.753, "step": 25930 }, { "epoch": 0.3160761946546744, "grad_norm": 2.1051977476869244, "learning_rate": 3.5996792815907637e-06, "loss": 0.7447, "step": 25935 }, { "epoch": 0.3161371308788222, "grad_norm": 2.752958289226501, "learning_rate": 3.599358563181527e-06, "loss": 0.8124, "step": 25940 }, { "epoch": 0.31619806710297005, "grad_norm": 2.505920813455712, "learning_rate": 3.59903784477229e-06, "loss": 0.7036, "step": 25945 }, { "epoch": 0.3162590033271178, "grad_norm": 2.1211415743310273, "learning_rate": 3.598717126363053e-06, "loss": 0.8002, "step": 25950 }, { "epoch": 0.31631993955126564, "grad_norm": 2.2525345362549514, "learning_rate": 3.598396407953817e-06, "loss": 0.8179, "step": 25955 }, { "epoch": 0.31638087577541346, "grad_norm": 4.335409272710077, "learning_rate": 3.59807568954458e-06, "loss": 0.8095, "step": 25960 }, { "epoch": 0.3164418119995613, "grad_norm": 3.447755372556143, "learning_rate": 3.597754971135343e-06, "loss": 0.8557, "step": 25965 }, { "epoch": 0.31650274822370905, "grad_norm": 2.79792532062623, "learning_rate": 3.597434252726107e-06, "loss": 0.8085, "step": 25970 }, { "epoch": 0.3165636844478569, "grad_norm": 2.7408109938978984, "learning_rate": 3.59711353431687e-06, "loss": 0.8033, "step": 25975 }, { "epoch": 0.3166246206720047, "grad_norm": 3.851304220100516, "learning_rate": 3.5967928159076334e-06, "loss": 0.8163, "step": 25980 }, { "epoch": 0.31668555689615246, "grad_norm": 3.0713096890701688, "learning_rate": 3.596472097498397e-06, "loss": 0.7907, "step": 25985 }, { "epoch": 0.3167464931203003, "grad_norm": 3.009284106276963, "learning_rate": 3.59615137908916e-06, "loss": 0.7632, "step": 25990 }, { "epoch": 0.3168074293444481, "grad_norm": 2.3305298429529078, "learning_rate": 3.5958306606799233e-06, "loss": 0.8187, "step": 25995 }, { "epoch": 0.31686836556859593, "grad_norm": 3.5522262896537664, "learning_rate": 3.5955099422706867e-06, "loss": 0.7413, "step": 26000 }, { "epoch": 0.3169293017927437, "grad_norm": 2.274678818221042, "learning_rate": 3.59518922386145e-06, "loss": 0.7782, "step": 26005 }, { "epoch": 0.3169902380168915, "grad_norm": 3.037990408075316, "learning_rate": 3.594868505452213e-06, "loss": 0.7625, "step": 26010 }, { "epoch": 0.31705117424103935, "grad_norm": 2.4515610556001306, "learning_rate": 3.5945477870429766e-06, "loss": 0.7912, "step": 26015 }, { "epoch": 0.3171121104651871, "grad_norm": 2.461061539288702, "learning_rate": 3.59422706863374e-06, "loss": 0.8497, "step": 26020 }, { "epoch": 0.31717304668933494, "grad_norm": 2.586455843957218, "learning_rate": 3.593906350224503e-06, "loss": 0.7859, "step": 26025 }, { "epoch": 0.31723398291348276, "grad_norm": 2.505172769743428, "learning_rate": 3.593585631815267e-06, "loss": 0.8623, "step": 26030 }, { "epoch": 0.3172949191376306, "grad_norm": 2.5766537182224694, "learning_rate": 3.59326491340603e-06, "loss": 0.7582, "step": 26035 }, { "epoch": 0.31735585536177835, "grad_norm": 2.327111434526185, "learning_rate": 3.592944194996793e-06, "loss": 0.7189, "step": 26040 }, { "epoch": 0.3174167915859262, "grad_norm": 2.895318220639585, "learning_rate": 3.592623476587556e-06, "loss": 0.7771, "step": 26045 }, { "epoch": 0.317477727810074, "grad_norm": 2.069924477389775, "learning_rate": 3.59230275817832e-06, "loss": 0.8178, "step": 26050 }, { "epoch": 0.31753866403422176, "grad_norm": 2.3842825985829026, "learning_rate": 3.591982039769083e-06, "loss": 0.8669, "step": 26055 }, { "epoch": 0.3175996002583696, "grad_norm": 2.635222819259874, "learning_rate": 3.5916613213598463e-06, "loss": 0.8332, "step": 26060 }, { "epoch": 0.3176605364825174, "grad_norm": 1.9783752769934433, "learning_rate": 3.5913406029506098e-06, "loss": 0.7572, "step": 26065 }, { "epoch": 0.31772147270666523, "grad_norm": 3.0508405430298935, "learning_rate": 3.5910198845413728e-06, "loss": 0.7498, "step": 26070 }, { "epoch": 0.317782408930813, "grad_norm": 2.6867265636115834, "learning_rate": 3.5906991661321362e-06, "loss": 0.7887, "step": 26075 }, { "epoch": 0.3178433451549608, "grad_norm": 2.3958439955787623, "learning_rate": 3.5903784477228997e-06, "loss": 0.8377, "step": 26080 }, { "epoch": 0.31790428137910864, "grad_norm": 2.652190772618026, "learning_rate": 3.590057729313663e-06, "loss": 0.7994, "step": 26085 }, { "epoch": 0.3179652176032564, "grad_norm": 2.39447460155419, "learning_rate": 3.589737010904426e-06, "loss": 0.8583, "step": 26090 }, { "epoch": 0.31802615382740423, "grad_norm": 2.622812202292, "learning_rate": 3.5894162924951896e-06, "loss": 0.8103, "step": 26095 }, { "epoch": 0.31808709005155206, "grad_norm": 2.5172325625648395, "learning_rate": 3.589095574085953e-06, "loss": 0.7651, "step": 26100 }, { "epoch": 0.3181480262756999, "grad_norm": 2.1603278234957566, "learning_rate": 3.588774855676716e-06, "loss": 0.8044, "step": 26105 }, { "epoch": 0.31820896249984765, "grad_norm": 3.4669119465621545, "learning_rate": 3.58845413726748e-06, "loss": 0.788, "step": 26110 }, { "epoch": 0.31826989872399547, "grad_norm": 2.219889947019716, "learning_rate": 3.588133418858243e-06, "loss": 0.7387, "step": 26115 }, { "epoch": 0.3183308349481433, "grad_norm": 2.390769591846635, "learning_rate": 3.587812700449006e-06, "loss": 0.8515, "step": 26120 }, { "epoch": 0.31839177117229106, "grad_norm": 2.472518740204368, "learning_rate": 3.587491982039769e-06, "loss": 0.8116, "step": 26125 }, { "epoch": 0.3184527073964389, "grad_norm": 2.4545579425560144, "learning_rate": 3.587171263630533e-06, "loss": 0.8656, "step": 26130 }, { "epoch": 0.3185136436205867, "grad_norm": 2.4420450814461923, "learning_rate": 3.586850545221296e-06, "loss": 0.7365, "step": 26135 }, { "epoch": 0.3185745798447345, "grad_norm": 2.4175197925227887, "learning_rate": 3.5865298268120592e-06, "loss": 0.7186, "step": 26140 }, { "epoch": 0.3186355160688823, "grad_norm": 3.026465508894478, "learning_rate": 3.5862091084028227e-06, "loss": 0.7933, "step": 26145 }, { "epoch": 0.3186964522930301, "grad_norm": 4.293542993566709, "learning_rate": 3.5858883899935857e-06, "loss": 0.7616, "step": 26150 }, { "epoch": 0.31875738851717794, "grad_norm": 2.0818080109570953, "learning_rate": 3.585567671584349e-06, "loss": 0.7111, "step": 26155 }, { "epoch": 0.3188183247413257, "grad_norm": 2.4937940417322597, "learning_rate": 3.5852469531751126e-06, "loss": 0.7765, "step": 26160 }, { "epoch": 0.31887926096547353, "grad_norm": 2.374041307051697, "learning_rate": 3.584926234765876e-06, "loss": 0.7401, "step": 26165 }, { "epoch": 0.31894019718962136, "grad_norm": 2.1312704132142835, "learning_rate": 3.584605516356639e-06, "loss": 0.7063, "step": 26170 }, { "epoch": 0.3190011334137691, "grad_norm": 2.817440073230196, "learning_rate": 3.584284797947403e-06, "loss": 0.8211, "step": 26175 }, { "epoch": 0.31906206963791695, "grad_norm": 5.334707095933033, "learning_rate": 3.583964079538166e-06, "loss": 0.8141, "step": 26180 }, { "epoch": 0.31912300586206477, "grad_norm": 2.7557829672764127, "learning_rate": 3.583643361128929e-06, "loss": 0.833, "step": 26185 }, { "epoch": 0.3191839420862126, "grad_norm": 2.1092694786624695, "learning_rate": 3.583322642719693e-06, "loss": 0.7682, "step": 26190 }, { "epoch": 0.31924487831036036, "grad_norm": 2.3128425693122012, "learning_rate": 3.583001924310456e-06, "loss": 0.7283, "step": 26195 }, { "epoch": 0.3193058145345082, "grad_norm": 1.8391227058195305, "learning_rate": 3.582681205901219e-06, "loss": 0.8134, "step": 26200 }, { "epoch": 0.319366750758656, "grad_norm": 2.37876137806081, "learning_rate": 3.582360487491982e-06, "loss": 0.7727, "step": 26205 }, { "epoch": 0.31942768698280377, "grad_norm": 2.31015215028898, "learning_rate": 3.5820397690827457e-06, "loss": 0.8012, "step": 26210 }, { "epoch": 0.3194886232069516, "grad_norm": 2.9822274551620134, "learning_rate": 3.5817190506735087e-06, "loss": 0.8126, "step": 26215 }, { "epoch": 0.3195495594310994, "grad_norm": 2.666052325631196, "learning_rate": 3.581398332264272e-06, "loss": 0.7684, "step": 26220 }, { "epoch": 0.31961049565524724, "grad_norm": 2.510896989344807, "learning_rate": 3.5810776138550356e-06, "loss": 0.8416, "step": 26225 }, { "epoch": 0.319671431879395, "grad_norm": 2.2965071460792292, "learning_rate": 3.580756895445799e-06, "loss": 0.7542, "step": 26230 }, { "epoch": 0.31973236810354283, "grad_norm": 2.820964162818647, "learning_rate": 3.580436177036562e-06, "loss": 0.8077, "step": 26235 }, { "epoch": 0.31979330432769065, "grad_norm": 2.562049143441627, "learning_rate": 3.5801154586273255e-06, "loss": 0.7548, "step": 26240 }, { "epoch": 0.3198542405518384, "grad_norm": 2.526021283783928, "learning_rate": 3.579794740218089e-06, "loss": 0.7224, "step": 26245 }, { "epoch": 0.31991517677598624, "grad_norm": 2.748108952637822, "learning_rate": 3.579474021808852e-06, "loss": 0.7328, "step": 26250 }, { "epoch": 0.31997611300013407, "grad_norm": 2.7556420017973786, "learning_rate": 3.579153303399616e-06, "loss": 0.8494, "step": 26255 }, { "epoch": 0.3200370492242819, "grad_norm": 2.1420440428914724, "learning_rate": 3.578832584990379e-06, "loss": 0.7545, "step": 26260 }, { "epoch": 0.32009798544842966, "grad_norm": 2.256663804739598, "learning_rate": 3.578511866581142e-06, "loss": 0.7756, "step": 26265 }, { "epoch": 0.3201589216725775, "grad_norm": 2.4077495158881006, "learning_rate": 3.5781911481719057e-06, "loss": 0.8536, "step": 26270 }, { "epoch": 0.3202198578967253, "grad_norm": 2.639484047380118, "learning_rate": 3.5778704297626687e-06, "loss": 0.8273, "step": 26275 }, { "epoch": 0.32028079412087307, "grad_norm": 1.9770716944420332, "learning_rate": 3.5775497113534318e-06, "loss": 0.837, "step": 26280 }, { "epoch": 0.3203417303450209, "grad_norm": 2.771981759641849, "learning_rate": 3.5772289929441956e-06, "loss": 0.7682, "step": 26285 }, { "epoch": 0.3204026665691687, "grad_norm": 2.9056210329951706, "learning_rate": 3.5769082745349586e-06, "loss": 0.8625, "step": 26290 }, { "epoch": 0.32046360279331654, "grad_norm": 4.850932832744671, "learning_rate": 3.5765875561257217e-06, "loss": 0.7483, "step": 26295 }, { "epoch": 0.3205245390174643, "grad_norm": 3.147441432868592, "learning_rate": 3.576266837716485e-06, "loss": 0.8086, "step": 26300 }, { "epoch": 0.32058547524161213, "grad_norm": 2.262672022069318, "learning_rate": 3.5759461193072485e-06, "loss": 0.8404, "step": 26305 }, { "epoch": 0.32064641146575995, "grad_norm": 3.016633595109521, "learning_rate": 3.575625400898012e-06, "loss": 0.7636, "step": 26310 }, { "epoch": 0.3207073476899077, "grad_norm": 2.3247406184759467, "learning_rate": 3.575304682488775e-06, "loss": 0.7645, "step": 26315 }, { "epoch": 0.32076828391405554, "grad_norm": 2.5730534719849714, "learning_rate": 3.5749839640795384e-06, "loss": 0.7746, "step": 26320 }, { "epoch": 0.32082922013820336, "grad_norm": 2.870571543430136, "learning_rate": 3.574663245670302e-06, "loss": 0.7774, "step": 26325 }, { "epoch": 0.3208901563623512, "grad_norm": 2.6114076293938977, "learning_rate": 3.574342527261065e-06, "loss": 0.747, "step": 26330 }, { "epoch": 0.32095109258649895, "grad_norm": 2.274864087832242, "learning_rate": 3.5740218088518288e-06, "loss": 0.7627, "step": 26335 }, { "epoch": 0.3210120288106468, "grad_norm": 2.5114122378096537, "learning_rate": 3.5737010904425918e-06, "loss": 0.8092, "step": 26340 }, { "epoch": 0.3210729650347946, "grad_norm": 2.6646651200296505, "learning_rate": 3.573380372033355e-06, "loss": 0.7529, "step": 26345 }, { "epoch": 0.32113390125894237, "grad_norm": 2.8644927345031768, "learning_rate": 3.5730596536241187e-06, "loss": 0.7569, "step": 26350 }, { "epoch": 0.3211948374830902, "grad_norm": 3.0907345223689413, "learning_rate": 3.5727389352148817e-06, "loss": 0.7851, "step": 26355 }, { "epoch": 0.321255773707238, "grad_norm": 2.5536340309317676, "learning_rate": 3.5724182168056447e-06, "loss": 0.8651, "step": 26360 }, { "epoch": 0.32131670993138584, "grad_norm": 2.7226000696853445, "learning_rate": 3.5720974983964086e-06, "loss": 0.7148, "step": 26365 }, { "epoch": 0.3213776461555336, "grad_norm": 2.4173958224271126, "learning_rate": 3.5717767799871716e-06, "loss": 0.7361, "step": 26370 }, { "epoch": 0.3214385823796814, "grad_norm": 2.6065610584072036, "learning_rate": 3.5714560615779346e-06, "loss": 0.8065, "step": 26375 }, { "epoch": 0.32149951860382925, "grad_norm": 2.3733953670713914, "learning_rate": 3.571135343168698e-06, "loss": 0.7805, "step": 26380 }, { "epoch": 0.321560454827977, "grad_norm": 3.600058928085326, "learning_rate": 3.5708146247594615e-06, "loss": 0.8121, "step": 26385 }, { "epoch": 0.32162139105212484, "grad_norm": 2.6916615792555105, "learning_rate": 3.570493906350225e-06, "loss": 0.7372, "step": 26390 }, { "epoch": 0.32168232727627266, "grad_norm": 3.027723273937677, "learning_rate": 3.570173187940988e-06, "loss": 0.7928, "step": 26395 }, { "epoch": 0.3217432635004205, "grad_norm": 2.1549538711641083, "learning_rate": 3.5698524695317514e-06, "loss": 0.7543, "step": 26400 }, { "epoch": 0.32180419972456825, "grad_norm": 2.432262894416165, "learning_rate": 3.569531751122515e-06, "loss": 0.7978, "step": 26405 }, { "epoch": 0.3218651359487161, "grad_norm": 2.573008018105906, "learning_rate": 3.569211032713278e-06, "loss": 0.794, "step": 26410 }, { "epoch": 0.3219260721728639, "grad_norm": 2.17026046913712, "learning_rate": 3.5688903143040417e-06, "loss": 0.7634, "step": 26415 }, { "epoch": 0.32198700839701166, "grad_norm": 2.616197607026502, "learning_rate": 3.5685695958948047e-06, "loss": 0.8322, "step": 26420 }, { "epoch": 0.3220479446211595, "grad_norm": 2.1296667563381124, "learning_rate": 3.5682488774855677e-06, "loss": 0.7783, "step": 26425 }, { "epoch": 0.3221088808453073, "grad_norm": 2.4431787165041547, "learning_rate": 3.5679281590763316e-06, "loss": 0.8156, "step": 26430 }, { "epoch": 0.32216981706945513, "grad_norm": 2.7328458260331767, "learning_rate": 3.5676074406670946e-06, "loss": 0.7915, "step": 26435 }, { "epoch": 0.3222307532936029, "grad_norm": 2.715123525181148, "learning_rate": 3.5672867222578576e-06, "loss": 0.8765, "step": 26440 }, { "epoch": 0.3222916895177507, "grad_norm": 2.2988778934504928, "learning_rate": 3.5669660038486215e-06, "loss": 0.7674, "step": 26445 }, { "epoch": 0.32235262574189855, "grad_norm": 2.331754557161545, "learning_rate": 3.5666452854393845e-06, "loss": 0.8108, "step": 26450 }, { "epoch": 0.3224135619660463, "grad_norm": 2.372662276776402, "learning_rate": 3.566324567030148e-06, "loss": 0.7653, "step": 26455 }, { "epoch": 0.32247449819019414, "grad_norm": 1.9557438513590084, "learning_rate": 3.566003848620911e-06, "loss": 0.7414, "step": 26460 }, { "epoch": 0.32253543441434196, "grad_norm": 2.8333366509513884, "learning_rate": 3.5656831302116744e-06, "loss": 0.8243, "step": 26465 }, { "epoch": 0.3225963706384898, "grad_norm": 2.3253510575194296, "learning_rate": 3.565362411802438e-06, "loss": 0.7629, "step": 26470 }, { "epoch": 0.32265730686263755, "grad_norm": 2.3299666862421264, "learning_rate": 3.565041693393201e-06, "loss": 0.9089, "step": 26475 }, { "epoch": 0.3227182430867854, "grad_norm": 2.953302120235367, "learning_rate": 3.5647209749839647e-06, "loss": 0.8487, "step": 26480 }, { "epoch": 0.3227791793109332, "grad_norm": 2.7416565311343586, "learning_rate": 3.5644002565747277e-06, "loss": 0.8234, "step": 26485 }, { "epoch": 0.32284011553508096, "grad_norm": 2.600531664430568, "learning_rate": 3.5640795381654908e-06, "loss": 0.7702, "step": 26490 }, { "epoch": 0.3229010517592288, "grad_norm": 2.2267567988263344, "learning_rate": 3.5637588197562546e-06, "loss": 0.7486, "step": 26495 }, { "epoch": 0.3229619879833766, "grad_norm": 2.8862322284176205, "learning_rate": 3.5634381013470176e-06, "loss": 0.7855, "step": 26500 }, { "epoch": 0.32302292420752443, "grad_norm": 2.5045061739965013, "learning_rate": 3.5631173829377806e-06, "loss": 0.7964, "step": 26505 }, { "epoch": 0.3230838604316722, "grad_norm": 2.106166759927997, "learning_rate": 3.5627966645285445e-06, "loss": 0.8133, "step": 26510 }, { "epoch": 0.32314479665582, "grad_norm": 2.7618562698299054, "learning_rate": 3.5624759461193075e-06, "loss": 0.8059, "step": 26515 }, { "epoch": 0.32320573287996784, "grad_norm": 2.412008982111841, "learning_rate": 3.5621552277100705e-06, "loss": 0.8347, "step": 26520 }, { "epoch": 0.3232666691041156, "grad_norm": 2.9071245250171116, "learning_rate": 3.5618345093008344e-06, "loss": 0.7227, "step": 26525 }, { "epoch": 0.32332760532826343, "grad_norm": 2.462325567546583, "learning_rate": 3.5615137908915974e-06, "loss": 0.8388, "step": 26530 }, { "epoch": 0.32338854155241126, "grad_norm": 2.366100130145532, "learning_rate": 3.561193072482361e-06, "loss": 0.791, "step": 26535 }, { "epoch": 0.3234494777765591, "grad_norm": 2.039221190091191, "learning_rate": 3.560872354073124e-06, "loss": 0.6584, "step": 26540 }, { "epoch": 0.32351041400070685, "grad_norm": 2.107986604410823, "learning_rate": 3.5605516356638873e-06, "loss": 0.733, "step": 26545 }, { "epoch": 0.32357135022485467, "grad_norm": 3.575053280982473, "learning_rate": 3.5602309172546508e-06, "loss": 0.8191, "step": 26550 }, { "epoch": 0.3236322864490025, "grad_norm": 2.6573162839620124, "learning_rate": 3.5599101988454138e-06, "loss": 0.7362, "step": 26555 }, { "epoch": 0.32369322267315026, "grad_norm": 3.108083596318087, "learning_rate": 3.5595894804361776e-06, "loss": 0.7287, "step": 26560 }, { "epoch": 0.3237541588972981, "grad_norm": 2.703416769839439, "learning_rate": 3.5592687620269407e-06, "loss": 0.8319, "step": 26565 }, { "epoch": 0.3238150951214459, "grad_norm": 2.314838053073056, "learning_rate": 3.5589480436177037e-06, "loss": 0.7867, "step": 26570 }, { "epoch": 0.32387603134559373, "grad_norm": 2.3327728981743636, "learning_rate": 3.5586273252084675e-06, "loss": 0.7988, "step": 26575 }, { "epoch": 0.3239369675697415, "grad_norm": 2.6503207193567926, "learning_rate": 3.5583066067992306e-06, "loss": 0.7211, "step": 26580 }, { "epoch": 0.3239979037938893, "grad_norm": 2.612893930094636, "learning_rate": 3.5579858883899936e-06, "loss": 0.7503, "step": 26585 }, { "epoch": 0.32405884001803714, "grad_norm": 2.373460615922765, "learning_rate": 3.5576651699807574e-06, "loss": 0.7948, "step": 26590 }, { "epoch": 0.3241197762421849, "grad_norm": 2.7034603178122207, "learning_rate": 3.5573444515715205e-06, "loss": 0.7837, "step": 26595 }, { "epoch": 0.32418071246633273, "grad_norm": 2.358399388153235, "learning_rate": 3.5570237331622835e-06, "loss": 0.8608, "step": 26600 }, { "epoch": 0.32424164869048056, "grad_norm": 2.5233546649537524, "learning_rate": 3.5567030147530473e-06, "loss": 0.7428, "step": 26605 }, { "epoch": 0.3243025849146283, "grad_norm": 2.851144659597652, "learning_rate": 3.5563822963438104e-06, "loss": 0.7907, "step": 26610 }, { "epoch": 0.32436352113877615, "grad_norm": 2.46945026978127, "learning_rate": 3.556061577934574e-06, "loss": 0.8375, "step": 26615 }, { "epoch": 0.32442445736292397, "grad_norm": 2.225521811504969, "learning_rate": 3.5557408595253372e-06, "loss": 0.7504, "step": 26620 }, { "epoch": 0.3244853935870718, "grad_norm": 2.453090803910921, "learning_rate": 3.5554201411161003e-06, "loss": 0.7845, "step": 26625 }, { "epoch": 0.32454632981121956, "grad_norm": 2.2893656101134283, "learning_rate": 3.5550994227068637e-06, "loss": 0.8696, "step": 26630 }, { "epoch": 0.3246072660353674, "grad_norm": 2.639412565038767, "learning_rate": 3.5547787042976267e-06, "loss": 0.8274, "step": 26635 }, { "epoch": 0.3246682022595152, "grad_norm": 2.3502015885277228, "learning_rate": 3.5544579858883906e-06, "loss": 0.8541, "step": 26640 }, { "epoch": 0.32472913848366297, "grad_norm": 2.595165381099908, "learning_rate": 3.5541372674791536e-06, "loss": 0.8039, "step": 26645 }, { "epoch": 0.3247900747078108, "grad_norm": 2.906797687564848, "learning_rate": 3.5538165490699166e-06, "loss": 0.8032, "step": 26650 }, { "epoch": 0.3248510109319586, "grad_norm": 2.2978415802734764, "learning_rate": 3.5534958306606805e-06, "loss": 0.7572, "step": 26655 }, { "epoch": 0.32491194715610644, "grad_norm": 2.6871513464732426, "learning_rate": 3.5531751122514435e-06, "loss": 0.7164, "step": 26660 }, { "epoch": 0.3249728833802542, "grad_norm": 2.254977887534057, "learning_rate": 3.5528543938422065e-06, "loss": 0.7922, "step": 26665 }, { "epoch": 0.32503381960440203, "grad_norm": 2.593314478283646, "learning_rate": 3.5525336754329704e-06, "loss": 0.8078, "step": 26670 }, { "epoch": 0.32509475582854985, "grad_norm": 3.6741013542871386, "learning_rate": 3.5522129570237334e-06, "loss": 0.7757, "step": 26675 }, { "epoch": 0.3251556920526976, "grad_norm": 2.436120950482879, "learning_rate": 3.551892238614497e-06, "loss": 0.852, "step": 26680 }, { "epoch": 0.32521662827684544, "grad_norm": 2.945288450796499, "learning_rate": 3.5515715202052603e-06, "loss": 0.7803, "step": 26685 }, { "epoch": 0.32527756450099327, "grad_norm": 3.0647992299256637, "learning_rate": 3.5512508017960233e-06, "loss": 0.8208, "step": 26690 }, { "epoch": 0.3253385007251411, "grad_norm": 2.059471002500727, "learning_rate": 3.5509300833867867e-06, "loss": 0.6892, "step": 26695 }, { "epoch": 0.32539943694928886, "grad_norm": 2.383541650954286, "learning_rate": 3.55060936497755e-06, "loss": 0.7212, "step": 26700 }, { "epoch": 0.3254603731734367, "grad_norm": 2.7359361571933243, "learning_rate": 3.5502886465683136e-06, "loss": 0.7184, "step": 26705 }, { "epoch": 0.3255213093975845, "grad_norm": 2.601612424180556, "learning_rate": 3.5499679281590766e-06, "loss": 0.7921, "step": 26710 }, { "epoch": 0.32558224562173227, "grad_norm": 2.5543077257797, "learning_rate": 3.5496472097498396e-06, "loss": 0.7815, "step": 26715 }, { "epoch": 0.3256431818458801, "grad_norm": 2.9483649066038673, "learning_rate": 3.5493264913406035e-06, "loss": 0.7841, "step": 26720 }, { "epoch": 0.3257041180700279, "grad_norm": 2.37466242191074, "learning_rate": 3.5490057729313665e-06, "loss": 0.7781, "step": 26725 }, { "epoch": 0.32576505429417574, "grad_norm": 2.2467033618782097, "learning_rate": 3.5486850545221295e-06, "loss": 0.7998, "step": 26730 }, { "epoch": 0.3258259905183235, "grad_norm": 2.423475812254205, "learning_rate": 3.5483643361128934e-06, "loss": 0.7632, "step": 26735 }, { "epoch": 0.32588692674247133, "grad_norm": 2.557141290153346, "learning_rate": 3.5480436177036564e-06, "loss": 0.7471, "step": 26740 }, { "epoch": 0.32594786296661915, "grad_norm": 2.279786948422927, "learning_rate": 3.5477228992944194e-06, "loss": 0.7796, "step": 26745 }, { "epoch": 0.3260087991907669, "grad_norm": 3.6661524151746576, "learning_rate": 3.5474021808851833e-06, "loss": 0.6988, "step": 26750 }, { "epoch": 0.32606973541491474, "grad_norm": 2.43852239243583, "learning_rate": 3.5470814624759463e-06, "loss": 0.8321, "step": 26755 }, { "epoch": 0.32613067163906256, "grad_norm": 2.2331683949149834, "learning_rate": 3.5467607440667097e-06, "loss": 0.8027, "step": 26760 }, { "epoch": 0.3261916078632104, "grad_norm": 2.3902360494571817, "learning_rate": 3.546440025657473e-06, "loss": 0.8834, "step": 26765 }, { "epoch": 0.32625254408735815, "grad_norm": 2.2365045265205126, "learning_rate": 3.546119307248236e-06, "loss": 0.7918, "step": 26770 }, { "epoch": 0.326313480311506, "grad_norm": 3.000330203969906, "learning_rate": 3.5457985888389996e-06, "loss": 0.7845, "step": 26775 }, { "epoch": 0.3263744165356538, "grad_norm": 2.354876089885394, "learning_rate": 3.545477870429763e-06, "loss": 0.7227, "step": 26780 }, { "epoch": 0.32643535275980157, "grad_norm": 2.5950015485386184, "learning_rate": 3.5451571520205265e-06, "loss": 0.8002, "step": 26785 }, { "epoch": 0.3264962889839494, "grad_norm": 2.350685988150772, "learning_rate": 3.5448364336112895e-06, "loss": 0.8023, "step": 26790 }, { "epoch": 0.3265572252080972, "grad_norm": 2.1935771425610393, "learning_rate": 3.5445157152020526e-06, "loss": 0.7307, "step": 26795 }, { "epoch": 0.32661816143224504, "grad_norm": 2.9183672777693266, "learning_rate": 3.5441949967928164e-06, "loss": 0.7521, "step": 26800 }, { "epoch": 0.3266790976563928, "grad_norm": 2.7140378935981655, "learning_rate": 3.5438742783835794e-06, "loss": 0.8527, "step": 26805 }, { "epoch": 0.3267400338805406, "grad_norm": 2.3057779617853305, "learning_rate": 3.5435535599743425e-06, "loss": 0.7883, "step": 26810 }, { "epoch": 0.32680097010468845, "grad_norm": 2.5328622550771303, "learning_rate": 3.5432328415651063e-06, "loss": 0.786, "step": 26815 }, { "epoch": 0.3268619063288362, "grad_norm": 2.6660975197537957, "learning_rate": 3.5429121231558693e-06, "loss": 0.828, "step": 26820 }, { "epoch": 0.32692284255298404, "grad_norm": 2.3539742604080853, "learning_rate": 3.5425914047466324e-06, "loss": 0.7869, "step": 26825 }, { "epoch": 0.32698377877713186, "grad_norm": 2.2817418368719165, "learning_rate": 3.5422706863373962e-06, "loss": 0.7767, "step": 26830 }, { "epoch": 0.3270447150012797, "grad_norm": 2.233074693839208, "learning_rate": 3.5419499679281592e-06, "loss": 0.8559, "step": 26835 }, { "epoch": 0.32710565122542745, "grad_norm": 2.9267946537778164, "learning_rate": 3.5416292495189227e-06, "loss": 0.7871, "step": 26840 }, { "epoch": 0.3271665874495753, "grad_norm": 2.184477203078521, "learning_rate": 3.541308531109686e-06, "loss": 0.7641, "step": 26845 }, { "epoch": 0.3272275236737231, "grad_norm": 2.6179737458154584, "learning_rate": 3.540987812700449e-06, "loss": 0.801, "step": 26850 }, { "epoch": 0.32728845989787086, "grad_norm": 3.444593615718046, "learning_rate": 3.5406670942912126e-06, "loss": 0.7494, "step": 26855 }, { "epoch": 0.3273493961220187, "grad_norm": 3.336071434433627, "learning_rate": 3.540346375881976e-06, "loss": 0.7426, "step": 26860 }, { "epoch": 0.3274103323461665, "grad_norm": 2.3958547573117106, "learning_rate": 3.5400256574727395e-06, "loss": 0.8099, "step": 26865 }, { "epoch": 0.32747126857031433, "grad_norm": 2.3348121078572373, "learning_rate": 3.5397049390635025e-06, "loss": 0.8383, "step": 26870 }, { "epoch": 0.3275322047944621, "grad_norm": 2.2262645055806862, "learning_rate": 3.5393842206542655e-06, "loss": 0.7735, "step": 26875 }, { "epoch": 0.3275931410186099, "grad_norm": 2.5978664132412224, "learning_rate": 3.5390635022450294e-06, "loss": 0.8211, "step": 26880 }, { "epoch": 0.32765407724275775, "grad_norm": 2.118183419456117, "learning_rate": 3.5387427838357924e-06, "loss": 0.7553, "step": 26885 }, { "epoch": 0.3277150134669055, "grad_norm": 2.2601732762720426, "learning_rate": 3.5384220654265554e-06, "loss": 0.7412, "step": 26890 }, { "epoch": 0.32777594969105334, "grad_norm": 2.550554022087005, "learning_rate": 3.5381013470173192e-06, "loss": 0.8174, "step": 26895 }, { "epoch": 0.32783688591520116, "grad_norm": 2.4121830173268255, "learning_rate": 3.5377806286080823e-06, "loss": 0.6713, "step": 26900 }, { "epoch": 0.327897822139349, "grad_norm": 2.7383578940315245, "learning_rate": 3.5374599101988453e-06, "loss": 0.8102, "step": 26905 }, { "epoch": 0.32795875836349675, "grad_norm": 2.586558556486434, "learning_rate": 3.537139191789609e-06, "loss": 0.7567, "step": 26910 }, { "epoch": 0.3280196945876446, "grad_norm": 2.8393444882926193, "learning_rate": 3.536818473380372e-06, "loss": 0.7466, "step": 26915 }, { "epoch": 0.3280806308117924, "grad_norm": 2.4953229049118435, "learning_rate": 3.5364977549711356e-06, "loss": 0.7737, "step": 26920 }, { "epoch": 0.32814156703594016, "grad_norm": 2.843690064243327, "learning_rate": 3.536177036561899e-06, "loss": 0.7935, "step": 26925 }, { "epoch": 0.328202503260088, "grad_norm": 2.969179551687346, "learning_rate": 3.5358563181526625e-06, "loss": 0.7979, "step": 26930 }, { "epoch": 0.3282634394842358, "grad_norm": 2.4419038567409026, "learning_rate": 3.5355355997434255e-06, "loss": 0.7861, "step": 26935 }, { "epoch": 0.32832437570838363, "grad_norm": 2.879202996177529, "learning_rate": 3.535214881334189e-06, "loss": 0.8375, "step": 26940 }, { "epoch": 0.3283853119325314, "grad_norm": 2.886419085465168, "learning_rate": 3.5348941629249524e-06, "loss": 0.8274, "step": 26945 }, { "epoch": 0.3284462481566792, "grad_norm": 2.256030878005913, "learning_rate": 3.5345734445157154e-06, "loss": 0.719, "step": 26950 }, { "epoch": 0.32850718438082704, "grad_norm": 2.129062156873949, "learning_rate": 3.5342527261064793e-06, "loss": 0.8012, "step": 26955 }, { "epoch": 0.3285681206049748, "grad_norm": 2.585732943907518, "learning_rate": 3.5339320076972423e-06, "loss": 0.8258, "step": 26960 }, { "epoch": 0.32862905682912263, "grad_norm": 2.1768851218272625, "learning_rate": 3.5336112892880053e-06, "loss": 0.7473, "step": 26965 }, { "epoch": 0.32868999305327046, "grad_norm": 2.6387005034874194, "learning_rate": 3.5332905708787683e-06, "loss": 0.8317, "step": 26970 }, { "epoch": 0.3287509292774183, "grad_norm": 2.1634646993153637, "learning_rate": 3.532969852469532e-06, "loss": 0.7496, "step": 26975 }, { "epoch": 0.32881186550156605, "grad_norm": 2.6098003231944955, "learning_rate": 3.532649134060295e-06, "loss": 0.7349, "step": 26980 }, { "epoch": 0.32887280172571387, "grad_norm": 2.256758544037013, "learning_rate": 3.5323284156510586e-06, "loss": 0.7496, "step": 26985 }, { "epoch": 0.3289337379498617, "grad_norm": 2.6022975939738333, "learning_rate": 3.532007697241822e-06, "loss": 0.8066, "step": 26990 }, { "epoch": 0.32899467417400946, "grad_norm": 2.9072423916939, "learning_rate": 3.531686978832585e-06, "loss": 0.7808, "step": 26995 }, { "epoch": 0.3290556103981573, "grad_norm": 3.1198309833281566, "learning_rate": 3.5313662604233485e-06, "loss": 0.7432, "step": 27000 }, { "epoch": 0.3291165466223051, "grad_norm": 2.0333764677972157, "learning_rate": 3.531045542014112e-06, "loss": 0.8154, "step": 27005 }, { "epoch": 0.32917748284645293, "grad_norm": 2.6368531947759877, "learning_rate": 3.5307248236048754e-06, "loss": 0.8267, "step": 27010 }, { "epoch": 0.3292384190706007, "grad_norm": 2.5875579492404457, "learning_rate": 3.5304041051956384e-06, "loss": 0.8154, "step": 27015 }, { "epoch": 0.3292993552947485, "grad_norm": 2.6909757080374295, "learning_rate": 3.530083386786402e-06, "loss": 0.7684, "step": 27020 }, { "epoch": 0.32936029151889634, "grad_norm": 2.66979917855135, "learning_rate": 3.5297626683771653e-06, "loss": 0.7254, "step": 27025 }, { "epoch": 0.3294212277430441, "grad_norm": 2.486285609147308, "learning_rate": 3.5294419499679283e-06, "loss": 0.7974, "step": 27030 }, { "epoch": 0.32948216396719193, "grad_norm": 2.476930193573253, "learning_rate": 3.529121231558692e-06, "loss": 0.8468, "step": 27035 }, { "epoch": 0.32954310019133976, "grad_norm": 2.095530139193248, "learning_rate": 3.528800513149455e-06, "loss": 0.8245, "step": 27040 }, { "epoch": 0.3296040364154876, "grad_norm": 3.761821265430123, "learning_rate": 3.5284797947402182e-06, "loss": 0.8262, "step": 27045 }, { "epoch": 0.32966497263963535, "grad_norm": 2.5006052232676916, "learning_rate": 3.5281590763309812e-06, "loss": 0.8355, "step": 27050 }, { "epoch": 0.32972590886378317, "grad_norm": 2.618602871447141, "learning_rate": 3.527838357921745e-06, "loss": 0.7551, "step": 27055 }, { "epoch": 0.329786845087931, "grad_norm": 2.6741142199366315, "learning_rate": 3.527517639512508e-06, "loss": 0.8759, "step": 27060 }, { "epoch": 0.32984778131207876, "grad_norm": 2.7438463819706547, "learning_rate": 3.5271969211032716e-06, "loss": 0.7789, "step": 27065 }, { "epoch": 0.3299087175362266, "grad_norm": 2.642434178872274, "learning_rate": 3.526876202694035e-06, "loss": 0.7333, "step": 27070 }, { "epoch": 0.3299696537603744, "grad_norm": 2.5907502258609174, "learning_rate": 3.526555484284798e-06, "loss": 0.79, "step": 27075 }, { "epoch": 0.33003058998452217, "grad_norm": 2.9728455040657837, "learning_rate": 3.5262347658755615e-06, "loss": 0.7177, "step": 27080 }, { "epoch": 0.33009152620867, "grad_norm": 2.8368848998776848, "learning_rate": 3.525914047466325e-06, "loss": 0.7267, "step": 27085 }, { "epoch": 0.3301524624328178, "grad_norm": 3.221908985120827, "learning_rate": 3.5255933290570883e-06, "loss": 0.7873, "step": 27090 }, { "epoch": 0.33021339865696564, "grad_norm": 4.006052295506497, "learning_rate": 3.5252726106478514e-06, "loss": 0.8502, "step": 27095 }, { "epoch": 0.3302743348811134, "grad_norm": 3.139848249072012, "learning_rate": 3.524951892238615e-06, "loss": 0.8288, "step": 27100 }, { "epoch": 0.33033527110526123, "grad_norm": 2.337796238680654, "learning_rate": 3.5246311738293782e-06, "loss": 0.7759, "step": 27105 }, { "epoch": 0.33039620732940905, "grad_norm": 2.413233682290267, "learning_rate": 3.5243104554201413e-06, "loss": 0.7793, "step": 27110 }, { "epoch": 0.3304571435535568, "grad_norm": 2.414653352729925, "learning_rate": 3.523989737010905e-06, "loss": 0.7899, "step": 27115 }, { "epoch": 0.33051807977770464, "grad_norm": 2.6369448316733286, "learning_rate": 3.523669018601668e-06, "loss": 0.833, "step": 27120 }, { "epoch": 0.33057901600185247, "grad_norm": 2.4339263937815567, "learning_rate": 3.523348300192431e-06, "loss": 0.8581, "step": 27125 }, { "epoch": 0.3306399522260003, "grad_norm": 2.2496592946234277, "learning_rate": 3.523027581783194e-06, "loss": 0.7371, "step": 27130 }, { "epoch": 0.33070088845014806, "grad_norm": 2.522090602282907, "learning_rate": 3.522706863373958e-06, "loss": 0.7148, "step": 27135 }, { "epoch": 0.3307618246742959, "grad_norm": 2.3914926981063083, "learning_rate": 3.522386144964721e-06, "loss": 0.8022, "step": 27140 }, { "epoch": 0.3308227608984437, "grad_norm": 2.365520628688189, "learning_rate": 3.5220654265554845e-06, "loss": 0.7762, "step": 27145 }, { "epoch": 0.33088369712259147, "grad_norm": 2.7731093104132194, "learning_rate": 3.521744708146248e-06, "loss": 0.8162, "step": 27150 }, { "epoch": 0.3309446333467393, "grad_norm": 2.357285838520772, "learning_rate": 3.5214239897370114e-06, "loss": 0.7428, "step": 27155 }, { "epoch": 0.3310055695708871, "grad_norm": 3.211043485697655, "learning_rate": 3.5211032713277744e-06, "loss": 0.7668, "step": 27160 }, { "epoch": 0.33106650579503494, "grad_norm": 1.8590842963064858, "learning_rate": 3.520782552918538e-06, "loss": 0.7477, "step": 27165 }, { "epoch": 0.3311274420191827, "grad_norm": 2.143614141064976, "learning_rate": 3.5204618345093013e-06, "loss": 0.7704, "step": 27170 }, { "epoch": 0.33118837824333053, "grad_norm": 2.973061534902744, "learning_rate": 3.5201411161000643e-06, "loss": 0.7511, "step": 27175 }, { "epoch": 0.33124931446747835, "grad_norm": 2.14206399767419, "learning_rate": 3.519820397690828e-06, "loss": 0.797, "step": 27180 }, { "epoch": 0.3313102506916261, "grad_norm": 2.796044394468421, "learning_rate": 3.519499679281591e-06, "loss": 0.8324, "step": 27185 }, { "epoch": 0.33137118691577394, "grad_norm": 2.2078087910663218, "learning_rate": 3.519178960872354e-06, "loss": 0.7764, "step": 27190 }, { "epoch": 0.33143212313992176, "grad_norm": 2.3673216938143695, "learning_rate": 3.518858242463118e-06, "loss": 0.7713, "step": 27195 }, { "epoch": 0.3314930593640696, "grad_norm": 2.557778508529669, "learning_rate": 3.518537524053881e-06, "loss": 0.7348, "step": 27200 }, { "epoch": 0.33155399558821735, "grad_norm": 1.9820460640610926, "learning_rate": 3.518216805644644e-06, "loss": 0.7884, "step": 27205 }, { "epoch": 0.3316149318123652, "grad_norm": 2.483384030369637, "learning_rate": 3.517896087235408e-06, "loss": 0.8036, "step": 27210 }, { "epoch": 0.331675868036513, "grad_norm": 2.258401832753174, "learning_rate": 3.517575368826171e-06, "loss": 0.7328, "step": 27215 }, { "epoch": 0.33173680426066077, "grad_norm": 2.4025180815151144, "learning_rate": 3.517254650416934e-06, "loss": 0.8662, "step": 27220 }, { "epoch": 0.3317977404848086, "grad_norm": 2.257329392043018, "learning_rate": 3.5169339320076974e-06, "loss": 0.6916, "step": 27225 }, { "epoch": 0.3318586767089564, "grad_norm": 2.65353171664209, "learning_rate": 3.516613213598461e-06, "loss": 0.7499, "step": 27230 }, { "epoch": 0.33191961293310424, "grad_norm": 2.723121658178223, "learning_rate": 3.5162924951892243e-06, "loss": 0.7654, "step": 27235 }, { "epoch": 0.331980549157252, "grad_norm": 1.9854986285427885, "learning_rate": 3.5159717767799873e-06, "loss": 0.8293, "step": 27240 }, { "epoch": 0.3320414853813998, "grad_norm": 2.3703024117874842, "learning_rate": 3.5156510583707508e-06, "loss": 0.8347, "step": 27245 }, { "epoch": 0.33210242160554765, "grad_norm": 3.4502085691877857, "learning_rate": 3.515330339961514e-06, "loss": 0.7623, "step": 27250 }, { "epoch": 0.3321633578296954, "grad_norm": 2.2584426760355685, "learning_rate": 3.515009621552277e-06, "loss": 0.7711, "step": 27255 }, { "epoch": 0.33222429405384324, "grad_norm": 2.4092034143164147, "learning_rate": 3.514688903143041e-06, "loss": 0.7714, "step": 27260 }, { "epoch": 0.33228523027799106, "grad_norm": 2.402603026992744, "learning_rate": 3.514368184733804e-06, "loss": 0.8196, "step": 27265 }, { "epoch": 0.3323461665021389, "grad_norm": 3.3049541446185775, "learning_rate": 3.514047466324567e-06, "loss": 0.7934, "step": 27270 }, { "epoch": 0.33240710272628665, "grad_norm": 2.4304910692195443, "learning_rate": 3.513726747915331e-06, "loss": 0.7922, "step": 27275 }, { "epoch": 0.3324680389504345, "grad_norm": 3.333697190950138, "learning_rate": 3.513406029506094e-06, "loss": 0.8521, "step": 27280 }, { "epoch": 0.3325289751745823, "grad_norm": 2.232840936832847, "learning_rate": 3.513085311096857e-06, "loss": 0.7679, "step": 27285 }, { "epoch": 0.33258991139873006, "grad_norm": 2.730682910354783, "learning_rate": 3.512764592687621e-06, "loss": 0.7447, "step": 27290 }, { "epoch": 0.3326508476228779, "grad_norm": 2.479349788700911, "learning_rate": 3.512443874278384e-06, "loss": 0.7082, "step": 27295 }, { "epoch": 0.3327117838470257, "grad_norm": 2.891924404909573, "learning_rate": 3.512123155869147e-06, "loss": 0.7194, "step": 27300 }, { "epoch": 0.33277272007117353, "grad_norm": 2.714988785861961, "learning_rate": 3.5118024374599103e-06, "loss": 0.8039, "step": 27305 }, { "epoch": 0.3328336562953213, "grad_norm": 3.671021511671086, "learning_rate": 3.5114817190506738e-06, "loss": 0.8589, "step": 27310 }, { "epoch": 0.3328945925194691, "grad_norm": 2.530137643804667, "learning_rate": 3.5111610006414372e-06, "loss": 0.7957, "step": 27315 }, { "epoch": 0.33295552874361695, "grad_norm": 2.930209172291461, "learning_rate": 3.5108402822322002e-06, "loss": 0.8006, "step": 27320 }, { "epoch": 0.3330164649677647, "grad_norm": 1.795990964957475, "learning_rate": 3.5105195638229637e-06, "loss": 0.761, "step": 27325 }, { "epoch": 0.33307740119191254, "grad_norm": 2.519462050160051, "learning_rate": 3.510198845413727e-06, "loss": 0.7289, "step": 27330 }, { "epoch": 0.33313833741606036, "grad_norm": 3.1491405353727204, "learning_rate": 3.50987812700449e-06, "loss": 0.8334, "step": 27335 }, { "epoch": 0.3331992736402082, "grad_norm": 2.4298467404028337, "learning_rate": 3.509557408595254e-06, "loss": 0.7377, "step": 27340 }, { "epoch": 0.33326020986435595, "grad_norm": 2.6777214140484973, "learning_rate": 3.509236690186017e-06, "loss": 0.7042, "step": 27345 }, { "epoch": 0.3333211460885038, "grad_norm": 2.3653309898263446, "learning_rate": 3.50891597177678e-06, "loss": 0.7579, "step": 27350 }, { "epoch": 0.3333820823126516, "grad_norm": 2.105770306178686, "learning_rate": 3.508595253367544e-06, "loss": 0.8123, "step": 27355 }, { "epoch": 0.33344301853679936, "grad_norm": 3.963180440715133, "learning_rate": 3.508274534958307e-06, "loss": 0.8299, "step": 27360 }, { "epoch": 0.3335039547609472, "grad_norm": 2.5217414716875317, "learning_rate": 3.50795381654907e-06, "loss": 0.7819, "step": 27365 }, { "epoch": 0.333564890985095, "grad_norm": 2.513853554364512, "learning_rate": 3.507633098139834e-06, "loss": 0.8242, "step": 27370 }, { "epoch": 0.33362582720924283, "grad_norm": 2.2178710046860792, "learning_rate": 3.507312379730597e-06, "loss": 0.8311, "step": 27375 }, { "epoch": 0.3336867634333906, "grad_norm": 2.8200675843513063, "learning_rate": 3.5069916613213603e-06, "loss": 0.8431, "step": 27380 }, { "epoch": 0.3337476996575384, "grad_norm": 2.174778082266431, "learning_rate": 3.5066709429121233e-06, "loss": 0.7571, "step": 27385 }, { "epoch": 0.33380863588168624, "grad_norm": 3.3896646045672814, "learning_rate": 3.5063502245028867e-06, "loss": 0.7107, "step": 27390 }, { "epoch": 0.333869572105834, "grad_norm": 2.383961914108259, "learning_rate": 3.50602950609365e-06, "loss": 0.7969, "step": 27395 }, { "epoch": 0.33393050832998183, "grad_norm": 3.57047300917656, "learning_rate": 3.505708787684413e-06, "loss": 0.794, "step": 27400 }, { "epoch": 0.33399144455412966, "grad_norm": 2.7174322986201895, "learning_rate": 3.505388069275177e-06, "loss": 0.7593, "step": 27405 }, { "epoch": 0.3340523807782775, "grad_norm": 2.491216012330704, "learning_rate": 3.50506735086594e-06, "loss": 0.7786, "step": 27410 }, { "epoch": 0.33411331700242525, "grad_norm": 2.897842037354935, "learning_rate": 3.504746632456703e-06, "loss": 0.6998, "step": 27415 }, { "epoch": 0.33417425322657307, "grad_norm": 2.128352661407559, "learning_rate": 3.504425914047467e-06, "loss": 0.7547, "step": 27420 }, { "epoch": 0.3342351894507209, "grad_norm": 2.415334769037727, "learning_rate": 3.50410519563823e-06, "loss": 0.7083, "step": 27425 }, { "epoch": 0.33429612567486866, "grad_norm": 3.168940145759539, "learning_rate": 3.503784477228993e-06, "loss": 0.7394, "step": 27430 }, { "epoch": 0.3343570618990165, "grad_norm": 2.133573443506565, "learning_rate": 3.503463758819757e-06, "loss": 0.7691, "step": 27435 }, { "epoch": 0.3344179981231643, "grad_norm": 2.9829391506801843, "learning_rate": 3.50314304041052e-06, "loss": 0.8206, "step": 27440 }, { "epoch": 0.33447893434731213, "grad_norm": 2.235864146562341, "learning_rate": 3.502822322001283e-06, "loss": 0.8166, "step": 27445 }, { "epoch": 0.3345398705714599, "grad_norm": 1.956808674895635, "learning_rate": 3.5025016035920467e-06, "loss": 0.7722, "step": 27450 }, { "epoch": 0.3346008067956077, "grad_norm": 2.485408850967152, "learning_rate": 3.5021808851828097e-06, "loss": 0.8269, "step": 27455 }, { "epoch": 0.33466174301975554, "grad_norm": 2.7437824197338316, "learning_rate": 3.501860166773573e-06, "loss": 0.7447, "step": 27460 }, { "epoch": 0.3347226792439033, "grad_norm": 2.759134081504267, "learning_rate": 3.501539448364336e-06, "loss": 0.7747, "step": 27465 }, { "epoch": 0.33478361546805113, "grad_norm": 2.4317080602276344, "learning_rate": 3.5012187299550996e-06, "loss": 0.7155, "step": 27470 }, { "epoch": 0.33484455169219896, "grad_norm": 2.8564178382891905, "learning_rate": 3.500898011545863e-06, "loss": 0.8757, "step": 27475 }, { "epoch": 0.3349054879163468, "grad_norm": 1.9279024155361233, "learning_rate": 3.500577293136626e-06, "loss": 0.7666, "step": 27480 }, { "epoch": 0.33496642414049455, "grad_norm": 2.2341414908706434, "learning_rate": 3.50025657472739e-06, "loss": 0.8105, "step": 27485 }, { "epoch": 0.33502736036464237, "grad_norm": 2.3723043224746303, "learning_rate": 3.499935856318153e-06, "loss": 0.7968, "step": 27490 }, { "epoch": 0.3350882965887902, "grad_norm": 2.6868226224481657, "learning_rate": 3.499615137908916e-06, "loss": 0.7088, "step": 27495 }, { "epoch": 0.33514923281293796, "grad_norm": 2.3551241189700054, "learning_rate": 3.49929441949968e-06, "loss": 0.7529, "step": 27500 }, { "epoch": 0.3352101690370858, "grad_norm": 2.749219856240465, "learning_rate": 3.498973701090443e-06, "loss": 0.81, "step": 27505 }, { "epoch": 0.3352711052612336, "grad_norm": 2.6496501348002655, "learning_rate": 3.498652982681206e-06, "loss": 0.7294, "step": 27510 }, { "epoch": 0.3353320414853814, "grad_norm": 2.08740669698577, "learning_rate": 3.4983322642719698e-06, "loss": 0.7642, "step": 27515 }, { "epoch": 0.3353929777095292, "grad_norm": 4.14535863360184, "learning_rate": 3.4980115458627328e-06, "loss": 0.769, "step": 27520 }, { "epoch": 0.335453913933677, "grad_norm": 2.646189646715722, "learning_rate": 3.4976908274534958e-06, "loss": 0.8091, "step": 27525 }, { "epoch": 0.33551485015782484, "grad_norm": 2.1850456877067295, "learning_rate": 3.4973701090442596e-06, "loss": 0.723, "step": 27530 }, { "epoch": 0.3355757863819726, "grad_norm": 2.2044911247832712, "learning_rate": 3.4970493906350227e-06, "loss": 0.74, "step": 27535 }, { "epoch": 0.33563672260612043, "grad_norm": 2.854340962540436, "learning_rate": 3.496728672225786e-06, "loss": 0.7483, "step": 27540 }, { "epoch": 0.33569765883026825, "grad_norm": 2.5119704194798165, "learning_rate": 3.4964079538165495e-06, "loss": 0.734, "step": 27545 }, { "epoch": 0.3357585950544161, "grad_norm": 2.7881081173720084, "learning_rate": 3.4960872354073126e-06, "loss": 0.6852, "step": 27550 }, { "epoch": 0.33581953127856384, "grad_norm": 2.437982043047618, "learning_rate": 3.495766516998076e-06, "loss": 0.7999, "step": 27555 }, { "epoch": 0.33588046750271167, "grad_norm": 2.279947997894072, "learning_rate": 3.495445798588839e-06, "loss": 0.7713, "step": 27560 }, { "epoch": 0.3359414037268595, "grad_norm": 2.7367684370152467, "learning_rate": 3.495125080179603e-06, "loss": 0.7441, "step": 27565 }, { "epoch": 0.33600233995100726, "grad_norm": 2.418070976374683, "learning_rate": 3.494804361770366e-06, "loss": 0.8096, "step": 27570 }, { "epoch": 0.3360632761751551, "grad_norm": 2.2985884664292326, "learning_rate": 3.494483643361129e-06, "loss": 0.7912, "step": 27575 }, { "epoch": 0.3361242123993029, "grad_norm": 2.454308370778224, "learning_rate": 3.4941629249518928e-06, "loss": 0.84, "step": 27580 }, { "epoch": 0.33618514862345067, "grad_norm": 2.6739417249654287, "learning_rate": 3.493842206542656e-06, "loss": 0.8385, "step": 27585 }, { "epoch": 0.3362460848475985, "grad_norm": 2.6254606973169037, "learning_rate": 3.493521488133419e-06, "loss": 0.6922, "step": 27590 }, { "epoch": 0.3363070210717463, "grad_norm": 2.252164589449479, "learning_rate": 3.4932007697241827e-06, "loss": 0.7458, "step": 27595 }, { "epoch": 0.33636795729589414, "grad_norm": 2.59534941701467, "learning_rate": 3.4928800513149457e-06, "loss": 0.8465, "step": 27600 }, { "epoch": 0.3364288935200419, "grad_norm": 2.875536230151007, "learning_rate": 3.4925593329057087e-06, "loss": 0.7508, "step": 27605 }, { "epoch": 0.33648982974418973, "grad_norm": 2.5190799992051223, "learning_rate": 3.4922386144964726e-06, "loss": 0.8059, "step": 27610 }, { "epoch": 0.33655076596833755, "grad_norm": 2.738144703526653, "learning_rate": 3.4919178960872356e-06, "loss": 0.7284, "step": 27615 }, { "epoch": 0.3366117021924853, "grad_norm": 2.264439576750254, "learning_rate": 3.491597177677999e-06, "loss": 0.7601, "step": 27620 }, { "epoch": 0.33667263841663314, "grad_norm": 2.0933955301226006, "learning_rate": 3.4912764592687625e-06, "loss": 0.7474, "step": 27625 }, { "epoch": 0.33673357464078096, "grad_norm": 2.416501458112799, "learning_rate": 3.490955740859526e-06, "loss": 0.7488, "step": 27630 }, { "epoch": 0.3367945108649288, "grad_norm": 2.72294908323816, "learning_rate": 3.490635022450289e-06, "loss": 0.767, "step": 27635 }, { "epoch": 0.33685544708907655, "grad_norm": 1.923880355864357, "learning_rate": 3.490314304041052e-06, "loss": 0.7722, "step": 27640 }, { "epoch": 0.3369163833132244, "grad_norm": 2.435664620993184, "learning_rate": 3.489993585631816e-06, "loss": 0.77, "step": 27645 }, { "epoch": 0.3369773195373722, "grad_norm": 2.397498840943125, "learning_rate": 3.489672867222579e-06, "loss": 0.7961, "step": 27650 }, { "epoch": 0.33703825576151997, "grad_norm": 2.1707212177005855, "learning_rate": 3.489352148813342e-06, "loss": 0.7953, "step": 27655 }, { "epoch": 0.3370991919856678, "grad_norm": 2.9704744787456607, "learning_rate": 3.4890314304041057e-06, "loss": 0.8351, "step": 27660 }, { "epoch": 0.3371601282098156, "grad_norm": 2.5503159918884397, "learning_rate": 3.4887107119948687e-06, "loss": 0.7831, "step": 27665 }, { "epoch": 0.33722106443396344, "grad_norm": 2.6171116608000005, "learning_rate": 3.4883899935856317e-06, "loss": 0.7969, "step": 27670 }, { "epoch": 0.3372820006581112, "grad_norm": 2.8972310341410474, "learning_rate": 3.4880692751763956e-06, "loss": 0.7657, "step": 27675 }, { "epoch": 0.337342936882259, "grad_norm": 2.485032548457239, "learning_rate": 3.4877485567671586e-06, "loss": 0.7716, "step": 27680 }, { "epoch": 0.33740387310640685, "grad_norm": 2.217942510928465, "learning_rate": 3.487427838357922e-06, "loss": 0.7802, "step": 27685 }, { "epoch": 0.3374648093305546, "grad_norm": 2.131725809021838, "learning_rate": 3.4871071199486855e-06, "loss": 0.7614, "step": 27690 }, { "epoch": 0.33752574555470244, "grad_norm": 2.935108600995068, "learning_rate": 3.4867864015394485e-06, "loss": 0.7563, "step": 27695 }, { "epoch": 0.33758668177885026, "grad_norm": 2.6845122767851883, "learning_rate": 3.486465683130212e-06, "loss": 0.7983, "step": 27700 }, { "epoch": 0.3376476180029981, "grad_norm": 2.4219731468844863, "learning_rate": 3.4861449647209754e-06, "loss": 0.7631, "step": 27705 }, { "epoch": 0.33770855422714585, "grad_norm": 2.6066988115964107, "learning_rate": 3.485824246311739e-06, "loss": 0.7691, "step": 27710 }, { "epoch": 0.3377694904512937, "grad_norm": 2.1420857973334946, "learning_rate": 3.485503527902502e-06, "loss": 0.7402, "step": 27715 }, { "epoch": 0.3378304266754415, "grad_norm": 2.527864400457504, "learning_rate": 3.485182809493265e-06, "loss": 0.8519, "step": 27720 }, { "epoch": 0.33789136289958926, "grad_norm": 2.56705926742595, "learning_rate": 3.4848620910840287e-06, "loss": 0.8154, "step": 27725 }, { "epoch": 0.3379522991237371, "grad_norm": 2.4343990491946754, "learning_rate": 3.4845413726747918e-06, "loss": 0.7704, "step": 27730 }, { "epoch": 0.3380132353478849, "grad_norm": 2.6254829410577107, "learning_rate": 3.4842206542655548e-06, "loss": 0.7893, "step": 27735 }, { "epoch": 0.33807417157203273, "grad_norm": 2.7880124370853197, "learning_rate": 3.4838999358563186e-06, "loss": 0.8187, "step": 27740 }, { "epoch": 0.3381351077961805, "grad_norm": 2.4839910816018116, "learning_rate": 3.4835792174470817e-06, "loss": 0.7323, "step": 27745 }, { "epoch": 0.3381960440203283, "grad_norm": 2.389777378963497, "learning_rate": 3.4832584990378447e-06, "loss": 0.7989, "step": 27750 }, { "epoch": 0.33825698024447615, "grad_norm": 1.994176269557435, "learning_rate": 3.4829377806286085e-06, "loss": 0.801, "step": 27755 }, { "epoch": 0.3383179164686239, "grad_norm": 2.6971296245687935, "learning_rate": 3.4826170622193716e-06, "loss": 0.7768, "step": 27760 }, { "epoch": 0.33837885269277174, "grad_norm": 1.819749313378063, "learning_rate": 3.482296343810135e-06, "loss": 0.7137, "step": 27765 }, { "epoch": 0.33843978891691956, "grad_norm": 2.338310496785419, "learning_rate": 3.4819756254008984e-06, "loss": 0.8395, "step": 27770 }, { "epoch": 0.3385007251410674, "grad_norm": 2.7698614361439335, "learning_rate": 3.4816549069916614e-06, "loss": 0.7796, "step": 27775 }, { "epoch": 0.33856166136521515, "grad_norm": 4.177442037619798, "learning_rate": 3.481334188582425e-06, "loss": 0.7822, "step": 27780 }, { "epoch": 0.338622597589363, "grad_norm": 2.2916660749625577, "learning_rate": 3.4810134701731883e-06, "loss": 0.7365, "step": 27785 }, { "epoch": 0.3386835338135108, "grad_norm": 2.4454873122719203, "learning_rate": 3.4806927517639518e-06, "loss": 0.802, "step": 27790 }, { "epoch": 0.33874447003765856, "grad_norm": 2.2452552313260155, "learning_rate": 3.4803720333547148e-06, "loss": 0.7499, "step": 27795 }, { "epoch": 0.3388054062618064, "grad_norm": 2.4597451348310067, "learning_rate": 3.4800513149454782e-06, "loss": 0.7487, "step": 27800 }, { "epoch": 0.3388663424859542, "grad_norm": 2.685000157967815, "learning_rate": 3.4797305965362417e-06, "loss": 0.7772, "step": 27805 }, { "epoch": 0.33892727871010203, "grad_norm": 2.681964999411752, "learning_rate": 3.4794098781270047e-06, "loss": 0.7578, "step": 27810 }, { "epoch": 0.3389882149342498, "grad_norm": 3.1314066876158586, "learning_rate": 3.4790891597177677e-06, "loss": 0.786, "step": 27815 }, { "epoch": 0.3390491511583976, "grad_norm": 2.0191476209469075, "learning_rate": 3.4787684413085316e-06, "loss": 0.7853, "step": 27820 }, { "epoch": 0.33911008738254544, "grad_norm": 2.78649557538906, "learning_rate": 3.4784477228992946e-06, "loss": 0.7482, "step": 27825 }, { "epoch": 0.3391710236066932, "grad_norm": 2.4177255840639154, "learning_rate": 3.4781270044900576e-06, "loss": 0.8075, "step": 27830 }, { "epoch": 0.33923195983084103, "grad_norm": 2.391673508014513, "learning_rate": 3.4778062860808215e-06, "loss": 0.7742, "step": 27835 }, { "epoch": 0.33929289605498886, "grad_norm": 2.3517561699403875, "learning_rate": 3.4774855676715845e-06, "loss": 0.8087, "step": 27840 }, { "epoch": 0.3393538322791367, "grad_norm": 3.0762350457632843, "learning_rate": 3.477164849262348e-06, "loss": 0.849, "step": 27845 }, { "epoch": 0.33941476850328445, "grad_norm": 3.0489013570486616, "learning_rate": 3.4768441308531114e-06, "loss": 0.8113, "step": 27850 }, { "epoch": 0.33947570472743227, "grad_norm": 2.3345752830468043, "learning_rate": 3.476523412443875e-06, "loss": 0.7579, "step": 27855 }, { "epoch": 0.3395366409515801, "grad_norm": 2.247126961932001, "learning_rate": 3.476202694034638e-06, "loss": 0.8705, "step": 27860 }, { "epoch": 0.33959757717572786, "grad_norm": 2.4559520449802745, "learning_rate": 3.4758819756254013e-06, "loss": 0.8457, "step": 27865 }, { "epoch": 0.3396585133998757, "grad_norm": 2.1835895190413175, "learning_rate": 3.4755612572161647e-06, "loss": 0.7615, "step": 27870 }, { "epoch": 0.3397194496240235, "grad_norm": 3.837422010455471, "learning_rate": 3.4752405388069277e-06, "loss": 0.7426, "step": 27875 }, { "epoch": 0.33978038584817133, "grad_norm": 3.2808198942503335, "learning_rate": 3.4749198203976916e-06, "loss": 0.7322, "step": 27880 }, { "epoch": 0.3398413220723191, "grad_norm": 2.815561004549425, "learning_rate": 3.4745991019884546e-06, "loss": 0.7554, "step": 27885 }, { "epoch": 0.3399022582964669, "grad_norm": 2.452807377291678, "learning_rate": 3.4742783835792176e-06, "loss": 0.773, "step": 27890 }, { "epoch": 0.33996319452061474, "grad_norm": 3.955872227353753, "learning_rate": 3.4739576651699806e-06, "loss": 0.7124, "step": 27895 }, { "epoch": 0.3400241307447625, "grad_norm": 2.1142686949400193, "learning_rate": 3.4736369467607445e-06, "loss": 0.8634, "step": 27900 }, { "epoch": 0.34008506696891033, "grad_norm": 2.227270526228777, "learning_rate": 3.4733162283515075e-06, "loss": 0.7498, "step": 27905 }, { "epoch": 0.34014600319305816, "grad_norm": 4.058991463903735, "learning_rate": 3.472995509942271e-06, "loss": 0.7935, "step": 27910 }, { "epoch": 0.340206939417206, "grad_norm": 2.096527724541881, "learning_rate": 3.4726747915330344e-06, "loss": 0.7043, "step": 27915 }, { "epoch": 0.34026787564135375, "grad_norm": 2.0725388711684514, "learning_rate": 3.4723540731237974e-06, "loss": 0.7311, "step": 27920 }, { "epoch": 0.34032881186550157, "grad_norm": 3.0400679406182207, "learning_rate": 3.472033354714561e-06, "loss": 0.8099, "step": 27925 }, { "epoch": 0.3403897480896494, "grad_norm": 1.8190559734392469, "learning_rate": 3.4717126363053243e-06, "loss": 0.7686, "step": 27930 }, { "epoch": 0.34045068431379716, "grad_norm": 4.389847044123188, "learning_rate": 3.4713919178960877e-06, "loss": 0.8529, "step": 27935 }, { "epoch": 0.340511620537945, "grad_norm": 2.0164792887485388, "learning_rate": 3.4710711994868507e-06, "loss": 0.784, "step": 27940 }, { "epoch": 0.3405725567620928, "grad_norm": 2.8599881359632118, "learning_rate": 3.470750481077614e-06, "loss": 0.7363, "step": 27945 }, { "epoch": 0.3406334929862406, "grad_norm": 2.4585849838349456, "learning_rate": 3.4704297626683776e-06, "loss": 0.7337, "step": 27950 }, { "epoch": 0.3406944292103884, "grad_norm": 2.124542423217743, "learning_rate": 3.4701090442591406e-06, "loss": 0.8127, "step": 27955 }, { "epoch": 0.3407553654345362, "grad_norm": 2.2283677748058177, "learning_rate": 3.4697883258499045e-06, "loss": 0.8782, "step": 27960 }, { "epoch": 0.34081630165868404, "grad_norm": 2.2772439455656786, "learning_rate": 3.4694676074406675e-06, "loss": 0.7407, "step": 27965 }, { "epoch": 0.3408772378828318, "grad_norm": 1.9968315397593928, "learning_rate": 3.4691468890314305e-06, "loss": 0.6962, "step": 27970 }, { "epoch": 0.34093817410697963, "grad_norm": 2.328580687680873, "learning_rate": 3.4688261706221936e-06, "loss": 0.7219, "step": 27975 }, { "epoch": 0.34099911033112745, "grad_norm": 3.109899128984444, "learning_rate": 3.4685054522129574e-06, "loss": 0.8188, "step": 27980 }, { "epoch": 0.3410600465552753, "grad_norm": 2.257784248105552, "learning_rate": 3.4681847338037204e-06, "loss": 0.7915, "step": 27985 }, { "epoch": 0.34112098277942304, "grad_norm": 2.166976260358971, "learning_rate": 3.467864015394484e-06, "loss": 0.7388, "step": 27990 }, { "epoch": 0.34118191900357087, "grad_norm": 2.7554366354731283, "learning_rate": 3.4675432969852473e-06, "loss": 0.7267, "step": 27995 }, { "epoch": 0.3412428552277187, "grad_norm": 3.433853233779811, "learning_rate": 3.4672225785760103e-06, "loss": 0.7335, "step": 28000 }, { "epoch": 0.34130379145186646, "grad_norm": 2.926471410514853, "learning_rate": 3.4669018601667738e-06, "loss": 0.7525, "step": 28005 }, { "epoch": 0.3413647276760143, "grad_norm": 2.225814574674199, "learning_rate": 3.4665811417575372e-06, "loss": 0.7873, "step": 28010 }, { "epoch": 0.3414256639001621, "grad_norm": 2.8232526001996088, "learning_rate": 3.4662604233483007e-06, "loss": 0.7952, "step": 28015 }, { "epoch": 0.3414866001243099, "grad_norm": 3.3567378454369288, "learning_rate": 3.4659397049390637e-06, "loss": 0.8613, "step": 28020 }, { "epoch": 0.3415475363484577, "grad_norm": 2.3436290896732297, "learning_rate": 3.465618986529827e-06, "loss": 0.8144, "step": 28025 }, { "epoch": 0.3416084725726055, "grad_norm": 2.837940068835568, "learning_rate": 3.4652982681205905e-06, "loss": 0.7987, "step": 28030 }, { "epoch": 0.34166940879675334, "grad_norm": 2.6305367013909544, "learning_rate": 3.4649775497113536e-06, "loss": 0.7698, "step": 28035 }, { "epoch": 0.3417303450209011, "grad_norm": 2.3458045051749323, "learning_rate": 3.4646568313021174e-06, "loss": 0.7946, "step": 28040 }, { "epoch": 0.34179128124504893, "grad_norm": 2.7164374193639644, "learning_rate": 3.4643361128928804e-06, "loss": 0.7691, "step": 28045 }, { "epoch": 0.34185221746919675, "grad_norm": 2.6600164527106136, "learning_rate": 3.4640153944836435e-06, "loss": 0.7344, "step": 28050 }, { "epoch": 0.3419131536933445, "grad_norm": 2.686213071572317, "learning_rate": 3.4636946760744065e-06, "loss": 0.8037, "step": 28055 }, { "epoch": 0.34197408991749234, "grad_norm": 2.599296223647737, "learning_rate": 3.4633739576651703e-06, "loss": 0.8357, "step": 28060 }, { "epoch": 0.34203502614164016, "grad_norm": 2.3685273776149303, "learning_rate": 3.4630532392559334e-06, "loss": 0.6637, "step": 28065 }, { "epoch": 0.342095962365788, "grad_norm": 2.1846998692309767, "learning_rate": 3.462732520846697e-06, "loss": 0.7738, "step": 28070 }, { "epoch": 0.34215689858993575, "grad_norm": 2.1195314035479758, "learning_rate": 3.4624118024374602e-06, "loss": 0.7383, "step": 28075 }, { "epoch": 0.3422178348140836, "grad_norm": 2.1908087860501353, "learning_rate": 3.4620910840282233e-06, "loss": 0.7452, "step": 28080 }, { "epoch": 0.3422787710382314, "grad_norm": 2.8080535814028327, "learning_rate": 3.4617703656189867e-06, "loss": 0.7401, "step": 28085 }, { "epoch": 0.34233970726237917, "grad_norm": 3.6154817031092836, "learning_rate": 3.46144964720975e-06, "loss": 0.8336, "step": 28090 }, { "epoch": 0.342400643486527, "grad_norm": 2.4851179550937412, "learning_rate": 3.4611289288005136e-06, "loss": 0.7461, "step": 28095 }, { "epoch": 0.3424615797106748, "grad_norm": 2.3115692187148933, "learning_rate": 3.4608082103912766e-06, "loss": 0.8033, "step": 28100 }, { "epoch": 0.34252251593482264, "grad_norm": 2.367305315064722, "learning_rate": 3.4604874919820405e-06, "loss": 0.7781, "step": 28105 }, { "epoch": 0.3425834521589704, "grad_norm": 2.654172630894642, "learning_rate": 3.4601667735728035e-06, "loss": 0.7404, "step": 28110 }, { "epoch": 0.3426443883831182, "grad_norm": 3.020341315367316, "learning_rate": 3.4598460551635665e-06, "loss": 0.7217, "step": 28115 }, { "epoch": 0.34270532460726605, "grad_norm": 2.2134100499778713, "learning_rate": 3.4595253367543304e-06, "loss": 0.8352, "step": 28120 }, { "epoch": 0.3427662608314138, "grad_norm": 2.740949785311847, "learning_rate": 3.4592046183450934e-06, "loss": 0.7823, "step": 28125 }, { "epoch": 0.34282719705556164, "grad_norm": 2.7737119993281203, "learning_rate": 3.4588838999358564e-06, "loss": 0.8374, "step": 28130 }, { "epoch": 0.34288813327970946, "grad_norm": 2.1410824212905215, "learning_rate": 3.4585631815266203e-06, "loss": 0.7748, "step": 28135 }, { "epoch": 0.3429490695038573, "grad_norm": 2.367862238651199, "learning_rate": 3.4582424631173833e-06, "loss": 0.8823, "step": 28140 }, { "epoch": 0.34301000572800505, "grad_norm": 2.705778369638011, "learning_rate": 3.4579217447081463e-06, "loss": 0.7675, "step": 28145 }, { "epoch": 0.3430709419521529, "grad_norm": 2.121756825362353, "learning_rate": 3.4576010262989097e-06, "loss": 0.7727, "step": 28150 }, { "epoch": 0.3431318781763007, "grad_norm": 2.5141163527474633, "learning_rate": 3.457280307889673e-06, "loss": 0.7823, "step": 28155 }, { "epoch": 0.34319281440044846, "grad_norm": 2.6446581860402394, "learning_rate": 3.4569595894804366e-06, "loss": 0.8238, "step": 28160 }, { "epoch": 0.3432537506245963, "grad_norm": 2.616685068786689, "learning_rate": 3.4566388710711996e-06, "loss": 0.7919, "step": 28165 }, { "epoch": 0.3433146868487441, "grad_norm": 2.045193845609169, "learning_rate": 3.456318152661963e-06, "loss": 0.7668, "step": 28170 }, { "epoch": 0.34337562307289193, "grad_norm": 2.3278051789979246, "learning_rate": 3.4559974342527265e-06, "loss": 0.7692, "step": 28175 }, { "epoch": 0.3434365592970397, "grad_norm": 2.726385190988585, "learning_rate": 3.4556767158434895e-06, "loss": 0.7743, "step": 28180 }, { "epoch": 0.3434974955211875, "grad_norm": 2.3819782367891165, "learning_rate": 3.4553559974342534e-06, "loss": 0.7133, "step": 28185 }, { "epoch": 0.34355843174533535, "grad_norm": 2.9756616075036137, "learning_rate": 3.4550352790250164e-06, "loss": 0.7965, "step": 28190 }, { "epoch": 0.3436193679694831, "grad_norm": 2.7459831028571946, "learning_rate": 3.4547145606157794e-06, "loss": 0.7527, "step": 28195 }, { "epoch": 0.34368030419363094, "grad_norm": 3.121821335478452, "learning_rate": 3.4543938422065433e-06, "loss": 0.818, "step": 28200 }, { "epoch": 0.34374124041777876, "grad_norm": 2.5417375477923265, "learning_rate": 3.4540731237973063e-06, "loss": 0.8107, "step": 28205 }, { "epoch": 0.3438021766419266, "grad_norm": 2.494839622724739, "learning_rate": 3.4537524053880693e-06, "loss": 0.8214, "step": 28210 }, { "epoch": 0.34386311286607435, "grad_norm": 2.797797005472518, "learning_rate": 3.453431686978833e-06, "loss": 0.7988, "step": 28215 }, { "epoch": 0.3439240490902222, "grad_norm": 2.0272667629867414, "learning_rate": 3.453110968569596e-06, "loss": 0.7263, "step": 28220 }, { "epoch": 0.34398498531437, "grad_norm": 3.043436116462057, "learning_rate": 3.4527902501603592e-06, "loss": 0.7053, "step": 28225 }, { "epoch": 0.34404592153851776, "grad_norm": 2.200787397890122, "learning_rate": 3.4524695317511227e-06, "loss": 0.7293, "step": 28230 }, { "epoch": 0.3441068577626656, "grad_norm": 2.2290578636699254, "learning_rate": 3.452148813341886e-06, "loss": 0.7316, "step": 28235 }, { "epoch": 0.3441677939868134, "grad_norm": 2.625349732199008, "learning_rate": 3.4518280949326495e-06, "loss": 0.8484, "step": 28240 }, { "epoch": 0.34422873021096123, "grad_norm": 2.944917924490767, "learning_rate": 3.4515073765234126e-06, "loss": 0.7572, "step": 28245 }, { "epoch": 0.344289666435109, "grad_norm": 2.4552037880163393, "learning_rate": 3.451186658114176e-06, "loss": 0.8027, "step": 28250 }, { "epoch": 0.3443506026592568, "grad_norm": 2.433368327438251, "learning_rate": 3.4508659397049394e-06, "loss": 0.7624, "step": 28255 }, { "epoch": 0.34441153888340464, "grad_norm": 2.401622405508978, "learning_rate": 3.4505452212957025e-06, "loss": 0.7848, "step": 28260 }, { "epoch": 0.3444724751075524, "grad_norm": 2.3421528339067046, "learning_rate": 3.4502245028864663e-06, "loss": 0.7277, "step": 28265 }, { "epoch": 0.34453341133170023, "grad_norm": 2.880184591373286, "learning_rate": 3.4499037844772293e-06, "loss": 0.8484, "step": 28270 }, { "epoch": 0.34459434755584806, "grad_norm": 2.2916317508299886, "learning_rate": 3.4495830660679923e-06, "loss": 0.8192, "step": 28275 }, { "epoch": 0.3446552837799959, "grad_norm": 2.5992142872107866, "learning_rate": 3.4492623476587562e-06, "loss": 0.8144, "step": 28280 }, { "epoch": 0.34471622000414365, "grad_norm": 3.4512695092948475, "learning_rate": 3.4489416292495192e-06, "loss": 0.7058, "step": 28285 }, { "epoch": 0.34477715622829147, "grad_norm": 2.586261810605707, "learning_rate": 3.4486209108402822e-06, "loss": 0.7473, "step": 28290 }, { "epoch": 0.3448380924524393, "grad_norm": 1.864381056275383, "learning_rate": 3.448300192431046e-06, "loss": 0.8264, "step": 28295 }, { "epoch": 0.34489902867658706, "grad_norm": 1.9760802937950848, "learning_rate": 3.447979474021809e-06, "loss": 0.6467, "step": 28300 }, { "epoch": 0.3449599649007349, "grad_norm": 3.2288378476937765, "learning_rate": 3.447658755612572e-06, "loss": 0.7827, "step": 28305 }, { "epoch": 0.3450209011248827, "grad_norm": 2.042385904045478, "learning_rate": 3.4473380372033356e-06, "loss": 0.7727, "step": 28310 }, { "epoch": 0.34508183734903053, "grad_norm": 2.603217464407318, "learning_rate": 3.447017318794099e-06, "loss": 0.8149, "step": 28315 }, { "epoch": 0.3451427735731783, "grad_norm": 3.0404307765018204, "learning_rate": 3.4466966003848625e-06, "loss": 0.7702, "step": 28320 }, { "epoch": 0.3452037097973261, "grad_norm": 2.7365780748312853, "learning_rate": 3.4463758819756255e-06, "loss": 0.8091, "step": 28325 }, { "epoch": 0.34526464602147394, "grad_norm": 2.58797325300619, "learning_rate": 3.4460551635663893e-06, "loss": 0.771, "step": 28330 }, { "epoch": 0.3453255822456217, "grad_norm": 2.1440247296276347, "learning_rate": 3.4457344451571524e-06, "loss": 0.7471, "step": 28335 }, { "epoch": 0.34538651846976953, "grad_norm": 2.3112729624371524, "learning_rate": 3.4454137267479154e-06, "loss": 0.8133, "step": 28340 }, { "epoch": 0.34544745469391736, "grad_norm": 2.9624067048667704, "learning_rate": 3.4450930083386792e-06, "loss": 0.8113, "step": 28345 }, { "epoch": 0.3455083909180652, "grad_norm": 3.735839159938144, "learning_rate": 3.4447722899294423e-06, "loss": 0.7403, "step": 28350 }, { "epoch": 0.34556932714221295, "grad_norm": 2.5641681700035055, "learning_rate": 3.4444515715202053e-06, "loss": 0.7931, "step": 28355 }, { "epoch": 0.34563026336636077, "grad_norm": 2.1652980775387505, "learning_rate": 3.444130853110969e-06, "loss": 0.7686, "step": 28360 }, { "epoch": 0.3456911995905086, "grad_norm": 2.4885368604838405, "learning_rate": 3.443810134701732e-06, "loss": 0.8067, "step": 28365 }, { "epoch": 0.34575213581465636, "grad_norm": 2.443616777233237, "learning_rate": 3.443489416292495e-06, "loss": 0.8139, "step": 28370 }, { "epoch": 0.3458130720388042, "grad_norm": 2.2892107077735253, "learning_rate": 3.443168697883259e-06, "loss": 0.782, "step": 28375 }, { "epoch": 0.345874008262952, "grad_norm": 2.509848640163656, "learning_rate": 3.442847979474022e-06, "loss": 0.7206, "step": 28380 }, { "epoch": 0.3459349444870998, "grad_norm": 2.402744666144403, "learning_rate": 3.4425272610647855e-06, "loss": 0.7919, "step": 28385 }, { "epoch": 0.3459958807112476, "grad_norm": 4.6306586219278785, "learning_rate": 3.4422065426555485e-06, "loss": 0.7845, "step": 28390 }, { "epoch": 0.3460568169353954, "grad_norm": 2.2607521121751035, "learning_rate": 3.441885824246312e-06, "loss": 0.7503, "step": 28395 }, { "epoch": 0.34611775315954324, "grad_norm": 2.4749131007324188, "learning_rate": 3.4415651058370754e-06, "loss": 0.7926, "step": 28400 }, { "epoch": 0.346178689383691, "grad_norm": 2.166767030406532, "learning_rate": 3.4412443874278384e-06, "loss": 0.7765, "step": 28405 }, { "epoch": 0.34623962560783883, "grad_norm": 2.4580726188396333, "learning_rate": 3.4409236690186023e-06, "loss": 0.7618, "step": 28410 }, { "epoch": 0.34630056183198665, "grad_norm": 2.27577923977301, "learning_rate": 3.4406029506093653e-06, "loss": 0.7813, "step": 28415 }, { "epoch": 0.3463614980561345, "grad_norm": 3.6892026256374826, "learning_rate": 3.4402822322001283e-06, "loss": 0.7294, "step": 28420 }, { "epoch": 0.34642243428028224, "grad_norm": 2.4336914669714753, "learning_rate": 3.439961513790892e-06, "loss": 0.849, "step": 28425 }, { "epoch": 0.34648337050443007, "grad_norm": 2.2973321952398407, "learning_rate": 3.439640795381655e-06, "loss": 0.7946, "step": 28430 }, { "epoch": 0.3465443067285779, "grad_norm": 2.493430640320377, "learning_rate": 3.439320076972418e-06, "loss": 0.8243, "step": 28435 }, { "epoch": 0.34660524295272566, "grad_norm": 2.497854334396739, "learning_rate": 3.438999358563182e-06, "loss": 0.8483, "step": 28440 }, { "epoch": 0.3466661791768735, "grad_norm": 2.1254941666277425, "learning_rate": 3.438678640153945e-06, "loss": 0.7839, "step": 28445 }, { "epoch": 0.3467271154010213, "grad_norm": 2.594281792099207, "learning_rate": 3.438357921744708e-06, "loss": 0.7502, "step": 28450 }, { "epoch": 0.3467880516251691, "grad_norm": 2.304621001463886, "learning_rate": 3.438037203335472e-06, "loss": 0.8659, "step": 28455 }, { "epoch": 0.3468489878493169, "grad_norm": 2.452795671719471, "learning_rate": 3.437716484926235e-06, "loss": 0.8531, "step": 28460 }, { "epoch": 0.3469099240734647, "grad_norm": 2.520830202660416, "learning_rate": 3.4373957665169984e-06, "loss": 0.8172, "step": 28465 }, { "epoch": 0.34697086029761254, "grad_norm": 2.832198888037693, "learning_rate": 3.437075048107762e-06, "loss": 0.7827, "step": 28470 }, { "epoch": 0.3470317965217603, "grad_norm": 2.6394494203786234, "learning_rate": 3.436754329698525e-06, "loss": 0.6881, "step": 28475 }, { "epoch": 0.34709273274590813, "grad_norm": 2.256844598281776, "learning_rate": 3.4364336112892883e-06, "loss": 0.7509, "step": 28480 }, { "epoch": 0.34715366897005595, "grad_norm": 2.6722412291584, "learning_rate": 3.4361128928800513e-06, "loss": 0.7738, "step": 28485 }, { "epoch": 0.3472146051942038, "grad_norm": 2.2787313198546846, "learning_rate": 3.435792174470815e-06, "loss": 0.7663, "step": 28490 }, { "epoch": 0.34727554141835154, "grad_norm": 2.6851752275024583, "learning_rate": 3.4354714560615782e-06, "loss": 0.7928, "step": 28495 }, { "epoch": 0.34733647764249936, "grad_norm": 2.7972081240811875, "learning_rate": 3.4351507376523412e-06, "loss": 0.7425, "step": 28500 }, { "epoch": 0.3473974138666472, "grad_norm": 2.535705402730948, "learning_rate": 3.434830019243105e-06, "loss": 0.818, "step": 28505 }, { "epoch": 0.34745835009079495, "grad_norm": 2.0872530813652306, "learning_rate": 3.434509300833868e-06, "loss": 0.775, "step": 28510 }, { "epoch": 0.3475192863149428, "grad_norm": 2.929473647672704, "learning_rate": 3.434188582424631e-06, "loss": 0.766, "step": 28515 }, { "epoch": 0.3475802225390906, "grad_norm": 2.2083605002166955, "learning_rate": 3.433867864015395e-06, "loss": 0.7796, "step": 28520 }, { "epoch": 0.34764115876323837, "grad_norm": 2.2152547100269437, "learning_rate": 3.433547145606158e-06, "loss": 0.7876, "step": 28525 }, { "epoch": 0.3477020949873862, "grad_norm": 2.042688569805845, "learning_rate": 3.433226427196921e-06, "loss": 0.834, "step": 28530 }, { "epoch": 0.347763031211534, "grad_norm": 2.2326091941254123, "learning_rate": 3.432905708787685e-06, "loss": 0.8943, "step": 28535 }, { "epoch": 0.34782396743568184, "grad_norm": 2.2949898632329884, "learning_rate": 3.432584990378448e-06, "loss": 0.7897, "step": 28540 }, { "epoch": 0.3478849036598296, "grad_norm": 2.142372535753737, "learning_rate": 3.4322642719692113e-06, "loss": 0.831, "step": 28545 }, { "epoch": 0.3479458398839774, "grad_norm": 2.6212177690449203, "learning_rate": 3.4319435535599748e-06, "loss": 0.8838, "step": 28550 }, { "epoch": 0.34800677610812525, "grad_norm": 2.4662048646217674, "learning_rate": 3.4316228351507382e-06, "loss": 0.7699, "step": 28555 }, { "epoch": 0.348067712332273, "grad_norm": 2.2294692293985126, "learning_rate": 3.4313021167415012e-06, "loss": 0.7202, "step": 28560 }, { "epoch": 0.34812864855642084, "grad_norm": 3.6226403532193645, "learning_rate": 3.4309813983322643e-06, "loss": 0.7224, "step": 28565 }, { "epoch": 0.34818958478056866, "grad_norm": 2.4071278156450595, "learning_rate": 3.430660679923028e-06, "loss": 0.7749, "step": 28570 }, { "epoch": 0.3482505210047165, "grad_norm": 2.1998734115893295, "learning_rate": 3.430339961513791e-06, "loss": 0.7553, "step": 28575 }, { "epoch": 0.34831145722886425, "grad_norm": 5.107919975336221, "learning_rate": 3.430019243104554e-06, "loss": 0.8109, "step": 28580 }, { "epoch": 0.3483723934530121, "grad_norm": 2.249407654092969, "learning_rate": 3.429698524695318e-06, "loss": 0.8004, "step": 28585 }, { "epoch": 0.3484333296771599, "grad_norm": 2.0549146541465286, "learning_rate": 3.429377806286081e-06, "loss": 0.7425, "step": 28590 }, { "epoch": 0.34849426590130766, "grad_norm": 3.0452709529445254, "learning_rate": 3.429057087876844e-06, "loss": 0.7038, "step": 28595 }, { "epoch": 0.3485552021254555, "grad_norm": 2.115903654868503, "learning_rate": 3.428736369467608e-06, "loss": 0.8306, "step": 28600 }, { "epoch": 0.3486161383496033, "grad_norm": 3.2009297460301, "learning_rate": 3.428415651058371e-06, "loss": 0.7761, "step": 28605 }, { "epoch": 0.34867707457375113, "grad_norm": 3.66809549211351, "learning_rate": 3.4280949326491344e-06, "loss": 0.8303, "step": 28610 }, { "epoch": 0.3487380107978989, "grad_norm": 2.79435744448295, "learning_rate": 3.427774214239898e-06, "loss": 0.7599, "step": 28615 }, { "epoch": 0.3487989470220467, "grad_norm": 2.3112494339299463, "learning_rate": 3.427453495830661e-06, "loss": 0.7412, "step": 28620 }, { "epoch": 0.34885988324619455, "grad_norm": 2.6475380445127694, "learning_rate": 3.4271327774214243e-06, "loss": 0.7834, "step": 28625 }, { "epoch": 0.3489208194703423, "grad_norm": 2.6044686018054106, "learning_rate": 3.4268120590121877e-06, "loss": 0.7922, "step": 28630 }, { "epoch": 0.34898175569449014, "grad_norm": 2.7703096067728756, "learning_rate": 3.426491340602951e-06, "loss": 0.8116, "step": 28635 }, { "epoch": 0.34904269191863796, "grad_norm": 2.3810765709829362, "learning_rate": 3.426170622193714e-06, "loss": 0.8084, "step": 28640 }, { "epoch": 0.3491036281427858, "grad_norm": 4.053227589558563, "learning_rate": 3.425849903784477e-06, "loss": 0.8209, "step": 28645 }, { "epoch": 0.34916456436693355, "grad_norm": 2.708254273827358, "learning_rate": 3.425529185375241e-06, "loss": 0.807, "step": 28650 }, { "epoch": 0.3492255005910814, "grad_norm": 2.510968023020805, "learning_rate": 3.425208466966004e-06, "loss": 0.7345, "step": 28655 }, { "epoch": 0.3492864368152292, "grad_norm": 2.472691465852854, "learning_rate": 3.424887748556767e-06, "loss": 0.7792, "step": 28660 }, { "epoch": 0.34934737303937696, "grad_norm": 2.655844126577075, "learning_rate": 3.424567030147531e-06, "loss": 0.7453, "step": 28665 }, { "epoch": 0.3494083092635248, "grad_norm": 2.4923775321260853, "learning_rate": 3.424246311738294e-06, "loss": 0.7289, "step": 28670 }, { "epoch": 0.3494692454876726, "grad_norm": 2.906664131473226, "learning_rate": 3.423925593329057e-06, "loss": 0.8003, "step": 28675 }, { "epoch": 0.34953018171182043, "grad_norm": 2.3705746950108737, "learning_rate": 3.423604874919821e-06, "loss": 0.809, "step": 28680 }, { "epoch": 0.3495911179359682, "grad_norm": 2.850123809441922, "learning_rate": 3.423284156510584e-06, "loss": 0.7773, "step": 28685 }, { "epoch": 0.349652054160116, "grad_norm": 2.5588884431429184, "learning_rate": 3.4229634381013473e-06, "loss": 0.8345, "step": 28690 }, { "epoch": 0.34971299038426384, "grad_norm": 3.3867278963564575, "learning_rate": 3.4226427196921107e-06, "loss": 0.8601, "step": 28695 }, { "epoch": 0.3497739266084116, "grad_norm": 2.1073846023614666, "learning_rate": 3.4223220012828738e-06, "loss": 0.7633, "step": 28700 }, { "epoch": 0.34983486283255943, "grad_norm": 2.1835454969393018, "learning_rate": 3.422001282873637e-06, "loss": 0.8371, "step": 28705 }, { "epoch": 0.34989579905670726, "grad_norm": 2.3906328440754008, "learning_rate": 3.4216805644644006e-06, "loss": 0.7252, "step": 28710 }, { "epoch": 0.3499567352808551, "grad_norm": 2.3159169259480965, "learning_rate": 3.421359846055164e-06, "loss": 0.7534, "step": 28715 }, { "epoch": 0.35001767150500285, "grad_norm": 2.26542299410552, "learning_rate": 3.421039127645927e-06, "loss": 0.7896, "step": 28720 }, { "epoch": 0.35007860772915067, "grad_norm": 2.137828071109731, "learning_rate": 3.4207184092366905e-06, "loss": 0.7404, "step": 28725 }, { "epoch": 0.3501395439532985, "grad_norm": 3.049142469302994, "learning_rate": 3.420397690827454e-06, "loss": 0.8106, "step": 28730 }, { "epoch": 0.35020048017744626, "grad_norm": 1.9283258687380287, "learning_rate": 3.420076972418217e-06, "loss": 0.7684, "step": 28735 }, { "epoch": 0.3502614164015941, "grad_norm": 2.765982298473544, "learning_rate": 3.41975625400898e-06, "loss": 0.8255, "step": 28740 }, { "epoch": 0.3503223526257419, "grad_norm": 2.3968906918273523, "learning_rate": 3.419435535599744e-06, "loss": 0.7245, "step": 28745 }, { "epoch": 0.35038328884988973, "grad_norm": 2.1529664787460865, "learning_rate": 3.419114817190507e-06, "loss": 0.8097, "step": 28750 }, { "epoch": 0.3504442250740375, "grad_norm": 2.552160271965269, "learning_rate": 3.41879409878127e-06, "loss": 0.7838, "step": 28755 }, { "epoch": 0.3505051612981853, "grad_norm": 2.207386915862944, "learning_rate": 3.4184733803720338e-06, "loss": 0.7934, "step": 28760 }, { "epoch": 0.35056609752233314, "grad_norm": 2.791538148421529, "learning_rate": 3.418152661962797e-06, "loss": 0.8033, "step": 28765 }, { "epoch": 0.3506270337464809, "grad_norm": 2.4328807988395327, "learning_rate": 3.4178319435535602e-06, "loss": 0.6933, "step": 28770 }, { "epoch": 0.35068796997062873, "grad_norm": 2.4401785488522645, "learning_rate": 3.4175112251443237e-06, "loss": 0.8108, "step": 28775 }, { "epoch": 0.35074890619477656, "grad_norm": 2.625782996608837, "learning_rate": 3.4171905067350867e-06, "loss": 0.7471, "step": 28780 }, { "epoch": 0.3508098424189244, "grad_norm": 2.730233014624936, "learning_rate": 3.41686978832585e-06, "loss": 0.7505, "step": 28785 }, { "epoch": 0.35087077864307215, "grad_norm": 2.5075886142667403, "learning_rate": 3.4165490699166136e-06, "loss": 0.7897, "step": 28790 }, { "epoch": 0.35093171486721997, "grad_norm": 2.618288105241427, "learning_rate": 3.416228351507377e-06, "loss": 0.7673, "step": 28795 }, { "epoch": 0.3509926510913678, "grad_norm": 2.8309799592765477, "learning_rate": 3.41590763309814e-06, "loss": 0.7951, "step": 28800 }, { "epoch": 0.35105358731551556, "grad_norm": 2.1807435763262393, "learning_rate": 3.415586914688904e-06, "loss": 0.7399, "step": 28805 }, { "epoch": 0.3511145235396634, "grad_norm": 1.9550678385038671, "learning_rate": 3.415266196279667e-06, "loss": 0.7913, "step": 28810 }, { "epoch": 0.3511754597638112, "grad_norm": 2.2246226571479593, "learning_rate": 3.41494547787043e-06, "loss": 0.7488, "step": 28815 }, { "epoch": 0.351236395987959, "grad_norm": 2.517287550089624, "learning_rate": 3.414624759461193e-06, "loss": 0.8437, "step": 28820 }, { "epoch": 0.3512973322121068, "grad_norm": 2.7207078158973195, "learning_rate": 3.414304041051957e-06, "loss": 0.7531, "step": 28825 }, { "epoch": 0.3513582684362546, "grad_norm": 2.5709138389330786, "learning_rate": 3.41398332264272e-06, "loss": 0.7454, "step": 28830 }, { "epoch": 0.35141920466040244, "grad_norm": 2.374360356950291, "learning_rate": 3.4136626042334833e-06, "loss": 0.7474, "step": 28835 }, { "epoch": 0.3514801408845502, "grad_norm": 2.235602482254616, "learning_rate": 3.4133418858242467e-06, "loss": 0.7695, "step": 28840 }, { "epoch": 0.35154107710869803, "grad_norm": 2.444107769133853, "learning_rate": 3.4130211674150097e-06, "loss": 0.8189, "step": 28845 }, { "epoch": 0.35160201333284585, "grad_norm": 2.394121723802107, "learning_rate": 3.412700449005773e-06, "loss": 0.7232, "step": 28850 }, { "epoch": 0.3516629495569937, "grad_norm": 2.468512035286033, "learning_rate": 3.4123797305965366e-06, "loss": 0.737, "step": 28855 }, { "epoch": 0.35172388578114144, "grad_norm": 2.4518031787094707, "learning_rate": 3.4120590121873e-06, "loss": 0.8446, "step": 28860 }, { "epoch": 0.35178482200528927, "grad_norm": 2.2422248701010408, "learning_rate": 3.411738293778063e-06, "loss": 0.7553, "step": 28865 }, { "epoch": 0.3518457582294371, "grad_norm": 2.099868719764549, "learning_rate": 3.4114175753688265e-06, "loss": 0.7837, "step": 28870 }, { "epoch": 0.35190669445358486, "grad_norm": 2.3578322778202265, "learning_rate": 3.41109685695959e-06, "loss": 0.765, "step": 28875 }, { "epoch": 0.3519676306777327, "grad_norm": 2.4232143372109207, "learning_rate": 3.410776138550353e-06, "loss": 0.8193, "step": 28880 }, { "epoch": 0.3520285669018805, "grad_norm": 2.275175944994321, "learning_rate": 3.410455420141117e-06, "loss": 0.7916, "step": 28885 }, { "epoch": 0.3520895031260283, "grad_norm": 2.7816827397941633, "learning_rate": 3.41013470173188e-06, "loss": 0.8295, "step": 28890 }, { "epoch": 0.3521504393501761, "grad_norm": 2.7289438945857043, "learning_rate": 3.409813983322643e-06, "loss": 0.8104, "step": 28895 }, { "epoch": 0.3522113755743239, "grad_norm": 2.4209483907825606, "learning_rate": 3.409493264913406e-06, "loss": 0.7995, "step": 28900 }, { "epoch": 0.35227231179847174, "grad_norm": 2.3345124936943473, "learning_rate": 3.4091725465041697e-06, "loss": 0.7771, "step": 28905 }, { "epoch": 0.3523332480226195, "grad_norm": 3.040298178950016, "learning_rate": 3.4088518280949327e-06, "loss": 0.7945, "step": 28910 }, { "epoch": 0.35239418424676733, "grad_norm": 2.0830184007984296, "learning_rate": 3.408531109685696e-06, "loss": 0.72, "step": 28915 }, { "epoch": 0.35245512047091515, "grad_norm": 2.8925029029934795, "learning_rate": 3.4082103912764596e-06, "loss": 0.7318, "step": 28920 }, { "epoch": 0.352516056695063, "grad_norm": 3.15791895018914, "learning_rate": 3.4078896728672226e-06, "loss": 0.822, "step": 28925 }, { "epoch": 0.35257699291921074, "grad_norm": 2.3338927385471417, "learning_rate": 3.407568954457986e-06, "loss": 0.778, "step": 28930 }, { "epoch": 0.35263792914335856, "grad_norm": 2.29119295171046, "learning_rate": 3.4072482360487495e-06, "loss": 0.7913, "step": 28935 }, { "epoch": 0.3526988653675064, "grad_norm": 2.769035449724416, "learning_rate": 3.406927517639513e-06, "loss": 0.8414, "step": 28940 }, { "epoch": 0.35275980159165415, "grad_norm": 2.627860264770927, "learning_rate": 3.406606799230276e-06, "loss": 0.7908, "step": 28945 }, { "epoch": 0.352820737815802, "grad_norm": 3.1082119018261043, "learning_rate": 3.4062860808210394e-06, "loss": 0.7667, "step": 28950 }, { "epoch": 0.3528816740399498, "grad_norm": 3.1359275822231147, "learning_rate": 3.405965362411803e-06, "loss": 0.7643, "step": 28955 }, { "epoch": 0.3529426102640976, "grad_norm": 2.176438846334247, "learning_rate": 3.405644644002566e-06, "loss": 0.8013, "step": 28960 }, { "epoch": 0.3530035464882454, "grad_norm": 2.350875953065086, "learning_rate": 3.4053239255933297e-06, "loss": 0.737, "step": 28965 }, { "epoch": 0.3530644827123932, "grad_norm": 3.4109988859947267, "learning_rate": 3.4050032071840928e-06, "loss": 0.7876, "step": 28970 }, { "epoch": 0.35312541893654104, "grad_norm": 2.5380943432476344, "learning_rate": 3.4046824887748558e-06, "loss": 0.7473, "step": 28975 }, { "epoch": 0.3531863551606888, "grad_norm": 2.160593316561211, "learning_rate": 3.404361770365619e-06, "loss": 0.7816, "step": 28980 }, { "epoch": 0.3532472913848366, "grad_norm": 2.495638200205803, "learning_rate": 3.4040410519563827e-06, "loss": 0.7525, "step": 28985 }, { "epoch": 0.35330822760898445, "grad_norm": 2.9617195372086695, "learning_rate": 3.4037203335471457e-06, "loss": 0.7391, "step": 28990 }, { "epoch": 0.35336916383313227, "grad_norm": 2.268461707738863, "learning_rate": 3.403399615137909e-06, "loss": 0.6927, "step": 28995 }, { "epoch": 0.35343010005728004, "grad_norm": 2.9973804056879443, "learning_rate": 3.4030788967286726e-06, "loss": 0.7713, "step": 29000 }, { "epoch": 0.35349103628142786, "grad_norm": 3.247599756466949, "learning_rate": 3.4027581783194356e-06, "loss": 0.7898, "step": 29005 }, { "epoch": 0.3535519725055757, "grad_norm": 2.3306683878785543, "learning_rate": 3.402437459910199e-06, "loss": 0.7386, "step": 29010 }, { "epoch": 0.35361290872972345, "grad_norm": 2.6918736549644167, "learning_rate": 3.4021167415009625e-06, "loss": 0.7373, "step": 29015 }, { "epoch": 0.3536738449538713, "grad_norm": 2.8977230036657136, "learning_rate": 3.401796023091726e-06, "loss": 0.7878, "step": 29020 }, { "epoch": 0.3537347811780191, "grad_norm": 2.8242064951107984, "learning_rate": 3.401475304682489e-06, "loss": 0.916, "step": 29025 }, { "epoch": 0.35379571740216686, "grad_norm": 3.2422942966690007, "learning_rate": 3.4011545862732528e-06, "loss": 0.7856, "step": 29030 }, { "epoch": 0.3538566536263147, "grad_norm": 2.409724614475849, "learning_rate": 3.400833867864016e-06, "loss": 0.7788, "step": 29035 }, { "epoch": 0.3539175898504625, "grad_norm": 3.03787715946877, "learning_rate": 3.400513149454779e-06, "loss": 0.8075, "step": 29040 }, { "epoch": 0.35397852607461033, "grad_norm": 2.474926020283473, "learning_rate": 3.4001924310455427e-06, "loss": 0.7574, "step": 29045 }, { "epoch": 0.3540394622987581, "grad_norm": 2.0373071937763227, "learning_rate": 3.3998717126363057e-06, "loss": 0.7502, "step": 29050 }, { "epoch": 0.3541003985229059, "grad_norm": 2.27134201922277, "learning_rate": 3.3995509942270687e-06, "loss": 0.7045, "step": 29055 }, { "epoch": 0.35416133474705375, "grad_norm": 5.091845607052891, "learning_rate": 3.3992302758178326e-06, "loss": 0.7697, "step": 29060 }, { "epoch": 0.3542222709712015, "grad_norm": 2.549406898498394, "learning_rate": 3.3989095574085956e-06, "loss": 0.8399, "step": 29065 }, { "epoch": 0.35428320719534934, "grad_norm": 3.848078817042407, "learning_rate": 3.3985888389993586e-06, "loss": 0.7369, "step": 29070 }, { "epoch": 0.35434414341949716, "grad_norm": 1.9653047394842944, "learning_rate": 3.398268120590122e-06, "loss": 0.7645, "step": 29075 }, { "epoch": 0.354405079643645, "grad_norm": 2.2938990068277514, "learning_rate": 3.3979474021808855e-06, "loss": 0.7905, "step": 29080 }, { "epoch": 0.35446601586779275, "grad_norm": 3.0003495782581644, "learning_rate": 3.397626683771649e-06, "loss": 0.7495, "step": 29085 }, { "epoch": 0.3545269520919406, "grad_norm": 2.7645765712140755, "learning_rate": 3.397305965362412e-06, "loss": 0.8731, "step": 29090 }, { "epoch": 0.3545878883160884, "grad_norm": 1.9650231472342528, "learning_rate": 3.3969852469531754e-06, "loss": 0.7659, "step": 29095 }, { "epoch": 0.35464882454023616, "grad_norm": 3.869903841302195, "learning_rate": 3.396664528543939e-06, "loss": 0.7429, "step": 29100 }, { "epoch": 0.354709760764384, "grad_norm": 3.0544398900658103, "learning_rate": 3.396343810134702e-06, "loss": 0.744, "step": 29105 }, { "epoch": 0.3547706969885318, "grad_norm": 2.6607684193907812, "learning_rate": 3.3960230917254657e-06, "loss": 0.7706, "step": 29110 }, { "epoch": 0.35483163321267963, "grad_norm": 2.2094443186035377, "learning_rate": 3.3957023733162287e-06, "loss": 0.8425, "step": 29115 }, { "epoch": 0.3548925694368274, "grad_norm": 2.4617369753445817, "learning_rate": 3.3953816549069917e-06, "loss": 0.794, "step": 29120 }, { "epoch": 0.3549535056609752, "grad_norm": 2.5993413833642975, "learning_rate": 3.3950609364977556e-06, "loss": 0.7256, "step": 29125 }, { "epoch": 0.35501444188512304, "grad_norm": 2.6636725583794236, "learning_rate": 3.3947402180885186e-06, "loss": 0.7359, "step": 29130 }, { "epoch": 0.3550753781092708, "grad_norm": 2.4308035457783284, "learning_rate": 3.3944194996792816e-06, "loss": 0.7755, "step": 29135 }, { "epoch": 0.35513631433341863, "grad_norm": 2.817644428325558, "learning_rate": 3.3940987812700455e-06, "loss": 0.857, "step": 29140 }, { "epoch": 0.35519725055756646, "grad_norm": 2.4859052899855625, "learning_rate": 3.3937780628608085e-06, "loss": 0.7785, "step": 29145 }, { "epoch": 0.3552581867817143, "grad_norm": 2.5335989305125723, "learning_rate": 3.3934573444515715e-06, "loss": 0.7755, "step": 29150 }, { "epoch": 0.35531912300586205, "grad_norm": 2.1398686310808634, "learning_rate": 3.393136626042335e-06, "loss": 0.7858, "step": 29155 }, { "epoch": 0.35538005923000987, "grad_norm": 2.5515662440798335, "learning_rate": 3.3928159076330984e-06, "loss": 0.8486, "step": 29160 }, { "epoch": 0.3554409954541577, "grad_norm": 2.731073904337289, "learning_rate": 3.392495189223862e-06, "loss": 0.8171, "step": 29165 }, { "epoch": 0.35550193167830546, "grad_norm": 2.0805816062869273, "learning_rate": 3.392174470814625e-06, "loss": 0.7585, "step": 29170 }, { "epoch": 0.3555628679024533, "grad_norm": 4.189497337809512, "learning_rate": 3.3918537524053883e-06, "loss": 0.8258, "step": 29175 }, { "epoch": 0.3556238041266011, "grad_norm": 2.730189712805893, "learning_rate": 3.3915330339961517e-06, "loss": 0.7461, "step": 29180 }, { "epoch": 0.35568474035074893, "grad_norm": 2.4814737725022047, "learning_rate": 3.3912123155869148e-06, "loss": 0.7052, "step": 29185 }, { "epoch": 0.3557456765748967, "grad_norm": 2.804144221180451, "learning_rate": 3.3908915971776786e-06, "loss": 0.7089, "step": 29190 }, { "epoch": 0.3558066127990445, "grad_norm": 2.1178248914026883, "learning_rate": 3.3905708787684416e-06, "loss": 0.8227, "step": 29195 }, { "epoch": 0.35586754902319234, "grad_norm": 2.3600007458306624, "learning_rate": 3.3902501603592047e-06, "loss": 0.854, "step": 29200 }, { "epoch": 0.3559284852473401, "grad_norm": 3.1868017314198656, "learning_rate": 3.3899294419499685e-06, "loss": 0.8097, "step": 29205 }, { "epoch": 0.35598942147148793, "grad_norm": 3.1347863063390733, "learning_rate": 3.3896087235407315e-06, "loss": 0.7915, "step": 29210 }, { "epoch": 0.35605035769563576, "grad_norm": 2.391756633884591, "learning_rate": 3.3892880051314946e-06, "loss": 0.7966, "step": 29215 }, { "epoch": 0.3561112939197836, "grad_norm": 2.525216815480355, "learning_rate": 3.3889672867222584e-06, "loss": 0.6845, "step": 29220 }, { "epoch": 0.35617223014393135, "grad_norm": 2.4729111196982205, "learning_rate": 3.3886465683130214e-06, "loss": 0.8098, "step": 29225 }, { "epoch": 0.35623316636807917, "grad_norm": 3.032870897074975, "learning_rate": 3.3883258499037845e-06, "loss": 0.7499, "step": 29230 }, { "epoch": 0.356294102592227, "grad_norm": 3.051229210403873, "learning_rate": 3.388005131494548e-06, "loss": 0.7457, "step": 29235 }, { "epoch": 0.35635503881637476, "grad_norm": 2.42743436320269, "learning_rate": 3.3876844130853113e-06, "loss": 0.8402, "step": 29240 }, { "epoch": 0.3564159750405226, "grad_norm": 2.5915571291694053, "learning_rate": 3.3873636946760748e-06, "loss": 0.7819, "step": 29245 }, { "epoch": 0.3564769112646704, "grad_norm": 2.365600745869707, "learning_rate": 3.387042976266838e-06, "loss": 0.7705, "step": 29250 }, { "epoch": 0.3565378474888182, "grad_norm": 2.993018875447678, "learning_rate": 3.3867222578576017e-06, "loss": 0.758, "step": 29255 }, { "epoch": 0.356598783712966, "grad_norm": 2.1535471668240884, "learning_rate": 3.3864015394483647e-06, "loss": 0.7471, "step": 29260 }, { "epoch": 0.3566597199371138, "grad_norm": 3.732238324203568, "learning_rate": 3.3860808210391277e-06, "loss": 0.799, "step": 29265 }, { "epoch": 0.35672065616126164, "grad_norm": 2.0435043171716876, "learning_rate": 3.3857601026298916e-06, "loss": 0.7624, "step": 29270 }, { "epoch": 0.3567815923854094, "grad_norm": 2.4455879273494405, "learning_rate": 3.3854393842206546e-06, "loss": 0.8, "step": 29275 }, { "epoch": 0.35684252860955723, "grad_norm": 2.9779103120318084, "learning_rate": 3.3851186658114176e-06, "loss": 0.8559, "step": 29280 }, { "epoch": 0.35690346483370505, "grad_norm": 3.104750940735082, "learning_rate": 3.3847979474021815e-06, "loss": 0.773, "step": 29285 }, { "epoch": 0.3569644010578529, "grad_norm": 2.1531939002156566, "learning_rate": 3.3844772289929445e-06, "loss": 0.703, "step": 29290 }, { "epoch": 0.35702533728200064, "grad_norm": 3.523859899797102, "learning_rate": 3.3841565105837075e-06, "loss": 0.7505, "step": 29295 }, { "epoch": 0.35708627350614847, "grad_norm": 4.411819637115171, "learning_rate": 3.3838357921744714e-06, "loss": 0.8035, "step": 29300 }, { "epoch": 0.3571472097302963, "grad_norm": 3.165256724784586, "learning_rate": 3.3835150737652344e-06, "loss": 0.8047, "step": 29305 }, { "epoch": 0.35720814595444406, "grad_norm": 2.2619277553058423, "learning_rate": 3.383194355355998e-06, "loss": 0.8502, "step": 29310 }, { "epoch": 0.3572690821785919, "grad_norm": 2.1075301321757336, "learning_rate": 3.3828736369467612e-06, "loss": 0.7592, "step": 29315 }, { "epoch": 0.3573300184027397, "grad_norm": 2.2893335108068222, "learning_rate": 3.3825529185375243e-06, "loss": 0.8134, "step": 29320 }, { "epoch": 0.3573909546268875, "grad_norm": 2.0704815146010445, "learning_rate": 3.3822322001282877e-06, "loss": 0.7173, "step": 29325 }, { "epoch": 0.3574518908510353, "grad_norm": 2.5455148819929483, "learning_rate": 3.3819114817190507e-06, "loss": 0.8237, "step": 29330 }, { "epoch": 0.3575128270751831, "grad_norm": 2.651904134797542, "learning_rate": 3.3815907633098146e-06, "loss": 0.7161, "step": 29335 }, { "epoch": 0.35757376329933094, "grad_norm": 2.3923291703973306, "learning_rate": 3.3812700449005776e-06, "loss": 0.8862, "step": 29340 }, { "epoch": 0.3576346995234787, "grad_norm": 2.4790676557404896, "learning_rate": 3.3809493264913406e-06, "loss": 0.8019, "step": 29345 }, { "epoch": 0.35769563574762653, "grad_norm": 2.3306555331952454, "learning_rate": 3.3806286080821045e-06, "loss": 0.7318, "step": 29350 }, { "epoch": 0.35775657197177435, "grad_norm": 3.0194606571010816, "learning_rate": 3.3803078896728675e-06, "loss": 0.6975, "step": 29355 }, { "epoch": 0.3578175081959222, "grad_norm": 2.5042963003714513, "learning_rate": 3.3799871712636305e-06, "loss": 0.7853, "step": 29360 }, { "epoch": 0.35787844442006994, "grad_norm": 2.033361570252147, "learning_rate": 3.3796664528543944e-06, "loss": 0.7957, "step": 29365 }, { "epoch": 0.35793938064421776, "grad_norm": 3.8871349275101217, "learning_rate": 3.3793457344451574e-06, "loss": 0.8056, "step": 29370 }, { "epoch": 0.3580003168683656, "grad_norm": 2.4044696283183686, "learning_rate": 3.3790250160359204e-06, "loss": 0.7728, "step": 29375 }, { "epoch": 0.35806125309251335, "grad_norm": 2.39060077233838, "learning_rate": 3.3787042976266843e-06, "loss": 0.7905, "step": 29380 }, { "epoch": 0.3581221893166612, "grad_norm": 4.987026457827093, "learning_rate": 3.3783835792174473e-06, "loss": 0.742, "step": 29385 }, { "epoch": 0.358183125540809, "grad_norm": 2.336063357479258, "learning_rate": 3.3780628608082107e-06, "loss": 0.8318, "step": 29390 }, { "epoch": 0.3582440617649568, "grad_norm": 2.7650516498735365, "learning_rate": 3.377742142398974e-06, "loss": 0.7886, "step": 29395 }, { "epoch": 0.3583049979891046, "grad_norm": 2.493019317108303, "learning_rate": 3.377421423989737e-06, "loss": 0.73, "step": 29400 }, { "epoch": 0.3583659342132524, "grad_norm": 2.6376060288467604, "learning_rate": 3.3771007055805006e-06, "loss": 0.8319, "step": 29405 }, { "epoch": 0.35842687043740024, "grad_norm": 2.1754558491264584, "learning_rate": 3.3767799871712636e-06, "loss": 0.8408, "step": 29410 }, { "epoch": 0.358487806661548, "grad_norm": 1.921601563113431, "learning_rate": 3.3764592687620275e-06, "loss": 0.7165, "step": 29415 }, { "epoch": 0.3585487428856958, "grad_norm": 2.414837275298967, "learning_rate": 3.3761385503527905e-06, "loss": 0.683, "step": 29420 }, { "epoch": 0.35860967910984365, "grad_norm": 2.5512516208354317, "learning_rate": 3.3758178319435535e-06, "loss": 0.7367, "step": 29425 }, { "epoch": 0.35867061533399147, "grad_norm": 2.3398351937198845, "learning_rate": 3.3754971135343174e-06, "loss": 0.7314, "step": 29430 }, { "epoch": 0.35873155155813924, "grad_norm": 2.5446094056858803, "learning_rate": 3.3751763951250804e-06, "loss": 0.779, "step": 29435 }, { "epoch": 0.35879248778228706, "grad_norm": 2.8518887710691483, "learning_rate": 3.3748556767158434e-06, "loss": 0.7485, "step": 29440 }, { "epoch": 0.3588534240064349, "grad_norm": 4.362092862766266, "learning_rate": 3.3745349583066073e-06, "loss": 0.693, "step": 29445 }, { "epoch": 0.35891436023058265, "grad_norm": 2.4355649064051255, "learning_rate": 3.3742142398973703e-06, "loss": 0.7759, "step": 29450 }, { "epoch": 0.3589752964547305, "grad_norm": 3.1128281151145387, "learning_rate": 3.3738935214881333e-06, "loss": 0.7684, "step": 29455 }, { "epoch": 0.3590362326788783, "grad_norm": 2.3001780550247797, "learning_rate": 3.373572803078897e-06, "loss": 0.7672, "step": 29460 }, { "epoch": 0.3590971689030261, "grad_norm": 2.6213788210438693, "learning_rate": 3.3732520846696602e-06, "loss": 0.7178, "step": 29465 }, { "epoch": 0.3591581051271739, "grad_norm": 2.8987702681542453, "learning_rate": 3.3729313662604237e-06, "loss": 0.8042, "step": 29470 }, { "epoch": 0.3592190413513217, "grad_norm": 2.9950311040165465, "learning_rate": 3.372610647851187e-06, "loss": 0.8166, "step": 29475 }, { "epoch": 0.35927997757546953, "grad_norm": 2.365413177694568, "learning_rate": 3.37228992944195e-06, "loss": 0.7411, "step": 29480 }, { "epoch": 0.3593409137996173, "grad_norm": 3.1937024922548405, "learning_rate": 3.3719692110327136e-06, "loss": 0.6874, "step": 29485 }, { "epoch": 0.3594018500237651, "grad_norm": 2.5590237810058154, "learning_rate": 3.3716484926234766e-06, "loss": 0.7897, "step": 29490 }, { "epoch": 0.35946278624791295, "grad_norm": 2.3008044210061422, "learning_rate": 3.3713277742142404e-06, "loss": 0.7605, "step": 29495 }, { "epoch": 0.3595237224720607, "grad_norm": 2.2352509347769054, "learning_rate": 3.3710070558050035e-06, "loss": 0.7283, "step": 29500 }, { "epoch": 0.35958465869620854, "grad_norm": 2.4155918033931583, "learning_rate": 3.3706863373957665e-06, "loss": 0.7398, "step": 29505 }, { "epoch": 0.35964559492035636, "grad_norm": 2.341475356594908, "learning_rate": 3.3703656189865303e-06, "loss": 0.8278, "step": 29510 }, { "epoch": 0.3597065311445042, "grad_norm": 3.1768785315513326, "learning_rate": 3.3700449005772934e-06, "loss": 0.7727, "step": 29515 }, { "epoch": 0.35976746736865195, "grad_norm": 2.4694531055745808, "learning_rate": 3.3697241821680564e-06, "loss": 0.741, "step": 29520 }, { "epoch": 0.3598284035927998, "grad_norm": 2.337130760353884, "learning_rate": 3.3694034637588202e-06, "loss": 0.7657, "step": 29525 }, { "epoch": 0.3598893398169476, "grad_norm": 2.342479059546706, "learning_rate": 3.3690827453495833e-06, "loss": 0.7754, "step": 29530 }, { "epoch": 0.35995027604109536, "grad_norm": 2.066231320839347, "learning_rate": 3.3687620269403467e-06, "loss": 0.8134, "step": 29535 }, { "epoch": 0.3600112122652432, "grad_norm": 3.276263093793552, "learning_rate": 3.36844130853111e-06, "loss": 0.7743, "step": 29540 }, { "epoch": 0.360072148489391, "grad_norm": 2.7406737758742006, "learning_rate": 3.368120590121873e-06, "loss": 0.7477, "step": 29545 }, { "epoch": 0.36013308471353883, "grad_norm": 2.3058628756061865, "learning_rate": 3.3677998717126366e-06, "loss": 0.7717, "step": 29550 }, { "epoch": 0.3601940209376866, "grad_norm": 2.3323650220500034, "learning_rate": 3.3674791533034e-06, "loss": 0.7602, "step": 29555 }, { "epoch": 0.3602549571618344, "grad_norm": 6.050560688219144, "learning_rate": 3.3671584348941635e-06, "loss": 0.7677, "step": 29560 }, { "epoch": 0.36031589338598224, "grad_norm": 2.670517236569846, "learning_rate": 3.3668377164849265e-06, "loss": 0.7789, "step": 29565 }, { "epoch": 0.36037682961013, "grad_norm": 2.0709525042331074, "learning_rate": 3.3665169980756895e-06, "loss": 0.7004, "step": 29570 }, { "epoch": 0.36043776583427783, "grad_norm": 2.732350916989511, "learning_rate": 3.3661962796664534e-06, "loss": 0.7195, "step": 29575 }, { "epoch": 0.36049870205842566, "grad_norm": 2.385881000983216, "learning_rate": 3.3658755612572164e-06, "loss": 0.7742, "step": 29580 }, { "epoch": 0.3605596382825735, "grad_norm": 2.0478293829889798, "learning_rate": 3.3655548428479794e-06, "loss": 0.7143, "step": 29585 }, { "epoch": 0.36062057450672125, "grad_norm": 2.3069281875752576, "learning_rate": 3.3652341244387433e-06, "loss": 0.7399, "step": 29590 }, { "epoch": 0.36068151073086907, "grad_norm": 2.7150846453680204, "learning_rate": 3.3649134060295063e-06, "loss": 0.7412, "step": 29595 }, { "epoch": 0.3607424469550169, "grad_norm": 2.2668945371698883, "learning_rate": 3.3645926876202693e-06, "loss": 0.8122, "step": 29600 }, { "epoch": 0.36080338317916466, "grad_norm": 2.401923264421996, "learning_rate": 3.364271969211033e-06, "loss": 0.8262, "step": 29605 }, { "epoch": 0.3608643194033125, "grad_norm": 2.965857495434102, "learning_rate": 3.363951250801796e-06, "loss": 0.8149, "step": 29610 }, { "epoch": 0.3609252556274603, "grad_norm": 2.5824820828302104, "learning_rate": 3.3636305323925596e-06, "loss": 0.7996, "step": 29615 }, { "epoch": 0.36098619185160813, "grad_norm": 2.088869962337209, "learning_rate": 3.363309813983323e-06, "loss": 0.7783, "step": 29620 }, { "epoch": 0.3610471280757559, "grad_norm": 2.7866257595791346, "learning_rate": 3.362989095574086e-06, "loss": 0.7675, "step": 29625 }, { "epoch": 0.3611080642999037, "grad_norm": 2.4369572179478998, "learning_rate": 3.3626683771648495e-06, "loss": 0.7538, "step": 29630 }, { "epoch": 0.36116900052405154, "grad_norm": 2.324591463543903, "learning_rate": 3.362347658755613e-06, "loss": 0.8236, "step": 29635 }, { "epoch": 0.3612299367481993, "grad_norm": 2.2637266565847884, "learning_rate": 3.3620269403463764e-06, "loss": 0.7102, "step": 29640 }, { "epoch": 0.36129087297234713, "grad_norm": 2.417477938585827, "learning_rate": 3.3617062219371394e-06, "loss": 0.7253, "step": 29645 }, { "epoch": 0.36135180919649496, "grad_norm": 2.941506603604249, "learning_rate": 3.361385503527903e-06, "loss": 0.7558, "step": 29650 }, { "epoch": 0.3614127454206428, "grad_norm": 2.4175243260281505, "learning_rate": 3.3610647851186663e-06, "loss": 0.7519, "step": 29655 }, { "epoch": 0.36147368164479055, "grad_norm": 2.751558326462856, "learning_rate": 3.3607440667094293e-06, "loss": 0.7884, "step": 29660 }, { "epoch": 0.36153461786893837, "grad_norm": 2.2834644574465157, "learning_rate": 3.3604233483001923e-06, "loss": 0.7512, "step": 29665 }, { "epoch": 0.3615955540930862, "grad_norm": 2.543719107762324, "learning_rate": 3.360102629890956e-06, "loss": 0.8215, "step": 29670 }, { "epoch": 0.36165649031723396, "grad_norm": 2.4665635825479195, "learning_rate": 3.359781911481719e-06, "loss": 0.7202, "step": 29675 }, { "epoch": 0.3617174265413818, "grad_norm": 2.192555273150473, "learning_rate": 3.3594611930724822e-06, "loss": 0.7898, "step": 29680 }, { "epoch": 0.3617783627655296, "grad_norm": 2.325046243800181, "learning_rate": 3.359140474663246e-06, "loss": 0.7427, "step": 29685 }, { "epoch": 0.3618392989896774, "grad_norm": 2.059945636825326, "learning_rate": 3.358819756254009e-06, "loss": 0.7274, "step": 29690 }, { "epoch": 0.3619002352138252, "grad_norm": 2.2758724867537787, "learning_rate": 3.3584990378447725e-06, "loss": 0.7985, "step": 29695 }, { "epoch": 0.361961171437973, "grad_norm": 2.5830018723149832, "learning_rate": 3.358178319435536e-06, "loss": 0.8332, "step": 29700 }, { "epoch": 0.36202210766212084, "grad_norm": 2.7873958187018664, "learning_rate": 3.357857601026299e-06, "loss": 0.7794, "step": 29705 }, { "epoch": 0.3620830438862686, "grad_norm": 2.264313775561147, "learning_rate": 3.3575368826170624e-06, "loss": 0.7533, "step": 29710 }, { "epoch": 0.36214398011041643, "grad_norm": 2.750950785709325, "learning_rate": 3.357216164207826e-06, "loss": 0.8652, "step": 29715 }, { "epoch": 0.36220491633456425, "grad_norm": 3.1746352260702513, "learning_rate": 3.3568954457985893e-06, "loss": 0.7688, "step": 29720 }, { "epoch": 0.3622658525587121, "grad_norm": 2.8640550914364358, "learning_rate": 3.3565747273893523e-06, "loss": 0.82, "step": 29725 }, { "epoch": 0.36232678878285984, "grad_norm": 3.2759833259821893, "learning_rate": 3.356254008980116e-06, "loss": 0.6993, "step": 29730 }, { "epoch": 0.36238772500700767, "grad_norm": 3.1796674489271615, "learning_rate": 3.3559332905708792e-06, "loss": 0.8229, "step": 29735 }, { "epoch": 0.3624486612311555, "grad_norm": 2.617020549236893, "learning_rate": 3.3556125721616422e-06, "loss": 0.7617, "step": 29740 }, { "epoch": 0.36250959745530326, "grad_norm": 2.5695775521805606, "learning_rate": 3.3552918537524053e-06, "loss": 0.8439, "step": 29745 }, { "epoch": 0.3625705336794511, "grad_norm": 2.0938039420815002, "learning_rate": 3.354971135343169e-06, "loss": 0.764, "step": 29750 }, { "epoch": 0.3626314699035989, "grad_norm": 2.1939656452139555, "learning_rate": 3.354650416933932e-06, "loss": 0.7397, "step": 29755 }, { "epoch": 0.3626924061277467, "grad_norm": 2.6279985254251312, "learning_rate": 3.3543296985246956e-06, "loss": 0.7142, "step": 29760 }, { "epoch": 0.3627533423518945, "grad_norm": 2.806919185453169, "learning_rate": 3.354008980115459e-06, "loss": 0.6999, "step": 29765 }, { "epoch": 0.3628142785760423, "grad_norm": 2.522443014010136, "learning_rate": 3.353688261706222e-06, "loss": 0.7648, "step": 29770 }, { "epoch": 0.36287521480019014, "grad_norm": 2.6152269193341713, "learning_rate": 3.3533675432969855e-06, "loss": 0.7408, "step": 29775 }, { "epoch": 0.3629361510243379, "grad_norm": 2.4862914137573022, "learning_rate": 3.353046824887749e-06, "loss": 0.7948, "step": 29780 }, { "epoch": 0.36299708724848573, "grad_norm": 2.5567718340082135, "learning_rate": 3.3527261064785124e-06, "loss": 0.7881, "step": 29785 }, { "epoch": 0.36305802347263355, "grad_norm": 2.262323493029723, "learning_rate": 3.3524053880692754e-06, "loss": 0.7711, "step": 29790 }, { "epoch": 0.3631189596967814, "grad_norm": 2.803448270697932, "learning_rate": 3.352084669660039e-06, "loss": 0.815, "step": 29795 }, { "epoch": 0.36317989592092914, "grad_norm": 2.054864897740073, "learning_rate": 3.3517639512508023e-06, "loss": 0.7919, "step": 29800 }, { "epoch": 0.36324083214507696, "grad_norm": 2.461302052043859, "learning_rate": 3.3514432328415653e-06, "loss": 0.7502, "step": 29805 }, { "epoch": 0.3633017683692248, "grad_norm": 2.5646190299056117, "learning_rate": 3.351122514432329e-06, "loss": 0.749, "step": 29810 }, { "epoch": 0.36336270459337255, "grad_norm": 2.300799667398818, "learning_rate": 3.350801796023092e-06, "loss": 0.7822, "step": 29815 }, { "epoch": 0.3634236408175204, "grad_norm": 2.21396464671332, "learning_rate": 3.350481077613855e-06, "loss": 0.7285, "step": 29820 }, { "epoch": 0.3634845770416682, "grad_norm": 2.971145251845708, "learning_rate": 3.350160359204618e-06, "loss": 0.759, "step": 29825 }, { "epoch": 0.363545513265816, "grad_norm": 1.9430409618129267, "learning_rate": 3.349839640795382e-06, "loss": 0.8191, "step": 29830 }, { "epoch": 0.3636064494899638, "grad_norm": 2.4473479390185133, "learning_rate": 3.349518922386145e-06, "loss": 0.7569, "step": 29835 }, { "epoch": 0.3636673857141116, "grad_norm": 15.796767230412065, "learning_rate": 3.3491982039769085e-06, "loss": 0.7235, "step": 29840 }, { "epoch": 0.36372832193825944, "grad_norm": 2.4639423760278905, "learning_rate": 3.348877485567672e-06, "loss": 0.8078, "step": 29845 }, { "epoch": 0.3637892581624072, "grad_norm": 2.3834451028094072, "learning_rate": 3.348556767158435e-06, "loss": 0.7475, "step": 29850 }, { "epoch": 0.363850194386555, "grad_norm": 2.867485153590119, "learning_rate": 3.3482360487491984e-06, "loss": 0.8376, "step": 29855 }, { "epoch": 0.36391113061070285, "grad_norm": 3.1640124688560065, "learning_rate": 3.347915330339962e-06, "loss": 0.7836, "step": 29860 }, { "epoch": 0.36397206683485067, "grad_norm": 2.5504757493635815, "learning_rate": 3.3475946119307253e-06, "loss": 0.7747, "step": 29865 }, { "epoch": 0.36403300305899844, "grad_norm": 2.2009063754860496, "learning_rate": 3.3472738935214883e-06, "loss": 0.6841, "step": 29870 }, { "epoch": 0.36409393928314626, "grad_norm": 2.444861214032639, "learning_rate": 3.3469531751122517e-06, "loss": 0.8126, "step": 29875 }, { "epoch": 0.3641548755072941, "grad_norm": 2.1208271086477732, "learning_rate": 3.346632456703015e-06, "loss": 0.7774, "step": 29880 }, { "epoch": 0.36421581173144185, "grad_norm": 2.4745536312431655, "learning_rate": 3.346311738293778e-06, "loss": 0.714, "step": 29885 }, { "epoch": 0.3642767479555897, "grad_norm": 3.035039874991335, "learning_rate": 3.345991019884542e-06, "loss": 0.8312, "step": 29890 }, { "epoch": 0.3643376841797375, "grad_norm": 2.5959640655134275, "learning_rate": 3.345670301475305e-06, "loss": 0.715, "step": 29895 }, { "epoch": 0.3643986204038853, "grad_norm": 2.6156945280433277, "learning_rate": 3.345349583066068e-06, "loss": 0.7622, "step": 29900 }, { "epoch": 0.3644595566280331, "grad_norm": 2.7572278908663015, "learning_rate": 3.345028864656831e-06, "loss": 0.7297, "step": 29905 }, { "epoch": 0.3645204928521809, "grad_norm": 4.6103096419827185, "learning_rate": 3.344708146247595e-06, "loss": 0.7739, "step": 29910 }, { "epoch": 0.36458142907632873, "grad_norm": 2.53034084535762, "learning_rate": 3.344387427838358e-06, "loss": 0.7897, "step": 29915 }, { "epoch": 0.3646423653004765, "grad_norm": 3.7820438209602605, "learning_rate": 3.3440667094291214e-06, "loss": 0.7389, "step": 29920 }, { "epoch": 0.3647033015246243, "grad_norm": 2.415857864488356, "learning_rate": 3.343745991019885e-06, "loss": 0.7692, "step": 29925 }, { "epoch": 0.36476423774877215, "grad_norm": 2.3681528234414784, "learning_rate": 3.343425272610648e-06, "loss": 0.7971, "step": 29930 }, { "epoch": 0.36482517397291997, "grad_norm": 2.873056613242288, "learning_rate": 3.3431045542014113e-06, "loss": 0.7713, "step": 29935 }, { "epoch": 0.36488611019706774, "grad_norm": 2.78725219832386, "learning_rate": 3.3427838357921748e-06, "loss": 0.7473, "step": 29940 }, { "epoch": 0.36494704642121556, "grad_norm": 3.173558412699593, "learning_rate": 3.342463117382938e-06, "loss": 0.6994, "step": 29945 }, { "epoch": 0.3650079826453634, "grad_norm": 2.396338392077598, "learning_rate": 3.3421423989737012e-06, "loss": 0.7697, "step": 29950 }, { "epoch": 0.36506891886951115, "grad_norm": 3.213514138234425, "learning_rate": 3.341821680564465e-06, "loss": 0.8813, "step": 29955 }, { "epoch": 0.365129855093659, "grad_norm": 2.090956567747161, "learning_rate": 3.341500962155228e-06, "loss": 0.723, "step": 29960 }, { "epoch": 0.3651907913178068, "grad_norm": 3.9578124603969393, "learning_rate": 3.341180243745991e-06, "loss": 0.7414, "step": 29965 }, { "epoch": 0.36525172754195456, "grad_norm": 2.3648766448235436, "learning_rate": 3.340859525336755e-06, "loss": 0.8127, "step": 29970 }, { "epoch": 0.3653126637661024, "grad_norm": 2.421506210939002, "learning_rate": 3.340538806927518e-06, "loss": 0.7723, "step": 29975 }, { "epoch": 0.3653735999902502, "grad_norm": 2.3544695098712682, "learning_rate": 3.340218088518281e-06, "loss": 0.7813, "step": 29980 }, { "epoch": 0.36543453621439803, "grad_norm": 2.4245101550630634, "learning_rate": 3.339897370109045e-06, "loss": 0.7851, "step": 29985 }, { "epoch": 0.3654954724385458, "grad_norm": 2.630911776654614, "learning_rate": 3.339576651699808e-06, "loss": 0.7999, "step": 29990 }, { "epoch": 0.3655564086626936, "grad_norm": 2.854778913719956, "learning_rate": 3.339255933290571e-06, "loss": 0.7914, "step": 29995 }, { "epoch": 0.36561734488684144, "grad_norm": 3.0697360233718918, "learning_rate": 3.3389352148813344e-06, "loss": 0.7517, "step": 30000 }, { "epoch": 0.3656782811109892, "grad_norm": 4.166817444674775, "learning_rate": 3.338614496472098e-06, "loss": 0.752, "step": 30005 }, { "epoch": 0.36573921733513703, "grad_norm": 2.0974416578415904, "learning_rate": 3.3382937780628612e-06, "loss": 0.8132, "step": 30010 }, { "epoch": 0.36580015355928486, "grad_norm": 2.398578593263586, "learning_rate": 3.3379730596536243e-06, "loss": 0.7141, "step": 30015 }, { "epoch": 0.3658610897834327, "grad_norm": 2.7586012423671824, "learning_rate": 3.3376523412443877e-06, "loss": 0.7756, "step": 30020 }, { "epoch": 0.36592202600758045, "grad_norm": 2.5129960381180156, "learning_rate": 3.337331622835151e-06, "loss": 0.7878, "step": 30025 }, { "epoch": 0.36598296223172827, "grad_norm": 2.3153094717920086, "learning_rate": 3.337010904425914e-06, "loss": 0.8088, "step": 30030 }, { "epoch": 0.3660438984558761, "grad_norm": 1.9595895947496416, "learning_rate": 3.336690186016678e-06, "loss": 0.749, "step": 30035 }, { "epoch": 0.36610483468002386, "grad_norm": 2.662542084806304, "learning_rate": 3.336369467607441e-06, "loss": 0.8017, "step": 30040 }, { "epoch": 0.3661657709041717, "grad_norm": 2.115202061526038, "learning_rate": 3.336048749198204e-06, "loss": 0.7375, "step": 30045 }, { "epoch": 0.3662267071283195, "grad_norm": 3.17162590606721, "learning_rate": 3.335728030788968e-06, "loss": 0.7475, "step": 30050 }, { "epoch": 0.36628764335246733, "grad_norm": 2.4476807762197246, "learning_rate": 3.335407312379731e-06, "loss": 0.7002, "step": 30055 }, { "epoch": 0.3663485795766151, "grad_norm": 2.3651787659286403, "learning_rate": 3.335086593970494e-06, "loss": 0.7649, "step": 30060 }, { "epoch": 0.3664095158007629, "grad_norm": 2.5960681655147817, "learning_rate": 3.334765875561258e-06, "loss": 0.7161, "step": 30065 }, { "epoch": 0.36647045202491074, "grad_norm": 2.4479020867384715, "learning_rate": 3.334445157152021e-06, "loss": 0.7719, "step": 30070 }, { "epoch": 0.3665313882490585, "grad_norm": 2.1738070023811145, "learning_rate": 3.334124438742784e-06, "loss": 0.6941, "step": 30075 }, { "epoch": 0.36659232447320633, "grad_norm": 2.8032999625553097, "learning_rate": 3.3338037203335473e-06, "loss": 0.7897, "step": 30080 }, { "epoch": 0.36665326069735416, "grad_norm": 2.3377947976648272, "learning_rate": 3.3334830019243107e-06, "loss": 0.8444, "step": 30085 }, { "epoch": 0.366714196921502, "grad_norm": 2.393727217676986, "learning_rate": 3.333162283515074e-06, "loss": 0.802, "step": 30090 }, { "epoch": 0.36677513314564975, "grad_norm": 2.0838821225298445, "learning_rate": 3.332841565105837e-06, "loss": 0.7658, "step": 30095 }, { "epoch": 0.36683606936979757, "grad_norm": 2.8063047534015535, "learning_rate": 3.3325208466966006e-06, "loss": 0.656, "step": 30100 }, { "epoch": 0.3668970055939454, "grad_norm": 4.029395600515814, "learning_rate": 3.332200128287364e-06, "loss": 0.803, "step": 30105 }, { "epoch": 0.36695794181809316, "grad_norm": 2.41316366938927, "learning_rate": 3.331879409878127e-06, "loss": 0.7707, "step": 30110 }, { "epoch": 0.367018878042241, "grad_norm": 2.553383599907179, "learning_rate": 3.331558691468891e-06, "loss": 0.772, "step": 30115 }, { "epoch": 0.3670798142663888, "grad_norm": 2.7316243976308825, "learning_rate": 3.331237973059654e-06, "loss": 0.7999, "step": 30120 }, { "epoch": 0.3671407504905366, "grad_norm": 2.6434220396488572, "learning_rate": 3.330917254650417e-06, "loss": 0.6981, "step": 30125 }, { "epoch": 0.3672016867146844, "grad_norm": 2.2460791673809424, "learning_rate": 3.330596536241181e-06, "loss": 0.7553, "step": 30130 }, { "epoch": 0.3672626229388322, "grad_norm": 4.0217929995876665, "learning_rate": 3.330275817831944e-06, "loss": 0.7761, "step": 30135 }, { "epoch": 0.36732355916298004, "grad_norm": 2.402000181816361, "learning_rate": 3.329955099422707e-06, "loss": 0.8318, "step": 30140 }, { "epoch": 0.3673844953871278, "grad_norm": 2.585254333724968, "learning_rate": 3.3296343810134707e-06, "loss": 0.74, "step": 30145 }, { "epoch": 0.36744543161127563, "grad_norm": 2.1873378005880237, "learning_rate": 3.3293136626042338e-06, "loss": 0.7787, "step": 30150 }, { "epoch": 0.36750636783542345, "grad_norm": 3.136217866910577, "learning_rate": 3.3289929441949968e-06, "loss": 0.8078, "step": 30155 }, { "epoch": 0.3675673040595713, "grad_norm": 2.572143302761664, "learning_rate": 3.32867222578576e-06, "loss": 0.7938, "step": 30160 }, { "epoch": 0.36762824028371904, "grad_norm": 2.085162960529014, "learning_rate": 3.3283515073765237e-06, "loss": 0.7552, "step": 30165 }, { "epoch": 0.36768917650786687, "grad_norm": 2.4682137089118683, "learning_rate": 3.328030788967287e-06, "loss": 0.7795, "step": 30170 }, { "epoch": 0.3677501127320147, "grad_norm": 2.3321104637632604, "learning_rate": 3.32771007055805e-06, "loss": 0.7521, "step": 30175 }, { "epoch": 0.36781104895616246, "grad_norm": 1.90518944139901, "learning_rate": 3.3273893521488135e-06, "loss": 0.726, "step": 30180 }, { "epoch": 0.3678719851803103, "grad_norm": 2.2669918012758674, "learning_rate": 3.327068633739577e-06, "loss": 0.7896, "step": 30185 }, { "epoch": 0.3679329214044581, "grad_norm": 3.0329328298915996, "learning_rate": 3.32674791533034e-06, "loss": 0.7848, "step": 30190 }, { "epoch": 0.3679938576286059, "grad_norm": 2.1778766338763296, "learning_rate": 3.326427196921104e-06, "loss": 0.7784, "step": 30195 }, { "epoch": 0.3680547938527537, "grad_norm": 2.415292735772934, "learning_rate": 3.326106478511867e-06, "loss": 0.7614, "step": 30200 }, { "epoch": 0.3681157300769015, "grad_norm": 2.219007504821751, "learning_rate": 3.32578576010263e-06, "loss": 0.6959, "step": 30205 }, { "epoch": 0.36817666630104934, "grad_norm": 2.325839623913732, "learning_rate": 3.3254650416933938e-06, "loss": 0.7607, "step": 30210 }, { "epoch": 0.3682376025251971, "grad_norm": 3.19832593545954, "learning_rate": 3.3251443232841568e-06, "loss": 0.6435, "step": 30215 }, { "epoch": 0.36829853874934493, "grad_norm": 3.3305745475136947, "learning_rate": 3.32482360487492e-06, "loss": 0.8081, "step": 30220 }, { "epoch": 0.36835947497349275, "grad_norm": 2.203670421214922, "learning_rate": 3.3245028864656837e-06, "loss": 0.7591, "step": 30225 }, { "epoch": 0.3684204111976406, "grad_norm": 2.7612974798175465, "learning_rate": 3.3241821680564467e-06, "loss": 0.7946, "step": 30230 }, { "epoch": 0.36848134742178834, "grad_norm": 2.7834860545823448, "learning_rate": 3.32386144964721e-06, "loss": 0.812, "step": 30235 }, { "epoch": 0.36854228364593616, "grad_norm": 2.7640572187159327, "learning_rate": 3.3235407312379736e-06, "loss": 0.722, "step": 30240 }, { "epoch": 0.368603219870084, "grad_norm": 2.605943905277189, "learning_rate": 3.3232200128287366e-06, "loss": 0.7684, "step": 30245 }, { "epoch": 0.36866415609423175, "grad_norm": 2.39365046622919, "learning_rate": 3.3228992944195e-06, "loss": 0.8264, "step": 30250 }, { "epoch": 0.3687250923183796, "grad_norm": 2.8736695848373475, "learning_rate": 3.322578576010263e-06, "loss": 0.7734, "step": 30255 }, { "epoch": 0.3687860285425274, "grad_norm": 2.8193198166249664, "learning_rate": 3.322257857601027e-06, "loss": 0.7888, "step": 30260 }, { "epoch": 0.3688469647666752, "grad_norm": 2.1927866445332587, "learning_rate": 3.32193713919179e-06, "loss": 0.7308, "step": 30265 }, { "epoch": 0.368907900990823, "grad_norm": 2.585320962690285, "learning_rate": 3.321616420782553e-06, "loss": 0.7969, "step": 30270 }, { "epoch": 0.3689688372149708, "grad_norm": 2.4761582974299623, "learning_rate": 3.321295702373317e-06, "loss": 0.6666, "step": 30275 }, { "epoch": 0.36902977343911864, "grad_norm": 2.505685339814227, "learning_rate": 3.32097498396408e-06, "loss": 0.7858, "step": 30280 }, { "epoch": 0.3690907096632664, "grad_norm": 2.9460751902920452, "learning_rate": 3.320654265554843e-06, "loss": 0.8289, "step": 30285 }, { "epoch": 0.3691516458874142, "grad_norm": 2.5716988964489684, "learning_rate": 3.3203335471456067e-06, "loss": 0.743, "step": 30290 }, { "epoch": 0.36921258211156205, "grad_norm": 2.071357666972847, "learning_rate": 3.3200128287363697e-06, "loss": 0.7357, "step": 30295 }, { "epoch": 0.36927351833570987, "grad_norm": 2.8789772707128782, "learning_rate": 3.3196921103271327e-06, "loss": 0.8274, "step": 30300 }, { "epoch": 0.36933445455985764, "grad_norm": 2.3133548704536913, "learning_rate": 3.3193713919178966e-06, "loss": 0.8273, "step": 30305 }, { "epoch": 0.36939539078400546, "grad_norm": 2.282562978894527, "learning_rate": 3.3190506735086596e-06, "loss": 0.7552, "step": 30310 }, { "epoch": 0.3694563270081533, "grad_norm": 3.0089937183878956, "learning_rate": 3.318729955099423e-06, "loss": 0.8163, "step": 30315 }, { "epoch": 0.36951726323230105, "grad_norm": 2.7519081487710766, "learning_rate": 3.3184092366901865e-06, "loss": 0.7817, "step": 30320 }, { "epoch": 0.3695781994564489, "grad_norm": 2.536991116428997, "learning_rate": 3.3180885182809495e-06, "loss": 0.7557, "step": 30325 }, { "epoch": 0.3696391356805967, "grad_norm": 2.956195485281615, "learning_rate": 3.317767799871713e-06, "loss": 0.8353, "step": 30330 }, { "epoch": 0.3697000719047445, "grad_norm": 2.5845739632015414, "learning_rate": 3.317447081462476e-06, "loss": 0.8452, "step": 30335 }, { "epoch": 0.3697610081288923, "grad_norm": 2.390534633887718, "learning_rate": 3.31712636305324e-06, "loss": 0.7512, "step": 30340 }, { "epoch": 0.3698219443530401, "grad_norm": 2.5080583857099388, "learning_rate": 3.316805644644003e-06, "loss": 0.7934, "step": 30345 }, { "epoch": 0.36988288057718793, "grad_norm": 2.2197297960427553, "learning_rate": 3.316484926234766e-06, "loss": 0.7769, "step": 30350 }, { "epoch": 0.3699438168013357, "grad_norm": 2.3131497687325426, "learning_rate": 3.3161642078255297e-06, "loss": 0.8338, "step": 30355 }, { "epoch": 0.3700047530254835, "grad_norm": 4.108086821191334, "learning_rate": 3.3158434894162927e-06, "loss": 0.7317, "step": 30360 }, { "epoch": 0.37006568924963135, "grad_norm": 2.2767807830453646, "learning_rate": 3.3155227710070558e-06, "loss": 0.7718, "step": 30365 }, { "epoch": 0.37012662547377917, "grad_norm": 2.132380994335286, "learning_rate": 3.3152020525978196e-06, "loss": 0.8097, "step": 30370 }, { "epoch": 0.37018756169792694, "grad_norm": 2.585477825490081, "learning_rate": 3.3148813341885826e-06, "loss": 0.706, "step": 30375 }, { "epoch": 0.37024849792207476, "grad_norm": 3.349215257284361, "learning_rate": 3.3145606157793457e-06, "loss": 0.7491, "step": 30380 }, { "epoch": 0.3703094341462226, "grad_norm": 2.58098487411589, "learning_rate": 3.3142398973701095e-06, "loss": 0.7342, "step": 30385 }, { "epoch": 0.37037037037037035, "grad_norm": 2.226604206342037, "learning_rate": 3.3139191789608725e-06, "loss": 0.783, "step": 30390 }, { "epoch": 0.3704313065945182, "grad_norm": 2.3834287518658113, "learning_rate": 3.313598460551636e-06, "loss": 0.8204, "step": 30395 }, { "epoch": 0.370492242818666, "grad_norm": 3.2465584326994494, "learning_rate": 3.3132777421423994e-06, "loss": 0.8945, "step": 30400 }, { "epoch": 0.3705531790428138, "grad_norm": 2.230219882071087, "learning_rate": 3.3129570237331624e-06, "loss": 0.7093, "step": 30405 }, { "epoch": 0.3706141152669616, "grad_norm": 2.8381511000848287, "learning_rate": 3.312636305323926e-06, "loss": 0.8618, "step": 30410 }, { "epoch": 0.3706750514911094, "grad_norm": 2.1469770757503333, "learning_rate": 3.312315586914689e-06, "loss": 0.7584, "step": 30415 }, { "epoch": 0.37073598771525723, "grad_norm": 1.99138844067495, "learning_rate": 3.3119948685054528e-06, "loss": 0.7587, "step": 30420 }, { "epoch": 0.370796923939405, "grad_norm": 2.4813129363816864, "learning_rate": 3.3116741500962158e-06, "loss": 0.7595, "step": 30425 }, { "epoch": 0.3708578601635528, "grad_norm": 2.8096653887189293, "learning_rate": 3.3113534316869788e-06, "loss": 0.8382, "step": 30430 }, { "epoch": 0.37091879638770064, "grad_norm": 2.559693921285514, "learning_rate": 3.3110327132777427e-06, "loss": 0.8393, "step": 30435 }, { "epoch": 0.3709797326118484, "grad_norm": 2.835928246160415, "learning_rate": 3.3107119948685057e-06, "loss": 0.6921, "step": 30440 }, { "epoch": 0.37104066883599623, "grad_norm": 2.6146098864368503, "learning_rate": 3.3103912764592687e-06, "loss": 0.7545, "step": 30445 }, { "epoch": 0.37110160506014406, "grad_norm": 3.7563631240585105, "learning_rate": 3.3100705580500325e-06, "loss": 0.8318, "step": 30450 }, { "epoch": 0.3711625412842919, "grad_norm": 3.2355706129637762, "learning_rate": 3.3097498396407956e-06, "loss": 0.7925, "step": 30455 }, { "epoch": 0.37122347750843965, "grad_norm": 2.3252916629869596, "learning_rate": 3.309429121231559e-06, "loss": 0.8037, "step": 30460 }, { "epoch": 0.37128441373258747, "grad_norm": 1.9493479427597054, "learning_rate": 3.3091084028223224e-06, "loss": 0.7077, "step": 30465 }, { "epoch": 0.3713453499567353, "grad_norm": 2.4523991046579776, "learning_rate": 3.3087876844130855e-06, "loss": 0.7492, "step": 30470 }, { "epoch": 0.37140628618088306, "grad_norm": 2.2444175958839754, "learning_rate": 3.308466966003849e-06, "loss": 0.7416, "step": 30475 }, { "epoch": 0.3714672224050309, "grad_norm": 2.19508054872902, "learning_rate": 3.3081462475946123e-06, "loss": 0.7652, "step": 30480 }, { "epoch": 0.3715281586291787, "grad_norm": 2.2426291124627613, "learning_rate": 3.3078255291853758e-06, "loss": 0.6607, "step": 30485 }, { "epoch": 0.37158909485332653, "grad_norm": 2.699763386594917, "learning_rate": 3.307504810776139e-06, "loss": 0.7416, "step": 30490 }, { "epoch": 0.3716500310774743, "grad_norm": 3.241119766425818, "learning_rate": 3.307184092366902e-06, "loss": 0.7836, "step": 30495 }, { "epoch": 0.3717109673016221, "grad_norm": 2.298406580324377, "learning_rate": 3.3068633739576657e-06, "loss": 0.7452, "step": 30500 }, { "epoch": 0.37177190352576994, "grad_norm": 2.860023107153853, "learning_rate": 3.3065426555484287e-06, "loss": 0.7824, "step": 30505 }, { "epoch": 0.3718328397499177, "grad_norm": 2.127715618945008, "learning_rate": 3.3062219371391917e-06, "loss": 0.8085, "step": 30510 }, { "epoch": 0.37189377597406553, "grad_norm": 2.359429099293068, "learning_rate": 3.3059012187299556e-06, "loss": 0.8142, "step": 30515 }, { "epoch": 0.37195471219821336, "grad_norm": 2.263022669322393, "learning_rate": 3.3055805003207186e-06, "loss": 0.7938, "step": 30520 }, { "epoch": 0.3720156484223612, "grad_norm": 2.459589928588769, "learning_rate": 3.3052597819114816e-06, "loss": 0.7825, "step": 30525 }, { "epoch": 0.37207658464650895, "grad_norm": 2.116790727043609, "learning_rate": 3.3049390635022455e-06, "loss": 0.7595, "step": 30530 }, { "epoch": 0.37213752087065677, "grad_norm": 2.1286533660693987, "learning_rate": 3.3046183450930085e-06, "loss": 0.729, "step": 30535 }, { "epoch": 0.3721984570948046, "grad_norm": 2.605081914694288, "learning_rate": 3.304297626683772e-06, "loss": 0.7642, "step": 30540 }, { "epoch": 0.37225939331895236, "grad_norm": 2.2973238410540135, "learning_rate": 3.3039769082745354e-06, "loss": 0.7935, "step": 30545 }, { "epoch": 0.3723203295431002, "grad_norm": 2.9111140893074627, "learning_rate": 3.3036561898652984e-06, "loss": 0.8647, "step": 30550 }, { "epoch": 0.372381265767248, "grad_norm": 2.4323932830107884, "learning_rate": 3.303335471456062e-06, "loss": 0.7909, "step": 30555 }, { "epoch": 0.3724422019913958, "grad_norm": 2.261181416234402, "learning_rate": 3.3030147530468253e-06, "loss": 0.699, "step": 30560 }, { "epoch": 0.3725031382155436, "grad_norm": 2.251742662064844, "learning_rate": 3.3026940346375887e-06, "loss": 0.7995, "step": 30565 }, { "epoch": 0.3725640744396914, "grad_norm": 2.268504508861835, "learning_rate": 3.3023733162283517e-06, "loss": 0.7524, "step": 30570 }, { "epoch": 0.37262501066383924, "grad_norm": 2.8189427215491834, "learning_rate": 3.302052597819115e-06, "loss": 0.7657, "step": 30575 }, { "epoch": 0.372685946887987, "grad_norm": 2.329124026283583, "learning_rate": 3.3017318794098786e-06, "loss": 0.7726, "step": 30580 }, { "epoch": 0.37274688311213483, "grad_norm": 2.4158415323678817, "learning_rate": 3.3014111610006416e-06, "loss": 0.7568, "step": 30585 }, { "epoch": 0.37280781933628265, "grad_norm": 3.8696091184357666, "learning_rate": 3.3010904425914046e-06, "loss": 0.8043, "step": 30590 }, { "epoch": 0.3728687555604305, "grad_norm": 3.041504815848889, "learning_rate": 3.3007697241821685e-06, "loss": 0.7983, "step": 30595 }, { "epoch": 0.37292969178457824, "grad_norm": 2.9857059034811506, "learning_rate": 3.3004490057729315e-06, "loss": 0.801, "step": 30600 }, { "epoch": 0.37299062800872607, "grad_norm": 2.563838954558986, "learning_rate": 3.3001282873636945e-06, "loss": 0.7001, "step": 30605 }, { "epoch": 0.3730515642328739, "grad_norm": 2.7661585662153576, "learning_rate": 3.2998075689544584e-06, "loss": 0.7536, "step": 30610 }, { "epoch": 0.37311250045702166, "grad_norm": 2.792644010455039, "learning_rate": 3.2994868505452214e-06, "loss": 0.8339, "step": 30615 }, { "epoch": 0.3731734366811695, "grad_norm": 3.036557500504597, "learning_rate": 3.299166132135985e-06, "loss": 0.7504, "step": 30620 }, { "epoch": 0.3732343729053173, "grad_norm": 3.9824899447205393, "learning_rate": 3.2988454137267483e-06, "loss": 0.8023, "step": 30625 }, { "epoch": 0.3732953091294651, "grad_norm": 2.5571389513878517, "learning_rate": 3.2985246953175113e-06, "loss": 0.8274, "step": 30630 }, { "epoch": 0.3733562453536129, "grad_norm": 2.917119612032349, "learning_rate": 3.2982039769082748e-06, "loss": 0.8204, "step": 30635 }, { "epoch": 0.3734171815777607, "grad_norm": 2.420241126019212, "learning_rate": 3.297883258499038e-06, "loss": 0.8116, "step": 30640 }, { "epoch": 0.37347811780190854, "grad_norm": 2.9838322924665936, "learning_rate": 3.2975625400898016e-06, "loss": 0.8021, "step": 30645 }, { "epoch": 0.3735390540260563, "grad_norm": 2.754453794024094, "learning_rate": 3.2972418216805647e-06, "loss": 0.762, "step": 30650 }, { "epoch": 0.37359999025020413, "grad_norm": 2.4502388700594397, "learning_rate": 3.296921103271328e-06, "loss": 0.7372, "step": 30655 }, { "epoch": 0.37366092647435195, "grad_norm": 2.5081556012240336, "learning_rate": 3.2966003848620915e-06, "loss": 0.702, "step": 30660 }, { "epoch": 0.3737218626984998, "grad_norm": 3.6518274662739976, "learning_rate": 3.2962796664528546e-06, "loss": 0.8058, "step": 30665 }, { "epoch": 0.37378279892264754, "grad_norm": 2.2562842100407146, "learning_rate": 3.2959589480436176e-06, "loss": 0.7229, "step": 30670 }, { "epoch": 0.37384373514679536, "grad_norm": 2.115724791095339, "learning_rate": 3.2956382296343814e-06, "loss": 0.6884, "step": 30675 }, { "epoch": 0.3739046713709432, "grad_norm": 3.4281083857331285, "learning_rate": 3.2953175112251444e-06, "loss": 0.776, "step": 30680 }, { "epoch": 0.37396560759509095, "grad_norm": 2.196170778512231, "learning_rate": 3.2949967928159075e-06, "loss": 0.7503, "step": 30685 }, { "epoch": 0.3740265438192388, "grad_norm": 2.8311005534950375, "learning_rate": 3.2946760744066713e-06, "loss": 0.8421, "step": 30690 }, { "epoch": 0.3740874800433866, "grad_norm": 2.500301235894416, "learning_rate": 3.2943553559974343e-06, "loss": 0.8052, "step": 30695 }, { "epoch": 0.3741484162675344, "grad_norm": 2.2668211604386124, "learning_rate": 3.2940346375881978e-06, "loss": 0.7876, "step": 30700 }, { "epoch": 0.3742093524916822, "grad_norm": 3.5096225057586024, "learning_rate": 3.2937139191789612e-06, "loss": 0.8303, "step": 30705 }, { "epoch": 0.37427028871583, "grad_norm": 2.5987456392328907, "learning_rate": 3.2933932007697247e-06, "loss": 0.8016, "step": 30710 }, { "epoch": 0.37433122493997784, "grad_norm": 4.843013694008967, "learning_rate": 3.2930724823604877e-06, "loss": 0.8552, "step": 30715 }, { "epoch": 0.3743921611641256, "grad_norm": 2.5402499932607787, "learning_rate": 3.292751763951251e-06, "loss": 0.8, "step": 30720 }, { "epoch": 0.3744530973882734, "grad_norm": 3.3566000405587935, "learning_rate": 3.2924310455420146e-06, "loss": 0.7357, "step": 30725 }, { "epoch": 0.37451403361242125, "grad_norm": 2.6977590652903745, "learning_rate": 3.2921103271327776e-06, "loss": 0.7869, "step": 30730 }, { "epoch": 0.37457496983656907, "grad_norm": 2.3933593963906774, "learning_rate": 3.2917896087235414e-06, "loss": 0.797, "step": 30735 }, { "epoch": 0.37463590606071684, "grad_norm": 2.6956870295189836, "learning_rate": 3.2914688903143045e-06, "loss": 0.792, "step": 30740 }, { "epoch": 0.37469684228486466, "grad_norm": 2.74439612513496, "learning_rate": 3.2911481719050675e-06, "loss": 0.8137, "step": 30745 }, { "epoch": 0.3747577785090125, "grad_norm": 2.5344637266821404, "learning_rate": 3.2908274534958305e-06, "loss": 0.7584, "step": 30750 }, { "epoch": 0.37481871473316025, "grad_norm": 2.2783131011525546, "learning_rate": 3.2905067350865944e-06, "loss": 0.7813, "step": 30755 }, { "epoch": 0.3748796509573081, "grad_norm": 2.3236130265018127, "learning_rate": 3.2901860166773574e-06, "loss": 0.7239, "step": 30760 }, { "epoch": 0.3749405871814559, "grad_norm": 2.1634173560910606, "learning_rate": 3.289865298268121e-06, "loss": 0.705, "step": 30765 }, { "epoch": 0.3750015234056037, "grad_norm": 2.6170200888055613, "learning_rate": 3.2895445798588843e-06, "loss": 0.7931, "step": 30770 }, { "epoch": 0.3750624596297515, "grad_norm": 2.5771566118156137, "learning_rate": 3.2892238614496473e-06, "loss": 0.7412, "step": 30775 }, { "epoch": 0.3751233958538993, "grad_norm": 3.3115200581869115, "learning_rate": 3.2889031430404107e-06, "loss": 0.8932, "step": 30780 }, { "epoch": 0.37518433207804713, "grad_norm": 2.510725455655465, "learning_rate": 3.288582424631174e-06, "loss": 0.7456, "step": 30785 }, { "epoch": 0.3752452683021949, "grad_norm": 2.664835254989799, "learning_rate": 3.2882617062219376e-06, "loss": 0.8097, "step": 30790 }, { "epoch": 0.3753062045263427, "grad_norm": 2.427076092455129, "learning_rate": 3.2879409878127006e-06, "loss": 0.8036, "step": 30795 }, { "epoch": 0.37536714075049055, "grad_norm": 2.486934133471, "learning_rate": 3.287620269403464e-06, "loss": 0.7538, "step": 30800 }, { "epoch": 0.37542807697463837, "grad_norm": 2.992801034211368, "learning_rate": 3.2872995509942275e-06, "loss": 0.7012, "step": 30805 }, { "epoch": 0.37548901319878614, "grad_norm": 2.0595948515190807, "learning_rate": 3.2869788325849905e-06, "loss": 0.7567, "step": 30810 }, { "epoch": 0.37554994942293396, "grad_norm": 2.939525339749125, "learning_rate": 3.2866581141757544e-06, "loss": 0.926, "step": 30815 }, { "epoch": 0.3756108856470818, "grad_norm": 2.5151538912064155, "learning_rate": 3.2863373957665174e-06, "loss": 0.7571, "step": 30820 }, { "epoch": 0.37567182187122955, "grad_norm": 2.4403583757336693, "learning_rate": 3.2860166773572804e-06, "loss": 0.8133, "step": 30825 }, { "epoch": 0.3757327580953774, "grad_norm": 2.2833618396485975, "learning_rate": 3.2856959589480443e-06, "loss": 0.7068, "step": 30830 }, { "epoch": 0.3757936943195252, "grad_norm": 2.6705071508998968, "learning_rate": 3.2853752405388073e-06, "loss": 0.7755, "step": 30835 }, { "epoch": 0.375854630543673, "grad_norm": 2.032566772303043, "learning_rate": 3.2850545221295703e-06, "loss": 0.762, "step": 30840 }, { "epoch": 0.3759155667678208, "grad_norm": 2.367250919905698, "learning_rate": 3.2847338037203337e-06, "loss": 0.7615, "step": 30845 }, { "epoch": 0.3759765029919686, "grad_norm": 2.7174450597483513, "learning_rate": 3.284413085311097e-06, "loss": 0.7745, "step": 30850 }, { "epoch": 0.37603743921611643, "grad_norm": 3.5634966918117157, "learning_rate": 3.28409236690186e-06, "loss": 0.8171, "step": 30855 }, { "epoch": 0.3760983754402642, "grad_norm": 2.585536194888504, "learning_rate": 3.2837716484926236e-06, "loss": 0.7831, "step": 30860 }, { "epoch": 0.376159311664412, "grad_norm": 2.35886562850168, "learning_rate": 3.283450930083387e-06, "loss": 0.7254, "step": 30865 }, { "epoch": 0.37622024788855984, "grad_norm": 2.193997882893846, "learning_rate": 3.2831302116741505e-06, "loss": 0.7486, "step": 30870 }, { "epoch": 0.37628118411270767, "grad_norm": 2.1718662439346934, "learning_rate": 3.2828094932649135e-06, "loss": 0.8408, "step": 30875 }, { "epoch": 0.37634212033685543, "grad_norm": 2.227730284545443, "learning_rate": 3.282488774855677e-06, "loss": 0.7654, "step": 30880 }, { "epoch": 0.37640305656100326, "grad_norm": 2.916211032901541, "learning_rate": 3.2821680564464404e-06, "loss": 0.8082, "step": 30885 }, { "epoch": 0.3764639927851511, "grad_norm": 2.599064369698585, "learning_rate": 3.2818473380372034e-06, "loss": 0.7615, "step": 30890 }, { "epoch": 0.37652492900929885, "grad_norm": 3.923517069170699, "learning_rate": 3.2815266196279673e-06, "loss": 0.7523, "step": 30895 }, { "epoch": 0.37658586523344667, "grad_norm": 2.7256682342911467, "learning_rate": 3.2812059012187303e-06, "loss": 0.7314, "step": 30900 }, { "epoch": 0.3766468014575945, "grad_norm": 2.258453365621797, "learning_rate": 3.2808851828094933e-06, "loss": 0.7583, "step": 30905 }, { "epoch": 0.3767077376817423, "grad_norm": 3.427759598924784, "learning_rate": 3.280564464400257e-06, "loss": 0.8658, "step": 30910 }, { "epoch": 0.3767686739058901, "grad_norm": 2.295338164117499, "learning_rate": 3.2802437459910202e-06, "loss": 0.7597, "step": 30915 }, { "epoch": 0.3768296101300379, "grad_norm": 2.3730015519181396, "learning_rate": 3.2799230275817832e-06, "loss": 0.8124, "step": 30920 }, { "epoch": 0.37689054635418573, "grad_norm": 2.3777142144694485, "learning_rate": 3.2796023091725467e-06, "loss": 0.7496, "step": 30925 }, { "epoch": 0.3769514825783335, "grad_norm": 2.416887036768066, "learning_rate": 3.27928159076331e-06, "loss": 0.7835, "step": 30930 }, { "epoch": 0.3770124188024813, "grad_norm": 2.1577439440307833, "learning_rate": 3.2789608723540736e-06, "loss": 0.7433, "step": 30935 }, { "epoch": 0.37707335502662914, "grad_norm": 2.3927765452573038, "learning_rate": 3.2786401539448366e-06, "loss": 0.7395, "step": 30940 }, { "epoch": 0.3771342912507769, "grad_norm": 2.334796950570773, "learning_rate": 3.2783194355356e-06, "loss": 0.7733, "step": 30945 }, { "epoch": 0.37719522747492473, "grad_norm": 2.3244085424580425, "learning_rate": 3.2779987171263634e-06, "loss": 0.7448, "step": 30950 }, { "epoch": 0.37725616369907256, "grad_norm": 3.256119894616026, "learning_rate": 3.2776779987171265e-06, "loss": 0.7933, "step": 30955 }, { "epoch": 0.3773170999232204, "grad_norm": 2.615890683292029, "learning_rate": 3.2773572803078903e-06, "loss": 0.743, "step": 30960 }, { "epoch": 0.37737803614736815, "grad_norm": 2.342842802305864, "learning_rate": 3.2770365618986533e-06, "loss": 0.7792, "step": 30965 }, { "epoch": 0.37743897237151597, "grad_norm": 2.335244008202998, "learning_rate": 3.2767158434894164e-06, "loss": 0.7305, "step": 30970 }, { "epoch": 0.3774999085956638, "grad_norm": 2.568844050028371, "learning_rate": 3.2763951250801802e-06, "loss": 0.7881, "step": 30975 }, { "epoch": 0.37756084481981156, "grad_norm": 2.3385267513651673, "learning_rate": 3.2760744066709432e-06, "loss": 0.7154, "step": 30980 }, { "epoch": 0.3776217810439594, "grad_norm": 2.6058796516758553, "learning_rate": 3.2757536882617063e-06, "loss": 0.7604, "step": 30985 }, { "epoch": 0.3776827172681072, "grad_norm": 2.407226330277717, "learning_rate": 3.27543296985247e-06, "loss": 0.8056, "step": 30990 }, { "epoch": 0.377743653492255, "grad_norm": 2.3662142202198897, "learning_rate": 3.275112251443233e-06, "loss": 0.7884, "step": 30995 }, { "epoch": 0.3778045897164028, "grad_norm": 2.988755480183814, "learning_rate": 3.274791533033996e-06, "loss": 0.8036, "step": 31000 }, { "epoch": 0.3778655259405506, "grad_norm": 5.126584282945106, "learning_rate": 3.2744708146247596e-06, "loss": 0.816, "step": 31005 }, { "epoch": 0.37792646216469844, "grad_norm": 2.453235651528132, "learning_rate": 3.274150096215523e-06, "loss": 0.8099, "step": 31010 }, { "epoch": 0.3779873983888462, "grad_norm": 2.188854628490328, "learning_rate": 3.2738293778062865e-06, "loss": 0.7199, "step": 31015 }, { "epoch": 0.37804833461299403, "grad_norm": 2.5610296488940394, "learning_rate": 3.2735086593970495e-06, "loss": 0.7324, "step": 31020 }, { "epoch": 0.37810927083714185, "grad_norm": 2.3222794424337256, "learning_rate": 3.273187940987813e-06, "loss": 0.7716, "step": 31025 }, { "epoch": 0.3781702070612897, "grad_norm": 2.4817174020967525, "learning_rate": 3.2728672225785764e-06, "loss": 0.8855, "step": 31030 }, { "epoch": 0.37823114328543744, "grad_norm": 2.2123942996166748, "learning_rate": 3.2725465041693394e-06, "loss": 0.7682, "step": 31035 }, { "epoch": 0.37829207950958527, "grad_norm": 3.066680543160969, "learning_rate": 3.2722257857601033e-06, "loss": 0.7795, "step": 31040 }, { "epoch": 0.3783530157337331, "grad_norm": 2.48998129049839, "learning_rate": 3.2719050673508663e-06, "loss": 0.7042, "step": 31045 }, { "epoch": 0.37841395195788086, "grad_norm": 2.6266800862799844, "learning_rate": 3.2715843489416293e-06, "loss": 0.7771, "step": 31050 }, { "epoch": 0.3784748881820287, "grad_norm": 2.337478291868783, "learning_rate": 3.271263630532393e-06, "loss": 0.7346, "step": 31055 }, { "epoch": 0.3785358244061765, "grad_norm": 3.247671959867682, "learning_rate": 3.270942912123156e-06, "loss": 0.7991, "step": 31060 }, { "epoch": 0.3785967606303243, "grad_norm": 1.9915133223213164, "learning_rate": 3.270622193713919e-06, "loss": 0.7899, "step": 31065 }, { "epoch": 0.3786576968544721, "grad_norm": 2.4948364032801895, "learning_rate": 3.270301475304683e-06, "loss": 0.8027, "step": 31070 }, { "epoch": 0.3787186330786199, "grad_norm": 2.2705919895419133, "learning_rate": 3.269980756895446e-06, "loss": 0.8074, "step": 31075 }, { "epoch": 0.37877956930276774, "grad_norm": 2.3986447397634394, "learning_rate": 3.269660038486209e-06, "loss": 0.764, "step": 31080 }, { "epoch": 0.3788405055269155, "grad_norm": 2.8707778767460574, "learning_rate": 3.2693393200769725e-06, "loss": 0.8013, "step": 31085 }, { "epoch": 0.37890144175106333, "grad_norm": 2.353749903831506, "learning_rate": 3.269018601667736e-06, "loss": 0.8172, "step": 31090 }, { "epoch": 0.37896237797521115, "grad_norm": 2.270666944293165, "learning_rate": 3.2686978832584994e-06, "loss": 0.7664, "step": 31095 }, { "epoch": 0.379023314199359, "grad_norm": 2.3345838679889246, "learning_rate": 3.2683771648492624e-06, "loss": 0.6841, "step": 31100 }, { "epoch": 0.37908425042350674, "grad_norm": 2.7379377677222867, "learning_rate": 3.268056446440026e-06, "loss": 0.7912, "step": 31105 }, { "epoch": 0.37914518664765456, "grad_norm": 2.3845802969934224, "learning_rate": 3.2677357280307893e-06, "loss": 0.7811, "step": 31110 }, { "epoch": 0.3792061228718024, "grad_norm": 2.2483103294135445, "learning_rate": 3.2674150096215523e-06, "loss": 0.7921, "step": 31115 }, { "epoch": 0.37926705909595015, "grad_norm": 2.5761634888529015, "learning_rate": 3.267094291212316e-06, "loss": 0.7046, "step": 31120 }, { "epoch": 0.379327995320098, "grad_norm": 3.8764465030416932, "learning_rate": 3.266773572803079e-06, "loss": 0.7652, "step": 31125 }, { "epoch": 0.3793889315442458, "grad_norm": 2.6282297587868317, "learning_rate": 3.2664528543938422e-06, "loss": 0.6948, "step": 31130 }, { "epoch": 0.3794498677683936, "grad_norm": 2.3766167657905055, "learning_rate": 3.266132135984606e-06, "loss": 0.7478, "step": 31135 }, { "epoch": 0.3795108039925414, "grad_norm": 2.67766679669488, "learning_rate": 3.265811417575369e-06, "loss": 0.8226, "step": 31140 }, { "epoch": 0.3795717402166892, "grad_norm": 2.552174719564788, "learning_rate": 3.265490699166132e-06, "loss": 0.8417, "step": 31145 }, { "epoch": 0.37963267644083704, "grad_norm": 5.781870136119901, "learning_rate": 3.265169980756896e-06, "loss": 0.7391, "step": 31150 }, { "epoch": 0.3796936126649848, "grad_norm": 2.6528971710052747, "learning_rate": 3.264849262347659e-06, "loss": 0.8885, "step": 31155 }, { "epoch": 0.3797545488891326, "grad_norm": 2.509794325209346, "learning_rate": 3.2645285439384224e-06, "loss": 0.7585, "step": 31160 }, { "epoch": 0.37981548511328045, "grad_norm": 2.737923618088015, "learning_rate": 3.264207825529186e-06, "loss": 0.7636, "step": 31165 }, { "epoch": 0.37987642133742827, "grad_norm": 2.9375105051587096, "learning_rate": 3.263887107119949e-06, "loss": 0.7732, "step": 31170 }, { "epoch": 0.37993735756157604, "grad_norm": 2.630056019684414, "learning_rate": 3.2635663887107123e-06, "loss": 0.7769, "step": 31175 }, { "epoch": 0.37999829378572386, "grad_norm": 2.5278335625121753, "learning_rate": 3.2632456703014753e-06, "loss": 0.7548, "step": 31180 }, { "epoch": 0.3800592300098717, "grad_norm": 2.8814640745433797, "learning_rate": 3.2629249518922392e-06, "loss": 0.723, "step": 31185 }, { "epoch": 0.38012016623401945, "grad_norm": 2.531836674866131, "learning_rate": 3.2626042334830022e-06, "loss": 0.7296, "step": 31190 }, { "epoch": 0.3801811024581673, "grad_norm": 4.220294473752127, "learning_rate": 3.2622835150737652e-06, "loss": 0.788, "step": 31195 }, { "epoch": 0.3802420386823151, "grad_norm": 2.4196316235358837, "learning_rate": 3.261962796664529e-06, "loss": 0.7855, "step": 31200 }, { "epoch": 0.3803029749064629, "grad_norm": 2.2456145687309204, "learning_rate": 3.261642078255292e-06, "loss": 0.7481, "step": 31205 }, { "epoch": 0.3803639111306107, "grad_norm": 2.565475636537031, "learning_rate": 3.261321359846055e-06, "loss": 0.7472, "step": 31210 }, { "epoch": 0.3804248473547585, "grad_norm": 2.597423366712879, "learning_rate": 3.261000641436819e-06, "loss": 0.8059, "step": 31215 }, { "epoch": 0.38048578357890633, "grad_norm": 4.200737544343955, "learning_rate": 3.260679923027582e-06, "loss": 0.7693, "step": 31220 }, { "epoch": 0.3805467198030541, "grad_norm": 2.8643417926794856, "learning_rate": 3.260359204618345e-06, "loss": 0.6911, "step": 31225 }, { "epoch": 0.3806076560272019, "grad_norm": 2.659961917466442, "learning_rate": 3.260038486209109e-06, "loss": 0.7525, "step": 31230 }, { "epoch": 0.38066859225134975, "grad_norm": 2.3958975581769812, "learning_rate": 3.259717767799872e-06, "loss": 0.8346, "step": 31235 }, { "epoch": 0.38072952847549757, "grad_norm": 2.207924132678469, "learning_rate": 3.2593970493906354e-06, "loss": 0.7997, "step": 31240 }, { "epoch": 0.38079046469964534, "grad_norm": 2.3238115354953517, "learning_rate": 3.259076330981399e-06, "loss": 0.7059, "step": 31245 }, { "epoch": 0.38085140092379316, "grad_norm": 2.2057318361390332, "learning_rate": 3.258755612572162e-06, "loss": 0.7958, "step": 31250 }, { "epoch": 0.380912337147941, "grad_norm": 2.0893197682932376, "learning_rate": 3.2584348941629253e-06, "loss": 0.7497, "step": 31255 }, { "epoch": 0.38097327337208875, "grad_norm": 2.5420239487627114, "learning_rate": 3.2581141757536883e-06, "loss": 0.7847, "step": 31260 }, { "epoch": 0.3810342095962366, "grad_norm": 2.3382489893896325, "learning_rate": 3.257793457344452e-06, "loss": 0.7397, "step": 31265 }, { "epoch": 0.3810951458203844, "grad_norm": 2.3431170591657513, "learning_rate": 3.257472738935215e-06, "loss": 0.8178, "step": 31270 }, { "epoch": 0.3811560820445322, "grad_norm": 2.3518004097154277, "learning_rate": 3.257152020525978e-06, "loss": 0.7886, "step": 31275 }, { "epoch": 0.38121701826868, "grad_norm": 2.16838565379362, "learning_rate": 3.256831302116742e-06, "loss": 0.7959, "step": 31280 }, { "epoch": 0.3812779544928278, "grad_norm": 2.5579155109303597, "learning_rate": 3.256510583707505e-06, "loss": 0.8338, "step": 31285 }, { "epoch": 0.38133889071697563, "grad_norm": 2.7845303518393196, "learning_rate": 3.256189865298268e-06, "loss": 0.8005, "step": 31290 }, { "epoch": 0.3813998269411234, "grad_norm": 2.3377627207400584, "learning_rate": 3.255869146889032e-06, "loss": 0.8287, "step": 31295 }, { "epoch": 0.3814607631652712, "grad_norm": 2.2651067709045463, "learning_rate": 3.255548428479795e-06, "loss": 0.758, "step": 31300 }, { "epoch": 0.38152169938941904, "grad_norm": 2.985506638356499, "learning_rate": 3.255227710070558e-06, "loss": 0.7559, "step": 31305 }, { "epoch": 0.38158263561356687, "grad_norm": 2.754742579223757, "learning_rate": 3.254906991661322e-06, "loss": 0.757, "step": 31310 }, { "epoch": 0.38164357183771463, "grad_norm": 2.6613987338958203, "learning_rate": 3.254586273252085e-06, "loss": 0.8514, "step": 31315 }, { "epoch": 0.38170450806186246, "grad_norm": 2.86916595403273, "learning_rate": 3.2542655548428483e-06, "loss": 0.8217, "step": 31320 }, { "epoch": 0.3817654442860103, "grad_norm": 2.3586484115313247, "learning_rate": 3.2539448364336117e-06, "loss": 0.7252, "step": 31325 }, { "epoch": 0.38182638051015805, "grad_norm": 2.896827513324263, "learning_rate": 3.2536241180243747e-06, "loss": 0.8339, "step": 31330 }, { "epoch": 0.38188731673430587, "grad_norm": 1.8884525234571001, "learning_rate": 3.253303399615138e-06, "loss": 0.7015, "step": 31335 }, { "epoch": 0.3819482529584537, "grad_norm": 2.65347957223524, "learning_rate": 3.252982681205901e-06, "loss": 0.754, "step": 31340 }, { "epoch": 0.3820091891826015, "grad_norm": 3.6799345064399986, "learning_rate": 3.252661962796665e-06, "loss": 0.8818, "step": 31345 }, { "epoch": 0.3820701254067493, "grad_norm": 2.5366926120368194, "learning_rate": 3.252341244387428e-06, "loss": 0.8011, "step": 31350 }, { "epoch": 0.3821310616308971, "grad_norm": 2.269314499611262, "learning_rate": 3.252020525978191e-06, "loss": 0.8041, "step": 31355 }, { "epoch": 0.38219199785504493, "grad_norm": 2.9221219162780416, "learning_rate": 3.251699807568955e-06, "loss": 0.8518, "step": 31360 }, { "epoch": 0.3822529340791927, "grad_norm": 2.1727278414213824, "learning_rate": 3.251379089159718e-06, "loss": 0.799, "step": 31365 }, { "epoch": 0.3823138703033405, "grad_norm": 3.3596926512541465, "learning_rate": 3.251058370750481e-06, "loss": 0.7824, "step": 31370 }, { "epoch": 0.38237480652748834, "grad_norm": 1.731613630028432, "learning_rate": 3.250737652341245e-06, "loss": 0.7349, "step": 31375 }, { "epoch": 0.38243574275163617, "grad_norm": 2.004952989390815, "learning_rate": 3.250416933932008e-06, "loss": 0.7264, "step": 31380 }, { "epoch": 0.38249667897578393, "grad_norm": 2.3061994433085564, "learning_rate": 3.250096215522771e-06, "loss": 0.7821, "step": 31385 }, { "epoch": 0.38255761519993176, "grad_norm": 2.5443000724039813, "learning_rate": 3.2497754971135348e-06, "loss": 0.7541, "step": 31390 }, { "epoch": 0.3826185514240796, "grad_norm": 2.6632984576466954, "learning_rate": 3.2494547787042978e-06, "loss": 0.7654, "step": 31395 }, { "epoch": 0.38267948764822735, "grad_norm": 1.8492451593945216, "learning_rate": 3.2491340602950612e-06, "loss": 0.7589, "step": 31400 }, { "epoch": 0.38274042387237517, "grad_norm": 2.6607295172352408, "learning_rate": 3.2488133418858247e-06, "loss": 0.7692, "step": 31405 }, { "epoch": 0.382801360096523, "grad_norm": 2.371008586968141, "learning_rate": 3.248492623476588e-06, "loss": 0.839, "step": 31410 }, { "epoch": 0.38286229632067076, "grad_norm": 2.3624268657005256, "learning_rate": 3.248171905067351e-06, "loss": 0.7833, "step": 31415 }, { "epoch": 0.3829232325448186, "grad_norm": 2.462493941010968, "learning_rate": 3.2478511866581146e-06, "loss": 0.8841, "step": 31420 }, { "epoch": 0.3829841687689664, "grad_norm": 2.4611768209707963, "learning_rate": 3.247530468248878e-06, "loss": 0.8044, "step": 31425 }, { "epoch": 0.3830451049931142, "grad_norm": 4.6281548320316555, "learning_rate": 3.247209749839641e-06, "loss": 0.7464, "step": 31430 }, { "epoch": 0.383106041217262, "grad_norm": 2.8640913520803934, "learning_rate": 3.246889031430404e-06, "loss": 0.7949, "step": 31435 }, { "epoch": 0.3831669774414098, "grad_norm": 2.1665282759147506, "learning_rate": 3.246568313021168e-06, "loss": 0.7831, "step": 31440 }, { "epoch": 0.38322791366555764, "grad_norm": 2.7848172933849917, "learning_rate": 3.246247594611931e-06, "loss": 0.8004, "step": 31445 }, { "epoch": 0.3832888498897054, "grad_norm": 2.1751638836444323, "learning_rate": 3.245926876202694e-06, "loss": 0.7262, "step": 31450 }, { "epoch": 0.38334978611385323, "grad_norm": 2.8670603671438717, "learning_rate": 3.245606157793458e-06, "loss": 0.7448, "step": 31455 }, { "epoch": 0.38341072233800105, "grad_norm": 2.6273292096502545, "learning_rate": 3.245285439384221e-06, "loss": 0.772, "step": 31460 }, { "epoch": 0.3834716585621489, "grad_norm": 2.351291600711027, "learning_rate": 3.2449647209749842e-06, "loss": 0.7745, "step": 31465 }, { "epoch": 0.38353259478629664, "grad_norm": 2.2638964325842648, "learning_rate": 3.2446440025657477e-06, "loss": 0.7147, "step": 31470 }, { "epoch": 0.38359353101044447, "grad_norm": 2.394966197006143, "learning_rate": 3.2443232841565107e-06, "loss": 0.747, "step": 31475 }, { "epoch": 0.3836544672345923, "grad_norm": 3.372780470564131, "learning_rate": 3.244002565747274e-06, "loss": 0.7146, "step": 31480 }, { "epoch": 0.38371540345874006, "grad_norm": 6.5543213541788425, "learning_rate": 3.2436818473380376e-06, "loss": 0.7557, "step": 31485 }, { "epoch": 0.3837763396828879, "grad_norm": 2.231789326750242, "learning_rate": 3.243361128928801e-06, "loss": 0.7473, "step": 31490 }, { "epoch": 0.3838372759070357, "grad_norm": 2.711284224034842, "learning_rate": 3.243040410519564e-06, "loss": 0.7742, "step": 31495 }, { "epoch": 0.3838982121311835, "grad_norm": 2.7040500330318986, "learning_rate": 3.2427196921103275e-06, "loss": 0.7737, "step": 31500 }, { "epoch": 0.3839591483553313, "grad_norm": 2.164235970016689, "learning_rate": 3.242398973701091e-06, "loss": 0.7985, "step": 31505 }, { "epoch": 0.3840200845794791, "grad_norm": 2.5100890094381603, "learning_rate": 3.242078255291854e-06, "loss": 0.7798, "step": 31510 }, { "epoch": 0.38408102080362694, "grad_norm": 2.3279160066863582, "learning_rate": 3.241757536882617e-06, "loss": 0.8287, "step": 31515 }, { "epoch": 0.3841419570277747, "grad_norm": 2.4680677669848112, "learning_rate": 3.241436818473381e-06, "loss": 0.7785, "step": 31520 }, { "epoch": 0.38420289325192253, "grad_norm": 2.492298515518885, "learning_rate": 3.241116100064144e-06, "loss": 0.8253, "step": 31525 }, { "epoch": 0.38426382947607035, "grad_norm": 2.644651397035758, "learning_rate": 3.240795381654907e-06, "loss": 0.7566, "step": 31530 }, { "epoch": 0.3843247657002182, "grad_norm": 2.2651750763770364, "learning_rate": 3.2404746632456707e-06, "loss": 0.7286, "step": 31535 }, { "epoch": 0.38438570192436594, "grad_norm": 1.90977434670312, "learning_rate": 3.2401539448364337e-06, "loss": 0.7949, "step": 31540 }, { "epoch": 0.38444663814851376, "grad_norm": 2.341053545158038, "learning_rate": 3.239833226427197e-06, "loss": 0.7529, "step": 31545 }, { "epoch": 0.3845075743726616, "grad_norm": 2.874063739789702, "learning_rate": 3.2395125080179606e-06, "loss": 0.7622, "step": 31550 }, { "epoch": 0.38456851059680935, "grad_norm": 3.023712833817334, "learning_rate": 3.2391917896087236e-06, "loss": 0.7409, "step": 31555 }, { "epoch": 0.3846294468209572, "grad_norm": 2.5001796865266397, "learning_rate": 3.238871071199487e-06, "loss": 0.8369, "step": 31560 }, { "epoch": 0.384690383045105, "grad_norm": 2.5097111937582466, "learning_rate": 3.2385503527902505e-06, "loss": 0.7684, "step": 31565 }, { "epoch": 0.3847513192692528, "grad_norm": 3.077524879904448, "learning_rate": 3.238229634381014e-06, "loss": 0.7872, "step": 31570 }, { "epoch": 0.3848122554934006, "grad_norm": 2.743806637439152, "learning_rate": 3.237908915971777e-06, "loss": 0.8609, "step": 31575 }, { "epoch": 0.3848731917175484, "grad_norm": 2.739474625457696, "learning_rate": 3.2375881975625404e-06, "loss": 0.8417, "step": 31580 }, { "epoch": 0.38493412794169624, "grad_norm": 2.4521540438562783, "learning_rate": 3.237267479153304e-06, "loss": 0.8056, "step": 31585 }, { "epoch": 0.384995064165844, "grad_norm": 3.1414158176782276, "learning_rate": 3.236946760744067e-06, "loss": 0.7907, "step": 31590 }, { "epoch": 0.3850560003899918, "grad_norm": 2.5254349696909966, "learning_rate": 3.23662604233483e-06, "loss": 0.7487, "step": 31595 }, { "epoch": 0.38511693661413965, "grad_norm": 2.2254837406297674, "learning_rate": 3.2363053239255937e-06, "loss": 0.7169, "step": 31600 }, { "epoch": 0.38517787283828747, "grad_norm": 2.2306030166076596, "learning_rate": 3.2359846055163568e-06, "loss": 0.6749, "step": 31605 }, { "epoch": 0.38523880906243524, "grad_norm": 2.5796966128412655, "learning_rate": 3.2356638871071198e-06, "loss": 0.8053, "step": 31610 }, { "epoch": 0.38529974528658306, "grad_norm": 1.9671563700576795, "learning_rate": 3.2353431686978836e-06, "loss": 0.7135, "step": 31615 }, { "epoch": 0.3853606815107309, "grad_norm": 4.090395044392744, "learning_rate": 3.2350224502886467e-06, "loss": 0.7959, "step": 31620 }, { "epoch": 0.38542161773487865, "grad_norm": 2.262824143067882, "learning_rate": 3.23470173187941e-06, "loss": 0.7961, "step": 31625 }, { "epoch": 0.3854825539590265, "grad_norm": 2.4282092619703177, "learning_rate": 3.2343810134701735e-06, "loss": 0.7458, "step": 31630 }, { "epoch": 0.3855434901831743, "grad_norm": 2.3729768506375155, "learning_rate": 3.234060295060937e-06, "loss": 0.7536, "step": 31635 }, { "epoch": 0.3856044264073221, "grad_norm": 3.2536217251390553, "learning_rate": 3.2337395766517e-06, "loss": 0.7712, "step": 31640 }, { "epoch": 0.3856653626314699, "grad_norm": 2.600715650356918, "learning_rate": 3.2334188582424634e-06, "loss": 0.7504, "step": 31645 }, { "epoch": 0.3857262988556177, "grad_norm": 2.5685023519044052, "learning_rate": 3.233098139833227e-06, "loss": 0.7143, "step": 31650 }, { "epoch": 0.38578723507976553, "grad_norm": 2.283738639229092, "learning_rate": 3.23277742142399e-06, "loss": 0.7438, "step": 31655 }, { "epoch": 0.3858481713039133, "grad_norm": 2.6626977025770673, "learning_rate": 3.2324567030147538e-06, "loss": 0.7846, "step": 31660 }, { "epoch": 0.3859091075280611, "grad_norm": 2.33998636010116, "learning_rate": 3.2321359846055168e-06, "loss": 0.8272, "step": 31665 }, { "epoch": 0.38597004375220895, "grad_norm": 2.02263617387937, "learning_rate": 3.23181526619628e-06, "loss": 0.729, "step": 31670 }, { "epoch": 0.38603097997635677, "grad_norm": 2.5042992321577358, "learning_rate": 3.231494547787043e-06, "loss": 0.7565, "step": 31675 }, { "epoch": 0.38609191620050454, "grad_norm": 2.166647670369943, "learning_rate": 3.2311738293778067e-06, "loss": 0.7007, "step": 31680 }, { "epoch": 0.38615285242465236, "grad_norm": 2.858972008085489, "learning_rate": 3.2308531109685697e-06, "loss": 0.7562, "step": 31685 }, { "epoch": 0.3862137886488002, "grad_norm": 2.6238317505563136, "learning_rate": 3.230532392559333e-06, "loss": 0.7134, "step": 31690 }, { "epoch": 0.38627472487294795, "grad_norm": 2.2745399614032555, "learning_rate": 3.2302116741500966e-06, "loss": 0.7534, "step": 31695 }, { "epoch": 0.3863356610970958, "grad_norm": 2.7739736521221317, "learning_rate": 3.2298909557408596e-06, "loss": 0.7596, "step": 31700 }, { "epoch": 0.3863965973212436, "grad_norm": 2.4555486175025476, "learning_rate": 3.229570237331623e-06, "loss": 0.6467, "step": 31705 }, { "epoch": 0.3864575335453914, "grad_norm": 2.4303455448244025, "learning_rate": 3.2292495189223865e-06, "loss": 0.7765, "step": 31710 }, { "epoch": 0.3865184697695392, "grad_norm": 2.8899127267035207, "learning_rate": 3.22892880051315e-06, "loss": 0.7534, "step": 31715 }, { "epoch": 0.386579405993687, "grad_norm": 5.855604410846666, "learning_rate": 3.228608082103913e-06, "loss": 0.7627, "step": 31720 }, { "epoch": 0.38664034221783483, "grad_norm": 2.535333978768258, "learning_rate": 3.2282873636946764e-06, "loss": 0.7462, "step": 31725 }, { "epoch": 0.3867012784419826, "grad_norm": 2.340086593263525, "learning_rate": 3.22796664528544e-06, "loss": 0.7466, "step": 31730 }, { "epoch": 0.3867622146661304, "grad_norm": 2.9157625428566085, "learning_rate": 3.227645926876203e-06, "loss": 0.7651, "step": 31735 }, { "epoch": 0.38682315089027824, "grad_norm": 2.927037538879277, "learning_rate": 3.2273252084669667e-06, "loss": 0.7747, "step": 31740 }, { "epoch": 0.38688408711442607, "grad_norm": 2.387723775195405, "learning_rate": 3.2270044900577297e-06, "loss": 0.7393, "step": 31745 }, { "epoch": 0.38694502333857383, "grad_norm": 2.135610271317845, "learning_rate": 3.2266837716484927e-06, "loss": 0.7451, "step": 31750 }, { "epoch": 0.38700595956272166, "grad_norm": 2.511319090316394, "learning_rate": 3.2263630532392566e-06, "loss": 0.7811, "step": 31755 }, { "epoch": 0.3870668957868695, "grad_norm": 2.2585541912510427, "learning_rate": 3.2260423348300196e-06, "loss": 0.8189, "step": 31760 }, { "epoch": 0.38712783201101725, "grad_norm": 2.1611074703440782, "learning_rate": 3.2257216164207826e-06, "loss": 0.8302, "step": 31765 }, { "epoch": 0.38718876823516507, "grad_norm": 2.673104420572312, "learning_rate": 3.225400898011546e-06, "loss": 0.7856, "step": 31770 }, { "epoch": 0.3872497044593129, "grad_norm": 2.4364790551444346, "learning_rate": 3.2250801796023095e-06, "loss": 0.8402, "step": 31775 }, { "epoch": 0.3873106406834607, "grad_norm": 3.20011868666648, "learning_rate": 3.2247594611930725e-06, "loss": 0.8128, "step": 31780 }, { "epoch": 0.3873715769076085, "grad_norm": 4.764704792909064, "learning_rate": 3.224438742783836e-06, "loss": 0.8552, "step": 31785 }, { "epoch": 0.3874325131317563, "grad_norm": 2.1275913346489803, "learning_rate": 3.2241180243745994e-06, "loss": 0.7973, "step": 31790 }, { "epoch": 0.38749344935590413, "grad_norm": 2.6030899580933986, "learning_rate": 3.223797305965363e-06, "loss": 0.7859, "step": 31795 }, { "epoch": 0.3875543855800519, "grad_norm": 2.062911784443171, "learning_rate": 3.223476587556126e-06, "loss": 0.8034, "step": 31800 }, { "epoch": 0.3876153218041997, "grad_norm": 2.6853601236562135, "learning_rate": 3.2231558691468893e-06, "loss": 0.7448, "step": 31805 }, { "epoch": 0.38767625802834754, "grad_norm": 2.2313135834093036, "learning_rate": 3.2228351507376527e-06, "loss": 0.7102, "step": 31810 }, { "epoch": 0.38773719425249537, "grad_norm": 2.8625747553850007, "learning_rate": 3.2225144323284157e-06, "loss": 0.8147, "step": 31815 }, { "epoch": 0.38779813047664313, "grad_norm": 2.3351989734510794, "learning_rate": 3.2221937139191796e-06, "loss": 0.8264, "step": 31820 }, { "epoch": 0.38785906670079096, "grad_norm": 5.063098094547533, "learning_rate": 3.2218729955099426e-06, "loss": 0.7263, "step": 31825 }, { "epoch": 0.3879200029249388, "grad_norm": 3.148900387962143, "learning_rate": 3.2215522771007056e-06, "loss": 0.736, "step": 31830 }, { "epoch": 0.38798093914908655, "grad_norm": 2.404512554560871, "learning_rate": 3.2212315586914695e-06, "loss": 0.7332, "step": 31835 }, { "epoch": 0.38804187537323437, "grad_norm": 2.5832241345059, "learning_rate": 3.2209108402822325e-06, "loss": 0.8308, "step": 31840 }, { "epoch": 0.3881028115973822, "grad_norm": 2.8180028823545475, "learning_rate": 3.2205901218729955e-06, "loss": 0.8194, "step": 31845 }, { "epoch": 0.38816374782153, "grad_norm": 2.759430049129345, "learning_rate": 3.220269403463759e-06, "loss": 0.7548, "step": 31850 }, { "epoch": 0.3882246840456778, "grad_norm": 2.3448108333496487, "learning_rate": 3.2199486850545224e-06, "loss": 0.8091, "step": 31855 }, { "epoch": 0.3882856202698256, "grad_norm": 2.1832761711197035, "learning_rate": 3.2196279666452854e-06, "loss": 0.905, "step": 31860 }, { "epoch": 0.3883465564939734, "grad_norm": 2.1907081108576176, "learning_rate": 3.219307248236049e-06, "loss": 0.7123, "step": 31865 }, { "epoch": 0.3884074927181212, "grad_norm": 2.9635463706093326, "learning_rate": 3.2189865298268123e-06, "loss": 0.8178, "step": 31870 }, { "epoch": 0.388468428942269, "grad_norm": 2.45923420044061, "learning_rate": 3.2186658114175758e-06, "loss": 0.8313, "step": 31875 }, { "epoch": 0.38852936516641684, "grad_norm": 2.9579911454907717, "learning_rate": 3.2183450930083388e-06, "loss": 0.8077, "step": 31880 }, { "epoch": 0.3885903013905646, "grad_norm": 3.1630439724274333, "learning_rate": 3.2180243745991026e-06, "loss": 0.7346, "step": 31885 }, { "epoch": 0.38865123761471243, "grad_norm": 2.1215499712462234, "learning_rate": 3.2177036561898657e-06, "loss": 0.7551, "step": 31890 }, { "epoch": 0.38871217383886025, "grad_norm": 2.3650919907538936, "learning_rate": 3.2173829377806287e-06, "loss": 0.8182, "step": 31895 }, { "epoch": 0.3887731100630081, "grad_norm": 2.9120219521619677, "learning_rate": 3.2170622193713925e-06, "loss": 0.795, "step": 31900 }, { "epoch": 0.38883404628715584, "grad_norm": 2.5495382904389965, "learning_rate": 3.2167415009621556e-06, "loss": 0.7449, "step": 31905 }, { "epoch": 0.38889498251130367, "grad_norm": 2.9475922823536287, "learning_rate": 3.2164207825529186e-06, "loss": 0.7638, "step": 31910 }, { "epoch": 0.3889559187354515, "grad_norm": 2.4052743094691813, "learning_rate": 3.2161000641436824e-06, "loss": 0.7432, "step": 31915 }, { "epoch": 0.38901685495959926, "grad_norm": 3.030311880842699, "learning_rate": 3.2157793457344455e-06, "loss": 0.7853, "step": 31920 }, { "epoch": 0.3890777911837471, "grad_norm": 2.5742531729182745, "learning_rate": 3.2154586273252085e-06, "loss": 0.8012, "step": 31925 }, { "epoch": 0.3891387274078949, "grad_norm": 2.4867932889857385, "learning_rate": 3.215137908915972e-06, "loss": 0.7827, "step": 31930 }, { "epoch": 0.3891996636320427, "grad_norm": 3.537536460389909, "learning_rate": 3.2148171905067354e-06, "loss": 0.7667, "step": 31935 }, { "epoch": 0.3892605998561905, "grad_norm": 2.8581298223224527, "learning_rate": 3.214496472097499e-06, "loss": 0.8041, "step": 31940 }, { "epoch": 0.3893215360803383, "grad_norm": 2.408871471519055, "learning_rate": 3.214175753688262e-06, "loss": 0.8506, "step": 31945 }, { "epoch": 0.38938247230448614, "grad_norm": 2.486161702920426, "learning_rate": 3.2138550352790252e-06, "loss": 0.7428, "step": 31950 }, { "epoch": 0.3894434085286339, "grad_norm": 2.602872311981107, "learning_rate": 3.2135343168697887e-06, "loss": 0.7882, "step": 31955 }, { "epoch": 0.38950434475278173, "grad_norm": 2.3997647352196, "learning_rate": 3.2132135984605517e-06, "loss": 0.7531, "step": 31960 }, { "epoch": 0.38956528097692955, "grad_norm": 3.8598555338614084, "learning_rate": 3.2128928800513156e-06, "loss": 0.7468, "step": 31965 }, { "epoch": 0.3896262172010774, "grad_norm": 2.64819109202546, "learning_rate": 3.2125721616420786e-06, "loss": 0.822, "step": 31970 }, { "epoch": 0.38968715342522514, "grad_norm": 2.546890314327035, "learning_rate": 3.2122514432328416e-06, "loss": 0.8275, "step": 31975 }, { "epoch": 0.38974808964937296, "grad_norm": 2.3334108814737866, "learning_rate": 3.2119307248236055e-06, "loss": 0.7885, "step": 31980 }, { "epoch": 0.3898090258735208, "grad_norm": 2.4705327781002246, "learning_rate": 3.2116100064143685e-06, "loss": 0.7436, "step": 31985 }, { "epoch": 0.38986996209766855, "grad_norm": 2.7067557798098356, "learning_rate": 3.2112892880051315e-06, "loss": 0.7364, "step": 31990 }, { "epoch": 0.3899308983218164, "grad_norm": 2.6256299511009726, "learning_rate": 3.2109685695958954e-06, "loss": 0.7549, "step": 31995 }, { "epoch": 0.3899918345459642, "grad_norm": 2.334619525472292, "learning_rate": 3.2106478511866584e-06, "loss": 0.7446, "step": 32000 }, { "epoch": 0.390052770770112, "grad_norm": 2.6886032095773262, "learning_rate": 3.2103271327774214e-06, "loss": 0.7573, "step": 32005 }, { "epoch": 0.3901137069942598, "grad_norm": 2.979366683340841, "learning_rate": 3.210006414368185e-06, "loss": 0.767, "step": 32010 }, { "epoch": 0.3901746432184076, "grad_norm": 2.6425372006150276, "learning_rate": 3.2096856959589483e-06, "loss": 0.7472, "step": 32015 }, { "epoch": 0.39023557944255544, "grad_norm": 2.4361948801911337, "learning_rate": 3.2093649775497117e-06, "loss": 0.7457, "step": 32020 }, { "epoch": 0.3902965156667032, "grad_norm": 3.1562529954741425, "learning_rate": 3.2090442591404747e-06, "loss": 0.8014, "step": 32025 }, { "epoch": 0.390357451890851, "grad_norm": 2.5419915918662004, "learning_rate": 3.208723540731238e-06, "loss": 0.7405, "step": 32030 }, { "epoch": 0.39041838811499885, "grad_norm": 3.4025598530070353, "learning_rate": 3.2084028223220016e-06, "loss": 0.746, "step": 32035 }, { "epoch": 0.39047932433914667, "grad_norm": 2.1308847943118083, "learning_rate": 3.2080821039127646e-06, "loss": 0.7752, "step": 32040 }, { "epoch": 0.39054026056329444, "grad_norm": 2.0609443151758646, "learning_rate": 3.2077613855035285e-06, "loss": 0.8077, "step": 32045 }, { "epoch": 0.39060119678744226, "grad_norm": 3.6705282025765418, "learning_rate": 3.2074406670942915e-06, "loss": 0.7695, "step": 32050 }, { "epoch": 0.3906621330115901, "grad_norm": 3.120086659678869, "learning_rate": 3.2071199486850545e-06, "loss": 0.6945, "step": 32055 }, { "epoch": 0.39072306923573785, "grad_norm": 2.5919455421614743, "learning_rate": 3.2067992302758184e-06, "loss": 0.8175, "step": 32060 }, { "epoch": 0.3907840054598857, "grad_norm": 1.948619960360947, "learning_rate": 3.2064785118665814e-06, "loss": 0.8224, "step": 32065 }, { "epoch": 0.3908449416840335, "grad_norm": 2.7193934431325473, "learning_rate": 3.2061577934573444e-06, "loss": 0.7679, "step": 32070 }, { "epoch": 0.3909058779081813, "grad_norm": 2.650366537161979, "learning_rate": 3.2058370750481083e-06, "loss": 0.8177, "step": 32075 }, { "epoch": 0.3909668141323291, "grad_norm": 2.6693984400502666, "learning_rate": 3.2055163566388713e-06, "loss": 0.7633, "step": 32080 }, { "epoch": 0.3910277503564769, "grad_norm": 2.8914531150458007, "learning_rate": 3.2051956382296343e-06, "loss": 0.8128, "step": 32085 }, { "epoch": 0.39108868658062473, "grad_norm": 2.534326370110207, "learning_rate": 3.204874919820398e-06, "loss": 0.813, "step": 32090 }, { "epoch": 0.3911496228047725, "grad_norm": 2.8967616020160154, "learning_rate": 3.204554201411161e-06, "loss": 0.7272, "step": 32095 }, { "epoch": 0.3912105590289203, "grad_norm": 2.3545871375653333, "learning_rate": 3.2042334830019246e-06, "loss": 0.8147, "step": 32100 }, { "epoch": 0.39127149525306815, "grad_norm": 1.9161304729259372, "learning_rate": 3.2039127645926877e-06, "loss": 0.7282, "step": 32105 }, { "epoch": 0.39133243147721597, "grad_norm": 2.871545224346921, "learning_rate": 3.2035920461834515e-06, "loss": 0.863, "step": 32110 }, { "epoch": 0.39139336770136374, "grad_norm": 2.3288972729875748, "learning_rate": 3.2032713277742145e-06, "loss": 0.7971, "step": 32115 }, { "epoch": 0.39145430392551156, "grad_norm": 2.401160886193789, "learning_rate": 3.2029506093649776e-06, "loss": 0.8605, "step": 32120 }, { "epoch": 0.3915152401496594, "grad_norm": 2.0717668532429467, "learning_rate": 3.2026298909557414e-06, "loss": 0.7738, "step": 32125 }, { "epoch": 0.39157617637380715, "grad_norm": 2.5242372814125047, "learning_rate": 3.2023091725465044e-06, "loss": 0.849, "step": 32130 }, { "epoch": 0.391637112597955, "grad_norm": 2.6626538062763836, "learning_rate": 3.2019884541372675e-06, "loss": 0.8091, "step": 32135 }, { "epoch": 0.3916980488221028, "grad_norm": 2.6262779911322482, "learning_rate": 3.2016677357280313e-06, "loss": 0.6989, "step": 32140 }, { "epoch": 0.3917589850462506, "grad_norm": 2.2127514104384147, "learning_rate": 3.2013470173187943e-06, "loss": 0.7553, "step": 32145 }, { "epoch": 0.3918199212703984, "grad_norm": 2.522270923582276, "learning_rate": 3.2010262989095574e-06, "loss": 0.7539, "step": 32150 }, { "epoch": 0.3918808574945462, "grad_norm": 4.212088940608779, "learning_rate": 3.2007055805003212e-06, "loss": 0.7058, "step": 32155 }, { "epoch": 0.39194179371869403, "grad_norm": 2.6616495063196983, "learning_rate": 3.2003848620910842e-06, "loss": 0.7519, "step": 32160 }, { "epoch": 0.3920027299428418, "grad_norm": 3.855263633672707, "learning_rate": 3.2000641436818477e-06, "loss": 0.8344, "step": 32165 }, { "epoch": 0.3920636661669896, "grad_norm": 2.4746805250811437, "learning_rate": 3.199743425272611e-06, "loss": 0.8213, "step": 32170 }, { "epoch": 0.39212460239113744, "grad_norm": 2.328262070203473, "learning_rate": 3.199422706863374e-06, "loss": 0.7848, "step": 32175 }, { "epoch": 0.39218553861528527, "grad_norm": 2.373037880717333, "learning_rate": 3.1991019884541376e-06, "loss": 0.7736, "step": 32180 }, { "epoch": 0.39224647483943303, "grad_norm": 2.123711046349282, "learning_rate": 3.1987812700449006e-06, "loss": 0.7808, "step": 32185 }, { "epoch": 0.39230741106358086, "grad_norm": 2.776740642025307, "learning_rate": 3.1984605516356645e-06, "loss": 0.7507, "step": 32190 }, { "epoch": 0.3923683472877287, "grad_norm": 2.261279656203794, "learning_rate": 3.1981398332264275e-06, "loss": 0.727, "step": 32195 }, { "epoch": 0.39242928351187645, "grad_norm": 2.1410349638218005, "learning_rate": 3.1978191148171905e-06, "loss": 0.7589, "step": 32200 }, { "epoch": 0.39249021973602427, "grad_norm": 2.7036773195779307, "learning_rate": 3.1974983964079544e-06, "loss": 0.7303, "step": 32205 }, { "epoch": 0.3925511559601721, "grad_norm": 2.490310134727797, "learning_rate": 3.1971776779987174e-06, "loss": 0.6837, "step": 32210 }, { "epoch": 0.3926120921843199, "grad_norm": 2.555889089810271, "learning_rate": 3.1968569595894804e-06, "loss": 0.7433, "step": 32215 }, { "epoch": 0.3926730284084677, "grad_norm": 2.535696054153128, "learning_rate": 3.1965362411802442e-06, "loss": 0.7947, "step": 32220 }, { "epoch": 0.3927339646326155, "grad_norm": 3.0207126938690405, "learning_rate": 3.1962155227710073e-06, "loss": 0.808, "step": 32225 }, { "epoch": 0.39279490085676333, "grad_norm": 2.2055623282649934, "learning_rate": 3.1958948043617703e-06, "loss": 0.7234, "step": 32230 }, { "epoch": 0.3928558370809111, "grad_norm": 2.3862238487421332, "learning_rate": 3.195574085952534e-06, "loss": 0.693, "step": 32235 }, { "epoch": 0.3929167733050589, "grad_norm": 2.1919169865636894, "learning_rate": 3.195253367543297e-06, "loss": 0.8044, "step": 32240 }, { "epoch": 0.39297770952920674, "grad_norm": 2.682860404369141, "learning_rate": 3.1949326491340606e-06, "loss": 0.8524, "step": 32245 }, { "epoch": 0.39303864575335457, "grad_norm": 2.3750276076645407, "learning_rate": 3.194611930724824e-06, "loss": 0.8192, "step": 32250 }, { "epoch": 0.39309958197750233, "grad_norm": 2.6193463228396445, "learning_rate": 3.194291212315587e-06, "loss": 0.7404, "step": 32255 }, { "epoch": 0.39316051820165016, "grad_norm": 2.6683980957574076, "learning_rate": 3.1939704939063505e-06, "loss": 0.7571, "step": 32260 }, { "epoch": 0.393221454425798, "grad_norm": 3.203371733205817, "learning_rate": 3.1936497754971135e-06, "loss": 0.7854, "step": 32265 }, { "epoch": 0.39328239064994575, "grad_norm": 2.633900696416469, "learning_rate": 3.1933290570878774e-06, "loss": 0.8273, "step": 32270 }, { "epoch": 0.39334332687409357, "grad_norm": 2.694915427871604, "learning_rate": 3.1930083386786404e-06, "loss": 0.7023, "step": 32275 }, { "epoch": 0.3934042630982414, "grad_norm": 2.980544764775539, "learning_rate": 3.1926876202694034e-06, "loss": 0.7703, "step": 32280 }, { "epoch": 0.3934651993223892, "grad_norm": 2.4230878505339684, "learning_rate": 3.1923669018601673e-06, "loss": 0.7814, "step": 32285 }, { "epoch": 0.393526135546537, "grad_norm": 2.5576443660068615, "learning_rate": 3.1920461834509303e-06, "loss": 0.7475, "step": 32290 }, { "epoch": 0.3935870717706848, "grad_norm": 2.2498015215024374, "learning_rate": 3.1917254650416933e-06, "loss": 0.762, "step": 32295 }, { "epoch": 0.3936480079948326, "grad_norm": 2.8170843997288664, "learning_rate": 3.191404746632457e-06, "loss": 0.8609, "step": 32300 }, { "epoch": 0.3937089442189804, "grad_norm": 2.387316479598306, "learning_rate": 3.19108402822322e-06, "loss": 0.8213, "step": 32305 }, { "epoch": 0.3937698804431282, "grad_norm": 2.918863568952782, "learning_rate": 3.190763309813983e-06, "loss": 0.8414, "step": 32310 }, { "epoch": 0.39383081666727604, "grad_norm": 2.8689514123973545, "learning_rate": 3.190442591404747e-06, "loss": 0.7283, "step": 32315 }, { "epoch": 0.39389175289142386, "grad_norm": 2.530094567252706, "learning_rate": 3.19012187299551e-06, "loss": 0.7264, "step": 32320 }, { "epoch": 0.39395268911557163, "grad_norm": 2.2451655152943895, "learning_rate": 3.1898011545862735e-06, "loss": 0.708, "step": 32325 }, { "epoch": 0.39401362533971945, "grad_norm": 2.787463683998123, "learning_rate": 3.189480436177037e-06, "loss": 0.8344, "step": 32330 }, { "epoch": 0.3940745615638673, "grad_norm": 2.563852218666464, "learning_rate": 3.1891597177678004e-06, "loss": 0.785, "step": 32335 }, { "epoch": 0.39413549778801504, "grad_norm": 2.4132338367905537, "learning_rate": 3.1888389993585634e-06, "loss": 0.8008, "step": 32340 }, { "epoch": 0.39419643401216287, "grad_norm": 2.3285940304647554, "learning_rate": 3.188518280949327e-06, "loss": 0.7777, "step": 32345 }, { "epoch": 0.3942573702363107, "grad_norm": 2.1304220422047115, "learning_rate": 3.1881975625400903e-06, "loss": 0.7699, "step": 32350 }, { "epoch": 0.39431830646045846, "grad_norm": 2.0513565969695216, "learning_rate": 3.1878768441308533e-06, "loss": 0.7769, "step": 32355 }, { "epoch": 0.3943792426846063, "grad_norm": 2.139094332364149, "learning_rate": 3.1875561257216163e-06, "loss": 0.706, "step": 32360 }, { "epoch": 0.3944401789087541, "grad_norm": 2.322218163052175, "learning_rate": 3.18723540731238e-06, "loss": 0.8457, "step": 32365 }, { "epoch": 0.3945011151329019, "grad_norm": 2.6227471579201684, "learning_rate": 3.1869146889031432e-06, "loss": 0.7719, "step": 32370 }, { "epoch": 0.3945620513570497, "grad_norm": 3.2362664791042537, "learning_rate": 3.1865939704939062e-06, "loss": 0.8037, "step": 32375 }, { "epoch": 0.3946229875811975, "grad_norm": 3.636448239791723, "learning_rate": 3.18627325208467e-06, "loss": 0.8417, "step": 32380 }, { "epoch": 0.39468392380534534, "grad_norm": 2.7872280533876594, "learning_rate": 3.185952533675433e-06, "loss": 0.8156, "step": 32385 }, { "epoch": 0.3947448600294931, "grad_norm": 2.4940546349834762, "learning_rate": 3.1856318152661966e-06, "loss": 0.8123, "step": 32390 }, { "epoch": 0.39480579625364093, "grad_norm": 2.1062151707225856, "learning_rate": 3.18531109685696e-06, "loss": 0.7233, "step": 32395 }, { "epoch": 0.39486673247778875, "grad_norm": 2.4482133276718954, "learning_rate": 3.184990378447723e-06, "loss": 0.7765, "step": 32400 }, { "epoch": 0.3949276687019366, "grad_norm": 2.0848619989988357, "learning_rate": 3.1846696600384865e-06, "loss": 0.7789, "step": 32405 }, { "epoch": 0.39498860492608434, "grad_norm": 2.6270471054332005, "learning_rate": 3.18434894162925e-06, "loss": 0.7398, "step": 32410 }, { "epoch": 0.39504954115023216, "grad_norm": 2.565914166213319, "learning_rate": 3.1840282232200133e-06, "loss": 0.77, "step": 32415 }, { "epoch": 0.39511047737438, "grad_norm": 2.3144955542455388, "learning_rate": 3.1837075048107764e-06, "loss": 0.7381, "step": 32420 }, { "epoch": 0.39517141359852775, "grad_norm": 2.277151032878388, "learning_rate": 3.18338678640154e-06, "loss": 0.7114, "step": 32425 }, { "epoch": 0.3952323498226756, "grad_norm": 1.92226529419293, "learning_rate": 3.1830660679923032e-06, "loss": 0.667, "step": 32430 }, { "epoch": 0.3952932860468234, "grad_norm": 2.8788162866683, "learning_rate": 3.1827453495830663e-06, "loss": 0.7314, "step": 32435 }, { "epoch": 0.3953542222709712, "grad_norm": 2.1194035062268735, "learning_rate": 3.1824246311738293e-06, "loss": 0.7852, "step": 32440 }, { "epoch": 0.395415158495119, "grad_norm": 2.938656435899418, "learning_rate": 3.182103912764593e-06, "loss": 0.868, "step": 32445 }, { "epoch": 0.3954760947192668, "grad_norm": 2.5459376095084862, "learning_rate": 3.181783194355356e-06, "loss": 0.7478, "step": 32450 }, { "epoch": 0.39553703094341464, "grad_norm": 1.9953862686486334, "learning_rate": 3.181462475946119e-06, "loss": 0.7202, "step": 32455 }, { "epoch": 0.3955979671675624, "grad_norm": 2.56665452199474, "learning_rate": 3.181141757536883e-06, "loss": 0.8007, "step": 32460 }, { "epoch": 0.3956589033917102, "grad_norm": 2.703175237622799, "learning_rate": 3.180821039127646e-06, "loss": 0.7221, "step": 32465 }, { "epoch": 0.39571983961585805, "grad_norm": 2.776058958216361, "learning_rate": 3.1805003207184095e-06, "loss": 0.803, "step": 32470 }, { "epoch": 0.39578077584000587, "grad_norm": 2.3757862580613494, "learning_rate": 3.180179602309173e-06, "loss": 0.6822, "step": 32475 }, { "epoch": 0.39584171206415364, "grad_norm": 3.3119221501333236, "learning_rate": 3.179858883899936e-06, "loss": 0.7881, "step": 32480 }, { "epoch": 0.39590264828830146, "grad_norm": 2.7795527062376393, "learning_rate": 3.1795381654906994e-06, "loss": 0.847, "step": 32485 }, { "epoch": 0.3959635845124493, "grad_norm": 2.648537825995278, "learning_rate": 3.179217447081463e-06, "loss": 0.7674, "step": 32490 }, { "epoch": 0.39602452073659705, "grad_norm": 2.417811717499736, "learning_rate": 3.1788967286722263e-06, "loss": 0.7378, "step": 32495 }, { "epoch": 0.3960854569607449, "grad_norm": 2.3512082506649796, "learning_rate": 3.1785760102629893e-06, "loss": 0.7809, "step": 32500 }, { "epoch": 0.3961463931848927, "grad_norm": 2.8576757941943685, "learning_rate": 3.1782552918537527e-06, "loss": 0.6695, "step": 32505 }, { "epoch": 0.3962073294090405, "grad_norm": 3.3379783779687475, "learning_rate": 3.177934573444516e-06, "loss": 0.756, "step": 32510 }, { "epoch": 0.3962682656331883, "grad_norm": 2.4491949294076343, "learning_rate": 3.177613855035279e-06, "loss": 0.7031, "step": 32515 }, { "epoch": 0.3963292018573361, "grad_norm": 2.173233786773461, "learning_rate": 3.177293136626042e-06, "loss": 0.8084, "step": 32520 }, { "epoch": 0.39639013808148393, "grad_norm": 2.5817593204629317, "learning_rate": 3.176972418216806e-06, "loss": 0.7894, "step": 32525 }, { "epoch": 0.3964510743056317, "grad_norm": 2.3648841869419104, "learning_rate": 3.176651699807569e-06, "loss": 0.7387, "step": 32530 }, { "epoch": 0.3965120105297795, "grad_norm": 2.6350212113146165, "learning_rate": 3.176330981398332e-06, "loss": 0.8021, "step": 32535 }, { "epoch": 0.39657294675392735, "grad_norm": 2.8776161614848013, "learning_rate": 3.176010262989096e-06, "loss": 0.8136, "step": 32540 }, { "epoch": 0.39663388297807517, "grad_norm": 2.986256185525589, "learning_rate": 3.175689544579859e-06, "loss": 0.8006, "step": 32545 }, { "epoch": 0.39669481920222294, "grad_norm": 3.7053482205391948, "learning_rate": 3.1753688261706224e-06, "loss": 0.7767, "step": 32550 }, { "epoch": 0.39675575542637076, "grad_norm": 2.4982151924057656, "learning_rate": 3.175048107761386e-06, "loss": 0.8237, "step": 32555 }, { "epoch": 0.3968166916505186, "grad_norm": 3.063206456809371, "learning_rate": 3.174727389352149e-06, "loss": 0.7776, "step": 32560 }, { "epoch": 0.39687762787466635, "grad_norm": 2.434697330080178, "learning_rate": 3.1744066709429123e-06, "loss": 0.7899, "step": 32565 }, { "epoch": 0.3969385640988142, "grad_norm": 2.5080097635714154, "learning_rate": 3.1740859525336758e-06, "loss": 0.7727, "step": 32570 }, { "epoch": 0.396999500322962, "grad_norm": 2.3360381732341957, "learning_rate": 3.173765234124439e-06, "loss": 0.8312, "step": 32575 }, { "epoch": 0.3970604365471098, "grad_norm": 3.0978267453462434, "learning_rate": 3.173444515715202e-06, "loss": 0.8929, "step": 32580 }, { "epoch": 0.3971213727712576, "grad_norm": 1.8880129742144163, "learning_rate": 3.173123797305966e-06, "loss": 0.7052, "step": 32585 }, { "epoch": 0.3971823089954054, "grad_norm": 2.2894792336686995, "learning_rate": 3.172803078896729e-06, "loss": 0.7553, "step": 32590 }, { "epoch": 0.39724324521955323, "grad_norm": 3.10352615214982, "learning_rate": 3.172482360487492e-06, "loss": 0.8129, "step": 32595 }, { "epoch": 0.397304181443701, "grad_norm": 2.9400600254379334, "learning_rate": 3.172161642078255e-06, "loss": 0.7439, "step": 32600 }, { "epoch": 0.3973651176678488, "grad_norm": 2.523425700794926, "learning_rate": 3.171840923669019e-06, "loss": 0.7508, "step": 32605 }, { "epoch": 0.39742605389199664, "grad_norm": 2.776179991757032, "learning_rate": 3.171520205259782e-06, "loss": 0.8126, "step": 32610 }, { "epoch": 0.39748699011614447, "grad_norm": 2.408918467016552, "learning_rate": 3.1711994868505454e-06, "loss": 0.7307, "step": 32615 }, { "epoch": 0.39754792634029223, "grad_norm": 2.4295630274331375, "learning_rate": 3.170878768441309e-06, "loss": 0.8307, "step": 32620 }, { "epoch": 0.39760886256444006, "grad_norm": 2.4244205661891347, "learning_rate": 3.170558050032072e-06, "loss": 0.746, "step": 32625 }, { "epoch": 0.3976697987885879, "grad_norm": 2.7858166371767727, "learning_rate": 3.1702373316228353e-06, "loss": 0.7864, "step": 32630 }, { "epoch": 0.39773073501273565, "grad_norm": 2.431666061025649, "learning_rate": 3.1699166132135988e-06, "loss": 0.7498, "step": 32635 }, { "epoch": 0.39779167123688347, "grad_norm": 2.434869766652388, "learning_rate": 3.1695958948043622e-06, "loss": 0.7075, "step": 32640 }, { "epoch": 0.3978526074610313, "grad_norm": 2.65531436124724, "learning_rate": 3.1692751763951252e-06, "loss": 0.7292, "step": 32645 }, { "epoch": 0.3979135436851791, "grad_norm": 2.7422980393269905, "learning_rate": 3.1689544579858887e-06, "loss": 0.7183, "step": 32650 }, { "epoch": 0.3979744799093269, "grad_norm": 2.4557832759800666, "learning_rate": 3.168633739576652e-06, "loss": 0.8226, "step": 32655 }, { "epoch": 0.3980354161334747, "grad_norm": 3.0159905551699047, "learning_rate": 3.168313021167415e-06, "loss": 0.8498, "step": 32660 }, { "epoch": 0.39809635235762253, "grad_norm": 2.5041801793252376, "learning_rate": 3.167992302758179e-06, "loss": 0.7327, "step": 32665 }, { "epoch": 0.3981572885817703, "grad_norm": 2.3464291625554705, "learning_rate": 3.167671584348942e-06, "loss": 0.7393, "step": 32670 }, { "epoch": 0.3982182248059181, "grad_norm": 1.9842239178001768, "learning_rate": 3.167350865939705e-06, "loss": 0.6545, "step": 32675 }, { "epoch": 0.39827916103006594, "grad_norm": 2.376560546057932, "learning_rate": 3.167030147530469e-06, "loss": 0.7794, "step": 32680 }, { "epoch": 0.39834009725421377, "grad_norm": 2.12924323146614, "learning_rate": 3.166709429121232e-06, "loss": 0.765, "step": 32685 }, { "epoch": 0.39840103347836153, "grad_norm": 2.2393359241117214, "learning_rate": 3.166388710711995e-06, "loss": 0.7549, "step": 32690 }, { "epoch": 0.39846196970250936, "grad_norm": 2.271754045690432, "learning_rate": 3.1660679923027584e-06, "loss": 0.8117, "step": 32695 }, { "epoch": 0.3985229059266572, "grad_norm": 2.8308804819720184, "learning_rate": 3.165747273893522e-06, "loss": 0.8147, "step": 32700 }, { "epoch": 0.39858384215080495, "grad_norm": 2.8145994584699365, "learning_rate": 3.165426555484285e-06, "loss": 0.8911, "step": 32705 }, { "epoch": 0.39864477837495277, "grad_norm": 3.803795288011166, "learning_rate": 3.1651058370750483e-06, "loss": 0.7321, "step": 32710 }, { "epoch": 0.3987057145991006, "grad_norm": 2.538983724449881, "learning_rate": 3.1647851186658117e-06, "loss": 0.7707, "step": 32715 }, { "epoch": 0.3987666508232484, "grad_norm": 2.4157054032838556, "learning_rate": 3.164464400256575e-06, "loss": 0.7281, "step": 32720 }, { "epoch": 0.3988275870473962, "grad_norm": 3.388165676042527, "learning_rate": 3.164143681847338e-06, "loss": 0.7499, "step": 32725 }, { "epoch": 0.398888523271544, "grad_norm": 2.5520315739503054, "learning_rate": 3.1638229634381016e-06, "loss": 0.8031, "step": 32730 }, { "epoch": 0.3989494594956918, "grad_norm": 3.5955466812835963, "learning_rate": 3.163502245028865e-06, "loss": 0.7839, "step": 32735 }, { "epoch": 0.3990103957198396, "grad_norm": 2.294235109212678, "learning_rate": 3.163181526619628e-06, "loss": 0.7015, "step": 32740 }, { "epoch": 0.3990713319439874, "grad_norm": 2.091754608381987, "learning_rate": 3.162860808210392e-06, "loss": 0.7425, "step": 32745 }, { "epoch": 0.39913226816813524, "grad_norm": 2.2931509140968114, "learning_rate": 3.162540089801155e-06, "loss": 0.8085, "step": 32750 }, { "epoch": 0.39919320439228306, "grad_norm": 2.0991207076410743, "learning_rate": 3.162219371391918e-06, "loss": 0.753, "step": 32755 }, { "epoch": 0.39925414061643083, "grad_norm": 2.2175179615009553, "learning_rate": 3.161898652982682e-06, "loss": 0.6648, "step": 32760 }, { "epoch": 0.39931507684057865, "grad_norm": 2.629907114945057, "learning_rate": 3.161577934573445e-06, "loss": 0.7677, "step": 32765 }, { "epoch": 0.3993760130647265, "grad_norm": 2.4932624789723055, "learning_rate": 3.161257216164208e-06, "loss": 0.7485, "step": 32770 }, { "epoch": 0.39943694928887424, "grad_norm": 2.2849242089540405, "learning_rate": 3.1609364977549713e-06, "loss": 0.7725, "step": 32775 }, { "epoch": 0.39949788551302207, "grad_norm": 1.8064334167770792, "learning_rate": 3.1606157793457347e-06, "loss": 0.7495, "step": 32780 }, { "epoch": 0.3995588217371699, "grad_norm": 2.5625028899770577, "learning_rate": 3.1602950609364978e-06, "loss": 0.7457, "step": 32785 }, { "epoch": 0.3996197579613177, "grad_norm": 2.48078137401223, "learning_rate": 3.159974342527261e-06, "loss": 0.7535, "step": 32790 }, { "epoch": 0.3996806941854655, "grad_norm": 2.681905103222662, "learning_rate": 3.1596536241180246e-06, "loss": 0.7861, "step": 32795 }, { "epoch": 0.3997416304096133, "grad_norm": 2.1520898306248974, "learning_rate": 3.159332905708788e-06, "loss": 0.7931, "step": 32800 }, { "epoch": 0.3998025666337611, "grad_norm": 2.364221818016, "learning_rate": 3.159012187299551e-06, "loss": 0.8005, "step": 32805 }, { "epoch": 0.3998635028579089, "grad_norm": 2.328468986394393, "learning_rate": 3.158691468890315e-06, "loss": 0.7403, "step": 32810 }, { "epoch": 0.3999244390820567, "grad_norm": 2.5666346041052046, "learning_rate": 3.158370750481078e-06, "loss": 0.8396, "step": 32815 }, { "epoch": 0.39998537530620454, "grad_norm": 2.700366943939767, "learning_rate": 3.158050032071841e-06, "loss": 0.6837, "step": 32820 }, { "epoch": 0.40004631153035236, "grad_norm": 2.3742165422180217, "learning_rate": 3.157729313662605e-06, "loss": 0.7411, "step": 32825 }, { "epoch": 0.40010724775450013, "grad_norm": 2.0947665757581078, "learning_rate": 3.157408595253368e-06, "loss": 0.7713, "step": 32830 }, { "epoch": 0.40016818397864795, "grad_norm": 2.1296232524398193, "learning_rate": 3.157087876844131e-06, "loss": 0.7634, "step": 32835 }, { "epoch": 0.4002291202027958, "grad_norm": 1.8123511410745943, "learning_rate": 3.1567671584348948e-06, "loss": 0.7404, "step": 32840 }, { "epoch": 0.40029005642694354, "grad_norm": 2.3559265751973335, "learning_rate": 3.1564464400256578e-06, "loss": 0.9176, "step": 32845 }, { "epoch": 0.40035099265109136, "grad_norm": 2.253582846367318, "learning_rate": 3.1561257216164208e-06, "loss": 0.7628, "step": 32850 }, { "epoch": 0.4004119288752392, "grad_norm": 2.1007629524749447, "learning_rate": 3.1558050032071842e-06, "loss": 0.7954, "step": 32855 }, { "epoch": 0.40047286509938695, "grad_norm": 3.2259666429182383, "learning_rate": 3.1554842847979477e-06, "loss": 0.8164, "step": 32860 }, { "epoch": 0.4005338013235348, "grad_norm": 2.84874209137577, "learning_rate": 3.155163566388711e-06, "loss": 0.744, "step": 32865 }, { "epoch": 0.4005947375476826, "grad_norm": 2.5946412075801355, "learning_rate": 3.154842847979474e-06, "loss": 0.7341, "step": 32870 }, { "epoch": 0.4006556737718304, "grad_norm": 3.65525995485513, "learning_rate": 3.1545221295702376e-06, "loss": 0.8078, "step": 32875 }, { "epoch": 0.4007166099959782, "grad_norm": 2.4985964348620153, "learning_rate": 3.154201411161001e-06, "loss": 0.7731, "step": 32880 }, { "epoch": 0.400777546220126, "grad_norm": 2.452190252203773, "learning_rate": 3.153880692751764e-06, "loss": 0.6973, "step": 32885 }, { "epoch": 0.40083848244427384, "grad_norm": 2.4014361917943914, "learning_rate": 3.153559974342528e-06, "loss": 0.7286, "step": 32890 }, { "epoch": 0.4008994186684216, "grad_norm": 2.5185511858883753, "learning_rate": 3.153239255933291e-06, "loss": 0.7388, "step": 32895 }, { "epoch": 0.4009603548925694, "grad_norm": 2.134470924139715, "learning_rate": 3.152918537524054e-06, "loss": 0.7701, "step": 32900 }, { "epoch": 0.40102129111671725, "grad_norm": 2.4789176824362578, "learning_rate": 3.1525978191148178e-06, "loss": 0.7595, "step": 32905 }, { "epoch": 0.40108222734086507, "grad_norm": 3.289082211344885, "learning_rate": 3.152277100705581e-06, "loss": 0.7978, "step": 32910 }, { "epoch": 0.40114316356501284, "grad_norm": 1.8443859445950477, "learning_rate": 3.151956382296344e-06, "loss": 0.6546, "step": 32915 }, { "epoch": 0.40120409978916066, "grad_norm": 2.6725822989088646, "learning_rate": 3.1516356638871077e-06, "loss": 0.8208, "step": 32920 }, { "epoch": 0.4012650360133085, "grad_norm": 2.376669916199215, "learning_rate": 3.1513149454778707e-06, "loss": 0.7659, "step": 32925 }, { "epoch": 0.40132597223745625, "grad_norm": 2.6144187870182636, "learning_rate": 3.1509942270686337e-06, "loss": 0.8172, "step": 32930 }, { "epoch": 0.4013869084616041, "grad_norm": 2.5455067351214598, "learning_rate": 3.1506735086593976e-06, "loss": 0.8185, "step": 32935 }, { "epoch": 0.4014478446857519, "grad_norm": 2.694006772629, "learning_rate": 3.1503527902501606e-06, "loss": 0.7807, "step": 32940 }, { "epoch": 0.4015087809098997, "grad_norm": 2.260719305348297, "learning_rate": 3.150032071840924e-06, "loss": 0.7172, "step": 32945 }, { "epoch": 0.4015697171340475, "grad_norm": 2.6847168410710416, "learning_rate": 3.149711353431687e-06, "loss": 0.7423, "step": 32950 }, { "epoch": 0.4016306533581953, "grad_norm": 2.666711613399151, "learning_rate": 3.1493906350224505e-06, "loss": 0.7156, "step": 32955 }, { "epoch": 0.40169158958234313, "grad_norm": 3.3360957149942356, "learning_rate": 3.149069916613214e-06, "loss": 0.7604, "step": 32960 }, { "epoch": 0.4017525258064909, "grad_norm": 2.592334293433246, "learning_rate": 3.148749198203977e-06, "loss": 0.8327, "step": 32965 }, { "epoch": 0.4018134620306387, "grad_norm": 2.9704677271327853, "learning_rate": 3.148428479794741e-06, "loss": 0.7862, "step": 32970 }, { "epoch": 0.40187439825478655, "grad_norm": 2.2106917255833034, "learning_rate": 3.148107761385504e-06, "loss": 0.7513, "step": 32975 }, { "epoch": 0.40193533447893437, "grad_norm": 3.0651951098255323, "learning_rate": 3.147787042976267e-06, "loss": 0.7164, "step": 32980 }, { "epoch": 0.40199627070308214, "grad_norm": 2.4039012797847046, "learning_rate": 3.1474663245670307e-06, "loss": 0.7587, "step": 32985 }, { "epoch": 0.40205720692722996, "grad_norm": 2.4088939998942682, "learning_rate": 3.1471456061577937e-06, "loss": 0.8253, "step": 32990 }, { "epoch": 0.4021181431513778, "grad_norm": 2.657822968359403, "learning_rate": 3.1468248877485567e-06, "loss": 0.7599, "step": 32995 }, { "epoch": 0.40217907937552555, "grad_norm": 2.2777451425425603, "learning_rate": 3.1465041693393206e-06, "loss": 0.7444, "step": 33000 }, { "epoch": 0.4022400155996734, "grad_norm": 2.4708691452541416, "learning_rate": 3.1461834509300836e-06, "loss": 0.6904, "step": 33005 }, { "epoch": 0.4023009518238212, "grad_norm": 3.384472830508871, "learning_rate": 3.1458627325208466e-06, "loss": 0.8543, "step": 33010 }, { "epoch": 0.402361888047969, "grad_norm": 2.136003446359692, "learning_rate": 3.1455420141116105e-06, "loss": 0.7966, "step": 33015 }, { "epoch": 0.4024228242721168, "grad_norm": 2.1607102685980135, "learning_rate": 3.1452212957023735e-06, "loss": 0.7475, "step": 33020 }, { "epoch": 0.4024837604962646, "grad_norm": 2.1281406114082584, "learning_rate": 3.144900577293137e-06, "loss": 0.7558, "step": 33025 }, { "epoch": 0.40254469672041243, "grad_norm": 2.119115560802366, "learning_rate": 3.1445798588839e-06, "loss": 0.8006, "step": 33030 }, { "epoch": 0.4026056329445602, "grad_norm": 2.0184895887657537, "learning_rate": 3.144259140474664e-06, "loss": 0.7351, "step": 33035 }, { "epoch": 0.402666569168708, "grad_norm": 2.752889477890536, "learning_rate": 3.143938422065427e-06, "loss": 0.7788, "step": 33040 }, { "epoch": 0.40272750539285584, "grad_norm": 2.801523781553491, "learning_rate": 3.14361770365619e-06, "loss": 0.8071, "step": 33045 }, { "epoch": 0.40278844161700367, "grad_norm": 2.212037999825284, "learning_rate": 3.1432969852469537e-06, "loss": 0.677, "step": 33050 }, { "epoch": 0.40284937784115143, "grad_norm": 2.6194948987252036, "learning_rate": 3.1429762668377168e-06, "loss": 0.714, "step": 33055 }, { "epoch": 0.40291031406529926, "grad_norm": 3.4778584540038744, "learning_rate": 3.1426555484284798e-06, "loss": 0.8035, "step": 33060 }, { "epoch": 0.4029712502894471, "grad_norm": 2.238607034400295, "learning_rate": 3.1423348300192436e-06, "loss": 0.7291, "step": 33065 }, { "epoch": 0.40303218651359485, "grad_norm": 2.150892806296599, "learning_rate": 3.1420141116100067e-06, "loss": 0.749, "step": 33070 }, { "epoch": 0.40309312273774267, "grad_norm": 2.2392799570996007, "learning_rate": 3.1416933932007697e-06, "loss": 0.7723, "step": 33075 }, { "epoch": 0.4031540589618905, "grad_norm": 2.646272386892346, "learning_rate": 3.1413726747915335e-06, "loss": 0.7377, "step": 33080 }, { "epoch": 0.4032149951860383, "grad_norm": 2.2558896730354094, "learning_rate": 3.1410519563822965e-06, "loss": 0.7095, "step": 33085 }, { "epoch": 0.4032759314101861, "grad_norm": 3.1872671646613204, "learning_rate": 3.14073123797306e-06, "loss": 0.7804, "step": 33090 }, { "epoch": 0.4033368676343339, "grad_norm": 2.5779815511656294, "learning_rate": 3.1404105195638234e-06, "loss": 0.795, "step": 33095 }, { "epoch": 0.40339780385848173, "grad_norm": 2.1386007406563214, "learning_rate": 3.1400898011545864e-06, "loss": 0.7852, "step": 33100 }, { "epoch": 0.4034587400826295, "grad_norm": 3.017445385829747, "learning_rate": 3.13976908274535e-06, "loss": 0.8065, "step": 33105 }, { "epoch": 0.4035196763067773, "grad_norm": 2.4843251306549914, "learning_rate": 3.139448364336113e-06, "loss": 0.7531, "step": 33110 }, { "epoch": 0.40358061253092514, "grad_norm": 2.7190765980335145, "learning_rate": 3.1391276459268768e-06, "loss": 0.7911, "step": 33115 }, { "epoch": 0.40364154875507297, "grad_norm": 2.3010315022913588, "learning_rate": 3.1388069275176398e-06, "loss": 0.7225, "step": 33120 }, { "epoch": 0.40370248497922073, "grad_norm": 2.1213469882961156, "learning_rate": 3.138486209108403e-06, "loss": 0.7756, "step": 33125 }, { "epoch": 0.40376342120336856, "grad_norm": 2.7293010152960866, "learning_rate": 3.1381654906991667e-06, "loss": 0.7192, "step": 33130 }, { "epoch": 0.4038243574275164, "grad_norm": 2.3766635896269417, "learning_rate": 3.1378447722899297e-06, "loss": 0.7363, "step": 33135 }, { "epoch": 0.40388529365166415, "grad_norm": 2.1890588623088845, "learning_rate": 3.1375240538806927e-06, "loss": 0.7605, "step": 33140 }, { "epoch": 0.40394622987581197, "grad_norm": 2.3358416599975333, "learning_rate": 3.1372033354714566e-06, "loss": 0.7703, "step": 33145 }, { "epoch": 0.4040071660999598, "grad_norm": 2.3378974310061476, "learning_rate": 3.1368826170622196e-06, "loss": 0.7624, "step": 33150 }, { "epoch": 0.4040681023241076, "grad_norm": 3.127948402894802, "learning_rate": 3.1365618986529826e-06, "loss": 0.7825, "step": 33155 }, { "epoch": 0.4041290385482554, "grad_norm": 2.2797182993907668, "learning_rate": 3.1362411802437465e-06, "loss": 0.7353, "step": 33160 }, { "epoch": 0.4041899747724032, "grad_norm": 2.306230575405437, "learning_rate": 3.1359204618345095e-06, "loss": 0.7832, "step": 33165 }, { "epoch": 0.404250910996551, "grad_norm": 2.966774227108823, "learning_rate": 3.135599743425273e-06, "loss": 0.7455, "step": 33170 }, { "epoch": 0.4043118472206988, "grad_norm": 2.255461960732619, "learning_rate": 3.1352790250160364e-06, "loss": 0.7138, "step": 33175 }, { "epoch": 0.4043727834448466, "grad_norm": 3.633615752351001, "learning_rate": 3.1349583066067994e-06, "loss": 0.739, "step": 33180 }, { "epoch": 0.40443371966899444, "grad_norm": 3.187415373341162, "learning_rate": 3.134637588197563e-06, "loss": 0.7467, "step": 33185 }, { "epoch": 0.40449465589314226, "grad_norm": 2.4882263190027425, "learning_rate": 3.134316869788326e-06, "loss": 0.8259, "step": 33190 }, { "epoch": 0.40455559211729003, "grad_norm": 2.6359531552903226, "learning_rate": 3.1339961513790897e-06, "loss": 0.7517, "step": 33195 }, { "epoch": 0.40461652834143785, "grad_norm": 2.488983879816214, "learning_rate": 3.1336754329698527e-06, "loss": 0.7648, "step": 33200 }, { "epoch": 0.4046774645655857, "grad_norm": 2.4562912829132637, "learning_rate": 3.1333547145606157e-06, "loss": 0.7365, "step": 33205 }, { "epoch": 0.40473840078973344, "grad_norm": 2.7842169849398357, "learning_rate": 3.1330339961513796e-06, "loss": 0.7746, "step": 33210 }, { "epoch": 0.40479933701388127, "grad_norm": 2.543686379419221, "learning_rate": 3.1327132777421426e-06, "loss": 0.7527, "step": 33215 }, { "epoch": 0.4048602732380291, "grad_norm": 2.665717501551375, "learning_rate": 3.1323925593329056e-06, "loss": 0.7835, "step": 33220 }, { "epoch": 0.4049212094621769, "grad_norm": 2.26404213162618, "learning_rate": 3.1320718409236695e-06, "loss": 0.8177, "step": 33225 }, { "epoch": 0.4049821456863247, "grad_norm": 2.4614242725496887, "learning_rate": 3.1317511225144325e-06, "loss": 0.8068, "step": 33230 }, { "epoch": 0.4050430819104725, "grad_norm": 2.399622302474558, "learning_rate": 3.1314304041051955e-06, "loss": 0.7637, "step": 33235 }, { "epoch": 0.4051040181346203, "grad_norm": 2.5445096647443064, "learning_rate": 3.1311096856959594e-06, "loss": 0.7209, "step": 33240 }, { "epoch": 0.4051649543587681, "grad_norm": 2.16434623535089, "learning_rate": 3.1307889672867224e-06, "loss": 0.7184, "step": 33245 }, { "epoch": 0.4052258905829159, "grad_norm": 3.086312301027301, "learning_rate": 3.130468248877486e-06, "loss": 0.7804, "step": 33250 }, { "epoch": 0.40528682680706374, "grad_norm": 2.064449291548325, "learning_rate": 3.1301475304682493e-06, "loss": 0.7329, "step": 33255 }, { "epoch": 0.40534776303121156, "grad_norm": 3.5806785783029924, "learning_rate": 3.1298268120590123e-06, "loss": 0.7449, "step": 33260 }, { "epoch": 0.40540869925535933, "grad_norm": 2.302437299261815, "learning_rate": 3.1295060936497757e-06, "loss": 0.7155, "step": 33265 }, { "epoch": 0.40546963547950715, "grad_norm": 2.1611226313393783, "learning_rate": 3.129185375240539e-06, "loss": 0.7256, "step": 33270 }, { "epoch": 0.405530571703655, "grad_norm": 2.3390448575581733, "learning_rate": 3.1288646568313026e-06, "loss": 0.7194, "step": 33275 }, { "epoch": 0.40559150792780274, "grad_norm": 2.1373331300511897, "learning_rate": 3.1285439384220656e-06, "loss": 0.783, "step": 33280 }, { "epoch": 0.40565244415195056, "grad_norm": 2.2067368731140324, "learning_rate": 3.1282232200128287e-06, "loss": 0.8505, "step": 33285 }, { "epoch": 0.4057133803760984, "grad_norm": 2.6077194687926983, "learning_rate": 3.1279025016035925e-06, "loss": 0.7439, "step": 33290 }, { "epoch": 0.4057743166002462, "grad_norm": 2.2766311105868184, "learning_rate": 3.1275817831943555e-06, "loss": 0.7725, "step": 33295 }, { "epoch": 0.405835252824394, "grad_norm": 2.295941851157276, "learning_rate": 3.1272610647851186e-06, "loss": 0.7501, "step": 33300 }, { "epoch": 0.4058961890485418, "grad_norm": 2.3833525948074636, "learning_rate": 3.1269403463758824e-06, "loss": 0.7383, "step": 33305 }, { "epoch": 0.4059571252726896, "grad_norm": 2.5700926039913043, "learning_rate": 3.1266196279666454e-06, "loss": 0.7858, "step": 33310 }, { "epoch": 0.4060180614968374, "grad_norm": 5.645692765397177, "learning_rate": 3.126298909557409e-06, "loss": 0.6829, "step": 33315 }, { "epoch": 0.4060789977209852, "grad_norm": 2.1513429570147697, "learning_rate": 3.1259781911481723e-06, "loss": 0.7702, "step": 33320 }, { "epoch": 0.40613993394513304, "grad_norm": 2.9063221231075627, "learning_rate": 3.1256574727389353e-06, "loss": 0.7691, "step": 33325 }, { "epoch": 0.4062008701692808, "grad_norm": 2.941319005077459, "learning_rate": 3.1253367543296988e-06, "loss": 0.7199, "step": 33330 }, { "epoch": 0.4062618063934286, "grad_norm": 3.15034261781125, "learning_rate": 3.125016035920462e-06, "loss": 0.7384, "step": 33335 }, { "epoch": 0.40632274261757645, "grad_norm": 2.0870646957123444, "learning_rate": 3.1246953175112257e-06, "loss": 0.7176, "step": 33340 }, { "epoch": 0.40638367884172427, "grad_norm": 2.3931621892095816, "learning_rate": 3.1243745991019887e-06, "loss": 0.7865, "step": 33345 }, { "epoch": 0.40644461506587204, "grad_norm": 2.3692903525318054, "learning_rate": 3.124053880692752e-06, "loss": 0.7768, "step": 33350 }, { "epoch": 0.40650555129001986, "grad_norm": 4.793558547871787, "learning_rate": 3.1237331622835155e-06, "loss": 0.8102, "step": 33355 }, { "epoch": 0.4065664875141677, "grad_norm": 2.631593481516612, "learning_rate": 3.1234124438742786e-06, "loss": 0.7523, "step": 33360 }, { "epoch": 0.40662742373831545, "grad_norm": 2.2612076889011408, "learning_rate": 3.1230917254650416e-06, "loss": 0.8093, "step": 33365 }, { "epoch": 0.4066883599624633, "grad_norm": 2.2638196912902293, "learning_rate": 3.1227710070558054e-06, "loss": 0.7683, "step": 33370 }, { "epoch": 0.4067492961866111, "grad_norm": 2.1465838373816792, "learning_rate": 3.1224502886465685e-06, "loss": 0.7649, "step": 33375 }, { "epoch": 0.4068102324107589, "grad_norm": 2.2181322246242297, "learning_rate": 3.1221295702373315e-06, "loss": 0.7579, "step": 33380 }, { "epoch": 0.4068711686349067, "grad_norm": 2.527003449635758, "learning_rate": 3.1218088518280953e-06, "loss": 0.7813, "step": 33385 }, { "epoch": 0.4069321048590545, "grad_norm": 2.762864885856307, "learning_rate": 3.1214881334188584e-06, "loss": 0.8104, "step": 33390 }, { "epoch": 0.40699304108320233, "grad_norm": 4.9268309532708185, "learning_rate": 3.121167415009622e-06, "loss": 0.7145, "step": 33395 }, { "epoch": 0.4070539773073501, "grad_norm": 2.857181170533033, "learning_rate": 3.1208466966003852e-06, "loss": 0.7539, "step": 33400 }, { "epoch": 0.4071149135314979, "grad_norm": 2.319324785346739, "learning_rate": 3.1205259781911483e-06, "loss": 0.7411, "step": 33405 }, { "epoch": 0.40717584975564575, "grad_norm": 2.7893713639309072, "learning_rate": 3.1202052597819117e-06, "loss": 0.7596, "step": 33410 }, { "epoch": 0.40723678597979357, "grad_norm": 2.3667407100031768, "learning_rate": 3.119884541372675e-06, "loss": 0.7408, "step": 33415 }, { "epoch": 0.40729772220394134, "grad_norm": 2.732917594383519, "learning_rate": 3.1195638229634386e-06, "loss": 0.791, "step": 33420 }, { "epoch": 0.40735865842808916, "grad_norm": 2.758639436035463, "learning_rate": 3.1192431045542016e-06, "loss": 0.817, "step": 33425 }, { "epoch": 0.407419594652237, "grad_norm": 2.596713234197452, "learning_rate": 3.118922386144965e-06, "loss": 0.8032, "step": 33430 }, { "epoch": 0.40748053087638475, "grad_norm": 2.3530328891640004, "learning_rate": 3.1186016677357285e-06, "loss": 0.7847, "step": 33435 }, { "epoch": 0.4075414671005326, "grad_norm": 2.5044562601076423, "learning_rate": 3.1182809493264915e-06, "loss": 0.8482, "step": 33440 }, { "epoch": 0.4076024033246804, "grad_norm": 2.4847286588403836, "learning_rate": 3.1179602309172545e-06, "loss": 0.7318, "step": 33445 }, { "epoch": 0.4076633395488282, "grad_norm": 3.4436313941170225, "learning_rate": 3.1176395125080184e-06, "loss": 0.819, "step": 33450 }, { "epoch": 0.407724275772976, "grad_norm": 2.3214712749081268, "learning_rate": 3.1173187940987814e-06, "loss": 0.7745, "step": 33455 }, { "epoch": 0.4077852119971238, "grad_norm": 2.7968055578415036, "learning_rate": 3.1169980756895444e-06, "loss": 0.8002, "step": 33460 }, { "epoch": 0.40784614822127163, "grad_norm": 1.8940911013459312, "learning_rate": 3.1166773572803083e-06, "loss": 0.7848, "step": 33465 }, { "epoch": 0.4079070844454194, "grad_norm": 2.1156816599918, "learning_rate": 3.1163566388710713e-06, "loss": 0.7401, "step": 33470 }, { "epoch": 0.4079680206695672, "grad_norm": 2.2644948214942886, "learning_rate": 3.1160359204618347e-06, "loss": 0.7631, "step": 33475 }, { "epoch": 0.40802895689371504, "grad_norm": 2.838741198007673, "learning_rate": 3.115715202052598e-06, "loss": 0.7576, "step": 33480 }, { "epoch": 0.40808989311786287, "grad_norm": 2.6054034216559057, "learning_rate": 3.115394483643361e-06, "loss": 0.809, "step": 33485 }, { "epoch": 0.40815082934201063, "grad_norm": 2.2580625672205077, "learning_rate": 3.1150737652341246e-06, "loss": 0.7892, "step": 33490 }, { "epoch": 0.40821176556615846, "grad_norm": 2.768699223247629, "learning_rate": 3.114753046824888e-06, "loss": 0.8067, "step": 33495 }, { "epoch": 0.4082727017903063, "grad_norm": 2.2950602900999653, "learning_rate": 3.1144323284156515e-06, "loss": 0.8062, "step": 33500 }, { "epoch": 0.40833363801445405, "grad_norm": 2.97956061097836, "learning_rate": 3.1141116100064145e-06, "loss": 0.7419, "step": 33505 }, { "epoch": 0.40839457423860187, "grad_norm": 2.5064404212500015, "learning_rate": 3.1137908915971784e-06, "loss": 0.7791, "step": 33510 }, { "epoch": 0.4084555104627497, "grad_norm": 3.3138371001407445, "learning_rate": 3.1134701731879414e-06, "loss": 0.7491, "step": 33515 }, { "epoch": 0.4085164466868975, "grad_norm": 2.1181191572743656, "learning_rate": 3.1131494547787044e-06, "loss": 0.7567, "step": 33520 }, { "epoch": 0.4085773829110453, "grad_norm": 3.9416625022170875, "learning_rate": 3.1128287363694674e-06, "loss": 0.7521, "step": 33525 }, { "epoch": 0.4086383191351931, "grad_norm": 2.2501602809013526, "learning_rate": 3.1125080179602313e-06, "loss": 0.7267, "step": 33530 }, { "epoch": 0.40869925535934093, "grad_norm": 2.1612525685577957, "learning_rate": 3.1121872995509943e-06, "loss": 0.7809, "step": 33535 }, { "epoch": 0.4087601915834887, "grad_norm": 2.228550354285922, "learning_rate": 3.1118665811417578e-06, "loss": 0.7957, "step": 33540 }, { "epoch": 0.4088211278076365, "grad_norm": 2.7667471631006557, "learning_rate": 3.111545862732521e-06, "loss": 0.7471, "step": 33545 }, { "epoch": 0.40888206403178434, "grad_norm": 2.4757058701638894, "learning_rate": 3.1112251443232842e-06, "loss": 0.8188, "step": 33550 }, { "epoch": 0.40894300025593217, "grad_norm": 2.626202165662645, "learning_rate": 3.1109044259140477e-06, "loss": 0.8283, "step": 33555 }, { "epoch": 0.40900393648007993, "grad_norm": 1.84685258914222, "learning_rate": 3.110583707504811e-06, "loss": 0.7461, "step": 33560 }, { "epoch": 0.40906487270422776, "grad_norm": 3.273752316789561, "learning_rate": 3.1102629890955745e-06, "loss": 0.7993, "step": 33565 }, { "epoch": 0.4091258089283756, "grad_norm": 2.2052671127363004, "learning_rate": 3.1099422706863376e-06, "loss": 0.7691, "step": 33570 }, { "epoch": 0.40918674515252335, "grad_norm": 2.038644056988101, "learning_rate": 3.109621552277101e-06, "loss": 0.6956, "step": 33575 }, { "epoch": 0.40924768137667117, "grad_norm": 2.1932855451226336, "learning_rate": 3.1093008338678644e-06, "loss": 0.8105, "step": 33580 }, { "epoch": 0.409308617600819, "grad_norm": 2.3196584469892474, "learning_rate": 3.1089801154586274e-06, "loss": 0.8312, "step": 33585 }, { "epoch": 0.4093695538249668, "grad_norm": 3.5080760264884483, "learning_rate": 3.1086593970493913e-06, "loss": 0.8288, "step": 33590 }, { "epoch": 0.4094304900491146, "grad_norm": 2.565909938817838, "learning_rate": 3.1083386786401543e-06, "loss": 0.7134, "step": 33595 }, { "epoch": 0.4094914262732624, "grad_norm": 2.6243853352111475, "learning_rate": 3.1080179602309173e-06, "loss": 0.756, "step": 33600 }, { "epoch": 0.4095523624974102, "grad_norm": 2.71116923404153, "learning_rate": 3.107697241821681e-06, "loss": 0.7701, "step": 33605 }, { "epoch": 0.409613298721558, "grad_norm": 2.714206641959424, "learning_rate": 3.1073765234124442e-06, "loss": 0.7599, "step": 33610 }, { "epoch": 0.4096742349457058, "grad_norm": 2.909385781730398, "learning_rate": 3.1070558050032072e-06, "loss": 0.7209, "step": 33615 }, { "epoch": 0.40973517116985364, "grad_norm": 2.517702875863407, "learning_rate": 3.1067350865939707e-06, "loss": 0.7742, "step": 33620 }, { "epoch": 0.40979610739400146, "grad_norm": 2.5234050788407534, "learning_rate": 3.106414368184734e-06, "loss": 0.843, "step": 33625 }, { "epoch": 0.40985704361814923, "grad_norm": 2.766423501133627, "learning_rate": 3.106093649775497e-06, "loss": 0.7624, "step": 33630 }, { "epoch": 0.40991797984229705, "grad_norm": 2.339253830949502, "learning_rate": 3.1057729313662606e-06, "loss": 0.8201, "step": 33635 }, { "epoch": 0.4099789160664449, "grad_norm": 2.630787174219811, "learning_rate": 3.105452212957024e-06, "loss": 0.7498, "step": 33640 }, { "epoch": 0.41003985229059264, "grad_norm": 2.521248431549705, "learning_rate": 3.1051314945477875e-06, "loss": 0.8006, "step": 33645 }, { "epoch": 0.41010078851474047, "grad_norm": 2.4569389604684995, "learning_rate": 3.1048107761385505e-06, "loss": 0.7543, "step": 33650 }, { "epoch": 0.4101617247388883, "grad_norm": 2.4268068039854036, "learning_rate": 3.104490057729314e-06, "loss": 0.8324, "step": 33655 }, { "epoch": 0.4102226609630361, "grad_norm": 2.664876277735869, "learning_rate": 3.1041693393200774e-06, "loss": 0.826, "step": 33660 }, { "epoch": 0.4102835971871839, "grad_norm": 2.434580369597212, "learning_rate": 3.1038486209108404e-06, "loss": 0.7691, "step": 33665 }, { "epoch": 0.4103445334113317, "grad_norm": 2.3964978561662407, "learning_rate": 3.1035279025016042e-06, "loss": 0.7851, "step": 33670 }, { "epoch": 0.4104054696354795, "grad_norm": 3.130626442659669, "learning_rate": 3.1032071840923673e-06, "loss": 0.7869, "step": 33675 }, { "epoch": 0.4104664058596273, "grad_norm": 2.555657361841598, "learning_rate": 3.1028864656831303e-06, "loss": 0.7623, "step": 33680 }, { "epoch": 0.4105273420837751, "grad_norm": 2.3277968624499463, "learning_rate": 3.102565747273894e-06, "loss": 0.7368, "step": 33685 }, { "epoch": 0.41058827830792294, "grad_norm": 2.6738931698662385, "learning_rate": 3.102245028864657e-06, "loss": 0.7121, "step": 33690 }, { "epoch": 0.41064921453207076, "grad_norm": 2.934310748948151, "learning_rate": 3.10192431045542e-06, "loss": 0.8163, "step": 33695 }, { "epoch": 0.41071015075621853, "grad_norm": 2.437843138852019, "learning_rate": 3.1016035920461836e-06, "loss": 0.748, "step": 33700 }, { "epoch": 0.41077108698036635, "grad_norm": 2.5573273032806694, "learning_rate": 3.101282873636947e-06, "loss": 0.7202, "step": 33705 }, { "epoch": 0.4108320232045142, "grad_norm": 2.524717981445035, "learning_rate": 3.10096215522771e-06, "loss": 0.7405, "step": 33710 }, { "epoch": 0.41089295942866194, "grad_norm": 2.078733174526159, "learning_rate": 3.1006414368184735e-06, "loss": 0.729, "step": 33715 }, { "epoch": 0.41095389565280976, "grad_norm": 2.640550225022052, "learning_rate": 3.100320718409237e-06, "loss": 0.7965, "step": 33720 }, { "epoch": 0.4110148318769576, "grad_norm": 3.2190103314882537, "learning_rate": 3.1000000000000004e-06, "loss": 0.7334, "step": 33725 }, { "epoch": 0.4110757681011054, "grad_norm": 4.246069647379924, "learning_rate": 3.0996792815907634e-06, "loss": 0.6887, "step": 33730 }, { "epoch": 0.4111367043252532, "grad_norm": 2.267994541465059, "learning_rate": 3.0993585631815273e-06, "loss": 0.7376, "step": 33735 }, { "epoch": 0.411197640549401, "grad_norm": 2.0205435203225637, "learning_rate": 3.0990378447722903e-06, "loss": 0.8547, "step": 33740 }, { "epoch": 0.4112585767735488, "grad_norm": 3.1620220204884992, "learning_rate": 3.0987171263630533e-06, "loss": 0.79, "step": 33745 }, { "epoch": 0.4113195129976966, "grad_norm": 6.013656477900678, "learning_rate": 3.098396407953817e-06, "loss": 0.7541, "step": 33750 }, { "epoch": 0.4113804492218444, "grad_norm": 2.706633132235338, "learning_rate": 3.09807568954458e-06, "loss": 0.7146, "step": 33755 }, { "epoch": 0.41144138544599224, "grad_norm": 2.51980025481806, "learning_rate": 3.097754971135343e-06, "loss": 0.7859, "step": 33760 }, { "epoch": 0.41150232167014006, "grad_norm": 2.6374691970641466, "learning_rate": 3.097434252726107e-06, "loss": 0.7326, "step": 33765 }, { "epoch": 0.4115632578942878, "grad_norm": 1.8578845013697685, "learning_rate": 3.09711353431687e-06, "loss": 0.7327, "step": 33770 }, { "epoch": 0.41162419411843565, "grad_norm": 2.2258625409432744, "learning_rate": 3.096792815907633e-06, "loss": 0.7421, "step": 33775 }, { "epoch": 0.41168513034258347, "grad_norm": 2.525029110109884, "learning_rate": 3.0964720974983965e-06, "loss": 0.7587, "step": 33780 }, { "epoch": 0.41174606656673124, "grad_norm": 2.7916815410287263, "learning_rate": 3.09615137908916e-06, "loss": 0.7255, "step": 33785 }, { "epoch": 0.41180700279087906, "grad_norm": 2.3373942923295363, "learning_rate": 3.0958306606799234e-06, "loss": 0.7183, "step": 33790 }, { "epoch": 0.4118679390150269, "grad_norm": 2.900367126866837, "learning_rate": 3.0955099422706864e-06, "loss": 0.7552, "step": 33795 }, { "epoch": 0.41192887523917465, "grad_norm": 2.8709699463460105, "learning_rate": 3.09518922386145e-06, "loss": 0.7421, "step": 33800 }, { "epoch": 0.4119898114633225, "grad_norm": 2.2598098479158177, "learning_rate": 3.0948685054522133e-06, "loss": 0.8116, "step": 33805 }, { "epoch": 0.4120507476874703, "grad_norm": 2.4043028576832497, "learning_rate": 3.0945477870429763e-06, "loss": 0.7533, "step": 33810 }, { "epoch": 0.4121116839116181, "grad_norm": 2.8644584666055577, "learning_rate": 3.09422706863374e-06, "loss": 0.8506, "step": 33815 }, { "epoch": 0.4121726201357659, "grad_norm": 2.8174181502435602, "learning_rate": 3.0939063502245032e-06, "loss": 0.7622, "step": 33820 }, { "epoch": 0.4122335563599137, "grad_norm": 2.482597461453257, "learning_rate": 3.0935856318152662e-06, "loss": 0.7522, "step": 33825 }, { "epoch": 0.41229449258406153, "grad_norm": 3.204110279083198, "learning_rate": 3.09326491340603e-06, "loss": 0.7335, "step": 33830 }, { "epoch": 0.4123554288082093, "grad_norm": 2.328967746140122, "learning_rate": 3.092944194996793e-06, "loss": 0.7715, "step": 33835 }, { "epoch": 0.4124163650323571, "grad_norm": 2.1531348247167568, "learning_rate": 3.092623476587556e-06, "loss": 0.76, "step": 33840 }, { "epoch": 0.41247730125650495, "grad_norm": 2.9712840000960883, "learning_rate": 3.09230275817832e-06, "loss": 0.7584, "step": 33845 }, { "epoch": 0.41253823748065277, "grad_norm": 2.5128678941092897, "learning_rate": 3.091982039769083e-06, "loss": 0.7307, "step": 33850 }, { "epoch": 0.41259917370480054, "grad_norm": 2.0932734998661795, "learning_rate": 3.091661321359846e-06, "loss": 0.7054, "step": 33855 }, { "epoch": 0.41266010992894836, "grad_norm": 2.2101409106050323, "learning_rate": 3.09134060295061e-06, "loss": 0.7679, "step": 33860 }, { "epoch": 0.4127210461530962, "grad_norm": 2.251397522280859, "learning_rate": 3.091019884541373e-06, "loss": 0.7682, "step": 33865 }, { "epoch": 0.41278198237724395, "grad_norm": 4.100426305072616, "learning_rate": 3.0906991661321363e-06, "loss": 0.7438, "step": 33870 }, { "epoch": 0.4128429186013918, "grad_norm": 2.0152710871386454, "learning_rate": 3.0903784477228994e-06, "loss": 0.8091, "step": 33875 }, { "epoch": 0.4129038548255396, "grad_norm": 2.5202000696416875, "learning_rate": 3.090057729313663e-06, "loss": 0.78, "step": 33880 }, { "epoch": 0.4129647910496874, "grad_norm": 3.1722593849632967, "learning_rate": 3.0897370109044262e-06, "loss": 0.7236, "step": 33885 }, { "epoch": 0.4130257272738352, "grad_norm": 2.684860385981551, "learning_rate": 3.0894162924951893e-06, "loss": 0.7178, "step": 33890 }, { "epoch": 0.413086663497983, "grad_norm": 2.566880037738352, "learning_rate": 3.089095574085953e-06, "loss": 0.7624, "step": 33895 }, { "epoch": 0.41314759972213083, "grad_norm": 2.013730188717449, "learning_rate": 3.088774855676716e-06, "loss": 0.8431, "step": 33900 }, { "epoch": 0.4132085359462786, "grad_norm": 2.468149814830187, "learning_rate": 3.088454137267479e-06, "loss": 0.8296, "step": 33905 }, { "epoch": 0.4132694721704264, "grad_norm": 2.3477717651595555, "learning_rate": 3.088133418858243e-06, "loss": 0.7877, "step": 33910 }, { "epoch": 0.41333040839457424, "grad_norm": 2.782576065519342, "learning_rate": 3.087812700449006e-06, "loss": 0.659, "step": 33915 }, { "epoch": 0.41339134461872207, "grad_norm": 1.81347393871748, "learning_rate": 3.087491982039769e-06, "loss": 0.7905, "step": 33920 }, { "epoch": 0.41345228084286983, "grad_norm": 2.3081135844714322, "learning_rate": 3.087171263630533e-06, "loss": 0.7001, "step": 33925 }, { "epoch": 0.41351321706701766, "grad_norm": 2.451870002715249, "learning_rate": 3.086850545221296e-06, "loss": 0.8277, "step": 33930 }, { "epoch": 0.4135741532911655, "grad_norm": 2.1129612523980605, "learning_rate": 3.086529826812059e-06, "loss": 0.7898, "step": 33935 }, { "epoch": 0.41363508951531325, "grad_norm": 2.4858877787376206, "learning_rate": 3.086209108402823e-06, "loss": 0.7702, "step": 33940 }, { "epoch": 0.41369602573946107, "grad_norm": 3.189128583207369, "learning_rate": 3.085888389993586e-06, "loss": 0.7177, "step": 33945 }, { "epoch": 0.4137569619636089, "grad_norm": 2.05554609221802, "learning_rate": 3.0855676715843493e-06, "loss": 0.7631, "step": 33950 }, { "epoch": 0.4138178981877567, "grad_norm": 3.0384475980492383, "learning_rate": 3.0852469531751123e-06, "loss": 0.7457, "step": 33955 }, { "epoch": 0.4138788344119045, "grad_norm": 2.627046434447805, "learning_rate": 3.0849262347658757e-06, "loss": 0.7841, "step": 33960 }, { "epoch": 0.4139397706360523, "grad_norm": 2.695980218451406, "learning_rate": 3.084605516356639e-06, "loss": 0.7803, "step": 33965 }, { "epoch": 0.41400070686020013, "grad_norm": 2.350404685841754, "learning_rate": 3.084284797947402e-06, "loss": 0.7633, "step": 33970 }, { "epoch": 0.4140616430843479, "grad_norm": 2.613068283959213, "learning_rate": 3.083964079538166e-06, "loss": 0.7617, "step": 33975 }, { "epoch": 0.4141225793084957, "grad_norm": 2.7682012846410307, "learning_rate": 3.083643361128929e-06, "loss": 0.7908, "step": 33980 }, { "epoch": 0.41418351553264354, "grad_norm": 2.5980149071914753, "learning_rate": 3.083322642719692e-06, "loss": 0.7804, "step": 33985 }, { "epoch": 0.41424445175679137, "grad_norm": 2.681891534318954, "learning_rate": 3.083001924310456e-06, "loss": 0.703, "step": 33990 }, { "epoch": 0.41430538798093913, "grad_norm": 2.2484183894055265, "learning_rate": 3.082681205901219e-06, "loss": 0.7533, "step": 33995 }, { "epoch": 0.41436632420508696, "grad_norm": 2.654455894917083, "learning_rate": 3.082360487491982e-06, "loss": 0.7971, "step": 34000 }, { "epoch": 0.4144272604292348, "grad_norm": 2.006464116713141, "learning_rate": 3.082039769082746e-06, "loss": 0.7441, "step": 34005 }, { "epoch": 0.41448819665338255, "grad_norm": 2.89530156441274, "learning_rate": 3.081719050673509e-06, "loss": 0.7285, "step": 34010 }, { "epoch": 0.41454913287753037, "grad_norm": 3.3122818831313463, "learning_rate": 3.0813983322642723e-06, "loss": 0.7985, "step": 34015 }, { "epoch": 0.4146100691016782, "grad_norm": 2.252958712248762, "learning_rate": 3.0810776138550357e-06, "loss": 0.7468, "step": 34020 }, { "epoch": 0.414671005325826, "grad_norm": 2.3018041389952364, "learning_rate": 3.0807568954457988e-06, "loss": 0.7793, "step": 34025 }, { "epoch": 0.4147319415499738, "grad_norm": 3.535542360885839, "learning_rate": 3.080436177036562e-06, "loss": 0.7649, "step": 34030 }, { "epoch": 0.4147928777741216, "grad_norm": 2.6403084592325916, "learning_rate": 3.0801154586273252e-06, "loss": 0.7938, "step": 34035 }, { "epoch": 0.4148538139982694, "grad_norm": 2.1712422138581986, "learning_rate": 3.079794740218089e-06, "loss": 0.7505, "step": 34040 }, { "epoch": 0.4149147502224172, "grad_norm": 2.2054471617644382, "learning_rate": 3.079474021808852e-06, "loss": 0.7781, "step": 34045 }, { "epoch": 0.414975686446565, "grad_norm": 3.3401829716192113, "learning_rate": 3.079153303399615e-06, "loss": 0.7277, "step": 34050 }, { "epoch": 0.41503662267071284, "grad_norm": 2.5845248110851036, "learning_rate": 3.078832584990379e-06, "loss": 0.785, "step": 34055 }, { "epoch": 0.41509755889486066, "grad_norm": 2.754242767223139, "learning_rate": 3.078511866581142e-06, "loss": 0.7703, "step": 34060 }, { "epoch": 0.41515849511900843, "grad_norm": 2.310974697784856, "learning_rate": 3.078191148171905e-06, "loss": 0.8098, "step": 34065 }, { "epoch": 0.41521943134315625, "grad_norm": 2.1615714125742227, "learning_rate": 3.077870429762669e-06, "loss": 0.7113, "step": 34070 }, { "epoch": 0.4152803675673041, "grad_norm": 4.745678850637715, "learning_rate": 3.077549711353432e-06, "loss": 0.7186, "step": 34075 }, { "epoch": 0.41534130379145184, "grad_norm": 3.5095698080941222, "learning_rate": 3.077228992944195e-06, "loss": 0.7628, "step": 34080 }, { "epoch": 0.41540224001559967, "grad_norm": 2.6561418233714953, "learning_rate": 3.0769082745349588e-06, "loss": 0.7042, "step": 34085 }, { "epoch": 0.4154631762397475, "grad_norm": 3.08789281765777, "learning_rate": 3.076587556125722e-06, "loss": 0.7423, "step": 34090 }, { "epoch": 0.4155241124638953, "grad_norm": 2.3488885341166927, "learning_rate": 3.0762668377164852e-06, "loss": 0.6789, "step": 34095 }, { "epoch": 0.4155850486880431, "grad_norm": 2.209192333529427, "learning_rate": 3.0759461193072487e-06, "loss": 0.6201, "step": 34100 }, { "epoch": 0.4156459849121909, "grad_norm": 2.354921468141642, "learning_rate": 3.0756254008980117e-06, "loss": 0.7832, "step": 34105 }, { "epoch": 0.4157069211363387, "grad_norm": 2.1416852535715467, "learning_rate": 3.075304682488775e-06, "loss": 0.8548, "step": 34110 }, { "epoch": 0.4157678573604865, "grad_norm": 3.739454630420874, "learning_rate": 3.074983964079538e-06, "loss": 0.7668, "step": 34115 }, { "epoch": 0.4158287935846343, "grad_norm": 2.517770523310293, "learning_rate": 3.074663245670302e-06, "loss": 0.7391, "step": 34120 }, { "epoch": 0.41588972980878214, "grad_norm": 2.08897385167458, "learning_rate": 3.074342527261065e-06, "loss": 0.7447, "step": 34125 }, { "epoch": 0.41595066603292996, "grad_norm": 2.0961729064113745, "learning_rate": 3.074021808851828e-06, "loss": 0.7515, "step": 34130 }, { "epoch": 0.41601160225707773, "grad_norm": 2.807791263878978, "learning_rate": 3.073701090442592e-06, "loss": 0.7374, "step": 34135 }, { "epoch": 0.41607253848122555, "grad_norm": 2.4962029699013657, "learning_rate": 3.073380372033355e-06, "loss": 0.7542, "step": 34140 }, { "epoch": 0.4161334747053734, "grad_norm": 2.197165472185042, "learning_rate": 3.073059653624118e-06, "loss": 0.7295, "step": 34145 }, { "epoch": 0.41619441092952114, "grad_norm": 2.0517918410369442, "learning_rate": 3.072738935214882e-06, "loss": 0.6922, "step": 34150 }, { "epoch": 0.41625534715366896, "grad_norm": 2.4706461340147903, "learning_rate": 3.072418216805645e-06, "loss": 0.7777, "step": 34155 }, { "epoch": 0.4163162833778168, "grad_norm": 2.444340150648904, "learning_rate": 3.072097498396408e-06, "loss": 0.7361, "step": 34160 }, { "epoch": 0.4163772196019646, "grad_norm": 2.0722655584614214, "learning_rate": 3.0717767799871717e-06, "loss": 0.7255, "step": 34165 }, { "epoch": 0.4164381558261124, "grad_norm": 2.4174242943888107, "learning_rate": 3.0714560615779347e-06, "loss": 0.7637, "step": 34170 }, { "epoch": 0.4164990920502602, "grad_norm": 2.5839565227062558, "learning_rate": 3.071135343168698e-06, "loss": 0.789, "step": 34175 }, { "epoch": 0.416560028274408, "grad_norm": 1.8435813266248702, "learning_rate": 3.0708146247594616e-06, "loss": 0.768, "step": 34180 }, { "epoch": 0.4166209644985558, "grad_norm": 2.813472554039081, "learning_rate": 3.0704939063502246e-06, "loss": 0.7672, "step": 34185 }, { "epoch": 0.4166819007227036, "grad_norm": 2.321070106754525, "learning_rate": 3.070173187940988e-06, "loss": 0.861, "step": 34190 }, { "epoch": 0.41674283694685144, "grad_norm": 2.4151366048291623, "learning_rate": 3.0698524695317515e-06, "loss": 0.8278, "step": 34195 }, { "epoch": 0.41680377317099926, "grad_norm": 2.0306897531934984, "learning_rate": 3.069531751122515e-06, "loss": 0.7473, "step": 34200 }, { "epoch": 0.416864709395147, "grad_norm": 2.788683344985064, "learning_rate": 3.069211032713278e-06, "loss": 0.8806, "step": 34205 }, { "epoch": 0.41692564561929485, "grad_norm": 2.322721661178645, "learning_rate": 3.068890314304041e-06, "loss": 0.8514, "step": 34210 }, { "epoch": 0.41698658184344267, "grad_norm": 2.267301136792329, "learning_rate": 3.068569595894805e-06, "loss": 0.7821, "step": 34215 }, { "epoch": 0.41704751806759044, "grad_norm": 2.955266952601592, "learning_rate": 3.068248877485568e-06, "loss": 0.7512, "step": 34220 }, { "epoch": 0.41710845429173826, "grad_norm": 2.694984371878556, "learning_rate": 3.067928159076331e-06, "loss": 0.7709, "step": 34225 }, { "epoch": 0.4171693905158861, "grad_norm": 2.534951053424433, "learning_rate": 3.0676074406670947e-06, "loss": 0.8048, "step": 34230 }, { "epoch": 0.4172303267400339, "grad_norm": 2.4036922575688324, "learning_rate": 3.0672867222578577e-06, "loss": 0.7816, "step": 34235 }, { "epoch": 0.4172912629641817, "grad_norm": 2.8266369239140423, "learning_rate": 3.066966003848621e-06, "loss": 0.7683, "step": 34240 }, { "epoch": 0.4173521991883295, "grad_norm": 2.5355529495660547, "learning_rate": 3.0666452854393846e-06, "loss": 0.834, "step": 34245 }, { "epoch": 0.4174131354124773, "grad_norm": 2.3876186948860103, "learning_rate": 3.0663245670301476e-06, "loss": 0.7825, "step": 34250 }, { "epoch": 0.4174740716366251, "grad_norm": 2.416416202676939, "learning_rate": 3.066003848620911e-06, "loss": 0.8217, "step": 34255 }, { "epoch": 0.4175350078607729, "grad_norm": 2.053377523495992, "learning_rate": 3.0656831302116745e-06, "loss": 0.7948, "step": 34260 }, { "epoch": 0.41759594408492073, "grad_norm": 2.159126297976852, "learning_rate": 3.065362411802438e-06, "loss": 0.7667, "step": 34265 }, { "epoch": 0.4176568803090685, "grad_norm": 2.944670626363146, "learning_rate": 3.065041693393201e-06, "loss": 0.7387, "step": 34270 }, { "epoch": 0.4177178165332163, "grad_norm": 2.5681976658510153, "learning_rate": 3.0647209749839644e-06, "loss": 0.7187, "step": 34275 }, { "epoch": 0.41777875275736415, "grad_norm": 2.2465600433261494, "learning_rate": 3.064400256574728e-06, "loss": 0.7118, "step": 34280 }, { "epoch": 0.41783968898151197, "grad_norm": 2.2854341326796153, "learning_rate": 3.064079538165491e-06, "loss": 0.7243, "step": 34285 }, { "epoch": 0.41790062520565974, "grad_norm": 2.3859695129603993, "learning_rate": 3.063758819756254e-06, "loss": 0.7423, "step": 34290 }, { "epoch": 0.41796156142980756, "grad_norm": 2.5345423900981268, "learning_rate": 3.0634381013470178e-06, "loss": 0.8097, "step": 34295 }, { "epoch": 0.4180224976539554, "grad_norm": 2.0523020788012376, "learning_rate": 3.0631173829377808e-06, "loss": 0.7673, "step": 34300 }, { "epoch": 0.41808343387810315, "grad_norm": 2.514653409589914, "learning_rate": 3.062796664528544e-06, "loss": 0.7532, "step": 34305 }, { "epoch": 0.418144370102251, "grad_norm": 2.229245774954852, "learning_rate": 3.0624759461193077e-06, "loss": 0.7902, "step": 34310 }, { "epoch": 0.4182053063263988, "grad_norm": 2.91761875060273, "learning_rate": 3.0621552277100707e-06, "loss": 0.811, "step": 34315 }, { "epoch": 0.4182662425505466, "grad_norm": 2.788379610332109, "learning_rate": 3.061834509300834e-06, "loss": 0.7739, "step": 34320 }, { "epoch": 0.4183271787746944, "grad_norm": 2.3753956824993407, "learning_rate": 3.0615137908915976e-06, "loss": 0.7949, "step": 34325 }, { "epoch": 0.4183881149988422, "grad_norm": 2.436149846836787, "learning_rate": 3.0611930724823606e-06, "loss": 0.6394, "step": 34330 }, { "epoch": 0.41844905122299003, "grad_norm": 2.150248847589862, "learning_rate": 3.060872354073124e-06, "loss": 0.828, "step": 34335 }, { "epoch": 0.4185099874471378, "grad_norm": 2.8673380541955993, "learning_rate": 3.0605516356638875e-06, "loss": 0.6987, "step": 34340 }, { "epoch": 0.4185709236712856, "grad_norm": 2.4484235457342836, "learning_rate": 3.060230917254651e-06, "loss": 0.7708, "step": 34345 }, { "epoch": 0.41863185989543344, "grad_norm": 2.959728133686082, "learning_rate": 3.059910198845414e-06, "loss": 0.7051, "step": 34350 }, { "epoch": 0.41869279611958127, "grad_norm": 2.336360255670416, "learning_rate": 3.0595894804361773e-06, "loss": 0.7276, "step": 34355 }, { "epoch": 0.41875373234372903, "grad_norm": 2.364003211730803, "learning_rate": 3.059268762026941e-06, "loss": 0.7871, "step": 34360 }, { "epoch": 0.41881466856787686, "grad_norm": 3.436746763160033, "learning_rate": 3.058948043617704e-06, "loss": 0.7815, "step": 34365 }, { "epoch": 0.4188756047920247, "grad_norm": 2.170599664721361, "learning_rate": 3.058627325208467e-06, "loss": 0.7299, "step": 34370 }, { "epoch": 0.41893654101617245, "grad_norm": 2.9178064454858057, "learning_rate": 3.0583066067992307e-06, "loss": 0.7886, "step": 34375 }, { "epoch": 0.41899747724032027, "grad_norm": 2.230499415333947, "learning_rate": 3.0579858883899937e-06, "loss": 0.802, "step": 34380 }, { "epoch": 0.4190584134644681, "grad_norm": 2.113716863329711, "learning_rate": 3.0576651699807567e-06, "loss": 0.7773, "step": 34385 }, { "epoch": 0.4191193496886159, "grad_norm": 2.939558419926548, "learning_rate": 3.0573444515715206e-06, "loss": 0.6432, "step": 34390 }, { "epoch": 0.4191802859127637, "grad_norm": 2.1020910427684245, "learning_rate": 3.0570237331622836e-06, "loss": 0.7632, "step": 34395 }, { "epoch": 0.4192412221369115, "grad_norm": 2.1702353290831584, "learning_rate": 3.056703014753047e-06, "loss": 0.8128, "step": 34400 }, { "epoch": 0.41930215836105933, "grad_norm": 2.6871117515087737, "learning_rate": 3.0563822963438105e-06, "loss": 0.777, "step": 34405 }, { "epoch": 0.4193630945852071, "grad_norm": 2.6596682015497044, "learning_rate": 3.0560615779345735e-06, "loss": 0.7622, "step": 34410 }, { "epoch": 0.4194240308093549, "grad_norm": 2.2981787706288115, "learning_rate": 3.055740859525337e-06, "loss": 0.7125, "step": 34415 }, { "epoch": 0.41948496703350274, "grad_norm": 1.9690327403890087, "learning_rate": 3.0554201411161004e-06, "loss": 0.7277, "step": 34420 }, { "epoch": 0.41954590325765057, "grad_norm": 2.7830274795586094, "learning_rate": 3.055099422706864e-06, "loss": 0.7874, "step": 34425 }, { "epoch": 0.41960683948179833, "grad_norm": 3.052692853969732, "learning_rate": 3.054778704297627e-06, "loss": 0.7667, "step": 34430 }, { "epoch": 0.41966777570594616, "grad_norm": 3.2705733626499596, "learning_rate": 3.0544579858883903e-06, "loss": 0.8192, "step": 34435 }, { "epoch": 0.419728711930094, "grad_norm": 2.781077927317289, "learning_rate": 3.0541372674791537e-06, "loss": 0.7268, "step": 34440 }, { "epoch": 0.41978964815424175, "grad_norm": 1.9708845589450312, "learning_rate": 3.0538165490699167e-06, "loss": 0.7463, "step": 34445 }, { "epoch": 0.41985058437838957, "grad_norm": 2.3678163956066958, "learning_rate": 3.0534958306606806e-06, "loss": 0.8294, "step": 34450 }, { "epoch": 0.4199115206025374, "grad_norm": 2.8544059510797184, "learning_rate": 3.0531751122514436e-06, "loss": 0.8433, "step": 34455 }, { "epoch": 0.4199724568266852, "grad_norm": 2.3197210075322587, "learning_rate": 3.0528543938422066e-06, "loss": 0.7496, "step": 34460 }, { "epoch": 0.420033393050833, "grad_norm": 2.5908286262353055, "learning_rate": 3.0525336754329696e-06, "loss": 0.7376, "step": 34465 }, { "epoch": 0.4200943292749808, "grad_norm": 2.153541162407736, "learning_rate": 3.0522129570237335e-06, "loss": 0.6674, "step": 34470 }, { "epoch": 0.4201552654991286, "grad_norm": 2.6566864344214016, "learning_rate": 3.0518922386144965e-06, "loss": 0.7058, "step": 34475 }, { "epoch": 0.4202162017232764, "grad_norm": 2.5975467269093495, "learning_rate": 3.05157152020526e-06, "loss": 0.7062, "step": 34480 }, { "epoch": 0.4202771379474242, "grad_norm": 2.115571938909284, "learning_rate": 3.0512508017960234e-06, "loss": 0.8155, "step": 34485 }, { "epoch": 0.42033807417157204, "grad_norm": 2.132706214360737, "learning_rate": 3.050930083386787e-06, "loss": 0.7305, "step": 34490 }, { "epoch": 0.42039901039571986, "grad_norm": 2.2457450810186046, "learning_rate": 3.05060936497755e-06, "loss": 0.7232, "step": 34495 }, { "epoch": 0.42045994661986763, "grad_norm": 2.554884370373889, "learning_rate": 3.0502886465683133e-06, "loss": 0.7987, "step": 34500 }, { "epoch": 0.42052088284401545, "grad_norm": 3.235888407655602, "learning_rate": 3.0499679281590767e-06, "loss": 0.8978, "step": 34505 }, { "epoch": 0.4205818190681633, "grad_norm": 2.7933570493564077, "learning_rate": 3.0496472097498398e-06, "loss": 0.7942, "step": 34510 }, { "epoch": 0.42064275529231104, "grad_norm": 2.334695551836737, "learning_rate": 3.0493264913406036e-06, "loss": 0.7394, "step": 34515 }, { "epoch": 0.42070369151645887, "grad_norm": 2.500216356874713, "learning_rate": 3.0490057729313666e-06, "loss": 0.8149, "step": 34520 }, { "epoch": 0.4207646277406067, "grad_norm": 2.450563781776353, "learning_rate": 3.0486850545221297e-06, "loss": 0.7635, "step": 34525 }, { "epoch": 0.4208255639647545, "grad_norm": 2.606046034850268, "learning_rate": 3.0483643361128935e-06, "loss": 0.8092, "step": 34530 }, { "epoch": 0.4208865001889023, "grad_norm": 2.7489974153704853, "learning_rate": 3.0480436177036565e-06, "loss": 0.7473, "step": 34535 }, { "epoch": 0.4209474364130501, "grad_norm": 3.3000691603482304, "learning_rate": 3.0477228992944196e-06, "loss": 0.7064, "step": 34540 }, { "epoch": 0.4210083726371979, "grad_norm": 2.3411104958025057, "learning_rate": 3.047402180885183e-06, "loss": 0.8093, "step": 34545 }, { "epoch": 0.4210693088613457, "grad_norm": 2.224587094125143, "learning_rate": 3.0470814624759464e-06, "loss": 0.7363, "step": 34550 }, { "epoch": 0.4211302450854935, "grad_norm": 2.276645993638634, "learning_rate": 3.0467607440667095e-06, "loss": 0.8122, "step": 34555 }, { "epoch": 0.42119118130964134, "grad_norm": 2.040492467800631, "learning_rate": 3.046440025657473e-06, "loss": 0.7827, "step": 34560 }, { "epoch": 0.42125211753378916, "grad_norm": 2.336707956166269, "learning_rate": 3.0461193072482363e-06, "loss": 0.7463, "step": 34565 }, { "epoch": 0.42131305375793693, "grad_norm": 1.996101340491317, "learning_rate": 3.0457985888389998e-06, "loss": 0.7785, "step": 34570 }, { "epoch": 0.42137398998208475, "grad_norm": 2.783193837094414, "learning_rate": 3.045477870429763e-06, "loss": 0.7477, "step": 34575 }, { "epoch": 0.4214349262062326, "grad_norm": 2.592816154708551, "learning_rate": 3.0451571520205262e-06, "loss": 0.7448, "step": 34580 }, { "epoch": 0.42149586243038034, "grad_norm": 2.5105760438588853, "learning_rate": 3.0448364336112897e-06, "loss": 0.7994, "step": 34585 }, { "epoch": 0.42155679865452816, "grad_norm": 2.2700411930982014, "learning_rate": 3.0445157152020527e-06, "loss": 0.7111, "step": 34590 }, { "epoch": 0.421617734878676, "grad_norm": 2.9605101777310634, "learning_rate": 3.0441949967928166e-06, "loss": 0.7507, "step": 34595 }, { "epoch": 0.4216786711028238, "grad_norm": 3.5093983808252482, "learning_rate": 3.0438742783835796e-06, "loss": 0.7995, "step": 34600 }, { "epoch": 0.4217396073269716, "grad_norm": 2.566870562300328, "learning_rate": 3.0435535599743426e-06, "loss": 0.7889, "step": 34605 }, { "epoch": 0.4218005435511194, "grad_norm": 2.0689929564086773, "learning_rate": 3.0432328415651065e-06, "loss": 0.7808, "step": 34610 }, { "epoch": 0.4218614797752672, "grad_norm": 2.4979187467504795, "learning_rate": 3.0429121231558695e-06, "loss": 0.7624, "step": 34615 }, { "epoch": 0.421922415999415, "grad_norm": 2.322240131676072, "learning_rate": 3.0425914047466325e-06, "loss": 0.8387, "step": 34620 }, { "epoch": 0.4219833522235628, "grad_norm": 2.7925931691119414, "learning_rate": 3.042270686337396e-06, "loss": 0.7771, "step": 34625 }, { "epoch": 0.42204428844771064, "grad_norm": 3.14743185945352, "learning_rate": 3.0419499679281594e-06, "loss": 0.6946, "step": 34630 }, { "epoch": 0.42210522467185846, "grad_norm": 2.2832231194357675, "learning_rate": 3.0416292495189224e-06, "loss": 0.7904, "step": 34635 }, { "epoch": 0.4221661608960062, "grad_norm": 4.037017163091686, "learning_rate": 3.041308531109686e-06, "loss": 0.8047, "step": 34640 }, { "epoch": 0.42222709712015405, "grad_norm": 2.5197654609914104, "learning_rate": 3.0409878127004493e-06, "loss": 0.7928, "step": 34645 }, { "epoch": 0.42228803334430187, "grad_norm": 2.788922284879418, "learning_rate": 3.0406670942912127e-06, "loss": 0.75, "step": 34650 }, { "epoch": 0.42234896956844964, "grad_norm": 2.7381628953125086, "learning_rate": 3.0403463758819757e-06, "loss": 0.698, "step": 34655 }, { "epoch": 0.42240990579259746, "grad_norm": 2.917898583624442, "learning_rate": 3.040025657472739e-06, "loss": 0.7807, "step": 34660 }, { "epoch": 0.4224708420167453, "grad_norm": 2.3515542380456727, "learning_rate": 3.0397049390635026e-06, "loss": 0.7945, "step": 34665 }, { "epoch": 0.4225317782408931, "grad_norm": 4.36109999827292, "learning_rate": 3.0393842206542656e-06, "loss": 0.8011, "step": 34670 }, { "epoch": 0.4225927144650409, "grad_norm": 3.1598350097293126, "learning_rate": 3.0390635022450295e-06, "loss": 0.7585, "step": 34675 }, { "epoch": 0.4226536506891887, "grad_norm": 2.0519969376950487, "learning_rate": 3.0387427838357925e-06, "loss": 0.8235, "step": 34680 }, { "epoch": 0.4227145869133365, "grad_norm": 2.736500703781145, "learning_rate": 3.0384220654265555e-06, "loss": 0.7552, "step": 34685 }, { "epoch": 0.4227755231374843, "grad_norm": 4.218870732344151, "learning_rate": 3.0381013470173194e-06, "loss": 0.7582, "step": 34690 }, { "epoch": 0.4228364593616321, "grad_norm": 2.5254703699639007, "learning_rate": 3.0377806286080824e-06, "loss": 0.7383, "step": 34695 }, { "epoch": 0.42289739558577993, "grad_norm": 2.6101364520583776, "learning_rate": 3.0374599101988454e-06, "loss": 0.812, "step": 34700 }, { "epoch": 0.42295833180992776, "grad_norm": 3.760964016720983, "learning_rate": 3.037139191789609e-06, "loss": 0.7381, "step": 34705 }, { "epoch": 0.4230192680340755, "grad_norm": 2.739480695381275, "learning_rate": 3.0368184733803723e-06, "loss": 0.6673, "step": 34710 }, { "epoch": 0.42308020425822335, "grad_norm": 5.109527240531206, "learning_rate": 3.0364977549711357e-06, "loss": 0.807, "step": 34715 }, { "epoch": 0.42314114048237117, "grad_norm": 3.53547010421376, "learning_rate": 3.0361770365618987e-06, "loss": 0.7799, "step": 34720 }, { "epoch": 0.42320207670651894, "grad_norm": 2.3304758370365195, "learning_rate": 3.035856318152662e-06, "loss": 0.7336, "step": 34725 }, { "epoch": 0.42326301293066676, "grad_norm": 2.2728264166086816, "learning_rate": 3.0355355997434256e-06, "loss": 0.7747, "step": 34730 }, { "epoch": 0.4233239491548146, "grad_norm": 2.3331589496981207, "learning_rate": 3.0352148813341886e-06, "loss": 0.7624, "step": 34735 }, { "epoch": 0.4233848853789624, "grad_norm": 2.2865265400919457, "learning_rate": 3.0348941629249525e-06, "loss": 0.7845, "step": 34740 }, { "epoch": 0.4234458216031102, "grad_norm": 2.5116761013455164, "learning_rate": 3.0345734445157155e-06, "loss": 0.7696, "step": 34745 }, { "epoch": 0.423506757827258, "grad_norm": 2.9688199845088774, "learning_rate": 3.0342527261064785e-06, "loss": 0.8158, "step": 34750 }, { "epoch": 0.4235676940514058, "grad_norm": 2.6445546102863733, "learning_rate": 3.0339320076972424e-06, "loss": 0.7913, "step": 34755 }, { "epoch": 0.4236286302755536, "grad_norm": 2.1433495266070586, "learning_rate": 3.0336112892880054e-06, "loss": 0.7795, "step": 34760 }, { "epoch": 0.4236895664997014, "grad_norm": 2.8983810408790887, "learning_rate": 3.0332905708787684e-06, "loss": 0.7723, "step": 34765 }, { "epoch": 0.42375050272384923, "grad_norm": 3.258400898716358, "learning_rate": 3.0329698524695323e-06, "loss": 0.7398, "step": 34770 }, { "epoch": 0.423811438947997, "grad_norm": 2.2125072765341494, "learning_rate": 3.0326491340602953e-06, "loss": 0.7661, "step": 34775 }, { "epoch": 0.4238723751721448, "grad_norm": 2.4333752029773956, "learning_rate": 3.0323284156510583e-06, "loss": 0.7813, "step": 34780 }, { "epoch": 0.42393331139629264, "grad_norm": 2.3212560393427415, "learning_rate": 3.032007697241822e-06, "loss": 0.7409, "step": 34785 }, { "epoch": 0.42399424762044047, "grad_norm": 2.5233113432472654, "learning_rate": 3.0316869788325852e-06, "loss": 0.7827, "step": 34790 }, { "epoch": 0.42405518384458823, "grad_norm": 2.8184409678661457, "learning_rate": 3.0313662604233487e-06, "loss": 0.7659, "step": 34795 }, { "epoch": 0.42411612006873606, "grad_norm": 2.5781670485245614, "learning_rate": 3.0310455420141117e-06, "loss": 0.8256, "step": 34800 }, { "epoch": 0.4241770562928839, "grad_norm": 2.4845006836150367, "learning_rate": 3.030724823604875e-06, "loss": 0.8064, "step": 34805 }, { "epoch": 0.42423799251703165, "grad_norm": 3.3826135159554593, "learning_rate": 3.0304041051956386e-06, "loss": 0.8393, "step": 34810 }, { "epoch": 0.42429892874117947, "grad_norm": 2.4056475417359, "learning_rate": 3.0300833867864016e-06, "loss": 0.7453, "step": 34815 }, { "epoch": 0.4243598649653273, "grad_norm": 4.048586980347022, "learning_rate": 3.0297626683771654e-06, "loss": 0.8026, "step": 34820 }, { "epoch": 0.4244208011894751, "grad_norm": 2.5822931395809423, "learning_rate": 3.0294419499679285e-06, "loss": 0.7586, "step": 34825 }, { "epoch": 0.4244817374136229, "grad_norm": 2.7890029008052855, "learning_rate": 3.0291212315586915e-06, "loss": 0.7667, "step": 34830 }, { "epoch": 0.4245426736377707, "grad_norm": 3.7472668522073924, "learning_rate": 3.0288005131494553e-06, "loss": 0.7064, "step": 34835 }, { "epoch": 0.42460360986191853, "grad_norm": 2.9028516206983808, "learning_rate": 3.0284797947402184e-06, "loss": 0.7769, "step": 34840 }, { "epoch": 0.4246645460860663, "grad_norm": 2.820538232709376, "learning_rate": 3.0281590763309814e-06, "loss": 0.7561, "step": 34845 }, { "epoch": 0.4247254823102141, "grad_norm": 2.233208413288322, "learning_rate": 3.0278383579217452e-06, "loss": 0.735, "step": 34850 }, { "epoch": 0.42478641853436194, "grad_norm": 3.3003448313552197, "learning_rate": 3.0275176395125082e-06, "loss": 0.7823, "step": 34855 }, { "epoch": 0.42484735475850977, "grad_norm": 5.791845534917484, "learning_rate": 3.0271969211032713e-06, "loss": 0.8108, "step": 34860 }, { "epoch": 0.42490829098265753, "grad_norm": 2.9842950802084807, "learning_rate": 3.026876202694035e-06, "loss": 0.7091, "step": 34865 }, { "epoch": 0.42496922720680536, "grad_norm": 2.4387134206134857, "learning_rate": 3.026555484284798e-06, "loss": 0.7341, "step": 34870 }, { "epoch": 0.4250301634309532, "grad_norm": 2.560150231585552, "learning_rate": 3.0262347658755616e-06, "loss": 0.7666, "step": 34875 }, { "epoch": 0.42509109965510095, "grad_norm": 1.9502673214469362, "learning_rate": 3.0259140474663246e-06, "loss": 0.6933, "step": 34880 }, { "epoch": 0.42515203587924877, "grad_norm": 3.984326364053865, "learning_rate": 3.025593329057088e-06, "loss": 0.7788, "step": 34885 }, { "epoch": 0.4252129721033966, "grad_norm": 2.6481265341401006, "learning_rate": 3.0252726106478515e-06, "loss": 0.7556, "step": 34890 }, { "epoch": 0.4252739083275444, "grad_norm": 2.9290158205145396, "learning_rate": 3.0249518922386145e-06, "loss": 0.736, "step": 34895 }, { "epoch": 0.4253348445516922, "grad_norm": 2.456658821406637, "learning_rate": 3.0246311738293784e-06, "loss": 0.7944, "step": 34900 }, { "epoch": 0.42539578077584, "grad_norm": 4.014082992850229, "learning_rate": 3.0243104554201414e-06, "loss": 0.7019, "step": 34905 }, { "epoch": 0.4254567169999878, "grad_norm": 3.399392077521978, "learning_rate": 3.0239897370109044e-06, "loss": 0.7875, "step": 34910 }, { "epoch": 0.4255176532241356, "grad_norm": 2.738052755367725, "learning_rate": 3.0236690186016683e-06, "loss": 0.746, "step": 34915 }, { "epoch": 0.4255785894482834, "grad_norm": 2.5503259127214912, "learning_rate": 3.0233483001924313e-06, "loss": 0.7579, "step": 34920 }, { "epoch": 0.42563952567243124, "grad_norm": 2.5613980386076696, "learning_rate": 3.0230275817831943e-06, "loss": 0.7276, "step": 34925 }, { "epoch": 0.42570046189657906, "grad_norm": 2.5445748621401503, "learning_rate": 3.022706863373958e-06, "loss": 0.761, "step": 34930 }, { "epoch": 0.42576139812072683, "grad_norm": 2.3629821742867256, "learning_rate": 3.022386144964721e-06, "loss": 0.7146, "step": 34935 }, { "epoch": 0.42582233434487465, "grad_norm": 2.579027112469265, "learning_rate": 3.0220654265554846e-06, "loss": 0.8151, "step": 34940 }, { "epoch": 0.4258832705690225, "grad_norm": 2.4263326971366315, "learning_rate": 3.021744708146248e-06, "loss": 0.7875, "step": 34945 }, { "epoch": 0.42594420679317024, "grad_norm": 2.239180299066071, "learning_rate": 3.021423989737011e-06, "loss": 0.7384, "step": 34950 }, { "epoch": 0.42600514301731807, "grad_norm": 2.4417042278797787, "learning_rate": 3.0211032713277745e-06, "loss": 0.6996, "step": 34955 }, { "epoch": 0.4260660792414659, "grad_norm": 2.4187293597835735, "learning_rate": 3.0207825529185375e-06, "loss": 0.869, "step": 34960 }, { "epoch": 0.4261270154656137, "grad_norm": 3.0329345491451334, "learning_rate": 3.0204618345093014e-06, "loss": 0.7763, "step": 34965 }, { "epoch": 0.4261879516897615, "grad_norm": 2.5729872399572753, "learning_rate": 3.0201411161000644e-06, "loss": 0.8231, "step": 34970 }, { "epoch": 0.4262488879139093, "grad_norm": 2.540577331198551, "learning_rate": 3.0198203976908274e-06, "loss": 0.8046, "step": 34975 }, { "epoch": 0.4263098241380571, "grad_norm": 2.095691284379045, "learning_rate": 3.0194996792815913e-06, "loss": 0.7642, "step": 34980 }, { "epoch": 0.4263707603622049, "grad_norm": 2.2843131295485124, "learning_rate": 3.0191789608723543e-06, "loss": 0.7522, "step": 34985 }, { "epoch": 0.4264316965863527, "grad_norm": 2.249055997637199, "learning_rate": 3.0188582424631173e-06, "loss": 0.7686, "step": 34990 }, { "epoch": 0.42649263281050054, "grad_norm": 3.0351833941093123, "learning_rate": 3.018537524053881e-06, "loss": 0.8308, "step": 34995 }, { "epoch": 0.42655356903464836, "grad_norm": 2.8357504432087177, "learning_rate": 3.018216805644644e-06, "loss": 0.8099, "step": 35000 }, { "epoch": 0.42661450525879613, "grad_norm": 2.1426415642527883, "learning_rate": 3.0178960872354072e-06, "loss": 0.756, "step": 35005 }, { "epoch": 0.42667544148294395, "grad_norm": 3.896844246655011, "learning_rate": 3.017575368826171e-06, "loss": 0.7787, "step": 35010 }, { "epoch": 0.4267363777070918, "grad_norm": 2.194712231664875, "learning_rate": 3.017254650416934e-06, "loss": 0.7553, "step": 35015 }, { "epoch": 0.42679731393123954, "grad_norm": 2.080682107944436, "learning_rate": 3.0169339320076975e-06, "loss": 0.7138, "step": 35020 }, { "epoch": 0.42685825015538736, "grad_norm": 2.4074239449994637, "learning_rate": 3.016613213598461e-06, "loss": 0.7737, "step": 35025 }, { "epoch": 0.4269191863795352, "grad_norm": 2.0443898812682413, "learning_rate": 3.016292495189224e-06, "loss": 0.743, "step": 35030 }, { "epoch": 0.426980122603683, "grad_norm": 2.760185088461051, "learning_rate": 3.0159717767799874e-06, "loss": 0.7634, "step": 35035 }, { "epoch": 0.4270410588278308, "grad_norm": 2.527510804130759, "learning_rate": 3.0156510583707505e-06, "loss": 0.8132, "step": 35040 }, { "epoch": 0.4271019950519786, "grad_norm": 2.168322344021777, "learning_rate": 3.0153303399615143e-06, "loss": 0.7616, "step": 35045 }, { "epoch": 0.4271629312761264, "grad_norm": 2.350155861278834, "learning_rate": 3.0150096215522773e-06, "loss": 0.7352, "step": 35050 }, { "epoch": 0.4272238675002742, "grad_norm": 2.6692889117319294, "learning_rate": 3.0146889031430404e-06, "loss": 0.8322, "step": 35055 }, { "epoch": 0.427284803724422, "grad_norm": 2.4068392023588046, "learning_rate": 3.0143681847338042e-06, "loss": 0.8348, "step": 35060 }, { "epoch": 0.42734573994856984, "grad_norm": 2.506332512306628, "learning_rate": 3.0140474663245672e-06, "loss": 0.7254, "step": 35065 }, { "epoch": 0.42740667617271766, "grad_norm": 2.643059393128576, "learning_rate": 3.0137267479153303e-06, "loss": 0.729, "step": 35070 }, { "epoch": 0.4274676123968654, "grad_norm": 2.6058333147039336, "learning_rate": 3.013406029506094e-06, "loss": 0.7759, "step": 35075 }, { "epoch": 0.42752854862101325, "grad_norm": 2.8510944442060446, "learning_rate": 3.013085311096857e-06, "loss": 0.7131, "step": 35080 }, { "epoch": 0.42758948484516107, "grad_norm": 2.3903209632031572, "learning_rate": 3.01276459268762e-06, "loss": 0.7573, "step": 35085 }, { "epoch": 0.42765042106930884, "grad_norm": 2.6691627742645943, "learning_rate": 3.012443874278384e-06, "loss": 0.7522, "step": 35090 }, { "epoch": 0.42771135729345666, "grad_norm": 2.808496564982025, "learning_rate": 3.012123155869147e-06, "loss": 0.8054, "step": 35095 }, { "epoch": 0.4277722935176045, "grad_norm": 3.288697175617667, "learning_rate": 3.0118024374599105e-06, "loss": 0.769, "step": 35100 }, { "epoch": 0.4278332297417523, "grad_norm": 2.173275821158442, "learning_rate": 3.011481719050674e-06, "loss": 0.7479, "step": 35105 }, { "epoch": 0.4278941659659001, "grad_norm": 2.7051057841209953, "learning_rate": 3.011161000641437e-06, "loss": 0.6882, "step": 35110 }, { "epoch": 0.4279551021900479, "grad_norm": 2.327319492145621, "learning_rate": 3.0108402822322004e-06, "loss": 0.7688, "step": 35115 }, { "epoch": 0.4280160384141957, "grad_norm": 2.2700013616087182, "learning_rate": 3.010519563822964e-06, "loss": 0.7768, "step": 35120 }, { "epoch": 0.4280769746383435, "grad_norm": 2.4580702140176656, "learning_rate": 3.0101988454137272e-06, "loss": 0.7271, "step": 35125 }, { "epoch": 0.4281379108624913, "grad_norm": 2.507427390281645, "learning_rate": 3.0098781270044903e-06, "loss": 0.6949, "step": 35130 }, { "epoch": 0.42819884708663913, "grad_norm": 2.3424864623924786, "learning_rate": 3.0095574085952533e-06, "loss": 0.8193, "step": 35135 }, { "epoch": 0.42825978331078696, "grad_norm": 2.2025160129850723, "learning_rate": 3.009236690186017e-06, "loss": 0.7533, "step": 35140 }, { "epoch": 0.4283207195349347, "grad_norm": 2.189008480929767, "learning_rate": 3.00891597177678e-06, "loss": 0.7469, "step": 35145 }, { "epoch": 0.42838165575908255, "grad_norm": 2.195963608694897, "learning_rate": 3.008595253367543e-06, "loss": 0.8091, "step": 35150 }, { "epoch": 0.42844259198323037, "grad_norm": 2.5047497389761575, "learning_rate": 3.008274534958307e-06, "loss": 0.7893, "step": 35155 }, { "epoch": 0.42850352820737814, "grad_norm": 2.717573810883544, "learning_rate": 3.00795381654907e-06, "loss": 0.8502, "step": 35160 }, { "epoch": 0.42856446443152596, "grad_norm": 3.4950945064110153, "learning_rate": 3.007633098139833e-06, "loss": 0.8012, "step": 35165 }, { "epoch": 0.4286254006556738, "grad_norm": 2.078476636327176, "learning_rate": 3.007312379730597e-06, "loss": 0.7457, "step": 35170 }, { "epoch": 0.4286863368798216, "grad_norm": 2.4075426846788193, "learning_rate": 3.00699166132136e-06, "loss": 0.8065, "step": 35175 }, { "epoch": 0.4287472731039694, "grad_norm": 2.1636387312176453, "learning_rate": 3.0066709429121234e-06, "loss": 0.6868, "step": 35180 }, { "epoch": 0.4288082093281172, "grad_norm": 2.877599991949179, "learning_rate": 3.006350224502887e-06, "loss": 0.8063, "step": 35185 }, { "epoch": 0.428869145552265, "grad_norm": 2.4569915801303797, "learning_rate": 3.0060295060936503e-06, "loss": 0.7515, "step": 35190 }, { "epoch": 0.4289300817764128, "grad_norm": 2.1285525344098573, "learning_rate": 3.0057087876844133e-06, "loss": 0.7683, "step": 35195 }, { "epoch": 0.4289910180005606, "grad_norm": 2.453465170004667, "learning_rate": 3.0053880692751767e-06, "loss": 0.7877, "step": 35200 }, { "epoch": 0.42905195422470843, "grad_norm": 2.4331125765925576, "learning_rate": 3.00506735086594e-06, "loss": 0.7204, "step": 35205 }, { "epoch": 0.42911289044885625, "grad_norm": 2.5551017368190867, "learning_rate": 3.004746632456703e-06, "loss": 0.7798, "step": 35210 }, { "epoch": 0.429173826673004, "grad_norm": 2.3753402390071368, "learning_rate": 3.004425914047466e-06, "loss": 0.7389, "step": 35215 }, { "epoch": 0.42923476289715184, "grad_norm": 2.6697397394536715, "learning_rate": 3.00410519563823e-06, "loss": 0.8356, "step": 35220 }, { "epoch": 0.42929569912129967, "grad_norm": 2.4858827462612587, "learning_rate": 3.003784477228993e-06, "loss": 0.8142, "step": 35225 }, { "epoch": 0.42935663534544743, "grad_norm": 2.5670657164842487, "learning_rate": 3.003463758819756e-06, "loss": 0.7375, "step": 35230 }, { "epoch": 0.42941757156959526, "grad_norm": 2.657442061200519, "learning_rate": 3.00314304041052e-06, "loss": 0.7809, "step": 35235 }, { "epoch": 0.4294785077937431, "grad_norm": 2.23963862595756, "learning_rate": 3.002822322001283e-06, "loss": 0.7844, "step": 35240 }, { "epoch": 0.42953944401789085, "grad_norm": 2.8345528957084505, "learning_rate": 3.0025016035920464e-06, "loss": 0.7611, "step": 35245 }, { "epoch": 0.42960038024203867, "grad_norm": 2.365413730154921, "learning_rate": 3.00218088518281e-06, "loss": 0.7495, "step": 35250 }, { "epoch": 0.4296613164661865, "grad_norm": 2.677077351977453, "learning_rate": 3.001860166773573e-06, "loss": 0.6801, "step": 35255 }, { "epoch": 0.4297222526903343, "grad_norm": 2.7721936173612756, "learning_rate": 3.0015394483643363e-06, "loss": 0.8616, "step": 35260 }, { "epoch": 0.4297831889144821, "grad_norm": 2.1128037053415905, "learning_rate": 3.0012187299550998e-06, "loss": 0.7926, "step": 35265 }, { "epoch": 0.4298441251386299, "grad_norm": 2.3005155096555217, "learning_rate": 3.000898011545863e-06, "loss": 0.7421, "step": 35270 }, { "epoch": 0.42990506136277773, "grad_norm": 2.311778449204282, "learning_rate": 3.0005772931366262e-06, "loss": 0.7511, "step": 35275 }, { "epoch": 0.4299659975869255, "grad_norm": 2.901642801745642, "learning_rate": 3.0002565747273897e-06, "loss": 0.8113, "step": 35280 }, { "epoch": 0.4300269338110733, "grad_norm": 2.4506336170798857, "learning_rate": 2.999935856318153e-06, "loss": 0.7627, "step": 35285 }, { "epoch": 0.43008787003522114, "grad_norm": 2.4811625835082705, "learning_rate": 2.999615137908916e-06, "loss": 0.7378, "step": 35290 }, { "epoch": 0.43014880625936897, "grad_norm": 4.100142209452132, "learning_rate": 2.999294419499679e-06, "loss": 0.6991, "step": 35295 }, { "epoch": 0.43020974248351673, "grad_norm": 2.0737032818228545, "learning_rate": 2.998973701090443e-06, "loss": 0.7886, "step": 35300 }, { "epoch": 0.43027067870766456, "grad_norm": 2.181026775369709, "learning_rate": 2.998652982681206e-06, "loss": 0.7364, "step": 35305 }, { "epoch": 0.4303316149318124, "grad_norm": 2.1614513087438896, "learning_rate": 2.998332264271969e-06, "loss": 0.7571, "step": 35310 }, { "epoch": 0.43039255115596015, "grad_norm": 2.273886762422338, "learning_rate": 2.998011545862733e-06, "loss": 0.7891, "step": 35315 }, { "epoch": 0.43045348738010797, "grad_norm": 2.6422654201104545, "learning_rate": 2.997690827453496e-06, "loss": 0.709, "step": 35320 }, { "epoch": 0.4305144236042558, "grad_norm": 2.3746159094170545, "learning_rate": 2.9973701090442594e-06, "loss": 0.7154, "step": 35325 }, { "epoch": 0.4305753598284036, "grad_norm": 2.7150912891666925, "learning_rate": 2.997049390635023e-06, "loss": 0.813, "step": 35330 }, { "epoch": 0.4306362960525514, "grad_norm": 3.0198292222409275, "learning_rate": 2.996728672225786e-06, "loss": 0.7973, "step": 35335 }, { "epoch": 0.4306972322766992, "grad_norm": 2.296215182458862, "learning_rate": 2.9964079538165493e-06, "loss": 0.7215, "step": 35340 }, { "epoch": 0.430758168500847, "grad_norm": 2.7080315203300684, "learning_rate": 2.9960872354073127e-06, "loss": 0.7826, "step": 35345 }, { "epoch": 0.4308191047249948, "grad_norm": 2.1917048042428986, "learning_rate": 2.995766516998076e-06, "loss": 0.7539, "step": 35350 }, { "epoch": 0.4308800409491426, "grad_norm": 3.0941153245725683, "learning_rate": 2.995445798588839e-06, "loss": 0.8009, "step": 35355 }, { "epoch": 0.43094097717329044, "grad_norm": 2.5941972574231467, "learning_rate": 2.9951250801796026e-06, "loss": 0.6866, "step": 35360 }, { "epoch": 0.43100191339743826, "grad_norm": 2.635929611727766, "learning_rate": 2.994804361770366e-06, "loss": 0.8207, "step": 35365 }, { "epoch": 0.43106284962158603, "grad_norm": 2.165806903409209, "learning_rate": 2.994483643361129e-06, "loss": 0.7208, "step": 35370 }, { "epoch": 0.43112378584573385, "grad_norm": 2.4890974753721116, "learning_rate": 2.994162924951893e-06, "loss": 0.7136, "step": 35375 }, { "epoch": 0.4311847220698817, "grad_norm": 2.7325292261786593, "learning_rate": 2.993842206542656e-06, "loss": 0.8116, "step": 35380 }, { "epoch": 0.43124565829402944, "grad_norm": 2.2268699936832372, "learning_rate": 2.993521488133419e-06, "loss": 0.8085, "step": 35385 }, { "epoch": 0.43130659451817727, "grad_norm": 2.628990485899256, "learning_rate": 2.993200769724182e-06, "loss": 0.8171, "step": 35390 }, { "epoch": 0.4313675307423251, "grad_norm": 2.563327301975994, "learning_rate": 2.992880051314946e-06, "loss": 0.8088, "step": 35395 }, { "epoch": 0.4314284669664729, "grad_norm": 2.9145573835862026, "learning_rate": 2.992559332905709e-06, "loss": 0.7726, "step": 35400 }, { "epoch": 0.4314894031906207, "grad_norm": 4.37842516242551, "learning_rate": 2.9922386144964723e-06, "loss": 0.7453, "step": 35405 }, { "epoch": 0.4315503394147685, "grad_norm": 2.6474785809906756, "learning_rate": 2.9919178960872357e-06, "loss": 0.7482, "step": 35410 }, { "epoch": 0.4316112756389163, "grad_norm": 2.7405858393292295, "learning_rate": 2.991597177677999e-06, "loss": 0.8374, "step": 35415 }, { "epoch": 0.4316722118630641, "grad_norm": 2.4206851258588684, "learning_rate": 2.991276459268762e-06, "loss": 0.7556, "step": 35420 }, { "epoch": 0.4317331480872119, "grad_norm": 2.48076813773824, "learning_rate": 2.9909557408595256e-06, "loss": 0.8049, "step": 35425 }, { "epoch": 0.43179408431135974, "grad_norm": 3.0675129759717152, "learning_rate": 2.990635022450289e-06, "loss": 0.7835, "step": 35430 }, { "epoch": 0.43185502053550756, "grad_norm": 2.6667338396994076, "learning_rate": 2.990314304041052e-06, "loss": 0.7897, "step": 35435 }, { "epoch": 0.43191595675965533, "grad_norm": 2.4798802620436766, "learning_rate": 2.989993585631816e-06, "loss": 0.769, "step": 35440 }, { "epoch": 0.43197689298380315, "grad_norm": 2.675850083753533, "learning_rate": 2.989672867222579e-06, "loss": 0.7684, "step": 35445 }, { "epoch": 0.432037829207951, "grad_norm": 2.4225091909823275, "learning_rate": 2.989352148813342e-06, "loss": 0.7705, "step": 35450 }, { "epoch": 0.43209876543209874, "grad_norm": 2.5892882258500305, "learning_rate": 2.989031430404106e-06, "loss": 0.7633, "step": 35455 }, { "epoch": 0.43215970165624656, "grad_norm": 2.2286200764270823, "learning_rate": 2.988710711994869e-06, "loss": 0.7571, "step": 35460 }, { "epoch": 0.4322206378803944, "grad_norm": 4.494489208623878, "learning_rate": 2.988389993585632e-06, "loss": 0.7479, "step": 35465 }, { "epoch": 0.4322815741045422, "grad_norm": 2.4902422436089546, "learning_rate": 2.9880692751763953e-06, "loss": 0.8305, "step": 35470 }, { "epoch": 0.43234251032869, "grad_norm": 2.7614684358664636, "learning_rate": 2.9877485567671588e-06, "loss": 0.8048, "step": 35475 }, { "epoch": 0.4324034465528378, "grad_norm": 2.9509764826445632, "learning_rate": 2.9874278383579218e-06, "loss": 0.7389, "step": 35480 }, { "epoch": 0.4324643827769856, "grad_norm": 1.9426223854572984, "learning_rate": 2.987107119948685e-06, "loss": 0.7497, "step": 35485 }, { "epoch": 0.4325253190011334, "grad_norm": 3.0358630068624866, "learning_rate": 2.9867864015394486e-06, "loss": 0.7917, "step": 35490 }, { "epoch": 0.4325862552252812, "grad_norm": 2.2451775833783865, "learning_rate": 2.986465683130212e-06, "loss": 0.7471, "step": 35495 }, { "epoch": 0.43264719144942904, "grad_norm": 2.1205434983794578, "learning_rate": 2.986144964720975e-06, "loss": 0.717, "step": 35500 }, { "epoch": 0.43270812767357686, "grad_norm": 6.558706945235252, "learning_rate": 2.9858242463117385e-06, "loss": 0.8118, "step": 35505 }, { "epoch": 0.4327690638977246, "grad_norm": 2.237026149179465, "learning_rate": 2.985503527902502e-06, "loss": 0.7941, "step": 35510 }, { "epoch": 0.43283000012187245, "grad_norm": 2.145276580211159, "learning_rate": 2.985182809493265e-06, "loss": 0.6643, "step": 35515 }, { "epoch": 0.43289093634602027, "grad_norm": 2.2535616432973895, "learning_rate": 2.984862091084029e-06, "loss": 0.7901, "step": 35520 }, { "epoch": 0.43295187257016804, "grad_norm": 2.154273123984323, "learning_rate": 2.984541372674792e-06, "loss": 0.7834, "step": 35525 }, { "epoch": 0.43301280879431586, "grad_norm": 2.753757764976065, "learning_rate": 2.984220654265555e-06, "loss": 0.7871, "step": 35530 }, { "epoch": 0.4330737450184637, "grad_norm": 2.4977761198851613, "learning_rate": 2.9838999358563188e-06, "loss": 0.7719, "step": 35535 }, { "epoch": 0.4331346812426115, "grad_norm": 2.2046633262919335, "learning_rate": 2.9835792174470818e-06, "loss": 0.799, "step": 35540 }, { "epoch": 0.4331956174667593, "grad_norm": 2.5004308829726054, "learning_rate": 2.983258499037845e-06, "loss": 0.7157, "step": 35545 }, { "epoch": 0.4332565536909071, "grad_norm": 2.5759152932666183, "learning_rate": 2.9829377806286082e-06, "loss": 0.8013, "step": 35550 }, { "epoch": 0.4333174899150549, "grad_norm": 2.670974176075907, "learning_rate": 2.9826170622193717e-06, "loss": 0.716, "step": 35555 }, { "epoch": 0.4333784261392027, "grad_norm": 2.3029509077609767, "learning_rate": 2.9822963438101347e-06, "loss": 0.7623, "step": 35560 }, { "epoch": 0.4334393623633505, "grad_norm": 2.2781258129733146, "learning_rate": 2.981975625400898e-06, "loss": 0.7496, "step": 35565 }, { "epoch": 0.43350029858749833, "grad_norm": 2.263217505348666, "learning_rate": 2.9816549069916616e-06, "loss": 0.7391, "step": 35570 }, { "epoch": 0.43356123481164616, "grad_norm": 2.898631638753049, "learning_rate": 2.981334188582425e-06, "loss": 0.7536, "step": 35575 }, { "epoch": 0.4336221710357939, "grad_norm": 2.6085034322033684, "learning_rate": 2.981013470173188e-06, "loss": 0.7913, "step": 35580 }, { "epoch": 0.43368310725994175, "grad_norm": 2.3632627519558085, "learning_rate": 2.9806927517639515e-06, "loss": 0.7964, "step": 35585 }, { "epoch": 0.43374404348408957, "grad_norm": 2.349578057087949, "learning_rate": 2.980372033354715e-06, "loss": 0.7653, "step": 35590 }, { "epoch": 0.43380497970823734, "grad_norm": 2.6416011465459937, "learning_rate": 2.980051314945478e-06, "loss": 0.731, "step": 35595 }, { "epoch": 0.43386591593238516, "grad_norm": 3.245153258654391, "learning_rate": 2.979730596536242e-06, "loss": 0.754, "step": 35600 }, { "epoch": 0.433926852156533, "grad_norm": 2.368277869980745, "learning_rate": 2.979409878127005e-06, "loss": 0.7578, "step": 35605 }, { "epoch": 0.4339877883806808, "grad_norm": 3.026709595248795, "learning_rate": 2.979089159717768e-06, "loss": 0.8209, "step": 35610 }, { "epoch": 0.4340487246048286, "grad_norm": 2.7255578859833447, "learning_rate": 2.9787684413085317e-06, "loss": 0.7446, "step": 35615 }, { "epoch": 0.4341096608289764, "grad_norm": 2.81759413604665, "learning_rate": 2.9784477228992947e-06, "loss": 0.7868, "step": 35620 }, { "epoch": 0.4341705970531242, "grad_norm": 2.2616774126403203, "learning_rate": 2.9781270044900577e-06, "loss": 0.706, "step": 35625 }, { "epoch": 0.434231533277272, "grad_norm": 2.4431451767506736, "learning_rate": 2.977806286080821e-06, "loss": 0.7203, "step": 35630 }, { "epoch": 0.4342924695014198, "grad_norm": 2.7010566458501035, "learning_rate": 2.9774855676715846e-06, "loss": 0.7351, "step": 35635 }, { "epoch": 0.43435340572556763, "grad_norm": 2.4391257112435407, "learning_rate": 2.9771648492623476e-06, "loss": 0.7029, "step": 35640 }, { "epoch": 0.43441434194971545, "grad_norm": 2.238787172374872, "learning_rate": 2.976844130853111e-06, "loss": 0.8413, "step": 35645 }, { "epoch": 0.4344752781738632, "grad_norm": 2.467744629636879, "learning_rate": 2.9765234124438745e-06, "loss": 0.7882, "step": 35650 }, { "epoch": 0.43453621439801104, "grad_norm": 3.1878636878339726, "learning_rate": 2.976202694034638e-06, "loss": 0.7073, "step": 35655 }, { "epoch": 0.43459715062215887, "grad_norm": 2.2240780126496777, "learning_rate": 2.975881975625401e-06, "loss": 0.8037, "step": 35660 }, { "epoch": 0.43465808684630663, "grad_norm": 2.3423382783636257, "learning_rate": 2.975561257216165e-06, "loss": 0.7034, "step": 35665 }, { "epoch": 0.43471902307045446, "grad_norm": 2.1462911936130764, "learning_rate": 2.975240538806928e-06, "loss": 0.7842, "step": 35670 }, { "epoch": 0.4347799592946023, "grad_norm": 2.730431177009444, "learning_rate": 2.974919820397691e-06, "loss": 0.7482, "step": 35675 }, { "epoch": 0.4348408955187501, "grad_norm": 2.814461101380631, "learning_rate": 2.9745991019884547e-06, "loss": 0.7465, "step": 35680 }, { "epoch": 0.43490183174289787, "grad_norm": 2.5915121772290606, "learning_rate": 2.9742783835792177e-06, "loss": 0.741, "step": 35685 }, { "epoch": 0.4349627679670457, "grad_norm": 2.241704064016241, "learning_rate": 2.9739576651699808e-06, "loss": 0.7982, "step": 35690 }, { "epoch": 0.4350237041911935, "grad_norm": 2.9957440838032867, "learning_rate": 2.9736369467607446e-06, "loss": 0.7665, "step": 35695 }, { "epoch": 0.4350846404153413, "grad_norm": 2.256547248654534, "learning_rate": 2.9733162283515076e-06, "loss": 0.7287, "step": 35700 }, { "epoch": 0.4351455766394891, "grad_norm": 2.5827172103780365, "learning_rate": 2.9729955099422707e-06, "loss": 0.8258, "step": 35705 }, { "epoch": 0.43520651286363693, "grad_norm": 1.994603989440131, "learning_rate": 2.9726747915330345e-06, "loss": 0.7104, "step": 35710 }, { "epoch": 0.4352674490877847, "grad_norm": 2.981142640842294, "learning_rate": 2.9723540731237975e-06, "loss": 0.7381, "step": 35715 }, { "epoch": 0.4353283853119325, "grad_norm": 2.239400132158426, "learning_rate": 2.972033354714561e-06, "loss": 0.7756, "step": 35720 }, { "epoch": 0.43538932153608034, "grad_norm": 2.8641235765019344, "learning_rate": 2.971712636305324e-06, "loss": 0.7728, "step": 35725 }, { "epoch": 0.43545025776022817, "grad_norm": 2.432870595909461, "learning_rate": 2.9713919178960874e-06, "loss": 0.7243, "step": 35730 }, { "epoch": 0.43551119398437593, "grad_norm": 2.3958883413411614, "learning_rate": 2.971071199486851e-06, "loss": 0.7462, "step": 35735 }, { "epoch": 0.43557213020852376, "grad_norm": 2.2029872059301794, "learning_rate": 2.970750481077614e-06, "loss": 0.7561, "step": 35740 }, { "epoch": 0.4356330664326716, "grad_norm": 2.1916081977415307, "learning_rate": 2.9704297626683778e-06, "loss": 0.7046, "step": 35745 }, { "epoch": 0.43569400265681935, "grad_norm": 2.8357674212099018, "learning_rate": 2.9701090442591408e-06, "loss": 0.6631, "step": 35750 }, { "epoch": 0.43575493888096717, "grad_norm": 2.7404447561565775, "learning_rate": 2.9697883258499038e-06, "loss": 0.7314, "step": 35755 }, { "epoch": 0.435815875105115, "grad_norm": 2.2148811330801497, "learning_rate": 2.9694676074406676e-06, "loss": 0.8048, "step": 35760 }, { "epoch": 0.4358768113292628, "grad_norm": 2.8673580465939494, "learning_rate": 2.9691468890314307e-06, "loss": 0.8646, "step": 35765 }, { "epoch": 0.4359377475534106, "grad_norm": 2.0751222936814893, "learning_rate": 2.9688261706221937e-06, "loss": 0.7402, "step": 35770 }, { "epoch": 0.4359986837775584, "grad_norm": 2.3601240921921343, "learning_rate": 2.9685054522129575e-06, "loss": 0.7937, "step": 35775 }, { "epoch": 0.4360596200017062, "grad_norm": 2.1037026141887627, "learning_rate": 2.9681847338037206e-06, "loss": 0.8113, "step": 35780 }, { "epoch": 0.436120556225854, "grad_norm": 2.3267965233451906, "learning_rate": 2.9678640153944836e-06, "loss": 0.7881, "step": 35785 }, { "epoch": 0.4361814924500018, "grad_norm": 2.432927353213262, "learning_rate": 2.9675432969852474e-06, "loss": 0.7661, "step": 35790 }, { "epoch": 0.43624242867414964, "grad_norm": 2.105860932528597, "learning_rate": 2.9672225785760105e-06, "loss": 0.7497, "step": 35795 }, { "epoch": 0.43630336489829746, "grad_norm": 2.97955437650987, "learning_rate": 2.966901860166774e-06, "loss": 0.7671, "step": 35800 }, { "epoch": 0.43636430112244523, "grad_norm": 2.605353431630693, "learning_rate": 2.966581141757537e-06, "loss": 0.7156, "step": 35805 }, { "epoch": 0.43642523734659305, "grad_norm": 2.5796021669969584, "learning_rate": 2.9662604233483004e-06, "loss": 0.717, "step": 35810 }, { "epoch": 0.4364861735707409, "grad_norm": 2.84074518144068, "learning_rate": 2.965939704939064e-06, "loss": 0.7924, "step": 35815 }, { "epoch": 0.43654710979488864, "grad_norm": 2.30759609842166, "learning_rate": 2.965618986529827e-06, "loss": 0.7793, "step": 35820 }, { "epoch": 0.43660804601903647, "grad_norm": 2.3626006395591306, "learning_rate": 2.9652982681205907e-06, "loss": 0.7108, "step": 35825 }, { "epoch": 0.4366689822431843, "grad_norm": 2.3831835442342757, "learning_rate": 2.9649775497113537e-06, "loss": 0.7607, "step": 35830 }, { "epoch": 0.4367299184673321, "grad_norm": 2.344521072284577, "learning_rate": 2.9646568313021167e-06, "loss": 0.7957, "step": 35835 }, { "epoch": 0.4367908546914799, "grad_norm": 2.9894899544764844, "learning_rate": 2.9643361128928806e-06, "loss": 0.8713, "step": 35840 }, { "epoch": 0.4368517909156277, "grad_norm": 2.1415656977873216, "learning_rate": 2.9640153944836436e-06, "loss": 0.7113, "step": 35845 }, { "epoch": 0.4369127271397755, "grad_norm": 2.2595883119870996, "learning_rate": 2.9636946760744066e-06, "loss": 0.7817, "step": 35850 }, { "epoch": 0.4369736633639233, "grad_norm": 4.175255380350198, "learning_rate": 2.9633739576651705e-06, "loss": 0.7747, "step": 35855 }, { "epoch": 0.4370345995880711, "grad_norm": 3.064270472217657, "learning_rate": 2.9630532392559335e-06, "loss": 0.8189, "step": 35860 }, { "epoch": 0.43709553581221894, "grad_norm": 2.5628581006142626, "learning_rate": 2.9627325208466965e-06, "loss": 0.7664, "step": 35865 }, { "epoch": 0.43715647203636676, "grad_norm": 2.118910990539273, "learning_rate": 2.9624118024374604e-06, "loss": 0.823, "step": 35870 }, { "epoch": 0.43721740826051453, "grad_norm": 2.2044221186940822, "learning_rate": 2.9620910840282234e-06, "loss": 0.7101, "step": 35875 }, { "epoch": 0.43727834448466235, "grad_norm": 3.2621389576241975, "learning_rate": 2.961770365618987e-06, "loss": 0.7379, "step": 35880 }, { "epoch": 0.4373392807088102, "grad_norm": 2.5147534648583374, "learning_rate": 2.96144964720975e-06, "loss": 0.7649, "step": 35885 }, { "epoch": 0.43740021693295794, "grad_norm": 1.890213852509887, "learning_rate": 2.9611289288005137e-06, "loss": 0.6851, "step": 35890 }, { "epoch": 0.43746115315710576, "grad_norm": 2.574588718306096, "learning_rate": 2.9608082103912767e-06, "loss": 0.7993, "step": 35895 }, { "epoch": 0.4375220893812536, "grad_norm": 2.995404357564465, "learning_rate": 2.9604874919820397e-06, "loss": 0.7438, "step": 35900 }, { "epoch": 0.4375830256054014, "grad_norm": 2.4452256691644707, "learning_rate": 2.9601667735728036e-06, "loss": 0.6957, "step": 35905 }, { "epoch": 0.4376439618295492, "grad_norm": 2.2294170932639514, "learning_rate": 2.9598460551635666e-06, "loss": 0.7206, "step": 35910 }, { "epoch": 0.437704898053697, "grad_norm": 2.142031330956814, "learning_rate": 2.9595253367543296e-06, "loss": 0.7395, "step": 35915 }, { "epoch": 0.4377658342778448, "grad_norm": 2.548820031677011, "learning_rate": 2.9592046183450935e-06, "loss": 0.7537, "step": 35920 }, { "epoch": 0.4378267705019926, "grad_norm": 2.8676789562514955, "learning_rate": 2.9588838999358565e-06, "loss": 0.8089, "step": 35925 }, { "epoch": 0.4378877067261404, "grad_norm": 2.7772766801655506, "learning_rate": 2.9585631815266195e-06, "loss": 0.7585, "step": 35930 }, { "epoch": 0.43794864295028824, "grad_norm": 2.584697204548255, "learning_rate": 2.9582424631173834e-06, "loss": 0.7712, "step": 35935 }, { "epoch": 0.43800957917443606, "grad_norm": 2.353540770435901, "learning_rate": 2.9579217447081464e-06, "loss": 0.7019, "step": 35940 }, { "epoch": 0.4380705153985838, "grad_norm": 2.174748996567416, "learning_rate": 2.95760102629891e-06, "loss": 0.6749, "step": 35945 }, { "epoch": 0.43813145162273165, "grad_norm": 3.3986243860671106, "learning_rate": 2.9572803078896733e-06, "loss": 0.8135, "step": 35950 }, { "epoch": 0.43819238784687947, "grad_norm": 2.2274646565536105, "learning_rate": 2.9569595894804363e-06, "loss": 0.7758, "step": 35955 }, { "epoch": 0.43825332407102724, "grad_norm": 2.660441047450026, "learning_rate": 2.9566388710711998e-06, "loss": 0.8027, "step": 35960 }, { "epoch": 0.43831426029517506, "grad_norm": 2.241327974124511, "learning_rate": 2.956318152661963e-06, "loss": 0.7819, "step": 35965 }, { "epoch": 0.4383751965193229, "grad_norm": 2.463301053449937, "learning_rate": 2.9559974342527266e-06, "loss": 0.7056, "step": 35970 }, { "epoch": 0.4384361327434707, "grad_norm": 2.279069173627898, "learning_rate": 2.9556767158434897e-06, "loss": 0.7606, "step": 35975 }, { "epoch": 0.4384970689676185, "grad_norm": 2.7676502109393954, "learning_rate": 2.9553559974342527e-06, "loss": 0.7107, "step": 35980 }, { "epoch": 0.4385580051917663, "grad_norm": 2.6009916255921075, "learning_rate": 2.9550352790250165e-06, "loss": 0.7834, "step": 35985 }, { "epoch": 0.4386189414159141, "grad_norm": 2.4967104224248833, "learning_rate": 2.9547145606157795e-06, "loss": 0.7034, "step": 35990 }, { "epoch": 0.4386798776400619, "grad_norm": 2.6714509214192277, "learning_rate": 2.9543938422065426e-06, "loss": 0.7437, "step": 35995 }, { "epoch": 0.4387408138642097, "grad_norm": 2.5378736903432806, "learning_rate": 2.9540731237973064e-06, "loss": 0.7586, "step": 36000 }, { "epoch": 0.43880175008835753, "grad_norm": 2.3205389296416037, "learning_rate": 2.9537524053880694e-06, "loss": 0.7334, "step": 36005 }, { "epoch": 0.43886268631250536, "grad_norm": 2.130339131110607, "learning_rate": 2.9534316869788325e-06, "loss": 0.6819, "step": 36010 }, { "epoch": 0.4389236225366531, "grad_norm": 2.655945761170499, "learning_rate": 2.9531109685695963e-06, "loss": 0.7967, "step": 36015 }, { "epoch": 0.43898455876080095, "grad_norm": 2.4311004989545912, "learning_rate": 2.9527902501603593e-06, "loss": 0.7499, "step": 36020 }, { "epoch": 0.43904549498494877, "grad_norm": 2.807240107928676, "learning_rate": 2.9524695317511228e-06, "loss": 0.7978, "step": 36025 }, { "epoch": 0.43910643120909654, "grad_norm": 2.270220019432378, "learning_rate": 2.9521488133418862e-06, "loss": 0.7157, "step": 36030 }, { "epoch": 0.43916736743324436, "grad_norm": 2.093865204603837, "learning_rate": 2.9518280949326492e-06, "loss": 0.7311, "step": 36035 }, { "epoch": 0.4392283036573922, "grad_norm": 2.789084786694077, "learning_rate": 2.9515073765234127e-06, "loss": 0.7876, "step": 36040 }, { "epoch": 0.43928923988154, "grad_norm": 2.390771152351896, "learning_rate": 2.951186658114176e-06, "loss": 0.7696, "step": 36045 }, { "epoch": 0.4393501761056878, "grad_norm": 3.00664280871745, "learning_rate": 2.9508659397049396e-06, "loss": 0.7755, "step": 36050 }, { "epoch": 0.4394111123298356, "grad_norm": 3.884434401489165, "learning_rate": 2.9505452212957026e-06, "loss": 0.7991, "step": 36055 }, { "epoch": 0.4394720485539834, "grad_norm": 2.1260290572002063, "learning_rate": 2.9502245028864656e-06, "loss": 0.7756, "step": 36060 }, { "epoch": 0.4395329847781312, "grad_norm": 3.033842178214631, "learning_rate": 2.9499037844772295e-06, "loss": 0.7389, "step": 36065 }, { "epoch": 0.439593921002279, "grad_norm": 2.319874340921273, "learning_rate": 2.9495830660679925e-06, "loss": 0.7012, "step": 36070 }, { "epoch": 0.43965485722642683, "grad_norm": 3.497568806799107, "learning_rate": 2.9492623476587555e-06, "loss": 0.7079, "step": 36075 }, { "epoch": 0.43971579345057465, "grad_norm": 2.5873275177229513, "learning_rate": 2.9489416292495194e-06, "loss": 0.7879, "step": 36080 }, { "epoch": 0.4397767296747224, "grad_norm": 2.575702964290286, "learning_rate": 2.9486209108402824e-06, "loss": 0.7783, "step": 36085 }, { "epoch": 0.43983766589887024, "grad_norm": 2.206662277151513, "learning_rate": 2.9483001924310454e-06, "loss": 0.7382, "step": 36090 }, { "epoch": 0.43989860212301807, "grad_norm": 2.3500642894854797, "learning_rate": 2.9479794740218093e-06, "loss": 0.708, "step": 36095 }, { "epoch": 0.43995953834716583, "grad_norm": 2.3484621393105374, "learning_rate": 2.9476587556125723e-06, "loss": 0.8431, "step": 36100 }, { "epoch": 0.44002047457131366, "grad_norm": 2.5493891174357044, "learning_rate": 2.9473380372033357e-06, "loss": 0.7601, "step": 36105 }, { "epoch": 0.4400814107954615, "grad_norm": 3.9678643237129143, "learning_rate": 2.947017318794099e-06, "loss": 0.7574, "step": 36110 }, { "epoch": 0.4401423470196093, "grad_norm": 3.2365037147715827, "learning_rate": 2.9466966003848626e-06, "loss": 0.7634, "step": 36115 }, { "epoch": 0.44020328324375707, "grad_norm": 2.3484560924903133, "learning_rate": 2.9463758819756256e-06, "loss": 0.7764, "step": 36120 }, { "epoch": 0.4402642194679049, "grad_norm": 3.1201512115639356, "learning_rate": 2.946055163566389e-06, "loss": 0.7478, "step": 36125 }, { "epoch": 0.4403251556920527, "grad_norm": 2.2421046616279585, "learning_rate": 2.9457344451571525e-06, "loss": 0.706, "step": 36130 }, { "epoch": 0.4403860919162005, "grad_norm": 2.523511874520461, "learning_rate": 2.9454137267479155e-06, "loss": 0.7857, "step": 36135 }, { "epoch": 0.4404470281403483, "grad_norm": 2.5837727370949106, "learning_rate": 2.9450930083386785e-06, "loss": 0.6731, "step": 36140 }, { "epoch": 0.44050796436449613, "grad_norm": 2.5865861205415657, "learning_rate": 2.9447722899294424e-06, "loss": 0.7374, "step": 36145 }, { "epoch": 0.44056890058864395, "grad_norm": 2.454069659677281, "learning_rate": 2.9444515715202054e-06, "loss": 0.7398, "step": 36150 }, { "epoch": 0.4406298368127917, "grad_norm": 3.7787804608070448, "learning_rate": 2.9441308531109684e-06, "loss": 0.752, "step": 36155 }, { "epoch": 0.44069077303693954, "grad_norm": 2.470529725422466, "learning_rate": 2.9438101347017323e-06, "loss": 0.7486, "step": 36160 }, { "epoch": 0.44075170926108737, "grad_norm": 2.5597238937190085, "learning_rate": 2.9434894162924953e-06, "loss": 0.7891, "step": 36165 }, { "epoch": 0.44081264548523513, "grad_norm": 1.9942834465879713, "learning_rate": 2.9431686978832587e-06, "loss": 0.7143, "step": 36170 }, { "epoch": 0.44087358170938296, "grad_norm": 2.669068615776362, "learning_rate": 2.942847979474022e-06, "loss": 0.7188, "step": 36175 }, { "epoch": 0.4409345179335308, "grad_norm": 2.07689000335771, "learning_rate": 2.942527261064785e-06, "loss": 0.7496, "step": 36180 }, { "epoch": 0.4409954541576786, "grad_norm": 2.7992509684445985, "learning_rate": 2.9422065426555486e-06, "loss": 0.7523, "step": 36185 }, { "epoch": 0.44105639038182637, "grad_norm": 2.275943505823055, "learning_rate": 2.941885824246312e-06, "loss": 0.7915, "step": 36190 }, { "epoch": 0.4411173266059742, "grad_norm": 2.2605906785854177, "learning_rate": 2.9415651058370755e-06, "loss": 0.7996, "step": 36195 }, { "epoch": 0.441178262830122, "grad_norm": 2.2444122206108936, "learning_rate": 2.9412443874278385e-06, "loss": 0.6923, "step": 36200 }, { "epoch": 0.4412391990542698, "grad_norm": 2.4592905789679764, "learning_rate": 2.940923669018602e-06, "loss": 0.7874, "step": 36205 }, { "epoch": 0.4413001352784176, "grad_norm": 2.060768254055436, "learning_rate": 2.9406029506093654e-06, "loss": 0.7848, "step": 36210 }, { "epoch": 0.4413610715025654, "grad_norm": 2.6608598698696735, "learning_rate": 2.9402822322001284e-06, "loss": 0.702, "step": 36215 }, { "epoch": 0.4414220077267132, "grad_norm": 2.3840781905364525, "learning_rate": 2.9399615137908915e-06, "loss": 0.7818, "step": 36220 }, { "epoch": 0.441482943950861, "grad_norm": 2.634322815588872, "learning_rate": 2.9396407953816553e-06, "loss": 0.6954, "step": 36225 }, { "epoch": 0.44154388017500884, "grad_norm": 2.473552120708939, "learning_rate": 2.9393200769724183e-06, "loss": 0.7228, "step": 36230 }, { "epoch": 0.44160481639915666, "grad_norm": 4.499243553132115, "learning_rate": 2.9389993585631813e-06, "loss": 0.8132, "step": 36235 }, { "epoch": 0.44166575262330443, "grad_norm": 2.835959169093794, "learning_rate": 2.938678640153945e-06, "loss": 0.7438, "step": 36240 }, { "epoch": 0.44172668884745225, "grad_norm": 2.4772081735556832, "learning_rate": 2.9383579217447082e-06, "loss": 0.7462, "step": 36245 }, { "epoch": 0.4417876250716001, "grad_norm": 3.018074180003957, "learning_rate": 2.9380372033354717e-06, "loss": 0.7391, "step": 36250 }, { "epoch": 0.44184856129574784, "grad_norm": 2.2675147552242625, "learning_rate": 2.937716484926235e-06, "loss": 0.7734, "step": 36255 }, { "epoch": 0.44190949751989567, "grad_norm": 2.360694836181198, "learning_rate": 2.937395766516998e-06, "loss": 0.7745, "step": 36260 }, { "epoch": 0.4419704337440435, "grad_norm": 2.648707516702545, "learning_rate": 2.9370750481077616e-06, "loss": 0.7391, "step": 36265 }, { "epoch": 0.4420313699681913, "grad_norm": 2.2549555438405298, "learning_rate": 2.936754329698525e-06, "loss": 0.817, "step": 36270 }, { "epoch": 0.4420923061923391, "grad_norm": 2.634859835394775, "learning_rate": 2.9364336112892884e-06, "loss": 0.6863, "step": 36275 }, { "epoch": 0.4421532424164869, "grad_norm": 2.247062388421181, "learning_rate": 2.9361128928800515e-06, "loss": 0.7936, "step": 36280 }, { "epoch": 0.4422141786406347, "grad_norm": 2.906479798815005, "learning_rate": 2.935792174470815e-06, "loss": 0.7374, "step": 36285 }, { "epoch": 0.4422751148647825, "grad_norm": 2.292408887509882, "learning_rate": 2.9354714560615783e-06, "loss": 0.7579, "step": 36290 }, { "epoch": 0.4423360510889303, "grad_norm": 2.65372898046615, "learning_rate": 2.9351507376523414e-06, "loss": 0.7443, "step": 36295 }, { "epoch": 0.44239698731307814, "grad_norm": 2.3503317066872134, "learning_rate": 2.9348300192431052e-06, "loss": 0.8063, "step": 36300 }, { "epoch": 0.44245792353722596, "grad_norm": 2.9682522133528217, "learning_rate": 2.9345093008338682e-06, "loss": 0.7824, "step": 36305 }, { "epoch": 0.44251885976137373, "grad_norm": 2.5247107854156097, "learning_rate": 2.9341885824246313e-06, "loss": 0.777, "step": 36310 }, { "epoch": 0.44257979598552155, "grad_norm": 2.80189416533173, "learning_rate": 2.9338678640153943e-06, "loss": 0.8176, "step": 36315 }, { "epoch": 0.4426407322096694, "grad_norm": 2.274505428324922, "learning_rate": 2.933547145606158e-06, "loss": 0.784, "step": 36320 }, { "epoch": 0.44270166843381714, "grad_norm": 2.3938718264987098, "learning_rate": 2.933226427196921e-06, "loss": 0.7881, "step": 36325 }, { "epoch": 0.44276260465796496, "grad_norm": 2.678481024421016, "learning_rate": 2.9329057087876846e-06, "loss": 0.6993, "step": 36330 }, { "epoch": 0.4428235408821128, "grad_norm": 2.19670046434948, "learning_rate": 2.932584990378448e-06, "loss": 0.7607, "step": 36335 }, { "epoch": 0.4428844771062606, "grad_norm": 2.24443812612373, "learning_rate": 2.932264271969211e-06, "loss": 0.7075, "step": 36340 }, { "epoch": 0.4429454133304084, "grad_norm": 2.001529576464323, "learning_rate": 2.9319435535599745e-06, "loss": 0.7406, "step": 36345 }, { "epoch": 0.4430063495545562, "grad_norm": 2.04971028667802, "learning_rate": 2.931622835150738e-06, "loss": 0.7806, "step": 36350 }, { "epoch": 0.443067285778704, "grad_norm": 2.299732278209622, "learning_rate": 2.9313021167415014e-06, "loss": 0.8155, "step": 36355 }, { "epoch": 0.4431282220028518, "grad_norm": 3.005508169735594, "learning_rate": 2.9309813983322644e-06, "loss": 0.7213, "step": 36360 }, { "epoch": 0.4431891582269996, "grad_norm": 2.473861808336692, "learning_rate": 2.9306606799230283e-06, "loss": 0.8102, "step": 36365 }, { "epoch": 0.44325009445114744, "grad_norm": 2.3209273643492185, "learning_rate": 2.9303399615137913e-06, "loss": 0.7783, "step": 36370 }, { "epoch": 0.44331103067529526, "grad_norm": 2.3223915785698055, "learning_rate": 2.9300192431045543e-06, "loss": 0.7608, "step": 36375 }, { "epoch": 0.443371966899443, "grad_norm": 2.752577022559746, "learning_rate": 2.929698524695318e-06, "loss": 0.7645, "step": 36380 }, { "epoch": 0.44343290312359085, "grad_norm": 2.6551780518610557, "learning_rate": 2.929377806286081e-06, "loss": 0.6583, "step": 36385 }, { "epoch": 0.44349383934773867, "grad_norm": 2.3338699239578187, "learning_rate": 2.929057087876844e-06, "loss": 0.7274, "step": 36390 }, { "epoch": 0.44355477557188644, "grad_norm": 2.618903626804993, "learning_rate": 2.9287363694676076e-06, "loss": 0.7954, "step": 36395 }, { "epoch": 0.44361571179603426, "grad_norm": 2.289911096136062, "learning_rate": 2.928415651058371e-06, "loss": 0.7876, "step": 36400 }, { "epoch": 0.4436766480201821, "grad_norm": 2.14408112580613, "learning_rate": 2.928094932649134e-06, "loss": 0.792, "step": 36405 }, { "epoch": 0.4437375842443299, "grad_norm": 2.6943636396708586, "learning_rate": 2.9277742142398975e-06, "loss": 0.7915, "step": 36410 }, { "epoch": 0.4437985204684777, "grad_norm": 2.0061643008620926, "learning_rate": 2.927453495830661e-06, "loss": 0.8321, "step": 36415 }, { "epoch": 0.4438594566926255, "grad_norm": 3.002248077745817, "learning_rate": 2.9271327774214244e-06, "loss": 0.7765, "step": 36420 }, { "epoch": 0.4439203929167733, "grad_norm": 2.3285393479953447, "learning_rate": 2.9268120590121874e-06, "loss": 0.7619, "step": 36425 }, { "epoch": 0.4439813291409211, "grad_norm": 2.3109854492325934, "learning_rate": 2.926491340602951e-06, "loss": 0.8202, "step": 36430 }, { "epoch": 0.4440422653650689, "grad_norm": 2.22172192071381, "learning_rate": 2.9261706221937143e-06, "loss": 0.7899, "step": 36435 }, { "epoch": 0.44410320158921673, "grad_norm": 2.8159333643495854, "learning_rate": 2.9258499037844773e-06, "loss": 0.7895, "step": 36440 }, { "epoch": 0.44416413781336456, "grad_norm": 2.373046192262517, "learning_rate": 2.925529185375241e-06, "loss": 0.741, "step": 36445 }, { "epoch": 0.4442250740375123, "grad_norm": 2.995486383064472, "learning_rate": 2.925208466966004e-06, "loss": 0.8277, "step": 36450 }, { "epoch": 0.44428601026166015, "grad_norm": 2.5411882591824853, "learning_rate": 2.9248877485567672e-06, "loss": 0.7782, "step": 36455 }, { "epoch": 0.44434694648580797, "grad_norm": 2.7048654419905964, "learning_rate": 2.924567030147531e-06, "loss": 0.7241, "step": 36460 }, { "epoch": 0.44440788270995574, "grad_norm": 2.9805873127583395, "learning_rate": 2.924246311738294e-06, "loss": 0.7426, "step": 36465 }, { "epoch": 0.44446881893410356, "grad_norm": 2.717389648166091, "learning_rate": 2.923925593329057e-06, "loss": 0.8211, "step": 36470 }, { "epoch": 0.4445297551582514, "grad_norm": 3.055635650747935, "learning_rate": 2.9236048749198206e-06, "loss": 0.741, "step": 36475 }, { "epoch": 0.4445906913823992, "grad_norm": 2.1180760490602557, "learning_rate": 2.923284156510584e-06, "loss": 0.7852, "step": 36480 }, { "epoch": 0.444651627606547, "grad_norm": 2.318584281571115, "learning_rate": 2.922963438101347e-06, "loss": 0.7461, "step": 36485 }, { "epoch": 0.4447125638306948, "grad_norm": 3.126378202417201, "learning_rate": 2.9226427196921104e-06, "loss": 0.7819, "step": 36490 }, { "epoch": 0.4447735000548426, "grad_norm": 2.372198424186327, "learning_rate": 2.922322001282874e-06, "loss": 0.6972, "step": 36495 }, { "epoch": 0.4448344362789904, "grad_norm": 2.28374555616624, "learning_rate": 2.9220012828736373e-06, "loss": 0.8124, "step": 36500 }, { "epoch": 0.4448953725031382, "grad_norm": 2.755914901075118, "learning_rate": 2.9216805644644003e-06, "loss": 0.7906, "step": 36505 }, { "epoch": 0.44495630872728603, "grad_norm": 2.4305250567422703, "learning_rate": 2.9213598460551638e-06, "loss": 0.7058, "step": 36510 }, { "epoch": 0.44501724495143385, "grad_norm": 2.577310871000416, "learning_rate": 2.9210391276459272e-06, "loss": 0.7316, "step": 36515 }, { "epoch": 0.4450781811755816, "grad_norm": 2.2047639701113755, "learning_rate": 2.9207184092366902e-06, "loss": 0.7805, "step": 36520 }, { "epoch": 0.44513911739972944, "grad_norm": 2.645430908364216, "learning_rate": 2.920397690827454e-06, "loss": 0.8078, "step": 36525 }, { "epoch": 0.44520005362387727, "grad_norm": 2.8333953326451695, "learning_rate": 2.920076972418217e-06, "loss": 0.7361, "step": 36530 }, { "epoch": 0.44526098984802503, "grad_norm": 2.1821793440261916, "learning_rate": 2.91975625400898e-06, "loss": 0.736, "step": 36535 }, { "epoch": 0.44532192607217286, "grad_norm": 3.008709081658706, "learning_rate": 2.919435535599744e-06, "loss": 0.6896, "step": 36540 }, { "epoch": 0.4453828622963207, "grad_norm": 3.1178906975207186, "learning_rate": 2.919114817190507e-06, "loss": 0.7385, "step": 36545 }, { "epoch": 0.4454437985204685, "grad_norm": 2.4662283851498437, "learning_rate": 2.91879409878127e-06, "loss": 0.7718, "step": 36550 }, { "epoch": 0.44550473474461627, "grad_norm": 2.638231773957373, "learning_rate": 2.918473380372034e-06, "loss": 0.7763, "step": 36555 }, { "epoch": 0.4455656709687641, "grad_norm": 1.8745452761855266, "learning_rate": 2.918152661962797e-06, "loss": 0.8708, "step": 36560 }, { "epoch": 0.4456266071929119, "grad_norm": 2.2680820724525095, "learning_rate": 2.91783194355356e-06, "loss": 0.7713, "step": 36565 }, { "epoch": 0.4456875434170597, "grad_norm": 2.6072424802666276, "learning_rate": 2.9175112251443234e-06, "loss": 0.7161, "step": 36570 }, { "epoch": 0.4457484796412075, "grad_norm": 2.578727142876059, "learning_rate": 2.917190506735087e-06, "loss": 0.7374, "step": 36575 }, { "epoch": 0.44580941586535533, "grad_norm": 2.247424629292334, "learning_rate": 2.9168697883258503e-06, "loss": 0.7678, "step": 36580 }, { "epoch": 0.44587035208950315, "grad_norm": 2.2738011376315947, "learning_rate": 2.9165490699166133e-06, "loss": 0.6741, "step": 36585 }, { "epoch": 0.4459312883136509, "grad_norm": 2.359643908676565, "learning_rate": 2.916228351507377e-06, "loss": 0.8044, "step": 36590 }, { "epoch": 0.44599222453779874, "grad_norm": 2.678895981191218, "learning_rate": 2.91590763309814e-06, "loss": 0.76, "step": 36595 }, { "epoch": 0.44605316076194657, "grad_norm": 3.460752679500214, "learning_rate": 2.915586914688903e-06, "loss": 0.6974, "step": 36600 }, { "epoch": 0.44611409698609433, "grad_norm": 2.2865734576810617, "learning_rate": 2.915266196279667e-06, "loss": 0.7385, "step": 36605 }, { "epoch": 0.44617503321024216, "grad_norm": 2.5540758543343727, "learning_rate": 2.91494547787043e-06, "loss": 0.7873, "step": 36610 }, { "epoch": 0.44623596943439, "grad_norm": 2.6081053138786507, "learning_rate": 2.914624759461193e-06, "loss": 0.7823, "step": 36615 }, { "epoch": 0.4462969056585378, "grad_norm": 2.992175575384227, "learning_rate": 2.914304041051957e-06, "loss": 0.7039, "step": 36620 }, { "epoch": 0.44635784188268557, "grad_norm": 3.6620837118388563, "learning_rate": 2.91398332264272e-06, "loss": 0.7301, "step": 36625 }, { "epoch": 0.4464187781068334, "grad_norm": 2.6157776537214414, "learning_rate": 2.913662604233483e-06, "loss": 0.8081, "step": 36630 }, { "epoch": 0.4464797143309812, "grad_norm": 2.2562754700307592, "learning_rate": 2.913341885824247e-06, "loss": 0.7044, "step": 36635 }, { "epoch": 0.446540650555129, "grad_norm": 2.6831922940157624, "learning_rate": 2.91302116741501e-06, "loss": 0.7209, "step": 36640 }, { "epoch": 0.4466015867792768, "grad_norm": 2.8589089461968014, "learning_rate": 2.9127004490057733e-06, "loss": 0.7701, "step": 36645 }, { "epoch": 0.4466625230034246, "grad_norm": 3.0597500070175023, "learning_rate": 2.9123797305965363e-06, "loss": 0.7202, "step": 36650 }, { "epoch": 0.44672345922757245, "grad_norm": 2.557197075234678, "learning_rate": 2.9120590121872997e-06, "loss": 0.754, "step": 36655 }, { "epoch": 0.4467843954517202, "grad_norm": 2.5617827346286393, "learning_rate": 2.911738293778063e-06, "loss": 0.7905, "step": 36660 }, { "epoch": 0.44684533167586804, "grad_norm": 2.391130742455041, "learning_rate": 2.911417575368826e-06, "loss": 0.7789, "step": 36665 }, { "epoch": 0.44690626790001586, "grad_norm": 2.475548299811314, "learning_rate": 2.91109685695959e-06, "loss": 0.7721, "step": 36670 }, { "epoch": 0.44696720412416363, "grad_norm": 3.6950038892161454, "learning_rate": 2.910776138550353e-06, "loss": 0.7141, "step": 36675 }, { "epoch": 0.44702814034831145, "grad_norm": 3.6031987780864543, "learning_rate": 2.910455420141116e-06, "loss": 0.762, "step": 36680 }, { "epoch": 0.4470890765724593, "grad_norm": 2.5612374565434974, "learning_rate": 2.91013470173188e-06, "loss": 0.7006, "step": 36685 }, { "epoch": 0.44715001279660704, "grad_norm": 3.6152919692966807, "learning_rate": 2.909813983322643e-06, "loss": 0.7661, "step": 36690 }, { "epoch": 0.44721094902075487, "grad_norm": 2.6539599715967, "learning_rate": 2.909493264913406e-06, "loss": 0.7697, "step": 36695 }, { "epoch": 0.4472718852449027, "grad_norm": 2.4220763516979193, "learning_rate": 2.90917254650417e-06, "loss": 0.7657, "step": 36700 }, { "epoch": 0.4473328214690505, "grad_norm": 2.423161536027947, "learning_rate": 2.908851828094933e-06, "loss": 0.7343, "step": 36705 }, { "epoch": 0.4473937576931983, "grad_norm": 2.476355085408851, "learning_rate": 2.908531109685696e-06, "loss": 0.7206, "step": 36710 }, { "epoch": 0.4474546939173461, "grad_norm": 2.927318780451785, "learning_rate": 2.9082103912764598e-06, "loss": 0.8443, "step": 36715 }, { "epoch": 0.4475156301414939, "grad_norm": 2.31082198588543, "learning_rate": 2.9078896728672228e-06, "loss": 0.7979, "step": 36720 }, { "epoch": 0.4475765663656417, "grad_norm": 2.2236085886536343, "learning_rate": 2.9075689544579862e-06, "loss": 0.6842, "step": 36725 }, { "epoch": 0.4476375025897895, "grad_norm": 2.5344950420552577, "learning_rate": 2.9072482360487492e-06, "loss": 0.7759, "step": 36730 }, { "epoch": 0.44769843881393734, "grad_norm": 2.6669748228515173, "learning_rate": 2.9069275176395127e-06, "loss": 0.7558, "step": 36735 }, { "epoch": 0.44775937503808516, "grad_norm": 2.346956805315986, "learning_rate": 2.906606799230276e-06, "loss": 0.7568, "step": 36740 }, { "epoch": 0.44782031126223293, "grad_norm": 2.4350126269217713, "learning_rate": 2.906286080821039e-06, "loss": 0.7175, "step": 36745 }, { "epoch": 0.44788124748638075, "grad_norm": 2.3172139053537197, "learning_rate": 2.905965362411803e-06, "loss": 0.795, "step": 36750 }, { "epoch": 0.4479421837105286, "grad_norm": 3.349515522243215, "learning_rate": 2.905644644002566e-06, "loss": 0.7647, "step": 36755 }, { "epoch": 0.44800311993467634, "grad_norm": 2.512935350209152, "learning_rate": 2.905323925593329e-06, "loss": 0.7471, "step": 36760 }, { "epoch": 0.44806405615882416, "grad_norm": 2.839469849752672, "learning_rate": 2.905003207184093e-06, "loss": 0.7779, "step": 36765 }, { "epoch": 0.448124992382972, "grad_norm": 2.186226350872069, "learning_rate": 2.904682488774856e-06, "loss": 0.7688, "step": 36770 }, { "epoch": 0.4481859286071198, "grad_norm": 2.7220526873427233, "learning_rate": 2.904361770365619e-06, "loss": 0.7277, "step": 36775 }, { "epoch": 0.4482468648312676, "grad_norm": 3.547739995099448, "learning_rate": 2.9040410519563828e-06, "loss": 0.7195, "step": 36780 }, { "epoch": 0.4483078010554154, "grad_norm": 2.4262247228590064, "learning_rate": 2.903720333547146e-06, "loss": 0.7791, "step": 36785 }, { "epoch": 0.4483687372795632, "grad_norm": 2.6693432376847763, "learning_rate": 2.903399615137909e-06, "loss": 0.8641, "step": 36790 }, { "epoch": 0.448429673503711, "grad_norm": 2.6548431300511948, "learning_rate": 2.9030788967286727e-06, "loss": 0.8073, "step": 36795 }, { "epoch": 0.4484906097278588, "grad_norm": 3.062917339726572, "learning_rate": 2.9027581783194357e-06, "loss": 0.7735, "step": 36800 }, { "epoch": 0.44855154595200664, "grad_norm": 2.3804277942847114, "learning_rate": 2.902437459910199e-06, "loss": 0.8261, "step": 36805 }, { "epoch": 0.44861248217615446, "grad_norm": 2.768779450636396, "learning_rate": 2.902116741500962e-06, "loss": 0.8143, "step": 36810 }, { "epoch": 0.4486734184003022, "grad_norm": 2.276292199986457, "learning_rate": 2.901796023091726e-06, "loss": 0.7627, "step": 36815 }, { "epoch": 0.44873435462445005, "grad_norm": 3.4716478204849093, "learning_rate": 2.901475304682489e-06, "loss": 0.8149, "step": 36820 }, { "epoch": 0.44879529084859787, "grad_norm": 3.2170461380927837, "learning_rate": 2.901154586273252e-06, "loss": 0.8252, "step": 36825 }, { "epoch": 0.44885622707274564, "grad_norm": 2.3219234212823006, "learning_rate": 2.900833867864016e-06, "loss": 0.7842, "step": 36830 }, { "epoch": 0.44891716329689346, "grad_norm": 2.7311862855194255, "learning_rate": 2.900513149454779e-06, "loss": 0.7193, "step": 36835 }, { "epoch": 0.4489780995210413, "grad_norm": 2.3658162492466506, "learning_rate": 2.900192431045542e-06, "loss": 0.7931, "step": 36840 }, { "epoch": 0.4490390357451891, "grad_norm": 2.050307364817285, "learning_rate": 2.899871712636306e-06, "loss": 0.7839, "step": 36845 }, { "epoch": 0.4490999719693369, "grad_norm": 2.259900158943386, "learning_rate": 2.899550994227069e-06, "loss": 0.7794, "step": 36850 }, { "epoch": 0.4491609081934847, "grad_norm": 1.9343239619464319, "learning_rate": 2.899230275817832e-06, "loss": 0.7111, "step": 36855 }, { "epoch": 0.4492218444176325, "grad_norm": 2.6430318800940427, "learning_rate": 2.8989095574085957e-06, "loss": 0.8088, "step": 36860 }, { "epoch": 0.4492827806417803, "grad_norm": 2.406405491375032, "learning_rate": 2.8985888389993587e-06, "loss": 0.7745, "step": 36865 }, { "epoch": 0.4493437168659281, "grad_norm": 2.2147625708943384, "learning_rate": 2.898268120590122e-06, "loss": 0.785, "step": 36870 }, { "epoch": 0.44940465309007593, "grad_norm": 2.5191225155794768, "learning_rate": 2.8979474021808856e-06, "loss": 0.8419, "step": 36875 }, { "epoch": 0.44946558931422376, "grad_norm": 2.608255213392826, "learning_rate": 2.8976266837716486e-06, "loss": 0.7235, "step": 36880 }, { "epoch": 0.4495265255383715, "grad_norm": 2.211501802261103, "learning_rate": 2.897305965362412e-06, "loss": 0.7473, "step": 36885 }, { "epoch": 0.44958746176251935, "grad_norm": 2.272075949098385, "learning_rate": 2.8969852469531755e-06, "loss": 0.8027, "step": 36890 }, { "epoch": 0.44964839798666717, "grad_norm": 2.8823057897105167, "learning_rate": 2.896664528543939e-06, "loss": 0.7404, "step": 36895 }, { "epoch": 0.44970933421081494, "grad_norm": 2.6880062774735927, "learning_rate": 2.896343810134702e-06, "loss": 0.7379, "step": 36900 }, { "epoch": 0.44977027043496276, "grad_norm": 1.9159758766043615, "learning_rate": 2.896023091725465e-06, "loss": 0.7208, "step": 36905 }, { "epoch": 0.4498312066591106, "grad_norm": 2.2338122973451306, "learning_rate": 2.895702373316229e-06, "loss": 0.7563, "step": 36910 }, { "epoch": 0.4498921428832584, "grad_norm": 5.594875464015356, "learning_rate": 2.895381654906992e-06, "loss": 0.7351, "step": 36915 }, { "epoch": 0.4499530791074062, "grad_norm": 2.2539088293510257, "learning_rate": 2.895060936497755e-06, "loss": 0.7607, "step": 36920 }, { "epoch": 0.450014015331554, "grad_norm": 2.451010307404221, "learning_rate": 2.8947402180885187e-06, "loss": 0.7356, "step": 36925 }, { "epoch": 0.4500749515557018, "grad_norm": 2.4144119513834403, "learning_rate": 2.8944194996792818e-06, "loss": 0.7311, "step": 36930 }, { "epoch": 0.4501358877798496, "grad_norm": 2.5023371619761368, "learning_rate": 2.8940987812700448e-06, "loss": 0.7301, "step": 36935 }, { "epoch": 0.4501968240039974, "grad_norm": 2.4041775454350853, "learning_rate": 2.8937780628608086e-06, "loss": 0.7921, "step": 36940 }, { "epoch": 0.45025776022814523, "grad_norm": 2.193732063614063, "learning_rate": 2.8934573444515717e-06, "loss": 0.7468, "step": 36945 }, { "epoch": 0.45031869645229305, "grad_norm": 2.0068046422880363, "learning_rate": 2.893136626042335e-06, "loss": 0.7416, "step": 36950 }, { "epoch": 0.4503796326764408, "grad_norm": 2.3495065212438417, "learning_rate": 2.8928159076330985e-06, "loss": 0.7047, "step": 36955 }, { "epoch": 0.45044056890058864, "grad_norm": 3.4430082845106655, "learning_rate": 2.8924951892238616e-06, "loss": 0.747, "step": 36960 }, { "epoch": 0.45050150512473647, "grad_norm": 2.6061012265875094, "learning_rate": 2.892174470814625e-06, "loss": 0.8003, "step": 36965 }, { "epoch": 0.45056244134888424, "grad_norm": 2.4334090215711592, "learning_rate": 2.8918537524053884e-06, "loss": 0.7448, "step": 36970 }, { "epoch": 0.45062337757303206, "grad_norm": 2.8012399563169916, "learning_rate": 2.891533033996152e-06, "loss": 0.7393, "step": 36975 }, { "epoch": 0.4506843137971799, "grad_norm": 2.5728424880411005, "learning_rate": 2.891212315586915e-06, "loss": 0.8084, "step": 36980 }, { "epoch": 0.4507452500213277, "grad_norm": 2.308527316065531, "learning_rate": 2.890891597177678e-06, "loss": 0.7221, "step": 36985 }, { "epoch": 0.45080618624547547, "grad_norm": 2.1835212465704945, "learning_rate": 2.8905708787684418e-06, "loss": 0.7568, "step": 36990 }, { "epoch": 0.4508671224696233, "grad_norm": 2.2332752058399747, "learning_rate": 2.890250160359205e-06, "loss": 0.7265, "step": 36995 }, { "epoch": 0.4509280586937711, "grad_norm": 2.8048652704170367, "learning_rate": 2.889929441949968e-06, "loss": 0.7603, "step": 37000 }, { "epoch": 0.4509889949179189, "grad_norm": 2.1412263296664262, "learning_rate": 2.8896087235407317e-06, "loss": 0.7209, "step": 37005 }, { "epoch": 0.4510499311420667, "grad_norm": 2.6701370364448405, "learning_rate": 2.8892880051314947e-06, "loss": 0.7124, "step": 37010 }, { "epoch": 0.45111086736621453, "grad_norm": 2.399564688783664, "learning_rate": 2.8889672867222577e-06, "loss": 0.7, "step": 37015 }, { "epoch": 0.45117180359036235, "grad_norm": 2.2490709370700275, "learning_rate": 2.8886465683130216e-06, "loss": 0.7289, "step": 37020 }, { "epoch": 0.4512327398145101, "grad_norm": 2.1303987186229376, "learning_rate": 2.8883258499037846e-06, "loss": 0.7841, "step": 37025 }, { "epoch": 0.45129367603865794, "grad_norm": 2.2052533460370523, "learning_rate": 2.888005131494548e-06, "loss": 0.7742, "step": 37030 }, { "epoch": 0.45135461226280577, "grad_norm": 2.1713124002622464, "learning_rate": 2.8876844130853115e-06, "loss": 0.7343, "step": 37035 }, { "epoch": 0.45141554848695353, "grad_norm": 2.55915877303095, "learning_rate": 2.8873636946760745e-06, "loss": 0.81, "step": 37040 }, { "epoch": 0.45147648471110136, "grad_norm": 2.3010768220477287, "learning_rate": 2.887042976266838e-06, "loss": 0.7081, "step": 37045 }, { "epoch": 0.4515374209352492, "grad_norm": 2.9687833995065076, "learning_rate": 2.8867222578576014e-06, "loss": 0.7686, "step": 37050 }, { "epoch": 0.451598357159397, "grad_norm": 3.661024456240687, "learning_rate": 2.886401539448365e-06, "loss": 0.7139, "step": 37055 }, { "epoch": 0.45165929338354477, "grad_norm": 2.016422711459076, "learning_rate": 2.886080821039128e-06, "loss": 0.7504, "step": 37060 }, { "epoch": 0.4517202296076926, "grad_norm": 2.1334313766000457, "learning_rate": 2.885760102629891e-06, "loss": 0.7552, "step": 37065 }, { "epoch": 0.4517811658318404, "grad_norm": 3.021645587536662, "learning_rate": 2.8854393842206547e-06, "loss": 0.7136, "step": 37070 }, { "epoch": 0.4518421020559882, "grad_norm": 2.465547832564564, "learning_rate": 2.8851186658114177e-06, "loss": 0.7806, "step": 37075 }, { "epoch": 0.451903038280136, "grad_norm": 2.607096140564208, "learning_rate": 2.8847979474021807e-06, "loss": 0.7063, "step": 37080 }, { "epoch": 0.4519639745042838, "grad_norm": 2.4227234843950174, "learning_rate": 2.8844772289929446e-06, "loss": 0.7763, "step": 37085 }, { "epoch": 0.45202491072843165, "grad_norm": 2.7235862784026414, "learning_rate": 2.8841565105837076e-06, "loss": 0.7382, "step": 37090 }, { "epoch": 0.4520858469525794, "grad_norm": 2.4167194773764638, "learning_rate": 2.883835792174471e-06, "loss": 0.8146, "step": 37095 }, { "epoch": 0.45214678317672724, "grad_norm": 2.513231044043765, "learning_rate": 2.8835150737652345e-06, "loss": 0.7467, "step": 37100 }, { "epoch": 0.45220771940087506, "grad_norm": 2.7150278316447207, "learning_rate": 2.8831943553559975e-06, "loss": 0.7511, "step": 37105 }, { "epoch": 0.45226865562502283, "grad_norm": 3.1979255646045206, "learning_rate": 2.882873636946761e-06, "loss": 0.7145, "step": 37110 }, { "epoch": 0.45232959184917065, "grad_norm": 2.8433142220405077, "learning_rate": 2.8825529185375244e-06, "loss": 0.7806, "step": 37115 }, { "epoch": 0.4523905280733185, "grad_norm": 2.577154187653229, "learning_rate": 2.882232200128288e-06, "loss": 0.7621, "step": 37120 }, { "epoch": 0.4524514642974663, "grad_norm": 3.857698650915063, "learning_rate": 2.881911481719051e-06, "loss": 0.6722, "step": 37125 }, { "epoch": 0.45251240052161407, "grad_norm": 2.3468871575131978, "learning_rate": 2.8815907633098143e-06, "loss": 0.7002, "step": 37130 }, { "epoch": 0.4525733367457619, "grad_norm": 2.9947341765583975, "learning_rate": 2.8812700449005777e-06, "loss": 0.7558, "step": 37135 }, { "epoch": 0.4526342729699097, "grad_norm": 2.151610124350231, "learning_rate": 2.8809493264913407e-06, "loss": 0.7819, "step": 37140 }, { "epoch": 0.4526952091940575, "grad_norm": 2.1500795847359706, "learning_rate": 2.8806286080821038e-06, "loss": 0.7639, "step": 37145 }, { "epoch": 0.4527561454182053, "grad_norm": 2.5534105151972146, "learning_rate": 2.8803078896728676e-06, "loss": 0.7398, "step": 37150 }, { "epoch": 0.4528170816423531, "grad_norm": 2.219430558545709, "learning_rate": 2.8799871712636306e-06, "loss": 0.7542, "step": 37155 }, { "epoch": 0.4528780178665009, "grad_norm": 2.345421229806144, "learning_rate": 2.8796664528543937e-06, "loss": 0.816, "step": 37160 }, { "epoch": 0.4529389540906487, "grad_norm": 1.9178726490955238, "learning_rate": 2.8793457344451575e-06, "loss": 0.7521, "step": 37165 }, { "epoch": 0.45299989031479654, "grad_norm": 2.42820152239905, "learning_rate": 2.8790250160359205e-06, "loss": 0.7086, "step": 37170 }, { "epoch": 0.45306082653894436, "grad_norm": 2.6053368571921207, "learning_rate": 2.878704297626684e-06, "loss": 0.7774, "step": 37175 }, { "epoch": 0.45312176276309213, "grad_norm": 2.5921899793092877, "learning_rate": 2.8783835792174474e-06, "loss": 0.7615, "step": 37180 }, { "epoch": 0.45318269898723995, "grad_norm": 2.3178002277055856, "learning_rate": 2.8780628608082104e-06, "loss": 0.7585, "step": 37185 }, { "epoch": 0.4532436352113878, "grad_norm": 2.2460647472726687, "learning_rate": 2.877742142398974e-06, "loss": 0.7498, "step": 37190 }, { "epoch": 0.45330457143553554, "grad_norm": 2.2928315102124697, "learning_rate": 2.8774214239897373e-06, "loss": 0.8033, "step": 37195 }, { "epoch": 0.45336550765968336, "grad_norm": 2.436641744064792, "learning_rate": 2.8771007055805008e-06, "loss": 0.6859, "step": 37200 }, { "epoch": 0.4534264438838312, "grad_norm": 2.3200718200128323, "learning_rate": 2.8767799871712638e-06, "loss": 0.7474, "step": 37205 }, { "epoch": 0.453487380107979, "grad_norm": 2.6418632834034663, "learning_rate": 2.8764592687620272e-06, "loss": 0.7858, "step": 37210 }, { "epoch": 0.4535483163321268, "grad_norm": 2.7032277903267556, "learning_rate": 2.8761385503527907e-06, "loss": 0.8025, "step": 37215 }, { "epoch": 0.4536092525562746, "grad_norm": 2.7821246006206164, "learning_rate": 2.8758178319435537e-06, "loss": 0.7677, "step": 37220 }, { "epoch": 0.4536701887804224, "grad_norm": 2.348113478007381, "learning_rate": 2.8754971135343175e-06, "loss": 0.7526, "step": 37225 }, { "epoch": 0.4537311250045702, "grad_norm": 2.2677763595005884, "learning_rate": 2.8751763951250806e-06, "loss": 0.7683, "step": 37230 }, { "epoch": 0.453792061228718, "grad_norm": 2.369971090765524, "learning_rate": 2.8748556767158436e-06, "loss": 0.7535, "step": 37235 }, { "epoch": 0.45385299745286584, "grad_norm": 3.55089557616952, "learning_rate": 2.8745349583066066e-06, "loss": 0.7873, "step": 37240 }, { "epoch": 0.45391393367701366, "grad_norm": 2.3137134483238464, "learning_rate": 2.8742142398973705e-06, "loss": 0.678, "step": 37245 }, { "epoch": 0.4539748699011614, "grad_norm": 2.6634229028560417, "learning_rate": 2.8738935214881335e-06, "loss": 0.7397, "step": 37250 }, { "epoch": 0.45403580612530925, "grad_norm": 2.6216587547904937, "learning_rate": 2.873572803078897e-06, "loss": 0.687, "step": 37255 }, { "epoch": 0.45409674234945707, "grad_norm": 2.770087665515098, "learning_rate": 2.8732520846696603e-06, "loss": 0.6827, "step": 37260 }, { "epoch": 0.45415767857360484, "grad_norm": 2.48433286447543, "learning_rate": 2.8729313662604234e-06, "loss": 0.7343, "step": 37265 }, { "epoch": 0.45421861479775266, "grad_norm": 3.2345503673221736, "learning_rate": 2.872610647851187e-06, "loss": 0.7563, "step": 37270 }, { "epoch": 0.4542795510219005, "grad_norm": 2.4381841420278283, "learning_rate": 2.8722899294419502e-06, "loss": 0.7188, "step": 37275 }, { "epoch": 0.4543404872460483, "grad_norm": 2.3084128587313892, "learning_rate": 2.8719692110327137e-06, "loss": 0.7364, "step": 37280 }, { "epoch": 0.4544014234701961, "grad_norm": 2.296896311680153, "learning_rate": 2.8716484926234767e-06, "loss": 0.7758, "step": 37285 }, { "epoch": 0.4544623596943439, "grad_norm": 2.0311211632469233, "learning_rate": 2.8713277742142406e-06, "loss": 0.7253, "step": 37290 }, { "epoch": 0.4545232959184917, "grad_norm": 2.0523332826639478, "learning_rate": 2.8710070558050036e-06, "loss": 0.8124, "step": 37295 }, { "epoch": 0.4545842321426395, "grad_norm": 1.9379851423947336, "learning_rate": 2.8706863373957666e-06, "loss": 0.7058, "step": 37300 }, { "epoch": 0.4546451683667873, "grad_norm": 2.291646893263524, "learning_rate": 2.8703656189865305e-06, "loss": 0.714, "step": 37305 }, { "epoch": 0.45470610459093513, "grad_norm": 2.460278237687551, "learning_rate": 2.8700449005772935e-06, "loss": 0.7381, "step": 37310 }, { "epoch": 0.45476704081508296, "grad_norm": 2.794372409824397, "learning_rate": 2.8697241821680565e-06, "loss": 0.7438, "step": 37315 }, { "epoch": 0.4548279770392307, "grad_norm": 2.3576570863544335, "learning_rate": 2.86940346375882e-06, "loss": 0.7409, "step": 37320 }, { "epoch": 0.45488891326337855, "grad_norm": 2.823049962319129, "learning_rate": 2.8690827453495834e-06, "loss": 0.7698, "step": 37325 }, { "epoch": 0.45494984948752637, "grad_norm": 2.2819042218464487, "learning_rate": 2.8687620269403464e-06, "loss": 0.782, "step": 37330 }, { "epoch": 0.45501078571167414, "grad_norm": 2.7014156313445077, "learning_rate": 2.86844130853111e-06, "loss": 0.6837, "step": 37335 }, { "epoch": 0.45507172193582196, "grad_norm": 2.0560906507231573, "learning_rate": 2.8681205901218733e-06, "loss": 0.8192, "step": 37340 }, { "epoch": 0.4551326581599698, "grad_norm": 2.231647060362494, "learning_rate": 2.8677998717126367e-06, "loss": 0.7597, "step": 37345 }, { "epoch": 0.4551935943841176, "grad_norm": 2.636420347980179, "learning_rate": 2.8674791533033997e-06, "loss": 0.7256, "step": 37350 }, { "epoch": 0.4552545306082654, "grad_norm": 2.6936515201979967, "learning_rate": 2.867158434894163e-06, "loss": 0.7789, "step": 37355 }, { "epoch": 0.4553154668324132, "grad_norm": 2.3796874295308617, "learning_rate": 2.8668377164849266e-06, "loss": 0.6633, "step": 37360 }, { "epoch": 0.455376403056561, "grad_norm": 2.6152178972601625, "learning_rate": 2.8665169980756896e-06, "loss": 0.8432, "step": 37365 }, { "epoch": 0.4554373392807088, "grad_norm": 2.142332836962734, "learning_rate": 2.8661962796664535e-06, "loss": 0.8061, "step": 37370 }, { "epoch": 0.4554982755048566, "grad_norm": 1.9872149269903954, "learning_rate": 2.8658755612572165e-06, "loss": 0.6485, "step": 37375 }, { "epoch": 0.45555921172900443, "grad_norm": 2.378653441765186, "learning_rate": 2.8655548428479795e-06, "loss": 0.8269, "step": 37380 }, { "epoch": 0.45562014795315225, "grad_norm": 3.1122220248572785, "learning_rate": 2.8652341244387434e-06, "loss": 0.7627, "step": 37385 }, { "epoch": 0.4556810841773, "grad_norm": 2.158059004407394, "learning_rate": 2.8649134060295064e-06, "loss": 0.7655, "step": 37390 }, { "epoch": 0.45574202040144784, "grad_norm": 2.4772631011308692, "learning_rate": 2.8645926876202694e-06, "loss": 0.7319, "step": 37395 }, { "epoch": 0.45580295662559567, "grad_norm": 2.61294729683932, "learning_rate": 2.864271969211033e-06, "loss": 0.7071, "step": 37400 }, { "epoch": 0.45586389284974344, "grad_norm": 2.5520982739971614, "learning_rate": 2.8639512508017963e-06, "loss": 0.7027, "step": 37405 }, { "epoch": 0.45592482907389126, "grad_norm": 4.46094760506228, "learning_rate": 2.8636305323925593e-06, "loss": 0.7651, "step": 37410 }, { "epoch": 0.4559857652980391, "grad_norm": 1.9232613188437484, "learning_rate": 2.8633098139833228e-06, "loss": 0.7037, "step": 37415 }, { "epoch": 0.4560467015221869, "grad_norm": 2.302382252252, "learning_rate": 2.862989095574086e-06, "loss": 0.7481, "step": 37420 }, { "epoch": 0.45610763774633467, "grad_norm": 2.677184128070378, "learning_rate": 2.8626683771648496e-06, "loss": 0.7514, "step": 37425 }, { "epoch": 0.4561685739704825, "grad_norm": 2.260017946003509, "learning_rate": 2.8623476587556127e-06, "loss": 0.7918, "step": 37430 }, { "epoch": 0.4562295101946303, "grad_norm": 3.3340826628039033, "learning_rate": 2.862026940346376e-06, "loss": 0.7804, "step": 37435 }, { "epoch": 0.4562904464187781, "grad_norm": 2.3431816304423663, "learning_rate": 2.8617062219371395e-06, "loss": 0.7131, "step": 37440 }, { "epoch": 0.4563513826429259, "grad_norm": 2.3909696054048593, "learning_rate": 2.8613855035279026e-06, "loss": 0.6805, "step": 37445 }, { "epoch": 0.45641231886707373, "grad_norm": 2.4474281549294394, "learning_rate": 2.8610647851186664e-06, "loss": 0.7359, "step": 37450 }, { "epoch": 0.45647325509122155, "grad_norm": 2.7270804868758227, "learning_rate": 2.8607440667094294e-06, "loss": 0.7344, "step": 37455 }, { "epoch": 0.4565341913153693, "grad_norm": 2.342912745621228, "learning_rate": 2.8604233483001925e-06, "loss": 0.7182, "step": 37460 }, { "epoch": 0.45659512753951714, "grad_norm": 2.333573865259468, "learning_rate": 2.8601026298909563e-06, "loss": 0.7682, "step": 37465 }, { "epoch": 0.45665606376366497, "grad_norm": 2.1334105127837617, "learning_rate": 2.8597819114817193e-06, "loss": 0.7377, "step": 37470 }, { "epoch": 0.45671699998781273, "grad_norm": 2.558997555044822, "learning_rate": 2.8594611930724824e-06, "loss": 0.7302, "step": 37475 }, { "epoch": 0.45677793621196056, "grad_norm": 2.64962700691809, "learning_rate": 2.8591404746632462e-06, "loss": 0.7834, "step": 37480 }, { "epoch": 0.4568388724361084, "grad_norm": 2.509171943626946, "learning_rate": 2.8588197562540092e-06, "loss": 0.7847, "step": 37485 }, { "epoch": 0.4568998086602562, "grad_norm": 2.708823971135404, "learning_rate": 2.8584990378447723e-06, "loss": 0.8056, "step": 37490 }, { "epoch": 0.45696074488440397, "grad_norm": 2.1603863288613634, "learning_rate": 2.8581783194355357e-06, "loss": 0.7521, "step": 37495 }, { "epoch": 0.4570216811085518, "grad_norm": 2.7754801629593104, "learning_rate": 2.857857601026299e-06, "loss": 0.7196, "step": 37500 }, { "epoch": 0.4570826173326996, "grad_norm": 2.224772392611206, "learning_rate": 2.8575368826170626e-06, "loss": 0.7783, "step": 37505 }, { "epoch": 0.4571435535568474, "grad_norm": 2.9549054330603615, "learning_rate": 2.8572161642078256e-06, "loss": 0.7871, "step": 37510 }, { "epoch": 0.4572044897809952, "grad_norm": 2.5739031635187133, "learning_rate": 2.856895445798589e-06, "loss": 0.7895, "step": 37515 }, { "epoch": 0.457265426005143, "grad_norm": 2.8325188721459544, "learning_rate": 2.8565747273893525e-06, "loss": 0.7821, "step": 37520 }, { "epoch": 0.45732636222929085, "grad_norm": 2.300408797055328, "learning_rate": 2.8562540089801155e-06, "loss": 0.6599, "step": 37525 }, { "epoch": 0.4573872984534386, "grad_norm": 2.305691261841928, "learning_rate": 2.8559332905708793e-06, "loss": 0.8001, "step": 37530 }, { "epoch": 0.45744823467758644, "grad_norm": 2.82770971055451, "learning_rate": 2.8556125721616424e-06, "loss": 0.7425, "step": 37535 }, { "epoch": 0.45750917090173426, "grad_norm": 2.136138440697664, "learning_rate": 2.8552918537524054e-06, "loss": 0.7587, "step": 37540 }, { "epoch": 0.45757010712588203, "grad_norm": 2.2346881955185354, "learning_rate": 2.8549711353431692e-06, "loss": 0.8071, "step": 37545 }, { "epoch": 0.45763104335002985, "grad_norm": 1.9536194946091348, "learning_rate": 2.8546504169339323e-06, "loss": 0.6812, "step": 37550 }, { "epoch": 0.4576919795741777, "grad_norm": 2.289698584792783, "learning_rate": 2.8543296985246953e-06, "loss": 0.8071, "step": 37555 }, { "epoch": 0.4577529157983255, "grad_norm": 2.759800922157328, "learning_rate": 2.854008980115459e-06, "loss": 0.8018, "step": 37560 }, { "epoch": 0.45781385202247327, "grad_norm": 2.6297015868673213, "learning_rate": 2.853688261706222e-06, "loss": 0.7864, "step": 37565 }, { "epoch": 0.4578747882466211, "grad_norm": 2.518501104776446, "learning_rate": 2.8533675432969856e-06, "loss": 0.8301, "step": 37570 }, { "epoch": 0.4579357244707689, "grad_norm": 2.799734907284549, "learning_rate": 2.8530468248877486e-06, "loss": 0.8065, "step": 37575 }, { "epoch": 0.4579966606949167, "grad_norm": 2.534205789858644, "learning_rate": 2.852726106478512e-06, "loss": 0.7648, "step": 37580 }, { "epoch": 0.4580575969190645, "grad_norm": 2.453159890891248, "learning_rate": 2.8524053880692755e-06, "loss": 0.6957, "step": 37585 }, { "epoch": 0.4581185331432123, "grad_norm": 2.0573983922320562, "learning_rate": 2.8520846696600385e-06, "loss": 0.753, "step": 37590 }, { "epoch": 0.45817946936736015, "grad_norm": 2.110307040204326, "learning_rate": 2.8517639512508024e-06, "loss": 0.7685, "step": 37595 }, { "epoch": 0.4582404055915079, "grad_norm": 2.5926481832019213, "learning_rate": 2.8514432328415654e-06, "loss": 0.6962, "step": 37600 }, { "epoch": 0.45830134181565574, "grad_norm": 2.318772449646382, "learning_rate": 2.8511225144323284e-06, "loss": 0.7811, "step": 37605 }, { "epoch": 0.45836227803980356, "grad_norm": 2.7398287617106147, "learning_rate": 2.8508017960230923e-06, "loss": 0.7252, "step": 37610 }, { "epoch": 0.45842321426395133, "grad_norm": 2.518546275211442, "learning_rate": 2.8504810776138553e-06, "loss": 0.7595, "step": 37615 }, { "epoch": 0.45848415048809915, "grad_norm": 2.377101010674503, "learning_rate": 2.8501603592046183e-06, "loss": 0.7102, "step": 37620 }, { "epoch": 0.458545086712247, "grad_norm": 2.1069222904884004, "learning_rate": 2.849839640795382e-06, "loss": 0.7505, "step": 37625 }, { "epoch": 0.45860602293639474, "grad_norm": 2.180442440606738, "learning_rate": 2.849518922386145e-06, "loss": 0.7879, "step": 37630 }, { "epoch": 0.45866695916054256, "grad_norm": 2.7522617325743495, "learning_rate": 2.849198203976908e-06, "loss": 0.7526, "step": 37635 }, { "epoch": 0.4587278953846904, "grad_norm": 2.6723611329904275, "learning_rate": 2.848877485567672e-06, "loss": 0.7213, "step": 37640 }, { "epoch": 0.4587888316088382, "grad_norm": 2.160476636867619, "learning_rate": 2.848556767158435e-06, "loss": 0.7348, "step": 37645 }, { "epoch": 0.458849767832986, "grad_norm": 2.8634092925152266, "learning_rate": 2.8482360487491985e-06, "loss": 0.7741, "step": 37650 }, { "epoch": 0.4589107040571338, "grad_norm": 2.661498100146297, "learning_rate": 2.8479153303399615e-06, "loss": 0.7333, "step": 37655 }, { "epoch": 0.4589716402812816, "grad_norm": 2.75312304091459, "learning_rate": 2.847594611930725e-06, "loss": 0.7845, "step": 37660 }, { "epoch": 0.4590325765054294, "grad_norm": 2.239609257749552, "learning_rate": 2.8472738935214884e-06, "loss": 0.7761, "step": 37665 }, { "epoch": 0.4590935127295772, "grad_norm": 2.9715574191186587, "learning_rate": 2.8469531751122514e-06, "loss": 0.7242, "step": 37670 }, { "epoch": 0.45915444895372504, "grad_norm": 2.7857496219225255, "learning_rate": 2.8466324567030153e-06, "loss": 0.8138, "step": 37675 }, { "epoch": 0.45921538517787286, "grad_norm": 2.8893400261398994, "learning_rate": 2.8463117382937783e-06, "loss": 0.7813, "step": 37680 }, { "epoch": 0.4592763214020206, "grad_norm": 3.0487809784469357, "learning_rate": 2.8459910198845413e-06, "loss": 0.7734, "step": 37685 }, { "epoch": 0.45933725762616845, "grad_norm": 2.8362827442160645, "learning_rate": 2.845670301475305e-06, "loss": 0.8247, "step": 37690 }, { "epoch": 0.45939819385031627, "grad_norm": 2.5608583189745926, "learning_rate": 2.8453495830660682e-06, "loss": 0.7582, "step": 37695 }, { "epoch": 0.45945913007446404, "grad_norm": 2.5540425457019413, "learning_rate": 2.8450288646568312e-06, "loss": 0.8101, "step": 37700 }, { "epoch": 0.45952006629861186, "grad_norm": 2.347256993942313, "learning_rate": 2.844708146247595e-06, "loss": 0.8104, "step": 37705 }, { "epoch": 0.4595810025227597, "grad_norm": 2.319492183814193, "learning_rate": 2.844387427838358e-06, "loss": 0.751, "step": 37710 }, { "epoch": 0.4596419387469075, "grad_norm": 2.5003241293507372, "learning_rate": 2.844066709429121e-06, "loss": 0.7326, "step": 37715 }, { "epoch": 0.4597028749710553, "grad_norm": 2.7100835140003117, "learning_rate": 2.843745991019885e-06, "loss": 0.8244, "step": 37720 }, { "epoch": 0.4597638111952031, "grad_norm": 2.22628371300758, "learning_rate": 2.843425272610648e-06, "loss": 0.7664, "step": 37725 }, { "epoch": 0.4598247474193509, "grad_norm": 2.0751091182712296, "learning_rate": 2.8431045542014115e-06, "loss": 0.7235, "step": 37730 }, { "epoch": 0.4598856836434987, "grad_norm": 2.5716765124103267, "learning_rate": 2.8427838357921745e-06, "loss": 0.7912, "step": 37735 }, { "epoch": 0.4599466198676465, "grad_norm": 3.4410611615588595, "learning_rate": 2.842463117382938e-06, "loss": 0.7216, "step": 37740 }, { "epoch": 0.46000755609179433, "grad_norm": 2.4381472402999753, "learning_rate": 2.8421423989737014e-06, "loss": 0.8581, "step": 37745 }, { "epoch": 0.46006849231594216, "grad_norm": 2.3768090994371684, "learning_rate": 2.8418216805644644e-06, "loss": 0.789, "step": 37750 }, { "epoch": 0.4601294285400899, "grad_norm": 2.820455749093855, "learning_rate": 2.8415009621552282e-06, "loss": 0.7668, "step": 37755 }, { "epoch": 0.46019036476423775, "grad_norm": 2.877269687261617, "learning_rate": 2.8411802437459912e-06, "loss": 0.7601, "step": 37760 }, { "epoch": 0.46025130098838557, "grad_norm": 2.774660420091043, "learning_rate": 2.8408595253367543e-06, "loss": 0.7005, "step": 37765 }, { "epoch": 0.46031223721253334, "grad_norm": 2.272862651152554, "learning_rate": 2.840538806927518e-06, "loss": 0.749, "step": 37770 }, { "epoch": 0.46037317343668116, "grad_norm": 2.3723989727680657, "learning_rate": 2.840218088518281e-06, "loss": 0.8442, "step": 37775 }, { "epoch": 0.460434109660829, "grad_norm": 3.402014678861035, "learning_rate": 2.839897370109044e-06, "loss": 0.7108, "step": 37780 }, { "epoch": 0.4604950458849768, "grad_norm": 4.186099648690467, "learning_rate": 2.839576651699808e-06, "loss": 0.8755, "step": 37785 }, { "epoch": 0.4605559821091246, "grad_norm": 2.2029808308657124, "learning_rate": 2.839255933290571e-06, "loss": 0.7249, "step": 37790 }, { "epoch": 0.4606169183332724, "grad_norm": 2.194118914535496, "learning_rate": 2.8389352148813345e-06, "loss": 0.8169, "step": 37795 }, { "epoch": 0.4606778545574202, "grad_norm": 2.520013906651855, "learning_rate": 2.838614496472098e-06, "loss": 0.763, "step": 37800 }, { "epoch": 0.460738790781568, "grad_norm": 2.6399517483136368, "learning_rate": 2.838293778062861e-06, "loss": 0.7453, "step": 37805 }, { "epoch": 0.4607997270057158, "grad_norm": 2.1733782561664317, "learning_rate": 2.8379730596536244e-06, "loss": 0.7903, "step": 37810 }, { "epoch": 0.46086066322986363, "grad_norm": 2.5714489367248774, "learning_rate": 2.837652341244388e-06, "loss": 0.7038, "step": 37815 }, { "epoch": 0.46092159945401145, "grad_norm": 2.2527706080025416, "learning_rate": 2.8373316228351513e-06, "loss": 0.7048, "step": 37820 }, { "epoch": 0.4609825356781592, "grad_norm": 2.5520185552621224, "learning_rate": 2.8370109044259143e-06, "loss": 0.781, "step": 37825 }, { "epoch": 0.46104347190230704, "grad_norm": 2.6510254455217495, "learning_rate": 2.8366901860166773e-06, "loss": 0.8343, "step": 37830 }, { "epoch": 0.46110440812645487, "grad_norm": 2.305961652520456, "learning_rate": 2.836369467607441e-06, "loss": 0.8412, "step": 37835 }, { "epoch": 0.46116534435060264, "grad_norm": 3.15644068458965, "learning_rate": 2.836048749198204e-06, "loss": 0.7069, "step": 37840 }, { "epoch": 0.46122628057475046, "grad_norm": 2.0973487610681945, "learning_rate": 2.835728030788967e-06, "loss": 0.7643, "step": 37845 }, { "epoch": 0.4612872167988983, "grad_norm": 2.2963162959623027, "learning_rate": 2.835407312379731e-06, "loss": 0.7297, "step": 37850 }, { "epoch": 0.4613481530230461, "grad_norm": 2.542405011690101, "learning_rate": 2.835086593970494e-06, "loss": 0.7988, "step": 37855 }, { "epoch": 0.46140908924719387, "grad_norm": 2.593916073930216, "learning_rate": 2.834765875561257e-06, "loss": 0.7668, "step": 37860 }, { "epoch": 0.4614700254713417, "grad_norm": 2.678304028094239, "learning_rate": 2.834445157152021e-06, "loss": 0.7754, "step": 37865 }, { "epoch": 0.4615309616954895, "grad_norm": 2.4549678975004805, "learning_rate": 2.834124438742784e-06, "loss": 0.7638, "step": 37870 }, { "epoch": 0.4615918979196373, "grad_norm": 2.0549927063727633, "learning_rate": 2.8338037203335474e-06, "loss": 0.722, "step": 37875 }, { "epoch": 0.4616528341437851, "grad_norm": 2.691263744977953, "learning_rate": 2.833483001924311e-06, "loss": 0.7383, "step": 37880 }, { "epoch": 0.46171377036793293, "grad_norm": 2.1590229699833694, "learning_rate": 2.833162283515074e-06, "loss": 0.7216, "step": 37885 }, { "epoch": 0.46177470659208075, "grad_norm": 3.76842262473321, "learning_rate": 2.8328415651058373e-06, "loss": 0.7783, "step": 37890 }, { "epoch": 0.4618356428162285, "grad_norm": 2.5580329121491148, "learning_rate": 2.8325208466966007e-06, "loss": 0.7381, "step": 37895 }, { "epoch": 0.46189657904037634, "grad_norm": 2.568020323249518, "learning_rate": 2.832200128287364e-06, "loss": 0.7507, "step": 37900 }, { "epoch": 0.46195751526452417, "grad_norm": 3.072545863743206, "learning_rate": 2.831879409878127e-06, "loss": 0.775, "step": 37905 }, { "epoch": 0.46201845148867193, "grad_norm": 2.187433880286674, "learning_rate": 2.8315586914688902e-06, "loss": 0.7748, "step": 37910 }, { "epoch": 0.46207938771281976, "grad_norm": 2.152139148929882, "learning_rate": 2.831237973059654e-06, "loss": 0.7233, "step": 37915 }, { "epoch": 0.4621403239369676, "grad_norm": 2.989500846017813, "learning_rate": 2.830917254650417e-06, "loss": 0.7539, "step": 37920 }, { "epoch": 0.4622012601611154, "grad_norm": 2.250292720658191, "learning_rate": 2.83059653624118e-06, "loss": 0.8014, "step": 37925 }, { "epoch": 0.46226219638526317, "grad_norm": 2.116714460846522, "learning_rate": 2.830275817831944e-06, "loss": 0.699, "step": 37930 }, { "epoch": 0.462323132609411, "grad_norm": 2.427446638883272, "learning_rate": 2.829955099422707e-06, "loss": 0.7417, "step": 37935 }, { "epoch": 0.4623840688335588, "grad_norm": 3.284311577569816, "learning_rate": 2.82963438101347e-06, "loss": 0.7364, "step": 37940 }, { "epoch": 0.4624450050577066, "grad_norm": 2.591576251858357, "learning_rate": 2.829313662604234e-06, "loss": 0.7406, "step": 37945 }, { "epoch": 0.4625059412818544, "grad_norm": 3.343084574518571, "learning_rate": 2.828992944194997e-06, "loss": 0.9314, "step": 37950 }, { "epoch": 0.4625668775060022, "grad_norm": 4.413054468299715, "learning_rate": 2.8286722257857603e-06, "loss": 0.7339, "step": 37955 }, { "epoch": 0.46262781373015005, "grad_norm": 2.525994340202586, "learning_rate": 2.8283515073765238e-06, "loss": 0.7909, "step": 37960 }, { "epoch": 0.4626887499542978, "grad_norm": 2.5563198263863334, "learning_rate": 2.828030788967287e-06, "loss": 0.729, "step": 37965 }, { "epoch": 0.46274968617844564, "grad_norm": 2.586241059097782, "learning_rate": 2.8277100705580502e-06, "loss": 0.7164, "step": 37970 }, { "epoch": 0.46281062240259346, "grad_norm": 2.3000547118596355, "learning_rate": 2.8273893521488137e-06, "loss": 0.7506, "step": 37975 }, { "epoch": 0.46287155862674123, "grad_norm": 2.5569569836479302, "learning_rate": 2.827068633739577e-06, "loss": 0.8284, "step": 37980 }, { "epoch": 0.46293249485088905, "grad_norm": 2.659219552773406, "learning_rate": 2.82674791533034e-06, "loss": 0.7539, "step": 37985 }, { "epoch": 0.4629934310750369, "grad_norm": 2.5939620630588047, "learning_rate": 2.826427196921103e-06, "loss": 0.7106, "step": 37990 }, { "epoch": 0.4630543672991847, "grad_norm": 2.373605141077987, "learning_rate": 2.826106478511867e-06, "loss": 0.7381, "step": 37995 }, { "epoch": 0.46311530352333247, "grad_norm": 2.574377396680872, "learning_rate": 2.82578576010263e-06, "loss": 0.7426, "step": 38000 }, { "epoch": 0.4631762397474803, "grad_norm": 3.458095515821668, "learning_rate": 2.825465041693393e-06, "loss": 0.7574, "step": 38005 }, { "epoch": 0.4632371759716281, "grad_norm": 2.394035471050135, "learning_rate": 2.825144323284157e-06, "loss": 0.8127, "step": 38010 }, { "epoch": 0.4632981121957759, "grad_norm": 2.8689871026843, "learning_rate": 2.82482360487492e-06, "loss": 0.7967, "step": 38015 }, { "epoch": 0.4633590484199237, "grad_norm": 2.4392002706265723, "learning_rate": 2.8245028864656834e-06, "loss": 0.7694, "step": 38020 }, { "epoch": 0.4634199846440715, "grad_norm": 3.01516670139713, "learning_rate": 2.824182168056447e-06, "loss": 0.7489, "step": 38025 }, { "epoch": 0.46348092086821935, "grad_norm": 2.628173382958934, "learning_rate": 2.82386144964721e-06, "loss": 0.74, "step": 38030 }, { "epoch": 0.4635418570923671, "grad_norm": 2.583696137051003, "learning_rate": 2.8235407312379733e-06, "loss": 0.8386, "step": 38035 }, { "epoch": 0.46360279331651494, "grad_norm": 2.337837615938099, "learning_rate": 2.8232200128287367e-06, "loss": 0.7159, "step": 38040 }, { "epoch": 0.46366372954066276, "grad_norm": 3.9674038269929084, "learning_rate": 2.8228992944195e-06, "loss": 0.7163, "step": 38045 }, { "epoch": 0.46372466576481053, "grad_norm": 2.373082192603001, "learning_rate": 2.822578576010263e-06, "loss": 0.8127, "step": 38050 }, { "epoch": 0.46378560198895835, "grad_norm": 2.262600079577489, "learning_rate": 2.8222578576010266e-06, "loss": 0.7443, "step": 38055 }, { "epoch": 0.4638465382131062, "grad_norm": 2.648474150752311, "learning_rate": 2.82193713919179e-06, "loss": 0.7454, "step": 38060 }, { "epoch": 0.463907474437254, "grad_norm": 2.399528977286124, "learning_rate": 2.821616420782553e-06, "loss": 0.7589, "step": 38065 }, { "epoch": 0.46396841066140176, "grad_norm": 2.2368896011186417, "learning_rate": 2.821295702373317e-06, "loss": 0.8281, "step": 38070 }, { "epoch": 0.4640293468855496, "grad_norm": 2.4043609005827915, "learning_rate": 2.82097498396408e-06, "loss": 0.7425, "step": 38075 }, { "epoch": 0.4640902831096974, "grad_norm": 2.1177846565193255, "learning_rate": 2.820654265554843e-06, "loss": 0.7382, "step": 38080 }, { "epoch": 0.4641512193338452, "grad_norm": 2.475649598807142, "learning_rate": 2.820333547145606e-06, "loss": 0.7459, "step": 38085 }, { "epoch": 0.464212155557993, "grad_norm": 2.1663221744493204, "learning_rate": 2.82001282873637e-06, "loss": 0.7739, "step": 38090 }, { "epoch": 0.4642730917821408, "grad_norm": 2.4903280643205647, "learning_rate": 2.819692110327133e-06, "loss": 0.7357, "step": 38095 }, { "epoch": 0.46433402800628865, "grad_norm": 2.5807210368698836, "learning_rate": 2.8193713919178963e-06, "loss": 0.7623, "step": 38100 }, { "epoch": 0.4643949642304364, "grad_norm": 2.4898046620488756, "learning_rate": 2.8190506735086597e-06, "loss": 0.7542, "step": 38105 }, { "epoch": 0.46445590045458424, "grad_norm": 2.593714975051136, "learning_rate": 2.8187299550994228e-06, "loss": 0.7268, "step": 38110 }, { "epoch": 0.46451683667873206, "grad_norm": 2.1686452807444514, "learning_rate": 2.818409236690186e-06, "loss": 0.7457, "step": 38115 }, { "epoch": 0.4645777729028798, "grad_norm": 2.3139889953761945, "learning_rate": 2.8180885182809496e-06, "loss": 0.7562, "step": 38120 }, { "epoch": 0.46463870912702765, "grad_norm": 2.7421411905288666, "learning_rate": 2.817767799871713e-06, "loss": 0.7942, "step": 38125 }, { "epoch": 0.46469964535117547, "grad_norm": 2.2741433342574204, "learning_rate": 2.817447081462476e-06, "loss": 0.7146, "step": 38130 }, { "epoch": 0.46476058157532324, "grad_norm": 2.4755370441870737, "learning_rate": 2.8171263630532395e-06, "loss": 0.7331, "step": 38135 }, { "epoch": 0.46482151779947106, "grad_norm": 2.5431325120692114, "learning_rate": 2.816805644644003e-06, "loss": 0.8226, "step": 38140 }, { "epoch": 0.4648824540236189, "grad_norm": 2.213017755616954, "learning_rate": 2.816484926234766e-06, "loss": 0.7152, "step": 38145 }, { "epoch": 0.4649433902477667, "grad_norm": 2.1489587837509925, "learning_rate": 2.81616420782553e-06, "loss": 0.7421, "step": 38150 }, { "epoch": 0.4650043264719145, "grad_norm": 2.292882474887667, "learning_rate": 2.815843489416293e-06, "loss": 0.768, "step": 38155 }, { "epoch": 0.4650652626960623, "grad_norm": 2.4192280302395446, "learning_rate": 2.815522771007056e-06, "loss": 0.7804, "step": 38160 }, { "epoch": 0.4651261989202101, "grad_norm": 2.233554522077479, "learning_rate": 2.815202052597819e-06, "loss": 0.7713, "step": 38165 }, { "epoch": 0.4651871351443579, "grad_norm": 2.2971449518959153, "learning_rate": 2.8148813341885828e-06, "loss": 0.7132, "step": 38170 }, { "epoch": 0.4652480713685057, "grad_norm": 2.537271783055072, "learning_rate": 2.8145606157793458e-06, "loss": 0.7713, "step": 38175 }, { "epoch": 0.46530900759265353, "grad_norm": 2.1174745569657807, "learning_rate": 2.8142398973701092e-06, "loss": 0.7132, "step": 38180 }, { "epoch": 0.46536994381680136, "grad_norm": 3.004211428254558, "learning_rate": 2.8139191789608727e-06, "loss": 0.8051, "step": 38185 }, { "epoch": 0.4654308800409491, "grad_norm": 2.11263477569809, "learning_rate": 2.8135984605516357e-06, "loss": 0.8284, "step": 38190 }, { "epoch": 0.46549181626509695, "grad_norm": 2.4509046622705917, "learning_rate": 2.813277742142399e-06, "loss": 0.7439, "step": 38195 }, { "epoch": 0.46555275248924477, "grad_norm": 2.441617786370891, "learning_rate": 2.8129570237331626e-06, "loss": 0.7821, "step": 38200 }, { "epoch": 0.46561368871339254, "grad_norm": 2.4823686849786255, "learning_rate": 2.812636305323926e-06, "loss": 0.7348, "step": 38205 }, { "epoch": 0.46567462493754036, "grad_norm": 2.37131493414316, "learning_rate": 2.812315586914689e-06, "loss": 0.7222, "step": 38210 }, { "epoch": 0.4657355611616882, "grad_norm": 2.6109103777583775, "learning_rate": 2.8119948685054525e-06, "loss": 0.7264, "step": 38215 }, { "epoch": 0.465796497385836, "grad_norm": 3.1620143349306824, "learning_rate": 2.811674150096216e-06, "loss": 0.7452, "step": 38220 }, { "epoch": 0.4658574336099838, "grad_norm": 3.06685416793892, "learning_rate": 2.811353431686979e-06, "loss": 0.7701, "step": 38225 }, { "epoch": 0.4659183698341316, "grad_norm": 2.3686642729362037, "learning_rate": 2.8110327132777428e-06, "loss": 0.7884, "step": 38230 }, { "epoch": 0.4659793060582794, "grad_norm": 2.446553919330831, "learning_rate": 2.810711994868506e-06, "loss": 0.7128, "step": 38235 }, { "epoch": 0.4660402422824272, "grad_norm": 2.5215013086150146, "learning_rate": 2.810391276459269e-06, "loss": 0.7707, "step": 38240 }, { "epoch": 0.466101178506575, "grad_norm": 2.6797583363567528, "learning_rate": 2.810070558050032e-06, "loss": 0.7783, "step": 38245 }, { "epoch": 0.46616211473072283, "grad_norm": 2.964779363775791, "learning_rate": 2.8097498396407957e-06, "loss": 0.7258, "step": 38250 }, { "epoch": 0.46622305095487065, "grad_norm": 2.588327520207514, "learning_rate": 2.8094291212315587e-06, "loss": 0.7925, "step": 38255 }, { "epoch": 0.4662839871790184, "grad_norm": 2.3152142918546303, "learning_rate": 2.809108402822322e-06, "loss": 0.7213, "step": 38260 }, { "epoch": 0.46634492340316624, "grad_norm": 3.959214729890094, "learning_rate": 2.8087876844130856e-06, "loss": 0.7494, "step": 38265 }, { "epoch": 0.46640585962731407, "grad_norm": 4.814066469847188, "learning_rate": 2.808466966003849e-06, "loss": 0.757, "step": 38270 }, { "epoch": 0.46646679585146184, "grad_norm": 2.534274630263808, "learning_rate": 2.808146247594612e-06, "loss": 0.7521, "step": 38275 }, { "epoch": 0.46652773207560966, "grad_norm": 2.888689602982718, "learning_rate": 2.8078255291853755e-06, "loss": 0.8273, "step": 38280 }, { "epoch": 0.4665886682997575, "grad_norm": 2.695805891706595, "learning_rate": 2.807504810776139e-06, "loss": 0.7821, "step": 38285 }, { "epoch": 0.4666496045239053, "grad_norm": 2.642734978253491, "learning_rate": 2.807184092366902e-06, "loss": 0.6886, "step": 38290 }, { "epoch": 0.46671054074805307, "grad_norm": 2.788531173990767, "learning_rate": 2.806863373957666e-06, "loss": 0.7676, "step": 38295 }, { "epoch": 0.4667714769722009, "grad_norm": 2.63736555946067, "learning_rate": 2.806542655548429e-06, "loss": 0.7989, "step": 38300 }, { "epoch": 0.4668324131963487, "grad_norm": 2.69656616740097, "learning_rate": 2.806221937139192e-06, "loss": 0.7454, "step": 38305 }, { "epoch": 0.4668933494204965, "grad_norm": 2.3050758584246354, "learning_rate": 2.8059012187299557e-06, "loss": 0.7585, "step": 38310 }, { "epoch": 0.4669542856446443, "grad_norm": 2.330061514810628, "learning_rate": 2.8055805003207187e-06, "loss": 0.7048, "step": 38315 }, { "epoch": 0.46701522186879213, "grad_norm": 2.787409943149311, "learning_rate": 2.8052597819114817e-06, "loss": 0.789, "step": 38320 }, { "epoch": 0.46707615809293995, "grad_norm": 2.2934652076064563, "learning_rate": 2.804939063502245e-06, "loss": 0.765, "step": 38325 }, { "epoch": 0.4671370943170877, "grad_norm": 2.7478151559739223, "learning_rate": 2.8046183450930086e-06, "loss": 0.7588, "step": 38330 }, { "epoch": 0.46719803054123554, "grad_norm": 2.553963329113042, "learning_rate": 2.8042976266837716e-06, "loss": 0.8233, "step": 38335 }, { "epoch": 0.46725896676538337, "grad_norm": 2.5159152931137276, "learning_rate": 2.803976908274535e-06, "loss": 0.7533, "step": 38340 }, { "epoch": 0.46731990298953113, "grad_norm": 3.013606718820514, "learning_rate": 2.8036561898652985e-06, "loss": 0.8401, "step": 38345 }, { "epoch": 0.46738083921367896, "grad_norm": 2.3504506963880534, "learning_rate": 2.803335471456062e-06, "loss": 0.7255, "step": 38350 }, { "epoch": 0.4674417754378268, "grad_norm": 2.6508402713401744, "learning_rate": 2.803014753046825e-06, "loss": 0.7739, "step": 38355 }, { "epoch": 0.4675027116619746, "grad_norm": 3.8364671316193486, "learning_rate": 2.8026940346375884e-06, "loss": 0.7133, "step": 38360 }, { "epoch": 0.46756364788612237, "grad_norm": 2.2098833673561167, "learning_rate": 2.802373316228352e-06, "loss": 0.7183, "step": 38365 }, { "epoch": 0.4676245841102702, "grad_norm": 2.6531736335656264, "learning_rate": 2.802052597819115e-06, "loss": 0.7684, "step": 38370 }, { "epoch": 0.467685520334418, "grad_norm": 2.422618935057297, "learning_rate": 2.8017318794098787e-06, "loss": 0.6829, "step": 38375 }, { "epoch": 0.4677464565585658, "grad_norm": 2.1762757136754103, "learning_rate": 2.8014111610006418e-06, "loss": 0.8013, "step": 38380 }, { "epoch": 0.4678073927827136, "grad_norm": 2.495032202648831, "learning_rate": 2.8010904425914048e-06, "loss": 0.8178, "step": 38385 }, { "epoch": 0.4678683290068614, "grad_norm": 2.150900516630118, "learning_rate": 2.8007697241821686e-06, "loss": 0.7622, "step": 38390 }, { "epoch": 0.46792926523100925, "grad_norm": 2.3767566248474443, "learning_rate": 2.8004490057729316e-06, "loss": 0.7271, "step": 38395 }, { "epoch": 0.467990201455157, "grad_norm": 1.9393502429721383, "learning_rate": 2.8001282873636947e-06, "loss": 0.7741, "step": 38400 }, { "epoch": 0.46805113767930484, "grad_norm": 2.697557786021292, "learning_rate": 2.7998075689544585e-06, "loss": 0.7991, "step": 38405 }, { "epoch": 0.46811207390345266, "grad_norm": 2.515268814021847, "learning_rate": 2.7994868505452215e-06, "loss": 0.757, "step": 38410 }, { "epoch": 0.46817301012760043, "grad_norm": 2.6059772953753497, "learning_rate": 2.7991661321359846e-06, "loss": 0.7068, "step": 38415 }, { "epoch": 0.46823394635174825, "grad_norm": 3.020687872362842, "learning_rate": 2.798845413726748e-06, "loss": 0.7187, "step": 38420 }, { "epoch": 0.4682948825758961, "grad_norm": 2.3420836279200192, "learning_rate": 2.7985246953175114e-06, "loss": 0.8029, "step": 38425 }, { "epoch": 0.4683558188000439, "grad_norm": 2.505193516239937, "learning_rate": 2.798203976908275e-06, "loss": 0.7425, "step": 38430 }, { "epoch": 0.46841675502419167, "grad_norm": 3.3388972856910204, "learning_rate": 2.797883258499038e-06, "loss": 0.7063, "step": 38435 }, { "epoch": 0.4684776912483395, "grad_norm": 2.599593265958525, "learning_rate": 2.7975625400898013e-06, "loss": 0.7586, "step": 38440 }, { "epoch": 0.4685386274724873, "grad_norm": 2.4525276571449632, "learning_rate": 2.7972418216805648e-06, "loss": 0.7835, "step": 38445 }, { "epoch": 0.4685995636966351, "grad_norm": 2.1948860411977504, "learning_rate": 2.796921103271328e-06, "loss": 0.7284, "step": 38450 }, { "epoch": 0.4686604999207829, "grad_norm": 2.522638470006452, "learning_rate": 2.7966003848620917e-06, "loss": 0.7579, "step": 38455 }, { "epoch": 0.4687214361449307, "grad_norm": 2.349363142108316, "learning_rate": 2.7962796664528547e-06, "loss": 0.748, "step": 38460 }, { "epoch": 0.46878237236907855, "grad_norm": 2.357139612008364, "learning_rate": 2.7959589480436177e-06, "loss": 0.7193, "step": 38465 }, { "epoch": 0.4688433085932263, "grad_norm": 2.6188991537203443, "learning_rate": 2.7956382296343816e-06, "loss": 0.7782, "step": 38470 }, { "epoch": 0.46890424481737414, "grad_norm": 2.3879051608051443, "learning_rate": 2.7953175112251446e-06, "loss": 0.7661, "step": 38475 }, { "epoch": 0.46896518104152196, "grad_norm": 2.7259761640730877, "learning_rate": 2.7949967928159076e-06, "loss": 0.7107, "step": 38480 }, { "epoch": 0.46902611726566973, "grad_norm": 2.3659527911654004, "learning_rate": 2.7946760744066715e-06, "loss": 0.7685, "step": 38485 }, { "epoch": 0.46908705348981755, "grad_norm": 2.774603307622771, "learning_rate": 2.7943553559974345e-06, "loss": 0.8476, "step": 38490 }, { "epoch": 0.4691479897139654, "grad_norm": 2.296655705566805, "learning_rate": 2.794034637588198e-06, "loss": 0.8364, "step": 38495 }, { "epoch": 0.4692089259381132, "grad_norm": 2.2117151707072242, "learning_rate": 2.793713919178961e-06, "loss": 0.7093, "step": 38500 }, { "epoch": 0.46926986216226096, "grad_norm": 2.490469985374926, "learning_rate": 2.7933932007697244e-06, "loss": 0.7564, "step": 38505 }, { "epoch": 0.4693307983864088, "grad_norm": 2.823687315459788, "learning_rate": 2.793072482360488e-06, "loss": 0.7757, "step": 38510 }, { "epoch": 0.4693917346105566, "grad_norm": 2.240444885564578, "learning_rate": 2.792751763951251e-06, "loss": 0.7544, "step": 38515 }, { "epoch": 0.4694526708347044, "grad_norm": 2.573178132583615, "learning_rate": 2.7924310455420147e-06, "loss": 0.7619, "step": 38520 }, { "epoch": 0.4695136070588522, "grad_norm": 2.198992124346718, "learning_rate": 2.7921103271327777e-06, "loss": 0.7944, "step": 38525 }, { "epoch": 0.469574543283, "grad_norm": 2.6401792277991616, "learning_rate": 2.7917896087235407e-06, "loss": 0.7504, "step": 38530 }, { "epoch": 0.46963547950714785, "grad_norm": 2.3540110595133514, "learning_rate": 2.7914688903143046e-06, "loss": 0.7964, "step": 38535 }, { "epoch": 0.4696964157312956, "grad_norm": 2.523842911710289, "learning_rate": 2.7911481719050676e-06, "loss": 0.7698, "step": 38540 }, { "epoch": 0.46975735195544344, "grad_norm": 2.5310199139342218, "learning_rate": 2.7908274534958306e-06, "loss": 0.8446, "step": 38545 }, { "epoch": 0.46981828817959126, "grad_norm": 2.0589632253836747, "learning_rate": 2.7905067350865945e-06, "loss": 0.7633, "step": 38550 }, { "epoch": 0.469879224403739, "grad_norm": 2.0729791720663573, "learning_rate": 2.7901860166773575e-06, "loss": 0.745, "step": 38555 }, { "epoch": 0.46994016062788685, "grad_norm": 3.395265573860232, "learning_rate": 2.7898652982681205e-06, "loss": 0.7701, "step": 38560 }, { "epoch": 0.47000109685203467, "grad_norm": 2.4455774824368763, "learning_rate": 2.7895445798588844e-06, "loss": 0.7452, "step": 38565 }, { "epoch": 0.4700620330761825, "grad_norm": 2.7356358445022497, "learning_rate": 2.7892238614496474e-06, "loss": 0.7935, "step": 38570 }, { "epoch": 0.47012296930033026, "grad_norm": 2.632396788087276, "learning_rate": 2.788903143040411e-06, "loss": 0.7783, "step": 38575 }, { "epoch": 0.4701839055244781, "grad_norm": 2.6265630626984255, "learning_rate": 2.788582424631174e-06, "loss": 0.7186, "step": 38580 }, { "epoch": 0.4702448417486259, "grad_norm": 2.279363290534254, "learning_rate": 2.7882617062219373e-06, "loss": 0.7537, "step": 38585 }, { "epoch": 0.4703057779727737, "grad_norm": 2.8788749041144204, "learning_rate": 2.7879409878127007e-06, "loss": 0.7874, "step": 38590 }, { "epoch": 0.4703667141969215, "grad_norm": 2.651795791292939, "learning_rate": 2.7876202694034638e-06, "loss": 0.7465, "step": 38595 }, { "epoch": 0.4704276504210693, "grad_norm": 3.5727198214813987, "learning_rate": 2.7872995509942276e-06, "loss": 0.7573, "step": 38600 }, { "epoch": 0.4704885866452171, "grad_norm": 2.635327869049347, "learning_rate": 2.7869788325849906e-06, "loss": 0.7867, "step": 38605 }, { "epoch": 0.4705495228693649, "grad_norm": 2.3137534003907008, "learning_rate": 2.7866581141757537e-06, "loss": 0.7999, "step": 38610 }, { "epoch": 0.47061045909351273, "grad_norm": 2.4293617548327395, "learning_rate": 2.7863373957665175e-06, "loss": 0.7322, "step": 38615 }, { "epoch": 0.47067139531766056, "grad_norm": 2.1662192596557834, "learning_rate": 2.7860166773572805e-06, "loss": 0.7399, "step": 38620 }, { "epoch": 0.4707323315418083, "grad_norm": 2.5034120633775965, "learning_rate": 2.7856959589480436e-06, "loss": 0.7698, "step": 38625 }, { "epoch": 0.47079326776595615, "grad_norm": 2.8085581737328575, "learning_rate": 2.7853752405388074e-06, "loss": 0.8401, "step": 38630 }, { "epoch": 0.47085420399010397, "grad_norm": 2.221607072959961, "learning_rate": 2.7850545221295704e-06, "loss": 0.8097, "step": 38635 }, { "epoch": 0.47091514021425174, "grad_norm": 2.1405935696108207, "learning_rate": 2.7847338037203334e-06, "loss": 0.7489, "step": 38640 }, { "epoch": 0.47097607643839956, "grad_norm": 2.196844133278764, "learning_rate": 2.7844130853110973e-06, "loss": 0.7664, "step": 38645 }, { "epoch": 0.4710370126625474, "grad_norm": 2.8867767723320528, "learning_rate": 2.7840923669018603e-06, "loss": 0.6793, "step": 38650 }, { "epoch": 0.4710979488866952, "grad_norm": 2.774461469350683, "learning_rate": 2.7837716484926238e-06, "loss": 0.7727, "step": 38655 }, { "epoch": 0.471158885110843, "grad_norm": 2.4782400757975, "learning_rate": 2.7834509300833868e-06, "loss": 0.7568, "step": 38660 }, { "epoch": 0.4712198213349908, "grad_norm": 2.472096391399947, "learning_rate": 2.7831302116741502e-06, "loss": 0.7457, "step": 38665 }, { "epoch": 0.4712807575591386, "grad_norm": 4.207560871074452, "learning_rate": 2.7828094932649137e-06, "loss": 0.7956, "step": 38670 }, { "epoch": 0.4713416937832864, "grad_norm": 2.179138533173357, "learning_rate": 2.7824887748556767e-06, "loss": 0.7602, "step": 38675 }, { "epoch": 0.4714026300074342, "grad_norm": 2.2846536338979457, "learning_rate": 2.7821680564464405e-06, "loss": 0.7867, "step": 38680 }, { "epoch": 0.47146356623158203, "grad_norm": 2.6241755240756532, "learning_rate": 2.7818473380372036e-06, "loss": 0.7187, "step": 38685 }, { "epoch": 0.47152450245572985, "grad_norm": 2.5242533395961817, "learning_rate": 2.7815266196279666e-06, "loss": 0.7686, "step": 38690 }, { "epoch": 0.4715854386798776, "grad_norm": 2.5682438442327356, "learning_rate": 2.7812059012187304e-06, "loss": 0.7117, "step": 38695 }, { "epoch": 0.47164637490402544, "grad_norm": 2.369203483304633, "learning_rate": 2.7808851828094935e-06, "loss": 0.7487, "step": 38700 }, { "epoch": 0.47170731112817327, "grad_norm": 2.3709489015850536, "learning_rate": 2.7805644644002565e-06, "loss": 0.8568, "step": 38705 }, { "epoch": 0.47176824735232104, "grad_norm": 2.327741942395848, "learning_rate": 2.7802437459910203e-06, "loss": 0.7221, "step": 38710 }, { "epoch": 0.47182918357646886, "grad_norm": 3.2614496105160504, "learning_rate": 2.7799230275817834e-06, "loss": 0.7162, "step": 38715 }, { "epoch": 0.4718901198006167, "grad_norm": 2.01328839163385, "learning_rate": 2.779602309172547e-06, "loss": 0.7077, "step": 38720 }, { "epoch": 0.4719510560247645, "grad_norm": 2.329879555724787, "learning_rate": 2.7792815907633102e-06, "loss": 0.7484, "step": 38725 }, { "epoch": 0.47201199224891227, "grad_norm": 2.4673509752228573, "learning_rate": 2.7789608723540733e-06, "loss": 0.7968, "step": 38730 }, { "epoch": 0.4720729284730601, "grad_norm": 2.676617218060865, "learning_rate": 2.7786401539448367e-06, "loss": 0.7261, "step": 38735 }, { "epoch": 0.4721338646972079, "grad_norm": 2.6694350857556532, "learning_rate": 2.7783194355356e-06, "loss": 0.7835, "step": 38740 }, { "epoch": 0.4721948009213557, "grad_norm": 2.0047832947566735, "learning_rate": 2.7779987171263636e-06, "loss": 0.7946, "step": 38745 }, { "epoch": 0.4722557371455035, "grad_norm": 2.772359601066216, "learning_rate": 2.7776779987171266e-06, "loss": 0.7231, "step": 38750 }, { "epoch": 0.47231667336965133, "grad_norm": 4.235857290372768, "learning_rate": 2.7773572803078896e-06, "loss": 0.763, "step": 38755 }, { "epoch": 0.47237760959379915, "grad_norm": 2.6674662294923506, "learning_rate": 2.7770365618986535e-06, "loss": 0.7607, "step": 38760 }, { "epoch": 0.4724385458179469, "grad_norm": 2.4885923226266087, "learning_rate": 2.7767158434894165e-06, "loss": 0.794, "step": 38765 }, { "epoch": 0.47249948204209474, "grad_norm": 2.440086917848692, "learning_rate": 2.7763951250801795e-06, "loss": 0.748, "step": 38770 }, { "epoch": 0.47256041826624257, "grad_norm": 3.2563912934303927, "learning_rate": 2.7760744066709434e-06, "loss": 0.7275, "step": 38775 }, { "epoch": 0.47262135449039033, "grad_norm": 2.712957374229736, "learning_rate": 2.7757536882617064e-06, "loss": 0.7374, "step": 38780 }, { "epoch": 0.47268229071453816, "grad_norm": 3.3895029223281057, "learning_rate": 2.7754329698524694e-06, "loss": 0.7865, "step": 38785 }, { "epoch": 0.472743226938686, "grad_norm": 2.574061650701908, "learning_rate": 2.7751122514432333e-06, "loss": 0.7639, "step": 38790 }, { "epoch": 0.4728041631628338, "grad_norm": 2.631808483406008, "learning_rate": 2.7747915330339963e-06, "loss": 0.7395, "step": 38795 }, { "epoch": 0.47286509938698157, "grad_norm": 2.2661448270774875, "learning_rate": 2.7744708146247597e-06, "loss": 0.6872, "step": 38800 }, { "epoch": 0.4729260356111294, "grad_norm": 2.0182889633314565, "learning_rate": 2.774150096215523e-06, "loss": 0.725, "step": 38805 }, { "epoch": 0.4729869718352772, "grad_norm": 2.442238867021143, "learning_rate": 2.773829377806286e-06, "loss": 0.835, "step": 38810 }, { "epoch": 0.473047908059425, "grad_norm": 2.4901986514209358, "learning_rate": 2.7735086593970496e-06, "loss": 0.7873, "step": 38815 }, { "epoch": 0.4731088442835728, "grad_norm": 2.956663232856675, "learning_rate": 2.773187940987813e-06, "loss": 0.8313, "step": 38820 }, { "epoch": 0.4731697805077206, "grad_norm": 2.405647953574313, "learning_rate": 2.7728672225785765e-06, "loss": 0.7679, "step": 38825 }, { "epoch": 0.47323071673186845, "grad_norm": 2.322246110158066, "learning_rate": 2.7725465041693395e-06, "loss": 0.8446, "step": 38830 }, { "epoch": 0.4732916529560162, "grad_norm": 2.3764587470838388, "learning_rate": 2.7722257857601025e-06, "loss": 0.7564, "step": 38835 }, { "epoch": 0.47335258918016404, "grad_norm": 2.263687782617476, "learning_rate": 2.7719050673508664e-06, "loss": 0.7207, "step": 38840 }, { "epoch": 0.47341352540431186, "grad_norm": 2.7420760037570506, "learning_rate": 2.7715843489416294e-06, "loss": 0.7545, "step": 38845 }, { "epoch": 0.47347446162845963, "grad_norm": 2.633441676648471, "learning_rate": 2.7712636305323924e-06, "loss": 0.8544, "step": 38850 }, { "epoch": 0.47353539785260745, "grad_norm": 2.325255387596041, "learning_rate": 2.7709429121231563e-06, "loss": 0.7396, "step": 38855 }, { "epoch": 0.4735963340767553, "grad_norm": 2.3366146556984675, "learning_rate": 2.7706221937139193e-06, "loss": 0.7152, "step": 38860 }, { "epoch": 0.4736572703009031, "grad_norm": 3.1601539996392582, "learning_rate": 2.7703014753046823e-06, "loss": 0.6937, "step": 38865 }, { "epoch": 0.47371820652505087, "grad_norm": 2.8582232172635793, "learning_rate": 2.769980756895446e-06, "loss": 0.7932, "step": 38870 }, { "epoch": 0.4737791427491987, "grad_norm": 2.0803481209970767, "learning_rate": 2.7696600384862092e-06, "loss": 0.8119, "step": 38875 }, { "epoch": 0.4738400789733465, "grad_norm": 2.1462831719135753, "learning_rate": 2.7693393200769727e-06, "loss": 0.7683, "step": 38880 }, { "epoch": 0.4739010151974943, "grad_norm": 2.878851235230267, "learning_rate": 2.769018601667736e-06, "loss": 0.7543, "step": 38885 }, { "epoch": 0.4739619514216421, "grad_norm": 2.72222202852151, "learning_rate": 2.768697883258499e-06, "loss": 0.7619, "step": 38890 }, { "epoch": 0.4740228876457899, "grad_norm": 2.2777404394815437, "learning_rate": 2.7683771648492626e-06, "loss": 0.7683, "step": 38895 }, { "epoch": 0.47408382386993775, "grad_norm": 2.083200636957715, "learning_rate": 2.768056446440026e-06, "loss": 0.8058, "step": 38900 }, { "epoch": 0.4741447600940855, "grad_norm": 2.0635186842223954, "learning_rate": 2.7677357280307894e-06, "loss": 0.7326, "step": 38905 }, { "epoch": 0.47420569631823334, "grad_norm": 2.8850352026478983, "learning_rate": 2.7674150096215524e-06, "loss": 0.7508, "step": 38910 }, { "epoch": 0.47426663254238116, "grad_norm": 2.6537704677706153, "learning_rate": 2.7670942912123155e-06, "loss": 0.7672, "step": 38915 }, { "epoch": 0.47432756876652893, "grad_norm": 2.3562859274119723, "learning_rate": 2.7667735728030793e-06, "loss": 0.8345, "step": 38920 }, { "epoch": 0.47438850499067675, "grad_norm": 3.3516249175649286, "learning_rate": 2.7664528543938423e-06, "loss": 0.7337, "step": 38925 }, { "epoch": 0.4744494412148246, "grad_norm": 2.369672329473434, "learning_rate": 2.7661321359846054e-06, "loss": 0.772, "step": 38930 }, { "epoch": 0.4745103774389724, "grad_norm": 2.273188017344876, "learning_rate": 2.7658114175753692e-06, "loss": 0.7165, "step": 38935 }, { "epoch": 0.47457131366312016, "grad_norm": 2.4812001454649524, "learning_rate": 2.7654906991661322e-06, "loss": 0.7547, "step": 38940 }, { "epoch": 0.474632249887268, "grad_norm": 2.748334957960847, "learning_rate": 2.7651699807568953e-06, "loss": 0.7239, "step": 38945 }, { "epoch": 0.4746931861114158, "grad_norm": 2.427587085511233, "learning_rate": 2.764849262347659e-06, "loss": 0.6931, "step": 38950 }, { "epoch": 0.4747541223355636, "grad_norm": 2.5678768064357644, "learning_rate": 2.764528543938422e-06, "loss": 0.7504, "step": 38955 }, { "epoch": 0.4748150585597114, "grad_norm": 2.422320372913127, "learning_rate": 2.7642078255291856e-06, "loss": 0.7541, "step": 38960 }, { "epoch": 0.4748759947838592, "grad_norm": 2.8952500312779397, "learning_rate": 2.763887107119949e-06, "loss": 0.8146, "step": 38965 }, { "epoch": 0.47493693100800705, "grad_norm": 2.4038215190623693, "learning_rate": 2.7635663887107125e-06, "loss": 0.7462, "step": 38970 }, { "epoch": 0.4749978672321548, "grad_norm": 2.776605588417515, "learning_rate": 2.7632456703014755e-06, "loss": 0.7859, "step": 38975 }, { "epoch": 0.47505880345630264, "grad_norm": 3.2561274915054033, "learning_rate": 2.762924951892239e-06, "loss": 0.7686, "step": 38980 }, { "epoch": 0.47511973968045046, "grad_norm": 2.142745866596415, "learning_rate": 2.7626042334830024e-06, "loss": 0.6952, "step": 38985 }, { "epoch": 0.4751806759045982, "grad_norm": 2.932565871221037, "learning_rate": 2.7622835150737654e-06, "loss": 0.7914, "step": 38990 }, { "epoch": 0.47524161212874605, "grad_norm": 3.067113518234033, "learning_rate": 2.7619627966645292e-06, "loss": 0.7221, "step": 38995 }, { "epoch": 0.47530254835289387, "grad_norm": 2.8089317098380695, "learning_rate": 2.7616420782552923e-06, "loss": 0.7016, "step": 39000 }, { "epoch": 0.4753634845770417, "grad_norm": 2.339455930680858, "learning_rate": 2.7613213598460553e-06, "loss": 0.7972, "step": 39005 }, { "epoch": 0.47542442080118946, "grad_norm": 2.883426430337425, "learning_rate": 2.7610006414368183e-06, "loss": 0.8019, "step": 39010 }, { "epoch": 0.4754853570253373, "grad_norm": 2.1495700033683613, "learning_rate": 2.760679923027582e-06, "loss": 0.7961, "step": 39015 }, { "epoch": 0.4755462932494851, "grad_norm": 2.3817394380995136, "learning_rate": 2.760359204618345e-06, "loss": 0.8402, "step": 39020 }, { "epoch": 0.4756072294736329, "grad_norm": 2.3051685383426843, "learning_rate": 2.7600384862091086e-06, "loss": 0.7519, "step": 39025 }, { "epoch": 0.4756681656977807, "grad_norm": 2.0968966141103365, "learning_rate": 2.759717767799872e-06, "loss": 0.7105, "step": 39030 }, { "epoch": 0.4757291019219285, "grad_norm": 2.8015188645970057, "learning_rate": 2.759397049390635e-06, "loss": 0.7208, "step": 39035 }, { "epoch": 0.47579003814607634, "grad_norm": 2.5682117304849954, "learning_rate": 2.7590763309813985e-06, "loss": 0.7841, "step": 39040 }, { "epoch": 0.4758509743702241, "grad_norm": 2.754548204390758, "learning_rate": 2.758755612572162e-06, "loss": 0.6862, "step": 39045 }, { "epoch": 0.47591191059437193, "grad_norm": 2.6635532259229784, "learning_rate": 2.7584348941629254e-06, "loss": 0.7044, "step": 39050 }, { "epoch": 0.47597284681851976, "grad_norm": 3.1333865796366895, "learning_rate": 2.7581141757536884e-06, "loss": 0.7499, "step": 39055 }, { "epoch": 0.4760337830426675, "grad_norm": 2.8578702867818078, "learning_rate": 2.757793457344452e-06, "loss": 0.7953, "step": 39060 }, { "epoch": 0.47609471926681535, "grad_norm": 2.2486949439432613, "learning_rate": 2.7574727389352153e-06, "loss": 0.7498, "step": 39065 }, { "epoch": 0.47615565549096317, "grad_norm": 2.373830273297787, "learning_rate": 2.7571520205259783e-06, "loss": 0.7751, "step": 39070 }, { "epoch": 0.47621659171511094, "grad_norm": 2.2303610009108885, "learning_rate": 2.756831302116742e-06, "loss": 0.7291, "step": 39075 }, { "epoch": 0.47627752793925876, "grad_norm": 2.0777354652666062, "learning_rate": 2.756510583707505e-06, "loss": 0.7506, "step": 39080 }, { "epoch": 0.4763384641634066, "grad_norm": 2.5423427390710494, "learning_rate": 2.756189865298268e-06, "loss": 0.7993, "step": 39085 }, { "epoch": 0.4763994003875544, "grad_norm": 3.013680333182609, "learning_rate": 2.7558691468890312e-06, "loss": 0.8202, "step": 39090 }, { "epoch": 0.4764603366117022, "grad_norm": 4.205438873294491, "learning_rate": 2.755548428479795e-06, "loss": 0.7138, "step": 39095 }, { "epoch": 0.47652127283585, "grad_norm": 2.3453947671132247, "learning_rate": 2.755227710070558e-06, "loss": 0.6996, "step": 39100 }, { "epoch": 0.4765822090599978, "grad_norm": 3.2319594186631777, "learning_rate": 2.7549069916613215e-06, "loss": 0.8076, "step": 39105 }, { "epoch": 0.4766431452841456, "grad_norm": 2.7279776069045143, "learning_rate": 2.754586273252085e-06, "loss": 0.8486, "step": 39110 }, { "epoch": 0.4767040815082934, "grad_norm": 2.2681878437733825, "learning_rate": 2.754265554842848e-06, "loss": 0.8092, "step": 39115 }, { "epoch": 0.47676501773244123, "grad_norm": 2.1519366266047997, "learning_rate": 2.7539448364336114e-06, "loss": 0.7088, "step": 39120 }, { "epoch": 0.47682595395658905, "grad_norm": 2.4977069915619596, "learning_rate": 2.753624118024375e-06, "loss": 0.7683, "step": 39125 }, { "epoch": 0.4768868901807368, "grad_norm": 2.56395686138973, "learning_rate": 2.7533033996151383e-06, "loss": 0.8031, "step": 39130 }, { "epoch": 0.47694782640488464, "grad_norm": 2.5084108680729433, "learning_rate": 2.7529826812059013e-06, "loss": 0.7963, "step": 39135 }, { "epoch": 0.47700876262903247, "grad_norm": 2.615178071918224, "learning_rate": 2.7526619627966648e-06, "loss": 0.8061, "step": 39140 }, { "epoch": 0.47706969885318024, "grad_norm": 2.4143814444044227, "learning_rate": 2.7523412443874282e-06, "loss": 0.7341, "step": 39145 }, { "epoch": 0.47713063507732806, "grad_norm": 2.8680711922739706, "learning_rate": 2.7520205259781912e-06, "loss": 0.7633, "step": 39150 }, { "epoch": 0.4771915713014759, "grad_norm": 2.229536272294754, "learning_rate": 2.751699807568955e-06, "loss": 0.7477, "step": 39155 }, { "epoch": 0.4772525075256237, "grad_norm": 2.4434956695136365, "learning_rate": 2.751379089159718e-06, "loss": 0.7507, "step": 39160 }, { "epoch": 0.47731344374977147, "grad_norm": 2.587275979290872, "learning_rate": 2.751058370750481e-06, "loss": 0.832, "step": 39165 }, { "epoch": 0.4773743799739193, "grad_norm": 2.555294801114063, "learning_rate": 2.750737652341244e-06, "loss": 0.7219, "step": 39170 }, { "epoch": 0.4774353161980671, "grad_norm": 3.583918996069859, "learning_rate": 2.750416933932008e-06, "loss": 0.747, "step": 39175 }, { "epoch": 0.4774962524222149, "grad_norm": 2.7213982600114797, "learning_rate": 2.750096215522771e-06, "loss": 0.7519, "step": 39180 }, { "epoch": 0.4775571886463627, "grad_norm": 3.2209942725964424, "learning_rate": 2.7497754971135345e-06, "loss": 0.7675, "step": 39185 }, { "epoch": 0.47761812487051053, "grad_norm": 2.2717665315556417, "learning_rate": 2.749454778704298e-06, "loss": 0.8259, "step": 39190 }, { "epoch": 0.47767906109465835, "grad_norm": 2.174222319462859, "learning_rate": 2.7491340602950613e-06, "loss": 0.7135, "step": 39195 }, { "epoch": 0.4777399973188061, "grad_norm": 2.353726756984566, "learning_rate": 2.7488133418858244e-06, "loss": 0.6961, "step": 39200 }, { "epoch": 0.47780093354295394, "grad_norm": 2.3939763514047994, "learning_rate": 2.748492623476588e-06, "loss": 0.7269, "step": 39205 }, { "epoch": 0.47786186976710177, "grad_norm": 2.329583840913785, "learning_rate": 2.7481719050673512e-06, "loss": 0.7504, "step": 39210 }, { "epoch": 0.47792280599124953, "grad_norm": 2.914178765107467, "learning_rate": 2.7478511866581143e-06, "loss": 0.7815, "step": 39215 }, { "epoch": 0.47798374221539736, "grad_norm": 2.303385433975302, "learning_rate": 2.747530468248878e-06, "loss": 0.8327, "step": 39220 }, { "epoch": 0.4780446784395452, "grad_norm": 2.862461950796168, "learning_rate": 2.747209749839641e-06, "loss": 0.719, "step": 39225 }, { "epoch": 0.478105614663693, "grad_norm": 2.148307852630342, "learning_rate": 2.746889031430404e-06, "loss": 0.7394, "step": 39230 }, { "epoch": 0.47816655088784077, "grad_norm": 2.174894396438261, "learning_rate": 2.746568313021168e-06, "loss": 0.7663, "step": 39235 }, { "epoch": 0.4782274871119886, "grad_norm": 2.5477239748176763, "learning_rate": 2.746247594611931e-06, "loss": 0.6993, "step": 39240 }, { "epoch": 0.4782884233361364, "grad_norm": 2.309243296078361, "learning_rate": 2.745926876202694e-06, "loss": 0.7634, "step": 39245 }, { "epoch": 0.4783493595602842, "grad_norm": 2.5202449204117032, "learning_rate": 2.7456061577934575e-06, "loss": 0.7743, "step": 39250 }, { "epoch": 0.478410295784432, "grad_norm": 2.4530400726767576, "learning_rate": 2.745285439384221e-06, "loss": 0.7348, "step": 39255 }, { "epoch": 0.4784712320085798, "grad_norm": 2.336877285424109, "learning_rate": 2.744964720974984e-06, "loss": 0.7801, "step": 39260 }, { "epoch": 0.47853216823272765, "grad_norm": 2.2597847273178875, "learning_rate": 2.7446440025657474e-06, "loss": 0.7474, "step": 39265 }, { "epoch": 0.4785931044568754, "grad_norm": 2.4430959562213825, "learning_rate": 2.744323284156511e-06, "loss": 0.7492, "step": 39270 }, { "epoch": 0.47865404068102324, "grad_norm": 2.300975338580437, "learning_rate": 2.7440025657472743e-06, "loss": 0.7571, "step": 39275 }, { "epoch": 0.47871497690517106, "grad_norm": 2.1924529514558095, "learning_rate": 2.7436818473380373e-06, "loss": 0.7435, "step": 39280 }, { "epoch": 0.47877591312931883, "grad_norm": 2.4659367396418377, "learning_rate": 2.7433611289288007e-06, "loss": 0.711, "step": 39285 }, { "epoch": 0.47883684935346665, "grad_norm": 2.6467222916345565, "learning_rate": 2.743040410519564e-06, "loss": 0.7826, "step": 39290 }, { "epoch": 0.4788977855776145, "grad_norm": 3.097130027605062, "learning_rate": 2.742719692110327e-06, "loss": 0.7368, "step": 39295 }, { "epoch": 0.4789587218017623, "grad_norm": 2.3507810799176045, "learning_rate": 2.742398973701091e-06, "loss": 0.6482, "step": 39300 }, { "epoch": 0.47901965802591007, "grad_norm": 2.517446720343679, "learning_rate": 2.742078255291854e-06, "loss": 0.7579, "step": 39305 }, { "epoch": 0.4790805942500579, "grad_norm": 3.2875606244312703, "learning_rate": 2.741757536882617e-06, "loss": 0.709, "step": 39310 }, { "epoch": 0.4791415304742057, "grad_norm": 2.1805598278885765, "learning_rate": 2.741436818473381e-06, "loss": 0.7083, "step": 39315 }, { "epoch": 0.4792024666983535, "grad_norm": 2.411406286701529, "learning_rate": 2.741116100064144e-06, "loss": 0.8075, "step": 39320 }, { "epoch": 0.4792634029225013, "grad_norm": 2.5386570454031245, "learning_rate": 2.740795381654907e-06, "loss": 0.7722, "step": 39325 }, { "epoch": 0.4793243391466491, "grad_norm": 4.795738591357979, "learning_rate": 2.740474663245671e-06, "loss": 0.7078, "step": 39330 }, { "epoch": 0.47938527537079695, "grad_norm": 2.6241397466415446, "learning_rate": 2.740153944836434e-06, "loss": 0.7284, "step": 39335 }, { "epoch": 0.4794462115949447, "grad_norm": 2.605326755261583, "learning_rate": 2.739833226427197e-06, "loss": 0.7451, "step": 39340 }, { "epoch": 0.47950714781909254, "grad_norm": 2.27698272092901, "learning_rate": 2.7395125080179603e-06, "loss": 0.774, "step": 39345 }, { "epoch": 0.47956808404324036, "grad_norm": 2.6831984764924877, "learning_rate": 2.7391917896087238e-06, "loss": 0.8046, "step": 39350 }, { "epoch": 0.47962902026738813, "grad_norm": 2.8997809773966883, "learning_rate": 2.738871071199487e-06, "loss": 0.7128, "step": 39355 }, { "epoch": 0.47968995649153595, "grad_norm": 3.468767862449663, "learning_rate": 2.7385503527902502e-06, "loss": 0.7753, "step": 39360 }, { "epoch": 0.4797508927156838, "grad_norm": 2.1687382055598126, "learning_rate": 2.7382296343810137e-06, "loss": 0.7102, "step": 39365 }, { "epoch": 0.4798118289398316, "grad_norm": 3.071265732937958, "learning_rate": 2.737908915971777e-06, "loss": 0.8358, "step": 39370 }, { "epoch": 0.47987276516397936, "grad_norm": 3.1279280136471694, "learning_rate": 2.73758819756254e-06, "loss": 0.7396, "step": 39375 }, { "epoch": 0.4799337013881272, "grad_norm": 2.492682456257004, "learning_rate": 2.737267479153304e-06, "loss": 0.8052, "step": 39380 }, { "epoch": 0.479994637612275, "grad_norm": 2.3820262601960303, "learning_rate": 2.736946760744067e-06, "loss": 0.7708, "step": 39385 }, { "epoch": 0.4800555738364228, "grad_norm": 2.4378528018541723, "learning_rate": 2.73662604233483e-06, "loss": 0.6952, "step": 39390 }, { "epoch": 0.4801165100605706, "grad_norm": 2.3937639615046264, "learning_rate": 2.736305323925594e-06, "loss": 0.7252, "step": 39395 }, { "epoch": 0.4801774462847184, "grad_norm": 2.699756020071421, "learning_rate": 2.735984605516357e-06, "loss": 0.7369, "step": 39400 }, { "epoch": 0.48023838250886625, "grad_norm": 2.3143286770229556, "learning_rate": 2.73566388710712e-06, "loss": 0.7487, "step": 39405 }, { "epoch": 0.480299318733014, "grad_norm": 2.2432547249663304, "learning_rate": 2.7353431686978838e-06, "loss": 0.7722, "step": 39410 }, { "epoch": 0.48036025495716184, "grad_norm": 2.302951048625179, "learning_rate": 2.7350224502886468e-06, "loss": 0.7315, "step": 39415 }, { "epoch": 0.48042119118130966, "grad_norm": 1.9983378686084452, "learning_rate": 2.73470173187941e-06, "loss": 0.7696, "step": 39420 }, { "epoch": 0.4804821274054574, "grad_norm": 2.569102867067892, "learning_rate": 2.7343810134701732e-06, "loss": 0.7611, "step": 39425 }, { "epoch": 0.48054306362960525, "grad_norm": 3.4965066552146005, "learning_rate": 2.7340602950609367e-06, "loss": 0.7495, "step": 39430 }, { "epoch": 0.48060399985375307, "grad_norm": 1.9649376798054616, "learning_rate": 2.7337395766517e-06, "loss": 0.7616, "step": 39435 }, { "epoch": 0.4806649360779009, "grad_norm": 2.640156489951058, "learning_rate": 2.733418858242463e-06, "loss": 0.7431, "step": 39440 }, { "epoch": 0.48072587230204866, "grad_norm": 2.1980758183224167, "learning_rate": 2.733098139833227e-06, "loss": 0.736, "step": 39445 }, { "epoch": 0.4807868085261965, "grad_norm": 2.0665349067789363, "learning_rate": 2.73277742142399e-06, "loss": 0.6765, "step": 39450 }, { "epoch": 0.4808477447503443, "grad_norm": 3.260129508553238, "learning_rate": 2.732456703014753e-06, "loss": 0.651, "step": 39455 }, { "epoch": 0.4809086809744921, "grad_norm": 2.993087585408376, "learning_rate": 2.732135984605517e-06, "loss": 0.8364, "step": 39460 }, { "epoch": 0.4809696171986399, "grad_norm": 2.3418557870329844, "learning_rate": 2.73181526619628e-06, "loss": 0.7998, "step": 39465 }, { "epoch": 0.4810305534227877, "grad_norm": 2.235959319071115, "learning_rate": 2.731494547787043e-06, "loss": 0.7956, "step": 39470 }, { "epoch": 0.48109148964693554, "grad_norm": 2.3058721518992336, "learning_rate": 2.731173829377807e-06, "loss": 0.784, "step": 39475 }, { "epoch": 0.4811524258710833, "grad_norm": 2.3236611297739778, "learning_rate": 2.73085311096857e-06, "loss": 0.7577, "step": 39480 }, { "epoch": 0.48121336209523113, "grad_norm": 2.1567171922466604, "learning_rate": 2.730532392559333e-06, "loss": 0.7192, "step": 39485 }, { "epoch": 0.48127429831937896, "grad_norm": 1.999400425097077, "learning_rate": 2.7302116741500967e-06, "loss": 0.806, "step": 39490 }, { "epoch": 0.4813352345435267, "grad_norm": 2.2984923486158215, "learning_rate": 2.7298909557408597e-06, "loss": 0.8536, "step": 39495 }, { "epoch": 0.48139617076767455, "grad_norm": 2.8243389350269474, "learning_rate": 2.729570237331623e-06, "loss": 0.7397, "step": 39500 }, { "epoch": 0.48145710699182237, "grad_norm": 2.2564746352016356, "learning_rate": 2.729249518922386e-06, "loss": 0.7717, "step": 39505 }, { "epoch": 0.4815180432159702, "grad_norm": 2.4838747217832626, "learning_rate": 2.7289288005131496e-06, "loss": 0.7815, "step": 39510 }, { "epoch": 0.48157897944011796, "grad_norm": 2.2332843889488725, "learning_rate": 2.728608082103913e-06, "loss": 0.7441, "step": 39515 }, { "epoch": 0.4816399156642658, "grad_norm": 2.474970036645673, "learning_rate": 2.728287363694676e-06, "loss": 0.647, "step": 39520 }, { "epoch": 0.4817008518884136, "grad_norm": 3.051254207139137, "learning_rate": 2.72796664528544e-06, "loss": 0.8333, "step": 39525 }, { "epoch": 0.4817617881125614, "grad_norm": 2.425489053677046, "learning_rate": 2.727645926876203e-06, "loss": 0.6981, "step": 39530 }, { "epoch": 0.4818227243367092, "grad_norm": 3.1449911124884298, "learning_rate": 2.727325208466966e-06, "loss": 0.8237, "step": 39535 }, { "epoch": 0.481883660560857, "grad_norm": 3.198273039148177, "learning_rate": 2.72700449005773e-06, "loss": 0.7247, "step": 39540 }, { "epoch": 0.4819445967850048, "grad_norm": 1.9066531117128753, "learning_rate": 2.726683771648493e-06, "loss": 0.8098, "step": 39545 }, { "epoch": 0.4820055330091526, "grad_norm": 2.72587331224266, "learning_rate": 2.726363053239256e-06, "loss": 0.756, "step": 39550 }, { "epoch": 0.48206646923330043, "grad_norm": 2.236122046399008, "learning_rate": 2.7260423348300197e-06, "loss": 0.8127, "step": 39555 }, { "epoch": 0.48212740545744825, "grad_norm": 2.391192983035257, "learning_rate": 2.7257216164207827e-06, "loss": 0.702, "step": 39560 }, { "epoch": 0.482188341681596, "grad_norm": 2.4197937438626673, "learning_rate": 2.7254008980115458e-06, "loss": 0.7795, "step": 39565 }, { "epoch": 0.48224927790574385, "grad_norm": 2.2265997013109655, "learning_rate": 2.7250801796023096e-06, "loss": 0.6964, "step": 39570 }, { "epoch": 0.48231021412989167, "grad_norm": 2.138127469946873, "learning_rate": 2.7247594611930726e-06, "loss": 0.7576, "step": 39575 }, { "epoch": 0.48237115035403944, "grad_norm": 2.6933367032252793, "learning_rate": 2.724438742783836e-06, "loss": 0.7797, "step": 39580 }, { "epoch": 0.48243208657818726, "grad_norm": 3.0149956859122606, "learning_rate": 2.7241180243745995e-06, "loss": 0.7399, "step": 39585 }, { "epoch": 0.4824930228023351, "grad_norm": 3.3339489551672394, "learning_rate": 2.7237973059653625e-06, "loss": 0.755, "step": 39590 }, { "epoch": 0.4825539590264829, "grad_norm": 2.411148583904275, "learning_rate": 2.723476587556126e-06, "loss": 0.7775, "step": 39595 }, { "epoch": 0.48261489525063067, "grad_norm": 2.2349868879740336, "learning_rate": 2.723155869146889e-06, "loss": 0.8115, "step": 39600 }, { "epoch": 0.4826758314747785, "grad_norm": 2.2151996662268547, "learning_rate": 2.722835150737653e-06, "loss": 0.7516, "step": 39605 }, { "epoch": 0.4827367676989263, "grad_norm": 2.073487284560251, "learning_rate": 2.722514432328416e-06, "loss": 0.7767, "step": 39610 }, { "epoch": 0.4827977039230741, "grad_norm": 2.2192716489573874, "learning_rate": 2.722193713919179e-06, "loss": 0.7616, "step": 39615 }, { "epoch": 0.4828586401472219, "grad_norm": 2.3471660219955504, "learning_rate": 2.7218729955099428e-06, "loss": 0.7226, "step": 39620 }, { "epoch": 0.48291957637136973, "grad_norm": 2.2120519387968427, "learning_rate": 2.7215522771007058e-06, "loss": 0.9007, "step": 39625 }, { "epoch": 0.48298051259551755, "grad_norm": 2.416963619844484, "learning_rate": 2.721231558691469e-06, "loss": 0.7399, "step": 39630 }, { "epoch": 0.4830414488196653, "grad_norm": 2.8096447597354213, "learning_rate": 2.7209108402822327e-06, "loss": 0.7919, "step": 39635 }, { "epoch": 0.48310238504381314, "grad_norm": 2.2809375595634123, "learning_rate": 2.7205901218729957e-06, "loss": 0.8022, "step": 39640 }, { "epoch": 0.48316332126796097, "grad_norm": 2.706471402336941, "learning_rate": 2.7202694034637587e-06, "loss": 0.7378, "step": 39645 }, { "epoch": 0.48322425749210873, "grad_norm": 2.0977006259331383, "learning_rate": 2.7199486850545226e-06, "loss": 0.6142, "step": 39650 }, { "epoch": 0.48328519371625656, "grad_norm": 2.5753003625155886, "learning_rate": 2.7196279666452856e-06, "loss": 0.7587, "step": 39655 }, { "epoch": 0.4833461299404044, "grad_norm": 2.691077416174538, "learning_rate": 2.719307248236049e-06, "loss": 0.7393, "step": 39660 }, { "epoch": 0.4834070661645522, "grad_norm": 2.877909411019521, "learning_rate": 2.7189865298268124e-06, "loss": 0.8176, "step": 39665 }, { "epoch": 0.48346800238869997, "grad_norm": 3.019102070537057, "learning_rate": 2.718665811417576e-06, "loss": 0.7661, "step": 39670 }, { "epoch": 0.4835289386128478, "grad_norm": 2.0025798766523737, "learning_rate": 2.718345093008339e-06, "loss": 0.7387, "step": 39675 }, { "epoch": 0.4835898748369956, "grad_norm": 2.6235395864306508, "learning_rate": 2.718024374599102e-06, "loss": 0.7157, "step": 39680 }, { "epoch": 0.4836508110611434, "grad_norm": 2.724560340151525, "learning_rate": 2.7177036561898658e-06, "loss": 0.7928, "step": 39685 }, { "epoch": 0.4837117472852912, "grad_norm": 2.3738459612598706, "learning_rate": 2.717382937780629e-06, "loss": 0.7892, "step": 39690 }, { "epoch": 0.483772683509439, "grad_norm": 2.3733115374677896, "learning_rate": 2.717062219371392e-06, "loss": 0.6819, "step": 39695 }, { "epoch": 0.48383361973358685, "grad_norm": 2.122994089309562, "learning_rate": 2.7167415009621557e-06, "loss": 0.7581, "step": 39700 }, { "epoch": 0.4838945559577346, "grad_norm": 3.9916855308729926, "learning_rate": 2.7164207825529187e-06, "loss": 0.781, "step": 39705 }, { "epoch": 0.48395549218188244, "grad_norm": 2.646938253401217, "learning_rate": 2.7161000641436817e-06, "loss": 0.7876, "step": 39710 }, { "epoch": 0.48401642840603026, "grad_norm": 2.350637496284478, "learning_rate": 2.7157793457344456e-06, "loss": 0.7963, "step": 39715 }, { "epoch": 0.48407736463017803, "grad_norm": 3.1588155967736893, "learning_rate": 2.7154586273252086e-06, "loss": 0.7311, "step": 39720 }, { "epoch": 0.48413830085432585, "grad_norm": 2.8501969176732027, "learning_rate": 2.715137908915972e-06, "loss": 0.7613, "step": 39725 }, { "epoch": 0.4841992370784737, "grad_norm": 2.6942672817408315, "learning_rate": 2.7148171905067355e-06, "loss": 0.7823, "step": 39730 }, { "epoch": 0.4842601733026215, "grad_norm": 2.3515420456051124, "learning_rate": 2.7144964720974985e-06, "loss": 0.7814, "step": 39735 }, { "epoch": 0.48432110952676927, "grad_norm": 2.4801702116506483, "learning_rate": 2.714175753688262e-06, "loss": 0.761, "step": 39740 }, { "epoch": 0.4843820457509171, "grad_norm": 2.2090587632973744, "learning_rate": 2.7138550352790254e-06, "loss": 0.7667, "step": 39745 }, { "epoch": 0.4844429819750649, "grad_norm": 3.2786205558239505, "learning_rate": 2.713534316869789e-06, "loss": 0.7528, "step": 39750 }, { "epoch": 0.4845039181992127, "grad_norm": 3.4343349788963535, "learning_rate": 2.713213598460552e-06, "loss": 0.7372, "step": 39755 }, { "epoch": 0.4845648544233605, "grad_norm": 3.2144745599202156, "learning_rate": 2.712892880051315e-06, "loss": 0.7048, "step": 39760 }, { "epoch": 0.4846257906475083, "grad_norm": 2.5793795118126934, "learning_rate": 2.7125721616420787e-06, "loss": 0.7808, "step": 39765 }, { "epoch": 0.48468672687165615, "grad_norm": 2.734371568946417, "learning_rate": 2.7122514432328417e-06, "loss": 0.7789, "step": 39770 }, { "epoch": 0.4847476630958039, "grad_norm": 2.6681262632413767, "learning_rate": 2.7119307248236047e-06, "loss": 0.7349, "step": 39775 }, { "epoch": 0.48480859931995174, "grad_norm": 2.257845085140207, "learning_rate": 2.7116100064143686e-06, "loss": 0.7625, "step": 39780 }, { "epoch": 0.48486953554409956, "grad_norm": 2.692081528652778, "learning_rate": 2.7112892880051316e-06, "loss": 0.7745, "step": 39785 }, { "epoch": 0.48493047176824733, "grad_norm": 2.0091500818752714, "learning_rate": 2.7109685695958946e-06, "loss": 0.8029, "step": 39790 }, { "epoch": 0.48499140799239515, "grad_norm": 2.253051968554577, "learning_rate": 2.7106478511866585e-06, "loss": 0.721, "step": 39795 }, { "epoch": 0.485052344216543, "grad_norm": 2.6585389712876566, "learning_rate": 2.7103271327774215e-06, "loss": 0.7082, "step": 39800 }, { "epoch": 0.4851132804406908, "grad_norm": 2.4542897275970486, "learning_rate": 2.710006414368185e-06, "loss": 0.7395, "step": 39805 }, { "epoch": 0.48517421666483856, "grad_norm": 2.2175068842572925, "learning_rate": 2.7096856959589484e-06, "loss": 0.8232, "step": 39810 }, { "epoch": 0.4852351528889864, "grad_norm": 2.7786817382835434, "learning_rate": 2.7093649775497114e-06, "loss": 0.7417, "step": 39815 }, { "epoch": 0.4852960891131342, "grad_norm": 2.501995013943158, "learning_rate": 2.709044259140475e-06, "loss": 0.6774, "step": 39820 }, { "epoch": 0.485357025337282, "grad_norm": 4.9509167708240085, "learning_rate": 2.7087235407312383e-06, "loss": 0.7961, "step": 39825 }, { "epoch": 0.4854179615614298, "grad_norm": 1.9638355833734011, "learning_rate": 2.7084028223220017e-06, "loss": 0.7607, "step": 39830 }, { "epoch": 0.4854788977855776, "grad_norm": 2.300860513242931, "learning_rate": 2.7080821039127648e-06, "loss": 0.7461, "step": 39835 }, { "epoch": 0.48553983400972545, "grad_norm": 2.395643409135042, "learning_rate": 2.7077613855035278e-06, "loss": 0.7428, "step": 39840 }, { "epoch": 0.4856007702338732, "grad_norm": 2.3371495045366415, "learning_rate": 2.7074406670942916e-06, "loss": 0.7384, "step": 39845 }, { "epoch": 0.48566170645802104, "grad_norm": 2.3580893408447494, "learning_rate": 2.7071199486850547e-06, "loss": 0.7282, "step": 39850 }, { "epoch": 0.48572264268216886, "grad_norm": 3.259433235017753, "learning_rate": 2.7067992302758177e-06, "loss": 0.678, "step": 39855 }, { "epoch": 0.4857835789063166, "grad_norm": 1.906572431731409, "learning_rate": 2.7064785118665815e-06, "loss": 0.681, "step": 39860 }, { "epoch": 0.48584451513046445, "grad_norm": 2.698349433040149, "learning_rate": 2.7061577934573446e-06, "loss": 0.689, "step": 39865 }, { "epoch": 0.48590545135461227, "grad_norm": 2.968979171380968, "learning_rate": 2.7058370750481076e-06, "loss": 0.7761, "step": 39870 }, { "epoch": 0.4859663875787601, "grad_norm": 3.015425912729166, "learning_rate": 2.7055163566388714e-06, "loss": 0.7553, "step": 39875 }, { "epoch": 0.48602732380290786, "grad_norm": 2.6585370108980597, "learning_rate": 2.7051956382296345e-06, "loss": 0.7347, "step": 39880 }, { "epoch": 0.4860882600270557, "grad_norm": 2.4696641570118305, "learning_rate": 2.704874919820398e-06, "loss": 0.7446, "step": 39885 }, { "epoch": 0.4861491962512035, "grad_norm": 2.695080536557952, "learning_rate": 2.7045542014111613e-06, "loss": 0.7248, "step": 39890 }, { "epoch": 0.4862101324753513, "grad_norm": 2.629947627187243, "learning_rate": 2.7042334830019248e-06, "loss": 0.6554, "step": 39895 }, { "epoch": 0.4862710686994991, "grad_norm": 2.892457602430913, "learning_rate": 2.703912764592688e-06, "loss": 0.8208, "step": 39900 }, { "epoch": 0.4863320049236469, "grad_norm": 2.5245565618017403, "learning_rate": 2.7035920461834512e-06, "loss": 0.7816, "step": 39905 }, { "epoch": 0.48639294114779474, "grad_norm": 2.57235658608361, "learning_rate": 2.7032713277742147e-06, "loss": 0.6971, "step": 39910 }, { "epoch": 0.4864538773719425, "grad_norm": 2.76191413953369, "learning_rate": 2.7029506093649777e-06, "loss": 0.7143, "step": 39915 }, { "epoch": 0.48651481359609033, "grad_norm": 2.2528475704138717, "learning_rate": 2.7026298909557416e-06, "loss": 0.737, "step": 39920 }, { "epoch": 0.48657574982023816, "grad_norm": 2.705044658904276, "learning_rate": 2.7023091725465046e-06, "loss": 0.7809, "step": 39925 }, { "epoch": 0.4866366860443859, "grad_norm": 2.1976880640132066, "learning_rate": 2.7019884541372676e-06, "loss": 0.785, "step": 39930 }, { "epoch": 0.48669762226853375, "grad_norm": 2.5055073672921306, "learning_rate": 2.7016677357280306e-06, "loss": 0.7648, "step": 39935 }, { "epoch": 0.48675855849268157, "grad_norm": 2.458302377739271, "learning_rate": 2.7013470173187945e-06, "loss": 0.7686, "step": 39940 }, { "epoch": 0.4868194947168294, "grad_norm": 2.6485223583159816, "learning_rate": 2.7010262989095575e-06, "loss": 0.7802, "step": 39945 }, { "epoch": 0.48688043094097716, "grad_norm": 2.7417554996698508, "learning_rate": 2.700705580500321e-06, "loss": 0.757, "step": 39950 }, { "epoch": 0.486941367165125, "grad_norm": 2.536035088038184, "learning_rate": 2.7003848620910844e-06, "loss": 0.7348, "step": 39955 }, { "epoch": 0.4870023033892728, "grad_norm": 2.1411445400996043, "learning_rate": 2.7000641436818474e-06, "loss": 0.7637, "step": 39960 }, { "epoch": 0.4870632396134206, "grad_norm": 2.527272478898339, "learning_rate": 2.699743425272611e-06, "loss": 0.7264, "step": 39965 }, { "epoch": 0.4871241758375684, "grad_norm": 2.000079006242974, "learning_rate": 2.6994227068633743e-06, "loss": 0.7228, "step": 39970 }, { "epoch": 0.4871851120617162, "grad_norm": 2.3961227599020676, "learning_rate": 2.6991019884541377e-06, "loss": 0.7968, "step": 39975 }, { "epoch": 0.48724604828586404, "grad_norm": 6.318174324470217, "learning_rate": 2.6987812700449007e-06, "loss": 0.7281, "step": 39980 }, { "epoch": 0.4873069845100118, "grad_norm": 2.320870409082089, "learning_rate": 2.698460551635664e-06, "loss": 0.7392, "step": 39985 }, { "epoch": 0.48736792073415963, "grad_norm": 2.2011794199098356, "learning_rate": 2.6981398332264276e-06, "loss": 0.7621, "step": 39990 }, { "epoch": 0.48742885695830745, "grad_norm": 2.4483493683069915, "learning_rate": 2.6978191148171906e-06, "loss": 0.7157, "step": 39995 }, { "epoch": 0.4874897931824552, "grad_norm": 2.484354741631085, "learning_rate": 2.6974983964079545e-06, "loss": 0.7357, "step": 40000 }, { "epoch": 0.48755072940660305, "grad_norm": 2.4577475447288264, "learning_rate": 2.6971776779987175e-06, "loss": 0.6561, "step": 40005 }, { "epoch": 0.48761166563075087, "grad_norm": 2.5488639380028264, "learning_rate": 2.6968569595894805e-06, "loss": 0.6814, "step": 40010 }, { "epoch": 0.4876726018548987, "grad_norm": 2.220148561645107, "learning_rate": 2.6965362411802435e-06, "loss": 0.689, "step": 40015 }, { "epoch": 0.48773353807904646, "grad_norm": 2.756431378097865, "learning_rate": 2.6962155227710074e-06, "loss": 0.757, "step": 40020 }, { "epoch": 0.4877944743031943, "grad_norm": 1.8824043846272456, "learning_rate": 2.6958948043617704e-06, "loss": 0.7068, "step": 40025 }, { "epoch": 0.4878554105273421, "grad_norm": 1.9362328884145243, "learning_rate": 2.695574085952534e-06, "loss": 0.7972, "step": 40030 }, { "epoch": 0.48791634675148987, "grad_norm": 2.1514969227382763, "learning_rate": 2.6952533675432973e-06, "loss": 0.7452, "step": 40035 }, { "epoch": 0.4879772829756377, "grad_norm": 2.3791233943892687, "learning_rate": 2.6949326491340603e-06, "loss": 0.6627, "step": 40040 }, { "epoch": 0.4880382191997855, "grad_norm": 2.0668103045927633, "learning_rate": 2.6946119307248237e-06, "loss": 0.6777, "step": 40045 }, { "epoch": 0.4880991554239333, "grad_norm": 4.030703922136176, "learning_rate": 2.694291212315587e-06, "loss": 0.7735, "step": 40050 }, { "epoch": 0.4881600916480811, "grad_norm": 2.2807911091514823, "learning_rate": 2.6939704939063506e-06, "loss": 0.7542, "step": 40055 }, { "epoch": 0.48822102787222893, "grad_norm": 2.511201283262667, "learning_rate": 2.6936497754971136e-06, "loss": 0.7365, "step": 40060 }, { "epoch": 0.48828196409637675, "grad_norm": 2.4458648698458543, "learning_rate": 2.693329057087877e-06, "loss": 0.7609, "step": 40065 }, { "epoch": 0.4883429003205245, "grad_norm": 2.6075871287647856, "learning_rate": 2.6930083386786405e-06, "loss": 0.7409, "step": 40070 }, { "epoch": 0.48840383654467234, "grad_norm": 2.0563398842679588, "learning_rate": 2.6926876202694035e-06, "loss": 0.7344, "step": 40075 }, { "epoch": 0.48846477276882017, "grad_norm": 2.118494156101581, "learning_rate": 2.6923669018601674e-06, "loss": 0.7081, "step": 40080 }, { "epoch": 0.48852570899296793, "grad_norm": 2.8681998631521184, "learning_rate": 2.6920461834509304e-06, "loss": 0.7268, "step": 40085 }, { "epoch": 0.48858664521711576, "grad_norm": 3.9313036330535263, "learning_rate": 2.6917254650416934e-06, "loss": 0.8428, "step": 40090 }, { "epoch": 0.4886475814412636, "grad_norm": 2.6753880871016564, "learning_rate": 2.6914047466324565e-06, "loss": 0.7088, "step": 40095 }, { "epoch": 0.4887085176654114, "grad_norm": 2.244601367702643, "learning_rate": 2.6910840282232203e-06, "loss": 0.7296, "step": 40100 }, { "epoch": 0.48876945388955917, "grad_norm": 2.3919740360794703, "learning_rate": 2.6907633098139833e-06, "loss": 0.7347, "step": 40105 }, { "epoch": 0.488830390113707, "grad_norm": 2.328882338945589, "learning_rate": 2.6904425914047468e-06, "loss": 0.7765, "step": 40110 }, { "epoch": 0.4888913263378548, "grad_norm": 2.3692925597087284, "learning_rate": 2.6901218729955102e-06, "loss": 0.7527, "step": 40115 }, { "epoch": 0.4889522625620026, "grad_norm": 2.385537857092493, "learning_rate": 2.6898011545862732e-06, "loss": 0.8105, "step": 40120 }, { "epoch": 0.4890131987861504, "grad_norm": 2.3556870630758735, "learning_rate": 2.6894804361770367e-06, "loss": 0.7721, "step": 40125 }, { "epoch": 0.4890741350102982, "grad_norm": 3.0708196800210574, "learning_rate": 2.6891597177678e-06, "loss": 0.7664, "step": 40130 }, { "epoch": 0.48913507123444605, "grad_norm": 3.068118353538275, "learning_rate": 2.6888389993585636e-06, "loss": 0.7715, "step": 40135 }, { "epoch": 0.4891960074585938, "grad_norm": 2.290289048979817, "learning_rate": 2.6885182809493266e-06, "loss": 0.655, "step": 40140 }, { "epoch": 0.48925694368274164, "grad_norm": 2.6849187947178734, "learning_rate": 2.6881975625400904e-06, "loss": 0.7808, "step": 40145 }, { "epoch": 0.48931787990688946, "grad_norm": 2.496797476563368, "learning_rate": 2.6878768441308535e-06, "loss": 0.7981, "step": 40150 }, { "epoch": 0.48937881613103723, "grad_norm": 4.727472016876645, "learning_rate": 2.6875561257216165e-06, "loss": 0.8466, "step": 40155 }, { "epoch": 0.48943975235518505, "grad_norm": 2.382559965653428, "learning_rate": 2.6872354073123803e-06, "loss": 0.7638, "step": 40160 }, { "epoch": 0.4895006885793329, "grad_norm": 3.576987040688663, "learning_rate": 2.6869146889031434e-06, "loss": 0.7781, "step": 40165 }, { "epoch": 0.4895616248034807, "grad_norm": 2.3878279040929167, "learning_rate": 2.6865939704939064e-06, "loss": 0.7646, "step": 40170 }, { "epoch": 0.48962256102762847, "grad_norm": 2.245482966543114, "learning_rate": 2.68627325208467e-06, "loss": 0.7644, "step": 40175 }, { "epoch": 0.4896834972517763, "grad_norm": 3.9581432337861315, "learning_rate": 2.6859525336754332e-06, "loss": 0.744, "step": 40180 }, { "epoch": 0.4897444334759241, "grad_norm": 2.144678327120404, "learning_rate": 2.6856318152661963e-06, "loss": 0.7476, "step": 40185 }, { "epoch": 0.4898053697000719, "grad_norm": 2.5151869696998794, "learning_rate": 2.6853110968569597e-06, "loss": 0.8316, "step": 40190 }, { "epoch": 0.4898663059242197, "grad_norm": 2.377469534708312, "learning_rate": 2.684990378447723e-06, "loss": 0.7666, "step": 40195 }, { "epoch": 0.4899272421483675, "grad_norm": 2.741175118903913, "learning_rate": 2.6846696600384866e-06, "loss": 0.7467, "step": 40200 }, { "epoch": 0.48998817837251535, "grad_norm": 2.1232223516071813, "learning_rate": 2.6843489416292496e-06, "loss": 0.7553, "step": 40205 }, { "epoch": 0.4900491145966631, "grad_norm": 2.7056512731466666, "learning_rate": 2.684028223220013e-06, "loss": 0.701, "step": 40210 }, { "epoch": 0.49011005082081094, "grad_norm": 3.619568132949889, "learning_rate": 2.6837075048107765e-06, "loss": 0.7532, "step": 40215 }, { "epoch": 0.49017098704495876, "grad_norm": 3.382132915257376, "learning_rate": 2.6833867864015395e-06, "loss": 0.8043, "step": 40220 }, { "epoch": 0.49023192326910653, "grad_norm": 2.770989803145308, "learning_rate": 2.6830660679923034e-06, "loss": 0.7358, "step": 40225 }, { "epoch": 0.49029285949325435, "grad_norm": 3.222893904091299, "learning_rate": 2.6827453495830664e-06, "loss": 0.7913, "step": 40230 }, { "epoch": 0.4903537957174022, "grad_norm": 2.261354199804646, "learning_rate": 2.6824246311738294e-06, "loss": 0.7699, "step": 40235 }, { "epoch": 0.49041473194155, "grad_norm": 2.567140012096552, "learning_rate": 2.6821039127645933e-06, "loss": 0.7579, "step": 40240 }, { "epoch": 0.49047566816569776, "grad_norm": 2.4248500801429436, "learning_rate": 2.6817831943553563e-06, "loss": 0.7821, "step": 40245 }, { "epoch": 0.4905366043898456, "grad_norm": 2.8696980625732134, "learning_rate": 2.6814624759461193e-06, "loss": 0.7281, "step": 40250 }, { "epoch": 0.4905975406139934, "grad_norm": 2.2580390637522907, "learning_rate": 2.681141757536883e-06, "loss": 0.7674, "step": 40255 }, { "epoch": 0.4906584768381412, "grad_norm": 2.728475987945583, "learning_rate": 2.680821039127646e-06, "loss": 0.7607, "step": 40260 }, { "epoch": 0.490719413062289, "grad_norm": 3.367281136978164, "learning_rate": 2.680500320718409e-06, "loss": 0.7483, "step": 40265 }, { "epoch": 0.4907803492864368, "grad_norm": 2.7543290807814693, "learning_rate": 2.6801796023091726e-06, "loss": 0.8193, "step": 40270 }, { "epoch": 0.49084128551058465, "grad_norm": 2.2176803885481546, "learning_rate": 2.679858883899936e-06, "loss": 0.7158, "step": 40275 }, { "epoch": 0.4909022217347324, "grad_norm": 3.31801968582736, "learning_rate": 2.6795381654906995e-06, "loss": 0.7741, "step": 40280 }, { "epoch": 0.49096315795888024, "grad_norm": 2.1128616613662254, "learning_rate": 2.6792174470814625e-06, "loss": 0.754, "step": 40285 }, { "epoch": 0.49102409418302806, "grad_norm": 3.0124054510013156, "learning_rate": 2.678896728672226e-06, "loss": 0.7527, "step": 40290 }, { "epoch": 0.4910850304071758, "grad_norm": 3.0027617767622017, "learning_rate": 2.6785760102629894e-06, "loss": 0.7333, "step": 40295 }, { "epoch": 0.49114596663132365, "grad_norm": 2.6356642280324176, "learning_rate": 2.6782552918537524e-06, "loss": 0.7188, "step": 40300 }, { "epoch": 0.4912069028554715, "grad_norm": 2.6760514558543833, "learning_rate": 2.6779345734445163e-06, "loss": 0.78, "step": 40305 }, { "epoch": 0.4912678390796193, "grad_norm": 2.329513101127358, "learning_rate": 2.6776138550352793e-06, "loss": 0.794, "step": 40310 }, { "epoch": 0.49132877530376706, "grad_norm": 2.105361507541078, "learning_rate": 2.6772931366260423e-06, "loss": 0.7958, "step": 40315 }, { "epoch": 0.4913897115279149, "grad_norm": 2.2816575591000543, "learning_rate": 2.676972418216806e-06, "loss": 0.7885, "step": 40320 }, { "epoch": 0.4914506477520627, "grad_norm": 2.791569385201554, "learning_rate": 2.676651699807569e-06, "loss": 0.8152, "step": 40325 }, { "epoch": 0.4915115839762105, "grad_norm": 3.4170989654861286, "learning_rate": 2.6763309813983322e-06, "loss": 0.7711, "step": 40330 }, { "epoch": 0.4915725202003583, "grad_norm": 2.652152978484329, "learning_rate": 2.676010262989096e-06, "loss": 0.806, "step": 40335 }, { "epoch": 0.4916334564245061, "grad_norm": 2.3831953648339317, "learning_rate": 2.675689544579859e-06, "loss": 0.7374, "step": 40340 }, { "epoch": 0.49169439264865394, "grad_norm": 2.793661483223359, "learning_rate": 2.675368826170622e-06, "loss": 0.745, "step": 40345 }, { "epoch": 0.4917553288728017, "grad_norm": 2.7588273711199567, "learning_rate": 2.6750481077613856e-06, "loss": 0.7653, "step": 40350 }, { "epoch": 0.49181626509694953, "grad_norm": 2.51814265155056, "learning_rate": 2.674727389352149e-06, "loss": 0.8078, "step": 40355 }, { "epoch": 0.49187720132109736, "grad_norm": 2.7440144336750456, "learning_rate": 2.6744066709429124e-06, "loss": 0.7925, "step": 40360 }, { "epoch": 0.4919381375452451, "grad_norm": 2.4428988139049537, "learning_rate": 2.6740859525336755e-06, "loss": 0.7901, "step": 40365 }, { "epoch": 0.49199907376939295, "grad_norm": 2.0973162470834863, "learning_rate": 2.6737652341244393e-06, "loss": 0.7061, "step": 40370 }, { "epoch": 0.49206000999354077, "grad_norm": 2.730006030070563, "learning_rate": 2.6734445157152023e-06, "loss": 0.7186, "step": 40375 }, { "epoch": 0.4921209462176886, "grad_norm": 2.332797266985999, "learning_rate": 2.6731237973059654e-06, "loss": 0.7622, "step": 40380 }, { "epoch": 0.49218188244183636, "grad_norm": 2.6806035649088535, "learning_rate": 2.6728030788967292e-06, "loss": 0.7807, "step": 40385 }, { "epoch": 0.4922428186659842, "grad_norm": 2.324122635958802, "learning_rate": 2.6724823604874922e-06, "loss": 0.8177, "step": 40390 }, { "epoch": 0.492303754890132, "grad_norm": 2.9084204354456697, "learning_rate": 2.6721616420782553e-06, "loss": 0.7633, "step": 40395 }, { "epoch": 0.4923646911142798, "grad_norm": 3.749939070894101, "learning_rate": 2.671840923669019e-06, "loss": 0.7479, "step": 40400 }, { "epoch": 0.4924256273384276, "grad_norm": 2.604432901130277, "learning_rate": 2.671520205259782e-06, "loss": 0.7516, "step": 40405 }, { "epoch": 0.4924865635625754, "grad_norm": 2.513040362076595, "learning_rate": 2.671199486850545e-06, "loss": 0.7981, "step": 40410 }, { "epoch": 0.49254749978672324, "grad_norm": 3.3054179145744524, "learning_rate": 2.670878768441309e-06, "loss": 0.8248, "step": 40415 }, { "epoch": 0.492608436010871, "grad_norm": 2.6757877511102977, "learning_rate": 2.670558050032072e-06, "loss": 0.6723, "step": 40420 }, { "epoch": 0.49266937223501883, "grad_norm": 2.6269383568230373, "learning_rate": 2.6702373316228355e-06, "loss": 0.7452, "step": 40425 }, { "epoch": 0.49273030845916665, "grad_norm": 2.9087659083435944, "learning_rate": 2.6699166132135985e-06, "loss": 0.8566, "step": 40430 }, { "epoch": 0.4927912446833144, "grad_norm": 2.8302403265355722, "learning_rate": 2.669595894804362e-06, "loss": 0.737, "step": 40435 }, { "epoch": 0.49285218090746225, "grad_norm": 2.6918248500909034, "learning_rate": 2.6692751763951254e-06, "loss": 0.7594, "step": 40440 }, { "epoch": 0.49291311713161007, "grad_norm": 2.5492290573091325, "learning_rate": 2.6689544579858884e-06, "loss": 0.7832, "step": 40445 }, { "epoch": 0.4929740533557579, "grad_norm": 2.217626194676313, "learning_rate": 2.6686337395766522e-06, "loss": 0.6943, "step": 40450 }, { "epoch": 0.49303498957990566, "grad_norm": 2.3726299361279866, "learning_rate": 2.6683130211674153e-06, "loss": 0.7944, "step": 40455 }, { "epoch": 0.4930959258040535, "grad_norm": 2.4503058965375715, "learning_rate": 2.6679923027581783e-06, "loss": 0.7442, "step": 40460 }, { "epoch": 0.4931568620282013, "grad_norm": 2.8930371534660684, "learning_rate": 2.667671584348942e-06, "loss": 0.8413, "step": 40465 }, { "epoch": 0.49321779825234907, "grad_norm": 2.1942432368824045, "learning_rate": 2.667350865939705e-06, "loss": 0.7333, "step": 40470 }, { "epoch": 0.4932787344764969, "grad_norm": 2.421567851427482, "learning_rate": 2.667030147530468e-06, "loss": 0.7254, "step": 40475 }, { "epoch": 0.4933396707006447, "grad_norm": 2.122744534879179, "learning_rate": 2.666709429121232e-06, "loss": 0.7501, "step": 40480 }, { "epoch": 0.49340060692479254, "grad_norm": 2.9567795194641042, "learning_rate": 2.666388710711995e-06, "loss": 0.7093, "step": 40485 }, { "epoch": 0.4934615431489403, "grad_norm": 2.5305817477233123, "learning_rate": 2.666067992302758e-06, "loss": 0.7212, "step": 40490 }, { "epoch": 0.49352247937308813, "grad_norm": 2.2565580141490704, "learning_rate": 2.665747273893522e-06, "loss": 0.7592, "step": 40495 }, { "epoch": 0.49358341559723595, "grad_norm": 2.1532242755473936, "learning_rate": 2.665426555484285e-06, "loss": 0.6864, "step": 40500 }, { "epoch": 0.4936443518213837, "grad_norm": 2.535339677842028, "learning_rate": 2.6651058370750484e-06, "loss": 0.6957, "step": 40505 }, { "epoch": 0.49370528804553154, "grad_norm": 2.2647616603616876, "learning_rate": 2.664785118665812e-06, "loss": 0.7659, "step": 40510 }, { "epoch": 0.49376622426967937, "grad_norm": 3.0059664670817656, "learning_rate": 2.664464400256575e-06, "loss": 0.7254, "step": 40515 }, { "epoch": 0.49382716049382713, "grad_norm": 2.0618288722311986, "learning_rate": 2.6641436818473383e-06, "loss": 0.7103, "step": 40520 }, { "epoch": 0.49388809671797496, "grad_norm": 2.9747457339331898, "learning_rate": 2.6638229634381013e-06, "loss": 0.7214, "step": 40525 }, { "epoch": 0.4939490329421228, "grad_norm": 2.9456818100794218, "learning_rate": 2.663502245028865e-06, "loss": 0.7091, "step": 40530 }, { "epoch": 0.4940099691662706, "grad_norm": 2.3706726660706274, "learning_rate": 2.663181526619628e-06, "loss": 0.7149, "step": 40535 }, { "epoch": 0.49407090539041837, "grad_norm": 2.3902100358271543, "learning_rate": 2.662860808210391e-06, "loss": 0.7412, "step": 40540 }, { "epoch": 0.4941318416145662, "grad_norm": 2.2581804857140266, "learning_rate": 2.662540089801155e-06, "loss": 0.7787, "step": 40545 }, { "epoch": 0.494192777838714, "grad_norm": 2.256283921362607, "learning_rate": 2.662219371391918e-06, "loss": 0.7373, "step": 40550 }, { "epoch": 0.4942537140628618, "grad_norm": 2.47499627968288, "learning_rate": 2.661898652982681e-06, "loss": 0.7606, "step": 40555 }, { "epoch": 0.4943146502870096, "grad_norm": 2.7926504533929752, "learning_rate": 2.661577934573445e-06, "loss": 0.7224, "step": 40560 }, { "epoch": 0.4943755865111574, "grad_norm": 2.2246938136199548, "learning_rate": 2.661257216164208e-06, "loss": 0.7467, "step": 40565 }, { "epoch": 0.49443652273530525, "grad_norm": 2.5621104297430883, "learning_rate": 2.660936497754971e-06, "loss": 0.7543, "step": 40570 }, { "epoch": 0.494497458959453, "grad_norm": 2.5422513830949365, "learning_rate": 2.660615779345735e-06, "loss": 0.8205, "step": 40575 }, { "epoch": 0.49455839518360084, "grad_norm": 2.2661286055659433, "learning_rate": 2.660295060936498e-06, "loss": 0.7729, "step": 40580 }, { "epoch": 0.49461933140774866, "grad_norm": 5.211313894656094, "learning_rate": 2.6599743425272613e-06, "loss": 0.7553, "step": 40585 }, { "epoch": 0.49468026763189643, "grad_norm": 2.110019832487359, "learning_rate": 2.6596536241180248e-06, "loss": 0.7606, "step": 40590 }, { "epoch": 0.49474120385604425, "grad_norm": 2.0820798873404245, "learning_rate": 2.659332905708788e-06, "loss": 0.6801, "step": 40595 }, { "epoch": 0.4948021400801921, "grad_norm": 2.053555093620536, "learning_rate": 2.6590121872995512e-06, "loss": 0.7001, "step": 40600 }, { "epoch": 0.4948630763043399, "grad_norm": 2.4014878306244425, "learning_rate": 2.6586914688903142e-06, "loss": 0.7806, "step": 40605 }, { "epoch": 0.49492401252848767, "grad_norm": 2.428078564385055, "learning_rate": 2.658370750481078e-06, "loss": 0.7611, "step": 40610 }, { "epoch": 0.4949849487526355, "grad_norm": 2.847740015870148, "learning_rate": 2.658050032071841e-06, "loss": 0.7519, "step": 40615 }, { "epoch": 0.4950458849767833, "grad_norm": 2.7066891357320184, "learning_rate": 2.657729313662604e-06, "loss": 0.8195, "step": 40620 }, { "epoch": 0.4951068212009311, "grad_norm": 2.4358494291378574, "learning_rate": 2.657408595253368e-06, "loss": 0.755, "step": 40625 }, { "epoch": 0.4951677574250789, "grad_norm": 2.272710705089312, "learning_rate": 2.657087876844131e-06, "loss": 0.7046, "step": 40630 }, { "epoch": 0.4952286936492267, "grad_norm": 2.502204651889737, "learning_rate": 2.656767158434894e-06, "loss": 0.7459, "step": 40635 }, { "epoch": 0.49528962987337455, "grad_norm": 2.4195695336712117, "learning_rate": 2.656446440025658e-06, "loss": 0.7383, "step": 40640 }, { "epoch": 0.4953505660975223, "grad_norm": 2.6252935858545716, "learning_rate": 2.656125721616421e-06, "loss": 0.7487, "step": 40645 }, { "epoch": 0.49541150232167014, "grad_norm": 2.491065316786204, "learning_rate": 2.6558050032071844e-06, "loss": 0.7242, "step": 40650 }, { "epoch": 0.49547243854581796, "grad_norm": 2.2261663376407563, "learning_rate": 2.655484284797948e-06, "loss": 0.7052, "step": 40655 }, { "epoch": 0.49553337476996573, "grad_norm": 2.4836759395226977, "learning_rate": 2.655163566388711e-06, "loss": 0.8295, "step": 40660 }, { "epoch": 0.49559431099411355, "grad_norm": 2.4071555370079327, "learning_rate": 2.6548428479794743e-06, "loss": 0.8102, "step": 40665 }, { "epoch": 0.4956552472182614, "grad_norm": 1.9510353457659808, "learning_rate": 2.6545221295702377e-06, "loss": 0.6908, "step": 40670 }, { "epoch": 0.4957161834424092, "grad_norm": 2.9241883942804865, "learning_rate": 2.654201411161001e-06, "loss": 0.747, "step": 40675 }, { "epoch": 0.49577711966655696, "grad_norm": 2.42500446645143, "learning_rate": 2.653880692751764e-06, "loss": 0.7675, "step": 40680 }, { "epoch": 0.4958380558907048, "grad_norm": 2.3796768477135504, "learning_rate": 2.653559974342527e-06, "loss": 0.7236, "step": 40685 }, { "epoch": 0.4958989921148526, "grad_norm": 2.57250256898191, "learning_rate": 2.653239255933291e-06, "loss": 0.7603, "step": 40690 }, { "epoch": 0.4959599283390004, "grad_norm": 2.989699977426844, "learning_rate": 2.652918537524054e-06, "loss": 0.7737, "step": 40695 }, { "epoch": 0.4960208645631482, "grad_norm": 2.2245961938899774, "learning_rate": 2.652597819114817e-06, "loss": 0.789, "step": 40700 }, { "epoch": 0.496081800787296, "grad_norm": 2.60126523740246, "learning_rate": 2.652277100705581e-06, "loss": 0.7559, "step": 40705 }, { "epoch": 0.49614273701144385, "grad_norm": 2.2616438552999134, "learning_rate": 2.651956382296344e-06, "loss": 0.7813, "step": 40710 }, { "epoch": 0.4962036732355916, "grad_norm": 2.538198578110047, "learning_rate": 2.651635663887107e-06, "loss": 0.7408, "step": 40715 }, { "epoch": 0.49626460945973944, "grad_norm": 2.822121650177565, "learning_rate": 2.651314945477871e-06, "loss": 0.7574, "step": 40720 }, { "epoch": 0.49632554568388726, "grad_norm": 2.3571441883419157, "learning_rate": 2.650994227068634e-06, "loss": 0.7827, "step": 40725 }, { "epoch": 0.496386481908035, "grad_norm": 2.4184908415801987, "learning_rate": 2.6506735086593973e-06, "loss": 0.7641, "step": 40730 }, { "epoch": 0.49644741813218285, "grad_norm": 3.7387615838834343, "learning_rate": 2.6503527902501607e-06, "loss": 0.8165, "step": 40735 }, { "epoch": 0.4965083543563307, "grad_norm": 2.6497208277410147, "learning_rate": 2.6500320718409237e-06, "loss": 0.7498, "step": 40740 }, { "epoch": 0.4965692905804785, "grad_norm": 2.250896753620089, "learning_rate": 2.649711353431687e-06, "loss": 0.7833, "step": 40745 }, { "epoch": 0.49663022680462626, "grad_norm": 2.4583702350713805, "learning_rate": 2.6493906350224506e-06, "loss": 0.7279, "step": 40750 }, { "epoch": 0.4966911630287741, "grad_norm": 2.259285223688454, "learning_rate": 2.649069916613214e-06, "loss": 0.7427, "step": 40755 }, { "epoch": 0.4967520992529219, "grad_norm": 2.196200220906682, "learning_rate": 2.648749198203977e-06, "loss": 0.775, "step": 40760 }, { "epoch": 0.4968130354770697, "grad_norm": 2.2055649831273203, "learning_rate": 2.64842847979474e-06, "loss": 0.6996, "step": 40765 }, { "epoch": 0.4968739717012175, "grad_norm": 2.5675802647684325, "learning_rate": 2.648107761385504e-06, "loss": 0.8067, "step": 40770 }, { "epoch": 0.4969349079253653, "grad_norm": 2.823496873713194, "learning_rate": 2.647787042976267e-06, "loss": 0.7995, "step": 40775 }, { "epoch": 0.49699584414951314, "grad_norm": 2.966073527149379, "learning_rate": 2.64746632456703e-06, "loss": 0.6799, "step": 40780 }, { "epoch": 0.4970567803736609, "grad_norm": 2.4083515401205253, "learning_rate": 2.647145606157794e-06, "loss": 0.7391, "step": 40785 }, { "epoch": 0.49711771659780873, "grad_norm": 2.3589384556656063, "learning_rate": 2.646824887748557e-06, "loss": 0.7792, "step": 40790 }, { "epoch": 0.49717865282195656, "grad_norm": 2.8674935530384973, "learning_rate": 2.64650416933932e-06, "loss": 0.7406, "step": 40795 }, { "epoch": 0.4972395890461043, "grad_norm": 2.629443374976385, "learning_rate": 2.6461834509300838e-06, "loss": 0.7745, "step": 40800 }, { "epoch": 0.49730052527025215, "grad_norm": 2.7317836450444117, "learning_rate": 2.6458627325208468e-06, "loss": 0.8172, "step": 40805 }, { "epoch": 0.49736146149439997, "grad_norm": 3.3302505321013625, "learning_rate": 2.64554201411161e-06, "loss": 0.7474, "step": 40810 }, { "epoch": 0.4974223977185478, "grad_norm": 2.3717293424597883, "learning_rate": 2.6452212957023736e-06, "loss": 0.7399, "step": 40815 }, { "epoch": 0.49748333394269556, "grad_norm": 2.8165140403386912, "learning_rate": 2.6449005772931367e-06, "loss": 0.7917, "step": 40820 }, { "epoch": 0.4975442701668434, "grad_norm": 2.4784746107027282, "learning_rate": 2.6445798588839e-06, "loss": 0.7031, "step": 40825 }, { "epoch": 0.4976052063909912, "grad_norm": 2.6663413783067584, "learning_rate": 2.6442591404746635e-06, "loss": 0.7749, "step": 40830 }, { "epoch": 0.497666142615139, "grad_norm": 2.977681070095432, "learning_rate": 2.643938422065427e-06, "loss": 0.7943, "step": 40835 }, { "epoch": 0.4977270788392868, "grad_norm": 2.6613025357555147, "learning_rate": 2.64361770365619e-06, "loss": 0.7784, "step": 40840 }, { "epoch": 0.4977880150634346, "grad_norm": 2.4480349662022256, "learning_rate": 2.643296985246954e-06, "loss": 0.7714, "step": 40845 }, { "epoch": 0.49784895128758244, "grad_norm": 2.4023615583698517, "learning_rate": 2.642976266837717e-06, "loss": 0.7951, "step": 40850 }, { "epoch": 0.4979098875117302, "grad_norm": 2.551825638382663, "learning_rate": 2.64265554842848e-06, "loss": 0.7156, "step": 40855 }, { "epoch": 0.49797082373587803, "grad_norm": 2.77900270370914, "learning_rate": 2.642334830019243e-06, "loss": 0.7237, "step": 40860 }, { "epoch": 0.49803175996002585, "grad_norm": 2.619288464446842, "learning_rate": 2.6420141116100068e-06, "loss": 0.6918, "step": 40865 }, { "epoch": 0.4980926961841736, "grad_norm": 2.734642984724681, "learning_rate": 2.64169339320077e-06, "loss": 0.7559, "step": 40870 }, { "epoch": 0.49815363240832145, "grad_norm": 3.0010013733286818, "learning_rate": 2.6413726747915332e-06, "loss": 0.7184, "step": 40875 }, { "epoch": 0.49821456863246927, "grad_norm": 2.31726791196573, "learning_rate": 2.6410519563822967e-06, "loss": 0.7204, "step": 40880 }, { "epoch": 0.4982755048566171, "grad_norm": 2.652477168448403, "learning_rate": 2.6407312379730597e-06, "loss": 0.7456, "step": 40885 }, { "epoch": 0.49833644108076486, "grad_norm": 2.6811271398689147, "learning_rate": 2.640410519563823e-06, "loss": 0.7712, "step": 40890 }, { "epoch": 0.4983973773049127, "grad_norm": 2.2367038159700745, "learning_rate": 2.6400898011545866e-06, "loss": 0.7663, "step": 40895 }, { "epoch": 0.4984583135290605, "grad_norm": 2.3803251994237176, "learning_rate": 2.63976908274535e-06, "loss": 0.7596, "step": 40900 }, { "epoch": 0.49851924975320827, "grad_norm": 2.69171768940534, "learning_rate": 2.639448364336113e-06, "loss": 0.8403, "step": 40905 }, { "epoch": 0.4985801859773561, "grad_norm": 3.388899855584336, "learning_rate": 2.6391276459268765e-06, "loss": 0.7368, "step": 40910 }, { "epoch": 0.4986411222015039, "grad_norm": 2.858616454167907, "learning_rate": 2.63880692751764e-06, "loss": 0.8027, "step": 40915 }, { "epoch": 0.49870205842565174, "grad_norm": 2.4503098591384194, "learning_rate": 2.638486209108403e-06, "loss": 0.7271, "step": 40920 }, { "epoch": 0.4987629946497995, "grad_norm": 2.5960417115831897, "learning_rate": 2.638165490699167e-06, "loss": 0.8456, "step": 40925 }, { "epoch": 0.49882393087394733, "grad_norm": 2.454889789645089, "learning_rate": 2.63784477228993e-06, "loss": 0.6928, "step": 40930 }, { "epoch": 0.49888486709809515, "grad_norm": 2.251909267905545, "learning_rate": 2.637524053880693e-06, "loss": 0.7554, "step": 40935 }, { "epoch": 0.4989458033222429, "grad_norm": 2.1800489860301813, "learning_rate": 2.637203335471456e-06, "loss": 0.7038, "step": 40940 }, { "epoch": 0.49900673954639074, "grad_norm": 2.2819756042445136, "learning_rate": 2.6368826170622197e-06, "loss": 0.7581, "step": 40945 }, { "epoch": 0.49906767577053857, "grad_norm": 2.776342625750666, "learning_rate": 2.6365618986529827e-06, "loss": 0.7297, "step": 40950 }, { "epoch": 0.4991286119946864, "grad_norm": 3.181183338781797, "learning_rate": 2.636241180243746e-06, "loss": 0.7531, "step": 40955 }, { "epoch": 0.49918954821883416, "grad_norm": 2.936202808405694, "learning_rate": 2.6359204618345096e-06, "loss": 0.713, "step": 40960 }, { "epoch": 0.499250484442982, "grad_norm": 2.1970435361368144, "learning_rate": 2.6355997434252726e-06, "loss": 0.8369, "step": 40965 }, { "epoch": 0.4993114206671298, "grad_norm": 2.937660860273068, "learning_rate": 2.635279025016036e-06, "loss": 0.8194, "step": 40970 }, { "epoch": 0.49937235689127757, "grad_norm": 2.771460661712293, "learning_rate": 2.6349583066067995e-06, "loss": 0.7071, "step": 40975 }, { "epoch": 0.4994332931154254, "grad_norm": 3.2640419665053555, "learning_rate": 2.634637588197563e-06, "loss": 0.8378, "step": 40980 }, { "epoch": 0.4994942293395732, "grad_norm": 2.5661910963914383, "learning_rate": 2.634316869788326e-06, "loss": 0.7821, "step": 40985 }, { "epoch": 0.499555165563721, "grad_norm": 2.6038544791409874, "learning_rate": 2.6339961513790894e-06, "loss": 0.8029, "step": 40990 }, { "epoch": 0.4996161017878688, "grad_norm": 2.450946983233917, "learning_rate": 2.633675432969853e-06, "loss": 0.7426, "step": 40995 }, { "epoch": 0.4996770380120166, "grad_norm": 2.322080934468724, "learning_rate": 2.633354714560616e-06, "loss": 0.6742, "step": 41000 }, { "epoch": 0.49973797423616445, "grad_norm": 2.3093259644126234, "learning_rate": 2.6330339961513797e-06, "loss": 0.7261, "step": 41005 }, { "epoch": 0.4997989104603122, "grad_norm": 2.323629159107253, "learning_rate": 2.6327132777421427e-06, "loss": 0.7778, "step": 41010 }, { "epoch": 0.49985984668446004, "grad_norm": 3.5550215382909114, "learning_rate": 2.6323925593329058e-06, "loss": 0.8106, "step": 41015 }, { "epoch": 0.49992078290860786, "grad_norm": 2.420431545732328, "learning_rate": 2.6320718409236688e-06, "loss": 0.7596, "step": 41020 }, { "epoch": 0.49998171913275563, "grad_norm": 3.130866154742879, "learning_rate": 2.6317511225144326e-06, "loss": 0.6833, "step": 41025 }, { "epoch": 0.5000426553569035, "grad_norm": 2.605643499904424, "learning_rate": 2.6314304041051957e-06, "loss": 0.6968, "step": 41030 }, { "epoch": 0.5001035915810512, "grad_norm": 2.1961623873703955, "learning_rate": 2.631109685695959e-06, "loss": 0.7208, "step": 41035 }, { "epoch": 0.500164527805199, "grad_norm": 2.4899331013914137, "learning_rate": 2.6307889672867225e-06, "loss": 0.8257, "step": 41040 }, { "epoch": 0.5002254640293469, "grad_norm": 2.5253410436365002, "learning_rate": 2.6304682488774855e-06, "loss": 0.7511, "step": 41045 }, { "epoch": 0.5002864002534947, "grad_norm": 2.1547396070580835, "learning_rate": 2.630147530468249e-06, "loss": 0.7537, "step": 41050 }, { "epoch": 0.5003473364776425, "grad_norm": 2.396895166334986, "learning_rate": 2.6298268120590124e-06, "loss": 0.7266, "step": 41055 }, { "epoch": 0.5004082727017903, "grad_norm": 2.3516828898849305, "learning_rate": 2.629506093649776e-06, "loss": 0.727, "step": 41060 }, { "epoch": 0.5004692089259382, "grad_norm": 2.3943515855654445, "learning_rate": 2.629185375240539e-06, "loss": 0.7246, "step": 41065 }, { "epoch": 0.5005301451500859, "grad_norm": 3.2016348078082872, "learning_rate": 2.6288646568313027e-06, "loss": 0.7039, "step": 41070 }, { "epoch": 0.5005910813742337, "grad_norm": 3.412906691648727, "learning_rate": 2.6285439384220658e-06, "loss": 0.7339, "step": 41075 }, { "epoch": 0.5006520175983815, "grad_norm": 2.2187457628761833, "learning_rate": 2.6282232200128288e-06, "loss": 0.7088, "step": 41080 }, { "epoch": 0.5007129538225293, "grad_norm": 2.3936278457519995, "learning_rate": 2.6279025016035926e-06, "loss": 0.8067, "step": 41085 }, { "epoch": 0.5007738900466772, "grad_norm": 2.359851689647234, "learning_rate": 2.6275817831943557e-06, "loss": 0.69, "step": 41090 }, { "epoch": 0.500834826270825, "grad_norm": 2.5596996516946326, "learning_rate": 2.6272610647851187e-06, "loss": 0.789, "step": 41095 }, { "epoch": 0.5008957624949728, "grad_norm": 2.5513733503141305, "learning_rate": 2.6269403463758825e-06, "loss": 0.772, "step": 41100 }, { "epoch": 0.5009566987191205, "grad_norm": 2.5506519810800756, "learning_rate": 2.6266196279666456e-06, "loss": 0.763, "step": 41105 }, { "epoch": 0.5010176349432683, "grad_norm": 3.030563236178984, "learning_rate": 2.6262989095574086e-06, "loss": 0.7674, "step": 41110 }, { "epoch": 0.5010785711674162, "grad_norm": 2.357411827710843, "learning_rate": 2.625978191148172e-06, "loss": 0.745, "step": 41115 }, { "epoch": 0.501139507391564, "grad_norm": 3.8542062634391474, "learning_rate": 2.6256574727389355e-06, "loss": 0.7089, "step": 41120 }, { "epoch": 0.5012004436157118, "grad_norm": 2.8317981524248856, "learning_rate": 2.625336754329699e-06, "loss": 0.6898, "step": 41125 }, { "epoch": 0.5012613798398596, "grad_norm": 2.4143469435278218, "learning_rate": 2.625016035920462e-06, "loss": 0.7847, "step": 41130 }, { "epoch": 0.5013223160640075, "grad_norm": 2.417158994218234, "learning_rate": 2.6246953175112254e-06, "loss": 0.7134, "step": 41135 }, { "epoch": 0.5013832522881552, "grad_norm": 2.544490210820878, "learning_rate": 2.624374599101989e-06, "loss": 0.7654, "step": 41140 }, { "epoch": 0.501444188512303, "grad_norm": 2.096969636443557, "learning_rate": 2.624053880692752e-06, "loss": 0.7607, "step": 41145 }, { "epoch": 0.5015051247364508, "grad_norm": 2.5647773228864335, "learning_rate": 2.6237331622835157e-06, "loss": 0.7549, "step": 41150 }, { "epoch": 0.5015660609605986, "grad_norm": 2.7080977913332447, "learning_rate": 2.6234124438742787e-06, "loss": 0.7504, "step": 41155 }, { "epoch": 0.5016269971847465, "grad_norm": 2.594556293619908, "learning_rate": 2.6230917254650417e-06, "loss": 0.7419, "step": 41160 }, { "epoch": 0.5016879334088943, "grad_norm": 2.09368951168177, "learning_rate": 2.6227710070558056e-06, "loss": 0.8016, "step": 41165 }, { "epoch": 0.5017488696330421, "grad_norm": 2.1576468149901564, "learning_rate": 2.6224502886465686e-06, "loss": 0.7378, "step": 41170 }, { "epoch": 0.5018098058571898, "grad_norm": 2.469546639917776, "learning_rate": 2.6221295702373316e-06, "loss": 0.7336, "step": 41175 }, { "epoch": 0.5018707420813376, "grad_norm": 2.2456619244799882, "learning_rate": 2.6218088518280955e-06, "loss": 0.7324, "step": 41180 }, { "epoch": 0.5019316783054855, "grad_norm": 3.227665390249476, "learning_rate": 2.6214881334188585e-06, "loss": 0.7197, "step": 41185 }, { "epoch": 0.5019926145296333, "grad_norm": 3.2683657700943693, "learning_rate": 2.6211674150096215e-06, "loss": 0.7574, "step": 41190 }, { "epoch": 0.5020535507537811, "grad_norm": 3.540834882020255, "learning_rate": 2.620846696600385e-06, "loss": 0.772, "step": 41195 }, { "epoch": 0.5021144869779289, "grad_norm": 2.3227505687368017, "learning_rate": 2.6205259781911484e-06, "loss": 0.7907, "step": 41200 }, { "epoch": 0.5021754232020768, "grad_norm": 3.1616632738271733, "learning_rate": 2.620205259781912e-06, "loss": 0.8175, "step": 41205 }, { "epoch": 0.5022363594262245, "grad_norm": 2.442427605247273, "learning_rate": 2.619884541372675e-06, "loss": 0.6707, "step": 41210 }, { "epoch": 0.5022972956503723, "grad_norm": 2.8045684068385373, "learning_rate": 2.6195638229634383e-06, "loss": 0.7751, "step": 41215 }, { "epoch": 0.5023582318745201, "grad_norm": 1.9522392301930995, "learning_rate": 2.6192431045542017e-06, "loss": 0.7381, "step": 41220 }, { "epoch": 0.5024191680986679, "grad_norm": 2.5284111375550973, "learning_rate": 2.6189223861449647e-06, "loss": 0.7297, "step": 41225 }, { "epoch": 0.5024801043228158, "grad_norm": 1.9669402781478207, "learning_rate": 2.6186016677357286e-06, "loss": 0.6573, "step": 41230 }, { "epoch": 0.5025410405469636, "grad_norm": 2.296277811654036, "learning_rate": 2.6182809493264916e-06, "loss": 0.7463, "step": 41235 }, { "epoch": 0.5026019767711114, "grad_norm": 2.3889408067773106, "learning_rate": 2.6179602309172546e-06, "loss": 0.7099, "step": 41240 }, { "epoch": 0.5026629129952591, "grad_norm": 2.1104405770734274, "learning_rate": 2.6176395125080185e-06, "loss": 0.7593, "step": 41245 }, { "epoch": 0.5027238492194069, "grad_norm": 2.555063061533378, "learning_rate": 2.6173187940987815e-06, "loss": 0.7042, "step": 41250 }, { "epoch": 0.5027847854435548, "grad_norm": 2.1240134832587847, "learning_rate": 2.6169980756895445e-06, "loss": 0.8075, "step": 41255 }, { "epoch": 0.5028457216677026, "grad_norm": 2.541196997105321, "learning_rate": 2.6166773572803084e-06, "loss": 0.7217, "step": 41260 }, { "epoch": 0.5029066578918504, "grad_norm": 3.3853787588246385, "learning_rate": 2.6163566388710714e-06, "loss": 0.7432, "step": 41265 }, { "epoch": 0.5029675941159982, "grad_norm": 2.590059932827894, "learning_rate": 2.6160359204618344e-06, "loss": 0.734, "step": 41270 }, { "epoch": 0.503028530340146, "grad_norm": 2.044361008269482, "learning_rate": 2.615715202052598e-06, "loss": 0.7523, "step": 41275 }, { "epoch": 0.5030894665642938, "grad_norm": 2.775008347510773, "learning_rate": 2.6153944836433613e-06, "loss": 0.8171, "step": 41280 }, { "epoch": 0.5031504027884416, "grad_norm": 2.7164187429787754, "learning_rate": 2.6150737652341248e-06, "loss": 0.7781, "step": 41285 }, { "epoch": 0.5032113390125894, "grad_norm": 2.564771633589314, "learning_rate": 2.6147530468248878e-06, "loss": 0.8163, "step": 41290 }, { "epoch": 0.5032722752367372, "grad_norm": 2.3189921928945716, "learning_rate": 2.614432328415651e-06, "loss": 0.8094, "step": 41295 }, { "epoch": 0.503333211460885, "grad_norm": 2.565241228320825, "learning_rate": 2.6141116100064147e-06, "loss": 0.805, "step": 41300 }, { "epoch": 0.5033941476850329, "grad_norm": 3.533431893631639, "learning_rate": 2.6137908915971777e-06, "loss": 0.7899, "step": 41305 }, { "epoch": 0.5034550839091807, "grad_norm": 2.112086480828547, "learning_rate": 2.6134701731879415e-06, "loss": 0.7993, "step": 41310 }, { "epoch": 0.5035160201333284, "grad_norm": 1.9397315817941174, "learning_rate": 2.6131494547787045e-06, "loss": 0.7443, "step": 41315 }, { "epoch": 0.5035769563574762, "grad_norm": 2.14170171870089, "learning_rate": 2.6128287363694676e-06, "loss": 0.752, "step": 41320 }, { "epoch": 0.5036378925816241, "grad_norm": 2.538616090662014, "learning_rate": 2.6125080179602314e-06, "loss": 0.718, "step": 41325 }, { "epoch": 0.5036988288057719, "grad_norm": 2.067186049977193, "learning_rate": 2.6121872995509944e-06, "loss": 0.723, "step": 41330 }, { "epoch": 0.5037597650299197, "grad_norm": 2.621467905544647, "learning_rate": 2.6118665811417575e-06, "loss": 0.7545, "step": 41335 }, { "epoch": 0.5038207012540675, "grad_norm": 2.517280981742695, "learning_rate": 2.6115458627325213e-06, "loss": 0.699, "step": 41340 }, { "epoch": 0.5038816374782153, "grad_norm": 2.151504065622651, "learning_rate": 2.6112251443232843e-06, "loss": 0.7204, "step": 41345 }, { "epoch": 0.5039425737023631, "grad_norm": 4.2142054426883355, "learning_rate": 2.6109044259140478e-06, "loss": 0.7028, "step": 41350 }, { "epoch": 0.5040035099265109, "grad_norm": 2.946865455416725, "learning_rate": 2.610583707504811e-06, "loss": 0.6619, "step": 41355 }, { "epoch": 0.5040644461506587, "grad_norm": 2.0678516887791423, "learning_rate": 2.6102629890955742e-06, "loss": 0.7762, "step": 41360 }, { "epoch": 0.5041253823748065, "grad_norm": 2.2723744571534135, "learning_rate": 2.6099422706863377e-06, "loss": 0.7732, "step": 41365 }, { "epoch": 0.5041863185989544, "grad_norm": 2.76424154213914, "learning_rate": 2.6096215522771007e-06, "loss": 0.7995, "step": 41370 }, { "epoch": 0.5042472548231022, "grad_norm": 5.2954889290324205, "learning_rate": 2.6093008338678646e-06, "loss": 0.8027, "step": 41375 }, { "epoch": 0.50430819104725, "grad_norm": 2.3165769682599167, "learning_rate": 2.6089801154586276e-06, "loss": 0.7741, "step": 41380 }, { "epoch": 0.5043691272713977, "grad_norm": 4.291821210311519, "learning_rate": 2.6086593970493906e-06, "loss": 0.7423, "step": 41385 }, { "epoch": 0.5044300634955455, "grad_norm": 2.7440088903699675, "learning_rate": 2.6083386786401545e-06, "loss": 0.7696, "step": 41390 }, { "epoch": 0.5044909997196934, "grad_norm": 2.7321997411533827, "learning_rate": 2.6080179602309175e-06, "loss": 0.7946, "step": 41395 }, { "epoch": 0.5045519359438412, "grad_norm": 2.547873523571255, "learning_rate": 2.6076972418216805e-06, "loss": 0.7592, "step": 41400 }, { "epoch": 0.504612872167989, "grad_norm": 2.90159234722442, "learning_rate": 2.6073765234124444e-06, "loss": 0.8115, "step": 41405 }, { "epoch": 0.5046738083921368, "grad_norm": 2.7967724265671534, "learning_rate": 2.6070558050032074e-06, "loss": 0.7542, "step": 41410 }, { "epoch": 0.5047347446162846, "grad_norm": 3.0647841510686455, "learning_rate": 2.6067350865939704e-06, "loss": 0.7385, "step": 41415 }, { "epoch": 0.5047956808404324, "grad_norm": 2.7959934082470235, "learning_rate": 2.6064143681847343e-06, "loss": 0.8318, "step": 41420 }, { "epoch": 0.5048566170645802, "grad_norm": 2.463049264782669, "learning_rate": 2.6060936497754973e-06, "loss": 0.7555, "step": 41425 }, { "epoch": 0.504917553288728, "grad_norm": 2.387763893287882, "learning_rate": 2.6057729313662607e-06, "loss": 0.7193, "step": 41430 }, { "epoch": 0.5049784895128758, "grad_norm": 2.400771047645921, "learning_rate": 2.605452212957024e-06, "loss": 0.7591, "step": 41435 }, { "epoch": 0.5050394257370237, "grad_norm": 2.3074724584492463, "learning_rate": 2.605131494547787e-06, "loss": 0.7653, "step": 41440 }, { "epoch": 0.5051003619611715, "grad_norm": 2.6988352668329956, "learning_rate": 2.6048107761385506e-06, "loss": 0.7907, "step": 41445 }, { "epoch": 0.5051612981853193, "grad_norm": 2.6454971695223812, "learning_rate": 2.6044900577293136e-06, "loss": 0.7688, "step": 41450 }, { "epoch": 0.505222234409467, "grad_norm": 2.0989632941901086, "learning_rate": 2.6041693393200775e-06, "loss": 0.7189, "step": 41455 }, { "epoch": 0.5052831706336148, "grad_norm": 2.576542666667962, "learning_rate": 2.6038486209108405e-06, "loss": 0.7313, "step": 41460 }, { "epoch": 0.5053441068577627, "grad_norm": 2.164611173461115, "learning_rate": 2.6035279025016035e-06, "loss": 0.7506, "step": 41465 }, { "epoch": 0.5054050430819105, "grad_norm": 2.435914375444445, "learning_rate": 2.6032071840923674e-06, "loss": 0.6551, "step": 41470 }, { "epoch": 0.5054659793060583, "grad_norm": 2.8521510722383554, "learning_rate": 2.6028864656831304e-06, "loss": 0.6946, "step": 41475 }, { "epoch": 0.5055269155302061, "grad_norm": 3.5673741485203214, "learning_rate": 2.6025657472738934e-06, "loss": 0.7166, "step": 41480 }, { "epoch": 0.505587851754354, "grad_norm": 2.56080562641002, "learning_rate": 2.6022450288646573e-06, "loss": 0.7556, "step": 41485 }, { "epoch": 0.5056487879785017, "grad_norm": 2.3588999677863747, "learning_rate": 2.6019243104554203e-06, "loss": 0.794, "step": 41490 }, { "epoch": 0.5057097242026495, "grad_norm": 2.5511702437779844, "learning_rate": 2.6016035920461833e-06, "loss": 0.7218, "step": 41495 }, { "epoch": 0.5057706604267973, "grad_norm": 3.184168919738491, "learning_rate": 2.601282873636947e-06, "loss": 0.7513, "step": 41500 }, { "epoch": 0.5058315966509451, "grad_norm": 2.094108515916754, "learning_rate": 2.60096215522771e-06, "loss": 0.7055, "step": 41505 }, { "epoch": 0.505892532875093, "grad_norm": 2.393778365201715, "learning_rate": 2.6006414368184736e-06, "loss": 0.723, "step": 41510 }, { "epoch": 0.5059534690992408, "grad_norm": 2.0411960039486376, "learning_rate": 2.600320718409237e-06, "loss": 0.7899, "step": 41515 }, { "epoch": 0.5060144053233886, "grad_norm": 2.3604832517963765, "learning_rate": 2.6e-06, "loss": 0.7683, "step": 41520 }, { "epoch": 0.5060753415475363, "grad_norm": 2.3553852289922976, "learning_rate": 2.5996792815907635e-06, "loss": 0.6762, "step": 41525 }, { "epoch": 0.5061362777716841, "grad_norm": 3.0819233009319102, "learning_rate": 2.5993585631815266e-06, "loss": 0.7424, "step": 41530 }, { "epoch": 0.506197213995832, "grad_norm": 2.4182128757541066, "learning_rate": 2.5990378447722904e-06, "loss": 0.7585, "step": 41535 }, { "epoch": 0.5062581502199798, "grad_norm": 2.3855099773290633, "learning_rate": 2.5987171263630534e-06, "loss": 0.7182, "step": 41540 }, { "epoch": 0.5063190864441276, "grad_norm": 2.1473993074414555, "learning_rate": 2.5983964079538164e-06, "loss": 0.7574, "step": 41545 }, { "epoch": 0.5063800226682754, "grad_norm": 2.4597162409197137, "learning_rate": 2.5980756895445803e-06, "loss": 0.7793, "step": 41550 }, { "epoch": 0.5064409588924232, "grad_norm": 2.521051433259515, "learning_rate": 2.5977549711353433e-06, "loss": 0.7942, "step": 41555 }, { "epoch": 0.506501895116571, "grad_norm": 2.5689794922340714, "learning_rate": 2.5974342527261063e-06, "loss": 0.7239, "step": 41560 }, { "epoch": 0.5065628313407188, "grad_norm": 2.807557096946934, "learning_rate": 2.59711353431687e-06, "loss": 0.7578, "step": 41565 }, { "epoch": 0.5066237675648666, "grad_norm": 2.166007655068786, "learning_rate": 2.5967928159076332e-06, "loss": 0.705, "step": 41570 }, { "epoch": 0.5066847037890144, "grad_norm": 2.4381939690146006, "learning_rate": 2.5964720974983967e-06, "loss": 0.7376, "step": 41575 }, { "epoch": 0.5067456400131622, "grad_norm": 2.3360562733317796, "learning_rate": 2.59615137908916e-06, "loss": 0.755, "step": 41580 }, { "epoch": 0.5068065762373101, "grad_norm": 2.4761995513031114, "learning_rate": 2.595830660679923e-06, "loss": 0.7758, "step": 41585 }, { "epoch": 0.5068675124614579, "grad_norm": 2.804637128401857, "learning_rate": 2.5955099422706866e-06, "loss": 0.793, "step": 41590 }, { "epoch": 0.5069284486856056, "grad_norm": 2.0069154555536497, "learning_rate": 2.59518922386145e-06, "loss": 0.701, "step": 41595 }, { "epoch": 0.5069893849097534, "grad_norm": 2.4302708822564703, "learning_rate": 2.5948685054522134e-06, "loss": 0.7805, "step": 41600 }, { "epoch": 0.5070503211339012, "grad_norm": 2.457944133812704, "learning_rate": 2.5945477870429765e-06, "loss": 0.8184, "step": 41605 }, { "epoch": 0.5071112573580491, "grad_norm": 4.278965038970287, "learning_rate": 2.5942270686337395e-06, "loss": 0.7127, "step": 41610 }, { "epoch": 0.5071721935821969, "grad_norm": 2.4343243392872944, "learning_rate": 2.5939063502245033e-06, "loss": 0.7863, "step": 41615 }, { "epoch": 0.5072331298063447, "grad_norm": 3.2545806912737207, "learning_rate": 2.5935856318152664e-06, "loss": 0.7976, "step": 41620 }, { "epoch": 0.5072940660304925, "grad_norm": 2.563050906529982, "learning_rate": 2.5932649134060294e-06, "loss": 0.8466, "step": 41625 }, { "epoch": 0.5073550022546403, "grad_norm": 2.4495620165265963, "learning_rate": 2.5929441949967932e-06, "loss": 0.7494, "step": 41630 }, { "epoch": 0.5074159384787881, "grad_norm": 2.422303344767346, "learning_rate": 2.5926234765875563e-06, "loss": 0.7141, "step": 41635 }, { "epoch": 0.5074768747029359, "grad_norm": 2.6450507704985675, "learning_rate": 2.5923027581783193e-06, "loss": 0.7665, "step": 41640 }, { "epoch": 0.5075378109270837, "grad_norm": 2.3241251280963255, "learning_rate": 2.591982039769083e-06, "loss": 0.7341, "step": 41645 }, { "epoch": 0.5075987471512315, "grad_norm": 1.6304454755935396, "learning_rate": 2.591661321359846e-06, "loss": 0.7733, "step": 41650 }, { "epoch": 0.5076596833753794, "grad_norm": 2.4893126047164738, "learning_rate": 2.5913406029506096e-06, "loss": 0.8112, "step": 41655 }, { "epoch": 0.5077206195995272, "grad_norm": 2.109043860791332, "learning_rate": 2.591019884541373e-06, "loss": 0.7517, "step": 41660 }, { "epoch": 0.5077815558236749, "grad_norm": 2.8990027609097613, "learning_rate": 2.590699166132136e-06, "loss": 0.7606, "step": 41665 }, { "epoch": 0.5078424920478227, "grad_norm": 3.304586861155675, "learning_rate": 2.5903784477228995e-06, "loss": 0.8188, "step": 41670 }, { "epoch": 0.5079034282719705, "grad_norm": 2.4328620661082843, "learning_rate": 2.590057729313663e-06, "loss": 0.7437, "step": 41675 }, { "epoch": 0.5079643644961184, "grad_norm": 3.5160785367081524, "learning_rate": 2.5897370109044264e-06, "loss": 0.7426, "step": 41680 }, { "epoch": 0.5080253007202662, "grad_norm": 2.358761788008986, "learning_rate": 2.5894162924951894e-06, "loss": 0.7694, "step": 41685 }, { "epoch": 0.508086236944414, "grad_norm": 2.461434799501215, "learning_rate": 2.589095574085953e-06, "loss": 0.7236, "step": 41690 }, { "epoch": 0.5081471731685618, "grad_norm": 2.6460351841363696, "learning_rate": 2.5887748556767163e-06, "loss": 0.7906, "step": 41695 }, { "epoch": 0.5082081093927096, "grad_norm": 2.2551451422749365, "learning_rate": 2.5884541372674793e-06, "loss": 0.7223, "step": 41700 }, { "epoch": 0.5082690456168574, "grad_norm": 2.580336292019964, "learning_rate": 2.5881334188582423e-06, "loss": 0.7449, "step": 41705 }, { "epoch": 0.5083299818410052, "grad_norm": 1.9743949921034754, "learning_rate": 2.587812700449006e-06, "loss": 0.8434, "step": 41710 }, { "epoch": 0.508390918065153, "grad_norm": 2.4623299267557406, "learning_rate": 2.587491982039769e-06, "loss": 0.731, "step": 41715 }, { "epoch": 0.5084518542893008, "grad_norm": 2.4136790745502736, "learning_rate": 2.587171263630532e-06, "loss": 0.7877, "step": 41720 }, { "epoch": 0.5085127905134487, "grad_norm": 1.9554114198662282, "learning_rate": 2.586850545221296e-06, "loss": 0.7813, "step": 41725 }, { "epoch": 0.5085737267375964, "grad_norm": 2.455996219787612, "learning_rate": 2.586529826812059e-06, "loss": 0.7906, "step": 41730 }, { "epoch": 0.5086346629617442, "grad_norm": 2.226794996504029, "learning_rate": 2.5862091084028225e-06, "loss": 0.7162, "step": 41735 }, { "epoch": 0.508695599185892, "grad_norm": 3.5935082796006843, "learning_rate": 2.585888389993586e-06, "loss": 0.6941, "step": 41740 }, { "epoch": 0.5087565354100398, "grad_norm": 2.908092188883076, "learning_rate": 2.585567671584349e-06, "loss": 0.8705, "step": 41745 }, { "epoch": 0.5088174716341877, "grad_norm": 2.530798034521738, "learning_rate": 2.5852469531751124e-06, "loss": 0.7549, "step": 41750 }, { "epoch": 0.5088784078583355, "grad_norm": 3.0726370982772124, "learning_rate": 2.584926234765876e-06, "loss": 0.797, "step": 41755 }, { "epoch": 0.5089393440824833, "grad_norm": 2.7258785508787815, "learning_rate": 2.5846055163566393e-06, "loss": 0.7254, "step": 41760 }, { "epoch": 0.509000280306631, "grad_norm": 2.8958927569960005, "learning_rate": 2.5842847979474023e-06, "loss": 0.7054, "step": 41765 }, { "epoch": 0.5090612165307788, "grad_norm": 2.688427132872671, "learning_rate": 2.583964079538166e-06, "loss": 0.7803, "step": 41770 }, { "epoch": 0.5091221527549267, "grad_norm": 2.3078453999636848, "learning_rate": 2.583643361128929e-06, "loss": 0.728, "step": 41775 }, { "epoch": 0.5091830889790745, "grad_norm": 2.2982797537298287, "learning_rate": 2.5833226427196922e-06, "loss": 0.7397, "step": 41780 }, { "epoch": 0.5092440252032223, "grad_norm": 2.6915189521013194, "learning_rate": 2.5830019243104552e-06, "loss": 0.7721, "step": 41785 }, { "epoch": 0.5093049614273701, "grad_norm": 2.413519656616152, "learning_rate": 2.582681205901219e-06, "loss": 0.768, "step": 41790 }, { "epoch": 0.509365897651518, "grad_norm": 2.3380861895797835, "learning_rate": 2.582360487491982e-06, "loss": 0.7606, "step": 41795 }, { "epoch": 0.5094268338756657, "grad_norm": 2.61684551075939, "learning_rate": 2.5820397690827456e-06, "loss": 0.765, "step": 41800 }, { "epoch": 0.5094877700998135, "grad_norm": 2.52270578211013, "learning_rate": 2.581719050673509e-06, "loss": 0.6758, "step": 41805 }, { "epoch": 0.5095487063239613, "grad_norm": 2.2852678715009325, "learning_rate": 2.581398332264272e-06, "loss": 0.7498, "step": 41810 }, { "epoch": 0.5096096425481091, "grad_norm": 2.3052935762197073, "learning_rate": 2.5810776138550354e-06, "loss": 0.7902, "step": 41815 }, { "epoch": 0.509670578772257, "grad_norm": 3.523325587139561, "learning_rate": 2.580756895445799e-06, "loss": 0.7109, "step": 41820 }, { "epoch": 0.5097315149964048, "grad_norm": 1.9983508202757971, "learning_rate": 2.5804361770365623e-06, "loss": 0.7219, "step": 41825 }, { "epoch": 0.5097924512205526, "grad_norm": 2.8156349903812394, "learning_rate": 2.5801154586273253e-06, "loss": 0.7856, "step": 41830 }, { "epoch": 0.5098533874447003, "grad_norm": 2.1905079337608004, "learning_rate": 2.5797947402180888e-06, "loss": 0.802, "step": 41835 }, { "epoch": 0.5099143236688481, "grad_norm": 2.3016868660696095, "learning_rate": 2.5794740218088522e-06, "loss": 0.7768, "step": 41840 }, { "epoch": 0.509975259892996, "grad_norm": 2.331866079974611, "learning_rate": 2.5791533033996152e-06, "loss": 0.7488, "step": 41845 }, { "epoch": 0.5100361961171438, "grad_norm": 2.2649754434547034, "learning_rate": 2.578832584990379e-06, "loss": 0.7501, "step": 41850 }, { "epoch": 0.5100971323412916, "grad_norm": 2.6292371009071287, "learning_rate": 2.578511866581142e-06, "loss": 0.8172, "step": 41855 }, { "epoch": 0.5101580685654394, "grad_norm": 2.9324894819910226, "learning_rate": 2.578191148171905e-06, "loss": 0.7929, "step": 41860 }, { "epoch": 0.5102190047895873, "grad_norm": 2.6787015927168976, "learning_rate": 2.577870429762668e-06, "loss": 0.7186, "step": 41865 }, { "epoch": 0.510279941013735, "grad_norm": 2.24373924481295, "learning_rate": 2.577549711353432e-06, "loss": 0.7332, "step": 41870 }, { "epoch": 0.5103408772378828, "grad_norm": 2.4071545520503963, "learning_rate": 2.577228992944195e-06, "loss": 0.7516, "step": 41875 }, { "epoch": 0.5104018134620306, "grad_norm": 2.6736318790070266, "learning_rate": 2.5769082745349585e-06, "loss": 0.7771, "step": 41880 }, { "epoch": 0.5104627496861784, "grad_norm": 2.9726926866043075, "learning_rate": 2.576587556125722e-06, "loss": 0.7648, "step": 41885 }, { "epoch": 0.5105236859103263, "grad_norm": 2.543879077350759, "learning_rate": 2.576266837716485e-06, "loss": 0.8264, "step": 41890 }, { "epoch": 0.5105846221344741, "grad_norm": 2.7997191760308597, "learning_rate": 2.5759461193072484e-06, "loss": 0.7701, "step": 41895 }, { "epoch": 0.5106455583586219, "grad_norm": 3.6608914086096265, "learning_rate": 2.575625400898012e-06, "loss": 0.7482, "step": 41900 }, { "epoch": 0.5107064945827696, "grad_norm": 2.8958083807303376, "learning_rate": 2.5753046824887753e-06, "loss": 0.7936, "step": 41905 }, { "epoch": 0.5107674308069174, "grad_norm": 2.027588186763417, "learning_rate": 2.5749839640795383e-06, "loss": 0.686, "step": 41910 }, { "epoch": 0.5108283670310653, "grad_norm": 2.4181434287869648, "learning_rate": 2.5746632456703017e-06, "loss": 0.7731, "step": 41915 }, { "epoch": 0.5108893032552131, "grad_norm": 2.001506436586611, "learning_rate": 2.574342527261065e-06, "loss": 0.6613, "step": 41920 }, { "epoch": 0.5109502394793609, "grad_norm": 2.3716081823724537, "learning_rate": 2.574021808851828e-06, "loss": 0.7865, "step": 41925 }, { "epoch": 0.5110111757035087, "grad_norm": 2.4219837462739666, "learning_rate": 2.573701090442592e-06, "loss": 0.7479, "step": 41930 }, { "epoch": 0.5110721119276566, "grad_norm": 2.411493303427676, "learning_rate": 2.573380372033355e-06, "loss": 0.8061, "step": 41935 }, { "epoch": 0.5111330481518043, "grad_norm": 2.0145414192934816, "learning_rate": 2.573059653624118e-06, "loss": 0.6484, "step": 41940 }, { "epoch": 0.5111939843759521, "grad_norm": 2.2776376959185343, "learning_rate": 2.572738935214881e-06, "loss": 0.7661, "step": 41945 }, { "epoch": 0.5112549206000999, "grad_norm": 2.8233769092413867, "learning_rate": 2.572418216805645e-06, "loss": 0.741, "step": 41950 }, { "epoch": 0.5113158568242477, "grad_norm": 2.665154517037331, "learning_rate": 2.572097498396408e-06, "loss": 0.6893, "step": 41955 }, { "epoch": 0.5113767930483956, "grad_norm": 2.229565125984057, "learning_rate": 2.5717767799871714e-06, "loss": 0.8067, "step": 41960 }, { "epoch": 0.5114377292725434, "grad_norm": 2.491652471235263, "learning_rate": 2.571456061577935e-06, "loss": 0.7122, "step": 41965 }, { "epoch": 0.5114986654966912, "grad_norm": 3.940385333003422, "learning_rate": 2.571135343168698e-06, "loss": 0.7487, "step": 41970 }, { "epoch": 0.5115596017208389, "grad_norm": 2.23834387119879, "learning_rate": 2.5708146247594613e-06, "loss": 0.7331, "step": 41975 }, { "epoch": 0.5116205379449867, "grad_norm": 3.118912075324984, "learning_rate": 2.5704939063502247e-06, "loss": 0.7605, "step": 41980 }, { "epoch": 0.5116814741691346, "grad_norm": 2.0150565448784348, "learning_rate": 2.570173187940988e-06, "loss": 0.6956, "step": 41985 }, { "epoch": 0.5117424103932824, "grad_norm": 2.165714627654015, "learning_rate": 2.569852469531751e-06, "loss": 0.7275, "step": 41990 }, { "epoch": 0.5118033466174302, "grad_norm": 2.1294637958125757, "learning_rate": 2.5695317511225146e-06, "loss": 0.7143, "step": 41995 }, { "epoch": 0.511864282841578, "grad_norm": 2.380441429005123, "learning_rate": 2.569211032713278e-06, "loss": 0.7796, "step": 42000 }, { "epoch": 0.5119252190657259, "grad_norm": 2.4452065391924562, "learning_rate": 2.568890314304041e-06, "loss": 0.7477, "step": 42005 }, { "epoch": 0.5119861552898736, "grad_norm": 2.2537713318062997, "learning_rate": 2.568569595894805e-06, "loss": 0.7375, "step": 42010 }, { "epoch": 0.5120470915140214, "grad_norm": 2.6808041679998986, "learning_rate": 2.568248877485568e-06, "loss": 0.7855, "step": 42015 }, { "epoch": 0.5121080277381692, "grad_norm": 2.430928292473642, "learning_rate": 2.567928159076331e-06, "loss": 0.7695, "step": 42020 }, { "epoch": 0.512168963962317, "grad_norm": 2.9260247674507416, "learning_rate": 2.567607440667095e-06, "loss": 0.7604, "step": 42025 }, { "epoch": 0.5122299001864649, "grad_norm": 2.5608271583776285, "learning_rate": 2.567286722257858e-06, "loss": 0.7565, "step": 42030 }, { "epoch": 0.5122908364106127, "grad_norm": 2.387051426919924, "learning_rate": 2.566966003848621e-06, "loss": 0.7086, "step": 42035 }, { "epoch": 0.5123517726347605, "grad_norm": 2.457194293940225, "learning_rate": 2.5666452854393843e-06, "loss": 0.7361, "step": 42040 }, { "epoch": 0.5124127088589082, "grad_norm": 2.2219102260258135, "learning_rate": 2.5663245670301478e-06, "loss": 0.7145, "step": 42045 }, { "epoch": 0.512473645083056, "grad_norm": 2.3391276326562056, "learning_rate": 2.5660038486209112e-06, "loss": 0.7669, "step": 42050 }, { "epoch": 0.5125345813072039, "grad_norm": 3.5948021197678846, "learning_rate": 2.5656831302116742e-06, "loss": 0.7333, "step": 42055 }, { "epoch": 0.5125955175313517, "grad_norm": 4.300817565223053, "learning_rate": 2.5653624118024377e-06, "loss": 0.7916, "step": 42060 }, { "epoch": 0.5126564537554995, "grad_norm": 2.447913253287858, "learning_rate": 2.565041693393201e-06, "loss": 0.7734, "step": 42065 }, { "epoch": 0.5127173899796473, "grad_norm": 2.247256347576577, "learning_rate": 2.564720974983964e-06, "loss": 0.6234, "step": 42070 }, { "epoch": 0.5127783262037952, "grad_norm": 2.50804033127366, "learning_rate": 2.564400256574728e-06, "loss": 0.7492, "step": 42075 }, { "epoch": 0.5128392624279429, "grad_norm": 2.451559349561536, "learning_rate": 2.564079538165491e-06, "loss": 0.7527, "step": 42080 }, { "epoch": 0.5129001986520907, "grad_norm": 2.4399842340743265, "learning_rate": 2.563758819756254e-06, "loss": 0.7726, "step": 42085 }, { "epoch": 0.5129611348762385, "grad_norm": 2.2260614464253248, "learning_rate": 2.563438101347018e-06, "loss": 0.7453, "step": 42090 }, { "epoch": 0.5130220711003863, "grad_norm": 2.5031395817314044, "learning_rate": 2.563117382937781e-06, "loss": 0.6723, "step": 42095 }, { "epoch": 0.5130830073245342, "grad_norm": 2.40512228644711, "learning_rate": 2.562796664528544e-06, "loss": 0.7516, "step": 42100 }, { "epoch": 0.513143943548682, "grad_norm": 1.943339835412717, "learning_rate": 2.5624759461193078e-06, "loss": 0.7718, "step": 42105 }, { "epoch": 0.5132048797728298, "grad_norm": 2.414798315130886, "learning_rate": 2.562155227710071e-06, "loss": 0.7528, "step": 42110 }, { "epoch": 0.5132658159969775, "grad_norm": 1.900833039792989, "learning_rate": 2.561834509300834e-06, "loss": 0.7721, "step": 42115 }, { "epoch": 0.5133267522211253, "grad_norm": 2.6578440350486376, "learning_rate": 2.5615137908915973e-06, "loss": 0.7276, "step": 42120 }, { "epoch": 0.5133876884452732, "grad_norm": 2.340998779279312, "learning_rate": 2.5611930724823607e-06, "loss": 0.7207, "step": 42125 }, { "epoch": 0.513448624669421, "grad_norm": 2.0225909815045426, "learning_rate": 2.560872354073124e-06, "loss": 0.6999, "step": 42130 }, { "epoch": 0.5135095608935688, "grad_norm": 2.494714651040793, "learning_rate": 2.560551635663887e-06, "loss": 0.759, "step": 42135 }, { "epoch": 0.5135704971177166, "grad_norm": 2.307332863256799, "learning_rate": 2.5602309172546506e-06, "loss": 0.7294, "step": 42140 }, { "epoch": 0.5136314333418645, "grad_norm": 2.6872168291232468, "learning_rate": 2.559910198845414e-06, "loss": 0.83, "step": 42145 }, { "epoch": 0.5136923695660122, "grad_norm": 2.1359608002403876, "learning_rate": 2.559589480436177e-06, "loss": 0.7602, "step": 42150 }, { "epoch": 0.51375330579016, "grad_norm": 3.663939025609407, "learning_rate": 2.559268762026941e-06, "loss": 0.7836, "step": 42155 }, { "epoch": 0.5138142420143078, "grad_norm": 2.7024856047954446, "learning_rate": 2.558948043617704e-06, "loss": 0.7397, "step": 42160 }, { "epoch": 0.5138751782384556, "grad_norm": 2.4236888124529536, "learning_rate": 2.558627325208467e-06, "loss": 0.7236, "step": 42165 }, { "epoch": 0.5139361144626035, "grad_norm": 4.186219341769754, "learning_rate": 2.558306606799231e-06, "loss": 0.7291, "step": 42170 }, { "epoch": 0.5139970506867513, "grad_norm": 4.280628678604985, "learning_rate": 2.557985888389994e-06, "loss": 0.7502, "step": 42175 }, { "epoch": 0.5140579869108991, "grad_norm": 2.856481195603278, "learning_rate": 2.557665169980757e-06, "loss": 0.6983, "step": 42180 }, { "epoch": 0.5141189231350468, "grad_norm": 2.2156766487042434, "learning_rate": 2.5573444515715207e-06, "loss": 0.781, "step": 42185 }, { "epoch": 0.5141798593591946, "grad_norm": 2.2568060779727563, "learning_rate": 2.5570237331622837e-06, "loss": 0.7476, "step": 42190 }, { "epoch": 0.5142407955833425, "grad_norm": 2.5452722707169477, "learning_rate": 2.5567030147530467e-06, "loss": 0.7508, "step": 42195 }, { "epoch": 0.5143017318074903, "grad_norm": 2.2371760384041353, "learning_rate": 2.55638229634381e-06, "loss": 0.7446, "step": 42200 }, { "epoch": 0.5143626680316381, "grad_norm": 2.5061212550763896, "learning_rate": 2.5560615779345736e-06, "loss": 0.6891, "step": 42205 }, { "epoch": 0.5144236042557859, "grad_norm": 2.469409051871475, "learning_rate": 2.555740859525337e-06, "loss": 0.7667, "step": 42210 }, { "epoch": 0.5144845404799337, "grad_norm": 2.6063623450218434, "learning_rate": 2.5554201411161e-06, "loss": 0.684, "step": 42215 }, { "epoch": 0.5145454767040815, "grad_norm": 2.390496754460807, "learning_rate": 2.5550994227068635e-06, "loss": 0.8061, "step": 42220 }, { "epoch": 0.5146064129282293, "grad_norm": 2.486366390775374, "learning_rate": 2.554778704297627e-06, "loss": 0.7663, "step": 42225 }, { "epoch": 0.5146673491523771, "grad_norm": 2.5256362451084735, "learning_rate": 2.55445798588839e-06, "loss": 0.7313, "step": 42230 }, { "epoch": 0.5147282853765249, "grad_norm": 3.486216438749931, "learning_rate": 2.554137267479154e-06, "loss": 0.7941, "step": 42235 }, { "epoch": 0.5147892216006728, "grad_norm": 2.72131279804281, "learning_rate": 2.553816549069917e-06, "loss": 0.7609, "step": 42240 }, { "epoch": 0.5148501578248206, "grad_norm": 2.167127395021028, "learning_rate": 2.55349583066068e-06, "loss": 0.7994, "step": 42245 }, { "epoch": 0.5149110940489684, "grad_norm": 3.03086465923001, "learning_rate": 2.5531751122514437e-06, "loss": 0.8121, "step": 42250 }, { "epoch": 0.5149720302731161, "grad_norm": 2.709341038197963, "learning_rate": 2.5528543938422068e-06, "loss": 0.7808, "step": 42255 }, { "epoch": 0.5150329664972639, "grad_norm": 2.4703576120280246, "learning_rate": 2.5525336754329698e-06, "loss": 0.7698, "step": 42260 }, { "epoch": 0.5150939027214118, "grad_norm": 2.024007623969224, "learning_rate": 2.5522129570237336e-06, "loss": 0.7771, "step": 42265 }, { "epoch": 0.5151548389455596, "grad_norm": 2.1377987599786534, "learning_rate": 2.5518922386144967e-06, "loss": 0.6631, "step": 42270 }, { "epoch": 0.5152157751697074, "grad_norm": 2.9047571453626255, "learning_rate": 2.55157152020526e-06, "loss": 0.7036, "step": 42275 }, { "epoch": 0.5152767113938552, "grad_norm": 2.322271321268994, "learning_rate": 2.551250801796023e-06, "loss": 0.7745, "step": 42280 }, { "epoch": 0.515337647618003, "grad_norm": 2.0551662669099824, "learning_rate": 2.5509300833867866e-06, "loss": 0.769, "step": 42285 }, { "epoch": 0.5153985838421508, "grad_norm": 2.9486972724205294, "learning_rate": 2.55060936497755e-06, "loss": 0.7851, "step": 42290 }, { "epoch": 0.5154595200662986, "grad_norm": 2.456467617806397, "learning_rate": 2.550288646568313e-06, "loss": 0.768, "step": 42295 }, { "epoch": 0.5155204562904464, "grad_norm": 2.3127720687960216, "learning_rate": 2.549967928159077e-06, "loss": 0.7163, "step": 42300 }, { "epoch": 0.5155813925145942, "grad_norm": 2.563281159699252, "learning_rate": 2.54964720974984e-06, "loss": 0.7871, "step": 42305 }, { "epoch": 0.515642328738742, "grad_norm": 2.542742269580505, "learning_rate": 2.549326491340603e-06, "loss": 0.7165, "step": 42310 }, { "epoch": 0.5157032649628899, "grad_norm": 3.1095349753820543, "learning_rate": 2.5490057729313668e-06, "loss": 0.7913, "step": 42315 }, { "epoch": 0.5157642011870377, "grad_norm": 2.496658795792607, "learning_rate": 2.54868505452213e-06, "loss": 0.7347, "step": 42320 }, { "epoch": 0.5158251374111854, "grad_norm": 2.3050753635785606, "learning_rate": 2.548364336112893e-06, "loss": 0.8055, "step": 42325 }, { "epoch": 0.5158860736353332, "grad_norm": 2.508411297318664, "learning_rate": 2.5480436177036567e-06, "loss": 0.7646, "step": 42330 }, { "epoch": 0.515947009859481, "grad_norm": 2.794100370008175, "learning_rate": 2.5477228992944197e-06, "loss": 0.7054, "step": 42335 }, { "epoch": 0.5160079460836289, "grad_norm": 2.5477713606544277, "learning_rate": 2.5474021808851827e-06, "loss": 0.7366, "step": 42340 }, { "epoch": 0.5160688823077767, "grad_norm": 3.636330051683813, "learning_rate": 2.5470814624759466e-06, "loss": 0.7728, "step": 42345 }, { "epoch": 0.5161298185319245, "grad_norm": 2.431529209088287, "learning_rate": 2.5467607440667096e-06, "loss": 0.7707, "step": 42350 }, { "epoch": 0.5161907547560723, "grad_norm": 2.550882624289606, "learning_rate": 2.546440025657473e-06, "loss": 0.7992, "step": 42355 }, { "epoch": 0.5162516909802201, "grad_norm": 2.1449131924523566, "learning_rate": 2.5461193072482365e-06, "loss": 0.8157, "step": 42360 }, { "epoch": 0.5163126272043679, "grad_norm": 2.217926268953928, "learning_rate": 2.5457985888389995e-06, "loss": 0.7366, "step": 42365 }, { "epoch": 0.5163735634285157, "grad_norm": 2.387327543632429, "learning_rate": 2.545477870429763e-06, "loss": 0.7362, "step": 42370 }, { "epoch": 0.5164344996526635, "grad_norm": 2.0058711190672645, "learning_rate": 2.545157152020526e-06, "loss": 0.7655, "step": 42375 }, { "epoch": 0.5164954358768113, "grad_norm": 1.9387023647601505, "learning_rate": 2.54483643361129e-06, "loss": 0.7158, "step": 42380 }, { "epoch": 0.5165563721009592, "grad_norm": 2.5011514269934136, "learning_rate": 2.544515715202053e-06, "loss": 0.7965, "step": 42385 }, { "epoch": 0.516617308325107, "grad_norm": 2.5492562070594222, "learning_rate": 2.544194996792816e-06, "loss": 0.7096, "step": 42390 }, { "epoch": 0.5166782445492547, "grad_norm": 2.4914491029685473, "learning_rate": 2.5438742783835797e-06, "loss": 0.7017, "step": 42395 }, { "epoch": 0.5167391807734025, "grad_norm": 2.288577565172415, "learning_rate": 2.5435535599743427e-06, "loss": 0.7322, "step": 42400 }, { "epoch": 0.5168001169975504, "grad_norm": 2.463186434291419, "learning_rate": 2.5432328415651057e-06, "loss": 0.7434, "step": 42405 }, { "epoch": 0.5168610532216982, "grad_norm": 2.3467335619024445, "learning_rate": 2.5429121231558696e-06, "loss": 0.7356, "step": 42410 }, { "epoch": 0.516921989445846, "grad_norm": 2.3666520291183413, "learning_rate": 2.5425914047466326e-06, "loss": 0.8027, "step": 42415 }, { "epoch": 0.5169829256699938, "grad_norm": 2.6199911904498845, "learning_rate": 2.5422706863373956e-06, "loss": 0.7805, "step": 42420 }, { "epoch": 0.5170438618941416, "grad_norm": 2.0506789380014445, "learning_rate": 2.5419499679281595e-06, "loss": 0.757, "step": 42425 }, { "epoch": 0.5171047981182894, "grad_norm": 2.996024360511518, "learning_rate": 2.5416292495189225e-06, "loss": 0.7409, "step": 42430 }, { "epoch": 0.5171657343424372, "grad_norm": 2.3097925037067, "learning_rate": 2.541308531109686e-06, "loss": 0.7602, "step": 42435 }, { "epoch": 0.517226670566585, "grad_norm": 2.6749391717164115, "learning_rate": 2.5409878127004494e-06, "loss": 0.7595, "step": 42440 }, { "epoch": 0.5172876067907328, "grad_norm": 2.186454454567275, "learning_rate": 2.5406670942912124e-06, "loss": 0.7122, "step": 42445 }, { "epoch": 0.5173485430148806, "grad_norm": 2.2741253764827185, "learning_rate": 2.540346375881976e-06, "loss": 0.7459, "step": 42450 }, { "epoch": 0.5174094792390285, "grad_norm": 4.738603008650348, "learning_rate": 2.540025657472739e-06, "loss": 0.7534, "step": 42455 }, { "epoch": 0.5174704154631763, "grad_norm": 3.0396912244911536, "learning_rate": 2.5397049390635027e-06, "loss": 0.7617, "step": 42460 }, { "epoch": 0.517531351687324, "grad_norm": 2.759648108852288, "learning_rate": 2.5393842206542657e-06, "loss": 0.8037, "step": 42465 }, { "epoch": 0.5175922879114718, "grad_norm": 3.025818314396303, "learning_rate": 2.5390635022450288e-06, "loss": 0.714, "step": 42470 }, { "epoch": 0.5176532241356196, "grad_norm": 2.4699035888961127, "learning_rate": 2.5387427838357926e-06, "loss": 0.6977, "step": 42475 }, { "epoch": 0.5177141603597675, "grad_norm": 3.0521935851811555, "learning_rate": 2.5384220654265556e-06, "loss": 0.897, "step": 42480 }, { "epoch": 0.5177750965839153, "grad_norm": 2.414810935498425, "learning_rate": 2.5381013470173187e-06, "loss": 0.762, "step": 42485 }, { "epoch": 0.5178360328080631, "grad_norm": 3.2734410397622224, "learning_rate": 2.5377806286080825e-06, "loss": 0.7242, "step": 42490 }, { "epoch": 0.5178969690322109, "grad_norm": 2.4303521748645878, "learning_rate": 2.5374599101988455e-06, "loss": 0.6909, "step": 42495 }, { "epoch": 0.5179579052563587, "grad_norm": 2.537941862113925, "learning_rate": 2.5371391917896086e-06, "loss": 0.7719, "step": 42500 }, { "epoch": 0.5180188414805065, "grad_norm": 2.578987210666167, "learning_rate": 2.5368184733803724e-06, "loss": 0.7587, "step": 42505 }, { "epoch": 0.5180797777046543, "grad_norm": 2.612732423931305, "learning_rate": 2.5364977549711354e-06, "loss": 0.7695, "step": 42510 }, { "epoch": 0.5181407139288021, "grad_norm": 2.6010012650101864, "learning_rate": 2.536177036561899e-06, "loss": 0.6916, "step": 42515 }, { "epoch": 0.5182016501529499, "grad_norm": 2.1323514359655427, "learning_rate": 2.5358563181526623e-06, "loss": 0.6673, "step": 42520 }, { "epoch": 0.5182625863770978, "grad_norm": 2.24858477217585, "learning_rate": 2.5355355997434258e-06, "loss": 0.7063, "step": 42525 }, { "epoch": 0.5183235226012456, "grad_norm": 2.2368305215285167, "learning_rate": 2.5352148813341888e-06, "loss": 0.7236, "step": 42530 }, { "epoch": 0.5183844588253933, "grad_norm": 4.0040758088514385, "learning_rate": 2.534894162924952e-06, "loss": 0.7365, "step": 42535 }, { "epoch": 0.5184453950495411, "grad_norm": 2.584518340760783, "learning_rate": 2.5345734445157157e-06, "loss": 0.714, "step": 42540 }, { "epoch": 0.518506331273689, "grad_norm": 2.2372379815550008, "learning_rate": 2.5342527261064787e-06, "loss": 0.7508, "step": 42545 }, { "epoch": 0.5185672674978368, "grad_norm": 2.655207152932814, "learning_rate": 2.5339320076972417e-06, "loss": 0.6431, "step": 42550 }, { "epoch": 0.5186282037219846, "grad_norm": 3.745772720343849, "learning_rate": 2.5336112892880056e-06, "loss": 0.7747, "step": 42555 }, { "epoch": 0.5186891399461324, "grad_norm": 2.460064067858796, "learning_rate": 2.5332905708787686e-06, "loss": 0.729, "step": 42560 }, { "epoch": 0.5187500761702802, "grad_norm": 3.182313517499214, "learning_rate": 2.5329698524695316e-06, "loss": 0.7515, "step": 42565 }, { "epoch": 0.518811012394428, "grad_norm": 1.8936518314838329, "learning_rate": 2.5326491340602955e-06, "loss": 0.7489, "step": 42570 }, { "epoch": 0.5188719486185758, "grad_norm": 2.164568149045235, "learning_rate": 2.5323284156510585e-06, "loss": 0.7418, "step": 42575 }, { "epoch": 0.5189328848427236, "grad_norm": 4.771746723100975, "learning_rate": 2.532007697241822e-06, "loss": 0.714, "step": 42580 }, { "epoch": 0.5189938210668714, "grad_norm": 2.7293036044679893, "learning_rate": 2.5316869788325853e-06, "loss": 0.7457, "step": 42585 }, { "epoch": 0.5190547572910192, "grad_norm": 2.323143900024594, "learning_rate": 2.5313662604233484e-06, "loss": 0.7889, "step": 42590 }, { "epoch": 0.5191156935151671, "grad_norm": 3.2336150455759407, "learning_rate": 2.531045542014112e-06, "loss": 0.7094, "step": 42595 }, { "epoch": 0.5191766297393149, "grad_norm": 2.2872727170938623, "learning_rate": 2.5307248236048752e-06, "loss": 0.7792, "step": 42600 }, { "epoch": 0.5192375659634626, "grad_norm": 2.418805455456571, "learning_rate": 2.5304041051956387e-06, "loss": 0.7393, "step": 42605 }, { "epoch": 0.5192985021876104, "grad_norm": 2.3997680681590823, "learning_rate": 2.5300833867864017e-06, "loss": 0.8167, "step": 42610 }, { "epoch": 0.5193594384117582, "grad_norm": 2.4421948547277856, "learning_rate": 2.529762668377165e-06, "loss": 0.764, "step": 42615 }, { "epoch": 0.5194203746359061, "grad_norm": 2.0442208972293576, "learning_rate": 2.5294419499679286e-06, "loss": 0.7532, "step": 42620 }, { "epoch": 0.5194813108600539, "grad_norm": 2.568212047653904, "learning_rate": 2.5291212315586916e-06, "loss": 0.7693, "step": 42625 }, { "epoch": 0.5195422470842017, "grad_norm": 2.221982600781647, "learning_rate": 2.5288005131494546e-06, "loss": 0.7915, "step": 42630 }, { "epoch": 0.5196031833083495, "grad_norm": 2.5369460867684164, "learning_rate": 2.5284797947402185e-06, "loss": 0.6983, "step": 42635 }, { "epoch": 0.5196641195324972, "grad_norm": 4.539547874133859, "learning_rate": 2.5281590763309815e-06, "loss": 0.8034, "step": 42640 }, { "epoch": 0.5197250557566451, "grad_norm": 3.009666238748432, "learning_rate": 2.5278383579217445e-06, "loss": 0.8125, "step": 42645 }, { "epoch": 0.5197859919807929, "grad_norm": 2.0323232524344284, "learning_rate": 2.5275176395125084e-06, "loss": 0.683, "step": 42650 }, { "epoch": 0.5198469282049407, "grad_norm": 2.5488912940387904, "learning_rate": 2.5271969211032714e-06, "loss": 0.7666, "step": 42655 }, { "epoch": 0.5199078644290885, "grad_norm": 2.742506826255998, "learning_rate": 2.526876202694035e-06, "loss": 0.796, "step": 42660 }, { "epoch": 0.5199688006532364, "grad_norm": 2.3007582557041437, "learning_rate": 2.5265554842847983e-06, "loss": 0.7183, "step": 42665 }, { "epoch": 0.5200297368773841, "grad_norm": 3.321223148488378, "learning_rate": 2.5262347658755613e-06, "loss": 0.8587, "step": 42670 }, { "epoch": 0.5200906731015319, "grad_norm": 2.272675610188566, "learning_rate": 2.5259140474663247e-06, "loss": 0.7922, "step": 42675 }, { "epoch": 0.5201516093256797, "grad_norm": 2.6343712414320515, "learning_rate": 2.525593329057088e-06, "loss": 0.6941, "step": 42680 }, { "epoch": 0.5202125455498275, "grad_norm": 2.590637377254563, "learning_rate": 2.5252726106478516e-06, "loss": 0.7197, "step": 42685 }, { "epoch": 0.5202734817739754, "grad_norm": 1.998226444742505, "learning_rate": 2.5249518922386146e-06, "loss": 0.7491, "step": 42690 }, { "epoch": 0.5203344179981232, "grad_norm": 2.5602121667257394, "learning_rate": 2.524631173829378e-06, "loss": 0.7792, "step": 42695 }, { "epoch": 0.520395354222271, "grad_norm": 2.521988121885255, "learning_rate": 2.5243104554201415e-06, "loss": 0.7178, "step": 42700 }, { "epoch": 0.5204562904464187, "grad_norm": 2.396314442913947, "learning_rate": 2.5239897370109045e-06, "loss": 0.7658, "step": 42705 }, { "epoch": 0.5205172266705665, "grad_norm": 2.4940467003919515, "learning_rate": 2.5236690186016675e-06, "loss": 0.7511, "step": 42710 }, { "epoch": 0.5205781628947144, "grad_norm": 2.027249407598078, "learning_rate": 2.5233483001924314e-06, "loss": 0.6913, "step": 42715 }, { "epoch": 0.5206390991188622, "grad_norm": 2.333356629001935, "learning_rate": 2.5230275817831944e-06, "loss": 0.8366, "step": 42720 }, { "epoch": 0.52070003534301, "grad_norm": 1.977857584027657, "learning_rate": 2.5227068633739574e-06, "loss": 0.6926, "step": 42725 }, { "epoch": 0.5207609715671578, "grad_norm": 2.0039564232178693, "learning_rate": 2.5223861449647213e-06, "loss": 0.6841, "step": 42730 }, { "epoch": 0.5208219077913057, "grad_norm": 2.0375704077157257, "learning_rate": 2.5220654265554843e-06, "loss": 0.7766, "step": 42735 }, { "epoch": 0.5208828440154534, "grad_norm": 2.5553523990633944, "learning_rate": 2.5217447081462478e-06, "loss": 0.752, "step": 42740 }, { "epoch": 0.5209437802396012, "grad_norm": 2.5627110079703477, "learning_rate": 2.521423989737011e-06, "loss": 0.7187, "step": 42745 }, { "epoch": 0.521004716463749, "grad_norm": 2.245692161148221, "learning_rate": 2.5211032713277746e-06, "loss": 0.7317, "step": 42750 }, { "epoch": 0.5210656526878968, "grad_norm": 2.2827399030705595, "learning_rate": 2.5207825529185377e-06, "loss": 0.7379, "step": 42755 }, { "epoch": 0.5211265889120447, "grad_norm": 2.531781403023523, "learning_rate": 2.520461834509301e-06, "loss": 0.7453, "step": 42760 }, { "epoch": 0.5211875251361925, "grad_norm": 1.997335201957895, "learning_rate": 2.5201411161000645e-06, "loss": 0.7573, "step": 42765 }, { "epoch": 0.5212484613603403, "grad_norm": 2.097019680925522, "learning_rate": 2.5198203976908276e-06, "loss": 0.6851, "step": 42770 }, { "epoch": 0.521309397584488, "grad_norm": 2.556973268425627, "learning_rate": 2.5194996792815914e-06, "loss": 0.7465, "step": 42775 }, { "epoch": 0.5213703338086358, "grad_norm": 2.743318637244268, "learning_rate": 2.5191789608723544e-06, "loss": 0.8263, "step": 42780 }, { "epoch": 0.5214312700327837, "grad_norm": 2.0941010206566104, "learning_rate": 2.5188582424631175e-06, "loss": 0.7953, "step": 42785 }, { "epoch": 0.5214922062569315, "grad_norm": 2.197831334197366, "learning_rate": 2.5185375240538805e-06, "loss": 0.7664, "step": 42790 }, { "epoch": 0.5215531424810793, "grad_norm": 2.799951809357387, "learning_rate": 2.5182168056446443e-06, "loss": 0.7343, "step": 42795 }, { "epoch": 0.5216140787052271, "grad_norm": 2.497070029768146, "learning_rate": 2.5178960872354074e-06, "loss": 0.693, "step": 42800 }, { "epoch": 0.521675014929375, "grad_norm": 2.6514731351004506, "learning_rate": 2.517575368826171e-06, "loss": 0.8315, "step": 42805 }, { "epoch": 0.5217359511535227, "grad_norm": 2.5344216748939212, "learning_rate": 2.5172546504169342e-06, "loss": 0.8175, "step": 42810 }, { "epoch": 0.5217968873776705, "grad_norm": 1.930896892461473, "learning_rate": 2.5169339320076972e-06, "loss": 0.6905, "step": 42815 }, { "epoch": 0.5218578236018183, "grad_norm": 2.5370885766673346, "learning_rate": 2.5166132135984607e-06, "loss": 0.7113, "step": 42820 }, { "epoch": 0.5219187598259661, "grad_norm": 2.8298735369067627, "learning_rate": 2.516292495189224e-06, "loss": 0.6962, "step": 42825 }, { "epoch": 0.521979696050114, "grad_norm": 2.687453331038393, "learning_rate": 2.5159717767799876e-06, "loss": 0.7451, "step": 42830 }, { "epoch": 0.5220406322742618, "grad_norm": 2.0486662217327174, "learning_rate": 2.5156510583707506e-06, "loss": 0.7578, "step": 42835 }, { "epoch": 0.5221015684984096, "grad_norm": 2.8215208660835804, "learning_rate": 2.515330339961514e-06, "loss": 0.8015, "step": 42840 }, { "epoch": 0.5221625047225573, "grad_norm": 2.588410361623115, "learning_rate": 2.5150096215522775e-06, "loss": 0.7662, "step": 42845 }, { "epoch": 0.5222234409467051, "grad_norm": 2.7054383797129367, "learning_rate": 2.5146889031430405e-06, "loss": 0.7256, "step": 42850 }, { "epoch": 0.522284377170853, "grad_norm": 2.370514483137446, "learning_rate": 2.5143681847338043e-06, "loss": 0.7419, "step": 42855 }, { "epoch": 0.5223453133950008, "grad_norm": 2.369895705244141, "learning_rate": 2.5140474663245674e-06, "loss": 0.6769, "step": 42860 }, { "epoch": 0.5224062496191486, "grad_norm": 2.5390998551142325, "learning_rate": 2.5137267479153304e-06, "loss": 0.7698, "step": 42865 }, { "epoch": 0.5224671858432964, "grad_norm": 2.498762350976115, "learning_rate": 2.5134060295060934e-06, "loss": 0.7775, "step": 42870 }, { "epoch": 0.5225281220674443, "grad_norm": 3.2149925177752197, "learning_rate": 2.5130853110968573e-06, "loss": 0.8244, "step": 42875 }, { "epoch": 0.522589058291592, "grad_norm": 2.1802604116223994, "learning_rate": 2.5127645926876203e-06, "loss": 0.7734, "step": 42880 }, { "epoch": 0.5226499945157398, "grad_norm": 2.3232919814753688, "learning_rate": 2.5124438742783837e-06, "loss": 0.7173, "step": 42885 }, { "epoch": 0.5227109307398876, "grad_norm": 3.347153410832636, "learning_rate": 2.512123155869147e-06, "loss": 0.7453, "step": 42890 }, { "epoch": 0.5227718669640354, "grad_norm": 2.1945974944575424, "learning_rate": 2.51180243745991e-06, "loss": 0.7194, "step": 42895 }, { "epoch": 0.5228328031881833, "grad_norm": 2.527417657687188, "learning_rate": 2.5114817190506736e-06, "loss": 0.7252, "step": 42900 }, { "epoch": 0.5228937394123311, "grad_norm": 2.7362300212245025, "learning_rate": 2.511161000641437e-06, "loss": 0.7547, "step": 42905 }, { "epoch": 0.5229546756364789, "grad_norm": 2.570861395696515, "learning_rate": 2.5108402822322005e-06, "loss": 0.7896, "step": 42910 }, { "epoch": 0.5230156118606266, "grad_norm": 2.227878157299659, "learning_rate": 2.5105195638229635e-06, "loss": 0.6994, "step": 42915 }, { "epoch": 0.5230765480847744, "grad_norm": 3.501334669351575, "learning_rate": 2.510198845413727e-06, "loss": 0.676, "step": 42920 }, { "epoch": 0.5231374843089223, "grad_norm": 2.2054966565754426, "learning_rate": 2.5098781270044904e-06, "loss": 0.7639, "step": 42925 }, { "epoch": 0.5231984205330701, "grad_norm": 3.1371991956254006, "learning_rate": 2.5095574085952534e-06, "loss": 0.716, "step": 42930 }, { "epoch": 0.5232593567572179, "grad_norm": 2.6330082779547253, "learning_rate": 2.5092366901860173e-06, "loss": 0.7691, "step": 42935 }, { "epoch": 0.5233202929813657, "grad_norm": 2.2744072705964196, "learning_rate": 2.5089159717767803e-06, "loss": 0.8007, "step": 42940 }, { "epoch": 0.5233812292055136, "grad_norm": 2.690598777297629, "learning_rate": 2.5085952533675433e-06, "loss": 0.7692, "step": 42945 }, { "epoch": 0.5234421654296613, "grad_norm": 2.072500262237548, "learning_rate": 2.508274534958307e-06, "loss": 0.7575, "step": 42950 }, { "epoch": 0.5235031016538091, "grad_norm": 2.9620499649437395, "learning_rate": 2.50795381654907e-06, "loss": 0.778, "step": 42955 }, { "epoch": 0.5235640378779569, "grad_norm": 2.2526806325860527, "learning_rate": 2.507633098139833e-06, "loss": 0.7369, "step": 42960 }, { "epoch": 0.5236249741021047, "grad_norm": 3.3083516847799754, "learning_rate": 2.5073123797305966e-06, "loss": 0.7591, "step": 42965 }, { "epoch": 0.5236859103262526, "grad_norm": 2.280190184851751, "learning_rate": 2.50699166132136e-06, "loss": 0.6927, "step": 42970 }, { "epoch": 0.5237468465504004, "grad_norm": 2.4902702170288245, "learning_rate": 2.5066709429121235e-06, "loss": 0.6613, "step": 42975 }, { "epoch": 0.5238077827745482, "grad_norm": 2.1161358946568023, "learning_rate": 2.5063502245028865e-06, "loss": 0.7774, "step": 42980 }, { "epoch": 0.5238687189986959, "grad_norm": 2.7528543302089243, "learning_rate": 2.50602950609365e-06, "loss": 0.8651, "step": 42985 }, { "epoch": 0.5239296552228437, "grad_norm": 3.1569876949906126, "learning_rate": 2.5057087876844134e-06, "loss": 0.7903, "step": 42990 }, { "epoch": 0.5239905914469916, "grad_norm": 2.4914880353413347, "learning_rate": 2.5053880692751764e-06, "loss": 0.6895, "step": 42995 }, { "epoch": 0.5240515276711394, "grad_norm": 2.44746543812504, "learning_rate": 2.5050673508659403e-06, "loss": 0.8134, "step": 43000 }, { "epoch": 0.5241124638952872, "grad_norm": 2.061558887118136, "learning_rate": 2.5047466324567033e-06, "loss": 0.7308, "step": 43005 }, { "epoch": 0.524173400119435, "grad_norm": 2.315397265191997, "learning_rate": 2.5044259140474663e-06, "loss": 0.72, "step": 43010 }, { "epoch": 0.5242343363435829, "grad_norm": 1.9489374283090295, "learning_rate": 2.50410519563823e-06, "loss": 0.7724, "step": 43015 }, { "epoch": 0.5242952725677306, "grad_norm": 2.379364998593607, "learning_rate": 2.5037844772289932e-06, "loss": 0.7594, "step": 43020 }, { "epoch": 0.5243562087918784, "grad_norm": 2.570587038172744, "learning_rate": 2.5034637588197562e-06, "loss": 0.7333, "step": 43025 }, { "epoch": 0.5244171450160262, "grad_norm": 2.368024982847063, "learning_rate": 2.50314304041052e-06, "loss": 0.7687, "step": 43030 }, { "epoch": 0.524478081240174, "grad_norm": 2.539302489900027, "learning_rate": 2.502822322001283e-06, "loss": 0.831, "step": 43035 }, { "epoch": 0.5245390174643219, "grad_norm": 2.068231954779225, "learning_rate": 2.502501603592046e-06, "loss": 0.7638, "step": 43040 }, { "epoch": 0.5245999536884697, "grad_norm": 2.1615438687154716, "learning_rate": 2.5021808851828096e-06, "loss": 0.7581, "step": 43045 }, { "epoch": 0.5246608899126175, "grad_norm": 2.867407235474603, "learning_rate": 2.501860166773573e-06, "loss": 0.7135, "step": 43050 }, { "epoch": 0.5247218261367652, "grad_norm": 2.3388108502305784, "learning_rate": 2.5015394483643365e-06, "loss": 0.7996, "step": 43055 }, { "epoch": 0.524782762360913, "grad_norm": 2.616588666256273, "learning_rate": 2.5012187299550995e-06, "loss": 0.7803, "step": 43060 }, { "epoch": 0.5248436985850609, "grad_norm": 3.0983243007219277, "learning_rate": 2.500898011545863e-06, "loss": 0.6924, "step": 43065 }, { "epoch": 0.5249046348092087, "grad_norm": 2.4448823445999883, "learning_rate": 2.5005772931366264e-06, "loss": 0.7678, "step": 43070 }, { "epoch": 0.5249655710333565, "grad_norm": 2.7020230868816455, "learning_rate": 2.5002565747273894e-06, "loss": 0.8144, "step": 43075 }, { "epoch": 0.5250265072575043, "grad_norm": 2.614330293645718, "learning_rate": 2.499935856318153e-06, "loss": 0.8013, "step": 43080 }, { "epoch": 0.5250874434816521, "grad_norm": 2.5060264961086918, "learning_rate": 2.4996151379089162e-06, "loss": 0.7482, "step": 43085 }, { "epoch": 0.5251483797057999, "grad_norm": 2.6925173500452337, "learning_rate": 2.4992944194996797e-06, "loss": 0.7155, "step": 43090 }, { "epoch": 0.5252093159299477, "grad_norm": 2.7143599470277904, "learning_rate": 2.4989737010904427e-06, "loss": 0.7461, "step": 43095 }, { "epoch": 0.5252702521540955, "grad_norm": 2.1278449408399225, "learning_rate": 2.498652982681206e-06, "loss": 0.7645, "step": 43100 }, { "epoch": 0.5253311883782433, "grad_norm": 3.392090333606307, "learning_rate": 2.498332264271969e-06, "loss": 0.7805, "step": 43105 }, { "epoch": 0.5253921246023912, "grad_norm": 2.328837273529321, "learning_rate": 2.4980115458627326e-06, "loss": 0.7252, "step": 43110 }, { "epoch": 0.525453060826539, "grad_norm": 2.418085137901575, "learning_rate": 2.497690827453496e-06, "loss": 0.7547, "step": 43115 }, { "epoch": 0.5255139970506868, "grad_norm": 2.316706479343837, "learning_rate": 2.497370109044259e-06, "loss": 0.7764, "step": 43120 }, { "epoch": 0.5255749332748345, "grad_norm": 3.1343273090964745, "learning_rate": 2.4970493906350225e-06, "loss": 0.8007, "step": 43125 }, { "epoch": 0.5256358694989823, "grad_norm": 2.128315171313216, "learning_rate": 2.496728672225786e-06, "loss": 0.7511, "step": 43130 }, { "epoch": 0.5256968057231302, "grad_norm": 2.0670046024110516, "learning_rate": 2.4964079538165494e-06, "loss": 0.7418, "step": 43135 }, { "epoch": 0.525757741947278, "grad_norm": 2.334374150548118, "learning_rate": 2.496087235407313e-06, "loss": 0.7185, "step": 43140 }, { "epoch": 0.5258186781714258, "grad_norm": 3.5025387294339008, "learning_rate": 2.495766516998076e-06, "loss": 0.8042, "step": 43145 }, { "epoch": 0.5258796143955736, "grad_norm": 2.7348559457264985, "learning_rate": 2.4954457985888393e-06, "loss": 0.7098, "step": 43150 }, { "epoch": 0.5259405506197214, "grad_norm": 2.2139809053153714, "learning_rate": 2.4951250801796027e-06, "loss": 0.7339, "step": 43155 }, { "epoch": 0.5260014868438692, "grad_norm": 2.578570399765952, "learning_rate": 2.4948043617703657e-06, "loss": 0.779, "step": 43160 }, { "epoch": 0.526062423068017, "grad_norm": 2.305971766474099, "learning_rate": 2.494483643361129e-06, "loss": 0.8051, "step": 43165 }, { "epoch": 0.5261233592921648, "grad_norm": 2.3405576921787383, "learning_rate": 2.4941629249518926e-06, "loss": 0.7344, "step": 43170 }, { "epoch": 0.5261842955163126, "grad_norm": 2.9367957174879846, "learning_rate": 2.4938422065426556e-06, "loss": 0.7874, "step": 43175 }, { "epoch": 0.5262452317404605, "grad_norm": 2.524054633969713, "learning_rate": 2.493521488133419e-06, "loss": 0.7096, "step": 43180 }, { "epoch": 0.5263061679646083, "grad_norm": 2.293867862311031, "learning_rate": 2.493200769724182e-06, "loss": 0.8008, "step": 43185 }, { "epoch": 0.5263671041887561, "grad_norm": 2.938689818116435, "learning_rate": 2.4928800513149455e-06, "loss": 0.8178, "step": 43190 }, { "epoch": 0.5264280404129038, "grad_norm": 2.8236790592260186, "learning_rate": 2.492559332905709e-06, "loss": 0.824, "step": 43195 }, { "epoch": 0.5264889766370516, "grad_norm": 2.3583288787080763, "learning_rate": 2.492238614496472e-06, "loss": 0.757, "step": 43200 }, { "epoch": 0.5265499128611995, "grad_norm": 2.496040209175544, "learning_rate": 2.4919178960872354e-06, "loss": 0.7283, "step": 43205 }, { "epoch": 0.5266108490853473, "grad_norm": 2.4458567966620737, "learning_rate": 2.491597177677999e-06, "loss": 0.7081, "step": 43210 }, { "epoch": 0.5266717853094951, "grad_norm": 2.619742425372386, "learning_rate": 2.4912764592687623e-06, "loss": 0.7886, "step": 43215 }, { "epoch": 0.5267327215336429, "grad_norm": 2.8096705718747335, "learning_rate": 2.4909557408595257e-06, "loss": 0.7421, "step": 43220 }, { "epoch": 0.5267936577577907, "grad_norm": 2.2365837537000557, "learning_rate": 2.490635022450289e-06, "loss": 0.6948, "step": 43225 }, { "epoch": 0.5268545939819385, "grad_norm": 2.6141610894390754, "learning_rate": 2.490314304041052e-06, "loss": 0.7625, "step": 43230 }, { "epoch": 0.5269155302060863, "grad_norm": 2.542047584683843, "learning_rate": 2.4899935856318156e-06, "loss": 0.7682, "step": 43235 }, { "epoch": 0.5269764664302341, "grad_norm": 2.321040750612659, "learning_rate": 2.4896728672225787e-06, "loss": 0.6974, "step": 43240 }, { "epoch": 0.5270374026543819, "grad_norm": 2.7413267875042466, "learning_rate": 2.489352148813342e-06, "loss": 0.7757, "step": 43245 }, { "epoch": 0.5270983388785297, "grad_norm": 2.385197114566601, "learning_rate": 2.4890314304041055e-06, "loss": 0.7549, "step": 43250 }, { "epoch": 0.5271592751026776, "grad_norm": 2.428512063169227, "learning_rate": 2.4887107119948686e-06, "loss": 0.7991, "step": 43255 }, { "epoch": 0.5272202113268254, "grad_norm": 2.298490516067313, "learning_rate": 2.488389993585632e-06, "loss": 0.7019, "step": 43260 }, { "epoch": 0.5272811475509731, "grad_norm": 2.18812073993775, "learning_rate": 2.4880692751763954e-06, "loss": 0.8168, "step": 43265 }, { "epoch": 0.5273420837751209, "grad_norm": 2.2855861873449226, "learning_rate": 2.4877485567671585e-06, "loss": 0.7318, "step": 43270 }, { "epoch": 0.5274030199992688, "grad_norm": 2.146892996598961, "learning_rate": 2.487427838357922e-06, "loss": 0.7096, "step": 43275 }, { "epoch": 0.5274639562234166, "grad_norm": 2.2715988964882206, "learning_rate": 2.4871071199486853e-06, "loss": 0.7248, "step": 43280 }, { "epoch": 0.5275248924475644, "grad_norm": 2.082026684153286, "learning_rate": 2.4867864015394484e-06, "loss": 0.6975, "step": 43285 }, { "epoch": 0.5275858286717122, "grad_norm": 2.327774747321211, "learning_rate": 2.486465683130212e-06, "loss": 0.7172, "step": 43290 }, { "epoch": 0.52764676489586, "grad_norm": 2.298861879396072, "learning_rate": 2.4861449647209752e-06, "loss": 0.7203, "step": 43295 }, { "epoch": 0.5277077011200078, "grad_norm": 2.272406509568766, "learning_rate": 2.4858242463117387e-06, "loss": 0.7324, "step": 43300 }, { "epoch": 0.5277686373441556, "grad_norm": 3.0638531303289325, "learning_rate": 2.485503527902502e-06, "loss": 0.7068, "step": 43305 }, { "epoch": 0.5278295735683034, "grad_norm": 2.4198865354508934, "learning_rate": 2.485182809493265e-06, "loss": 0.7456, "step": 43310 }, { "epoch": 0.5278905097924512, "grad_norm": 2.4208960454967885, "learning_rate": 2.4848620910840286e-06, "loss": 0.7485, "step": 43315 }, { "epoch": 0.527951446016599, "grad_norm": 2.624356478087, "learning_rate": 2.4845413726747916e-06, "loss": 0.7083, "step": 43320 }, { "epoch": 0.5280123822407469, "grad_norm": 2.570837495378389, "learning_rate": 2.484220654265555e-06, "loss": 0.8057, "step": 43325 }, { "epoch": 0.5280733184648947, "grad_norm": 3.622850466444668, "learning_rate": 2.4838999358563185e-06, "loss": 0.7046, "step": 43330 }, { "epoch": 0.5281342546890424, "grad_norm": 2.7977226019165324, "learning_rate": 2.4835792174470815e-06, "loss": 0.7989, "step": 43335 }, { "epoch": 0.5281951909131902, "grad_norm": 2.2669561315781768, "learning_rate": 2.483258499037845e-06, "loss": 0.7204, "step": 43340 }, { "epoch": 0.528256127137338, "grad_norm": 2.7864587688201845, "learning_rate": 2.4829377806286084e-06, "loss": 0.7763, "step": 43345 }, { "epoch": 0.5283170633614859, "grad_norm": 2.196497838368746, "learning_rate": 2.4826170622193714e-06, "loss": 0.6442, "step": 43350 }, { "epoch": 0.5283779995856337, "grad_norm": 2.8330621223278274, "learning_rate": 2.482296343810135e-06, "loss": 0.7281, "step": 43355 }, { "epoch": 0.5284389358097815, "grad_norm": 2.2679065641596594, "learning_rate": 2.4819756254008983e-06, "loss": 0.7389, "step": 43360 }, { "epoch": 0.5284998720339293, "grad_norm": 2.2037628811702294, "learning_rate": 2.4816549069916617e-06, "loss": 0.7665, "step": 43365 }, { "epoch": 0.528560808258077, "grad_norm": 2.2761548359107047, "learning_rate": 2.4813341885824247e-06, "loss": 0.7347, "step": 43370 }, { "epoch": 0.5286217444822249, "grad_norm": 2.5576751891632883, "learning_rate": 2.481013470173188e-06, "loss": 0.8576, "step": 43375 }, { "epoch": 0.5286826807063727, "grad_norm": 2.322283882749212, "learning_rate": 2.4806927517639516e-06, "loss": 0.7152, "step": 43380 }, { "epoch": 0.5287436169305205, "grad_norm": 3.095638416349448, "learning_rate": 2.480372033354715e-06, "loss": 0.7433, "step": 43385 }, { "epoch": 0.5288045531546683, "grad_norm": 2.2880898196589463, "learning_rate": 2.480051314945478e-06, "loss": 0.7551, "step": 43390 }, { "epoch": 0.5288654893788162, "grad_norm": 2.6135096180009625, "learning_rate": 2.4797305965362415e-06, "loss": 0.7193, "step": 43395 }, { "epoch": 0.528926425602964, "grad_norm": 2.082722550687297, "learning_rate": 2.4794098781270045e-06, "loss": 0.6993, "step": 43400 }, { "epoch": 0.5289873618271117, "grad_norm": 3.30204241282939, "learning_rate": 2.479089159717768e-06, "loss": 0.7464, "step": 43405 }, { "epoch": 0.5290482980512595, "grad_norm": 2.429020525880321, "learning_rate": 2.4787684413085314e-06, "loss": 0.7252, "step": 43410 }, { "epoch": 0.5291092342754073, "grad_norm": 2.7062152344594916, "learning_rate": 2.4784477228992944e-06, "loss": 0.7346, "step": 43415 }, { "epoch": 0.5291701704995552, "grad_norm": 2.322973072717932, "learning_rate": 2.478127004490058e-06, "loss": 0.717, "step": 43420 }, { "epoch": 0.529231106723703, "grad_norm": 3.7564947560701194, "learning_rate": 2.4778062860808213e-06, "loss": 0.7153, "step": 43425 }, { "epoch": 0.5292920429478508, "grad_norm": 2.752580869287279, "learning_rate": 2.4774855676715843e-06, "loss": 0.7448, "step": 43430 }, { "epoch": 0.5293529791719986, "grad_norm": 2.761706867638025, "learning_rate": 2.4771648492623478e-06, "loss": 0.753, "step": 43435 }, { "epoch": 0.5294139153961464, "grad_norm": 2.8613073993165137, "learning_rate": 2.476844130853111e-06, "loss": 0.7995, "step": 43440 }, { "epoch": 0.5294748516202942, "grad_norm": 2.5732042987118975, "learning_rate": 2.4765234124438746e-06, "loss": 0.7179, "step": 43445 }, { "epoch": 0.529535787844442, "grad_norm": 2.2625264729217163, "learning_rate": 2.476202694034638e-06, "loss": 0.6798, "step": 43450 }, { "epoch": 0.5295967240685898, "grad_norm": 4.083513855716167, "learning_rate": 2.475881975625401e-06, "loss": 0.7337, "step": 43455 }, { "epoch": 0.5296576602927376, "grad_norm": 2.442990913339741, "learning_rate": 2.4755612572161645e-06, "loss": 0.7196, "step": 43460 }, { "epoch": 0.5297185965168855, "grad_norm": 2.468211079739411, "learning_rate": 2.475240538806928e-06, "loss": 0.8031, "step": 43465 }, { "epoch": 0.5297795327410333, "grad_norm": 2.5518880307698035, "learning_rate": 2.474919820397691e-06, "loss": 0.7002, "step": 43470 }, { "epoch": 0.529840468965181, "grad_norm": 2.69357800356295, "learning_rate": 2.4745991019884544e-06, "loss": 0.765, "step": 43475 }, { "epoch": 0.5299014051893288, "grad_norm": 2.370923768030495, "learning_rate": 2.4742783835792174e-06, "loss": 0.6912, "step": 43480 }, { "epoch": 0.5299623414134766, "grad_norm": 3.1072620471901247, "learning_rate": 2.473957665169981e-06, "loss": 0.7813, "step": 43485 }, { "epoch": 0.5300232776376245, "grad_norm": 2.163187153851281, "learning_rate": 2.4736369467607443e-06, "loss": 0.7814, "step": 43490 }, { "epoch": 0.5300842138617723, "grad_norm": 2.915196228017552, "learning_rate": 2.4733162283515073e-06, "loss": 0.7873, "step": 43495 }, { "epoch": 0.5301451500859201, "grad_norm": 2.3843929099098236, "learning_rate": 2.4729955099422708e-06, "loss": 0.7741, "step": 43500 }, { "epoch": 0.5302060863100679, "grad_norm": 2.3410966189047318, "learning_rate": 2.4726747915330342e-06, "loss": 0.7343, "step": 43505 }, { "epoch": 0.5302670225342156, "grad_norm": 2.1873483665113826, "learning_rate": 2.4723540731237972e-06, "loss": 0.8299, "step": 43510 }, { "epoch": 0.5303279587583635, "grad_norm": 2.911012657477998, "learning_rate": 2.4720333547145607e-06, "loss": 0.7375, "step": 43515 }, { "epoch": 0.5303888949825113, "grad_norm": 2.4271817994530274, "learning_rate": 2.471712636305324e-06, "loss": 0.7208, "step": 43520 }, { "epoch": 0.5304498312066591, "grad_norm": 2.810918652413079, "learning_rate": 2.4713919178960876e-06, "loss": 0.7813, "step": 43525 }, { "epoch": 0.5305107674308069, "grad_norm": 2.3427066740351665, "learning_rate": 2.471071199486851e-06, "loss": 0.7163, "step": 43530 }, { "epoch": 0.5305717036549548, "grad_norm": 2.8954184984395277, "learning_rate": 2.470750481077614e-06, "loss": 0.7663, "step": 43535 }, { "epoch": 0.5306326398791026, "grad_norm": 2.281587744411008, "learning_rate": 2.4704297626683775e-06, "loss": 0.7546, "step": 43540 }, { "epoch": 0.5306935761032503, "grad_norm": 2.1231680970559927, "learning_rate": 2.470109044259141e-06, "loss": 0.678, "step": 43545 }, { "epoch": 0.5307545123273981, "grad_norm": 2.885573290393607, "learning_rate": 2.469788325849904e-06, "loss": 0.7694, "step": 43550 }, { "epoch": 0.5308154485515459, "grad_norm": 2.3558180015118118, "learning_rate": 2.4694676074406674e-06, "loss": 0.7658, "step": 43555 }, { "epoch": 0.5308763847756938, "grad_norm": 2.7007915157065012, "learning_rate": 2.469146889031431e-06, "loss": 0.7465, "step": 43560 }, { "epoch": 0.5309373209998416, "grad_norm": 2.6335226070519275, "learning_rate": 2.468826170622194e-06, "loss": 0.7537, "step": 43565 }, { "epoch": 0.5309982572239894, "grad_norm": 2.438820780203668, "learning_rate": 2.4685054522129573e-06, "loss": 0.7131, "step": 43570 }, { "epoch": 0.5310591934481372, "grad_norm": 2.7318122062548564, "learning_rate": 2.4681847338037203e-06, "loss": 0.6796, "step": 43575 }, { "epoch": 0.531120129672285, "grad_norm": 2.275541321995652, "learning_rate": 2.4678640153944837e-06, "loss": 0.7602, "step": 43580 }, { "epoch": 0.5311810658964328, "grad_norm": 2.4385251996136272, "learning_rate": 2.467543296985247e-06, "loss": 0.6356, "step": 43585 }, { "epoch": 0.5312420021205806, "grad_norm": 3.033385680575292, "learning_rate": 2.4672225785760106e-06, "loss": 0.7605, "step": 43590 }, { "epoch": 0.5313029383447284, "grad_norm": 2.483110538603155, "learning_rate": 2.4669018601667736e-06, "loss": 0.7753, "step": 43595 }, { "epoch": 0.5313638745688762, "grad_norm": 2.7127904707696295, "learning_rate": 2.466581141757537e-06, "loss": 0.7156, "step": 43600 }, { "epoch": 0.5314248107930241, "grad_norm": 2.5059919031076534, "learning_rate": 2.4662604233483005e-06, "loss": 0.7674, "step": 43605 }, { "epoch": 0.5314857470171719, "grad_norm": 2.509298215255698, "learning_rate": 2.465939704939064e-06, "loss": 0.7915, "step": 43610 }, { "epoch": 0.5315466832413196, "grad_norm": 3.1890674346123955, "learning_rate": 2.465618986529827e-06, "loss": 0.766, "step": 43615 }, { "epoch": 0.5316076194654674, "grad_norm": 2.85496624599944, "learning_rate": 2.4652982681205904e-06, "loss": 0.7799, "step": 43620 }, { "epoch": 0.5316685556896152, "grad_norm": 2.379263281436533, "learning_rate": 2.464977549711354e-06, "loss": 0.7583, "step": 43625 }, { "epoch": 0.5317294919137631, "grad_norm": 1.7762469889397645, "learning_rate": 2.464656831302117e-06, "loss": 0.7755, "step": 43630 }, { "epoch": 0.5317904281379109, "grad_norm": 1.8267646343851411, "learning_rate": 2.4643361128928803e-06, "loss": 0.7418, "step": 43635 }, { "epoch": 0.5318513643620587, "grad_norm": 2.484873901241807, "learning_rate": 2.4640153944836437e-06, "loss": 0.7368, "step": 43640 }, { "epoch": 0.5319123005862064, "grad_norm": 2.3512192386328032, "learning_rate": 2.4636946760744067e-06, "loss": 0.8692, "step": 43645 }, { "epoch": 0.5319732368103542, "grad_norm": 2.693629884487864, "learning_rate": 2.46337395766517e-06, "loss": 0.7654, "step": 43650 }, { "epoch": 0.5320341730345021, "grad_norm": 2.194691785238083, "learning_rate": 2.463053239255933e-06, "loss": 0.7119, "step": 43655 }, { "epoch": 0.5320951092586499, "grad_norm": 2.760102232340001, "learning_rate": 2.4627325208466966e-06, "loss": 0.7628, "step": 43660 }, { "epoch": 0.5321560454827977, "grad_norm": 2.349104209940413, "learning_rate": 2.46241180243746e-06, "loss": 0.756, "step": 43665 }, { "epoch": 0.5322169817069455, "grad_norm": 2.400595590663245, "learning_rate": 2.4620910840282235e-06, "loss": 0.8003, "step": 43670 }, { "epoch": 0.5322779179310934, "grad_norm": 2.870308802516341, "learning_rate": 2.461770365618987e-06, "loss": 0.7677, "step": 43675 }, { "epoch": 0.5323388541552411, "grad_norm": 2.345767610000846, "learning_rate": 2.46144964720975e-06, "loss": 0.7631, "step": 43680 }, { "epoch": 0.5323997903793889, "grad_norm": 2.12751922169731, "learning_rate": 2.4611289288005134e-06, "loss": 0.7837, "step": 43685 }, { "epoch": 0.5324607266035367, "grad_norm": 3.1323544978028397, "learning_rate": 2.460808210391277e-06, "loss": 0.715, "step": 43690 }, { "epoch": 0.5325216628276845, "grad_norm": 2.362069787680793, "learning_rate": 2.46048749198204e-06, "loss": 0.7241, "step": 43695 }, { "epoch": 0.5325825990518324, "grad_norm": 2.1940420651797243, "learning_rate": 2.4601667735728033e-06, "loss": 0.7311, "step": 43700 }, { "epoch": 0.5326435352759802, "grad_norm": 2.1652824908490276, "learning_rate": 2.4598460551635668e-06, "loss": 0.6632, "step": 43705 }, { "epoch": 0.532704471500128, "grad_norm": 2.626249288621093, "learning_rate": 2.4595253367543298e-06, "loss": 0.6645, "step": 43710 }, { "epoch": 0.5327654077242757, "grad_norm": 2.5921728996042037, "learning_rate": 2.459204618345093e-06, "loss": 0.7438, "step": 43715 }, { "epoch": 0.5328263439484235, "grad_norm": 2.371704448867609, "learning_rate": 2.4588838999358566e-06, "loss": 0.7316, "step": 43720 }, { "epoch": 0.5328872801725714, "grad_norm": 2.1488488625403632, "learning_rate": 2.4585631815266197e-06, "loss": 0.7132, "step": 43725 }, { "epoch": 0.5329482163967192, "grad_norm": 2.0280403828864535, "learning_rate": 2.458242463117383e-06, "loss": 0.7085, "step": 43730 }, { "epoch": 0.533009152620867, "grad_norm": 4.612718676046209, "learning_rate": 2.457921744708146e-06, "loss": 0.8312, "step": 43735 }, { "epoch": 0.5330700888450148, "grad_norm": 2.679997457720913, "learning_rate": 2.4576010262989096e-06, "loss": 0.7648, "step": 43740 }, { "epoch": 0.5331310250691627, "grad_norm": 2.2956013681297387, "learning_rate": 2.457280307889673e-06, "loss": 0.6698, "step": 43745 }, { "epoch": 0.5331919612933104, "grad_norm": 2.4054761143240535, "learning_rate": 2.4569595894804364e-06, "loss": 0.7722, "step": 43750 }, { "epoch": 0.5332528975174582, "grad_norm": 2.2004708450313135, "learning_rate": 2.4566388710712e-06, "loss": 0.7401, "step": 43755 }, { "epoch": 0.533313833741606, "grad_norm": 2.557645658957391, "learning_rate": 2.456318152661963e-06, "loss": 0.7726, "step": 43760 }, { "epoch": 0.5333747699657538, "grad_norm": 2.1024888349830393, "learning_rate": 2.4559974342527263e-06, "loss": 0.7588, "step": 43765 }, { "epoch": 0.5334357061899017, "grad_norm": 2.7545216430285095, "learning_rate": 2.4556767158434898e-06, "loss": 0.8199, "step": 43770 }, { "epoch": 0.5334966424140495, "grad_norm": 2.0961513997828525, "learning_rate": 2.455355997434253e-06, "loss": 0.7046, "step": 43775 }, { "epoch": 0.5335575786381973, "grad_norm": 2.6778823947892403, "learning_rate": 2.4550352790250162e-06, "loss": 0.7234, "step": 43780 }, { "epoch": 0.533618514862345, "grad_norm": 2.661971216088766, "learning_rate": 2.4547145606157797e-06, "loss": 0.8044, "step": 43785 }, { "epoch": 0.5336794510864928, "grad_norm": 2.504173735482415, "learning_rate": 2.4543938422065427e-06, "loss": 0.7688, "step": 43790 }, { "epoch": 0.5337403873106407, "grad_norm": 2.2766909817497556, "learning_rate": 2.454073123797306e-06, "loss": 0.693, "step": 43795 }, { "epoch": 0.5338013235347885, "grad_norm": 2.52792931469491, "learning_rate": 2.4537524053880696e-06, "loss": 0.7817, "step": 43800 }, { "epoch": 0.5338622597589363, "grad_norm": 2.4668391431131456, "learning_rate": 2.4534316869788326e-06, "loss": 0.709, "step": 43805 }, { "epoch": 0.5339231959830841, "grad_norm": 2.6796323466152323, "learning_rate": 2.453110968569596e-06, "loss": 0.7592, "step": 43810 }, { "epoch": 0.533984132207232, "grad_norm": 2.134914903109111, "learning_rate": 2.4527902501603595e-06, "loss": 0.7787, "step": 43815 }, { "epoch": 0.5340450684313797, "grad_norm": 2.794809521361112, "learning_rate": 2.4524695317511225e-06, "loss": 0.8171, "step": 43820 }, { "epoch": 0.5341060046555275, "grad_norm": 2.202083243421265, "learning_rate": 2.452148813341886e-06, "loss": 0.8103, "step": 43825 }, { "epoch": 0.5341669408796753, "grad_norm": 2.0922720285140817, "learning_rate": 2.4518280949326494e-06, "loss": 0.7157, "step": 43830 }, { "epoch": 0.5342278771038231, "grad_norm": 4.238474576856144, "learning_rate": 2.451507376523413e-06, "loss": 0.7128, "step": 43835 }, { "epoch": 0.534288813327971, "grad_norm": 2.638570302670186, "learning_rate": 2.4511866581141763e-06, "loss": 0.7158, "step": 43840 }, { "epoch": 0.5343497495521188, "grad_norm": 2.3748797840735447, "learning_rate": 2.4508659397049393e-06, "loss": 0.769, "step": 43845 }, { "epoch": 0.5344106857762666, "grad_norm": 2.1443659729030213, "learning_rate": 2.4505452212957027e-06, "loss": 0.7488, "step": 43850 }, { "epoch": 0.5344716220004143, "grad_norm": 2.5407774286154505, "learning_rate": 2.450224502886466e-06, "loss": 0.7661, "step": 43855 }, { "epoch": 0.5345325582245621, "grad_norm": 2.481757339072773, "learning_rate": 2.449903784477229e-06, "loss": 0.7103, "step": 43860 }, { "epoch": 0.53459349444871, "grad_norm": 2.967764046005253, "learning_rate": 2.4495830660679926e-06, "loss": 0.8125, "step": 43865 }, { "epoch": 0.5346544306728578, "grad_norm": 2.5919322044560205, "learning_rate": 2.4492623476587556e-06, "loss": 0.7345, "step": 43870 }, { "epoch": 0.5347153668970056, "grad_norm": 3.0910961336810345, "learning_rate": 2.448941629249519e-06, "loss": 0.6996, "step": 43875 }, { "epoch": 0.5347763031211534, "grad_norm": 2.177409100837251, "learning_rate": 2.4486209108402825e-06, "loss": 0.7296, "step": 43880 }, { "epoch": 0.5348372393453013, "grad_norm": 2.5398819250724496, "learning_rate": 2.4483001924310455e-06, "loss": 0.7964, "step": 43885 }, { "epoch": 0.534898175569449, "grad_norm": 2.007049364579381, "learning_rate": 2.447979474021809e-06, "loss": 0.7234, "step": 43890 }, { "epoch": 0.5349591117935968, "grad_norm": 2.4673108914168393, "learning_rate": 2.4476587556125724e-06, "loss": 0.7837, "step": 43895 }, { "epoch": 0.5350200480177446, "grad_norm": 2.2568858915030914, "learning_rate": 2.4473380372033354e-06, "loss": 0.7409, "step": 43900 }, { "epoch": 0.5350809842418924, "grad_norm": 2.301533763203119, "learning_rate": 2.447017318794099e-06, "loss": 0.7998, "step": 43905 }, { "epoch": 0.5351419204660403, "grad_norm": 2.625858183777147, "learning_rate": 2.4466966003848623e-06, "loss": 0.7707, "step": 43910 }, { "epoch": 0.5352028566901881, "grad_norm": 2.2526553352759024, "learning_rate": 2.4463758819756257e-06, "loss": 0.7701, "step": 43915 }, { "epoch": 0.5352637929143359, "grad_norm": 2.5871241906518496, "learning_rate": 2.446055163566389e-06, "loss": 0.7141, "step": 43920 }, { "epoch": 0.5353247291384836, "grad_norm": 2.6727046684114164, "learning_rate": 2.445734445157152e-06, "loss": 0.7839, "step": 43925 }, { "epoch": 0.5353856653626314, "grad_norm": 2.269097878187133, "learning_rate": 2.4454137267479156e-06, "loss": 0.7629, "step": 43930 }, { "epoch": 0.5354466015867793, "grad_norm": 2.515189000152006, "learning_rate": 2.445093008338679e-06, "loss": 0.6976, "step": 43935 }, { "epoch": 0.5355075378109271, "grad_norm": 2.730562419669257, "learning_rate": 2.444772289929442e-06, "loss": 0.7711, "step": 43940 }, { "epoch": 0.5355684740350749, "grad_norm": 2.5361165131810646, "learning_rate": 2.4444515715202055e-06, "loss": 0.804, "step": 43945 }, { "epoch": 0.5356294102592227, "grad_norm": 2.924918493209414, "learning_rate": 2.4441308531109685e-06, "loss": 0.7458, "step": 43950 }, { "epoch": 0.5356903464833705, "grad_norm": 2.673088069307866, "learning_rate": 2.443810134701732e-06, "loss": 0.7507, "step": 43955 }, { "epoch": 0.5357512827075183, "grad_norm": 2.133368021442248, "learning_rate": 2.4434894162924954e-06, "loss": 0.7913, "step": 43960 }, { "epoch": 0.5358122189316661, "grad_norm": 2.211450779630704, "learning_rate": 2.4431686978832584e-06, "loss": 0.6996, "step": 43965 }, { "epoch": 0.5358731551558139, "grad_norm": 2.5624255743876434, "learning_rate": 2.442847979474022e-06, "loss": 0.7018, "step": 43970 }, { "epoch": 0.5359340913799617, "grad_norm": 2.7422791017783985, "learning_rate": 2.4425272610647853e-06, "loss": 0.7433, "step": 43975 }, { "epoch": 0.5359950276041096, "grad_norm": 2.4608555276459163, "learning_rate": 2.4422065426555488e-06, "loss": 0.7147, "step": 43980 }, { "epoch": 0.5360559638282574, "grad_norm": 2.09855510664623, "learning_rate": 2.4418858242463118e-06, "loss": 0.7937, "step": 43985 }, { "epoch": 0.5361169000524052, "grad_norm": 2.6510376772995095, "learning_rate": 2.4415651058370752e-06, "loss": 0.742, "step": 43990 }, { "epoch": 0.5361778362765529, "grad_norm": 2.87802782431221, "learning_rate": 2.4412443874278387e-06, "loss": 0.7332, "step": 43995 }, { "epoch": 0.5362387725007007, "grad_norm": 2.536025534515171, "learning_rate": 2.440923669018602e-06, "loss": 0.7667, "step": 44000 }, { "epoch": 0.5362997087248486, "grad_norm": 2.535825812580773, "learning_rate": 2.440602950609365e-06, "loss": 0.6843, "step": 44005 }, { "epoch": 0.5363606449489964, "grad_norm": 2.3838108458453897, "learning_rate": 2.4402822322001286e-06, "loss": 0.7722, "step": 44010 }, { "epoch": 0.5364215811731442, "grad_norm": 3.0513863012716036, "learning_rate": 2.439961513790892e-06, "loss": 0.7599, "step": 44015 }, { "epoch": 0.536482517397292, "grad_norm": 3.120509907720229, "learning_rate": 2.439640795381655e-06, "loss": 0.7463, "step": 44020 }, { "epoch": 0.5365434536214398, "grad_norm": 2.5308904269684165, "learning_rate": 2.4393200769724185e-06, "loss": 0.7998, "step": 44025 }, { "epoch": 0.5366043898455876, "grad_norm": 3.3434906067833077, "learning_rate": 2.4389993585631815e-06, "loss": 0.7913, "step": 44030 }, { "epoch": 0.5366653260697354, "grad_norm": 2.2991027724791633, "learning_rate": 2.438678640153945e-06, "loss": 0.6887, "step": 44035 }, { "epoch": 0.5367262622938832, "grad_norm": 2.2036530537361085, "learning_rate": 2.4383579217447084e-06, "loss": 0.8331, "step": 44040 }, { "epoch": 0.536787198518031, "grad_norm": 2.473891838652393, "learning_rate": 2.4380372033354714e-06, "loss": 0.7895, "step": 44045 }, { "epoch": 0.5368481347421789, "grad_norm": 2.9329766794001872, "learning_rate": 2.437716484926235e-06, "loss": 0.7069, "step": 44050 }, { "epoch": 0.5369090709663267, "grad_norm": 4.045176281304571, "learning_rate": 2.4373957665169983e-06, "loss": 0.7639, "step": 44055 }, { "epoch": 0.5369700071904745, "grad_norm": 2.415119524137002, "learning_rate": 2.4370750481077617e-06, "loss": 0.7473, "step": 44060 }, { "epoch": 0.5370309434146222, "grad_norm": 2.0948990323178975, "learning_rate": 2.436754329698525e-06, "loss": 0.765, "step": 44065 }, { "epoch": 0.53709187963877, "grad_norm": 2.4658980175002685, "learning_rate": 2.436433611289288e-06, "loss": 0.6818, "step": 44070 }, { "epoch": 0.5371528158629179, "grad_norm": 2.0838935216388714, "learning_rate": 2.4361128928800516e-06, "loss": 0.7978, "step": 44075 }, { "epoch": 0.5372137520870657, "grad_norm": 3.859294375226074, "learning_rate": 2.435792174470815e-06, "loss": 0.7756, "step": 44080 }, { "epoch": 0.5372746883112135, "grad_norm": 3.07920799650618, "learning_rate": 2.435471456061578e-06, "loss": 0.785, "step": 44085 }, { "epoch": 0.5373356245353613, "grad_norm": 3.4909562853062117, "learning_rate": 2.4351507376523415e-06, "loss": 0.7066, "step": 44090 }, { "epoch": 0.5373965607595091, "grad_norm": 2.3299489925882266, "learning_rate": 2.434830019243105e-06, "loss": 0.7809, "step": 44095 }, { "epoch": 0.5374574969836569, "grad_norm": 2.3224723653769788, "learning_rate": 2.434509300833868e-06, "loss": 0.7708, "step": 44100 }, { "epoch": 0.5375184332078047, "grad_norm": 3.9848085788281336, "learning_rate": 2.4341885824246314e-06, "loss": 0.7276, "step": 44105 }, { "epoch": 0.5375793694319525, "grad_norm": 2.559507973442667, "learning_rate": 2.433867864015395e-06, "loss": 0.7351, "step": 44110 }, { "epoch": 0.5376403056561003, "grad_norm": 2.2395944380274577, "learning_rate": 2.433547145606158e-06, "loss": 0.7292, "step": 44115 }, { "epoch": 0.5377012418802481, "grad_norm": 2.0191217316115737, "learning_rate": 2.4332264271969213e-06, "loss": 0.7113, "step": 44120 }, { "epoch": 0.537762178104396, "grad_norm": 2.0007168564922657, "learning_rate": 2.4329057087876843e-06, "loss": 0.7352, "step": 44125 }, { "epoch": 0.5378231143285438, "grad_norm": 3.377486276605628, "learning_rate": 2.4325849903784477e-06, "loss": 0.7822, "step": 44130 }, { "epoch": 0.5378840505526915, "grad_norm": 2.0610676421802565, "learning_rate": 2.432264271969211e-06, "loss": 0.7541, "step": 44135 }, { "epoch": 0.5379449867768393, "grad_norm": 2.3528212580743633, "learning_rate": 2.4319435535599746e-06, "loss": 0.7498, "step": 44140 }, { "epoch": 0.5380059230009872, "grad_norm": 3.3240203138262188, "learning_rate": 2.431622835150738e-06, "loss": 0.707, "step": 44145 }, { "epoch": 0.538066859225135, "grad_norm": 2.1536668949613222, "learning_rate": 2.4313021167415015e-06, "loss": 0.7448, "step": 44150 }, { "epoch": 0.5381277954492828, "grad_norm": 2.3026921121375223, "learning_rate": 2.4309813983322645e-06, "loss": 0.7533, "step": 44155 }, { "epoch": 0.5381887316734306, "grad_norm": 2.5393439022103004, "learning_rate": 2.430660679923028e-06, "loss": 0.7623, "step": 44160 }, { "epoch": 0.5382496678975784, "grad_norm": 2.2428276266765206, "learning_rate": 2.430339961513791e-06, "loss": 0.7244, "step": 44165 }, { "epoch": 0.5383106041217262, "grad_norm": 2.3729761752715652, "learning_rate": 2.4300192431045544e-06, "loss": 0.7745, "step": 44170 }, { "epoch": 0.538371540345874, "grad_norm": 2.893624998249092, "learning_rate": 2.429698524695318e-06, "loss": 0.7739, "step": 44175 }, { "epoch": 0.5384324765700218, "grad_norm": 3.240102093904009, "learning_rate": 2.429377806286081e-06, "loss": 0.8014, "step": 44180 }, { "epoch": 0.5384934127941696, "grad_norm": 2.2691583108168407, "learning_rate": 2.4290570878768443e-06, "loss": 0.7691, "step": 44185 }, { "epoch": 0.5385543490183174, "grad_norm": 2.5927386084330366, "learning_rate": 2.4287363694676078e-06, "loss": 0.731, "step": 44190 }, { "epoch": 0.5386152852424653, "grad_norm": 2.6169012810322885, "learning_rate": 2.4284156510583708e-06, "loss": 0.8299, "step": 44195 }, { "epoch": 0.5386762214666131, "grad_norm": 3.377529432543864, "learning_rate": 2.428094932649134e-06, "loss": 0.7225, "step": 44200 }, { "epoch": 0.5387371576907608, "grad_norm": 2.112838557263083, "learning_rate": 2.4277742142398977e-06, "loss": 0.7473, "step": 44205 }, { "epoch": 0.5387980939149086, "grad_norm": 2.1962190301383706, "learning_rate": 2.4274534958306607e-06, "loss": 0.6824, "step": 44210 }, { "epoch": 0.5388590301390564, "grad_norm": 2.5321242294491375, "learning_rate": 2.427132777421424e-06, "loss": 0.7233, "step": 44215 }, { "epoch": 0.5389199663632043, "grad_norm": 2.3612845624048076, "learning_rate": 2.4268120590121875e-06, "loss": 0.7333, "step": 44220 }, { "epoch": 0.5389809025873521, "grad_norm": 2.3356486472183637, "learning_rate": 2.426491340602951e-06, "loss": 0.6935, "step": 44225 }, { "epoch": 0.5390418388114999, "grad_norm": 2.460075638021495, "learning_rate": 2.4261706221937144e-06, "loss": 0.7096, "step": 44230 }, { "epoch": 0.5391027750356477, "grad_norm": 3.1084198046981943, "learning_rate": 2.4258499037844774e-06, "loss": 0.6962, "step": 44235 }, { "epoch": 0.5391637112597955, "grad_norm": 2.460870261570338, "learning_rate": 2.425529185375241e-06, "loss": 0.7031, "step": 44240 }, { "epoch": 0.5392246474839433, "grad_norm": 2.3949513578660393, "learning_rate": 2.425208466966004e-06, "loss": 0.7269, "step": 44245 }, { "epoch": 0.5392855837080911, "grad_norm": 2.4667126460321755, "learning_rate": 2.4248877485567673e-06, "loss": 0.6664, "step": 44250 }, { "epoch": 0.5393465199322389, "grad_norm": 2.8717501663596026, "learning_rate": 2.4245670301475308e-06, "loss": 0.842, "step": 44255 }, { "epoch": 0.5394074561563867, "grad_norm": 2.9429180192285758, "learning_rate": 2.424246311738294e-06, "loss": 0.6508, "step": 44260 }, { "epoch": 0.5394683923805346, "grad_norm": 2.2176940367334974, "learning_rate": 2.4239255933290572e-06, "loss": 0.7015, "step": 44265 }, { "epoch": 0.5395293286046824, "grad_norm": 2.791446559585695, "learning_rate": 2.4236048749198207e-06, "loss": 0.723, "step": 44270 }, { "epoch": 0.5395902648288301, "grad_norm": 2.5633954270272814, "learning_rate": 2.4232841565105837e-06, "loss": 0.6861, "step": 44275 }, { "epoch": 0.5396512010529779, "grad_norm": 2.6457807124502635, "learning_rate": 2.422963438101347e-06, "loss": 0.7317, "step": 44280 }, { "epoch": 0.5397121372771257, "grad_norm": 2.4357891393844047, "learning_rate": 2.4226427196921106e-06, "loss": 0.7655, "step": 44285 }, { "epoch": 0.5397730735012736, "grad_norm": 2.3412947375869297, "learning_rate": 2.422322001282874e-06, "loss": 0.7676, "step": 44290 }, { "epoch": 0.5398340097254214, "grad_norm": 2.546692633285405, "learning_rate": 2.422001282873637e-06, "loss": 0.7843, "step": 44295 }, { "epoch": 0.5398949459495692, "grad_norm": 2.509709363635286, "learning_rate": 2.4216805644644005e-06, "loss": 0.7614, "step": 44300 }, { "epoch": 0.539955882173717, "grad_norm": 2.8283392825642553, "learning_rate": 2.421359846055164e-06, "loss": 0.7963, "step": 44305 }, { "epoch": 0.5400168183978648, "grad_norm": 2.6855460618518947, "learning_rate": 2.4210391276459274e-06, "loss": 0.7316, "step": 44310 }, { "epoch": 0.5400777546220126, "grad_norm": 2.1218906638871533, "learning_rate": 2.4207184092366904e-06, "loss": 0.806, "step": 44315 }, { "epoch": 0.5401386908461604, "grad_norm": 2.832982338305335, "learning_rate": 2.420397690827454e-06, "loss": 0.7295, "step": 44320 }, { "epoch": 0.5401996270703082, "grad_norm": 2.27052403787971, "learning_rate": 2.420076972418217e-06, "loss": 0.6866, "step": 44325 }, { "epoch": 0.540260563294456, "grad_norm": 2.8233140905033296, "learning_rate": 2.4197562540089803e-06, "loss": 0.7776, "step": 44330 }, { "epoch": 0.5403214995186039, "grad_norm": 1.912284075035207, "learning_rate": 2.4194355355997437e-06, "loss": 0.7125, "step": 44335 }, { "epoch": 0.5403824357427517, "grad_norm": 2.7504369374412034, "learning_rate": 2.4191148171905067e-06, "loss": 0.7581, "step": 44340 }, { "epoch": 0.5404433719668994, "grad_norm": 2.4276525150366868, "learning_rate": 2.41879409878127e-06, "loss": 0.7524, "step": 44345 }, { "epoch": 0.5405043081910472, "grad_norm": 2.6266399558068674, "learning_rate": 2.4184733803720336e-06, "loss": 0.7336, "step": 44350 }, { "epoch": 0.540565244415195, "grad_norm": 2.1782353201350357, "learning_rate": 2.4181526619627966e-06, "loss": 0.6632, "step": 44355 }, { "epoch": 0.5406261806393429, "grad_norm": 2.326102173788611, "learning_rate": 2.41783194355356e-06, "loss": 0.7292, "step": 44360 }, { "epoch": 0.5406871168634907, "grad_norm": 2.4710338724469443, "learning_rate": 2.4175112251443235e-06, "loss": 0.7146, "step": 44365 }, { "epoch": 0.5407480530876385, "grad_norm": 2.201052991824057, "learning_rate": 2.417190506735087e-06, "loss": 0.7258, "step": 44370 }, { "epoch": 0.5408089893117863, "grad_norm": 2.257932263683314, "learning_rate": 2.4168697883258504e-06, "loss": 0.7496, "step": 44375 }, { "epoch": 0.540869925535934, "grad_norm": 2.402600683872971, "learning_rate": 2.4165490699166134e-06, "loss": 0.6442, "step": 44380 }, { "epoch": 0.5409308617600819, "grad_norm": 2.5883994243673816, "learning_rate": 2.416228351507377e-06, "loss": 0.7426, "step": 44385 }, { "epoch": 0.5409917979842297, "grad_norm": 2.18954539106528, "learning_rate": 2.4159076330981403e-06, "loss": 0.79, "step": 44390 }, { "epoch": 0.5410527342083775, "grad_norm": 1.9698004095182133, "learning_rate": 2.4155869146889033e-06, "loss": 0.7749, "step": 44395 }, { "epoch": 0.5411136704325253, "grad_norm": 2.5193044856171003, "learning_rate": 2.4152661962796667e-06, "loss": 0.6964, "step": 44400 }, { "epoch": 0.5411746066566732, "grad_norm": 2.8603749095899915, "learning_rate": 2.4149454778704298e-06, "loss": 0.7986, "step": 44405 }, { "epoch": 0.541235542880821, "grad_norm": 2.109490987237727, "learning_rate": 2.414624759461193e-06, "loss": 0.7793, "step": 44410 }, { "epoch": 0.5412964791049687, "grad_norm": 2.650866199449832, "learning_rate": 2.4143040410519566e-06, "loss": 0.6944, "step": 44415 }, { "epoch": 0.5413574153291165, "grad_norm": 2.7376879256570614, "learning_rate": 2.4139833226427197e-06, "loss": 0.7104, "step": 44420 }, { "epoch": 0.5414183515532643, "grad_norm": 2.6758779475839014, "learning_rate": 2.413662604233483e-06, "loss": 0.7401, "step": 44425 }, { "epoch": 0.5414792877774122, "grad_norm": 2.86129191861566, "learning_rate": 2.4133418858242465e-06, "loss": 0.7485, "step": 44430 }, { "epoch": 0.54154022400156, "grad_norm": 3.589882575533387, "learning_rate": 2.4130211674150096e-06, "loss": 0.7396, "step": 44435 }, { "epoch": 0.5416011602257078, "grad_norm": 2.6359533270296303, "learning_rate": 2.412700449005773e-06, "loss": 0.7577, "step": 44440 }, { "epoch": 0.5416620964498556, "grad_norm": 2.1598167226315534, "learning_rate": 2.4123797305965364e-06, "loss": 0.7408, "step": 44445 }, { "epoch": 0.5417230326740033, "grad_norm": 2.0599921306534315, "learning_rate": 2.4120590121873e-06, "loss": 0.664, "step": 44450 }, { "epoch": 0.5417839688981512, "grad_norm": 3.5769024397993694, "learning_rate": 2.4117382937780633e-06, "loss": 0.7755, "step": 44455 }, { "epoch": 0.541844905122299, "grad_norm": 2.956985236211624, "learning_rate": 2.4114175753688263e-06, "loss": 0.7899, "step": 44460 }, { "epoch": 0.5419058413464468, "grad_norm": 2.3046749942022227, "learning_rate": 2.4110968569595898e-06, "loss": 0.704, "step": 44465 }, { "epoch": 0.5419667775705946, "grad_norm": 2.2478859931889787, "learning_rate": 2.410776138550353e-06, "loss": 0.7245, "step": 44470 }, { "epoch": 0.5420277137947425, "grad_norm": 2.314003854940188, "learning_rate": 2.4104554201411162e-06, "loss": 0.7463, "step": 44475 }, { "epoch": 0.5420886500188903, "grad_norm": 2.476458198674694, "learning_rate": 2.4101347017318797e-06, "loss": 0.8145, "step": 44480 }, { "epoch": 0.542149586243038, "grad_norm": 2.4127942341550317, "learning_rate": 2.409813983322643e-06, "loss": 0.7752, "step": 44485 }, { "epoch": 0.5422105224671858, "grad_norm": 2.3088665576398655, "learning_rate": 2.409493264913406e-06, "loss": 0.7278, "step": 44490 }, { "epoch": 0.5422714586913336, "grad_norm": 2.6123193481864435, "learning_rate": 2.4091725465041696e-06, "loss": 0.7739, "step": 44495 }, { "epoch": 0.5423323949154815, "grad_norm": 1.9621988447565506, "learning_rate": 2.4088518280949326e-06, "loss": 0.7021, "step": 44500 }, { "epoch": 0.5423933311396293, "grad_norm": 2.8505330102594555, "learning_rate": 2.408531109685696e-06, "loss": 0.7144, "step": 44505 }, { "epoch": 0.5424542673637771, "grad_norm": 2.788766227492299, "learning_rate": 2.4082103912764595e-06, "loss": 0.6827, "step": 44510 }, { "epoch": 0.5425152035879249, "grad_norm": 2.6365694753140283, "learning_rate": 2.407889672867223e-06, "loss": 0.7452, "step": 44515 }, { "epoch": 0.5425761398120726, "grad_norm": 2.4029154928841185, "learning_rate": 2.407568954457986e-06, "loss": 0.7715, "step": 44520 }, { "epoch": 0.5426370760362205, "grad_norm": 2.3617593831681627, "learning_rate": 2.4072482360487494e-06, "loss": 0.7628, "step": 44525 }, { "epoch": 0.5426980122603683, "grad_norm": 2.5562979220990965, "learning_rate": 2.406927517639513e-06, "loss": 0.7833, "step": 44530 }, { "epoch": 0.5427589484845161, "grad_norm": 2.2665770482381937, "learning_rate": 2.4066067992302762e-06, "loss": 0.7545, "step": 44535 }, { "epoch": 0.5428198847086639, "grad_norm": 3.018496300412925, "learning_rate": 2.4062860808210393e-06, "loss": 0.6853, "step": 44540 }, { "epoch": 0.5428808209328118, "grad_norm": 2.350430580410257, "learning_rate": 2.4059653624118027e-06, "loss": 0.7706, "step": 44545 }, { "epoch": 0.5429417571569596, "grad_norm": 2.9366857285578343, "learning_rate": 2.405644644002566e-06, "loss": 0.7111, "step": 44550 }, { "epoch": 0.5430026933811073, "grad_norm": 2.7758855417139534, "learning_rate": 2.405323925593329e-06, "loss": 0.7329, "step": 44555 }, { "epoch": 0.5430636296052551, "grad_norm": 2.4969440367168962, "learning_rate": 2.4050032071840926e-06, "loss": 0.7963, "step": 44560 }, { "epoch": 0.5431245658294029, "grad_norm": 2.3611216052859416, "learning_rate": 2.404682488774856e-06, "loss": 0.7775, "step": 44565 }, { "epoch": 0.5431855020535508, "grad_norm": 2.2856616526285745, "learning_rate": 2.404361770365619e-06, "loss": 0.8129, "step": 44570 }, { "epoch": 0.5432464382776986, "grad_norm": 2.566000372533561, "learning_rate": 2.4040410519563825e-06, "loss": 0.83, "step": 44575 }, { "epoch": 0.5433073745018464, "grad_norm": 2.8445067586088975, "learning_rate": 2.4037203335471455e-06, "loss": 0.7339, "step": 44580 }, { "epoch": 0.5433683107259941, "grad_norm": 2.0687812796472094, "learning_rate": 2.403399615137909e-06, "loss": 0.7596, "step": 44585 }, { "epoch": 0.5434292469501419, "grad_norm": 2.1167710597800817, "learning_rate": 2.4030788967286724e-06, "loss": 0.7473, "step": 44590 }, { "epoch": 0.5434901831742898, "grad_norm": 2.7339824680902987, "learning_rate": 2.402758178319436e-06, "loss": 0.6422, "step": 44595 }, { "epoch": 0.5435511193984376, "grad_norm": 2.889006269521039, "learning_rate": 2.402437459910199e-06, "loss": 0.7224, "step": 44600 }, { "epoch": 0.5436120556225854, "grad_norm": 2.4106889752007223, "learning_rate": 2.4021167415009623e-06, "loss": 0.7442, "step": 44605 }, { "epoch": 0.5436729918467332, "grad_norm": 2.6838142937967384, "learning_rate": 2.4017960230917257e-06, "loss": 0.7224, "step": 44610 }, { "epoch": 0.5437339280708811, "grad_norm": 3.0219829626248496, "learning_rate": 2.401475304682489e-06, "loss": 0.8082, "step": 44615 }, { "epoch": 0.5437948642950288, "grad_norm": 2.545918616698623, "learning_rate": 2.401154586273252e-06, "loss": 0.7695, "step": 44620 }, { "epoch": 0.5438558005191766, "grad_norm": 2.309156840169021, "learning_rate": 2.4008338678640156e-06, "loss": 0.7249, "step": 44625 }, { "epoch": 0.5439167367433244, "grad_norm": 2.83500064171057, "learning_rate": 2.400513149454779e-06, "loss": 0.7415, "step": 44630 }, { "epoch": 0.5439776729674722, "grad_norm": 2.6168994647156647, "learning_rate": 2.400192431045542e-06, "loss": 0.7415, "step": 44635 }, { "epoch": 0.5440386091916201, "grad_norm": 2.9256390056287356, "learning_rate": 2.3998717126363055e-06, "loss": 0.7339, "step": 44640 }, { "epoch": 0.5440995454157679, "grad_norm": 2.470648910975664, "learning_rate": 2.399550994227069e-06, "loss": 0.7503, "step": 44645 }, { "epoch": 0.5441604816399157, "grad_norm": 3.112931297589533, "learning_rate": 2.399230275817832e-06, "loss": 0.8405, "step": 44650 }, { "epoch": 0.5442214178640634, "grad_norm": 2.9006369701167385, "learning_rate": 2.3989095574085954e-06, "loss": 0.6894, "step": 44655 }, { "epoch": 0.5442823540882112, "grad_norm": 2.5558602236384718, "learning_rate": 2.3985888389993584e-06, "loss": 0.7778, "step": 44660 }, { "epoch": 0.5443432903123591, "grad_norm": 2.4263249198451837, "learning_rate": 2.398268120590122e-06, "loss": 0.7063, "step": 44665 }, { "epoch": 0.5444042265365069, "grad_norm": 2.3939361275507207, "learning_rate": 2.3979474021808853e-06, "loss": 0.7469, "step": 44670 }, { "epoch": 0.5444651627606547, "grad_norm": 2.1587703762018267, "learning_rate": 2.3976266837716488e-06, "loss": 0.7669, "step": 44675 }, { "epoch": 0.5445260989848025, "grad_norm": 3.9639827708967323, "learning_rate": 2.397305965362412e-06, "loss": 0.7394, "step": 44680 }, { "epoch": 0.5445870352089504, "grad_norm": 3.1168439387106384, "learning_rate": 2.3969852469531752e-06, "loss": 0.7258, "step": 44685 }, { "epoch": 0.5446479714330981, "grad_norm": 4.493238736036378, "learning_rate": 2.3966645285439387e-06, "loss": 0.8042, "step": 44690 }, { "epoch": 0.5447089076572459, "grad_norm": 2.22227808334684, "learning_rate": 2.396343810134702e-06, "loss": 0.7661, "step": 44695 }, { "epoch": 0.5447698438813937, "grad_norm": 2.733573438679663, "learning_rate": 2.396023091725465e-06, "loss": 0.7626, "step": 44700 }, { "epoch": 0.5448307801055415, "grad_norm": 2.561440099818502, "learning_rate": 2.3957023733162286e-06, "loss": 0.7746, "step": 44705 }, { "epoch": 0.5448917163296894, "grad_norm": 2.613408883710283, "learning_rate": 2.395381654906992e-06, "loss": 0.8529, "step": 44710 }, { "epoch": 0.5449526525538372, "grad_norm": 2.4935675257863985, "learning_rate": 2.395060936497755e-06, "loss": 0.7929, "step": 44715 }, { "epoch": 0.545013588777985, "grad_norm": 2.7955766690737858, "learning_rate": 2.3947402180885184e-06, "loss": 0.7393, "step": 44720 }, { "epoch": 0.5450745250021327, "grad_norm": 4.1324394064712315, "learning_rate": 2.394419499679282e-06, "loss": 0.7743, "step": 44725 }, { "epoch": 0.5451354612262805, "grad_norm": 3.009859941738966, "learning_rate": 2.394098781270045e-06, "loss": 0.7571, "step": 44730 }, { "epoch": 0.5451963974504284, "grad_norm": 2.732745310232526, "learning_rate": 2.3937780628608083e-06, "loss": 0.7293, "step": 44735 }, { "epoch": 0.5452573336745762, "grad_norm": 2.391686430884133, "learning_rate": 2.3934573444515718e-06, "loss": 0.7212, "step": 44740 }, { "epoch": 0.545318269898724, "grad_norm": 2.2020654510343696, "learning_rate": 2.393136626042335e-06, "loss": 0.7796, "step": 44745 }, { "epoch": 0.5453792061228718, "grad_norm": 2.047307007113204, "learning_rate": 2.3928159076330982e-06, "loss": 0.8048, "step": 44750 }, { "epoch": 0.5454401423470197, "grad_norm": 3.07765588494259, "learning_rate": 2.3924951892238617e-06, "loss": 0.8464, "step": 44755 }, { "epoch": 0.5455010785711674, "grad_norm": 2.54097015224351, "learning_rate": 2.392174470814625e-06, "loss": 0.7644, "step": 44760 }, { "epoch": 0.5455620147953152, "grad_norm": 6.9866129599120645, "learning_rate": 2.3918537524053886e-06, "loss": 0.7782, "step": 44765 }, { "epoch": 0.545622951019463, "grad_norm": 2.3883386760208096, "learning_rate": 2.3915330339961516e-06, "loss": 0.7322, "step": 44770 }, { "epoch": 0.5456838872436108, "grad_norm": 2.171535469863895, "learning_rate": 2.391212315586915e-06, "loss": 0.7042, "step": 44775 }, { "epoch": 0.5457448234677587, "grad_norm": 2.784905257583878, "learning_rate": 2.3908915971776785e-06, "loss": 0.7659, "step": 44780 }, { "epoch": 0.5458057596919065, "grad_norm": 3.660351535966124, "learning_rate": 2.3905708787684415e-06, "loss": 0.806, "step": 44785 }, { "epoch": 0.5458666959160543, "grad_norm": 2.904957846176801, "learning_rate": 2.390250160359205e-06, "loss": 0.7875, "step": 44790 }, { "epoch": 0.545927632140202, "grad_norm": 2.4768552830750052, "learning_rate": 2.389929441949968e-06, "loss": 0.7544, "step": 44795 }, { "epoch": 0.5459885683643498, "grad_norm": 2.746672305492391, "learning_rate": 2.3896087235407314e-06, "loss": 0.7475, "step": 44800 }, { "epoch": 0.5460495045884977, "grad_norm": 3.1189883997579995, "learning_rate": 2.389288005131495e-06, "loss": 0.7141, "step": 44805 }, { "epoch": 0.5461104408126455, "grad_norm": 2.4957426264965523, "learning_rate": 2.388967286722258e-06, "loss": 0.7293, "step": 44810 }, { "epoch": 0.5461713770367933, "grad_norm": 3.2849756680582325, "learning_rate": 2.3886465683130213e-06, "loss": 0.7124, "step": 44815 }, { "epoch": 0.5462323132609411, "grad_norm": 2.635121901548323, "learning_rate": 2.3883258499037847e-06, "loss": 0.7903, "step": 44820 }, { "epoch": 0.546293249485089, "grad_norm": 3.477818777179699, "learning_rate": 2.3880051314945477e-06, "loss": 0.7553, "step": 44825 }, { "epoch": 0.5463541857092367, "grad_norm": 2.428373404533343, "learning_rate": 2.387684413085311e-06, "loss": 0.7095, "step": 44830 }, { "epoch": 0.5464151219333845, "grad_norm": 2.4348364211758042, "learning_rate": 2.3873636946760746e-06, "loss": 0.8235, "step": 44835 }, { "epoch": 0.5464760581575323, "grad_norm": 2.6550657017030352, "learning_rate": 2.387042976266838e-06, "loss": 0.8352, "step": 44840 }, { "epoch": 0.5465369943816801, "grad_norm": 2.42569501516594, "learning_rate": 2.3867222578576015e-06, "loss": 0.7155, "step": 44845 }, { "epoch": 0.546597930605828, "grad_norm": 2.3474006121124065, "learning_rate": 2.3864015394483645e-06, "loss": 0.7286, "step": 44850 }, { "epoch": 0.5466588668299758, "grad_norm": 2.1829060895494026, "learning_rate": 2.386080821039128e-06, "loss": 0.7028, "step": 44855 }, { "epoch": 0.5467198030541236, "grad_norm": 2.351578000548662, "learning_rate": 2.3857601026298914e-06, "loss": 0.7313, "step": 44860 }, { "epoch": 0.5467807392782713, "grad_norm": 2.2339365341159, "learning_rate": 2.3854393842206544e-06, "loss": 0.6577, "step": 44865 }, { "epoch": 0.5468416755024191, "grad_norm": 2.5970518475019255, "learning_rate": 2.385118665811418e-06, "loss": 0.73, "step": 44870 }, { "epoch": 0.546902611726567, "grad_norm": 3.0461991461415185, "learning_rate": 2.384797947402181e-06, "loss": 0.771, "step": 44875 }, { "epoch": 0.5469635479507148, "grad_norm": 2.243093925982321, "learning_rate": 2.3844772289929443e-06, "loss": 0.6586, "step": 44880 }, { "epoch": 0.5470244841748626, "grad_norm": 2.182545024604443, "learning_rate": 2.3841565105837077e-06, "loss": 0.798, "step": 44885 }, { "epoch": 0.5470854203990104, "grad_norm": 2.165197414982624, "learning_rate": 2.3838357921744708e-06, "loss": 0.7257, "step": 44890 }, { "epoch": 0.5471463566231582, "grad_norm": 2.798466503412555, "learning_rate": 2.383515073765234e-06, "loss": 0.7718, "step": 44895 }, { "epoch": 0.547207292847306, "grad_norm": 2.6268396223456163, "learning_rate": 2.3831943553559976e-06, "loss": 0.6953, "step": 44900 }, { "epoch": 0.5472682290714538, "grad_norm": 2.5182223700679534, "learning_rate": 2.382873636946761e-06, "loss": 0.7033, "step": 44905 }, { "epoch": 0.5473291652956016, "grad_norm": 2.359630515627553, "learning_rate": 2.382552918537524e-06, "loss": 0.7451, "step": 44910 }, { "epoch": 0.5473901015197494, "grad_norm": 1.9470774020522674, "learning_rate": 2.3822322001282875e-06, "loss": 0.7297, "step": 44915 }, { "epoch": 0.5474510377438973, "grad_norm": 2.545433311707751, "learning_rate": 2.381911481719051e-06, "loss": 0.7538, "step": 44920 }, { "epoch": 0.5475119739680451, "grad_norm": 2.406437553339605, "learning_rate": 2.3815907633098144e-06, "loss": 0.8076, "step": 44925 }, { "epoch": 0.5475729101921929, "grad_norm": 2.2558602318461003, "learning_rate": 2.3812700449005774e-06, "loss": 0.786, "step": 44930 }, { "epoch": 0.5476338464163406, "grad_norm": 2.539668189657865, "learning_rate": 2.380949326491341e-06, "loss": 0.8432, "step": 44935 }, { "epoch": 0.5476947826404884, "grad_norm": 2.514996335292139, "learning_rate": 2.3806286080821043e-06, "loss": 0.6825, "step": 44940 }, { "epoch": 0.5477557188646363, "grad_norm": 1.8703664596515661, "learning_rate": 2.3803078896728673e-06, "loss": 0.6795, "step": 44945 }, { "epoch": 0.5478166550887841, "grad_norm": 3.8674789390487674, "learning_rate": 2.3799871712636308e-06, "loss": 0.7781, "step": 44950 }, { "epoch": 0.5478775913129319, "grad_norm": 2.474900269527604, "learning_rate": 2.379666452854394e-06, "loss": 0.7131, "step": 44955 }, { "epoch": 0.5479385275370797, "grad_norm": 2.3080757634951814, "learning_rate": 2.3793457344451572e-06, "loss": 0.7704, "step": 44960 }, { "epoch": 0.5479994637612275, "grad_norm": 2.670713836629146, "learning_rate": 2.3790250160359207e-06, "loss": 0.7056, "step": 44965 }, { "epoch": 0.5480603999853753, "grad_norm": 2.5094881381014646, "learning_rate": 2.3787042976266837e-06, "loss": 0.7014, "step": 44970 }, { "epoch": 0.5481213362095231, "grad_norm": 2.1262219697726152, "learning_rate": 2.378383579217447e-06, "loss": 0.6954, "step": 44975 }, { "epoch": 0.5481822724336709, "grad_norm": 2.2164130186609023, "learning_rate": 2.3780628608082106e-06, "loss": 0.7475, "step": 44980 }, { "epoch": 0.5482432086578187, "grad_norm": 2.7005735529230916, "learning_rate": 2.377742142398974e-06, "loss": 0.8123, "step": 44985 }, { "epoch": 0.5483041448819665, "grad_norm": 2.767475475357118, "learning_rate": 2.3774214239897374e-06, "loss": 0.7249, "step": 44990 }, { "epoch": 0.5483650811061144, "grad_norm": 2.4102413267461626, "learning_rate": 2.3771007055805005e-06, "loss": 0.7804, "step": 44995 }, { "epoch": 0.5484260173302622, "grad_norm": 2.4566083376455206, "learning_rate": 2.376779987171264e-06, "loss": 0.7061, "step": 45000 }, { "epoch": 0.5484869535544099, "grad_norm": 2.3791896751443065, "learning_rate": 2.3764592687620273e-06, "loss": 0.7756, "step": 45005 }, { "epoch": 0.5485478897785577, "grad_norm": 2.799208103176155, "learning_rate": 2.3761385503527904e-06, "loss": 0.798, "step": 45010 }, { "epoch": 0.5486088260027056, "grad_norm": 2.8175183265005663, "learning_rate": 2.375817831943554e-06, "loss": 0.7539, "step": 45015 }, { "epoch": 0.5486697622268534, "grad_norm": 1.9715537757377175, "learning_rate": 2.3754971135343172e-06, "loss": 0.7401, "step": 45020 }, { "epoch": 0.5487306984510012, "grad_norm": 3.0005007335446705, "learning_rate": 2.3751763951250803e-06, "loss": 0.7846, "step": 45025 }, { "epoch": 0.548791634675149, "grad_norm": 2.323183956386483, "learning_rate": 2.3748556767158437e-06, "loss": 0.7165, "step": 45030 }, { "epoch": 0.5488525708992968, "grad_norm": 2.150524690639934, "learning_rate": 2.374534958306607e-06, "loss": 0.6832, "step": 45035 }, { "epoch": 0.5489135071234446, "grad_norm": 2.632551984569431, "learning_rate": 2.37421423989737e-06, "loss": 0.7971, "step": 45040 }, { "epoch": 0.5489744433475924, "grad_norm": 2.341734559199391, "learning_rate": 2.3738935214881336e-06, "loss": 0.7207, "step": 45045 }, { "epoch": 0.5490353795717402, "grad_norm": 2.172921986031141, "learning_rate": 2.3735728030788966e-06, "loss": 0.7153, "step": 45050 }, { "epoch": 0.549096315795888, "grad_norm": 2.584985240340094, "learning_rate": 2.37325208466966e-06, "loss": 0.8305, "step": 45055 }, { "epoch": 0.5491572520200358, "grad_norm": 2.768256706177887, "learning_rate": 2.3729313662604235e-06, "loss": 0.7233, "step": 45060 }, { "epoch": 0.5492181882441837, "grad_norm": 2.3354952110046203, "learning_rate": 2.372610647851187e-06, "loss": 0.8131, "step": 45065 }, { "epoch": 0.5492791244683315, "grad_norm": 2.5724144767747155, "learning_rate": 2.3722899294419504e-06, "loss": 0.6646, "step": 45070 }, { "epoch": 0.5493400606924792, "grad_norm": 2.801149424611848, "learning_rate": 2.3719692110327134e-06, "loss": 0.7885, "step": 45075 }, { "epoch": 0.549400996916627, "grad_norm": 3.7488795457272635, "learning_rate": 2.371648492623477e-06, "loss": 0.7407, "step": 45080 }, { "epoch": 0.5494619331407748, "grad_norm": 2.9277173532387026, "learning_rate": 2.3713277742142403e-06, "loss": 0.7818, "step": 45085 }, { "epoch": 0.5495228693649227, "grad_norm": 2.0293822261849623, "learning_rate": 2.3710070558050033e-06, "loss": 0.6912, "step": 45090 }, { "epoch": 0.5495838055890705, "grad_norm": 3.91311913413344, "learning_rate": 2.3706863373957667e-06, "loss": 0.7379, "step": 45095 }, { "epoch": 0.5496447418132183, "grad_norm": 2.61343515489356, "learning_rate": 2.37036561898653e-06, "loss": 0.715, "step": 45100 }, { "epoch": 0.5497056780373661, "grad_norm": 2.9553818180452223, "learning_rate": 2.370044900577293e-06, "loss": 0.8283, "step": 45105 }, { "epoch": 0.5497666142615139, "grad_norm": 2.217852148836362, "learning_rate": 2.3697241821680566e-06, "loss": 0.7851, "step": 45110 }, { "epoch": 0.5498275504856617, "grad_norm": 3.051579320882834, "learning_rate": 2.36940346375882e-06, "loss": 0.836, "step": 45115 }, { "epoch": 0.5498884867098095, "grad_norm": 2.3681257189093774, "learning_rate": 2.369082745349583e-06, "loss": 0.7318, "step": 45120 }, { "epoch": 0.5499494229339573, "grad_norm": 2.9531876108636093, "learning_rate": 2.3687620269403465e-06, "loss": 0.7769, "step": 45125 }, { "epoch": 0.5500103591581051, "grad_norm": 3.088604133713601, "learning_rate": 2.36844130853111e-06, "loss": 0.7041, "step": 45130 }, { "epoch": 0.550071295382253, "grad_norm": 2.1161499417904666, "learning_rate": 2.368120590121873e-06, "loss": 0.7674, "step": 45135 }, { "epoch": 0.5501322316064008, "grad_norm": 2.285459509471517, "learning_rate": 2.3677998717126364e-06, "loss": 0.7097, "step": 45140 }, { "epoch": 0.5501931678305485, "grad_norm": 2.5021956174467674, "learning_rate": 2.3674791533034e-06, "loss": 0.7277, "step": 45145 }, { "epoch": 0.5502541040546963, "grad_norm": 2.3044497404983573, "learning_rate": 2.3671584348941633e-06, "loss": 0.729, "step": 45150 }, { "epoch": 0.5503150402788441, "grad_norm": 2.73573558743275, "learning_rate": 2.3668377164849267e-06, "loss": 0.7769, "step": 45155 }, { "epoch": 0.550375976502992, "grad_norm": 2.9101119986298745, "learning_rate": 2.3665169980756898e-06, "loss": 0.7412, "step": 45160 }, { "epoch": 0.5504369127271398, "grad_norm": 2.7802111505137965, "learning_rate": 2.366196279666453e-06, "loss": 0.7889, "step": 45165 }, { "epoch": 0.5504978489512876, "grad_norm": 2.396670534750954, "learning_rate": 2.3658755612572162e-06, "loss": 0.7811, "step": 45170 }, { "epoch": 0.5505587851754354, "grad_norm": 2.4745502083878668, "learning_rate": 2.3655548428479797e-06, "loss": 0.7632, "step": 45175 }, { "epoch": 0.5506197213995832, "grad_norm": 3.0659652787715506, "learning_rate": 2.365234124438743e-06, "loss": 0.8041, "step": 45180 }, { "epoch": 0.550680657623731, "grad_norm": 2.2766003499461536, "learning_rate": 2.364913406029506e-06, "loss": 0.7516, "step": 45185 }, { "epoch": 0.5507415938478788, "grad_norm": 2.2856950949565653, "learning_rate": 2.3645926876202696e-06, "loss": 0.7328, "step": 45190 }, { "epoch": 0.5508025300720266, "grad_norm": 2.098647947289243, "learning_rate": 2.364271969211033e-06, "loss": 0.7463, "step": 45195 }, { "epoch": 0.5508634662961744, "grad_norm": 2.2259960869744653, "learning_rate": 2.363951250801796e-06, "loss": 0.7202, "step": 45200 }, { "epoch": 0.5509244025203223, "grad_norm": 2.538929685933125, "learning_rate": 2.3636305323925595e-06, "loss": 0.7099, "step": 45205 }, { "epoch": 0.5509853387444701, "grad_norm": 2.300805687599514, "learning_rate": 2.363309813983323e-06, "loss": 0.7597, "step": 45210 }, { "epoch": 0.5510462749686178, "grad_norm": 2.664785520598835, "learning_rate": 2.3629890955740863e-06, "loss": 0.6486, "step": 45215 }, { "epoch": 0.5511072111927656, "grad_norm": 3.2549083748085703, "learning_rate": 2.3626683771648493e-06, "loss": 0.8027, "step": 45220 }, { "epoch": 0.5511681474169134, "grad_norm": 2.97460741113381, "learning_rate": 2.362347658755613e-06, "loss": 0.7856, "step": 45225 }, { "epoch": 0.5512290836410613, "grad_norm": 2.524990963176657, "learning_rate": 2.3620269403463762e-06, "loss": 0.7543, "step": 45230 }, { "epoch": 0.5512900198652091, "grad_norm": 2.8476942643424557, "learning_rate": 2.3617062219371397e-06, "loss": 0.7966, "step": 45235 }, { "epoch": 0.5513509560893569, "grad_norm": 2.2992402002650034, "learning_rate": 2.3613855035279027e-06, "loss": 0.7224, "step": 45240 }, { "epoch": 0.5514118923135047, "grad_norm": 2.452445176052006, "learning_rate": 2.361064785118666e-06, "loss": 0.7727, "step": 45245 }, { "epoch": 0.5514728285376524, "grad_norm": 2.3141203194674027, "learning_rate": 2.360744066709429e-06, "loss": 0.7511, "step": 45250 }, { "epoch": 0.5515337647618003, "grad_norm": 2.2416377908265255, "learning_rate": 2.3604233483001926e-06, "loss": 0.7439, "step": 45255 }, { "epoch": 0.5515947009859481, "grad_norm": 2.272922490389317, "learning_rate": 2.360102629890956e-06, "loss": 0.7816, "step": 45260 }, { "epoch": 0.5516556372100959, "grad_norm": 2.05237326457088, "learning_rate": 2.359781911481719e-06, "loss": 0.7255, "step": 45265 }, { "epoch": 0.5517165734342437, "grad_norm": 2.282929605305249, "learning_rate": 2.3594611930724825e-06, "loss": 0.6877, "step": 45270 }, { "epoch": 0.5517775096583916, "grad_norm": 2.231394210185499, "learning_rate": 2.359140474663246e-06, "loss": 0.7932, "step": 45275 }, { "epoch": 0.5518384458825394, "grad_norm": 2.380428631473645, "learning_rate": 2.358819756254009e-06, "loss": 0.7014, "step": 45280 }, { "epoch": 0.5518993821066871, "grad_norm": 2.1803459874078124, "learning_rate": 2.3584990378447724e-06, "loss": 0.722, "step": 45285 }, { "epoch": 0.5519603183308349, "grad_norm": 3.626989401587682, "learning_rate": 2.358178319435536e-06, "loss": 0.7076, "step": 45290 }, { "epoch": 0.5520212545549827, "grad_norm": 3.962748650979795, "learning_rate": 2.3578576010262993e-06, "loss": 0.751, "step": 45295 }, { "epoch": 0.5520821907791306, "grad_norm": 2.6849735600900884, "learning_rate": 2.3575368826170623e-06, "loss": 0.7816, "step": 45300 }, { "epoch": 0.5521431270032784, "grad_norm": 2.5085583988579576, "learning_rate": 2.3572161642078257e-06, "loss": 0.7548, "step": 45305 }, { "epoch": 0.5522040632274262, "grad_norm": 2.489757180135277, "learning_rate": 2.356895445798589e-06, "loss": 0.7756, "step": 45310 }, { "epoch": 0.552264999451574, "grad_norm": 2.7017933110291636, "learning_rate": 2.3565747273893526e-06, "loss": 0.7131, "step": 45315 }, { "epoch": 0.5523259356757217, "grad_norm": 2.255602778028945, "learning_rate": 2.3562540089801156e-06, "loss": 0.726, "step": 45320 }, { "epoch": 0.5523868718998696, "grad_norm": 2.916459162459714, "learning_rate": 2.355933290570879e-06, "loss": 0.7596, "step": 45325 }, { "epoch": 0.5524478081240174, "grad_norm": 3.7170898613972816, "learning_rate": 2.3556125721616425e-06, "loss": 0.7693, "step": 45330 }, { "epoch": 0.5525087443481652, "grad_norm": 2.199409029445237, "learning_rate": 2.3552918537524055e-06, "loss": 0.7234, "step": 45335 }, { "epoch": 0.552569680572313, "grad_norm": 2.4339497111535797, "learning_rate": 2.354971135343169e-06, "loss": 0.7702, "step": 45340 }, { "epoch": 0.5526306167964609, "grad_norm": 3.1326247159527663, "learning_rate": 2.354650416933932e-06, "loss": 0.6529, "step": 45345 }, { "epoch": 0.5526915530206087, "grad_norm": 2.7550177788296626, "learning_rate": 2.3543296985246954e-06, "loss": 0.7151, "step": 45350 }, { "epoch": 0.5527524892447564, "grad_norm": 2.3039235513206173, "learning_rate": 2.354008980115459e-06, "loss": 0.7282, "step": 45355 }, { "epoch": 0.5528134254689042, "grad_norm": 2.2639059470248295, "learning_rate": 2.353688261706222e-06, "loss": 0.763, "step": 45360 }, { "epoch": 0.552874361693052, "grad_norm": 3.088682540612708, "learning_rate": 2.3533675432969853e-06, "loss": 0.7801, "step": 45365 }, { "epoch": 0.5529352979171999, "grad_norm": 2.040791898082836, "learning_rate": 2.3530468248877487e-06, "loss": 0.7395, "step": 45370 }, { "epoch": 0.5529962341413477, "grad_norm": 2.7344545929674724, "learning_rate": 2.352726106478512e-06, "loss": 0.7779, "step": 45375 }, { "epoch": 0.5530571703654955, "grad_norm": 2.2368134550060392, "learning_rate": 2.3524053880692756e-06, "loss": 0.7193, "step": 45380 }, { "epoch": 0.5531181065896433, "grad_norm": 2.2681212408945806, "learning_rate": 2.3520846696600386e-06, "loss": 0.6976, "step": 45385 }, { "epoch": 0.553179042813791, "grad_norm": 2.665656209652639, "learning_rate": 2.351763951250802e-06, "loss": 0.7129, "step": 45390 }, { "epoch": 0.5532399790379389, "grad_norm": 2.9329433693051405, "learning_rate": 2.3514432328415655e-06, "loss": 0.7234, "step": 45395 }, { "epoch": 0.5533009152620867, "grad_norm": 2.2790111429997104, "learning_rate": 2.3511225144323285e-06, "loss": 0.755, "step": 45400 }, { "epoch": 0.5533618514862345, "grad_norm": 2.6428894332469057, "learning_rate": 2.350801796023092e-06, "loss": 0.7391, "step": 45405 }, { "epoch": 0.5534227877103823, "grad_norm": 2.2526053449828756, "learning_rate": 2.3504810776138554e-06, "loss": 0.7266, "step": 45410 }, { "epoch": 0.5534837239345302, "grad_norm": 3.170884765113748, "learning_rate": 2.3501603592046184e-06, "loss": 0.8025, "step": 45415 }, { "epoch": 0.553544660158678, "grad_norm": 3.2043377887954727, "learning_rate": 2.349839640795382e-06, "loss": 0.7467, "step": 45420 }, { "epoch": 0.5536055963828257, "grad_norm": 2.9260045300238313, "learning_rate": 2.349518922386145e-06, "loss": 0.6938, "step": 45425 }, { "epoch": 0.5536665326069735, "grad_norm": 2.253091803141945, "learning_rate": 2.3491982039769083e-06, "loss": 0.7106, "step": 45430 }, { "epoch": 0.5537274688311213, "grad_norm": 2.714819660637932, "learning_rate": 2.3488774855676718e-06, "loss": 0.7784, "step": 45435 }, { "epoch": 0.5537884050552692, "grad_norm": 2.31023592420952, "learning_rate": 2.348556767158435e-06, "loss": 0.7026, "step": 45440 }, { "epoch": 0.553849341279417, "grad_norm": 2.116815063963689, "learning_rate": 2.3482360487491982e-06, "loss": 0.755, "step": 45445 }, { "epoch": 0.5539102775035648, "grad_norm": 2.740254105958691, "learning_rate": 2.3479153303399617e-06, "loss": 0.7702, "step": 45450 }, { "epoch": 0.5539712137277126, "grad_norm": 2.996493215452792, "learning_rate": 2.347594611930725e-06, "loss": 0.6585, "step": 45455 }, { "epoch": 0.5540321499518603, "grad_norm": 2.4706230794750375, "learning_rate": 2.3472738935214886e-06, "loss": 0.7346, "step": 45460 }, { "epoch": 0.5540930861760082, "grad_norm": 2.459857289455111, "learning_rate": 2.3469531751122516e-06, "loss": 0.7857, "step": 45465 }, { "epoch": 0.554154022400156, "grad_norm": 2.2079664710960674, "learning_rate": 2.346632456703015e-06, "loss": 0.7762, "step": 45470 }, { "epoch": 0.5542149586243038, "grad_norm": 2.4459885932369323, "learning_rate": 2.3463117382937785e-06, "loss": 0.7428, "step": 45475 }, { "epoch": 0.5542758948484516, "grad_norm": 2.61451129213076, "learning_rate": 2.3459910198845415e-06, "loss": 0.7795, "step": 45480 }, { "epoch": 0.5543368310725995, "grad_norm": 3.0270350414601195, "learning_rate": 2.345670301475305e-06, "loss": 0.7432, "step": 45485 }, { "epoch": 0.5543977672967473, "grad_norm": 2.502282736238508, "learning_rate": 2.3453495830660683e-06, "loss": 0.678, "step": 45490 }, { "epoch": 0.554458703520895, "grad_norm": 2.7411963730157014, "learning_rate": 2.3450288646568314e-06, "loss": 0.733, "step": 45495 }, { "epoch": 0.5545196397450428, "grad_norm": 2.482430669114225, "learning_rate": 2.344708146247595e-06, "loss": 0.761, "step": 45500 }, { "epoch": 0.5545805759691906, "grad_norm": 2.2447216953167413, "learning_rate": 2.344387427838358e-06, "loss": 0.7019, "step": 45505 }, { "epoch": 0.5546415121933385, "grad_norm": 4.261060575822449, "learning_rate": 2.3440667094291213e-06, "loss": 0.7673, "step": 45510 }, { "epoch": 0.5547024484174863, "grad_norm": 2.190910751018937, "learning_rate": 2.3437459910198847e-06, "loss": 0.7359, "step": 45515 }, { "epoch": 0.5547633846416341, "grad_norm": 2.2145964829530924, "learning_rate": 2.343425272610648e-06, "loss": 0.7496, "step": 45520 }, { "epoch": 0.5548243208657819, "grad_norm": 2.336282516461856, "learning_rate": 2.343104554201411e-06, "loss": 0.7508, "step": 45525 }, { "epoch": 0.5548852570899296, "grad_norm": 2.8501344240920004, "learning_rate": 2.3427838357921746e-06, "loss": 0.7386, "step": 45530 }, { "epoch": 0.5549461933140775, "grad_norm": 2.5860685100177725, "learning_rate": 2.342463117382938e-06, "loss": 0.7017, "step": 45535 }, { "epoch": 0.5550071295382253, "grad_norm": 2.566947053939513, "learning_rate": 2.3421423989737015e-06, "loss": 0.753, "step": 45540 }, { "epoch": 0.5550680657623731, "grad_norm": 2.4785275438298124, "learning_rate": 2.3418216805644645e-06, "loss": 0.87, "step": 45545 }, { "epoch": 0.5551290019865209, "grad_norm": 2.694175672361835, "learning_rate": 2.341500962155228e-06, "loss": 0.8104, "step": 45550 }, { "epoch": 0.5551899382106688, "grad_norm": 2.335949015145824, "learning_rate": 2.3411802437459914e-06, "loss": 0.7756, "step": 45555 }, { "epoch": 0.5552508744348165, "grad_norm": 2.3789706347953192, "learning_rate": 2.3408595253367544e-06, "loss": 0.7162, "step": 45560 }, { "epoch": 0.5553118106589643, "grad_norm": 2.963932975680828, "learning_rate": 2.340538806927518e-06, "loss": 0.8092, "step": 45565 }, { "epoch": 0.5553727468831121, "grad_norm": 3.283275950867324, "learning_rate": 2.3402180885182813e-06, "loss": 0.7062, "step": 45570 }, { "epoch": 0.5554336831072599, "grad_norm": 2.542291056419951, "learning_rate": 2.3398973701090443e-06, "loss": 0.7454, "step": 45575 }, { "epoch": 0.5554946193314078, "grad_norm": 2.6177300589783625, "learning_rate": 2.3395766516998077e-06, "loss": 0.7141, "step": 45580 }, { "epoch": 0.5555555555555556, "grad_norm": 2.4133911106342407, "learning_rate": 2.3392559332905707e-06, "loss": 0.7672, "step": 45585 }, { "epoch": 0.5556164917797034, "grad_norm": 3.1875232180549125, "learning_rate": 2.338935214881334e-06, "loss": 0.7684, "step": 45590 }, { "epoch": 0.5556774280038511, "grad_norm": 3.1040740434511713, "learning_rate": 2.3386144964720976e-06, "loss": 0.8023, "step": 45595 }, { "epoch": 0.5557383642279989, "grad_norm": 3.348415189963358, "learning_rate": 2.338293778062861e-06, "loss": 0.7832, "step": 45600 }, { "epoch": 0.5557993004521468, "grad_norm": 2.7141924968580726, "learning_rate": 2.3379730596536245e-06, "loss": 0.7236, "step": 45605 }, { "epoch": 0.5558602366762946, "grad_norm": 2.334508740207406, "learning_rate": 2.3376523412443875e-06, "loss": 0.7938, "step": 45610 }, { "epoch": 0.5559211729004424, "grad_norm": 2.7777414846413753, "learning_rate": 2.337331622835151e-06, "loss": 0.8418, "step": 45615 }, { "epoch": 0.5559821091245902, "grad_norm": 3.075537260723993, "learning_rate": 2.3370109044259144e-06, "loss": 0.7519, "step": 45620 }, { "epoch": 0.556043045348738, "grad_norm": 2.358041979837512, "learning_rate": 2.336690186016678e-06, "loss": 0.7268, "step": 45625 }, { "epoch": 0.5561039815728858, "grad_norm": 2.225060220030871, "learning_rate": 2.336369467607441e-06, "loss": 0.7091, "step": 45630 }, { "epoch": 0.5561649177970336, "grad_norm": 2.314778899767617, "learning_rate": 2.3360487491982043e-06, "loss": 0.6669, "step": 45635 }, { "epoch": 0.5562258540211814, "grad_norm": 2.960636838260708, "learning_rate": 2.3357280307889673e-06, "loss": 0.7254, "step": 45640 }, { "epoch": 0.5562867902453292, "grad_norm": 2.8670101677684365, "learning_rate": 2.3354073123797308e-06, "loss": 0.769, "step": 45645 }, { "epoch": 0.556347726469477, "grad_norm": 2.4877451527720034, "learning_rate": 2.335086593970494e-06, "loss": 0.6956, "step": 45650 }, { "epoch": 0.5564086626936249, "grad_norm": 2.2424602553787367, "learning_rate": 2.3347658755612572e-06, "loss": 0.7632, "step": 45655 }, { "epoch": 0.5564695989177727, "grad_norm": 2.3911289626030787, "learning_rate": 2.3344451571520207e-06, "loss": 0.744, "step": 45660 }, { "epoch": 0.5565305351419204, "grad_norm": 2.46613388099288, "learning_rate": 2.334124438742784e-06, "loss": 0.7103, "step": 45665 }, { "epoch": 0.5565914713660682, "grad_norm": 2.4860993731280265, "learning_rate": 2.333803720333547e-06, "loss": 0.7807, "step": 45670 }, { "epoch": 0.5566524075902161, "grad_norm": 2.946513580548998, "learning_rate": 2.3334830019243106e-06, "loss": 0.7714, "step": 45675 }, { "epoch": 0.5567133438143639, "grad_norm": 2.33726602026483, "learning_rate": 2.333162283515074e-06, "loss": 0.7765, "step": 45680 }, { "epoch": 0.5567742800385117, "grad_norm": 2.2570525862934105, "learning_rate": 2.3328415651058374e-06, "loss": 0.7484, "step": 45685 }, { "epoch": 0.5568352162626595, "grad_norm": 2.0276269044118806, "learning_rate": 2.332520846696601e-06, "loss": 0.7007, "step": 45690 }, { "epoch": 0.5568961524868073, "grad_norm": 2.738544943269463, "learning_rate": 2.332200128287364e-06, "loss": 0.7467, "step": 45695 }, { "epoch": 0.5569570887109551, "grad_norm": 2.4848676432523, "learning_rate": 2.3318794098781273e-06, "loss": 0.8054, "step": 45700 }, { "epoch": 0.5570180249351029, "grad_norm": 2.810743144793586, "learning_rate": 2.3315586914688908e-06, "loss": 0.7887, "step": 45705 }, { "epoch": 0.5570789611592507, "grad_norm": 2.0997112349303464, "learning_rate": 2.331237973059654e-06, "loss": 0.7343, "step": 45710 }, { "epoch": 0.5571398973833985, "grad_norm": 2.6748343410508957, "learning_rate": 2.3309172546504172e-06, "loss": 0.7979, "step": 45715 }, { "epoch": 0.5572008336075464, "grad_norm": 2.166860301506305, "learning_rate": 2.3305965362411802e-06, "loss": 0.7229, "step": 45720 }, { "epoch": 0.5572617698316942, "grad_norm": 2.244734862815856, "learning_rate": 2.3302758178319437e-06, "loss": 0.7649, "step": 45725 }, { "epoch": 0.557322706055842, "grad_norm": 2.5753517628141354, "learning_rate": 2.329955099422707e-06, "loss": 0.7512, "step": 45730 }, { "epoch": 0.5573836422799897, "grad_norm": 2.3345867832904217, "learning_rate": 2.32963438101347e-06, "loss": 0.6842, "step": 45735 }, { "epoch": 0.5574445785041375, "grad_norm": 2.599693744900944, "learning_rate": 2.3293136626042336e-06, "loss": 0.8087, "step": 45740 }, { "epoch": 0.5575055147282854, "grad_norm": 2.376562706745523, "learning_rate": 2.328992944194997e-06, "loss": 0.7578, "step": 45745 }, { "epoch": 0.5575664509524332, "grad_norm": 2.3013272354355063, "learning_rate": 2.32867222578576e-06, "loss": 0.7309, "step": 45750 }, { "epoch": 0.557627387176581, "grad_norm": 2.557254992876719, "learning_rate": 2.3283515073765235e-06, "loss": 0.7326, "step": 45755 }, { "epoch": 0.5576883234007288, "grad_norm": 2.047857043898348, "learning_rate": 2.328030788967287e-06, "loss": 0.7895, "step": 45760 }, { "epoch": 0.5577492596248766, "grad_norm": 2.5352971945412226, "learning_rate": 2.3277100705580504e-06, "loss": 0.837, "step": 45765 }, { "epoch": 0.5578101958490244, "grad_norm": 2.2763192533082046, "learning_rate": 2.327389352148814e-06, "loss": 0.7601, "step": 45770 }, { "epoch": 0.5578711320731722, "grad_norm": 3.1482179272265025, "learning_rate": 2.327068633739577e-06, "loss": 0.6697, "step": 45775 }, { "epoch": 0.55793206829732, "grad_norm": 2.303538868721236, "learning_rate": 2.3267479153303403e-06, "loss": 0.8002, "step": 45780 }, { "epoch": 0.5579930045214678, "grad_norm": 2.2635503033000766, "learning_rate": 2.3264271969211037e-06, "loss": 0.7493, "step": 45785 }, { "epoch": 0.5580539407456157, "grad_norm": 2.6418198686399954, "learning_rate": 2.3261064785118667e-06, "loss": 0.8326, "step": 45790 }, { "epoch": 0.5581148769697635, "grad_norm": 3.000354469050242, "learning_rate": 2.32578576010263e-06, "loss": 0.746, "step": 45795 }, { "epoch": 0.5581758131939113, "grad_norm": 2.856047141785265, "learning_rate": 2.325465041693393e-06, "loss": 0.7648, "step": 45800 }, { "epoch": 0.558236749418059, "grad_norm": 2.6270706358381988, "learning_rate": 2.3251443232841566e-06, "loss": 0.7761, "step": 45805 }, { "epoch": 0.5582976856422068, "grad_norm": 2.511402446102012, "learning_rate": 2.32482360487492e-06, "loss": 0.6884, "step": 45810 }, { "epoch": 0.5583586218663547, "grad_norm": 3.1325289111395325, "learning_rate": 2.324502886465683e-06, "loss": 0.7517, "step": 45815 }, { "epoch": 0.5584195580905025, "grad_norm": 2.8891943498767554, "learning_rate": 2.3241821680564465e-06, "loss": 0.6722, "step": 45820 }, { "epoch": 0.5584804943146503, "grad_norm": 4.540087108101399, "learning_rate": 2.32386144964721e-06, "loss": 0.7416, "step": 45825 }, { "epoch": 0.5585414305387981, "grad_norm": 3.0541025048800763, "learning_rate": 2.3235407312379734e-06, "loss": 0.7847, "step": 45830 }, { "epoch": 0.558602366762946, "grad_norm": 2.742608575916886, "learning_rate": 2.3232200128287364e-06, "loss": 0.7339, "step": 45835 }, { "epoch": 0.5586633029870937, "grad_norm": 2.4522522302646492, "learning_rate": 2.3228992944195e-06, "loss": 0.7158, "step": 45840 }, { "epoch": 0.5587242392112415, "grad_norm": 2.436740978539163, "learning_rate": 2.3225785760102633e-06, "loss": 0.8086, "step": 45845 }, { "epoch": 0.5587851754353893, "grad_norm": 2.5254301383347073, "learning_rate": 2.3222578576010267e-06, "loss": 0.7165, "step": 45850 }, { "epoch": 0.5588461116595371, "grad_norm": 2.4113022489291223, "learning_rate": 2.3219371391917897e-06, "loss": 0.7337, "step": 45855 }, { "epoch": 0.558907047883685, "grad_norm": 2.8167217818591297, "learning_rate": 2.321616420782553e-06, "loss": 0.7047, "step": 45860 }, { "epoch": 0.5589679841078328, "grad_norm": 2.4573303709811065, "learning_rate": 2.3212957023733166e-06, "loss": 0.7376, "step": 45865 }, { "epoch": 0.5590289203319806, "grad_norm": 2.708497757518077, "learning_rate": 2.3209749839640796e-06, "loss": 0.7688, "step": 45870 }, { "epoch": 0.5590898565561283, "grad_norm": 2.6299451313393765, "learning_rate": 2.320654265554843e-06, "loss": 0.7451, "step": 45875 }, { "epoch": 0.5591507927802761, "grad_norm": 2.6430158693999966, "learning_rate": 2.320333547145606e-06, "loss": 0.7418, "step": 45880 }, { "epoch": 0.559211729004424, "grad_norm": 2.8683846744482993, "learning_rate": 2.3200128287363695e-06, "loss": 0.7382, "step": 45885 }, { "epoch": 0.5592726652285718, "grad_norm": 3.720832016605475, "learning_rate": 2.319692110327133e-06, "loss": 0.789, "step": 45890 }, { "epoch": 0.5593336014527196, "grad_norm": 2.5821882265430633, "learning_rate": 2.319371391917896e-06, "loss": 0.7466, "step": 45895 }, { "epoch": 0.5593945376768674, "grad_norm": 2.664963634672814, "learning_rate": 2.3190506735086594e-06, "loss": 0.7774, "step": 45900 }, { "epoch": 0.5594554739010152, "grad_norm": 2.642395720169226, "learning_rate": 2.318729955099423e-06, "loss": 0.7165, "step": 45905 }, { "epoch": 0.559516410125163, "grad_norm": 2.469683959768483, "learning_rate": 2.3184092366901863e-06, "loss": 0.7482, "step": 45910 }, { "epoch": 0.5595773463493108, "grad_norm": 3.406751375169502, "learning_rate": 2.3180885182809498e-06, "loss": 0.8096, "step": 45915 }, { "epoch": 0.5596382825734586, "grad_norm": 2.4434600062730842, "learning_rate": 2.3177677998717128e-06, "loss": 0.7282, "step": 45920 }, { "epoch": 0.5596992187976064, "grad_norm": 2.5120021432488384, "learning_rate": 2.3174470814624762e-06, "loss": 0.7342, "step": 45925 }, { "epoch": 0.5597601550217542, "grad_norm": 2.2107660425465303, "learning_rate": 2.3171263630532397e-06, "loss": 0.7556, "step": 45930 }, { "epoch": 0.5598210912459021, "grad_norm": 2.2603739829379945, "learning_rate": 2.3168056446440027e-06, "loss": 0.7087, "step": 45935 }, { "epoch": 0.5598820274700499, "grad_norm": 2.3266423319911036, "learning_rate": 2.316484926234766e-06, "loss": 0.7668, "step": 45940 }, { "epoch": 0.5599429636941976, "grad_norm": 2.5903086809292573, "learning_rate": 2.3161642078255296e-06, "loss": 0.7029, "step": 45945 }, { "epoch": 0.5600038999183454, "grad_norm": 2.0898506642626464, "learning_rate": 2.3158434894162926e-06, "loss": 0.7189, "step": 45950 }, { "epoch": 0.5600648361424932, "grad_norm": 2.0281782404636837, "learning_rate": 2.315522771007056e-06, "loss": 0.712, "step": 45955 }, { "epoch": 0.5601257723666411, "grad_norm": 2.1081783712198354, "learning_rate": 2.3152020525978195e-06, "loss": 0.7384, "step": 45960 }, { "epoch": 0.5601867085907889, "grad_norm": 2.373111865401689, "learning_rate": 2.3148813341885825e-06, "loss": 0.77, "step": 45965 }, { "epoch": 0.5602476448149367, "grad_norm": 2.499913568577867, "learning_rate": 2.314560615779346e-06, "loss": 0.8023, "step": 45970 }, { "epoch": 0.5603085810390845, "grad_norm": 3.4242174875582005, "learning_rate": 2.314239897370109e-06, "loss": 0.8246, "step": 45975 }, { "epoch": 0.5603695172632323, "grad_norm": 2.4373606669078103, "learning_rate": 2.3139191789608724e-06, "loss": 0.7277, "step": 45980 }, { "epoch": 0.5604304534873801, "grad_norm": 2.661832787159401, "learning_rate": 2.313598460551636e-06, "loss": 0.8247, "step": 45985 }, { "epoch": 0.5604913897115279, "grad_norm": 2.6660349287871656, "learning_rate": 2.3132777421423992e-06, "loss": 0.6959, "step": 45990 }, { "epoch": 0.5605523259356757, "grad_norm": 2.3834599189374917, "learning_rate": 2.3129570237331627e-06, "loss": 0.7613, "step": 45995 }, { "epoch": 0.5606132621598235, "grad_norm": 2.955024201531471, "learning_rate": 2.3126363053239257e-06, "loss": 0.7737, "step": 46000 }, { "epoch": 0.5606741983839714, "grad_norm": 2.4893277809047585, "learning_rate": 2.312315586914689e-06, "loss": 0.7365, "step": 46005 }, { "epoch": 0.5607351346081192, "grad_norm": 2.3871771351743267, "learning_rate": 2.3119948685054526e-06, "loss": 0.6884, "step": 46010 }, { "epoch": 0.5607960708322669, "grad_norm": 2.9873997397975653, "learning_rate": 2.3116741500962156e-06, "loss": 0.8001, "step": 46015 }, { "epoch": 0.5608570070564147, "grad_norm": 2.0646897290056394, "learning_rate": 2.311353431686979e-06, "loss": 0.7411, "step": 46020 }, { "epoch": 0.5609179432805625, "grad_norm": 3.081254574114208, "learning_rate": 2.3110327132777425e-06, "loss": 0.7583, "step": 46025 }, { "epoch": 0.5609788795047104, "grad_norm": 2.225319474838813, "learning_rate": 2.3107119948685055e-06, "loss": 0.7239, "step": 46030 }, { "epoch": 0.5610398157288582, "grad_norm": 2.31236154286047, "learning_rate": 2.310391276459269e-06, "loss": 0.7761, "step": 46035 }, { "epoch": 0.561100751953006, "grad_norm": 2.7028498868874116, "learning_rate": 2.3100705580500324e-06, "loss": 0.6982, "step": 46040 }, { "epoch": 0.5611616881771538, "grad_norm": 4.58455496258289, "learning_rate": 2.3097498396407954e-06, "loss": 0.7436, "step": 46045 }, { "epoch": 0.5612226244013016, "grad_norm": 2.4829884691869832, "learning_rate": 2.309429121231559e-06, "loss": 0.7271, "step": 46050 }, { "epoch": 0.5612835606254494, "grad_norm": 2.7352786556544784, "learning_rate": 2.3091084028223223e-06, "loss": 0.8028, "step": 46055 }, { "epoch": 0.5613444968495972, "grad_norm": 2.746182616430792, "learning_rate": 2.3087876844130853e-06, "loss": 0.7747, "step": 46060 }, { "epoch": 0.561405433073745, "grad_norm": 2.6730362515513577, "learning_rate": 2.3084669660038487e-06, "loss": 0.7801, "step": 46065 }, { "epoch": 0.5614663692978928, "grad_norm": 2.5486017312844247, "learning_rate": 2.308146247594612e-06, "loss": 0.7558, "step": 46070 }, { "epoch": 0.5615273055220407, "grad_norm": 2.62695192065374, "learning_rate": 2.3078255291853756e-06, "loss": 0.6926, "step": 46075 }, { "epoch": 0.5615882417461885, "grad_norm": 2.6170839758534967, "learning_rate": 2.307504810776139e-06, "loss": 0.7767, "step": 46080 }, { "epoch": 0.5616491779703362, "grad_norm": 3.6678686500638524, "learning_rate": 2.307184092366902e-06, "loss": 0.7464, "step": 46085 }, { "epoch": 0.561710114194484, "grad_norm": 2.8948329113141207, "learning_rate": 2.3068633739576655e-06, "loss": 0.7798, "step": 46090 }, { "epoch": 0.5617710504186318, "grad_norm": 2.070653681544423, "learning_rate": 2.3065426555484285e-06, "loss": 0.672, "step": 46095 }, { "epoch": 0.5618319866427797, "grad_norm": 2.1135517327344573, "learning_rate": 2.306221937139192e-06, "loss": 0.7658, "step": 46100 }, { "epoch": 0.5618929228669275, "grad_norm": 2.7400309194296923, "learning_rate": 2.3059012187299554e-06, "loss": 0.8352, "step": 46105 }, { "epoch": 0.5619538590910753, "grad_norm": 2.1947011467177737, "learning_rate": 2.3055805003207184e-06, "loss": 0.7567, "step": 46110 }, { "epoch": 0.5620147953152231, "grad_norm": 2.742804831098212, "learning_rate": 2.305259781911482e-06, "loss": 0.7465, "step": 46115 }, { "epoch": 0.5620757315393708, "grad_norm": 2.348900914134541, "learning_rate": 2.3049390635022453e-06, "loss": 0.7574, "step": 46120 }, { "epoch": 0.5621366677635187, "grad_norm": 2.0921321949816414, "learning_rate": 2.3046183450930083e-06, "loss": 0.7608, "step": 46125 }, { "epoch": 0.5621976039876665, "grad_norm": 1.979342961742168, "learning_rate": 2.3042976266837718e-06, "loss": 0.681, "step": 46130 }, { "epoch": 0.5622585402118143, "grad_norm": 2.3403492372733967, "learning_rate": 2.303976908274535e-06, "loss": 0.7357, "step": 46135 }, { "epoch": 0.5623194764359621, "grad_norm": 2.4148011963274914, "learning_rate": 2.3036561898652982e-06, "loss": 0.7273, "step": 46140 }, { "epoch": 0.56238041266011, "grad_norm": 2.3437390366066877, "learning_rate": 2.3033354714560617e-06, "loss": 0.7407, "step": 46145 }, { "epoch": 0.5624413488842578, "grad_norm": 2.6277330185756864, "learning_rate": 2.303014753046825e-06, "loss": 0.7428, "step": 46150 }, { "epoch": 0.5625022851084055, "grad_norm": 2.500461719939209, "learning_rate": 2.3026940346375885e-06, "loss": 0.7958, "step": 46155 }, { "epoch": 0.5625632213325533, "grad_norm": 3.0577978297164328, "learning_rate": 2.302373316228352e-06, "loss": 0.7523, "step": 46160 }, { "epoch": 0.5626241575567011, "grad_norm": 2.0067823806482132, "learning_rate": 2.302052597819115e-06, "loss": 0.7276, "step": 46165 }, { "epoch": 0.562685093780849, "grad_norm": 2.819882563936202, "learning_rate": 2.3017318794098784e-06, "loss": 0.7414, "step": 46170 }, { "epoch": 0.5627460300049968, "grad_norm": 2.130527454476886, "learning_rate": 2.3014111610006415e-06, "loss": 0.7372, "step": 46175 }, { "epoch": 0.5628069662291446, "grad_norm": 2.351662109551309, "learning_rate": 2.301090442591405e-06, "loss": 0.671, "step": 46180 }, { "epoch": 0.5628679024532924, "grad_norm": 2.2735084961843888, "learning_rate": 2.3007697241821683e-06, "loss": 0.6994, "step": 46185 }, { "epoch": 0.5629288386774401, "grad_norm": 2.967007786091649, "learning_rate": 2.3004490057729314e-06, "loss": 0.7121, "step": 46190 }, { "epoch": 0.562989774901588, "grad_norm": 4.042779587642708, "learning_rate": 2.300128287363695e-06, "loss": 0.805, "step": 46195 }, { "epoch": 0.5630507111257358, "grad_norm": 2.7919976023990203, "learning_rate": 2.2998075689544582e-06, "loss": 0.7446, "step": 46200 }, { "epoch": 0.5631116473498836, "grad_norm": 2.486510824755283, "learning_rate": 2.2994868505452213e-06, "loss": 0.7402, "step": 46205 }, { "epoch": 0.5631725835740314, "grad_norm": 2.237864435932412, "learning_rate": 2.2991661321359847e-06, "loss": 0.7393, "step": 46210 }, { "epoch": 0.5632335197981793, "grad_norm": 2.471131293623965, "learning_rate": 2.298845413726748e-06, "loss": 0.7132, "step": 46215 }, { "epoch": 0.5632944560223271, "grad_norm": 3.5271224533979524, "learning_rate": 2.2985246953175116e-06, "loss": 0.8006, "step": 46220 }, { "epoch": 0.5633553922464748, "grad_norm": 2.3011952022533246, "learning_rate": 2.2982039769082746e-06, "loss": 0.7668, "step": 46225 }, { "epoch": 0.5634163284706226, "grad_norm": 2.541705408805143, "learning_rate": 2.297883258499038e-06, "loss": 0.6895, "step": 46230 }, { "epoch": 0.5634772646947704, "grad_norm": 2.4348577296870477, "learning_rate": 2.2975625400898015e-06, "loss": 0.707, "step": 46235 }, { "epoch": 0.5635382009189183, "grad_norm": 2.5024040384993644, "learning_rate": 2.297241821680565e-06, "loss": 0.7437, "step": 46240 }, { "epoch": 0.5635991371430661, "grad_norm": 2.0657304433984853, "learning_rate": 2.296921103271328e-06, "loss": 0.6924, "step": 46245 }, { "epoch": 0.5636600733672139, "grad_norm": 2.321395081098673, "learning_rate": 2.2966003848620914e-06, "loss": 0.7864, "step": 46250 }, { "epoch": 0.5637210095913617, "grad_norm": 2.789212836274482, "learning_rate": 2.296279666452855e-06, "loss": 0.7278, "step": 46255 }, { "epoch": 0.5637819458155094, "grad_norm": 2.07639457222761, "learning_rate": 2.295958948043618e-06, "loss": 0.6504, "step": 46260 }, { "epoch": 0.5638428820396573, "grad_norm": 2.9837553710675895, "learning_rate": 2.2956382296343813e-06, "loss": 0.6858, "step": 46265 }, { "epoch": 0.5639038182638051, "grad_norm": 2.484552428187635, "learning_rate": 2.2953175112251443e-06, "loss": 0.7821, "step": 46270 }, { "epoch": 0.5639647544879529, "grad_norm": 2.753862694394565, "learning_rate": 2.2949967928159077e-06, "loss": 0.7529, "step": 46275 }, { "epoch": 0.5640256907121007, "grad_norm": 2.3352799862713405, "learning_rate": 2.294676074406671e-06, "loss": 0.7529, "step": 46280 }, { "epoch": 0.5640866269362486, "grad_norm": 2.7442132880454326, "learning_rate": 2.294355355997434e-06, "loss": 0.7741, "step": 46285 }, { "epoch": 0.5641475631603964, "grad_norm": 2.2017176492318384, "learning_rate": 2.2940346375881976e-06, "loss": 0.7921, "step": 46290 }, { "epoch": 0.5642084993845441, "grad_norm": 2.5706646568215348, "learning_rate": 2.293713919178961e-06, "loss": 0.6973, "step": 46295 }, { "epoch": 0.5642694356086919, "grad_norm": 2.416144767602373, "learning_rate": 2.2933932007697245e-06, "loss": 0.7744, "step": 46300 }, { "epoch": 0.5643303718328397, "grad_norm": 2.1631897201066415, "learning_rate": 2.293072482360488e-06, "loss": 0.7336, "step": 46305 }, { "epoch": 0.5643913080569876, "grad_norm": 2.5055570292884317, "learning_rate": 2.292751763951251e-06, "loss": 0.7752, "step": 46310 }, { "epoch": 0.5644522442811354, "grad_norm": 2.2964617834284136, "learning_rate": 2.2924310455420144e-06, "loss": 0.7757, "step": 46315 }, { "epoch": 0.5645131805052832, "grad_norm": 1.9216236454830187, "learning_rate": 2.292110327132778e-06, "loss": 0.7284, "step": 46320 }, { "epoch": 0.564574116729431, "grad_norm": 2.6233799306471526, "learning_rate": 2.291789608723541e-06, "loss": 0.7773, "step": 46325 }, { "epoch": 0.5646350529535787, "grad_norm": 2.8223816037460296, "learning_rate": 2.2914688903143043e-06, "loss": 0.6834, "step": 46330 }, { "epoch": 0.5646959891777266, "grad_norm": 3.661693628157558, "learning_rate": 2.2911481719050677e-06, "loss": 0.7419, "step": 46335 }, { "epoch": 0.5647569254018744, "grad_norm": 2.6488591141276965, "learning_rate": 2.2908274534958308e-06, "loss": 0.7379, "step": 46340 }, { "epoch": 0.5648178616260222, "grad_norm": 2.353533077200287, "learning_rate": 2.290506735086594e-06, "loss": 0.6846, "step": 46345 }, { "epoch": 0.56487879785017, "grad_norm": 1.989065216871222, "learning_rate": 2.290186016677357e-06, "loss": 0.7129, "step": 46350 }, { "epoch": 0.5649397340743179, "grad_norm": 2.043559678422109, "learning_rate": 2.2898652982681206e-06, "loss": 0.7549, "step": 46355 }, { "epoch": 0.5650006702984657, "grad_norm": 2.3937123697490006, "learning_rate": 2.289544579858884e-06, "loss": 0.7905, "step": 46360 }, { "epoch": 0.5650616065226134, "grad_norm": 2.5691744873596227, "learning_rate": 2.289223861449647e-06, "loss": 0.7436, "step": 46365 }, { "epoch": 0.5651225427467612, "grad_norm": 2.0341278249155863, "learning_rate": 2.2889031430404105e-06, "loss": 0.7092, "step": 46370 }, { "epoch": 0.565183478970909, "grad_norm": 2.019232352378978, "learning_rate": 2.288582424631174e-06, "loss": 0.7175, "step": 46375 }, { "epoch": 0.5652444151950569, "grad_norm": 2.9516270178914246, "learning_rate": 2.2882617062219374e-06, "loss": 0.7314, "step": 46380 }, { "epoch": 0.5653053514192047, "grad_norm": 2.2311140330394954, "learning_rate": 2.287940987812701e-06, "loss": 0.7254, "step": 46385 }, { "epoch": 0.5653662876433525, "grad_norm": 2.3383124477938932, "learning_rate": 2.287620269403464e-06, "loss": 0.7264, "step": 46390 }, { "epoch": 0.5654272238675003, "grad_norm": 2.0472474947632957, "learning_rate": 2.2872995509942273e-06, "loss": 0.7056, "step": 46395 }, { "epoch": 0.565488160091648, "grad_norm": 2.4655547048567112, "learning_rate": 2.2869788325849908e-06, "loss": 0.8147, "step": 46400 }, { "epoch": 0.5655490963157959, "grad_norm": 2.2983789113186064, "learning_rate": 2.2866581141757538e-06, "loss": 0.7326, "step": 46405 }, { "epoch": 0.5656100325399437, "grad_norm": 2.9203787614973327, "learning_rate": 2.2863373957665172e-06, "loss": 0.7844, "step": 46410 }, { "epoch": 0.5656709687640915, "grad_norm": 2.0276910949530835, "learning_rate": 2.2860166773572807e-06, "loss": 0.754, "step": 46415 }, { "epoch": 0.5657319049882393, "grad_norm": 2.7173693574014, "learning_rate": 2.2856959589480437e-06, "loss": 0.7016, "step": 46420 }, { "epoch": 0.5657928412123872, "grad_norm": 3.0944090600495664, "learning_rate": 2.285375240538807e-06, "loss": 0.7719, "step": 46425 }, { "epoch": 0.565853777436535, "grad_norm": 2.5268905622415434, "learning_rate": 2.28505452212957e-06, "loss": 0.7116, "step": 46430 }, { "epoch": 0.5659147136606827, "grad_norm": 2.4177988663334307, "learning_rate": 2.2847338037203336e-06, "loss": 0.7526, "step": 46435 }, { "epoch": 0.5659756498848305, "grad_norm": 2.0606036765918754, "learning_rate": 2.284413085311097e-06, "loss": 0.7647, "step": 46440 }, { "epoch": 0.5660365861089783, "grad_norm": 2.750646823042378, "learning_rate": 2.2840923669018605e-06, "loss": 0.723, "step": 46445 }, { "epoch": 0.5660975223331262, "grad_norm": 2.401802029520578, "learning_rate": 2.2837716484926235e-06, "loss": 0.7574, "step": 46450 }, { "epoch": 0.566158458557274, "grad_norm": 1.969212459459801, "learning_rate": 2.283450930083387e-06, "loss": 0.7913, "step": 46455 }, { "epoch": 0.5662193947814218, "grad_norm": 2.747953470050436, "learning_rate": 2.2831302116741504e-06, "loss": 0.7331, "step": 46460 }, { "epoch": 0.5662803310055696, "grad_norm": 2.7341809247077333, "learning_rate": 2.282809493264914e-06, "loss": 0.7313, "step": 46465 }, { "epoch": 0.5663412672297173, "grad_norm": 2.8694131641547194, "learning_rate": 2.282488774855677e-06, "loss": 0.7081, "step": 46470 }, { "epoch": 0.5664022034538652, "grad_norm": 2.3182161466210576, "learning_rate": 2.2821680564464403e-06, "loss": 0.7353, "step": 46475 }, { "epoch": 0.566463139678013, "grad_norm": 2.2914445293545285, "learning_rate": 2.2818473380372037e-06, "loss": 0.7983, "step": 46480 }, { "epoch": 0.5665240759021608, "grad_norm": 2.9928513903864733, "learning_rate": 2.2815266196279667e-06, "loss": 0.6742, "step": 46485 }, { "epoch": 0.5665850121263086, "grad_norm": 2.5691718806827195, "learning_rate": 2.28120590121873e-06, "loss": 0.7842, "step": 46490 }, { "epoch": 0.5666459483504565, "grad_norm": 2.2254433015810093, "learning_rate": 2.2808851828094936e-06, "loss": 0.7173, "step": 46495 }, { "epoch": 0.5667068845746042, "grad_norm": 2.6172987486768724, "learning_rate": 2.2805644644002566e-06, "loss": 0.7001, "step": 46500 }, { "epoch": 0.566767820798752, "grad_norm": 2.596020085169384, "learning_rate": 2.28024374599102e-06, "loss": 0.8071, "step": 46505 }, { "epoch": 0.5668287570228998, "grad_norm": 2.2279755890566135, "learning_rate": 2.279923027581783e-06, "loss": 0.7042, "step": 46510 }, { "epoch": 0.5668896932470476, "grad_norm": 2.76104228036471, "learning_rate": 2.2796023091725465e-06, "loss": 0.759, "step": 46515 }, { "epoch": 0.5669506294711955, "grad_norm": 1.8748789137510717, "learning_rate": 2.27928159076331e-06, "loss": 0.805, "step": 46520 }, { "epoch": 0.5670115656953433, "grad_norm": 2.2399596520093716, "learning_rate": 2.2789608723540734e-06, "loss": 0.7719, "step": 46525 }, { "epoch": 0.5670725019194911, "grad_norm": 3.780893013751554, "learning_rate": 2.278640153944837e-06, "loss": 0.7356, "step": 46530 }, { "epoch": 0.5671334381436388, "grad_norm": 2.4143357708751116, "learning_rate": 2.2783194355356e-06, "loss": 0.8005, "step": 46535 }, { "epoch": 0.5671943743677866, "grad_norm": 2.2405853072801296, "learning_rate": 2.2779987171263633e-06, "loss": 0.6651, "step": 46540 }, { "epoch": 0.5672553105919345, "grad_norm": 2.268587160552636, "learning_rate": 2.2776779987171267e-06, "loss": 0.7858, "step": 46545 }, { "epoch": 0.5673162468160823, "grad_norm": 2.3514071401415824, "learning_rate": 2.27735728030789e-06, "loss": 0.7103, "step": 46550 }, { "epoch": 0.5673771830402301, "grad_norm": 4.30394575037353, "learning_rate": 2.277036561898653e-06, "loss": 0.7231, "step": 46555 }, { "epoch": 0.5674381192643779, "grad_norm": 2.4966724313633804, "learning_rate": 2.2767158434894166e-06, "loss": 0.764, "step": 46560 }, { "epoch": 0.5674990554885257, "grad_norm": 3.0223877332818994, "learning_rate": 2.2763951250801796e-06, "loss": 0.7175, "step": 46565 }, { "epoch": 0.5675599917126735, "grad_norm": 2.1262528596685994, "learning_rate": 2.276074406670943e-06, "loss": 0.6623, "step": 46570 }, { "epoch": 0.5676209279368213, "grad_norm": 2.397397294457963, "learning_rate": 2.2757536882617065e-06, "loss": 0.6509, "step": 46575 }, { "epoch": 0.5676818641609691, "grad_norm": 2.649839398003065, "learning_rate": 2.2754329698524695e-06, "loss": 0.776, "step": 46580 }, { "epoch": 0.5677428003851169, "grad_norm": 3.0509322904545972, "learning_rate": 2.275112251443233e-06, "loss": 0.71, "step": 46585 }, { "epoch": 0.5678037366092648, "grad_norm": 2.2768137155532404, "learning_rate": 2.2747915330339964e-06, "loss": 0.7008, "step": 46590 }, { "epoch": 0.5678646728334126, "grad_norm": 3.275333245741649, "learning_rate": 2.2744708146247594e-06, "loss": 0.732, "step": 46595 }, { "epoch": 0.5679256090575604, "grad_norm": 2.216324526251292, "learning_rate": 2.274150096215523e-06, "loss": 0.7096, "step": 46600 }, { "epoch": 0.5679865452817081, "grad_norm": 3.0844980587364836, "learning_rate": 2.2738293778062863e-06, "loss": 0.7417, "step": 46605 }, { "epoch": 0.5680474815058559, "grad_norm": 2.7457290581986658, "learning_rate": 2.2735086593970498e-06, "loss": 0.7386, "step": 46610 }, { "epoch": 0.5681084177300038, "grad_norm": 2.2631127966469955, "learning_rate": 2.2731879409878128e-06, "loss": 0.72, "step": 46615 }, { "epoch": 0.5681693539541516, "grad_norm": 2.9487871957651857, "learning_rate": 2.272867222578576e-06, "loss": 0.7402, "step": 46620 }, { "epoch": 0.5682302901782994, "grad_norm": 2.7776363907237838, "learning_rate": 2.2725465041693396e-06, "loss": 0.8952, "step": 46625 }, { "epoch": 0.5682912264024472, "grad_norm": 3.5439467355004126, "learning_rate": 2.272225785760103e-06, "loss": 0.8073, "step": 46630 }, { "epoch": 0.568352162626595, "grad_norm": 2.871217705464999, "learning_rate": 2.271905067350866e-06, "loss": 0.7114, "step": 46635 }, { "epoch": 0.5684130988507428, "grad_norm": 2.2899690724503747, "learning_rate": 2.2715843489416295e-06, "loss": 0.7354, "step": 46640 }, { "epoch": 0.5684740350748906, "grad_norm": 2.2591043260034085, "learning_rate": 2.2712636305323926e-06, "loss": 0.7698, "step": 46645 }, { "epoch": 0.5685349712990384, "grad_norm": 1.9997981731640595, "learning_rate": 2.270942912123156e-06, "loss": 0.7534, "step": 46650 }, { "epoch": 0.5685959075231862, "grad_norm": 1.9978911188738093, "learning_rate": 2.2706221937139194e-06, "loss": 0.716, "step": 46655 }, { "epoch": 0.568656843747334, "grad_norm": 2.2899084846187336, "learning_rate": 2.2703014753046825e-06, "loss": 0.6753, "step": 46660 }, { "epoch": 0.5687177799714819, "grad_norm": 3.2530210000036033, "learning_rate": 2.269980756895446e-06, "loss": 0.7375, "step": 46665 }, { "epoch": 0.5687787161956297, "grad_norm": 2.584732436639219, "learning_rate": 2.2696600384862093e-06, "loss": 0.7365, "step": 46670 }, { "epoch": 0.5688396524197774, "grad_norm": 3.085651245380254, "learning_rate": 2.2693393200769724e-06, "loss": 0.7713, "step": 46675 }, { "epoch": 0.5689005886439252, "grad_norm": 2.0820089004643796, "learning_rate": 2.269018601667736e-06, "loss": 0.7089, "step": 46680 }, { "epoch": 0.568961524868073, "grad_norm": 2.275993675306189, "learning_rate": 2.2686978832584992e-06, "loss": 0.7011, "step": 46685 }, { "epoch": 0.5690224610922209, "grad_norm": 3.2009811537368145, "learning_rate": 2.2683771648492627e-06, "loss": 0.7465, "step": 46690 }, { "epoch": 0.5690833973163687, "grad_norm": 2.9574809519598224, "learning_rate": 2.268056446440026e-06, "loss": 0.7546, "step": 46695 }, { "epoch": 0.5691443335405165, "grad_norm": 4.36309273215167, "learning_rate": 2.267735728030789e-06, "loss": 0.7056, "step": 46700 }, { "epoch": 0.5692052697646643, "grad_norm": 2.511782171504311, "learning_rate": 2.2674150096215526e-06, "loss": 0.7359, "step": 46705 }, { "epoch": 0.5692662059888121, "grad_norm": 2.2988288656472036, "learning_rate": 2.267094291212316e-06, "loss": 0.747, "step": 46710 }, { "epoch": 0.5693271422129599, "grad_norm": 2.1868288353138685, "learning_rate": 2.266773572803079e-06, "loss": 0.7338, "step": 46715 }, { "epoch": 0.5693880784371077, "grad_norm": 2.6153388641309347, "learning_rate": 2.2664528543938425e-06, "loss": 0.7704, "step": 46720 }, { "epoch": 0.5694490146612555, "grad_norm": 2.8553652468537214, "learning_rate": 2.2661321359846055e-06, "loss": 0.7411, "step": 46725 }, { "epoch": 0.5695099508854033, "grad_norm": 2.7642642576645753, "learning_rate": 2.265811417575369e-06, "loss": 0.8068, "step": 46730 }, { "epoch": 0.5695708871095512, "grad_norm": 3.1665645967451765, "learning_rate": 2.2654906991661324e-06, "loss": 0.7074, "step": 46735 }, { "epoch": 0.569631823333699, "grad_norm": 2.7360132866086193, "learning_rate": 2.2651699807568954e-06, "loss": 0.7223, "step": 46740 }, { "epoch": 0.5696927595578467, "grad_norm": 2.8067229900583563, "learning_rate": 2.264849262347659e-06, "loss": 0.7134, "step": 46745 }, { "epoch": 0.5697536957819945, "grad_norm": 3.1047670137054832, "learning_rate": 2.2645285439384223e-06, "loss": 0.7638, "step": 46750 }, { "epoch": 0.5698146320061424, "grad_norm": 3.129151600953298, "learning_rate": 2.2642078255291857e-06, "loss": 0.7288, "step": 46755 }, { "epoch": 0.5698755682302902, "grad_norm": 3.8791096139147347, "learning_rate": 2.2638871071199487e-06, "loss": 0.796, "step": 46760 }, { "epoch": 0.569936504454438, "grad_norm": 2.587892517090623, "learning_rate": 2.263566388710712e-06, "loss": 0.7291, "step": 46765 }, { "epoch": 0.5699974406785858, "grad_norm": 3.106431943070414, "learning_rate": 2.2632456703014756e-06, "loss": 0.8064, "step": 46770 }, { "epoch": 0.5700583769027336, "grad_norm": 2.4864010453771828, "learning_rate": 2.262924951892239e-06, "loss": 0.7569, "step": 46775 }, { "epoch": 0.5701193131268814, "grad_norm": 2.6250822535080456, "learning_rate": 2.262604233483002e-06, "loss": 0.7801, "step": 46780 }, { "epoch": 0.5701802493510292, "grad_norm": 2.5079654083770975, "learning_rate": 2.2622835150737655e-06, "loss": 0.7432, "step": 46785 }, { "epoch": 0.570241185575177, "grad_norm": 2.1391627698676308, "learning_rate": 2.261962796664529e-06, "loss": 0.7549, "step": 46790 }, { "epoch": 0.5703021217993248, "grad_norm": 2.4385299349600675, "learning_rate": 2.261642078255292e-06, "loss": 0.778, "step": 46795 }, { "epoch": 0.5703630580234726, "grad_norm": 3.168123064276709, "learning_rate": 2.2613213598460554e-06, "loss": 0.8318, "step": 46800 }, { "epoch": 0.5704239942476205, "grad_norm": 2.5195695123626223, "learning_rate": 2.2610006414368184e-06, "loss": 0.7114, "step": 46805 }, { "epoch": 0.5704849304717683, "grad_norm": 2.300795314550579, "learning_rate": 2.260679923027582e-06, "loss": 0.7711, "step": 46810 }, { "epoch": 0.570545866695916, "grad_norm": 2.1869019657756237, "learning_rate": 2.2603592046183453e-06, "loss": 0.7569, "step": 46815 }, { "epoch": 0.5706068029200638, "grad_norm": 2.6439542479046425, "learning_rate": 2.2600384862091083e-06, "loss": 0.6876, "step": 46820 }, { "epoch": 0.5706677391442116, "grad_norm": 2.5366453943999345, "learning_rate": 2.2597177677998718e-06, "loss": 0.7377, "step": 46825 }, { "epoch": 0.5707286753683595, "grad_norm": 2.388998089411783, "learning_rate": 2.259397049390635e-06, "loss": 0.751, "step": 46830 }, { "epoch": 0.5707896115925073, "grad_norm": 2.317680800397424, "learning_rate": 2.2590763309813986e-06, "loss": 0.7617, "step": 46835 }, { "epoch": 0.5708505478166551, "grad_norm": 2.7575926721445274, "learning_rate": 2.2587556125721617e-06, "loss": 0.7768, "step": 46840 }, { "epoch": 0.5709114840408029, "grad_norm": 2.4485693049503503, "learning_rate": 2.258434894162925e-06, "loss": 0.7361, "step": 46845 }, { "epoch": 0.5709724202649507, "grad_norm": 2.727012451740275, "learning_rate": 2.2581141757536885e-06, "loss": 0.7346, "step": 46850 }, { "epoch": 0.5710333564890985, "grad_norm": 2.3344464568807384, "learning_rate": 2.257793457344452e-06, "loss": 0.7033, "step": 46855 }, { "epoch": 0.5710942927132463, "grad_norm": 2.4679401896107396, "learning_rate": 2.257472738935215e-06, "loss": 0.7181, "step": 46860 }, { "epoch": 0.5711552289373941, "grad_norm": 2.4735432033372926, "learning_rate": 2.2571520205259784e-06, "loss": 0.6898, "step": 46865 }, { "epoch": 0.5712161651615419, "grad_norm": 3.753821912725134, "learning_rate": 2.256831302116742e-06, "loss": 0.7263, "step": 46870 }, { "epoch": 0.5712771013856898, "grad_norm": 2.6699624224579765, "learning_rate": 2.256510583707505e-06, "loss": 0.7975, "step": 46875 }, { "epoch": 0.5713380376098376, "grad_norm": 2.7639201589260427, "learning_rate": 2.2561898652982683e-06, "loss": 0.717, "step": 46880 }, { "epoch": 0.5713989738339853, "grad_norm": 2.566132753623178, "learning_rate": 2.2558691468890318e-06, "loss": 0.7437, "step": 46885 }, { "epoch": 0.5714599100581331, "grad_norm": 2.806695542053645, "learning_rate": 2.2555484284797948e-06, "loss": 0.7637, "step": 46890 }, { "epoch": 0.571520846282281, "grad_norm": 2.4884277249442137, "learning_rate": 2.2552277100705582e-06, "loss": 0.7978, "step": 46895 }, { "epoch": 0.5715817825064288, "grad_norm": 2.5049951171183915, "learning_rate": 2.2549069916613212e-06, "loss": 0.7602, "step": 46900 }, { "epoch": 0.5716427187305766, "grad_norm": 2.3919041208681917, "learning_rate": 2.2545862732520847e-06, "loss": 0.7574, "step": 46905 }, { "epoch": 0.5717036549547244, "grad_norm": 2.5839311903837285, "learning_rate": 2.254265554842848e-06, "loss": 0.7826, "step": 46910 }, { "epoch": 0.5717645911788722, "grad_norm": 2.278194870361476, "learning_rate": 2.2539448364336116e-06, "loss": 0.7098, "step": 46915 }, { "epoch": 0.57182552740302, "grad_norm": 2.4468375061321823, "learning_rate": 2.253624118024375e-06, "loss": 0.6562, "step": 46920 }, { "epoch": 0.5718864636271678, "grad_norm": 2.1389899184726935, "learning_rate": 2.253303399615138e-06, "loss": 0.7459, "step": 46925 }, { "epoch": 0.5719473998513156, "grad_norm": 2.1489928199153683, "learning_rate": 2.2529826812059015e-06, "loss": 0.7849, "step": 46930 }, { "epoch": 0.5720083360754634, "grad_norm": 2.541516590481475, "learning_rate": 2.252661962796665e-06, "loss": 0.7683, "step": 46935 }, { "epoch": 0.5720692722996112, "grad_norm": 2.4860387875688468, "learning_rate": 2.252341244387428e-06, "loss": 0.7108, "step": 46940 }, { "epoch": 0.5721302085237591, "grad_norm": 2.717595973371642, "learning_rate": 2.2520205259781914e-06, "loss": 0.7462, "step": 46945 }, { "epoch": 0.5721911447479069, "grad_norm": 2.445311410250172, "learning_rate": 2.251699807568955e-06, "loss": 0.7503, "step": 46950 }, { "epoch": 0.5722520809720546, "grad_norm": 2.737653621185917, "learning_rate": 2.251379089159718e-06, "loss": 0.7749, "step": 46955 }, { "epoch": 0.5723130171962024, "grad_norm": 3.161467321644419, "learning_rate": 2.2510583707504813e-06, "loss": 0.723, "step": 46960 }, { "epoch": 0.5723739534203502, "grad_norm": 2.2176177433509006, "learning_rate": 2.2507376523412447e-06, "loss": 0.7419, "step": 46965 }, { "epoch": 0.5724348896444981, "grad_norm": 2.609940437120402, "learning_rate": 2.2504169339320077e-06, "loss": 0.7406, "step": 46970 }, { "epoch": 0.5724958258686459, "grad_norm": 2.2426311649629245, "learning_rate": 2.250096215522771e-06, "loss": 0.7792, "step": 46975 }, { "epoch": 0.5725567620927937, "grad_norm": 2.350579940703818, "learning_rate": 2.249775497113534e-06, "loss": 0.7656, "step": 46980 }, { "epoch": 0.5726176983169415, "grad_norm": 2.5901861697876183, "learning_rate": 2.2494547787042976e-06, "loss": 0.7029, "step": 46985 }, { "epoch": 0.5726786345410892, "grad_norm": 5.508859324229514, "learning_rate": 2.249134060295061e-06, "loss": 0.7386, "step": 46990 }, { "epoch": 0.5727395707652371, "grad_norm": 3.179701412477889, "learning_rate": 2.2488133418858245e-06, "loss": 0.7228, "step": 46995 }, { "epoch": 0.5728005069893849, "grad_norm": 3.1088173408686894, "learning_rate": 2.248492623476588e-06, "loss": 0.6908, "step": 47000 }, { "epoch": 0.5728614432135327, "grad_norm": 2.896951357277297, "learning_rate": 2.2481719050673514e-06, "loss": 0.7187, "step": 47005 }, { "epoch": 0.5729223794376805, "grad_norm": 3.0759361592872794, "learning_rate": 2.2478511866581144e-06, "loss": 0.7351, "step": 47010 }, { "epoch": 0.5729833156618284, "grad_norm": 2.4002381062228944, "learning_rate": 2.247530468248878e-06, "loss": 0.7518, "step": 47015 }, { "epoch": 0.5730442518859762, "grad_norm": 2.242289196768876, "learning_rate": 2.247209749839641e-06, "loss": 0.6895, "step": 47020 }, { "epoch": 0.5731051881101239, "grad_norm": 2.21403266721227, "learning_rate": 2.2468890314304043e-06, "loss": 0.6813, "step": 47025 }, { "epoch": 0.5731661243342717, "grad_norm": 2.5971054981099617, "learning_rate": 2.2465683130211677e-06, "loss": 0.7763, "step": 47030 }, { "epoch": 0.5732270605584195, "grad_norm": 2.5889107765834187, "learning_rate": 2.2462475946119307e-06, "loss": 0.6818, "step": 47035 }, { "epoch": 0.5732879967825674, "grad_norm": 2.4019085408585337, "learning_rate": 2.245926876202694e-06, "loss": 0.7929, "step": 47040 }, { "epoch": 0.5733489330067152, "grad_norm": 2.3343169126976897, "learning_rate": 2.2456061577934576e-06, "loss": 0.7249, "step": 47045 }, { "epoch": 0.573409869230863, "grad_norm": 2.237684330263626, "learning_rate": 2.2452854393842206e-06, "loss": 0.6913, "step": 47050 }, { "epoch": 0.5734708054550108, "grad_norm": 1.9490138901478726, "learning_rate": 2.244964720974984e-06, "loss": 0.767, "step": 47055 }, { "epoch": 0.5735317416791585, "grad_norm": 2.387792427298471, "learning_rate": 2.2446440025657475e-06, "loss": 0.7032, "step": 47060 }, { "epoch": 0.5735926779033064, "grad_norm": 3.478120867750231, "learning_rate": 2.2443232841565105e-06, "loss": 0.7522, "step": 47065 }, { "epoch": 0.5736536141274542, "grad_norm": 2.2050209861336296, "learning_rate": 2.244002565747274e-06, "loss": 0.7715, "step": 47070 }, { "epoch": 0.573714550351602, "grad_norm": 4.159811380457503, "learning_rate": 2.2436818473380374e-06, "loss": 0.7533, "step": 47075 }, { "epoch": 0.5737754865757498, "grad_norm": 2.388384059641913, "learning_rate": 2.243361128928801e-06, "loss": 0.7568, "step": 47080 }, { "epoch": 0.5738364227998977, "grad_norm": 2.4914663561184343, "learning_rate": 2.2430404105195643e-06, "loss": 0.8135, "step": 47085 }, { "epoch": 0.5738973590240455, "grad_norm": 2.001133873848306, "learning_rate": 2.2427196921103273e-06, "loss": 0.6887, "step": 47090 }, { "epoch": 0.5739582952481932, "grad_norm": 2.3630677812492866, "learning_rate": 2.2423989737010908e-06, "loss": 0.7796, "step": 47095 }, { "epoch": 0.574019231472341, "grad_norm": 2.8229403549292824, "learning_rate": 2.2420782552918538e-06, "loss": 0.7633, "step": 47100 }, { "epoch": 0.5740801676964888, "grad_norm": 2.4447731783421407, "learning_rate": 2.241757536882617e-06, "loss": 0.7646, "step": 47105 }, { "epoch": 0.5741411039206367, "grad_norm": 2.47864943989495, "learning_rate": 2.2414368184733807e-06, "loss": 0.787, "step": 47110 }, { "epoch": 0.5742020401447845, "grad_norm": 3.2248803912502075, "learning_rate": 2.2411161000641437e-06, "loss": 0.729, "step": 47115 }, { "epoch": 0.5742629763689323, "grad_norm": 2.6554239884141366, "learning_rate": 2.240795381654907e-06, "loss": 0.6641, "step": 47120 }, { "epoch": 0.5743239125930801, "grad_norm": 2.082050905847392, "learning_rate": 2.2404746632456705e-06, "loss": 0.7447, "step": 47125 }, { "epoch": 0.5743848488172278, "grad_norm": 2.3153131012565766, "learning_rate": 2.2401539448364336e-06, "loss": 0.6963, "step": 47130 }, { "epoch": 0.5744457850413757, "grad_norm": 2.365875505005096, "learning_rate": 2.239833226427197e-06, "loss": 0.6748, "step": 47135 }, { "epoch": 0.5745067212655235, "grad_norm": 1.815049081772581, "learning_rate": 2.2395125080179604e-06, "loss": 0.7315, "step": 47140 }, { "epoch": 0.5745676574896713, "grad_norm": 2.7389272630793977, "learning_rate": 2.239191789608724e-06, "loss": 0.6907, "step": 47145 }, { "epoch": 0.5746285937138191, "grad_norm": 2.6859242312948104, "learning_rate": 2.238871071199487e-06, "loss": 0.7293, "step": 47150 }, { "epoch": 0.574689529937967, "grad_norm": 2.1540657548302073, "learning_rate": 2.2385503527902503e-06, "loss": 0.6996, "step": 47155 }, { "epoch": 0.5747504661621148, "grad_norm": 2.6704505808364774, "learning_rate": 2.2382296343810138e-06, "loss": 0.7183, "step": 47160 }, { "epoch": 0.5748114023862625, "grad_norm": 2.4576781791332323, "learning_rate": 2.2379089159717772e-06, "loss": 0.7242, "step": 47165 }, { "epoch": 0.5748723386104103, "grad_norm": 2.6673733730928038, "learning_rate": 2.2375881975625402e-06, "loss": 0.7565, "step": 47170 }, { "epoch": 0.5749332748345581, "grad_norm": 3.0076554157470325, "learning_rate": 2.2372674791533037e-06, "loss": 0.7632, "step": 47175 }, { "epoch": 0.574994211058706, "grad_norm": 2.129721973663044, "learning_rate": 2.236946760744067e-06, "loss": 0.7384, "step": 47180 }, { "epoch": 0.5750551472828538, "grad_norm": 2.3852481972412125, "learning_rate": 2.23662604233483e-06, "loss": 0.7375, "step": 47185 }, { "epoch": 0.5751160835070016, "grad_norm": 2.4028799617439045, "learning_rate": 2.2363053239255936e-06, "loss": 0.7677, "step": 47190 }, { "epoch": 0.5751770197311494, "grad_norm": 2.4391703285525836, "learning_rate": 2.2359846055163566e-06, "loss": 0.7884, "step": 47195 }, { "epoch": 0.5752379559552971, "grad_norm": 2.4094242702890107, "learning_rate": 2.23566388710712e-06, "loss": 0.8228, "step": 47200 }, { "epoch": 0.575298892179445, "grad_norm": 2.130752739478381, "learning_rate": 2.2353431686978835e-06, "loss": 0.806, "step": 47205 }, { "epoch": 0.5753598284035928, "grad_norm": 2.2499566755209415, "learning_rate": 2.2350224502886465e-06, "loss": 0.6981, "step": 47210 }, { "epoch": 0.5754207646277406, "grad_norm": 2.589293223132913, "learning_rate": 2.23470173187941e-06, "loss": 0.764, "step": 47215 }, { "epoch": 0.5754817008518884, "grad_norm": 2.05299006697635, "learning_rate": 2.2343810134701734e-06, "loss": 0.697, "step": 47220 }, { "epoch": 0.5755426370760363, "grad_norm": 2.249094049939865, "learning_rate": 2.234060295060937e-06, "loss": 0.7318, "step": 47225 }, { "epoch": 0.5756035733001841, "grad_norm": 2.271056772166432, "learning_rate": 2.2337395766517003e-06, "loss": 0.7619, "step": 47230 }, { "epoch": 0.5756645095243318, "grad_norm": 3.354557546324863, "learning_rate": 2.2334188582424633e-06, "loss": 0.7616, "step": 47235 }, { "epoch": 0.5757254457484796, "grad_norm": 4.529269606090052, "learning_rate": 2.2330981398332267e-06, "loss": 0.6735, "step": 47240 }, { "epoch": 0.5757863819726274, "grad_norm": 2.334003350097494, "learning_rate": 2.23277742142399e-06, "loss": 0.792, "step": 47245 }, { "epoch": 0.5758473181967753, "grad_norm": 2.7389560341807493, "learning_rate": 2.232456703014753e-06, "loss": 0.7032, "step": 47250 }, { "epoch": 0.5759082544209231, "grad_norm": 2.3107962343385244, "learning_rate": 2.2321359846055166e-06, "loss": 0.6977, "step": 47255 }, { "epoch": 0.5759691906450709, "grad_norm": 2.942678751887804, "learning_rate": 2.23181526619628e-06, "loss": 0.7419, "step": 47260 }, { "epoch": 0.5760301268692187, "grad_norm": 2.8305445084374257, "learning_rate": 2.231494547787043e-06, "loss": 0.7164, "step": 47265 }, { "epoch": 0.5760910630933664, "grad_norm": 2.776120139590781, "learning_rate": 2.2311738293778065e-06, "loss": 0.737, "step": 47270 }, { "epoch": 0.5761519993175143, "grad_norm": 2.236471725794192, "learning_rate": 2.2308531109685695e-06, "loss": 0.6897, "step": 47275 }, { "epoch": 0.5762129355416621, "grad_norm": 3.0944421851130115, "learning_rate": 2.230532392559333e-06, "loss": 0.7728, "step": 47280 }, { "epoch": 0.5762738717658099, "grad_norm": 2.976903474231995, "learning_rate": 2.2302116741500964e-06, "loss": 0.6864, "step": 47285 }, { "epoch": 0.5763348079899577, "grad_norm": 2.3727690114751976, "learning_rate": 2.2298909557408594e-06, "loss": 0.7085, "step": 47290 }, { "epoch": 0.5763957442141056, "grad_norm": 2.2210647235679963, "learning_rate": 2.229570237331623e-06, "loss": 0.7203, "step": 47295 }, { "epoch": 0.5764566804382534, "grad_norm": 2.8175893840537225, "learning_rate": 2.2292495189223863e-06, "loss": 0.7583, "step": 47300 }, { "epoch": 0.5765176166624011, "grad_norm": 2.4320431876306805, "learning_rate": 2.2289288005131497e-06, "loss": 0.7802, "step": 47305 }, { "epoch": 0.5765785528865489, "grad_norm": 2.3781109873901842, "learning_rate": 2.228608082103913e-06, "loss": 0.791, "step": 47310 }, { "epoch": 0.5766394891106967, "grad_norm": 2.3923731945552724, "learning_rate": 2.228287363694676e-06, "loss": 0.7377, "step": 47315 }, { "epoch": 0.5767004253348446, "grad_norm": 2.339253825145666, "learning_rate": 2.2279666452854396e-06, "loss": 0.8282, "step": 47320 }, { "epoch": 0.5767613615589924, "grad_norm": 2.5435867022433105, "learning_rate": 2.227645926876203e-06, "loss": 0.7262, "step": 47325 }, { "epoch": 0.5768222977831402, "grad_norm": 2.5909861183655782, "learning_rate": 2.227325208466966e-06, "loss": 0.7985, "step": 47330 }, { "epoch": 0.576883234007288, "grad_norm": 2.7962878322688693, "learning_rate": 2.2270044900577295e-06, "loss": 0.6943, "step": 47335 }, { "epoch": 0.5769441702314357, "grad_norm": 2.534117401680366, "learning_rate": 2.226683771648493e-06, "loss": 0.7901, "step": 47340 }, { "epoch": 0.5770051064555836, "grad_norm": 2.999858342481234, "learning_rate": 2.226363053239256e-06, "loss": 0.7718, "step": 47345 }, { "epoch": 0.5770660426797314, "grad_norm": 2.4850602353772597, "learning_rate": 2.2260423348300194e-06, "loss": 0.7222, "step": 47350 }, { "epoch": 0.5771269789038792, "grad_norm": 2.5683856489866144, "learning_rate": 2.2257216164207824e-06, "loss": 0.7817, "step": 47355 }, { "epoch": 0.577187915128027, "grad_norm": 2.807205208995474, "learning_rate": 2.225400898011546e-06, "loss": 0.7825, "step": 47360 }, { "epoch": 0.5772488513521749, "grad_norm": 2.407203377892036, "learning_rate": 2.2250801796023093e-06, "loss": 0.7628, "step": 47365 }, { "epoch": 0.5773097875763227, "grad_norm": 2.3260873989559214, "learning_rate": 2.2247594611930728e-06, "loss": 0.6765, "step": 47370 }, { "epoch": 0.5773707238004704, "grad_norm": 3.0502308541643024, "learning_rate": 2.2244387427838358e-06, "loss": 0.7536, "step": 47375 }, { "epoch": 0.5774316600246182, "grad_norm": 2.325908203621437, "learning_rate": 2.2241180243745992e-06, "loss": 0.6797, "step": 47380 }, { "epoch": 0.577492596248766, "grad_norm": 2.6058880358475056, "learning_rate": 2.2237973059653627e-06, "loss": 0.7989, "step": 47385 }, { "epoch": 0.5775535324729139, "grad_norm": 3.0889509206888657, "learning_rate": 2.223476587556126e-06, "loss": 0.7307, "step": 47390 }, { "epoch": 0.5776144686970617, "grad_norm": 2.4767497046229763, "learning_rate": 2.223155869146889e-06, "loss": 0.6732, "step": 47395 }, { "epoch": 0.5776754049212095, "grad_norm": 2.5296360472934034, "learning_rate": 2.2228351507376526e-06, "loss": 0.8104, "step": 47400 }, { "epoch": 0.5777363411453573, "grad_norm": 2.5449717608748013, "learning_rate": 2.222514432328416e-06, "loss": 0.7956, "step": 47405 }, { "epoch": 0.577797277369505, "grad_norm": 3.439406185081842, "learning_rate": 2.222193713919179e-06, "loss": 0.7591, "step": 47410 }, { "epoch": 0.5778582135936529, "grad_norm": 2.603723135902549, "learning_rate": 2.2218729955099425e-06, "loss": 0.7669, "step": 47415 }, { "epoch": 0.5779191498178007, "grad_norm": 2.20853310518588, "learning_rate": 2.221552277100706e-06, "loss": 0.6885, "step": 47420 }, { "epoch": 0.5779800860419485, "grad_norm": 2.7393763003867027, "learning_rate": 2.221231558691469e-06, "loss": 0.7276, "step": 47425 }, { "epoch": 0.5780410222660963, "grad_norm": 3.446163982099688, "learning_rate": 2.2209108402822324e-06, "loss": 0.6784, "step": 47430 }, { "epoch": 0.5781019584902441, "grad_norm": 2.5210799977854395, "learning_rate": 2.220590121872996e-06, "loss": 0.6926, "step": 47435 }, { "epoch": 0.578162894714392, "grad_norm": 3.1096201315206957, "learning_rate": 2.220269403463759e-06, "loss": 0.7159, "step": 47440 }, { "epoch": 0.5782238309385397, "grad_norm": 2.1810105806790436, "learning_rate": 2.2199486850545223e-06, "loss": 0.7631, "step": 47445 }, { "epoch": 0.5782847671626875, "grad_norm": 2.1796377823386166, "learning_rate": 2.2196279666452857e-06, "loss": 0.7489, "step": 47450 }, { "epoch": 0.5783457033868353, "grad_norm": 3.4071006160982407, "learning_rate": 2.219307248236049e-06, "loss": 0.7916, "step": 47455 }, { "epoch": 0.5784066396109832, "grad_norm": 2.5865214103013594, "learning_rate": 2.218986529826812e-06, "loss": 0.7898, "step": 47460 }, { "epoch": 0.578467575835131, "grad_norm": 2.384149101012996, "learning_rate": 2.2186658114175756e-06, "loss": 0.8129, "step": 47465 }, { "epoch": 0.5785285120592788, "grad_norm": 2.352044077499584, "learning_rate": 2.218345093008339e-06, "loss": 0.7126, "step": 47470 }, { "epoch": 0.5785894482834265, "grad_norm": 2.7901844197185546, "learning_rate": 2.2180243745991025e-06, "loss": 0.796, "step": 47475 }, { "epoch": 0.5786503845075743, "grad_norm": 2.0370324837979203, "learning_rate": 2.2177036561898655e-06, "loss": 0.7406, "step": 47480 }, { "epoch": 0.5787113207317222, "grad_norm": 2.8525603153288013, "learning_rate": 2.217382937780629e-06, "loss": 0.779, "step": 47485 }, { "epoch": 0.57877225695587, "grad_norm": 2.3865997379406374, "learning_rate": 2.217062219371392e-06, "loss": 0.8008, "step": 47490 }, { "epoch": 0.5788331931800178, "grad_norm": 2.47075457079507, "learning_rate": 2.2167415009621554e-06, "loss": 0.8262, "step": 47495 }, { "epoch": 0.5788941294041656, "grad_norm": 2.5290710376989765, "learning_rate": 2.216420782552919e-06, "loss": 0.7369, "step": 47500 }, { "epoch": 0.5789550656283134, "grad_norm": 2.3595864957853987, "learning_rate": 2.216100064143682e-06, "loss": 0.6982, "step": 47505 }, { "epoch": 0.5790160018524612, "grad_norm": 2.2809605421940855, "learning_rate": 2.2157793457344453e-06, "loss": 0.722, "step": 47510 }, { "epoch": 0.579076938076609, "grad_norm": 2.290144537537604, "learning_rate": 2.2154586273252087e-06, "loss": 0.7517, "step": 47515 }, { "epoch": 0.5791378743007568, "grad_norm": 2.5649635825905137, "learning_rate": 2.2151379089159717e-06, "loss": 0.7493, "step": 47520 }, { "epoch": 0.5791988105249046, "grad_norm": 2.4281455130603073, "learning_rate": 2.214817190506735e-06, "loss": 0.6736, "step": 47525 }, { "epoch": 0.5792597467490525, "grad_norm": 2.19007011873038, "learning_rate": 2.2144964720974986e-06, "loss": 0.7245, "step": 47530 }, { "epoch": 0.5793206829732003, "grad_norm": 2.555739428858927, "learning_rate": 2.214175753688262e-06, "loss": 0.7661, "step": 47535 }, { "epoch": 0.5793816191973481, "grad_norm": 2.4715245255905294, "learning_rate": 2.213855035279025e-06, "loss": 0.7051, "step": 47540 }, { "epoch": 0.5794425554214958, "grad_norm": 2.3454393344410738, "learning_rate": 2.2135343168697885e-06, "loss": 0.7852, "step": 47545 }, { "epoch": 0.5795034916456436, "grad_norm": 3.3615365560713704, "learning_rate": 2.213213598460552e-06, "loss": 0.7331, "step": 47550 }, { "epoch": 0.5795644278697915, "grad_norm": 2.4306181213381155, "learning_rate": 2.2128928800513154e-06, "loss": 0.7283, "step": 47555 }, { "epoch": 0.5796253640939393, "grad_norm": 2.2820973928386117, "learning_rate": 2.2125721616420784e-06, "loss": 0.7409, "step": 47560 }, { "epoch": 0.5796863003180871, "grad_norm": 2.4713369044886653, "learning_rate": 2.212251443232842e-06, "loss": 0.7537, "step": 47565 }, { "epoch": 0.5797472365422349, "grad_norm": 2.1833874060219247, "learning_rate": 2.211930724823605e-06, "loss": 0.8384, "step": 47570 }, { "epoch": 0.5798081727663827, "grad_norm": 2.4216477680591617, "learning_rate": 2.2116100064143683e-06, "loss": 0.6936, "step": 47575 }, { "epoch": 0.5798691089905305, "grad_norm": 2.8845084132528833, "learning_rate": 2.2112892880051318e-06, "loss": 0.6758, "step": 47580 }, { "epoch": 0.5799300452146783, "grad_norm": 2.6641338805764385, "learning_rate": 2.2109685695958948e-06, "loss": 0.7486, "step": 47585 }, { "epoch": 0.5799909814388261, "grad_norm": 2.852663098737711, "learning_rate": 2.2106478511866582e-06, "loss": 0.7379, "step": 47590 }, { "epoch": 0.5800519176629739, "grad_norm": 2.5452846631933377, "learning_rate": 2.2103271327774217e-06, "loss": 0.7315, "step": 47595 }, { "epoch": 0.5801128538871217, "grad_norm": 2.5029782072983324, "learning_rate": 2.2100064143681847e-06, "loss": 0.7408, "step": 47600 }, { "epoch": 0.5801737901112696, "grad_norm": 3.1220003337416258, "learning_rate": 2.209685695958948e-06, "loss": 0.714, "step": 47605 }, { "epoch": 0.5802347263354174, "grad_norm": 2.8373130283399424, "learning_rate": 2.2093649775497116e-06, "loss": 0.824, "step": 47610 }, { "epoch": 0.5802956625595651, "grad_norm": 2.4627804125265347, "learning_rate": 2.209044259140475e-06, "loss": 0.7362, "step": 47615 }, { "epoch": 0.5803565987837129, "grad_norm": 2.3682544268736203, "learning_rate": 2.2087235407312384e-06, "loss": 0.7739, "step": 47620 }, { "epoch": 0.5804175350078608, "grad_norm": 1.766347820472946, "learning_rate": 2.2084028223220014e-06, "loss": 0.6851, "step": 47625 }, { "epoch": 0.5804784712320086, "grad_norm": 2.4065235722375964, "learning_rate": 2.208082103912765e-06, "loss": 0.7904, "step": 47630 }, { "epoch": 0.5805394074561564, "grad_norm": 2.094254073590666, "learning_rate": 2.2077613855035283e-06, "loss": 0.7442, "step": 47635 }, { "epoch": 0.5806003436803042, "grad_norm": 2.625237974379511, "learning_rate": 2.2074406670942913e-06, "loss": 0.7409, "step": 47640 }, { "epoch": 0.580661279904452, "grad_norm": 2.3914508984706426, "learning_rate": 2.2071199486850548e-06, "loss": 0.7012, "step": 47645 }, { "epoch": 0.5807222161285998, "grad_norm": 2.3109000327630755, "learning_rate": 2.206799230275818e-06, "loss": 0.7525, "step": 47650 }, { "epoch": 0.5807831523527476, "grad_norm": 2.278883420166271, "learning_rate": 2.2064785118665812e-06, "loss": 0.7836, "step": 47655 }, { "epoch": 0.5808440885768954, "grad_norm": 2.1643494787587354, "learning_rate": 2.2061577934573447e-06, "loss": 0.7178, "step": 47660 }, { "epoch": 0.5809050248010432, "grad_norm": 2.970305337635172, "learning_rate": 2.2058370750481077e-06, "loss": 0.7123, "step": 47665 }, { "epoch": 0.580965961025191, "grad_norm": 2.2167757168271107, "learning_rate": 2.205516356638871e-06, "loss": 0.727, "step": 47670 }, { "epoch": 0.5810268972493389, "grad_norm": 2.231283454162437, "learning_rate": 2.2051956382296346e-06, "loss": 0.6757, "step": 47675 }, { "epoch": 0.5810878334734867, "grad_norm": 2.3474856089485847, "learning_rate": 2.2048749198203976e-06, "loss": 0.7588, "step": 47680 }, { "epoch": 0.5811487696976344, "grad_norm": 2.4108168317788103, "learning_rate": 2.204554201411161e-06, "loss": 0.7539, "step": 47685 }, { "epoch": 0.5812097059217822, "grad_norm": 2.6662052279097592, "learning_rate": 2.2042334830019245e-06, "loss": 0.7229, "step": 47690 }, { "epoch": 0.58127064214593, "grad_norm": 2.8907144389244626, "learning_rate": 2.203912764592688e-06, "loss": 0.7713, "step": 47695 }, { "epoch": 0.5813315783700779, "grad_norm": 2.193680769468842, "learning_rate": 2.2035920461834514e-06, "loss": 0.7111, "step": 47700 }, { "epoch": 0.5813925145942257, "grad_norm": 2.3247620077547926, "learning_rate": 2.2032713277742144e-06, "loss": 0.7486, "step": 47705 }, { "epoch": 0.5814534508183735, "grad_norm": 2.7999393815932327, "learning_rate": 2.202950609364978e-06, "loss": 0.7278, "step": 47710 }, { "epoch": 0.5815143870425213, "grad_norm": 2.4493950829966127, "learning_rate": 2.2026298909557413e-06, "loss": 0.7525, "step": 47715 }, { "epoch": 0.581575323266669, "grad_norm": 2.4125722391419586, "learning_rate": 2.2023091725465043e-06, "loss": 0.7443, "step": 47720 }, { "epoch": 0.5816362594908169, "grad_norm": 2.4141983042185107, "learning_rate": 2.2019884541372677e-06, "loss": 0.6656, "step": 47725 }, { "epoch": 0.5816971957149647, "grad_norm": 1.7795457638927585, "learning_rate": 2.2016677357280307e-06, "loss": 0.671, "step": 47730 }, { "epoch": 0.5817581319391125, "grad_norm": 2.1905620587549186, "learning_rate": 2.201347017318794e-06, "loss": 0.7486, "step": 47735 }, { "epoch": 0.5818190681632603, "grad_norm": 2.0891246961228247, "learning_rate": 2.2010262989095576e-06, "loss": 0.7109, "step": 47740 }, { "epoch": 0.5818800043874082, "grad_norm": 2.0905549199084743, "learning_rate": 2.2007055805003206e-06, "loss": 0.7346, "step": 47745 }, { "epoch": 0.581940940611556, "grad_norm": 2.5795432773609646, "learning_rate": 2.200384862091084e-06, "loss": 0.7725, "step": 47750 }, { "epoch": 0.5820018768357037, "grad_norm": 2.652438819194553, "learning_rate": 2.2000641436818475e-06, "loss": 0.7781, "step": 47755 }, { "epoch": 0.5820628130598515, "grad_norm": 2.2415364469775856, "learning_rate": 2.199743425272611e-06, "loss": 0.7266, "step": 47760 }, { "epoch": 0.5821237492839993, "grad_norm": 2.0908940882477576, "learning_rate": 2.199422706863374e-06, "loss": 0.721, "step": 47765 }, { "epoch": 0.5821846855081472, "grad_norm": 2.109108646379655, "learning_rate": 2.1991019884541374e-06, "loss": 0.7413, "step": 47770 }, { "epoch": 0.582245621732295, "grad_norm": 2.200093856736037, "learning_rate": 2.198781270044901e-06, "loss": 0.7475, "step": 47775 }, { "epoch": 0.5823065579564428, "grad_norm": 2.7709575570180007, "learning_rate": 2.1984605516356643e-06, "loss": 0.7204, "step": 47780 }, { "epoch": 0.5823674941805906, "grad_norm": 3.872900725804477, "learning_rate": 2.1981398332264273e-06, "loss": 0.7413, "step": 47785 }, { "epoch": 0.5824284304047384, "grad_norm": 2.375638446181969, "learning_rate": 2.1978191148171907e-06, "loss": 0.7388, "step": 47790 }, { "epoch": 0.5824893666288862, "grad_norm": 2.438905742716946, "learning_rate": 2.197498396407954e-06, "loss": 0.7468, "step": 47795 }, { "epoch": 0.582550302853034, "grad_norm": 2.662314509332198, "learning_rate": 2.197177677998717e-06, "loss": 0.7521, "step": 47800 }, { "epoch": 0.5826112390771818, "grad_norm": 2.292245871473713, "learning_rate": 2.1968569595894806e-06, "loss": 0.7366, "step": 47805 }, { "epoch": 0.5826721753013296, "grad_norm": 3.076954322933163, "learning_rate": 2.196536241180244e-06, "loss": 0.7421, "step": 47810 }, { "epoch": 0.5827331115254775, "grad_norm": 2.4896059095966603, "learning_rate": 2.196215522771007e-06, "loss": 0.7406, "step": 47815 }, { "epoch": 0.5827940477496253, "grad_norm": 3.1739504569517574, "learning_rate": 2.1958948043617705e-06, "loss": 0.7315, "step": 47820 }, { "epoch": 0.582854983973773, "grad_norm": 4.1323541783900035, "learning_rate": 2.1955740859525336e-06, "loss": 0.7184, "step": 47825 }, { "epoch": 0.5829159201979208, "grad_norm": 2.474125281241877, "learning_rate": 2.195253367543297e-06, "loss": 0.8038, "step": 47830 }, { "epoch": 0.5829768564220686, "grad_norm": 2.3697362409422276, "learning_rate": 2.1949326491340604e-06, "loss": 0.6863, "step": 47835 }, { "epoch": 0.5830377926462165, "grad_norm": 2.303334858476525, "learning_rate": 2.194611930724824e-06, "loss": 0.725, "step": 47840 }, { "epoch": 0.5830987288703643, "grad_norm": 2.0089817015560225, "learning_rate": 2.1942912123155873e-06, "loss": 0.6881, "step": 47845 }, { "epoch": 0.5831596650945121, "grad_norm": 2.7534537676815773, "learning_rate": 2.1939704939063503e-06, "loss": 0.7904, "step": 47850 }, { "epoch": 0.5832206013186599, "grad_norm": 2.627801669747651, "learning_rate": 2.1936497754971138e-06, "loss": 0.7194, "step": 47855 }, { "epoch": 0.5832815375428076, "grad_norm": 2.8458027628607656, "learning_rate": 2.1933290570878772e-06, "loss": 0.7875, "step": 47860 }, { "epoch": 0.5833424737669555, "grad_norm": 2.906410176518693, "learning_rate": 2.1930083386786402e-06, "loss": 0.6905, "step": 47865 }, { "epoch": 0.5834034099911033, "grad_norm": 2.59268688762319, "learning_rate": 2.1926876202694037e-06, "loss": 0.7945, "step": 47870 }, { "epoch": 0.5834643462152511, "grad_norm": 2.4626556883364565, "learning_rate": 2.192366901860167e-06, "loss": 0.7627, "step": 47875 }, { "epoch": 0.5835252824393989, "grad_norm": 2.974921908667282, "learning_rate": 2.19204618345093e-06, "loss": 0.7633, "step": 47880 }, { "epoch": 0.5835862186635468, "grad_norm": 2.8135300766082665, "learning_rate": 2.1917254650416936e-06, "loss": 0.7852, "step": 47885 }, { "epoch": 0.5836471548876946, "grad_norm": 2.6123863454239875, "learning_rate": 2.191404746632457e-06, "loss": 0.695, "step": 47890 }, { "epoch": 0.5837080911118423, "grad_norm": 3.0693569748722913, "learning_rate": 2.19108402822322e-06, "loss": 0.7767, "step": 47895 }, { "epoch": 0.5837690273359901, "grad_norm": 2.601589778314738, "learning_rate": 2.1907633098139835e-06, "loss": 0.6717, "step": 47900 }, { "epoch": 0.5838299635601379, "grad_norm": 2.51730797070996, "learning_rate": 2.1904425914047465e-06, "loss": 0.7213, "step": 47905 }, { "epoch": 0.5838908997842858, "grad_norm": 3.123175305893774, "learning_rate": 2.19012187299551e-06, "loss": 0.7184, "step": 47910 }, { "epoch": 0.5839518360084336, "grad_norm": 2.0891431455659313, "learning_rate": 2.1898011545862734e-06, "loss": 0.714, "step": 47915 }, { "epoch": 0.5840127722325814, "grad_norm": 2.1990365037508837, "learning_rate": 2.189480436177037e-06, "loss": 0.713, "step": 47920 }, { "epoch": 0.5840737084567292, "grad_norm": 3.047591812648603, "learning_rate": 2.1891597177678002e-06, "loss": 0.7938, "step": 47925 }, { "epoch": 0.584134644680877, "grad_norm": 2.2422997564277467, "learning_rate": 2.1888389993585637e-06, "loss": 0.7856, "step": 47930 }, { "epoch": 0.5841955809050248, "grad_norm": 2.210415032769175, "learning_rate": 2.1885182809493267e-06, "loss": 0.791, "step": 47935 }, { "epoch": 0.5842565171291726, "grad_norm": 2.3175722218328945, "learning_rate": 2.18819756254009e-06, "loss": 0.7103, "step": 47940 }, { "epoch": 0.5843174533533204, "grad_norm": 2.8319107944142665, "learning_rate": 2.187876844130853e-06, "loss": 0.7476, "step": 47945 }, { "epoch": 0.5843783895774682, "grad_norm": 2.7514842066020604, "learning_rate": 2.1875561257216166e-06, "loss": 0.7417, "step": 47950 }, { "epoch": 0.5844393258016161, "grad_norm": 2.773652894751704, "learning_rate": 2.18723540731238e-06, "loss": 0.7082, "step": 47955 }, { "epoch": 0.5845002620257639, "grad_norm": 2.3836808154788725, "learning_rate": 2.186914688903143e-06, "loss": 0.7315, "step": 47960 }, { "epoch": 0.5845611982499116, "grad_norm": 2.301936251679375, "learning_rate": 2.1865939704939065e-06, "loss": 0.7405, "step": 47965 }, { "epoch": 0.5846221344740594, "grad_norm": 2.331507784796332, "learning_rate": 2.18627325208467e-06, "loss": 0.6736, "step": 47970 }, { "epoch": 0.5846830706982072, "grad_norm": 2.0645064840328033, "learning_rate": 2.185952533675433e-06, "loss": 0.7033, "step": 47975 }, { "epoch": 0.5847440069223551, "grad_norm": 2.591493422318134, "learning_rate": 2.1856318152661964e-06, "loss": 0.804, "step": 47980 }, { "epoch": 0.5848049431465029, "grad_norm": 3.160013136876254, "learning_rate": 2.18531109685696e-06, "loss": 0.7746, "step": 47985 }, { "epoch": 0.5848658793706507, "grad_norm": 2.4527574566914674, "learning_rate": 2.184990378447723e-06, "loss": 0.7192, "step": 47990 }, { "epoch": 0.5849268155947985, "grad_norm": 2.0967753759791177, "learning_rate": 2.1846696600384863e-06, "loss": 0.7089, "step": 47995 }, { "epoch": 0.5849877518189462, "grad_norm": 3.1519602064477055, "learning_rate": 2.1843489416292497e-06, "loss": 0.7449, "step": 48000 }, { "epoch": 0.5850486880430941, "grad_norm": 2.6802522390350734, "learning_rate": 2.184028223220013e-06, "loss": 0.7556, "step": 48005 }, { "epoch": 0.5851096242672419, "grad_norm": 2.9282043685790047, "learning_rate": 2.1837075048107766e-06, "loss": 0.762, "step": 48010 }, { "epoch": 0.5851705604913897, "grad_norm": 2.8264739265133563, "learning_rate": 2.1833867864015396e-06, "loss": 0.745, "step": 48015 }, { "epoch": 0.5852314967155375, "grad_norm": 2.3464232978152624, "learning_rate": 2.183066067992303e-06, "loss": 0.7039, "step": 48020 }, { "epoch": 0.5852924329396854, "grad_norm": 2.5211959085964977, "learning_rate": 2.182745349583066e-06, "loss": 0.7236, "step": 48025 }, { "epoch": 0.5853533691638332, "grad_norm": 4.162522184263232, "learning_rate": 2.1824246311738295e-06, "loss": 0.7522, "step": 48030 }, { "epoch": 0.5854143053879809, "grad_norm": 2.206566203112737, "learning_rate": 2.182103912764593e-06, "loss": 0.8048, "step": 48035 }, { "epoch": 0.5854752416121287, "grad_norm": 2.4851513899327706, "learning_rate": 2.181783194355356e-06, "loss": 0.7939, "step": 48040 }, { "epoch": 0.5855361778362765, "grad_norm": 2.389833439475763, "learning_rate": 2.1814624759461194e-06, "loss": 0.7596, "step": 48045 }, { "epoch": 0.5855971140604244, "grad_norm": 2.8388405912585664, "learning_rate": 2.181141757536883e-06, "loss": 0.7156, "step": 48050 }, { "epoch": 0.5856580502845722, "grad_norm": 2.4248984073092537, "learning_rate": 2.180821039127646e-06, "loss": 0.7735, "step": 48055 }, { "epoch": 0.58571898650872, "grad_norm": 2.5001065151361805, "learning_rate": 2.1805003207184093e-06, "loss": 0.7591, "step": 48060 }, { "epoch": 0.5857799227328678, "grad_norm": 2.858408183899652, "learning_rate": 2.1801796023091728e-06, "loss": 0.7611, "step": 48065 }, { "epoch": 0.5858408589570155, "grad_norm": 2.142027034575866, "learning_rate": 2.179858883899936e-06, "loss": 0.7061, "step": 48070 }, { "epoch": 0.5859017951811634, "grad_norm": 1.9922729945813913, "learning_rate": 2.1795381654906992e-06, "loss": 0.7102, "step": 48075 }, { "epoch": 0.5859627314053112, "grad_norm": 2.443197758700943, "learning_rate": 2.1792174470814627e-06, "loss": 0.7786, "step": 48080 }, { "epoch": 0.586023667629459, "grad_norm": 2.656072581099504, "learning_rate": 2.178896728672226e-06, "loss": 0.7876, "step": 48085 }, { "epoch": 0.5860846038536068, "grad_norm": 2.4797707619744616, "learning_rate": 2.1785760102629895e-06, "loss": 0.7316, "step": 48090 }, { "epoch": 0.5861455400777547, "grad_norm": 2.758133457072549, "learning_rate": 2.1782552918537526e-06, "loss": 0.7205, "step": 48095 }, { "epoch": 0.5862064763019025, "grad_norm": 2.726454259682785, "learning_rate": 2.177934573444516e-06, "loss": 0.7107, "step": 48100 }, { "epoch": 0.5862674125260502, "grad_norm": 2.872476092270103, "learning_rate": 2.1776138550352794e-06, "loss": 0.7255, "step": 48105 }, { "epoch": 0.586328348750198, "grad_norm": 2.9076044067128564, "learning_rate": 2.1772931366260425e-06, "loss": 0.7013, "step": 48110 }, { "epoch": 0.5863892849743458, "grad_norm": 2.3982827128458633, "learning_rate": 2.176972418216806e-06, "loss": 0.726, "step": 48115 }, { "epoch": 0.5864502211984937, "grad_norm": 2.245372417766646, "learning_rate": 2.176651699807569e-06, "loss": 0.7672, "step": 48120 }, { "epoch": 0.5865111574226415, "grad_norm": 2.626127166887058, "learning_rate": 2.1763309813983323e-06, "loss": 0.6665, "step": 48125 }, { "epoch": 0.5865720936467893, "grad_norm": 2.3440276663993256, "learning_rate": 2.176010262989096e-06, "loss": 0.8256, "step": 48130 }, { "epoch": 0.5866330298709371, "grad_norm": 2.1551058186581415, "learning_rate": 2.175689544579859e-06, "loss": 0.6787, "step": 48135 }, { "epoch": 0.5866939660950848, "grad_norm": 2.4750694614596895, "learning_rate": 2.1753688261706222e-06, "loss": 0.744, "step": 48140 }, { "epoch": 0.5867549023192327, "grad_norm": 2.129682455552971, "learning_rate": 2.1750481077613857e-06, "loss": 0.7383, "step": 48145 }, { "epoch": 0.5868158385433805, "grad_norm": 3.070765557665802, "learning_rate": 2.174727389352149e-06, "loss": 0.785, "step": 48150 }, { "epoch": 0.5868767747675283, "grad_norm": 2.5105882338738024, "learning_rate": 2.1744066709429126e-06, "loss": 0.7578, "step": 48155 }, { "epoch": 0.5869377109916761, "grad_norm": 2.5835490375106085, "learning_rate": 2.1740859525336756e-06, "loss": 0.7373, "step": 48160 }, { "epoch": 0.586998647215824, "grad_norm": 2.6541985934616417, "learning_rate": 2.173765234124439e-06, "loss": 0.7276, "step": 48165 }, { "epoch": 0.5870595834399718, "grad_norm": 2.2114406629726315, "learning_rate": 2.1734445157152025e-06, "loss": 0.7334, "step": 48170 }, { "epoch": 0.5871205196641195, "grad_norm": 2.3599624407906, "learning_rate": 2.1731237973059655e-06, "loss": 0.7143, "step": 48175 }, { "epoch": 0.5871814558882673, "grad_norm": 2.279765434649746, "learning_rate": 2.172803078896729e-06, "loss": 0.6933, "step": 48180 }, { "epoch": 0.5872423921124151, "grad_norm": 2.5031639224044935, "learning_rate": 2.1724823604874924e-06, "loss": 0.7625, "step": 48185 }, { "epoch": 0.587303328336563, "grad_norm": 2.3318001502925525, "learning_rate": 2.1721616420782554e-06, "loss": 0.7491, "step": 48190 }, { "epoch": 0.5873642645607108, "grad_norm": 2.254391848110559, "learning_rate": 2.171840923669019e-06, "loss": 0.8193, "step": 48195 }, { "epoch": 0.5874252007848586, "grad_norm": 3.1580609455664033, "learning_rate": 2.171520205259782e-06, "loss": 0.8295, "step": 48200 }, { "epoch": 0.5874861370090064, "grad_norm": 4.100527301395342, "learning_rate": 2.1711994868505453e-06, "loss": 0.7421, "step": 48205 }, { "epoch": 0.5875470732331541, "grad_norm": 2.495722625132077, "learning_rate": 2.1708787684413087e-06, "loss": 0.7836, "step": 48210 }, { "epoch": 0.587608009457302, "grad_norm": 2.835484594428674, "learning_rate": 2.1705580500320717e-06, "loss": 0.7205, "step": 48215 }, { "epoch": 0.5876689456814498, "grad_norm": 2.436934185041748, "learning_rate": 2.170237331622835e-06, "loss": 0.6515, "step": 48220 }, { "epoch": 0.5877298819055976, "grad_norm": 3.07711648117295, "learning_rate": 2.1699166132135986e-06, "loss": 0.811, "step": 48225 }, { "epoch": 0.5877908181297454, "grad_norm": 2.5594276759748866, "learning_rate": 2.169595894804362e-06, "loss": 0.7279, "step": 48230 }, { "epoch": 0.5878517543538933, "grad_norm": 2.243063041463777, "learning_rate": 2.1692751763951255e-06, "loss": 0.7252, "step": 48235 }, { "epoch": 0.5879126905780411, "grad_norm": 3.1473569075498973, "learning_rate": 2.1689544579858885e-06, "loss": 0.8251, "step": 48240 }, { "epoch": 0.5879736268021888, "grad_norm": 2.545667984128086, "learning_rate": 2.168633739576652e-06, "loss": 0.7259, "step": 48245 }, { "epoch": 0.5880345630263366, "grad_norm": 2.754634793634978, "learning_rate": 2.1683130211674154e-06, "loss": 0.806, "step": 48250 }, { "epoch": 0.5880954992504844, "grad_norm": 2.3500592740625716, "learning_rate": 2.1679923027581784e-06, "loss": 0.744, "step": 48255 }, { "epoch": 0.5881564354746323, "grad_norm": 2.6868366756023394, "learning_rate": 2.167671584348942e-06, "loss": 0.6752, "step": 48260 }, { "epoch": 0.5882173716987801, "grad_norm": 2.358763951940603, "learning_rate": 2.1673508659397053e-06, "loss": 0.7199, "step": 48265 }, { "epoch": 0.5882783079229279, "grad_norm": 2.213520891761497, "learning_rate": 2.1670301475304683e-06, "loss": 0.6941, "step": 48270 }, { "epoch": 0.5883392441470757, "grad_norm": 2.720875851935995, "learning_rate": 2.1667094291212317e-06, "loss": 0.6764, "step": 48275 }, { "epoch": 0.5884001803712234, "grad_norm": 1.9093993468222061, "learning_rate": 2.1663887107119948e-06, "loss": 0.7805, "step": 48280 }, { "epoch": 0.5884611165953713, "grad_norm": 2.278118004719969, "learning_rate": 2.166067992302758e-06, "loss": 0.6852, "step": 48285 }, { "epoch": 0.5885220528195191, "grad_norm": 3.4914221050548875, "learning_rate": 2.1657472738935216e-06, "loss": 0.7321, "step": 48290 }, { "epoch": 0.5885829890436669, "grad_norm": 2.402514116055638, "learning_rate": 2.165426555484285e-06, "loss": 0.7179, "step": 48295 }, { "epoch": 0.5886439252678147, "grad_norm": 2.620117405555671, "learning_rate": 2.165105837075048e-06, "loss": 0.7535, "step": 48300 }, { "epoch": 0.5887048614919625, "grad_norm": 2.5857361876949074, "learning_rate": 2.1647851186658115e-06, "loss": 0.7887, "step": 48305 }, { "epoch": 0.5887657977161104, "grad_norm": 2.539335998651736, "learning_rate": 2.164464400256575e-06, "loss": 0.7212, "step": 48310 }, { "epoch": 0.5888267339402581, "grad_norm": 2.810720652926499, "learning_rate": 2.1641436818473384e-06, "loss": 0.7838, "step": 48315 }, { "epoch": 0.5888876701644059, "grad_norm": 2.681564756797468, "learning_rate": 2.1638229634381014e-06, "loss": 0.7587, "step": 48320 }, { "epoch": 0.5889486063885537, "grad_norm": 2.96238278956626, "learning_rate": 2.163502245028865e-06, "loss": 0.8186, "step": 48325 }, { "epoch": 0.5890095426127016, "grad_norm": 2.3012697710256957, "learning_rate": 2.1631815266196283e-06, "loss": 0.7218, "step": 48330 }, { "epoch": 0.5890704788368494, "grad_norm": 2.2649542007704446, "learning_rate": 2.1628608082103913e-06, "loss": 0.7758, "step": 48335 }, { "epoch": 0.5891314150609972, "grad_norm": 2.118324054048632, "learning_rate": 2.1625400898011548e-06, "loss": 0.6731, "step": 48340 }, { "epoch": 0.589192351285145, "grad_norm": 2.476938962055849, "learning_rate": 2.1622193713919182e-06, "loss": 0.7703, "step": 48345 }, { "epoch": 0.5892532875092927, "grad_norm": 4.506473378185059, "learning_rate": 2.1618986529826812e-06, "loss": 0.7073, "step": 48350 }, { "epoch": 0.5893142237334406, "grad_norm": 2.43263060448762, "learning_rate": 2.1615779345734447e-06, "loss": 0.7625, "step": 48355 }, { "epoch": 0.5893751599575884, "grad_norm": 2.4913086798618, "learning_rate": 2.161257216164208e-06, "loss": 0.7127, "step": 48360 }, { "epoch": 0.5894360961817362, "grad_norm": 1.8819335407248146, "learning_rate": 2.160936497754971e-06, "loss": 0.7001, "step": 48365 }, { "epoch": 0.589497032405884, "grad_norm": 2.5380869765755243, "learning_rate": 2.1606157793457346e-06, "loss": 0.7583, "step": 48370 }, { "epoch": 0.5895579686300318, "grad_norm": 2.5092272351943117, "learning_rate": 2.160295060936498e-06, "loss": 0.8354, "step": 48375 }, { "epoch": 0.5896189048541797, "grad_norm": 2.578329677782469, "learning_rate": 2.159974342527261e-06, "loss": 0.7424, "step": 48380 }, { "epoch": 0.5896798410783274, "grad_norm": 2.483151385497562, "learning_rate": 2.1596536241180245e-06, "loss": 0.7192, "step": 48385 }, { "epoch": 0.5897407773024752, "grad_norm": 2.708347586566899, "learning_rate": 2.159332905708788e-06, "loss": 0.7359, "step": 48390 }, { "epoch": 0.589801713526623, "grad_norm": 2.6690393489985005, "learning_rate": 2.1590121872995513e-06, "loss": 0.8128, "step": 48395 }, { "epoch": 0.5898626497507709, "grad_norm": 2.495460526986816, "learning_rate": 2.158691468890315e-06, "loss": 0.7729, "step": 48400 }, { "epoch": 0.5899235859749187, "grad_norm": 2.3462157346265053, "learning_rate": 2.158370750481078e-06, "loss": 0.7828, "step": 48405 }, { "epoch": 0.5899845221990665, "grad_norm": 3.574074692689008, "learning_rate": 2.1580500320718412e-06, "loss": 0.7325, "step": 48410 }, { "epoch": 0.5900454584232142, "grad_norm": 2.60648038513406, "learning_rate": 2.1577293136626043e-06, "loss": 0.7729, "step": 48415 }, { "epoch": 0.590106394647362, "grad_norm": 2.850003946633998, "learning_rate": 2.1574085952533677e-06, "loss": 0.7393, "step": 48420 }, { "epoch": 0.5901673308715099, "grad_norm": 2.622672450314256, "learning_rate": 2.157087876844131e-06, "loss": 0.673, "step": 48425 }, { "epoch": 0.5902282670956577, "grad_norm": 2.4043603411495686, "learning_rate": 2.156767158434894e-06, "loss": 0.7256, "step": 48430 }, { "epoch": 0.5902892033198055, "grad_norm": 2.7672692728318786, "learning_rate": 2.1564464400256576e-06, "loss": 0.692, "step": 48435 }, { "epoch": 0.5903501395439533, "grad_norm": 2.138289102562079, "learning_rate": 2.156125721616421e-06, "loss": 0.7022, "step": 48440 }, { "epoch": 0.5904110757681011, "grad_norm": 2.8894973327133275, "learning_rate": 2.155805003207184e-06, "loss": 0.7138, "step": 48445 }, { "epoch": 0.5904720119922489, "grad_norm": 2.8001149942575285, "learning_rate": 2.1554842847979475e-06, "loss": 0.7396, "step": 48450 }, { "epoch": 0.5905329482163967, "grad_norm": 2.8681063824841178, "learning_rate": 2.155163566388711e-06, "loss": 0.7251, "step": 48455 }, { "epoch": 0.5905938844405445, "grad_norm": 3.282007186563552, "learning_rate": 2.1548428479794744e-06, "loss": 0.7316, "step": 48460 }, { "epoch": 0.5906548206646923, "grad_norm": 2.8182807870777107, "learning_rate": 2.1545221295702374e-06, "loss": 0.8168, "step": 48465 }, { "epoch": 0.5907157568888401, "grad_norm": 2.417736590812501, "learning_rate": 2.154201411161001e-06, "loss": 0.6998, "step": 48470 }, { "epoch": 0.590776693112988, "grad_norm": 2.398459179047823, "learning_rate": 2.1538806927517643e-06, "loss": 0.7127, "step": 48475 }, { "epoch": 0.5908376293371358, "grad_norm": 2.1185033662854607, "learning_rate": 2.1535599743425277e-06, "loss": 0.7714, "step": 48480 }, { "epoch": 0.5908985655612835, "grad_norm": 2.890570712565115, "learning_rate": 2.1532392559332907e-06, "loss": 0.718, "step": 48485 }, { "epoch": 0.5909595017854313, "grad_norm": 2.4004592499177364, "learning_rate": 2.152918537524054e-06, "loss": 0.7209, "step": 48490 }, { "epoch": 0.5910204380095792, "grad_norm": 1.9996387243529277, "learning_rate": 2.152597819114817e-06, "loss": 0.7035, "step": 48495 }, { "epoch": 0.591081374233727, "grad_norm": 2.408456324466284, "learning_rate": 2.1522771007055806e-06, "loss": 0.736, "step": 48500 }, { "epoch": 0.5911423104578748, "grad_norm": 2.348934203633441, "learning_rate": 2.151956382296344e-06, "loss": 0.7432, "step": 48505 }, { "epoch": 0.5912032466820226, "grad_norm": 2.5003401015171174, "learning_rate": 2.151635663887107e-06, "loss": 0.7532, "step": 48510 }, { "epoch": 0.5912641829061704, "grad_norm": 2.259942348909588, "learning_rate": 2.1513149454778705e-06, "loss": 0.7966, "step": 48515 }, { "epoch": 0.5913251191303182, "grad_norm": 2.2652787559222425, "learning_rate": 2.150994227068634e-06, "loss": 0.7493, "step": 48520 }, { "epoch": 0.591386055354466, "grad_norm": 3.0225173139767834, "learning_rate": 2.150673508659397e-06, "loss": 0.7387, "step": 48525 }, { "epoch": 0.5914469915786138, "grad_norm": 2.6166044514805735, "learning_rate": 2.1503527902501604e-06, "loss": 0.7808, "step": 48530 }, { "epoch": 0.5915079278027616, "grad_norm": 2.619404112099113, "learning_rate": 2.150032071840924e-06, "loss": 0.7283, "step": 48535 }, { "epoch": 0.5915688640269094, "grad_norm": 2.1143510659680116, "learning_rate": 2.1497113534316873e-06, "loss": 0.7189, "step": 48540 }, { "epoch": 0.5916298002510573, "grad_norm": 2.8224650621178937, "learning_rate": 2.1493906350224507e-06, "loss": 0.7242, "step": 48545 }, { "epoch": 0.5916907364752051, "grad_norm": 2.4061257171380412, "learning_rate": 2.1490699166132138e-06, "loss": 0.7483, "step": 48550 }, { "epoch": 0.5917516726993528, "grad_norm": 2.6595032556955167, "learning_rate": 2.148749198203977e-06, "loss": 0.768, "step": 48555 }, { "epoch": 0.5918126089235006, "grad_norm": 2.3994852158872204, "learning_rate": 2.1484284797947406e-06, "loss": 0.7958, "step": 48560 }, { "epoch": 0.5918735451476484, "grad_norm": 2.529538047938026, "learning_rate": 2.1481077613855037e-06, "loss": 0.7413, "step": 48565 }, { "epoch": 0.5919344813717963, "grad_norm": 2.4153677442870785, "learning_rate": 2.147787042976267e-06, "loss": 0.729, "step": 48570 }, { "epoch": 0.5919954175959441, "grad_norm": 2.428931960653655, "learning_rate": 2.14746632456703e-06, "loss": 0.7146, "step": 48575 }, { "epoch": 0.5920563538200919, "grad_norm": 2.929905450381797, "learning_rate": 2.1471456061577936e-06, "loss": 0.744, "step": 48580 }, { "epoch": 0.5921172900442397, "grad_norm": 2.3670022661237056, "learning_rate": 2.146824887748557e-06, "loss": 0.7272, "step": 48585 }, { "epoch": 0.5921782262683875, "grad_norm": 2.6686040520531145, "learning_rate": 2.14650416933932e-06, "loss": 0.8314, "step": 48590 }, { "epoch": 0.5922391624925353, "grad_norm": 2.2054525991868754, "learning_rate": 2.1461834509300835e-06, "loss": 0.7293, "step": 48595 }, { "epoch": 0.5923000987166831, "grad_norm": 2.2478577861659823, "learning_rate": 2.145862732520847e-06, "loss": 0.7815, "step": 48600 }, { "epoch": 0.5923610349408309, "grad_norm": 2.2252501180294204, "learning_rate": 2.14554201411161e-06, "loss": 0.6718, "step": 48605 }, { "epoch": 0.5924219711649787, "grad_norm": 3.1420975468197345, "learning_rate": 2.1452212957023734e-06, "loss": 0.7747, "step": 48610 }, { "epoch": 0.5924829073891266, "grad_norm": 2.543794927729367, "learning_rate": 2.144900577293137e-06, "loss": 0.7236, "step": 48615 }, { "epoch": 0.5925438436132744, "grad_norm": 2.8146018777273087, "learning_rate": 2.1445798588839002e-06, "loss": 0.7579, "step": 48620 }, { "epoch": 0.5926047798374221, "grad_norm": 2.3446158610686245, "learning_rate": 2.1442591404746637e-06, "loss": 0.7343, "step": 48625 }, { "epoch": 0.5926657160615699, "grad_norm": 2.9078855208660723, "learning_rate": 2.1439384220654267e-06, "loss": 0.7681, "step": 48630 }, { "epoch": 0.5927266522857177, "grad_norm": 2.7039710287379815, "learning_rate": 2.14361770365619e-06, "loss": 0.7192, "step": 48635 }, { "epoch": 0.5927875885098656, "grad_norm": 3.458555361586207, "learning_rate": 2.1432969852469536e-06, "loss": 0.7266, "step": 48640 }, { "epoch": 0.5928485247340134, "grad_norm": 2.5469809338069367, "learning_rate": 2.1429762668377166e-06, "loss": 0.739, "step": 48645 }, { "epoch": 0.5929094609581612, "grad_norm": 2.321776205096442, "learning_rate": 2.14265554842848e-06, "loss": 0.7528, "step": 48650 }, { "epoch": 0.592970397182309, "grad_norm": 2.449317854634678, "learning_rate": 2.1423348300192435e-06, "loss": 0.7805, "step": 48655 }, { "epoch": 0.5930313334064568, "grad_norm": 2.308407196530186, "learning_rate": 2.1420141116100065e-06, "loss": 0.707, "step": 48660 }, { "epoch": 0.5930922696306046, "grad_norm": 2.254446736534813, "learning_rate": 2.14169339320077e-06, "loss": 0.7555, "step": 48665 }, { "epoch": 0.5931532058547524, "grad_norm": 2.611128342783004, "learning_rate": 2.141372674791533e-06, "loss": 0.6796, "step": 48670 }, { "epoch": 0.5932141420789002, "grad_norm": 2.5798329298732336, "learning_rate": 2.1410519563822964e-06, "loss": 0.7353, "step": 48675 }, { "epoch": 0.593275078303048, "grad_norm": 2.173669637090377, "learning_rate": 2.14073123797306e-06, "loss": 0.6976, "step": 48680 }, { "epoch": 0.5933360145271959, "grad_norm": 2.019860164307488, "learning_rate": 2.1404105195638233e-06, "loss": 0.781, "step": 48685 }, { "epoch": 0.5933969507513437, "grad_norm": 2.26903306770548, "learning_rate": 2.1400898011545863e-06, "loss": 0.7209, "step": 48690 }, { "epoch": 0.5934578869754914, "grad_norm": 2.2703018656877925, "learning_rate": 2.1397690827453497e-06, "loss": 0.6597, "step": 48695 }, { "epoch": 0.5935188231996392, "grad_norm": 3.282603018643278, "learning_rate": 2.139448364336113e-06, "loss": 0.6877, "step": 48700 }, { "epoch": 0.593579759423787, "grad_norm": 2.6740641735054407, "learning_rate": 2.1391276459268766e-06, "loss": 0.7431, "step": 48705 }, { "epoch": 0.5936406956479349, "grad_norm": 2.0660843624862992, "learning_rate": 2.1388069275176396e-06, "loss": 0.7365, "step": 48710 }, { "epoch": 0.5937016318720827, "grad_norm": 2.1951665589601213, "learning_rate": 2.138486209108403e-06, "loss": 0.7323, "step": 48715 }, { "epoch": 0.5937625680962305, "grad_norm": 2.38623944496286, "learning_rate": 2.1381654906991665e-06, "loss": 0.7652, "step": 48720 }, { "epoch": 0.5938235043203783, "grad_norm": 2.7489455386934423, "learning_rate": 2.1378447722899295e-06, "loss": 0.7519, "step": 48725 }, { "epoch": 0.593884440544526, "grad_norm": 2.2559257756381412, "learning_rate": 2.137524053880693e-06, "loss": 0.6663, "step": 48730 }, { "epoch": 0.5939453767686739, "grad_norm": 2.5730877425327283, "learning_rate": 2.1372033354714564e-06, "loss": 0.8018, "step": 48735 }, { "epoch": 0.5940063129928217, "grad_norm": 2.736708651869611, "learning_rate": 2.1368826170622194e-06, "loss": 0.76, "step": 48740 }, { "epoch": 0.5940672492169695, "grad_norm": 2.4081440725999834, "learning_rate": 2.136561898652983e-06, "loss": 0.7308, "step": 48745 }, { "epoch": 0.5941281854411173, "grad_norm": 2.1444705779400994, "learning_rate": 2.136241180243746e-06, "loss": 0.7603, "step": 48750 }, { "epoch": 0.5941891216652652, "grad_norm": 2.2872445546884115, "learning_rate": 2.1359204618345093e-06, "loss": 0.7519, "step": 48755 }, { "epoch": 0.594250057889413, "grad_norm": 2.3318567047308814, "learning_rate": 2.1355997434252727e-06, "loss": 0.7667, "step": 48760 }, { "epoch": 0.5943109941135607, "grad_norm": 2.617697967301081, "learning_rate": 2.135279025016036e-06, "loss": 0.6993, "step": 48765 }, { "epoch": 0.5943719303377085, "grad_norm": 3.7541650546720375, "learning_rate": 2.1349583066067996e-06, "loss": 0.7219, "step": 48770 }, { "epoch": 0.5944328665618563, "grad_norm": 2.4327850988964115, "learning_rate": 2.1346375881975626e-06, "loss": 0.7482, "step": 48775 }, { "epoch": 0.5944938027860042, "grad_norm": 2.4034856586846987, "learning_rate": 2.134316869788326e-06, "loss": 0.7396, "step": 48780 }, { "epoch": 0.594554739010152, "grad_norm": 2.4946154091118675, "learning_rate": 2.1339961513790895e-06, "loss": 0.7082, "step": 48785 }, { "epoch": 0.5946156752342998, "grad_norm": 2.571513293198276, "learning_rate": 2.1336754329698525e-06, "loss": 0.8271, "step": 48790 }, { "epoch": 0.5946766114584476, "grad_norm": 2.482975557174988, "learning_rate": 2.133354714560616e-06, "loss": 0.7086, "step": 48795 }, { "epoch": 0.5947375476825953, "grad_norm": 2.401829401632847, "learning_rate": 2.1330339961513794e-06, "loss": 0.7413, "step": 48800 }, { "epoch": 0.5947984839067432, "grad_norm": 2.3647805530270363, "learning_rate": 2.1327132777421424e-06, "loss": 0.7367, "step": 48805 }, { "epoch": 0.594859420130891, "grad_norm": 2.773970278616758, "learning_rate": 2.132392559332906e-06, "loss": 0.801, "step": 48810 }, { "epoch": 0.5949203563550388, "grad_norm": 2.8338705132171467, "learning_rate": 2.1320718409236693e-06, "loss": 0.7094, "step": 48815 }, { "epoch": 0.5949812925791866, "grad_norm": 2.805137729417263, "learning_rate": 2.1317511225144323e-06, "loss": 0.6921, "step": 48820 }, { "epoch": 0.5950422288033345, "grad_norm": 2.292209233612235, "learning_rate": 2.1314304041051958e-06, "loss": 0.7693, "step": 48825 }, { "epoch": 0.5951031650274823, "grad_norm": 2.912372579767053, "learning_rate": 2.131109685695959e-06, "loss": 0.7311, "step": 48830 }, { "epoch": 0.59516410125163, "grad_norm": 2.0052103981304223, "learning_rate": 2.1307889672867222e-06, "loss": 0.8004, "step": 48835 }, { "epoch": 0.5952250374757778, "grad_norm": 6.230648091275176, "learning_rate": 2.1304682488774857e-06, "loss": 0.7704, "step": 48840 }, { "epoch": 0.5952859736999256, "grad_norm": 3.2899633664722288, "learning_rate": 2.130147530468249e-06, "loss": 0.7654, "step": 48845 }, { "epoch": 0.5953469099240735, "grad_norm": 3.390741501742268, "learning_rate": 2.1298268120590126e-06, "loss": 0.8224, "step": 48850 }, { "epoch": 0.5954078461482213, "grad_norm": 2.396225219916592, "learning_rate": 2.1295060936497756e-06, "loss": 0.6367, "step": 48855 }, { "epoch": 0.5954687823723691, "grad_norm": 2.2788376079311212, "learning_rate": 2.129185375240539e-06, "loss": 0.774, "step": 48860 }, { "epoch": 0.5955297185965169, "grad_norm": 2.336352773571903, "learning_rate": 2.1288646568313025e-06, "loss": 0.8147, "step": 48865 }, { "epoch": 0.5955906548206646, "grad_norm": 2.240812560001531, "learning_rate": 2.1285439384220655e-06, "loss": 0.8119, "step": 48870 }, { "epoch": 0.5956515910448125, "grad_norm": 3.1653482379287894, "learning_rate": 2.128223220012829e-06, "loss": 0.7421, "step": 48875 }, { "epoch": 0.5957125272689603, "grad_norm": 2.417225186244904, "learning_rate": 2.1279025016035924e-06, "loss": 0.7699, "step": 48880 }, { "epoch": 0.5957734634931081, "grad_norm": 2.863884381885029, "learning_rate": 2.1275817831943554e-06, "loss": 0.723, "step": 48885 }, { "epoch": 0.5958343997172559, "grad_norm": 2.356891454584921, "learning_rate": 2.127261064785119e-06, "loss": 0.6316, "step": 48890 }, { "epoch": 0.5958953359414038, "grad_norm": 2.6630252314221106, "learning_rate": 2.1269403463758822e-06, "loss": 0.7669, "step": 48895 }, { "epoch": 0.5959562721655516, "grad_norm": 2.7365444042913, "learning_rate": 2.1266196279666453e-06, "loss": 0.7103, "step": 48900 }, { "epoch": 0.5960172083896993, "grad_norm": 2.689784771669996, "learning_rate": 2.1262989095574087e-06, "loss": 0.7904, "step": 48905 }, { "epoch": 0.5960781446138471, "grad_norm": 3.048911315659503, "learning_rate": 2.125978191148172e-06, "loss": 0.7517, "step": 48910 }, { "epoch": 0.5961390808379949, "grad_norm": 2.116314656688207, "learning_rate": 2.125657472738935e-06, "loss": 0.6876, "step": 48915 }, { "epoch": 0.5962000170621428, "grad_norm": 2.509035531578275, "learning_rate": 2.1253367543296986e-06, "loss": 0.686, "step": 48920 }, { "epoch": 0.5962609532862906, "grad_norm": 3.150586972657499, "learning_rate": 2.125016035920462e-06, "loss": 0.7945, "step": 48925 }, { "epoch": 0.5963218895104384, "grad_norm": 2.1909512162931706, "learning_rate": 2.1246953175112255e-06, "loss": 0.7329, "step": 48930 }, { "epoch": 0.5963828257345862, "grad_norm": 2.1318943074312915, "learning_rate": 2.124374599101989e-06, "loss": 0.7637, "step": 48935 }, { "epoch": 0.5964437619587339, "grad_norm": 2.246451751347708, "learning_rate": 2.124053880692752e-06, "loss": 0.7359, "step": 48940 }, { "epoch": 0.5965046981828818, "grad_norm": 2.148060733245152, "learning_rate": 2.1237331622835154e-06, "loss": 0.6868, "step": 48945 }, { "epoch": 0.5965656344070296, "grad_norm": 2.1130813089080367, "learning_rate": 2.123412443874279e-06, "loss": 0.7471, "step": 48950 }, { "epoch": 0.5966265706311774, "grad_norm": 2.4890692567953354, "learning_rate": 2.123091725465042e-06, "loss": 0.7707, "step": 48955 }, { "epoch": 0.5966875068553252, "grad_norm": 2.471149328749535, "learning_rate": 2.1227710070558053e-06, "loss": 0.7195, "step": 48960 }, { "epoch": 0.5967484430794731, "grad_norm": 2.836358289716425, "learning_rate": 2.1224502886465683e-06, "loss": 0.7861, "step": 48965 }, { "epoch": 0.5968093793036209, "grad_norm": 2.5026908544845083, "learning_rate": 2.1221295702373317e-06, "loss": 0.6689, "step": 48970 }, { "epoch": 0.5968703155277686, "grad_norm": 2.8953194086991076, "learning_rate": 2.121808851828095e-06, "loss": 0.7444, "step": 48975 }, { "epoch": 0.5969312517519164, "grad_norm": 2.799611591235785, "learning_rate": 2.121488133418858e-06, "loss": 0.782, "step": 48980 }, { "epoch": 0.5969921879760642, "grad_norm": 2.627278373862524, "learning_rate": 2.1211674150096216e-06, "loss": 0.8187, "step": 48985 }, { "epoch": 0.5970531242002121, "grad_norm": 2.5116904718692794, "learning_rate": 2.120846696600385e-06, "loss": 0.7487, "step": 48990 }, { "epoch": 0.5971140604243599, "grad_norm": 2.5735085530658206, "learning_rate": 2.1205259781911485e-06, "loss": 0.6941, "step": 48995 }, { "epoch": 0.5971749966485077, "grad_norm": 2.1565783746133906, "learning_rate": 2.1202052597819115e-06, "loss": 0.7504, "step": 49000 }, { "epoch": 0.5972359328726555, "grad_norm": 2.0270417532507885, "learning_rate": 2.119884541372675e-06, "loss": 0.6891, "step": 49005 }, { "epoch": 0.5972968690968032, "grad_norm": 2.2417016952566353, "learning_rate": 2.1195638229634384e-06, "loss": 0.7369, "step": 49010 }, { "epoch": 0.5973578053209511, "grad_norm": 3.0195000106723513, "learning_rate": 2.119243104554202e-06, "loss": 0.7287, "step": 49015 }, { "epoch": 0.5974187415450989, "grad_norm": 2.5246909989136253, "learning_rate": 2.118922386144965e-06, "loss": 0.7406, "step": 49020 }, { "epoch": 0.5974796777692467, "grad_norm": 2.851968726218873, "learning_rate": 2.1186016677357283e-06, "loss": 0.7909, "step": 49025 }, { "epoch": 0.5975406139933945, "grad_norm": 2.7466675743398636, "learning_rate": 2.1182809493264917e-06, "loss": 0.749, "step": 49030 }, { "epoch": 0.5976015502175424, "grad_norm": 2.186500435062649, "learning_rate": 2.1179602309172548e-06, "loss": 0.7359, "step": 49035 }, { "epoch": 0.5976624864416902, "grad_norm": 2.553119416439643, "learning_rate": 2.117639512508018e-06, "loss": 0.8023, "step": 49040 }, { "epoch": 0.5977234226658379, "grad_norm": 3.104368117699524, "learning_rate": 2.1173187940987812e-06, "loss": 0.7909, "step": 49045 }, { "epoch": 0.5977843588899857, "grad_norm": 2.548934466331667, "learning_rate": 2.1169980756895447e-06, "loss": 0.7303, "step": 49050 }, { "epoch": 0.5978452951141335, "grad_norm": 2.4031420332517794, "learning_rate": 2.116677357280308e-06, "loss": 0.7148, "step": 49055 }, { "epoch": 0.5979062313382814, "grad_norm": 2.7802340892298067, "learning_rate": 2.116356638871071e-06, "loss": 0.7535, "step": 49060 }, { "epoch": 0.5979671675624292, "grad_norm": 2.2485548739322274, "learning_rate": 2.1160359204618346e-06, "loss": 0.7004, "step": 49065 }, { "epoch": 0.598028103786577, "grad_norm": 2.2193174992226, "learning_rate": 2.115715202052598e-06, "loss": 0.788, "step": 49070 }, { "epoch": 0.5980890400107248, "grad_norm": 2.5069744437380828, "learning_rate": 2.1153944836433614e-06, "loss": 0.7814, "step": 49075 }, { "epoch": 0.5981499762348725, "grad_norm": 2.7854965632302036, "learning_rate": 2.1150737652341245e-06, "loss": 0.7165, "step": 49080 }, { "epoch": 0.5982109124590204, "grad_norm": 2.35255314171265, "learning_rate": 2.114753046824888e-06, "loss": 0.7412, "step": 49085 }, { "epoch": 0.5982718486831682, "grad_norm": 2.596459705880195, "learning_rate": 2.1144323284156513e-06, "loss": 0.8329, "step": 49090 }, { "epoch": 0.598332784907316, "grad_norm": 2.1081485025358506, "learning_rate": 2.1141116100064148e-06, "loss": 0.7633, "step": 49095 }, { "epoch": 0.5983937211314638, "grad_norm": 2.427986884638406, "learning_rate": 2.113790891597178e-06, "loss": 0.7593, "step": 49100 }, { "epoch": 0.5984546573556117, "grad_norm": 2.3140795419529785, "learning_rate": 2.1134701731879412e-06, "loss": 0.7705, "step": 49105 }, { "epoch": 0.5985155935797595, "grad_norm": 3.0504215825566994, "learning_rate": 2.1131494547787047e-06, "loss": 0.7195, "step": 49110 }, { "epoch": 0.5985765298039072, "grad_norm": 2.246633686915132, "learning_rate": 2.1128287363694677e-06, "loss": 0.712, "step": 49115 }, { "epoch": 0.598637466028055, "grad_norm": 2.828264704152257, "learning_rate": 2.112508017960231e-06, "loss": 0.7769, "step": 49120 }, { "epoch": 0.5986984022522028, "grad_norm": 2.6550356872744416, "learning_rate": 2.112187299550994e-06, "loss": 0.7083, "step": 49125 }, { "epoch": 0.5987593384763507, "grad_norm": 2.4431116741209573, "learning_rate": 2.1118665811417576e-06, "loss": 0.7825, "step": 49130 }, { "epoch": 0.5988202747004985, "grad_norm": 1.7783116125762453, "learning_rate": 2.111545862732521e-06, "loss": 0.6994, "step": 49135 }, { "epoch": 0.5988812109246463, "grad_norm": 3.229661516217637, "learning_rate": 2.111225144323284e-06, "loss": 0.8098, "step": 49140 }, { "epoch": 0.5989421471487941, "grad_norm": 2.787637616621824, "learning_rate": 2.1109044259140475e-06, "loss": 0.8084, "step": 49145 }, { "epoch": 0.5990030833729418, "grad_norm": 2.224786704537751, "learning_rate": 2.110583707504811e-06, "loss": 0.7874, "step": 49150 }, { "epoch": 0.5990640195970897, "grad_norm": 2.7723947302897627, "learning_rate": 2.1102629890955744e-06, "loss": 0.827, "step": 49155 }, { "epoch": 0.5991249558212375, "grad_norm": 2.3068910877077435, "learning_rate": 2.109942270686338e-06, "loss": 0.8618, "step": 49160 }, { "epoch": 0.5991858920453853, "grad_norm": 2.4289990449886134, "learning_rate": 2.109621552277101e-06, "loss": 0.6729, "step": 49165 }, { "epoch": 0.5992468282695331, "grad_norm": 2.2592293331645146, "learning_rate": 2.1093008338678643e-06, "loss": 0.7724, "step": 49170 }, { "epoch": 0.599307764493681, "grad_norm": 2.267724004925859, "learning_rate": 2.1089801154586277e-06, "loss": 0.7547, "step": 49175 }, { "epoch": 0.5993687007178288, "grad_norm": 2.982051382875182, "learning_rate": 2.1086593970493907e-06, "loss": 0.7824, "step": 49180 }, { "epoch": 0.5994296369419765, "grad_norm": 2.4915920035879373, "learning_rate": 2.108338678640154e-06, "loss": 0.6894, "step": 49185 }, { "epoch": 0.5994905731661243, "grad_norm": 2.5283910381247803, "learning_rate": 2.1080179602309176e-06, "loss": 0.7325, "step": 49190 }, { "epoch": 0.5995515093902721, "grad_norm": 2.3066575421616373, "learning_rate": 2.1076972418216806e-06, "loss": 0.7286, "step": 49195 }, { "epoch": 0.59961244561442, "grad_norm": 2.4619657789902334, "learning_rate": 2.107376523412444e-06, "loss": 0.7539, "step": 49200 }, { "epoch": 0.5996733818385678, "grad_norm": 2.3477339008007707, "learning_rate": 2.107055805003207e-06, "loss": 0.7321, "step": 49205 }, { "epoch": 0.5997343180627156, "grad_norm": 2.3639460651763233, "learning_rate": 2.1067350865939705e-06, "loss": 0.7016, "step": 49210 }, { "epoch": 0.5997952542868634, "grad_norm": 3.755527506825203, "learning_rate": 2.106414368184734e-06, "loss": 0.7757, "step": 49215 }, { "epoch": 0.5998561905110111, "grad_norm": 2.4377286255372064, "learning_rate": 2.106093649775497e-06, "loss": 0.7084, "step": 49220 }, { "epoch": 0.599917126735159, "grad_norm": 2.845485214347998, "learning_rate": 2.1057729313662604e-06, "loss": 0.7174, "step": 49225 }, { "epoch": 0.5999780629593068, "grad_norm": 2.767183758227817, "learning_rate": 2.105452212957024e-06, "loss": 0.7256, "step": 49230 }, { "epoch": 0.6000389991834546, "grad_norm": 2.8361632952899347, "learning_rate": 2.1051314945477873e-06, "loss": 0.6635, "step": 49235 }, { "epoch": 0.6000999354076024, "grad_norm": 2.5341247294224263, "learning_rate": 2.1048107761385507e-06, "loss": 0.6634, "step": 49240 }, { "epoch": 0.6001608716317502, "grad_norm": 3.4191873506202826, "learning_rate": 2.104490057729314e-06, "loss": 0.6796, "step": 49245 }, { "epoch": 0.6002218078558981, "grad_norm": 2.8649870451381636, "learning_rate": 2.104169339320077e-06, "loss": 0.7661, "step": 49250 }, { "epoch": 0.6002827440800458, "grad_norm": 2.263314704745958, "learning_rate": 2.1038486209108406e-06, "loss": 0.6613, "step": 49255 }, { "epoch": 0.6003436803041936, "grad_norm": 2.3949970285571087, "learning_rate": 2.1035279025016036e-06, "loss": 0.762, "step": 49260 }, { "epoch": 0.6004046165283414, "grad_norm": 2.5966243334590824, "learning_rate": 2.103207184092367e-06, "loss": 0.78, "step": 49265 }, { "epoch": 0.6004655527524893, "grad_norm": 2.123933313353473, "learning_rate": 2.1028864656831305e-06, "loss": 0.7157, "step": 49270 }, { "epoch": 0.6005264889766371, "grad_norm": 2.1788189033752827, "learning_rate": 2.1025657472738935e-06, "loss": 0.6709, "step": 49275 }, { "epoch": 0.6005874252007849, "grad_norm": 2.586538377084856, "learning_rate": 2.102245028864657e-06, "loss": 0.7093, "step": 49280 }, { "epoch": 0.6006483614249327, "grad_norm": 2.3432004767024086, "learning_rate": 2.1019243104554204e-06, "loss": 0.7418, "step": 49285 }, { "epoch": 0.6007092976490804, "grad_norm": 2.2069284785510614, "learning_rate": 2.1016035920461834e-06, "loss": 0.7589, "step": 49290 }, { "epoch": 0.6007702338732283, "grad_norm": 3.7251277161645495, "learning_rate": 2.101282873636947e-06, "loss": 0.7794, "step": 49295 }, { "epoch": 0.6008311700973761, "grad_norm": 2.376637727396473, "learning_rate": 2.1009621552277103e-06, "loss": 0.7616, "step": 49300 }, { "epoch": 0.6008921063215239, "grad_norm": 3.0356949726371747, "learning_rate": 2.1006414368184733e-06, "loss": 0.8219, "step": 49305 }, { "epoch": 0.6009530425456717, "grad_norm": 2.1507503075939263, "learning_rate": 2.1003207184092368e-06, "loss": 0.742, "step": 49310 }, { "epoch": 0.6010139787698195, "grad_norm": 3.2718632307952547, "learning_rate": 2.1000000000000002e-06, "loss": 0.6952, "step": 49315 }, { "epoch": 0.6010749149939674, "grad_norm": 2.4153407756590086, "learning_rate": 2.0996792815907637e-06, "loss": 0.8197, "step": 49320 }, { "epoch": 0.6011358512181151, "grad_norm": 2.4616168422748532, "learning_rate": 2.099358563181527e-06, "loss": 0.7737, "step": 49325 }, { "epoch": 0.6011967874422629, "grad_norm": 3.21947793382948, "learning_rate": 2.09903784477229e-06, "loss": 0.7826, "step": 49330 }, { "epoch": 0.6012577236664107, "grad_norm": 2.0415169396905863, "learning_rate": 2.0987171263630536e-06, "loss": 0.7381, "step": 49335 }, { "epoch": 0.6013186598905585, "grad_norm": 2.69481517208086, "learning_rate": 2.0983964079538166e-06, "loss": 0.7749, "step": 49340 }, { "epoch": 0.6013795961147064, "grad_norm": 2.263783442297929, "learning_rate": 2.09807568954458e-06, "loss": 0.7345, "step": 49345 }, { "epoch": 0.6014405323388542, "grad_norm": 3.5251390472873796, "learning_rate": 2.0977549711353435e-06, "loss": 0.7061, "step": 49350 }, { "epoch": 0.601501468563002, "grad_norm": 2.905651970352169, "learning_rate": 2.0974342527261065e-06, "loss": 0.7618, "step": 49355 }, { "epoch": 0.6015624047871497, "grad_norm": 3.3161185716373356, "learning_rate": 2.09711353431687e-06, "loss": 0.7642, "step": 49360 }, { "epoch": 0.6016233410112976, "grad_norm": 2.2579137398744775, "learning_rate": 2.0967928159076334e-06, "loss": 0.8213, "step": 49365 }, { "epoch": 0.6016842772354454, "grad_norm": 2.7144912960116323, "learning_rate": 2.0964720974983964e-06, "loss": 0.8038, "step": 49370 }, { "epoch": 0.6017452134595932, "grad_norm": 2.534307036286381, "learning_rate": 2.09615137908916e-06, "loss": 0.75, "step": 49375 }, { "epoch": 0.601806149683741, "grad_norm": 2.7306899581927304, "learning_rate": 2.0958306606799233e-06, "loss": 0.7479, "step": 49380 }, { "epoch": 0.6018670859078888, "grad_norm": 2.2747600739572356, "learning_rate": 2.0955099422706867e-06, "loss": 0.7315, "step": 49385 }, { "epoch": 0.6019280221320366, "grad_norm": 2.3724751374254347, "learning_rate": 2.0951892238614497e-06, "loss": 0.7037, "step": 49390 }, { "epoch": 0.6019889583561844, "grad_norm": 2.611478747114881, "learning_rate": 2.094868505452213e-06, "loss": 0.7162, "step": 49395 }, { "epoch": 0.6020498945803322, "grad_norm": 2.192631712547368, "learning_rate": 2.0945477870429766e-06, "loss": 0.7568, "step": 49400 }, { "epoch": 0.60211083080448, "grad_norm": 2.4440184429578244, "learning_rate": 2.09422706863374e-06, "loss": 0.7582, "step": 49405 }, { "epoch": 0.6021717670286278, "grad_norm": 3.314908284990602, "learning_rate": 2.093906350224503e-06, "loss": 0.7124, "step": 49410 }, { "epoch": 0.6022327032527757, "grad_norm": 2.819300877104781, "learning_rate": 2.0935856318152665e-06, "loss": 0.7393, "step": 49415 }, { "epoch": 0.6022936394769235, "grad_norm": 2.222904843363775, "learning_rate": 2.0932649134060295e-06, "loss": 0.7241, "step": 49420 }, { "epoch": 0.6023545757010712, "grad_norm": 3.4880440364927368, "learning_rate": 2.092944194996793e-06, "loss": 0.6821, "step": 49425 }, { "epoch": 0.602415511925219, "grad_norm": 2.725236469307113, "learning_rate": 2.0926234765875564e-06, "loss": 0.7228, "step": 49430 }, { "epoch": 0.6024764481493668, "grad_norm": 2.5647953050370287, "learning_rate": 2.0923027581783194e-06, "loss": 0.7247, "step": 49435 }, { "epoch": 0.6025373843735147, "grad_norm": 2.5299228400713196, "learning_rate": 2.091982039769083e-06, "loss": 0.8511, "step": 49440 }, { "epoch": 0.6025983205976625, "grad_norm": 2.5543334242479303, "learning_rate": 2.0916613213598463e-06, "loss": 0.7524, "step": 49445 }, { "epoch": 0.6026592568218103, "grad_norm": 2.6409949115210707, "learning_rate": 2.0913406029506093e-06, "loss": 0.7345, "step": 49450 }, { "epoch": 0.6027201930459581, "grad_norm": 2.1856460252097327, "learning_rate": 2.0910198845413727e-06, "loss": 0.7886, "step": 49455 }, { "epoch": 0.6027811292701059, "grad_norm": 2.1909761862197463, "learning_rate": 2.090699166132136e-06, "loss": 0.7015, "step": 49460 }, { "epoch": 0.6028420654942537, "grad_norm": 2.2151090921026957, "learning_rate": 2.0903784477228996e-06, "loss": 0.7038, "step": 49465 }, { "epoch": 0.6029030017184015, "grad_norm": 2.496228919642563, "learning_rate": 2.090057729313663e-06, "loss": 0.6892, "step": 49470 }, { "epoch": 0.6029639379425493, "grad_norm": 4.087196719622297, "learning_rate": 2.089737010904426e-06, "loss": 0.7022, "step": 49475 }, { "epoch": 0.6030248741666971, "grad_norm": 2.6432672909917465, "learning_rate": 2.0894162924951895e-06, "loss": 0.7708, "step": 49480 }, { "epoch": 0.603085810390845, "grad_norm": 3.0759831217917206, "learning_rate": 2.089095574085953e-06, "loss": 0.7357, "step": 49485 }, { "epoch": 0.6031467466149928, "grad_norm": 2.8215847568501133, "learning_rate": 2.088774855676716e-06, "loss": 0.6968, "step": 49490 }, { "epoch": 0.6032076828391405, "grad_norm": 2.377603947050229, "learning_rate": 2.0884541372674794e-06, "loss": 0.8326, "step": 49495 }, { "epoch": 0.6032686190632883, "grad_norm": 1.9634664820691585, "learning_rate": 2.0881334188582424e-06, "loss": 0.77, "step": 49500 }, { "epoch": 0.6033295552874361, "grad_norm": 2.3488168966867904, "learning_rate": 2.087812700449006e-06, "loss": 0.765, "step": 49505 }, { "epoch": 0.603390491511584, "grad_norm": 2.4835120275380036, "learning_rate": 2.0874919820397693e-06, "loss": 0.7934, "step": 49510 }, { "epoch": 0.6034514277357318, "grad_norm": 2.2786942012252647, "learning_rate": 2.0871712636305323e-06, "loss": 0.6815, "step": 49515 }, { "epoch": 0.6035123639598796, "grad_norm": 2.3019051921000475, "learning_rate": 2.0868505452212958e-06, "loss": 0.7441, "step": 49520 }, { "epoch": 0.6035733001840274, "grad_norm": 2.9033457984708546, "learning_rate": 2.086529826812059e-06, "loss": 0.7852, "step": 49525 }, { "epoch": 0.6036342364081752, "grad_norm": 2.649849070499382, "learning_rate": 2.0862091084028222e-06, "loss": 0.711, "step": 49530 }, { "epoch": 0.603695172632323, "grad_norm": 2.3426975614999592, "learning_rate": 2.0858883899935857e-06, "loss": 0.6623, "step": 49535 }, { "epoch": 0.6037561088564708, "grad_norm": 3.0491349898949296, "learning_rate": 2.085567671584349e-06, "loss": 0.6765, "step": 49540 }, { "epoch": 0.6038170450806186, "grad_norm": 2.4022084710735463, "learning_rate": 2.0852469531751125e-06, "loss": 0.7449, "step": 49545 }, { "epoch": 0.6038779813047664, "grad_norm": 2.557928837344079, "learning_rate": 2.084926234765876e-06, "loss": 0.763, "step": 49550 }, { "epoch": 0.6039389175289143, "grad_norm": 2.1294067748225944, "learning_rate": 2.084605516356639e-06, "loss": 0.7259, "step": 49555 }, { "epoch": 0.6039998537530621, "grad_norm": 2.5622450843121203, "learning_rate": 2.0842847979474024e-06, "loss": 0.7477, "step": 49560 }, { "epoch": 0.6040607899772098, "grad_norm": 3.0309295858335887, "learning_rate": 2.083964079538166e-06, "loss": 0.6825, "step": 49565 }, { "epoch": 0.6041217262013576, "grad_norm": 2.092890386735421, "learning_rate": 2.083643361128929e-06, "loss": 0.7145, "step": 49570 }, { "epoch": 0.6041826624255054, "grad_norm": 2.35982562143562, "learning_rate": 2.0833226427196923e-06, "loss": 0.6909, "step": 49575 }, { "epoch": 0.6042435986496533, "grad_norm": 2.1280626483159626, "learning_rate": 2.0830019243104558e-06, "loss": 0.729, "step": 49580 }, { "epoch": 0.6043045348738011, "grad_norm": 2.6552899010451947, "learning_rate": 2.082681205901219e-06, "loss": 0.7476, "step": 49585 }, { "epoch": 0.6043654710979489, "grad_norm": 2.8254662403535806, "learning_rate": 2.0823604874919822e-06, "loss": 0.7882, "step": 49590 }, { "epoch": 0.6044264073220967, "grad_norm": 2.189352296098607, "learning_rate": 2.0820397690827453e-06, "loss": 0.7591, "step": 49595 }, { "epoch": 0.6044873435462444, "grad_norm": 2.339290451744305, "learning_rate": 2.0817190506735087e-06, "loss": 0.7461, "step": 49600 }, { "epoch": 0.6045482797703923, "grad_norm": 4.835298539665459, "learning_rate": 2.081398332264272e-06, "loss": 0.8236, "step": 49605 }, { "epoch": 0.6046092159945401, "grad_norm": 2.1718033420082197, "learning_rate": 2.0810776138550356e-06, "loss": 0.7103, "step": 49610 }, { "epoch": 0.6046701522186879, "grad_norm": 2.50824263080374, "learning_rate": 2.0807568954457986e-06, "loss": 0.787, "step": 49615 }, { "epoch": 0.6047310884428357, "grad_norm": 2.2753251289298766, "learning_rate": 2.080436177036562e-06, "loss": 0.7316, "step": 49620 }, { "epoch": 0.6047920246669836, "grad_norm": 2.246269624994117, "learning_rate": 2.0801154586273255e-06, "loss": 0.6859, "step": 49625 }, { "epoch": 0.6048529608911314, "grad_norm": 2.1454715388609147, "learning_rate": 2.079794740218089e-06, "loss": 0.7461, "step": 49630 }, { "epoch": 0.6049138971152791, "grad_norm": 2.7731825462792625, "learning_rate": 2.079474021808852e-06, "loss": 0.6907, "step": 49635 }, { "epoch": 0.6049748333394269, "grad_norm": 2.381305317788658, "learning_rate": 2.0791533033996154e-06, "loss": 0.6872, "step": 49640 }, { "epoch": 0.6050357695635747, "grad_norm": 2.819727789029104, "learning_rate": 2.078832584990379e-06, "loss": 0.718, "step": 49645 }, { "epoch": 0.6050967057877226, "grad_norm": 2.37132904748596, "learning_rate": 2.078511866581142e-06, "loss": 0.6851, "step": 49650 }, { "epoch": 0.6051576420118704, "grad_norm": 2.599524096565044, "learning_rate": 2.0781911481719053e-06, "loss": 0.7857, "step": 49655 }, { "epoch": 0.6052185782360182, "grad_norm": 2.3051161416952257, "learning_rate": 2.0778704297626687e-06, "loss": 0.6857, "step": 49660 }, { "epoch": 0.605279514460166, "grad_norm": 2.3695552438464933, "learning_rate": 2.0775497113534317e-06, "loss": 0.7135, "step": 49665 }, { "epoch": 0.6053404506843137, "grad_norm": 2.630001266859794, "learning_rate": 2.077228992944195e-06, "loss": 0.766, "step": 49670 }, { "epoch": 0.6054013869084616, "grad_norm": 2.195841327893907, "learning_rate": 2.076908274534958e-06, "loss": 0.7235, "step": 49675 }, { "epoch": 0.6054623231326094, "grad_norm": 2.6347361919976824, "learning_rate": 2.0765875561257216e-06, "loss": 0.6896, "step": 49680 }, { "epoch": 0.6055232593567572, "grad_norm": 2.88092819106357, "learning_rate": 2.076266837716485e-06, "loss": 0.7021, "step": 49685 }, { "epoch": 0.605584195580905, "grad_norm": 2.3492578033470353, "learning_rate": 2.0759461193072485e-06, "loss": 0.744, "step": 49690 }, { "epoch": 0.6056451318050529, "grad_norm": 2.46056274719351, "learning_rate": 2.075625400898012e-06, "loss": 0.8089, "step": 49695 }, { "epoch": 0.6057060680292007, "grad_norm": 2.1959631932439687, "learning_rate": 2.075304682488775e-06, "loss": 0.7409, "step": 49700 }, { "epoch": 0.6057670042533484, "grad_norm": 3.185014290662002, "learning_rate": 2.0749839640795384e-06, "loss": 0.7268, "step": 49705 }, { "epoch": 0.6058279404774962, "grad_norm": 3.8874395483257653, "learning_rate": 2.074663245670302e-06, "loss": 0.7741, "step": 49710 }, { "epoch": 0.605888876701644, "grad_norm": 2.6699605204611503, "learning_rate": 2.074342527261065e-06, "loss": 0.6564, "step": 49715 }, { "epoch": 0.6059498129257919, "grad_norm": 2.7745774723887076, "learning_rate": 2.0740218088518283e-06, "loss": 0.8086, "step": 49720 }, { "epoch": 0.6060107491499397, "grad_norm": 2.1498387479980687, "learning_rate": 2.0737010904425917e-06, "loss": 0.6724, "step": 49725 }, { "epoch": 0.6060716853740875, "grad_norm": 3.8764752340301185, "learning_rate": 2.0733803720333548e-06, "loss": 0.7659, "step": 49730 }, { "epoch": 0.6061326215982353, "grad_norm": 2.714480621200297, "learning_rate": 2.073059653624118e-06, "loss": 0.7639, "step": 49735 }, { "epoch": 0.606193557822383, "grad_norm": 1.963011852049426, "learning_rate": 2.0727389352148816e-06, "loss": 0.706, "step": 49740 }, { "epoch": 0.6062544940465309, "grad_norm": 2.2633773588708883, "learning_rate": 2.0724182168056447e-06, "loss": 0.7376, "step": 49745 }, { "epoch": 0.6063154302706787, "grad_norm": 2.160615018614884, "learning_rate": 2.072097498396408e-06, "loss": 0.7584, "step": 49750 }, { "epoch": 0.6063763664948265, "grad_norm": 2.6866814573908506, "learning_rate": 2.071776779987171e-06, "loss": 0.7943, "step": 49755 }, { "epoch": 0.6064373027189743, "grad_norm": 2.926477757233805, "learning_rate": 2.0714560615779346e-06, "loss": 0.7722, "step": 49760 }, { "epoch": 0.6064982389431222, "grad_norm": 2.4346320914426314, "learning_rate": 2.071135343168698e-06, "loss": 0.6603, "step": 49765 }, { "epoch": 0.60655917516727, "grad_norm": 2.4682741210843533, "learning_rate": 2.0708146247594614e-06, "loss": 0.7185, "step": 49770 }, { "epoch": 0.6066201113914177, "grad_norm": 1.927795453279316, "learning_rate": 2.070493906350225e-06, "loss": 0.7435, "step": 49775 }, { "epoch": 0.6066810476155655, "grad_norm": 2.8297375282389465, "learning_rate": 2.070173187940988e-06, "loss": 0.7966, "step": 49780 }, { "epoch": 0.6067419838397133, "grad_norm": 2.239838760711989, "learning_rate": 2.0698524695317513e-06, "loss": 0.7579, "step": 49785 }, { "epoch": 0.6068029200638612, "grad_norm": 3.1114614434569035, "learning_rate": 2.0695317511225148e-06, "loss": 0.709, "step": 49790 }, { "epoch": 0.606863856288009, "grad_norm": 2.115906825503605, "learning_rate": 2.0692110327132778e-06, "loss": 0.7151, "step": 49795 }, { "epoch": 0.6069247925121568, "grad_norm": 2.539156722404496, "learning_rate": 2.0688903143040412e-06, "loss": 0.7296, "step": 49800 }, { "epoch": 0.6069857287363046, "grad_norm": 2.413454614351603, "learning_rate": 2.0685695958948047e-06, "loss": 0.7526, "step": 49805 }, { "epoch": 0.6070466649604523, "grad_norm": 2.6855231915240094, "learning_rate": 2.0682488774855677e-06, "loss": 0.7161, "step": 49810 }, { "epoch": 0.6071076011846002, "grad_norm": 2.8484488387438267, "learning_rate": 2.067928159076331e-06, "loss": 0.7113, "step": 49815 }, { "epoch": 0.607168537408748, "grad_norm": 2.701134470203095, "learning_rate": 2.0676074406670946e-06, "loss": 0.6708, "step": 49820 }, { "epoch": 0.6072294736328958, "grad_norm": 2.6577583281538577, "learning_rate": 2.0672867222578576e-06, "loss": 0.7838, "step": 49825 }, { "epoch": 0.6072904098570436, "grad_norm": 2.477120145339616, "learning_rate": 2.066966003848621e-06, "loss": 0.7323, "step": 49830 }, { "epoch": 0.6073513460811915, "grad_norm": 2.2580218729282437, "learning_rate": 2.0666452854393845e-06, "loss": 0.7596, "step": 49835 }, { "epoch": 0.6074122823053393, "grad_norm": 2.096915844382775, "learning_rate": 2.0663245670301475e-06, "loss": 0.7339, "step": 49840 }, { "epoch": 0.607473218529487, "grad_norm": 2.6067977893779477, "learning_rate": 2.066003848620911e-06, "loss": 0.7749, "step": 49845 }, { "epoch": 0.6075341547536348, "grad_norm": 2.5344937820849784, "learning_rate": 2.0656831302116744e-06, "loss": 0.7582, "step": 49850 }, { "epoch": 0.6075950909777826, "grad_norm": 2.28148708536988, "learning_rate": 2.065362411802438e-06, "loss": 0.7343, "step": 49855 }, { "epoch": 0.6076560272019305, "grad_norm": 4.34232335739896, "learning_rate": 2.0650416933932012e-06, "loss": 0.6799, "step": 49860 }, { "epoch": 0.6077169634260783, "grad_norm": 3.471309122238936, "learning_rate": 2.0647209749839643e-06, "loss": 0.8259, "step": 49865 }, { "epoch": 0.6077778996502261, "grad_norm": 3.167805316750741, "learning_rate": 2.0644002565747277e-06, "loss": 0.7256, "step": 49870 }, { "epoch": 0.6078388358743739, "grad_norm": 2.5832960761853583, "learning_rate": 2.064079538165491e-06, "loss": 0.7716, "step": 49875 }, { "epoch": 0.6078997720985216, "grad_norm": 2.5072377559166457, "learning_rate": 2.063758819756254e-06, "loss": 0.6769, "step": 49880 }, { "epoch": 0.6079607083226695, "grad_norm": 2.799596129733554, "learning_rate": 2.0634381013470176e-06, "loss": 0.7134, "step": 49885 }, { "epoch": 0.6080216445468173, "grad_norm": 2.6312990657823994, "learning_rate": 2.0631173829377806e-06, "loss": 0.7002, "step": 49890 }, { "epoch": 0.6080825807709651, "grad_norm": 3.216522901138698, "learning_rate": 2.062796664528544e-06, "loss": 0.706, "step": 49895 }, { "epoch": 0.6081435169951129, "grad_norm": 2.419081857513499, "learning_rate": 2.0624759461193075e-06, "loss": 0.7477, "step": 49900 }, { "epoch": 0.6082044532192608, "grad_norm": 2.187594983669469, "learning_rate": 2.0621552277100705e-06, "loss": 0.6604, "step": 49905 }, { "epoch": 0.6082653894434086, "grad_norm": 2.8342803424443934, "learning_rate": 2.061834509300834e-06, "loss": 0.7424, "step": 49910 }, { "epoch": 0.6083263256675563, "grad_norm": 2.7172699962901854, "learning_rate": 2.0615137908915974e-06, "loss": 0.7966, "step": 49915 }, { "epoch": 0.6083872618917041, "grad_norm": 2.332709960423074, "learning_rate": 2.0611930724823604e-06, "loss": 0.7477, "step": 49920 }, { "epoch": 0.6084481981158519, "grad_norm": 2.275078249015461, "learning_rate": 2.060872354073124e-06, "loss": 0.7823, "step": 49925 }, { "epoch": 0.6085091343399998, "grad_norm": 2.242347918581224, "learning_rate": 2.0605516356638873e-06, "loss": 0.7565, "step": 49930 }, { "epoch": 0.6085700705641476, "grad_norm": 2.7179528889384716, "learning_rate": 2.0602309172546507e-06, "loss": 0.734, "step": 49935 }, { "epoch": 0.6086310067882954, "grad_norm": 2.0951666436397183, "learning_rate": 2.059910198845414e-06, "loss": 0.7136, "step": 49940 }, { "epoch": 0.6086919430124432, "grad_norm": 2.5910159081180564, "learning_rate": 2.059589480436177e-06, "loss": 0.7062, "step": 49945 }, { "epoch": 0.6087528792365909, "grad_norm": 2.7700869844858347, "learning_rate": 2.0592687620269406e-06, "loss": 0.7491, "step": 49950 }, { "epoch": 0.6088138154607388, "grad_norm": 2.8390316922463765, "learning_rate": 2.058948043617704e-06, "loss": 0.7218, "step": 49955 }, { "epoch": 0.6088747516848866, "grad_norm": 2.350662373787425, "learning_rate": 2.058627325208467e-06, "loss": 0.7178, "step": 49960 }, { "epoch": 0.6089356879090344, "grad_norm": 2.070820086964777, "learning_rate": 2.0583066067992305e-06, "loss": 0.7723, "step": 49965 }, { "epoch": 0.6089966241331822, "grad_norm": 3.664888980085654, "learning_rate": 2.0579858883899935e-06, "loss": 0.694, "step": 49970 }, { "epoch": 0.60905756035733, "grad_norm": 2.134086747858264, "learning_rate": 2.057665169980757e-06, "loss": 0.7994, "step": 49975 }, { "epoch": 0.6091184965814779, "grad_norm": 2.247860321380871, "learning_rate": 2.0573444515715204e-06, "loss": 0.774, "step": 49980 }, { "epoch": 0.6091794328056256, "grad_norm": 1.8556914772590598, "learning_rate": 2.0570237331622834e-06, "loss": 0.6867, "step": 49985 }, { "epoch": 0.6092403690297734, "grad_norm": 2.244638464859031, "learning_rate": 2.056703014753047e-06, "loss": 0.7406, "step": 49990 }, { "epoch": 0.6093013052539212, "grad_norm": 2.7404946644754444, "learning_rate": 2.0563822963438103e-06, "loss": 0.728, "step": 49995 }, { "epoch": 0.609362241478069, "grad_norm": 2.5589016587124807, "learning_rate": 2.0560615779345738e-06, "loss": 0.7737, "step": 50000 }, { "epoch": 0.6094231777022169, "grad_norm": 2.2878182605735837, "learning_rate": 2.0557408595253368e-06, "loss": 0.7546, "step": 50005 }, { "epoch": 0.6094841139263647, "grad_norm": 2.2554524314580466, "learning_rate": 2.0554201411161002e-06, "loss": 0.7313, "step": 50010 }, { "epoch": 0.6095450501505125, "grad_norm": 4.964524557902871, "learning_rate": 2.0550994227068637e-06, "loss": 0.67, "step": 50015 }, { "epoch": 0.6096059863746602, "grad_norm": 3.1189200259479293, "learning_rate": 2.054778704297627e-06, "loss": 0.7137, "step": 50020 }, { "epoch": 0.6096669225988081, "grad_norm": 2.2393312784096535, "learning_rate": 2.05445798588839e-06, "loss": 0.6953, "step": 50025 }, { "epoch": 0.6097278588229559, "grad_norm": 2.6995128137850566, "learning_rate": 2.0541372674791535e-06, "loss": 0.7497, "step": 50030 }, { "epoch": 0.6097887950471037, "grad_norm": 2.5382000131985105, "learning_rate": 2.053816549069917e-06, "loss": 0.752, "step": 50035 }, { "epoch": 0.6098497312712515, "grad_norm": 2.6233956831078964, "learning_rate": 2.05349583066068e-06, "loss": 0.7246, "step": 50040 }, { "epoch": 0.6099106674953994, "grad_norm": 3.4968176061596195, "learning_rate": 2.0531751122514434e-06, "loss": 0.7288, "step": 50045 }, { "epoch": 0.6099716037195472, "grad_norm": 2.5990866737994196, "learning_rate": 2.0528543938422065e-06, "loss": 0.7221, "step": 50050 }, { "epoch": 0.6100325399436949, "grad_norm": 2.3758740981264825, "learning_rate": 2.05253367543297e-06, "loss": 0.7215, "step": 50055 }, { "epoch": 0.6100934761678427, "grad_norm": 2.198725543944272, "learning_rate": 2.0522129570237333e-06, "loss": 0.8023, "step": 50060 }, { "epoch": 0.6101544123919905, "grad_norm": 2.8420891912372, "learning_rate": 2.0518922386144964e-06, "loss": 0.737, "step": 50065 }, { "epoch": 0.6102153486161384, "grad_norm": 3.434328667918654, "learning_rate": 2.05157152020526e-06, "loss": 0.768, "step": 50070 }, { "epoch": 0.6102762848402862, "grad_norm": 2.661013352764322, "learning_rate": 2.0512508017960232e-06, "loss": 0.7242, "step": 50075 }, { "epoch": 0.610337221064434, "grad_norm": 2.4452984784493084, "learning_rate": 2.0509300833867867e-06, "loss": 0.7297, "step": 50080 }, { "epoch": 0.6103981572885818, "grad_norm": 2.4008321446463414, "learning_rate": 2.05060936497755e-06, "loss": 0.7787, "step": 50085 }, { "epoch": 0.6104590935127295, "grad_norm": 2.671263454987558, "learning_rate": 2.050288646568313e-06, "loss": 0.7602, "step": 50090 }, { "epoch": 0.6105200297368774, "grad_norm": 2.919748588315875, "learning_rate": 2.0499679281590766e-06, "loss": 0.7206, "step": 50095 }, { "epoch": 0.6105809659610252, "grad_norm": 2.365643424656572, "learning_rate": 2.04964720974984e-06, "loss": 0.6974, "step": 50100 }, { "epoch": 0.610641902185173, "grad_norm": 2.539082458989671, "learning_rate": 2.049326491340603e-06, "loss": 0.6864, "step": 50105 }, { "epoch": 0.6107028384093208, "grad_norm": 2.3572468980271863, "learning_rate": 2.0490057729313665e-06, "loss": 0.739, "step": 50110 }, { "epoch": 0.6107637746334686, "grad_norm": 2.1263522226203184, "learning_rate": 2.04868505452213e-06, "loss": 0.7285, "step": 50115 }, { "epoch": 0.6108247108576165, "grad_norm": 2.4814882168597046, "learning_rate": 2.048364336112893e-06, "loss": 0.8038, "step": 50120 }, { "epoch": 0.6108856470817642, "grad_norm": 2.1894738266634612, "learning_rate": 2.0480436177036564e-06, "loss": 0.7728, "step": 50125 }, { "epoch": 0.610946583305912, "grad_norm": 2.4697865247108703, "learning_rate": 2.0477228992944194e-06, "loss": 0.7386, "step": 50130 }, { "epoch": 0.6110075195300598, "grad_norm": 2.4399069174600045, "learning_rate": 2.047402180885183e-06, "loss": 0.678, "step": 50135 }, { "epoch": 0.6110684557542077, "grad_norm": 2.695844017022027, "learning_rate": 2.0470814624759463e-06, "loss": 0.7333, "step": 50140 }, { "epoch": 0.6111293919783555, "grad_norm": 2.421496696872583, "learning_rate": 2.0467607440667093e-06, "loss": 0.7053, "step": 50145 }, { "epoch": 0.6111903282025033, "grad_norm": 3.0135483724430006, "learning_rate": 2.0464400256574727e-06, "loss": 0.7482, "step": 50150 }, { "epoch": 0.6112512644266511, "grad_norm": 2.3345433826229285, "learning_rate": 2.046119307248236e-06, "loss": 0.6534, "step": 50155 }, { "epoch": 0.6113122006507988, "grad_norm": 2.8016644958770276, "learning_rate": 2.0457985888389996e-06, "loss": 0.7793, "step": 50160 }, { "epoch": 0.6113731368749467, "grad_norm": 2.303586096552082, "learning_rate": 2.045477870429763e-06, "loss": 0.8314, "step": 50165 }, { "epoch": 0.6114340730990945, "grad_norm": 2.420522587050813, "learning_rate": 2.0451571520205265e-06, "loss": 0.7092, "step": 50170 }, { "epoch": 0.6114950093232423, "grad_norm": 2.7688065459416866, "learning_rate": 2.0448364336112895e-06, "loss": 0.7192, "step": 50175 }, { "epoch": 0.6115559455473901, "grad_norm": 2.339061851788835, "learning_rate": 2.044515715202053e-06, "loss": 0.7581, "step": 50180 }, { "epoch": 0.611616881771538, "grad_norm": 2.2825728089974167, "learning_rate": 2.044194996792816e-06, "loss": 0.7182, "step": 50185 }, { "epoch": 0.6116778179956858, "grad_norm": 2.3510661726602895, "learning_rate": 2.0438742783835794e-06, "loss": 0.7168, "step": 50190 }, { "epoch": 0.6117387542198335, "grad_norm": 2.502647687156039, "learning_rate": 2.043553559974343e-06, "loss": 0.7382, "step": 50195 }, { "epoch": 0.6117996904439813, "grad_norm": 2.72411904116259, "learning_rate": 2.043232841565106e-06, "loss": 0.7497, "step": 50200 }, { "epoch": 0.6118606266681291, "grad_norm": 2.1275771068994187, "learning_rate": 2.0429121231558693e-06, "loss": 0.753, "step": 50205 }, { "epoch": 0.611921562892277, "grad_norm": 2.3171886012435183, "learning_rate": 2.0425914047466327e-06, "loss": 0.6499, "step": 50210 }, { "epoch": 0.6119824991164248, "grad_norm": 2.468418869491957, "learning_rate": 2.0422706863373958e-06, "loss": 0.6995, "step": 50215 }, { "epoch": 0.6120434353405726, "grad_norm": 2.0798017976770486, "learning_rate": 2.041949967928159e-06, "loss": 0.605, "step": 50220 }, { "epoch": 0.6121043715647204, "grad_norm": 2.1708283277972407, "learning_rate": 2.0416292495189226e-06, "loss": 0.7537, "step": 50225 }, { "epoch": 0.6121653077888681, "grad_norm": 2.5965943926364714, "learning_rate": 2.0413085311096857e-06, "loss": 0.7542, "step": 50230 }, { "epoch": 0.612226244013016, "grad_norm": 2.094685279568677, "learning_rate": 2.040987812700449e-06, "loss": 0.6811, "step": 50235 }, { "epoch": 0.6122871802371638, "grad_norm": 2.455059530395005, "learning_rate": 2.0406670942912125e-06, "loss": 0.7496, "step": 50240 }, { "epoch": 0.6123481164613116, "grad_norm": 1.7541381613109068, "learning_rate": 2.040346375881976e-06, "loss": 0.7211, "step": 50245 }, { "epoch": 0.6124090526854594, "grad_norm": 4.234724552450694, "learning_rate": 2.0400256574727394e-06, "loss": 0.8182, "step": 50250 }, { "epoch": 0.6124699889096072, "grad_norm": 2.5931182094084555, "learning_rate": 2.0397049390635024e-06, "loss": 0.6435, "step": 50255 }, { "epoch": 0.6125309251337551, "grad_norm": 3.0440890081662264, "learning_rate": 2.039384220654266e-06, "loss": 0.7223, "step": 50260 }, { "epoch": 0.6125918613579028, "grad_norm": 2.038011154140013, "learning_rate": 2.039063502245029e-06, "loss": 0.7119, "step": 50265 }, { "epoch": 0.6126527975820506, "grad_norm": 2.2918685161675447, "learning_rate": 2.0387427838357923e-06, "loss": 0.7384, "step": 50270 }, { "epoch": 0.6127137338061984, "grad_norm": 2.8823241328981353, "learning_rate": 2.0384220654265558e-06, "loss": 0.8294, "step": 50275 }, { "epoch": 0.6127746700303462, "grad_norm": 4.276198348618961, "learning_rate": 2.0381013470173188e-06, "loss": 0.7423, "step": 50280 }, { "epoch": 0.6128356062544941, "grad_norm": 2.5614531333217294, "learning_rate": 2.0377806286080822e-06, "loss": 0.7736, "step": 50285 }, { "epoch": 0.6128965424786419, "grad_norm": 3.1465727499953284, "learning_rate": 2.0374599101988457e-06, "loss": 0.7441, "step": 50290 }, { "epoch": 0.6129574787027897, "grad_norm": 3.4659245484917154, "learning_rate": 2.0371391917896087e-06, "loss": 0.7542, "step": 50295 }, { "epoch": 0.6130184149269374, "grad_norm": 2.5392970014114677, "learning_rate": 2.036818473380372e-06, "loss": 0.7445, "step": 50300 }, { "epoch": 0.6130793511510853, "grad_norm": 2.446854889999353, "learning_rate": 2.0364977549711356e-06, "loss": 0.7612, "step": 50305 }, { "epoch": 0.6131402873752331, "grad_norm": 2.3690146355401622, "learning_rate": 2.036177036561899e-06, "loss": 0.7741, "step": 50310 }, { "epoch": 0.6132012235993809, "grad_norm": 2.210672750634271, "learning_rate": 2.035856318152662e-06, "loss": 0.7514, "step": 50315 }, { "epoch": 0.6132621598235287, "grad_norm": 2.436022336587472, "learning_rate": 2.0355355997434255e-06, "loss": 0.7665, "step": 50320 }, { "epoch": 0.6133230960476765, "grad_norm": 2.412183587949675, "learning_rate": 2.035214881334189e-06, "loss": 0.7971, "step": 50325 }, { "epoch": 0.6133840322718243, "grad_norm": 2.2423356783390505, "learning_rate": 2.0348941629249523e-06, "loss": 0.6859, "step": 50330 }, { "epoch": 0.6134449684959721, "grad_norm": 2.937369993496899, "learning_rate": 2.0345734445157154e-06, "loss": 0.7492, "step": 50335 }, { "epoch": 0.6135059047201199, "grad_norm": 2.202745727067104, "learning_rate": 2.034252726106479e-06, "loss": 0.7921, "step": 50340 }, { "epoch": 0.6135668409442677, "grad_norm": 2.9661274646682263, "learning_rate": 2.033932007697242e-06, "loss": 0.7182, "step": 50345 }, { "epoch": 0.6136277771684155, "grad_norm": 2.8026766651676644, "learning_rate": 2.0336112892880053e-06, "loss": 0.762, "step": 50350 }, { "epoch": 0.6136887133925634, "grad_norm": 2.410386105902949, "learning_rate": 2.0332905708787687e-06, "loss": 0.706, "step": 50355 }, { "epoch": 0.6137496496167112, "grad_norm": 2.317022890706792, "learning_rate": 2.0329698524695317e-06, "loss": 0.684, "step": 50360 }, { "epoch": 0.6138105858408589, "grad_norm": 2.5441221369349107, "learning_rate": 2.032649134060295e-06, "loss": 0.7149, "step": 50365 }, { "epoch": 0.6138715220650067, "grad_norm": 2.2733091471614713, "learning_rate": 2.0323284156510586e-06, "loss": 0.7297, "step": 50370 }, { "epoch": 0.6139324582891545, "grad_norm": 2.2055889037438905, "learning_rate": 2.0320076972418216e-06, "loss": 0.7648, "step": 50375 }, { "epoch": 0.6139933945133024, "grad_norm": 2.5471210509273465, "learning_rate": 2.031686978832585e-06, "loss": 0.7199, "step": 50380 }, { "epoch": 0.6140543307374502, "grad_norm": 2.4174152724705307, "learning_rate": 2.0313662604233485e-06, "loss": 0.6231, "step": 50385 }, { "epoch": 0.614115266961598, "grad_norm": 2.5005239747401395, "learning_rate": 2.031045542014112e-06, "loss": 0.7638, "step": 50390 }, { "epoch": 0.6141762031857458, "grad_norm": 2.6483552965386687, "learning_rate": 2.030724823604875e-06, "loss": 0.6982, "step": 50395 }, { "epoch": 0.6142371394098936, "grad_norm": 2.3622095558179272, "learning_rate": 2.0304041051956384e-06, "loss": 0.7413, "step": 50400 }, { "epoch": 0.6142980756340414, "grad_norm": 2.287836513076411, "learning_rate": 2.030083386786402e-06, "loss": 0.7403, "step": 50405 }, { "epoch": 0.6143590118581892, "grad_norm": 2.5148758310487622, "learning_rate": 2.0297626683771653e-06, "loss": 0.6595, "step": 50410 }, { "epoch": 0.614419948082337, "grad_norm": 2.220196759041906, "learning_rate": 2.0294419499679283e-06, "loss": 0.6817, "step": 50415 }, { "epoch": 0.6144808843064848, "grad_norm": 3.4356221013057806, "learning_rate": 2.0291212315586917e-06, "loss": 0.7326, "step": 50420 }, { "epoch": 0.6145418205306327, "grad_norm": 3.076254661703977, "learning_rate": 2.0288005131494547e-06, "loss": 0.8003, "step": 50425 }, { "epoch": 0.6146027567547805, "grad_norm": 2.552106164483224, "learning_rate": 2.028479794740218e-06, "loss": 0.7593, "step": 50430 }, { "epoch": 0.6146636929789282, "grad_norm": 2.8854357390519487, "learning_rate": 2.0281590763309816e-06, "loss": 0.809, "step": 50435 }, { "epoch": 0.614724629203076, "grad_norm": 2.5296367786083716, "learning_rate": 2.0278383579217446e-06, "loss": 0.7324, "step": 50440 }, { "epoch": 0.6147855654272238, "grad_norm": 2.51349228574213, "learning_rate": 2.027517639512508e-06, "loss": 0.831, "step": 50445 }, { "epoch": 0.6148465016513717, "grad_norm": 2.6510669896023855, "learning_rate": 2.0271969211032715e-06, "loss": 0.7529, "step": 50450 }, { "epoch": 0.6149074378755195, "grad_norm": 2.8089475381971254, "learning_rate": 2.0268762026940345e-06, "loss": 0.7715, "step": 50455 }, { "epoch": 0.6149683740996673, "grad_norm": 2.4801425766299037, "learning_rate": 2.026555484284798e-06, "loss": 0.7498, "step": 50460 }, { "epoch": 0.6150293103238151, "grad_norm": 2.447377817325182, "learning_rate": 2.0262347658755614e-06, "loss": 0.7722, "step": 50465 }, { "epoch": 0.6150902465479628, "grad_norm": 2.9346065812778597, "learning_rate": 2.025914047466325e-06, "loss": 0.7643, "step": 50470 }, { "epoch": 0.6151511827721107, "grad_norm": 2.097980105117366, "learning_rate": 2.0255933290570883e-06, "loss": 0.7789, "step": 50475 }, { "epoch": 0.6152121189962585, "grad_norm": 2.6539058723273343, "learning_rate": 2.0252726106478513e-06, "loss": 0.7598, "step": 50480 }, { "epoch": 0.6152730552204063, "grad_norm": 2.139005202339001, "learning_rate": 2.0249518922386148e-06, "loss": 0.7395, "step": 50485 }, { "epoch": 0.6153339914445541, "grad_norm": 2.418364401742697, "learning_rate": 2.024631173829378e-06, "loss": 0.7469, "step": 50490 }, { "epoch": 0.615394927668702, "grad_norm": 2.5277592275125067, "learning_rate": 2.0243104554201412e-06, "loss": 0.7544, "step": 50495 }, { "epoch": 0.6154558638928498, "grad_norm": 2.474208681491355, "learning_rate": 2.0239897370109047e-06, "loss": 0.6974, "step": 50500 }, { "epoch": 0.6155168001169975, "grad_norm": 2.551203668861319, "learning_rate": 2.023669018601668e-06, "loss": 0.7159, "step": 50505 }, { "epoch": 0.6155777363411453, "grad_norm": 2.3063011956729214, "learning_rate": 2.023348300192431e-06, "loss": 0.6998, "step": 50510 }, { "epoch": 0.6156386725652931, "grad_norm": 2.2381525251220826, "learning_rate": 2.0230275817831946e-06, "loss": 0.8094, "step": 50515 }, { "epoch": 0.615699608789441, "grad_norm": 2.2303786704511808, "learning_rate": 2.0227068633739576e-06, "loss": 0.6773, "step": 50520 }, { "epoch": 0.6157605450135888, "grad_norm": 2.417175759051689, "learning_rate": 2.022386144964721e-06, "loss": 0.668, "step": 50525 }, { "epoch": 0.6158214812377366, "grad_norm": 1.9175940895944912, "learning_rate": 2.0220654265554845e-06, "loss": 0.7402, "step": 50530 }, { "epoch": 0.6158824174618844, "grad_norm": 2.202772849069645, "learning_rate": 2.021744708146248e-06, "loss": 0.6761, "step": 50535 }, { "epoch": 0.6159433536860321, "grad_norm": 2.642923302016286, "learning_rate": 2.021423989737011e-06, "loss": 0.7414, "step": 50540 }, { "epoch": 0.61600428991018, "grad_norm": 2.8116612976098816, "learning_rate": 2.0211032713277743e-06, "loss": 0.6772, "step": 50545 }, { "epoch": 0.6160652261343278, "grad_norm": 2.16486260152708, "learning_rate": 2.0207825529185378e-06, "loss": 0.7299, "step": 50550 }, { "epoch": 0.6161261623584756, "grad_norm": 2.532758126532956, "learning_rate": 2.0204618345093012e-06, "loss": 0.7, "step": 50555 }, { "epoch": 0.6161870985826234, "grad_norm": 3.2700062603458995, "learning_rate": 2.0201411161000642e-06, "loss": 0.6855, "step": 50560 }, { "epoch": 0.6162480348067713, "grad_norm": 2.8778540128185752, "learning_rate": 2.0198203976908277e-06, "loss": 0.7978, "step": 50565 }, { "epoch": 0.6163089710309191, "grad_norm": 4.471735208869696, "learning_rate": 2.019499679281591e-06, "loss": 0.7356, "step": 50570 }, { "epoch": 0.6163699072550668, "grad_norm": 2.555445701396123, "learning_rate": 2.019178960872354e-06, "loss": 0.7414, "step": 50575 }, { "epoch": 0.6164308434792146, "grad_norm": 2.2888183749664583, "learning_rate": 2.0188582424631176e-06, "loss": 0.7119, "step": 50580 }, { "epoch": 0.6164917797033624, "grad_norm": 3.0452998429992353, "learning_rate": 2.018537524053881e-06, "loss": 0.6948, "step": 50585 }, { "epoch": 0.6165527159275103, "grad_norm": 2.3404933189375714, "learning_rate": 2.018216805644644e-06, "loss": 0.6986, "step": 50590 }, { "epoch": 0.6166136521516581, "grad_norm": 2.994892356303448, "learning_rate": 2.0178960872354075e-06, "loss": 0.7513, "step": 50595 }, { "epoch": 0.6166745883758059, "grad_norm": 3.082445915087809, "learning_rate": 2.0175753688261705e-06, "loss": 0.73, "step": 50600 }, { "epoch": 0.6167355245999537, "grad_norm": 2.9020558066401305, "learning_rate": 2.017254650416934e-06, "loss": 0.7252, "step": 50605 }, { "epoch": 0.6167964608241014, "grad_norm": 2.3962840683967475, "learning_rate": 2.0169339320076974e-06, "loss": 0.7993, "step": 50610 }, { "epoch": 0.6168573970482493, "grad_norm": 3.5369799537868416, "learning_rate": 2.016613213598461e-06, "loss": 0.7605, "step": 50615 }, { "epoch": 0.6169183332723971, "grad_norm": 2.2779874161002485, "learning_rate": 2.016292495189224e-06, "loss": 0.7688, "step": 50620 }, { "epoch": 0.6169792694965449, "grad_norm": 2.6013568740400776, "learning_rate": 2.0159717767799873e-06, "loss": 0.8079, "step": 50625 }, { "epoch": 0.6170402057206927, "grad_norm": 2.439843903447774, "learning_rate": 2.0156510583707507e-06, "loss": 0.7635, "step": 50630 }, { "epoch": 0.6171011419448406, "grad_norm": 2.5891542277235, "learning_rate": 2.015330339961514e-06, "loss": 0.7138, "step": 50635 }, { "epoch": 0.6171620781689884, "grad_norm": 2.1913110921349364, "learning_rate": 2.015009621552277e-06, "loss": 0.7689, "step": 50640 }, { "epoch": 0.6172230143931361, "grad_norm": 2.238457901561975, "learning_rate": 2.0146889031430406e-06, "loss": 0.731, "step": 50645 }, { "epoch": 0.6172839506172839, "grad_norm": 2.275812031317816, "learning_rate": 2.014368184733804e-06, "loss": 0.7245, "step": 50650 }, { "epoch": 0.6173448868414317, "grad_norm": 2.379063284668365, "learning_rate": 2.014047466324567e-06, "loss": 0.7169, "step": 50655 }, { "epoch": 0.6174058230655796, "grad_norm": 2.6375008247538863, "learning_rate": 2.0137267479153305e-06, "loss": 0.7218, "step": 50660 }, { "epoch": 0.6174667592897274, "grad_norm": 2.6968882431376278, "learning_rate": 2.013406029506094e-06, "loss": 0.7083, "step": 50665 }, { "epoch": 0.6175276955138752, "grad_norm": 2.2246185364097895, "learning_rate": 2.013085311096857e-06, "loss": 0.7256, "step": 50670 }, { "epoch": 0.617588631738023, "grad_norm": 2.3671242830396277, "learning_rate": 2.0127645926876204e-06, "loss": 0.7249, "step": 50675 }, { "epoch": 0.6176495679621707, "grad_norm": 2.808254438329224, "learning_rate": 2.0124438742783834e-06, "loss": 0.7572, "step": 50680 }, { "epoch": 0.6177105041863186, "grad_norm": 2.148232253954666, "learning_rate": 2.012123155869147e-06, "loss": 0.7466, "step": 50685 }, { "epoch": 0.6177714404104664, "grad_norm": 2.224852060407683, "learning_rate": 2.0118024374599103e-06, "loss": 0.706, "step": 50690 }, { "epoch": 0.6178323766346142, "grad_norm": 2.8477032625168985, "learning_rate": 2.0114817190506737e-06, "loss": 0.7547, "step": 50695 }, { "epoch": 0.617893312858762, "grad_norm": 2.2859715218667147, "learning_rate": 2.011161000641437e-06, "loss": 0.7975, "step": 50700 }, { "epoch": 0.6179542490829099, "grad_norm": 2.8245045499441304, "learning_rate": 2.0108402822322e-06, "loss": 0.7094, "step": 50705 }, { "epoch": 0.6180151853070577, "grad_norm": 3.6087400665585125, "learning_rate": 2.0105195638229636e-06, "loss": 0.7769, "step": 50710 }, { "epoch": 0.6180761215312054, "grad_norm": 3.474231641311976, "learning_rate": 2.010198845413727e-06, "loss": 0.737, "step": 50715 }, { "epoch": 0.6181370577553532, "grad_norm": 2.2094843518887544, "learning_rate": 2.00987812700449e-06, "loss": 0.7278, "step": 50720 }, { "epoch": 0.618197993979501, "grad_norm": 3.1214108074004776, "learning_rate": 2.0095574085952535e-06, "loss": 0.7499, "step": 50725 }, { "epoch": 0.6182589302036489, "grad_norm": 2.315434375558118, "learning_rate": 2.009236690186017e-06, "loss": 0.7131, "step": 50730 }, { "epoch": 0.6183198664277967, "grad_norm": 2.245253072156822, "learning_rate": 2.00891597177678e-06, "loss": 0.8028, "step": 50735 }, { "epoch": 0.6183808026519445, "grad_norm": 2.477032592842193, "learning_rate": 2.0085952533675434e-06, "loss": 0.7687, "step": 50740 }, { "epoch": 0.6184417388760923, "grad_norm": 2.5496721919327583, "learning_rate": 2.008274534958307e-06, "loss": 0.7123, "step": 50745 }, { "epoch": 0.61850267510024, "grad_norm": 2.953337530185752, "learning_rate": 2.00795381654907e-06, "loss": 0.756, "step": 50750 }, { "epoch": 0.6185636113243879, "grad_norm": 2.1609344203291916, "learning_rate": 2.0076330981398333e-06, "loss": 0.7697, "step": 50755 }, { "epoch": 0.6186245475485357, "grad_norm": 2.38524920679461, "learning_rate": 2.0073123797305968e-06, "loss": 0.6598, "step": 50760 }, { "epoch": 0.6186854837726835, "grad_norm": 2.4788770408365752, "learning_rate": 2.00699166132136e-06, "loss": 0.7112, "step": 50765 }, { "epoch": 0.6187464199968313, "grad_norm": 2.0931179086148237, "learning_rate": 2.0066709429121232e-06, "loss": 0.6818, "step": 50770 }, { "epoch": 0.6188073562209792, "grad_norm": 2.333718371768747, "learning_rate": 2.0063502245028867e-06, "loss": 0.8106, "step": 50775 }, { "epoch": 0.618868292445127, "grad_norm": 2.2274310439459346, "learning_rate": 2.00602950609365e-06, "loss": 0.7333, "step": 50780 }, { "epoch": 0.6189292286692747, "grad_norm": 3.631612856009295, "learning_rate": 2.0057087876844136e-06, "loss": 0.8216, "step": 50785 }, { "epoch": 0.6189901648934225, "grad_norm": 2.539749205238199, "learning_rate": 2.0053880692751766e-06, "loss": 0.7223, "step": 50790 }, { "epoch": 0.6190511011175703, "grad_norm": 2.550693570214712, "learning_rate": 2.00506735086594e-06, "loss": 0.729, "step": 50795 }, { "epoch": 0.6191120373417182, "grad_norm": 2.2661729385891083, "learning_rate": 2.0047466324567034e-06, "loss": 0.8162, "step": 50800 }, { "epoch": 0.619172973565866, "grad_norm": 2.211916046501564, "learning_rate": 2.0044259140474665e-06, "loss": 0.7484, "step": 50805 }, { "epoch": 0.6192339097900138, "grad_norm": 2.610294706048528, "learning_rate": 2.00410519563823e-06, "loss": 0.7012, "step": 50810 }, { "epoch": 0.6192948460141616, "grad_norm": 2.6602930542527474, "learning_rate": 2.003784477228993e-06, "loss": 0.7851, "step": 50815 }, { "epoch": 0.6193557822383093, "grad_norm": 2.4577835226765146, "learning_rate": 2.0034637588197564e-06, "loss": 0.7609, "step": 50820 }, { "epoch": 0.6194167184624572, "grad_norm": 2.1697747929262814, "learning_rate": 2.00314304041052e-06, "loss": 0.763, "step": 50825 }, { "epoch": 0.619477654686605, "grad_norm": 2.228656491577752, "learning_rate": 2.002822322001283e-06, "loss": 0.7107, "step": 50830 }, { "epoch": 0.6195385909107528, "grad_norm": 2.5370597109668256, "learning_rate": 2.0025016035920463e-06, "loss": 0.7933, "step": 50835 }, { "epoch": 0.6195995271349006, "grad_norm": 2.4619451750322363, "learning_rate": 2.0021808851828097e-06, "loss": 0.7491, "step": 50840 }, { "epoch": 0.6196604633590485, "grad_norm": 2.595328413511456, "learning_rate": 2.0018601667735727e-06, "loss": 0.7023, "step": 50845 }, { "epoch": 0.6197213995831963, "grad_norm": 3.34765835337928, "learning_rate": 2.001539448364336e-06, "loss": 0.7641, "step": 50850 }, { "epoch": 0.619782335807344, "grad_norm": 2.8634263270597256, "learning_rate": 2.0012187299550996e-06, "loss": 0.7095, "step": 50855 }, { "epoch": 0.6198432720314918, "grad_norm": 2.3526060164247418, "learning_rate": 2.000898011545863e-06, "loss": 0.6466, "step": 50860 }, { "epoch": 0.6199042082556396, "grad_norm": 2.889931140101235, "learning_rate": 2.0005772931366265e-06, "loss": 0.7806, "step": 50865 }, { "epoch": 0.6199651444797875, "grad_norm": 2.493925136839297, "learning_rate": 2.0002565747273895e-06, "loss": 0.7852, "step": 50870 }, { "epoch": 0.6200260807039353, "grad_norm": 2.2400600676791376, "learning_rate": 1.999935856318153e-06, "loss": 0.699, "step": 50875 }, { "epoch": 0.6200870169280831, "grad_norm": 2.6361491582732133, "learning_rate": 1.9996151379089164e-06, "loss": 0.7461, "step": 50880 }, { "epoch": 0.6201479531522309, "grad_norm": 2.5963549621062767, "learning_rate": 1.9992944194996794e-06, "loss": 0.7173, "step": 50885 }, { "epoch": 0.6202088893763786, "grad_norm": 2.667949353918721, "learning_rate": 1.998973701090443e-06, "loss": 0.6966, "step": 50890 }, { "epoch": 0.6202698256005265, "grad_norm": 2.5822396274690322, "learning_rate": 1.998652982681206e-06, "loss": 0.7181, "step": 50895 }, { "epoch": 0.6203307618246743, "grad_norm": 3.0181270694216313, "learning_rate": 1.9983322642719693e-06, "loss": 0.7487, "step": 50900 }, { "epoch": 0.6203916980488221, "grad_norm": 2.418746193721999, "learning_rate": 1.9980115458627327e-06, "loss": 0.7064, "step": 50905 }, { "epoch": 0.6204526342729699, "grad_norm": 2.2037209800673954, "learning_rate": 1.9976908274534957e-06, "loss": 0.7159, "step": 50910 }, { "epoch": 0.6205135704971178, "grad_norm": 2.4599140022312755, "learning_rate": 1.997370109044259e-06, "loss": 0.713, "step": 50915 }, { "epoch": 0.6205745067212656, "grad_norm": 3.499637358442165, "learning_rate": 1.9970493906350226e-06, "loss": 0.7127, "step": 50920 }, { "epoch": 0.6206354429454133, "grad_norm": 2.19030141667498, "learning_rate": 1.996728672225786e-06, "loss": 0.7222, "step": 50925 }, { "epoch": 0.6206963791695611, "grad_norm": 3.4352084878906894, "learning_rate": 1.996407953816549e-06, "loss": 0.7489, "step": 50930 }, { "epoch": 0.6207573153937089, "grad_norm": 2.2227845869658567, "learning_rate": 1.9960872354073125e-06, "loss": 0.7586, "step": 50935 }, { "epoch": 0.6208182516178568, "grad_norm": 2.493046141825006, "learning_rate": 1.995766516998076e-06, "loss": 0.7835, "step": 50940 }, { "epoch": 0.6208791878420046, "grad_norm": 2.1904954169928397, "learning_rate": 1.9954457985888394e-06, "loss": 0.729, "step": 50945 }, { "epoch": 0.6209401240661524, "grad_norm": 2.3536258678687525, "learning_rate": 1.9951250801796024e-06, "loss": 0.6947, "step": 50950 }, { "epoch": 0.6210010602903002, "grad_norm": 2.636245156719164, "learning_rate": 1.994804361770366e-06, "loss": 0.69, "step": 50955 }, { "epoch": 0.6210619965144479, "grad_norm": 2.469935581060131, "learning_rate": 1.9944836433611293e-06, "loss": 0.6993, "step": 50960 }, { "epoch": 0.6211229327385958, "grad_norm": 2.3380624721828185, "learning_rate": 1.9941629249518923e-06, "loss": 0.7634, "step": 50965 }, { "epoch": 0.6211838689627436, "grad_norm": 2.2831040007314116, "learning_rate": 1.9938422065426558e-06, "loss": 0.7956, "step": 50970 }, { "epoch": 0.6212448051868914, "grad_norm": 2.5026508662971216, "learning_rate": 1.9935214881334188e-06, "loss": 0.7246, "step": 50975 }, { "epoch": 0.6213057414110392, "grad_norm": 2.0323825404986082, "learning_rate": 1.9932007697241822e-06, "loss": 0.7718, "step": 50980 }, { "epoch": 0.621366677635187, "grad_norm": 3.0116361491071837, "learning_rate": 1.9928800513149457e-06, "loss": 0.6757, "step": 50985 }, { "epoch": 0.6214276138593349, "grad_norm": 2.259675387874882, "learning_rate": 1.9925593329057087e-06, "loss": 0.7708, "step": 50990 }, { "epoch": 0.6214885500834826, "grad_norm": 1.8382939466641253, "learning_rate": 1.992238614496472e-06, "loss": 0.7851, "step": 50995 }, { "epoch": 0.6215494863076304, "grad_norm": 2.6505674037994065, "learning_rate": 1.9919178960872356e-06, "loss": 0.778, "step": 51000 }, { "epoch": 0.6216104225317782, "grad_norm": 2.989720334264075, "learning_rate": 1.991597177677999e-06, "loss": 0.713, "step": 51005 }, { "epoch": 0.621671358755926, "grad_norm": 2.686280023754108, "learning_rate": 1.9912764592687624e-06, "loss": 0.8028, "step": 51010 }, { "epoch": 0.6217322949800739, "grad_norm": 2.3772225444083768, "learning_rate": 1.9909557408595255e-06, "loss": 0.7613, "step": 51015 }, { "epoch": 0.6217932312042217, "grad_norm": 2.426649119682176, "learning_rate": 1.990635022450289e-06, "loss": 0.714, "step": 51020 }, { "epoch": 0.6218541674283695, "grad_norm": 2.319505542814154, "learning_rate": 1.9903143040410523e-06, "loss": 0.7061, "step": 51025 }, { "epoch": 0.6219151036525172, "grad_norm": 2.9088246948356575, "learning_rate": 1.9899935856318154e-06, "loss": 0.7422, "step": 51030 }, { "epoch": 0.621976039876665, "grad_norm": 3.8095658038461298, "learning_rate": 1.989672867222579e-06, "loss": 0.7416, "step": 51035 }, { "epoch": 0.6220369761008129, "grad_norm": 2.7264770378464367, "learning_rate": 1.9893521488133422e-06, "loss": 0.7289, "step": 51040 }, { "epoch": 0.6220979123249607, "grad_norm": 2.2144495740076406, "learning_rate": 1.9890314304041052e-06, "loss": 0.8147, "step": 51045 }, { "epoch": 0.6221588485491085, "grad_norm": 2.32370755257157, "learning_rate": 1.9887107119948687e-06, "loss": 0.7114, "step": 51050 }, { "epoch": 0.6222197847732563, "grad_norm": 2.3706904782939717, "learning_rate": 1.9883899935856317e-06, "loss": 0.7222, "step": 51055 }, { "epoch": 0.6222807209974042, "grad_norm": 2.6003421154204753, "learning_rate": 1.988069275176395e-06, "loss": 0.7518, "step": 51060 }, { "epoch": 0.6223416572215519, "grad_norm": 3.1277240619193756, "learning_rate": 1.9877485567671586e-06, "loss": 0.7674, "step": 51065 }, { "epoch": 0.6224025934456997, "grad_norm": 2.299677004481858, "learning_rate": 1.9874278383579216e-06, "loss": 0.7235, "step": 51070 }, { "epoch": 0.6224635296698475, "grad_norm": 2.3466207209271337, "learning_rate": 1.987107119948685e-06, "loss": 0.6352, "step": 51075 }, { "epoch": 0.6225244658939953, "grad_norm": 2.4786642789398745, "learning_rate": 1.9867864015394485e-06, "loss": 0.7652, "step": 51080 }, { "epoch": 0.6225854021181432, "grad_norm": 2.3592090988164025, "learning_rate": 1.986465683130212e-06, "loss": 0.7754, "step": 51085 }, { "epoch": 0.622646338342291, "grad_norm": 2.783015405212825, "learning_rate": 1.9861449647209754e-06, "loss": 0.7535, "step": 51090 }, { "epoch": 0.6227072745664388, "grad_norm": 4.28803455662464, "learning_rate": 1.9858242463117384e-06, "loss": 0.7839, "step": 51095 }, { "epoch": 0.6227682107905865, "grad_norm": 2.405858055725292, "learning_rate": 1.985503527902502e-06, "loss": 0.7208, "step": 51100 }, { "epoch": 0.6228291470147344, "grad_norm": 2.9240095745964, "learning_rate": 1.9851828094932653e-06, "loss": 0.7495, "step": 51105 }, { "epoch": 0.6228900832388822, "grad_norm": 2.819967330867636, "learning_rate": 1.9848620910840283e-06, "loss": 0.7935, "step": 51110 }, { "epoch": 0.62295101946303, "grad_norm": 4.127437757421206, "learning_rate": 1.9845413726747917e-06, "loss": 0.7639, "step": 51115 }, { "epoch": 0.6230119556871778, "grad_norm": 2.784027528096594, "learning_rate": 1.984220654265555e-06, "loss": 0.7614, "step": 51120 }, { "epoch": 0.6230728919113256, "grad_norm": 2.7171811902193643, "learning_rate": 1.983899935856318e-06, "loss": 0.7185, "step": 51125 }, { "epoch": 0.6231338281354735, "grad_norm": 2.467943542704786, "learning_rate": 1.9835792174470816e-06, "loss": 0.7139, "step": 51130 }, { "epoch": 0.6231947643596212, "grad_norm": 2.7240660233638434, "learning_rate": 1.983258499037845e-06, "loss": 0.6752, "step": 51135 }, { "epoch": 0.623255700583769, "grad_norm": 2.4767996632958122, "learning_rate": 1.982937780628608e-06, "loss": 0.7262, "step": 51140 }, { "epoch": 0.6233166368079168, "grad_norm": 2.4738299397522066, "learning_rate": 1.9826170622193715e-06, "loss": 0.7682, "step": 51145 }, { "epoch": 0.6233775730320646, "grad_norm": 3.001116699394872, "learning_rate": 1.982296343810135e-06, "loss": 0.6662, "step": 51150 }, { "epoch": 0.6234385092562125, "grad_norm": 2.6738432290127148, "learning_rate": 1.981975625400898e-06, "loss": 0.7266, "step": 51155 }, { "epoch": 0.6234994454803603, "grad_norm": 2.445325991319232, "learning_rate": 1.9816549069916614e-06, "loss": 0.7262, "step": 51160 }, { "epoch": 0.6235603817045081, "grad_norm": 2.4836492695381116, "learning_rate": 1.981334188582425e-06, "loss": 0.6964, "step": 51165 }, { "epoch": 0.6236213179286558, "grad_norm": 2.4651666440981006, "learning_rate": 1.9810134701731883e-06, "loss": 0.7584, "step": 51170 }, { "epoch": 0.6236822541528037, "grad_norm": 3.154914101305567, "learning_rate": 1.9806927517639517e-06, "loss": 0.7944, "step": 51175 }, { "epoch": 0.6237431903769515, "grad_norm": 2.486272102093279, "learning_rate": 1.9803720333547147e-06, "loss": 0.7713, "step": 51180 }, { "epoch": 0.6238041266010993, "grad_norm": 2.4354453574336543, "learning_rate": 1.980051314945478e-06, "loss": 0.7085, "step": 51185 }, { "epoch": 0.6238650628252471, "grad_norm": 2.3060066146241573, "learning_rate": 1.979730596536241e-06, "loss": 0.7764, "step": 51190 }, { "epoch": 0.6239259990493949, "grad_norm": 2.9186440869298913, "learning_rate": 1.9794098781270046e-06, "loss": 0.7916, "step": 51195 }, { "epoch": 0.6239869352735428, "grad_norm": 2.321253836158075, "learning_rate": 1.979089159717768e-06, "loss": 0.7717, "step": 51200 }, { "epoch": 0.6240478714976905, "grad_norm": 2.2525492266861753, "learning_rate": 1.978768441308531e-06, "loss": 0.7574, "step": 51205 }, { "epoch": 0.6241088077218383, "grad_norm": 2.8212716810335903, "learning_rate": 1.9784477228992945e-06, "loss": 0.6416, "step": 51210 }, { "epoch": 0.6241697439459861, "grad_norm": 2.269934905354955, "learning_rate": 1.978127004490058e-06, "loss": 0.7415, "step": 51215 }, { "epoch": 0.624230680170134, "grad_norm": 2.9102848236598025, "learning_rate": 1.977806286080821e-06, "loss": 0.6978, "step": 51220 }, { "epoch": 0.6242916163942818, "grad_norm": 2.4819040010257614, "learning_rate": 1.9774855676715844e-06, "loss": 0.765, "step": 51225 }, { "epoch": 0.6243525526184296, "grad_norm": 2.6731445973247525, "learning_rate": 1.977164849262348e-06, "loss": 0.7803, "step": 51230 }, { "epoch": 0.6244134888425774, "grad_norm": 3.4679796848663376, "learning_rate": 1.9768441308531113e-06, "loss": 0.7907, "step": 51235 }, { "epoch": 0.6244744250667251, "grad_norm": 2.6470449504499487, "learning_rate": 1.9765234124438743e-06, "loss": 0.7702, "step": 51240 }, { "epoch": 0.624535361290873, "grad_norm": 1.9841325941179104, "learning_rate": 1.9762026940346378e-06, "loss": 0.6549, "step": 51245 }, { "epoch": 0.6245962975150208, "grad_norm": 2.426309283179819, "learning_rate": 1.9758819756254012e-06, "loss": 0.8062, "step": 51250 }, { "epoch": 0.6246572337391686, "grad_norm": 2.215680256419692, "learning_rate": 1.9755612572161647e-06, "loss": 0.7175, "step": 51255 }, { "epoch": 0.6247181699633164, "grad_norm": 2.409529331036633, "learning_rate": 1.9752405388069277e-06, "loss": 0.7948, "step": 51260 }, { "epoch": 0.6247791061874642, "grad_norm": 2.5892441020096797, "learning_rate": 1.974919820397691e-06, "loss": 0.7203, "step": 51265 }, { "epoch": 0.6248400424116121, "grad_norm": 2.7963579370988043, "learning_rate": 1.974599101988454e-06, "loss": 0.753, "step": 51270 }, { "epoch": 0.6249009786357598, "grad_norm": 2.72443123365497, "learning_rate": 1.9742783835792176e-06, "loss": 0.7014, "step": 51275 }, { "epoch": 0.6249619148599076, "grad_norm": 2.1969109187696487, "learning_rate": 1.973957665169981e-06, "loss": 0.7427, "step": 51280 }, { "epoch": 0.6250228510840554, "grad_norm": 2.0390114784456785, "learning_rate": 1.973636946760744e-06, "loss": 0.7333, "step": 51285 }, { "epoch": 0.6250837873082032, "grad_norm": 1.9846037778352437, "learning_rate": 1.9733162283515075e-06, "loss": 0.7519, "step": 51290 }, { "epoch": 0.6251447235323511, "grad_norm": 2.445309172016679, "learning_rate": 1.972995509942271e-06, "loss": 0.7609, "step": 51295 }, { "epoch": 0.6252056597564989, "grad_norm": 2.5011485174303365, "learning_rate": 1.972674791533034e-06, "loss": 0.7836, "step": 51300 }, { "epoch": 0.6252665959806466, "grad_norm": 2.9763366507623585, "learning_rate": 1.9723540731237974e-06, "loss": 0.7632, "step": 51305 }, { "epoch": 0.6253275322047944, "grad_norm": 2.5782757471161757, "learning_rate": 1.972033354714561e-06, "loss": 0.7579, "step": 51310 }, { "epoch": 0.6253884684289422, "grad_norm": 2.1134611437516053, "learning_rate": 1.9717126363053242e-06, "loss": 0.8593, "step": 51315 }, { "epoch": 0.6254494046530901, "grad_norm": 2.236633354392513, "learning_rate": 1.9713919178960873e-06, "loss": 0.752, "step": 51320 }, { "epoch": 0.6255103408772379, "grad_norm": 2.640984689536537, "learning_rate": 1.9710711994868507e-06, "loss": 0.7491, "step": 51325 }, { "epoch": 0.6255712771013857, "grad_norm": 2.5574526302858738, "learning_rate": 1.970750481077614e-06, "loss": 0.7412, "step": 51330 }, { "epoch": 0.6256322133255335, "grad_norm": 2.909274253324254, "learning_rate": 1.9704297626683776e-06, "loss": 0.7286, "step": 51335 }, { "epoch": 0.6256931495496812, "grad_norm": 2.6823691907996623, "learning_rate": 1.9701090442591406e-06, "loss": 0.7255, "step": 51340 }, { "epoch": 0.6257540857738291, "grad_norm": 2.2216943663768354, "learning_rate": 1.969788325849904e-06, "loss": 0.7756, "step": 51345 }, { "epoch": 0.6258150219979769, "grad_norm": 2.1016580388965456, "learning_rate": 1.969467607440667e-06, "loss": 0.7334, "step": 51350 }, { "epoch": 0.6258759582221247, "grad_norm": 2.5953583859327383, "learning_rate": 1.9691468890314305e-06, "loss": 0.6571, "step": 51355 }, { "epoch": 0.6259368944462725, "grad_norm": 2.2952849312822856, "learning_rate": 1.968826170622194e-06, "loss": 0.7143, "step": 51360 }, { "epoch": 0.6259978306704204, "grad_norm": 3.2871631268923074, "learning_rate": 1.968505452212957e-06, "loss": 0.7032, "step": 51365 }, { "epoch": 0.6260587668945682, "grad_norm": 3.255442164063983, "learning_rate": 1.9681847338037204e-06, "loss": 0.7269, "step": 51370 }, { "epoch": 0.6261197031187159, "grad_norm": 2.2977112709292253, "learning_rate": 1.967864015394484e-06, "loss": 0.6689, "step": 51375 }, { "epoch": 0.6261806393428637, "grad_norm": 2.808017505180434, "learning_rate": 1.967543296985247e-06, "loss": 0.739, "step": 51380 }, { "epoch": 0.6262415755670115, "grad_norm": 2.335271135560819, "learning_rate": 1.9672225785760103e-06, "loss": 0.7285, "step": 51385 }, { "epoch": 0.6263025117911594, "grad_norm": 2.4445790127332794, "learning_rate": 1.9669018601667737e-06, "loss": 0.7141, "step": 51390 }, { "epoch": 0.6263634480153072, "grad_norm": 2.9806824234842804, "learning_rate": 1.966581141757537e-06, "loss": 0.6592, "step": 51395 }, { "epoch": 0.626424384239455, "grad_norm": 2.4046150459082054, "learning_rate": 1.9662604233483006e-06, "loss": 0.8029, "step": 51400 }, { "epoch": 0.6264853204636028, "grad_norm": 3.2448936937272292, "learning_rate": 1.9659397049390636e-06, "loss": 0.7327, "step": 51405 }, { "epoch": 0.6265462566877505, "grad_norm": 2.5941416374607007, "learning_rate": 1.965618986529827e-06, "loss": 0.7694, "step": 51410 }, { "epoch": 0.6266071929118984, "grad_norm": 3.0891094259543976, "learning_rate": 1.9652982681205905e-06, "loss": 0.7686, "step": 51415 }, { "epoch": 0.6266681291360462, "grad_norm": 2.4624500630228123, "learning_rate": 1.9649775497113535e-06, "loss": 0.6966, "step": 51420 }, { "epoch": 0.626729065360194, "grad_norm": 3.312639282512659, "learning_rate": 1.964656831302117e-06, "loss": 0.7024, "step": 51425 }, { "epoch": 0.6267900015843418, "grad_norm": 2.5707915629251645, "learning_rate": 1.9643361128928804e-06, "loss": 0.8087, "step": 51430 }, { "epoch": 0.6268509378084897, "grad_norm": 2.531064706362264, "learning_rate": 1.9640153944836434e-06, "loss": 0.7465, "step": 51435 }, { "epoch": 0.6269118740326375, "grad_norm": 2.618613967930948, "learning_rate": 1.963694676074407e-06, "loss": 0.7534, "step": 51440 }, { "epoch": 0.6269728102567852, "grad_norm": 2.5843429842330017, "learning_rate": 1.96337395766517e-06, "loss": 0.799, "step": 51445 }, { "epoch": 0.627033746480933, "grad_norm": 2.5265879283222, "learning_rate": 1.9630532392559333e-06, "loss": 0.7898, "step": 51450 }, { "epoch": 0.6270946827050808, "grad_norm": 2.6407620776601033, "learning_rate": 1.9627325208466968e-06, "loss": 0.7156, "step": 51455 }, { "epoch": 0.6271556189292287, "grad_norm": 2.3715353773029046, "learning_rate": 1.9624118024374598e-06, "loss": 0.7914, "step": 51460 }, { "epoch": 0.6272165551533765, "grad_norm": 2.2175844174382995, "learning_rate": 1.9620910840282232e-06, "loss": 0.744, "step": 51465 }, { "epoch": 0.6272774913775243, "grad_norm": 2.513262487114392, "learning_rate": 1.9617703656189867e-06, "loss": 0.7382, "step": 51470 }, { "epoch": 0.6273384276016721, "grad_norm": 2.4302316734253195, "learning_rate": 1.96144964720975e-06, "loss": 0.7414, "step": 51475 }, { "epoch": 0.6273993638258198, "grad_norm": 2.6172461782857455, "learning_rate": 1.9611289288005135e-06, "loss": 0.7082, "step": 51480 }, { "epoch": 0.6274603000499677, "grad_norm": 2.066936913019616, "learning_rate": 1.9608082103912766e-06, "loss": 0.7264, "step": 51485 }, { "epoch": 0.6275212362741155, "grad_norm": 2.4902069153427355, "learning_rate": 1.96048749198204e-06, "loss": 0.7487, "step": 51490 }, { "epoch": 0.6275821724982633, "grad_norm": 2.38154182504038, "learning_rate": 1.9601667735728034e-06, "loss": 0.7229, "step": 51495 }, { "epoch": 0.6276431087224111, "grad_norm": 2.2252003240167695, "learning_rate": 1.9598460551635665e-06, "loss": 0.708, "step": 51500 }, { "epoch": 0.627704044946559, "grad_norm": 3.167604522307887, "learning_rate": 1.95952533675433e-06, "loss": 0.7443, "step": 51505 }, { "epoch": 0.6277649811707068, "grad_norm": 2.9879727501764073, "learning_rate": 1.9592046183450933e-06, "loss": 0.7754, "step": 51510 }, { "epoch": 0.6278259173948545, "grad_norm": 2.549206442529593, "learning_rate": 1.9588838999358564e-06, "loss": 0.7844, "step": 51515 }, { "epoch": 0.6278868536190023, "grad_norm": 2.424443176761617, "learning_rate": 1.95856318152662e-06, "loss": 0.8655, "step": 51520 }, { "epoch": 0.6279477898431501, "grad_norm": 2.6807740734725733, "learning_rate": 1.958242463117383e-06, "loss": 0.7505, "step": 51525 }, { "epoch": 0.628008726067298, "grad_norm": 2.2415363621592235, "learning_rate": 1.9579217447081463e-06, "loss": 0.794, "step": 51530 }, { "epoch": 0.6280696622914458, "grad_norm": 2.599892346196569, "learning_rate": 1.9576010262989097e-06, "loss": 0.7283, "step": 51535 }, { "epoch": 0.6281305985155936, "grad_norm": 2.3400181681514347, "learning_rate": 1.957280307889673e-06, "loss": 0.7997, "step": 51540 }, { "epoch": 0.6281915347397414, "grad_norm": 2.7621376243310958, "learning_rate": 1.956959589480436e-06, "loss": 0.6928, "step": 51545 }, { "epoch": 0.6282524709638891, "grad_norm": 2.4133904508338113, "learning_rate": 1.9566388710711996e-06, "loss": 0.8045, "step": 51550 }, { "epoch": 0.628313407188037, "grad_norm": 2.466825328975615, "learning_rate": 1.956318152661963e-06, "loss": 0.7619, "step": 51555 }, { "epoch": 0.6283743434121848, "grad_norm": 2.765709596436441, "learning_rate": 1.9559974342527265e-06, "loss": 0.7373, "step": 51560 }, { "epoch": 0.6284352796363326, "grad_norm": 2.3709394545122895, "learning_rate": 1.9556767158434895e-06, "loss": 0.7401, "step": 51565 }, { "epoch": 0.6284962158604804, "grad_norm": 3.040654830638728, "learning_rate": 1.955355997434253e-06, "loss": 0.7525, "step": 51570 }, { "epoch": 0.6285571520846283, "grad_norm": 2.5533839728556544, "learning_rate": 1.9550352790250164e-06, "loss": 0.731, "step": 51575 }, { "epoch": 0.6286180883087761, "grad_norm": 3.029453155066492, "learning_rate": 1.9547145606157794e-06, "loss": 0.8141, "step": 51580 }, { "epoch": 0.6286790245329238, "grad_norm": 3.028562297834859, "learning_rate": 1.954393842206543e-06, "loss": 0.7985, "step": 51585 }, { "epoch": 0.6287399607570716, "grad_norm": 2.0462822310368614, "learning_rate": 1.9540731237973063e-06, "loss": 0.7463, "step": 51590 }, { "epoch": 0.6288008969812194, "grad_norm": 2.812093092452605, "learning_rate": 1.9537524053880693e-06, "loss": 0.8168, "step": 51595 }, { "epoch": 0.6288618332053673, "grad_norm": 3.2599419907354723, "learning_rate": 1.9534316869788327e-06, "loss": 0.7701, "step": 51600 }, { "epoch": 0.6289227694295151, "grad_norm": 2.746734226076847, "learning_rate": 1.9531109685695957e-06, "loss": 0.7839, "step": 51605 }, { "epoch": 0.6289837056536629, "grad_norm": 3.9351896944622333, "learning_rate": 1.952790250160359e-06, "loss": 0.6709, "step": 51610 }, { "epoch": 0.6290446418778107, "grad_norm": 2.4198621012281314, "learning_rate": 1.9524695317511226e-06, "loss": 0.7335, "step": 51615 }, { "epoch": 0.6291055781019584, "grad_norm": 2.134585837807005, "learning_rate": 1.952148813341886e-06, "loss": 0.7598, "step": 51620 }, { "epoch": 0.6291665143261063, "grad_norm": 1.7898936986187626, "learning_rate": 1.9518280949326495e-06, "loss": 0.7112, "step": 51625 }, { "epoch": 0.6292274505502541, "grad_norm": 2.4149285748884544, "learning_rate": 1.9515073765234125e-06, "loss": 0.6812, "step": 51630 }, { "epoch": 0.6292883867744019, "grad_norm": 2.0731573887013948, "learning_rate": 1.951186658114176e-06, "loss": 0.7597, "step": 51635 }, { "epoch": 0.6293493229985497, "grad_norm": 2.8056630823569297, "learning_rate": 1.9508659397049394e-06, "loss": 0.6841, "step": 51640 }, { "epoch": 0.6294102592226976, "grad_norm": 2.138529816170861, "learning_rate": 1.9505452212957024e-06, "loss": 0.6997, "step": 51645 }, { "epoch": 0.6294711954468454, "grad_norm": 2.169785109493405, "learning_rate": 1.950224502886466e-06, "loss": 0.7838, "step": 51650 }, { "epoch": 0.6295321316709931, "grad_norm": 1.8781893630701736, "learning_rate": 1.9499037844772293e-06, "loss": 0.7407, "step": 51655 }, { "epoch": 0.6295930678951409, "grad_norm": 2.393412630991097, "learning_rate": 1.9495830660679923e-06, "loss": 0.7364, "step": 51660 }, { "epoch": 0.6296540041192887, "grad_norm": 2.2849314089275974, "learning_rate": 1.9492623476587558e-06, "loss": 0.6636, "step": 51665 }, { "epoch": 0.6297149403434366, "grad_norm": 2.8128970511207, "learning_rate": 1.948941629249519e-06, "loss": 0.7537, "step": 51670 }, { "epoch": 0.6297758765675844, "grad_norm": 2.489944599317183, "learning_rate": 1.948620910840282e-06, "loss": 0.7406, "step": 51675 }, { "epoch": 0.6298368127917322, "grad_norm": 2.335214772474089, "learning_rate": 1.9483001924310456e-06, "loss": 0.7438, "step": 51680 }, { "epoch": 0.62989774901588, "grad_norm": 2.5455399658660998, "learning_rate": 1.947979474021809e-06, "loss": 0.7435, "step": 51685 }, { "epoch": 0.6299586852400277, "grad_norm": 2.082053489375185, "learning_rate": 1.947658755612572e-06, "loss": 0.6884, "step": 51690 }, { "epoch": 0.6300196214641756, "grad_norm": 2.4398466078996366, "learning_rate": 1.9473380372033355e-06, "loss": 0.7922, "step": 51695 }, { "epoch": 0.6300805576883234, "grad_norm": 2.759457637415242, "learning_rate": 1.947017318794099e-06, "loss": 0.7495, "step": 51700 }, { "epoch": 0.6301414939124712, "grad_norm": 2.944469334498365, "learning_rate": 1.9466966003848624e-06, "loss": 0.8044, "step": 51705 }, { "epoch": 0.630202430136619, "grad_norm": 2.6683606378885276, "learning_rate": 1.946375881975626e-06, "loss": 0.7261, "step": 51710 }, { "epoch": 0.6302633663607669, "grad_norm": 2.403593287473971, "learning_rate": 1.946055163566389e-06, "loss": 0.7211, "step": 51715 }, { "epoch": 0.6303243025849147, "grad_norm": 2.247027029037873, "learning_rate": 1.9457344451571523e-06, "loss": 0.752, "step": 51720 }, { "epoch": 0.6303852388090624, "grad_norm": 2.5957339331587623, "learning_rate": 1.9454137267479158e-06, "loss": 0.6957, "step": 51725 }, { "epoch": 0.6304461750332102, "grad_norm": 2.294110189379883, "learning_rate": 1.9450930083386788e-06, "loss": 0.7458, "step": 51730 }, { "epoch": 0.630507111257358, "grad_norm": 2.4861136209280965, "learning_rate": 1.9447722899294422e-06, "loss": 0.7603, "step": 51735 }, { "epoch": 0.6305680474815059, "grad_norm": 2.2641843256899827, "learning_rate": 1.9444515715202052e-06, "loss": 0.7542, "step": 51740 }, { "epoch": 0.6306289837056537, "grad_norm": 3.1943391349216284, "learning_rate": 1.9441308531109687e-06, "loss": 0.776, "step": 51745 }, { "epoch": 0.6306899199298015, "grad_norm": 2.14754597232484, "learning_rate": 1.943810134701732e-06, "loss": 0.7142, "step": 51750 }, { "epoch": 0.6307508561539493, "grad_norm": 2.3014358810627322, "learning_rate": 1.943489416292495e-06, "loss": 0.7099, "step": 51755 }, { "epoch": 0.630811792378097, "grad_norm": 2.7863857220236428, "learning_rate": 1.9431686978832586e-06, "loss": 0.7282, "step": 51760 }, { "epoch": 0.6308727286022449, "grad_norm": 2.5025823625488623, "learning_rate": 1.942847979474022e-06, "loss": 0.747, "step": 51765 }, { "epoch": 0.6309336648263927, "grad_norm": 2.5666045950543777, "learning_rate": 1.942527261064785e-06, "loss": 0.7086, "step": 51770 }, { "epoch": 0.6309946010505405, "grad_norm": 2.7905303846938074, "learning_rate": 1.9422065426555485e-06, "loss": 0.7047, "step": 51775 }, { "epoch": 0.6310555372746883, "grad_norm": 2.1950645698560702, "learning_rate": 1.941885824246312e-06, "loss": 0.7672, "step": 51780 }, { "epoch": 0.6311164734988362, "grad_norm": 2.2903870970868834, "learning_rate": 1.9415651058370754e-06, "loss": 0.7136, "step": 51785 }, { "epoch": 0.631177409722984, "grad_norm": 2.1486726730427312, "learning_rate": 1.941244387427839e-06, "loss": 0.7352, "step": 51790 }, { "epoch": 0.6312383459471317, "grad_norm": 2.99094762378093, "learning_rate": 1.940923669018602e-06, "loss": 0.7627, "step": 51795 }, { "epoch": 0.6312992821712795, "grad_norm": 2.2386329082837655, "learning_rate": 1.9406029506093653e-06, "loss": 0.789, "step": 51800 }, { "epoch": 0.6313602183954273, "grad_norm": 2.4981427292478657, "learning_rate": 1.9402822322001287e-06, "loss": 0.753, "step": 51805 }, { "epoch": 0.6314211546195752, "grad_norm": 2.656635893725874, "learning_rate": 1.9399615137908917e-06, "loss": 0.8591, "step": 51810 }, { "epoch": 0.631482090843723, "grad_norm": 2.392812128095503, "learning_rate": 1.939640795381655e-06, "loss": 0.6889, "step": 51815 }, { "epoch": 0.6315430270678708, "grad_norm": 2.475334351321842, "learning_rate": 1.939320076972418e-06, "loss": 0.7501, "step": 51820 }, { "epoch": 0.6316039632920186, "grad_norm": 2.4008408865554935, "learning_rate": 1.9389993585631816e-06, "loss": 0.7509, "step": 51825 }, { "epoch": 0.6316648995161663, "grad_norm": 1.9780854319097014, "learning_rate": 1.938678640153945e-06, "loss": 0.6919, "step": 51830 }, { "epoch": 0.6317258357403142, "grad_norm": 2.3181818461890678, "learning_rate": 1.938357921744708e-06, "loss": 0.7698, "step": 51835 }, { "epoch": 0.631786771964462, "grad_norm": 5.41891763689812, "learning_rate": 1.9380372033354715e-06, "loss": 0.7791, "step": 51840 }, { "epoch": 0.6318477081886098, "grad_norm": 3.143524291531921, "learning_rate": 1.937716484926235e-06, "loss": 0.7506, "step": 51845 }, { "epoch": 0.6319086444127576, "grad_norm": 1.990903345442803, "learning_rate": 1.9373957665169984e-06, "loss": 0.7114, "step": 51850 }, { "epoch": 0.6319695806369054, "grad_norm": 2.36061083926426, "learning_rate": 1.9370750481077614e-06, "loss": 0.7459, "step": 51855 }, { "epoch": 0.6320305168610533, "grad_norm": 2.939537690369429, "learning_rate": 1.936754329698525e-06, "loss": 0.7357, "step": 51860 }, { "epoch": 0.632091453085201, "grad_norm": 2.4266462195611886, "learning_rate": 1.9364336112892883e-06, "loss": 0.7022, "step": 51865 }, { "epoch": 0.6321523893093488, "grad_norm": 2.494255553500615, "learning_rate": 1.9361128928800517e-06, "loss": 0.7175, "step": 51870 }, { "epoch": 0.6322133255334966, "grad_norm": 2.696508261481401, "learning_rate": 1.9357921744708147e-06, "loss": 0.6967, "step": 51875 }, { "epoch": 0.6322742617576445, "grad_norm": 2.7638969335905994, "learning_rate": 1.935471456061578e-06, "loss": 0.71, "step": 51880 }, { "epoch": 0.6323351979817923, "grad_norm": 2.496557198087902, "learning_rate": 1.9351507376523416e-06, "loss": 0.6988, "step": 51885 }, { "epoch": 0.6323961342059401, "grad_norm": 2.821567337840972, "learning_rate": 1.9348300192431046e-06, "loss": 0.7323, "step": 51890 }, { "epoch": 0.6324570704300879, "grad_norm": 2.8711033967045094, "learning_rate": 1.934509300833868e-06, "loss": 0.7021, "step": 51895 }, { "epoch": 0.6325180066542356, "grad_norm": 2.0429842675884196, "learning_rate": 1.934188582424631e-06, "loss": 0.7578, "step": 51900 }, { "epoch": 0.6325789428783835, "grad_norm": 2.869871898727873, "learning_rate": 1.9338678640153945e-06, "loss": 0.7208, "step": 51905 }, { "epoch": 0.6326398791025313, "grad_norm": 2.2931945196753345, "learning_rate": 1.933547145606158e-06, "loss": 0.7113, "step": 51910 }, { "epoch": 0.6327008153266791, "grad_norm": 2.90351023343433, "learning_rate": 1.933226427196921e-06, "loss": 0.7743, "step": 51915 }, { "epoch": 0.6327617515508269, "grad_norm": 2.8127060863452877, "learning_rate": 1.9329057087876844e-06, "loss": 0.7166, "step": 51920 }, { "epoch": 0.6328226877749747, "grad_norm": 2.3282472027237553, "learning_rate": 1.932584990378448e-06, "loss": 0.6578, "step": 51925 }, { "epoch": 0.6328836239991226, "grad_norm": 2.4336717782875708, "learning_rate": 1.9322642719692113e-06, "loss": 0.7566, "step": 51930 }, { "epoch": 0.6329445602232703, "grad_norm": 2.463233680446383, "learning_rate": 1.9319435535599747e-06, "loss": 0.7356, "step": 51935 }, { "epoch": 0.6330054964474181, "grad_norm": 2.5994641317951643, "learning_rate": 1.9316228351507378e-06, "loss": 0.7663, "step": 51940 }, { "epoch": 0.6330664326715659, "grad_norm": 2.1407312472625675, "learning_rate": 1.931302116741501e-06, "loss": 0.7707, "step": 51945 }, { "epoch": 0.6331273688957137, "grad_norm": 2.036099006737142, "learning_rate": 1.9309813983322646e-06, "loss": 0.7554, "step": 51950 }, { "epoch": 0.6331883051198616, "grad_norm": 2.142854515334074, "learning_rate": 1.9306606799230277e-06, "loss": 0.7936, "step": 51955 }, { "epoch": 0.6332492413440094, "grad_norm": 2.612431278280294, "learning_rate": 1.930339961513791e-06, "loss": 0.6965, "step": 51960 }, { "epoch": 0.6333101775681572, "grad_norm": 3.7925339342733873, "learning_rate": 1.9300192431045545e-06, "loss": 0.7769, "step": 51965 }, { "epoch": 0.6333711137923049, "grad_norm": 2.4044715427261893, "learning_rate": 1.9296985246953176e-06, "loss": 0.7176, "step": 51970 }, { "epoch": 0.6334320500164528, "grad_norm": 4.093655218774384, "learning_rate": 1.929377806286081e-06, "loss": 0.7055, "step": 51975 }, { "epoch": 0.6334929862406006, "grad_norm": 3.4091525066162203, "learning_rate": 1.9290570878768444e-06, "loss": 0.769, "step": 51980 }, { "epoch": 0.6335539224647484, "grad_norm": 2.442901222946972, "learning_rate": 1.9287363694676075e-06, "loss": 0.7977, "step": 51985 }, { "epoch": 0.6336148586888962, "grad_norm": 2.2451341033691365, "learning_rate": 1.928415651058371e-06, "loss": 0.7571, "step": 51990 }, { "epoch": 0.633675794913044, "grad_norm": 2.3251159756913595, "learning_rate": 1.928094932649134e-06, "loss": 0.7132, "step": 51995 }, { "epoch": 0.6337367311371919, "grad_norm": 2.09649886584101, "learning_rate": 1.9277742142398974e-06, "loss": 0.7746, "step": 52000 }, { "epoch": 0.6337976673613396, "grad_norm": 2.876293712131875, "learning_rate": 1.927453495830661e-06, "loss": 0.6325, "step": 52005 }, { "epoch": 0.6338586035854874, "grad_norm": 2.7858897787898083, "learning_rate": 1.9271327774214242e-06, "loss": 0.7168, "step": 52010 }, { "epoch": 0.6339195398096352, "grad_norm": 2.6487345088780287, "learning_rate": 1.9268120590121877e-06, "loss": 0.7425, "step": 52015 }, { "epoch": 0.633980476033783, "grad_norm": 2.404737326994166, "learning_rate": 1.9264913406029507e-06, "loss": 0.7314, "step": 52020 }, { "epoch": 0.6340414122579309, "grad_norm": 2.3322191406157464, "learning_rate": 1.926170622193714e-06, "loss": 0.6593, "step": 52025 }, { "epoch": 0.6341023484820787, "grad_norm": 2.9908980205749622, "learning_rate": 1.9258499037844776e-06, "loss": 0.7309, "step": 52030 }, { "epoch": 0.6341632847062265, "grad_norm": 2.90344474239682, "learning_rate": 1.9255291853752406e-06, "loss": 0.7645, "step": 52035 }, { "epoch": 0.6342242209303742, "grad_norm": 2.636843230062408, "learning_rate": 1.925208466966004e-06, "loss": 0.7245, "step": 52040 }, { "epoch": 0.634285157154522, "grad_norm": 2.137937763284672, "learning_rate": 1.9248877485567675e-06, "loss": 0.7625, "step": 52045 }, { "epoch": 0.6343460933786699, "grad_norm": 2.386876593771554, "learning_rate": 1.9245670301475305e-06, "loss": 0.7474, "step": 52050 }, { "epoch": 0.6344070296028177, "grad_norm": 2.11525455131165, "learning_rate": 1.924246311738294e-06, "loss": 0.7776, "step": 52055 }, { "epoch": 0.6344679658269655, "grad_norm": 2.2285704043518746, "learning_rate": 1.9239255933290574e-06, "loss": 0.6923, "step": 52060 }, { "epoch": 0.6345289020511133, "grad_norm": 2.0063398182953662, "learning_rate": 1.9236048749198204e-06, "loss": 0.627, "step": 52065 }, { "epoch": 0.6345898382752612, "grad_norm": 2.4820004074626008, "learning_rate": 1.923284156510584e-06, "loss": 0.751, "step": 52070 }, { "epoch": 0.6346507744994089, "grad_norm": 3.1779468944629317, "learning_rate": 1.9229634381013473e-06, "loss": 0.6936, "step": 52075 }, { "epoch": 0.6347117107235567, "grad_norm": 3.3397011495136457, "learning_rate": 1.9226427196921103e-06, "loss": 0.793, "step": 52080 }, { "epoch": 0.6347726469477045, "grad_norm": 2.4573882915503753, "learning_rate": 1.9223220012828737e-06, "loss": 0.7896, "step": 52085 }, { "epoch": 0.6348335831718523, "grad_norm": 2.810296655262972, "learning_rate": 1.922001282873637e-06, "loss": 0.7512, "step": 52090 }, { "epoch": 0.6348945193960002, "grad_norm": 2.217771897402566, "learning_rate": 1.9216805644644006e-06, "loss": 0.758, "step": 52095 }, { "epoch": 0.634955455620148, "grad_norm": 2.77050291768422, "learning_rate": 1.921359846055164e-06, "loss": 0.7419, "step": 52100 }, { "epoch": 0.6350163918442958, "grad_norm": 2.746705752478818, "learning_rate": 1.921039127645927e-06, "loss": 0.7617, "step": 52105 }, { "epoch": 0.6350773280684435, "grad_norm": 2.9179084742875614, "learning_rate": 1.9207184092366905e-06, "loss": 0.7387, "step": 52110 }, { "epoch": 0.6351382642925913, "grad_norm": 2.2008618264665185, "learning_rate": 1.9203976908274535e-06, "loss": 0.7425, "step": 52115 }, { "epoch": 0.6351992005167392, "grad_norm": 2.2838316733722177, "learning_rate": 1.920076972418217e-06, "loss": 0.6615, "step": 52120 }, { "epoch": 0.635260136740887, "grad_norm": 2.6691036785582636, "learning_rate": 1.9197562540089804e-06, "loss": 0.7824, "step": 52125 }, { "epoch": 0.6353210729650348, "grad_norm": 2.3238796839010045, "learning_rate": 1.9194355355997434e-06, "loss": 0.701, "step": 52130 }, { "epoch": 0.6353820091891826, "grad_norm": 3.1107375390874314, "learning_rate": 1.919114817190507e-06, "loss": 0.8266, "step": 52135 }, { "epoch": 0.6354429454133305, "grad_norm": 2.562929579122561, "learning_rate": 1.9187940987812703e-06, "loss": 0.7231, "step": 52140 }, { "epoch": 0.6355038816374782, "grad_norm": 3.8135287743653876, "learning_rate": 1.9184733803720333e-06, "loss": 0.7947, "step": 52145 }, { "epoch": 0.635564817861626, "grad_norm": 2.3163468054740117, "learning_rate": 1.9181526619627968e-06, "loss": 0.7628, "step": 52150 }, { "epoch": 0.6356257540857738, "grad_norm": 1.9331269623468754, "learning_rate": 1.91783194355356e-06, "loss": 0.7294, "step": 52155 }, { "epoch": 0.6356866903099216, "grad_norm": 2.3645303422831163, "learning_rate": 1.917511225144323e-06, "loss": 0.7438, "step": 52160 }, { "epoch": 0.6357476265340695, "grad_norm": 3.153889877915463, "learning_rate": 1.9171905067350867e-06, "loss": 0.7044, "step": 52165 }, { "epoch": 0.6358085627582173, "grad_norm": 2.4535970250713746, "learning_rate": 1.91686978832585e-06, "loss": 0.7371, "step": 52170 }, { "epoch": 0.6358694989823651, "grad_norm": 2.3701612718228926, "learning_rate": 1.9165490699166135e-06, "loss": 0.7537, "step": 52175 }, { "epoch": 0.6359304352065128, "grad_norm": 2.568855194859494, "learning_rate": 1.916228351507377e-06, "loss": 0.6923, "step": 52180 }, { "epoch": 0.6359913714306606, "grad_norm": 2.2289641464874603, "learning_rate": 1.91590763309814e-06, "loss": 0.6744, "step": 52185 }, { "epoch": 0.6360523076548085, "grad_norm": 3.4470373823530602, "learning_rate": 1.9155869146889034e-06, "loss": 0.7611, "step": 52190 }, { "epoch": 0.6361132438789563, "grad_norm": 2.383586747708997, "learning_rate": 1.9152661962796664e-06, "loss": 0.7214, "step": 52195 }, { "epoch": 0.6361741801031041, "grad_norm": 3.201804546377761, "learning_rate": 1.91494547787043e-06, "loss": 0.7279, "step": 52200 }, { "epoch": 0.6362351163272519, "grad_norm": 2.831896973023909, "learning_rate": 1.9146247594611933e-06, "loss": 0.7625, "step": 52205 }, { "epoch": 0.6362960525513998, "grad_norm": 2.5993469226274497, "learning_rate": 1.9143040410519563e-06, "loss": 0.7666, "step": 52210 }, { "epoch": 0.6363569887755475, "grad_norm": 2.274357724485528, "learning_rate": 1.9139833226427198e-06, "loss": 0.7129, "step": 52215 }, { "epoch": 0.6364179249996953, "grad_norm": 2.827150710981661, "learning_rate": 1.9136626042334832e-06, "loss": 0.7965, "step": 52220 }, { "epoch": 0.6364788612238431, "grad_norm": 2.758969277563341, "learning_rate": 1.9133418858242462e-06, "loss": 0.7324, "step": 52225 }, { "epoch": 0.6365397974479909, "grad_norm": 3.4102077986288113, "learning_rate": 1.9130211674150097e-06, "loss": 0.7917, "step": 52230 }, { "epoch": 0.6366007336721388, "grad_norm": 2.6259507479206827, "learning_rate": 1.912700449005773e-06, "loss": 0.6905, "step": 52235 }, { "epoch": 0.6366616698962866, "grad_norm": 2.5783438097330924, "learning_rate": 1.9123797305965366e-06, "loss": 0.8174, "step": 52240 }, { "epoch": 0.6367226061204343, "grad_norm": 2.900546578768331, "learning_rate": 1.9120590121872996e-06, "loss": 0.7848, "step": 52245 }, { "epoch": 0.6367835423445821, "grad_norm": 2.043741877052432, "learning_rate": 1.911738293778063e-06, "loss": 0.7312, "step": 52250 }, { "epoch": 0.6368444785687299, "grad_norm": 2.1352692401871813, "learning_rate": 1.9114175753688265e-06, "loss": 0.7007, "step": 52255 }, { "epoch": 0.6369054147928778, "grad_norm": 2.722126401450801, "learning_rate": 1.91109685695959e-06, "loss": 0.7428, "step": 52260 }, { "epoch": 0.6369663510170256, "grad_norm": 4.708394794372568, "learning_rate": 1.910776138550353e-06, "loss": 0.7631, "step": 52265 }, { "epoch": 0.6370272872411734, "grad_norm": 3.130538937311421, "learning_rate": 1.9104554201411164e-06, "loss": 0.7222, "step": 52270 }, { "epoch": 0.6370882234653212, "grad_norm": 2.9015746599097967, "learning_rate": 1.91013470173188e-06, "loss": 0.7481, "step": 52275 }, { "epoch": 0.637149159689469, "grad_norm": 2.111633987323803, "learning_rate": 1.909813983322643e-06, "loss": 0.7619, "step": 52280 }, { "epoch": 0.6372100959136168, "grad_norm": 2.7109993733168367, "learning_rate": 1.9094932649134063e-06, "loss": 0.7379, "step": 52285 }, { "epoch": 0.6372710321377646, "grad_norm": 2.524425432411069, "learning_rate": 1.9091725465041693e-06, "loss": 0.8099, "step": 52290 }, { "epoch": 0.6373319683619124, "grad_norm": 2.1408762846712417, "learning_rate": 1.9088518280949327e-06, "loss": 0.7744, "step": 52295 }, { "epoch": 0.6373929045860602, "grad_norm": 2.345534438459655, "learning_rate": 1.908531109685696e-06, "loss": 0.7477, "step": 52300 }, { "epoch": 0.6374538408102081, "grad_norm": 2.2662820011983698, "learning_rate": 1.908210391276459e-06, "loss": 0.7243, "step": 52305 }, { "epoch": 0.6375147770343559, "grad_norm": 2.1937048399040227, "learning_rate": 1.9078896728672226e-06, "loss": 0.6734, "step": 52310 }, { "epoch": 0.6375757132585036, "grad_norm": 2.6131434793642825, "learning_rate": 1.907568954457986e-06, "loss": 0.6887, "step": 52315 }, { "epoch": 0.6376366494826514, "grad_norm": 2.984324882691545, "learning_rate": 1.9072482360487493e-06, "loss": 0.7562, "step": 52320 }, { "epoch": 0.6376975857067992, "grad_norm": 2.222116393187698, "learning_rate": 1.9069275176395127e-06, "loss": 0.7181, "step": 52325 }, { "epoch": 0.6377585219309471, "grad_norm": 2.048991530977272, "learning_rate": 1.906606799230276e-06, "loss": 0.7028, "step": 52330 }, { "epoch": 0.6378194581550949, "grad_norm": 2.292078072122826, "learning_rate": 1.9062860808210392e-06, "loss": 0.7712, "step": 52335 }, { "epoch": 0.6378803943792427, "grad_norm": 2.6786447277993446, "learning_rate": 1.9059653624118026e-06, "loss": 0.7126, "step": 52340 }, { "epoch": 0.6379413306033905, "grad_norm": 2.204892533952739, "learning_rate": 1.9056446440025658e-06, "loss": 0.7181, "step": 52345 }, { "epoch": 0.6380022668275382, "grad_norm": 2.2490685994851876, "learning_rate": 1.9053239255933293e-06, "loss": 0.7873, "step": 52350 }, { "epoch": 0.6380632030516861, "grad_norm": 2.578591806269659, "learning_rate": 1.9050032071840927e-06, "loss": 0.758, "step": 52355 }, { "epoch": 0.6381241392758339, "grad_norm": 1.9663231190666304, "learning_rate": 1.9046824887748557e-06, "loss": 0.737, "step": 52360 }, { "epoch": 0.6381850754999817, "grad_norm": 2.4891684934877314, "learning_rate": 1.9043617703656192e-06, "loss": 0.7469, "step": 52365 }, { "epoch": 0.6382460117241295, "grad_norm": 2.5223560770842495, "learning_rate": 1.9040410519563824e-06, "loss": 0.7526, "step": 52370 }, { "epoch": 0.6383069479482774, "grad_norm": 2.805818183990748, "learning_rate": 1.9037203335471458e-06, "loss": 0.7269, "step": 52375 }, { "epoch": 0.6383678841724252, "grad_norm": 2.236427106683886, "learning_rate": 1.903399615137909e-06, "loss": 0.7971, "step": 52380 }, { "epoch": 0.6384288203965729, "grad_norm": 2.3337355993334734, "learning_rate": 1.9030788967286723e-06, "loss": 0.729, "step": 52385 }, { "epoch": 0.6384897566207207, "grad_norm": 2.319419492497133, "learning_rate": 1.9027581783194357e-06, "loss": 0.6988, "step": 52390 }, { "epoch": 0.6385506928448685, "grad_norm": 2.409177096104507, "learning_rate": 1.9024374599101992e-06, "loss": 0.7324, "step": 52395 }, { "epoch": 0.6386116290690164, "grad_norm": 2.5508775993322823, "learning_rate": 1.9021167415009622e-06, "loss": 0.7267, "step": 52400 }, { "epoch": 0.6386725652931642, "grad_norm": 2.4894912843350063, "learning_rate": 1.9017960230917256e-06, "loss": 0.6976, "step": 52405 }, { "epoch": 0.638733501517312, "grad_norm": 2.184081973590982, "learning_rate": 1.9014753046824889e-06, "loss": 0.7138, "step": 52410 }, { "epoch": 0.6387944377414598, "grad_norm": 2.6231313431235983, "learning_rate": 1.9011545862732523e-06, "loss": 0.7463, "step": 52415 }, { "epoch": 0.6388553739656075, "grad_norm": 3.4936853967471735, "learning_rate": 1.9008338678640155e-06, "loss": 0.614, "step": 52420 }, { "epoch": 0.6389163101897554, "grad_norm": 2.18988308760183, "learning_rate": 1.9005131494547788e-06, "loss": 0.757, "step": 52425 }, { "epoch": 0.6389772464139032, "grad_norm": 2.6406596964268374, "learning_rate": 1.9001924310455422e-06, "loss": 0.6954, "step": 52430 }, { "epoch": 0.639038182638051, "grad_norm": 2.608622841795069, "learning_rate": 1.8998717126363057e-06, "loss": 0.7554, "step": 52435 }, { "epoch": 0.6390991188621988, "grad_norm": 7.980778967159894, "learning_rate": 1.8995509942270687e-06, "loss": 0.692, "step": 52440 }, { "epoch": 0.6391600550863467, "grad_norm": 3.38881882559307, "learning_rate": 1.8992302758178321e-06, "loss": 0.7783, "step": 52445 }, { "epoch": 0.6392209913104945, "grad_norm": 2.423037148461552, "learning_rate": 1.8989095574085953e-06, "loss": 0.7158, "step": 52450 }, { "epoch": 0.6392819275346422, "grad_norm": 2.2091608356525287, "learning_rate": 1.8985888389993588e-06, "loss": 0.7507, "step": 52455 }, { "epoch": 0.63934286375879, "grad_norm": 2.483136493764017, "learning_rate": 1.898268120590122e-06, "loss": 0.7389, "step": 52460 }, { "epoch": 0.6394037999829378, "grad_norm": 3.213162307016781, "learning_rate": 1.8979474021808852e-06, "loss": 0.7731, "step": 52465 }, { "epoch": 0.6394647362070857, "grad_norm": 2.369759222809852, "learning_rate": 1.8976266837716487e-06, "loss": 0.7587, "step": 52470 }, { "epoch": 0.6395256724312335, "grad_norm": 3.443451891540851, "learning_rate": 1.8973059653624121e-06, "loss": 0.8385, "step": 52475 }, { "epoch": 0.6395866086553813, "grad_norm": 3.1238962958447853, "learning_rate": 1.8969852469531751e-06, "loss": 0.8093, "step": 52480 }, { "epoch": 0.6396475448795291, "grad_norm": 2.5611830428458866, "learning_rate": 1.8966645285439386e-06, "loss": 0.7028, "step": 52485 }, { "epoch": 0.6397084811036768, "grad_norm": 2.7708808553750024, "learning_rate": 1.8963438101347018e-06, "loss": 0.7541, "step": 52490 }, { "epoch": 0.6397694173278247, "grad_norm": 2.508375032635186, "learning_rate": 1.8960230917254652e-06, "loss": 0.7668, "step": 52495 }, { "epoch": 0.6398303535519725, "grad_norm": 2.609960468059713, "learning_rate": 1.8957023733162287e-06, "loss": 0.6626, "step": 52500 }, { "epoch": 0.6398912897761203, "grad_norm": 2.6506256417917404, "learning_rate": 1.8953816549069917e-06, "loss": 0.7411, "step": 52505 }, { "epoch": 0.6399522260002681, "grad_norm": 2.474604394922026, "learning_rate": 1.8950609364977551e-06, "loss": 0.766, "step": 52510 }, { "epoch": 0.640013162224416, "grad_norm": 2.32799101079029, "learning_rate": 1.8947402180885186e-06, "loss": 0.7159, "step": 52515 }, { "epoch": 0.6400740984485638, "grad_norm": 2.7061478553789757, "learning_rate": 1.8944194996792816e-06, "loss": 0.7653, "step": 52520 }, { "epoch": 0.6401350346727115, "grad_norm": 2.4331443512613466, "learning_rate": 1.894098781270045e-06, "loss": 0.7043, "step": 52525 }, { "epoch": 0.6401959708968593, "grad_norm": 2.3803222242496602, "learning_rate": 1.8937780628608083e-06, "loss": 0.7643, "step": 52530 }, { "epoch": 0.6402569071210071, "grad_norm": 2.6660319542998825, "learning_rate": 1.8934573444515717e-06, "loss": 0.7165, "step": 52535 }, { "epoch": 0.640317843345155, "grad_norm": 2.2271953412275893, "learning_rate": 1.8931366260423351e-06, "loss": 0.7407, "step": 52540 }, { "epoch": 0.6403787795693028, "grad_norm": 1.925415724833135, "learning_rate": 1.8928159076330982e-06, "loss": 0.7522, "step": 52545 }, { "epoch": 0.6404397157934506, "grad_norm": 2.8695049681395353, "learning_rate": 1.8924951892238616e-06, "loss": 0.7286, "step": 52550 }, { "epoch": 0.6405006520175984, "grad_norm": 2.442920973097767, "learning_rate": 1.892174470814625e-06, "loss": 0.7396, "step": 52555 }, { "epoch": 0.6405615882417461, "grad_norm": 2.478450729841807, "learning_rate": 1.891853752405388e-06, "loss": 0.7766, "step": 52560 }, { "epoch": 0.640622524465894, "grad_norm": 2.0265489776568066, "learning_rate": 1.8915330339961515e-06, "loss": 0.7654, "step": 52565 }, { "epoch": 0.6406834606900418, "grad_norm": 2.482224168070969, "learning_rate": 1.891212315586915e-06, "loss": 0.6976, "step": 52570 }, { "epoch": 0.6407443969141896, "grad_norm": 3.1967516898241572, "learning_rate": 1.8908915971776782e-06, "loss": 0.7792, "step": 52575 }, { "epoch": 0.6408053331383374, "grad_norm": 2.3568025538321487, "learning_rate": 1.8905708787684416e-06, "loss": 0.7546, "step": 52580 }, { "epoch": 0.6408662693624853, "grad_norm": 2.811460438563424, "learning_rate": 1.8902501603592046e-06, "loss": 0.704, "step": 52585 }, { "epoch": 0.6409272055866331, "grad_norm": 2.774960662077111, "learning_rate": 1.889929441949968e-06, "loss": 0.753, "step": 52590 }, { "epoch": 0.6409881418107808, "grad_norm": 2.9075549275030035, "learning_rate": 1.8896087235407315e-06, "loss": 0.7089, "step": 52595 }, { "epoch": 0.6410490780349286, "grad_norm": 2.2221103504676494, "learning_rate": 1.8892880051314947e-06, "loss": 0.7074, "step": 52600 }, { "epoch": 0.6411100142590764, "grad_norm": 2.0425672859989343, "learning_rate": 1.888967286722258e-06, "loss": 0.7431, "step": 52605 }, { "epoch": 0.6411709504832243, "grad_norm": 2.3391325645804284, "learning_rate": 1.8886465683130214e-06, "loss": 0.7366, "step": 52610 }, { "epoch": 0.6412318867073721, "grad_norm": 4.6551897834267235, "learning_rate": 1.8883258499037846e-06, "loss": 0.7574, "step": 52615 }, { "epoch": 0.6412928229315199, "grad_norm": 2.71432918478444, "learning_rate": 1.888005131494548e-06, "loss": 0.7857, "step": 52620 }, { "epoch": 0.6413537591556677, "grad_norm": 2.219216598147395, "learning_rate": 1.887684413085311e-06, "loss": 0.7298, "step": 52625 }, { "epoch": 0.6414146953798154, "grad_norm": 2.4072102818735, "learning_rate": 1.8873636946760745e-06, "loss": 0.7824, "step": 52630 }, { "epoch": 0.6414756316039633, "grad_norm": 3.2051903004111097, "learning_rate": 1.887042976266838e-06, "loss": 0.721, "step": 52635 }, { "epoch": 0.6415365678281111, "grad_norm": 2.210701350542128, "learning_rate": 1.8867222578576012e-06, "loss": 0.7632, "step": 52640 }, { "epoch": 0.6415975040522589, "grad_norm": 2.4337891128717746, "learning_rate": 1.8864015394483644e-06, "loss": 0.7735, "step": 52645 }, { "epoch": 0.6416584402764067, "grad_norm": 3.5895855893292716, "learning_rate": 1.8860808210391279e-06, "loss": 0.7634, "step": 52650 }, { "epoch": 0.6417193765005546, "grad_norm": 2.7527274193975595, "learning_rate": 1.885760102629891e-06, "loss": 0.7236, "step": 52655 }, { "epoch": 0.6417803127247024, "grad_norm": 2.5410223044442994, "learning_rate": 1.8854393842206545e-06, "loss": 0.7148, "step": 52660 }, { "epoch": 0.6418412489488501, "grad_norm": 2.3041322213070345, "learning_rate": 1.8851186658114176e-06, "loss": 0.7097, "step": 52665 }, { "epoch": 0.6419021851729979, "grad_norm": 2.771781920396711, "learning_rate": 1.884797947402181e-06, "loss": 0.7009, "step": 52670 }, { "epoch": 0.6419631213971457, "grad_norm": 2.8219674378785706, "learning_rate": 1.8844772289929444e-06, "loss": 0.7181, "step": 52675 }, { "epoch": 0.6420240576212936, "grad_norm": 2.51176863816475, "learning_rate": 1.8841565105837077e-06, "loss": 0.7439, "step": 52680 }, { "epoch": 0.6420849938454414, "grad_norm": 2.040708618868624, "learning_rate": 1.8838357921744709e-06, "loss": 0.7765, "step": 52685 }, { "epoch": 0.6421459300695892, "grad_norm": 2.405721885459395, "learning_rate": 1.8835150737652343e-06, "loss": 0.7836, "step": 52690 }, { "epoch": 0.642206866293737, "grad_norm": 3.0687272590323458, "learning_rate": 1.8831943553559976e-06, "loss": 0.7628, "step": 52695 }, { "epoch": 0.6422678025178847, "grad_norm": 2.101764139920981, "learning_rate": 1.882873636946761e-06, "loss": 0.6974, "step": 52700 }, { "epoch": 0.6423287387420326, "grad_norm": 2.240415690302403, "learning_rate": 1.882552918537524e-06, "loss": 0.7223, "step": 52705 }, { "epoch": 0.6423896749661804, "grad_norm": 2.708351599698188, "learning_rate": 1.8822322001282875e-06, "loss": 0.8041, "step": 52710 }, { "epoch": 0.6424506111903282, "grad_norm": 2.6198762365132877, "learning_rate": 1.881911481719051e-06, "loss": 0.7581, "step": 52715 }, { "epoch": 0.642511547414476, "grad_norm": 2.327938864295268, "learning_rate": 1.8815907633098141e-06, "loss": 0.7106, "step": 52720 }, { "epoch": 0.6425724836386238, "grad_norm": 2.629827386759918, "learning_rate": 1.8812700449005776e-06, "loss": 0.712, "step": 52725 }, { "epoch": 0.6426334198627717, "grad_norm": 4.834098219365102, "learning_rate": 1.8809493264913408e-06, "loss": 0.7219, "step": 52730 }, { "epoch": 0.6426943560869194, "grad_norm": 2.3604682528059318, "learning_rate": 1.880628608082104e-06, "loss": 0.8049, "step": 52735 }, { "epoch": 0.6427552923110672, "grad_norm": 2.7281444997298037, "learning_rate": 1.8803078896728675e-06, "loss": 0.7416, "step": 52740 }, { "epoch": 0.642816228535215, "grad_norm": 2.318248632283, "learning_rate": 1.8799871712636305e-06, "loss": 0.7283, "step": 52745 }, { "epoch": 0.6428771647593629, "grad_norm": 2.1190664243132953, "learning_rate": 1.879666452854394e-06, "loss": 0.7242, "step": 52750 }, { "epoch": 0.6429381009835107, "grad_norm": 2.251406697751947, "learning_rate": 1.8793457344451574e-06, "loss": 0.6759, "step": 52755 }, { "epoch": 0.6429990372076585, "grad_norm": 2.5415812629390846, "learning_rate": 1.8790250160359206e-06, "loss": 0.7873, "step": 52760 }, { "epoch": 0.6430599734318063, "grad_norm": 2.0665339718921834, "learning_rate": 1.878704297626684e-06, "loss": 0.7655, "step": 52765 }, { "epoch": 0.643120909655954, "grad_norm": 2.7888367013616717, "learning_rate": 1.8783835792174473e-06, "loss": 0.7353, "step": 52770 }, { "epoch": 0.6431818458801019, "grad_norm": 3.245221584175808, "learning_rate": 1.8780628608082105e-06, "loss": 0.7443, "step": 52775 }, { "epoch": 0.6432427821042497, "grad_norm": 2.6017200214787515, "learning_rate": 1.877742142398974e-06, "loss": 0.7403, "step": 52780 }, { "epoch": 0.6433037183283975, "grad_norm": 2.5408737776501296, "learning_rate": 1.877421423989737e-06, "loss": 0.7033, "step": 52785 }, { "epoch": 0.6433646545525453, "grad_norm": 2.4504984205268467, "learning_rate": 1.8771007055805004e-06, "loss": 0.7096, "step": 52790 }, { "epoch": 0.6434255907766931, "grad_norm": 2.1515300290602966, "learning_rate": 1.8767799871712638e-06, "loss": 0.712, "step": 52795 }, { "epoch": 0.643486527000841, "grad_norm": 2.6484719465556528, "learning_rate": 1.876459268762027e-06, "loss": 0.7099, "step": 52800 }, { "epoch": 0.6435474632249887, "grad_norm": 3.019697154200898, "learning_rate": 1.8761385503527905e-06, "loss": 0.8261, "step": 52805 }, { "epoch": 0.6436083994491365, "grad_norm": 3.4565148288623284, "learning_rate": 1.8758178319435537e-06, "loss": 0.7566, "step": 52810 }, { "epoch": 0.6436693356732843, "grad_norm": 2.3791234572066844, "learning_rate": 1.875497113534317e-06, "loss": 0.6987, "step": 52815 }, { "epoch": 0.6437302718974321, "grad_norm": 2.244811300835337, "learning_rate": 1.8751763951250804e-06, "loss": 0.7306, "step": 52820 }, { "epoch": 0.64379120812158, "grad_norm": 3.0154610032354103, "learning_rate": 1.8748556767158434e-06, "loss": 0.7247, "step": 52825 }, { "epoch": 0.6438521443457278, "grad_norm": 2.521801313025329, "learning_rate": 1.8745349583066068e-06, "loss": 0.7486, "step": 52830 }, { "epoch": 0.6439130805698756, "grad_norm": 3.74060783652842, "learning_rate": 1.8742142398973703e-06, "loss": 0.7033, "step": 52835 }, { "epoch": 0.6439740167940233, "grad_norm": 2.4912194678652906, "learning_rate": 1.8738935214881335e-06, "loss": 0.7223, "step": 52840 }, { "epoch": 0.6440349530181712, "grad_norm": 2.243382056873675, "learning_rate": 1.873572803078897e-06, "loss": 0.6344, "step": 52845 }, { "epoch": 0.644095889242319, "grad_norm": 3.1092234081572556, "learning_rate": 1.8732520846696604e-06, "loss": 0.7171, "step": 52850 }, { "epoch": 0.6441568254664668, "grad_norm": 2.7827543300016626, "learning_rate": 1.8729313662604234e-06, "loss": 0.716, "step": 52855 }, { "epoch": 0.6442177616906146, "grad_norm": 2.167254507900746, "learning_rate": 1.8726106478511869e-06, "loss": 0.7678, "step": 52860 }, { "epoch": 0.6442786979147624, "grad_norm": 2.351067491487649, "learning_rate": 1.87228992944195e-06, "loss": 0.7249, "step": 52865 }, { "epoch": 0.6443396341389103, "grad_norm": 2.4813015348727676, "learning_rate": 1.8719692110327133e-06, "loss": 0.6649, "step": 52870 }, { "epoch": 0.644400570363058, "grad_norm": 2.427504616013583, "learning_rate": 1.8716484926234767e-06, "loss": 0.7855, "step": 52875 }, { "epoch": 0.6444615065872058, "grad_norm": 2.9222848027253137, "learning_rate": 1.87132777421424e-06, "loss": 0.7956, "step": 52880 }, { "epoch": 0.6445224428113536, "grad_norm": 2.4869157545194525, "learning_rate": 1.8710070558050034e-06, "loss": 0.7712, "step": 52885 }, { "epoch": 0.6445833790355014, "grad_norm": 2.1550801320141595, "learning_rate": 1.8706863373957669e-06, "loss": 0.8203, "step": 52890 }, { "epoch": 0.6446443152596493, "grad_norm": 3.0393865062991, "learning_rate": 1.8703656189865299e-06, "loss": 0.7297, "step": 52895 }, { "epoch": 0.6447052514837971, "grad_norm": 2.968954262714973, "learning_rate": 1.8700449005772933e-06, "loss": 0.7218, "step": 52900 }, { "epoch": 0.6447661877079449, "grad_norm": 2.0780637510628845, "learning_rate": 1.8697241821680568e-06, "loss": 0.7655, "step": 52905 }, { "epoch": 0.6448271239320926, "grad_norm": 2.7048290761195344, "learning_rate": 1.8694034637588198e-06, "loss": 0.7129, "step": 52910 }, { "epoch": 0.6448880601562405, "grad_norm": 2.4676778042392042, "learning_rate": 1.8690827453495832e-06, "loss": 0.7707, "step": 52915 }, { "epoch": 0.6449489963803883, "grad_norm": 2.376821660799597, "learning_rate": 1.8687620269403464e-06, "loss": 0.7514, "step": 52920 }, { "epoch": 0.6450099326045361, "grad_norm": 2.5629251778073234, "learning_rate": 1.8684413085311099e-06, "loss": 0.7363, "step": 52925 }, { "epoch": 0.6450708688286839, "grad_norm": 2.9247749707277055, "learning_rate": 1.8681205901218733e-06, "loss": 0.7413, "step": 52930 }, { "epoch": 0.6451318050528317, "grad_norm": 2.2343698525607243, "learning_rate": 1.8677998717126363e-06, "loss": 0.7573, "step": 52935 }, { "epoch": 0.6451927412769796, "grad_norm": 2.4276510479263442, "learning_rate": 1.8674791533033998e-06, "loss": 0.8299, "step": 52940 }, { "epoch": 0.6452536775011273, "grad_norm": 2.862312301735434, "learning_rate": 1.8671584348941632e-06, "loss": 0.8151, "step": 52945 }, { "epoch": 0.6453146137252751, "grad_norm": 2.2148044597367176, "learning_rate": 1.8668377164849264e-06, "loss": 0.7581, "step": 52950 }, { "epoch": 0.6453755499494229, "grad_norm": 2.6489623753955023, "learning_rate": 1.8665169980756897e-06, "loss": 0.8233, "step": 52955 }, { "epoch": 0.6454364861735707, "grad_norm": 2.8267957542006474, "learning_rate": 1.866196279666453e-06, "loss": 0.7163, "step": 52960 }, { "epoch": 0.6454974223977186, "grad_norm": 2.6132348883865397, "learning_rate": 1.8658755612572163e-06, "loss": 0.7003, "step": 52965 }, { "epoch": 0.6455583586218664, "grad_norm": 2.3013859553861824, "learning_rate": 1.8655548428479798e-06, "loss": 0.6725, "step": 52970 }, { "epoch": 0.6456192948460142, "grad_norm": 2.0147065061796723, "learning_rate": 1.8652341244387428e-06, "loss": 0.7356, "step": 52975 }, { "epoch": 0.6456802310701619, "grad_norm": 2.6066279840383646, "learning_rate": 1.8649134060295062e-06, "loss": 0.7404, "step": 52980 }, { "epoch": 0.6457411672943097, "grad_norm": 2.2762529468721517, "learning_rate": 1.8645926876202697e-06, "loss": 0.7244, "step": 52985 }, { "epoch": 0.6458021035184576, "grad_norm": 2.318562402785372, "learning_rate": 1.864271969211033e-06, "loss": 0.6806, "step": 52990 }, { "epoch": 0.6458630397426054, "grad_norm": 2.157220801304666, "learning_rate": 1.8639512508017961e-06, "loss": 0.6626, "step": 52995 }, { "epoch": 0.6459239759667532, "grad_norm": 3.1878757060479654, "learning_rate": 1.8636305323925594e-06, "loss": 0.7635, "step": 53000 }, { "epoch": 0.645984912190901, "grad_norm": 2.5885223071404173, "learning_rate": 1.8633098139833228e-06, "loss": 0.6442, "step": 53005 }, { "epoch": 0.6460458484150489, "grad_norm": 4.1043414565696255, "learning_rate": 1.8629890955740862e-06, "loss": 0.7664, "step": 53010 }, { "epoch": 0.6461067846391966, "grad_norm": 4.31515731094257, "learning_rate": 1.8626683771648493e-06, "loss": 0.7097, "step": 53015 }, { "epoch": 0.6461677208633444, "grad_norm": 2.450082298552928, "learning_rate": 1.8623476587556127e-06, "loss": 0.721, "step": 53020 }, { "epoch": 0.6462286570874922, "grad_norm": 2.5365966530704993, "learning_rate": 1.8620269403463761e-06, "loss": 0.7088, "step": 53025 }, { "epoch": 0.64628959331164, "grad_norm": 2.117007736551586, "learning_rate": 1.8617062219371394e-06, "loss": 0.7437, "step": 53030 }, { "epoch": 0.6463505295357879, "grad_norm": 2.7586493095512337, "learning_rate": 1.8613855035279026e-06, "loss": 0.7678, "step": 53035 }, { "epoch": 0.6464114657599357, "grad_norm": 2.6881955017683423, "learning_rate": 1.8610647851186658e-06, "loss": 0.7682, "step": 53040 }, { "epoch": 0.6464724019840835, "grad_norm": 2.026540783000561, "learning_rate": 1.8607440667094293e-06, "loss": 0.7214, "step": 53045 }, { "epoch": 0.6465333382082312, "grad_norm": 2.4142587831662405, "learning_rate": 1.8604233483001927e-06, "loss": 0.7094, "step": 53050 }, { "epoch": 0.646594274432379, "grad_norm": 2.1693841967283984, "learning_rate": 1.8601026298909557e-06, "loss": 0.7491, "step": 53055 }, { "epoch": 0.6466552106565269, "grad_norm": 2.3289879988194437, "learning_rate": 1.8597819114817192e-06, "loss": 0.7348, "step": 53060 }, { "epoch": 0.6467161468806747, "grad_norm": 2.7111472825876612, "learning_rate": 1.8594611930724826e-06, "loss": 0.7733, "step": 53065 }, { "epoch": 0.6467770831048225, "grad_norm": 3.086313924585467, "learning_rate": 1.8591404746632458e-06, "loss": 0.7437, "step": 53070 }, { "epoch": 0.6468380193289703, "grad_norm": 2.6549311210114146, "learning_rate": 1.8588197562540093e-06, "loss": 0.7165, "step": 53075 }, { "epoch": 0.6468989555531182, "grad_norm": 2.7183675930796505, "learning_rate": 1.8584990378447723e-06, "loss": 0.7096, "step": 53080 }, { "epoch": 0.6469598917772659, "grad_norm": 2.0172985239713106, "learning_rate": 1.8581783194355357e-06, "loss": 0.7126, "step": 53085 }, { "epoch": 0.6470208280014137, "grad_norm": 2.1859407234823105, "learning_rate": 1.8578576010262992e-06, "loss": 0.7481, "step": 53090 }, { "epoch": 0.6470817642255615, "grad_norm": 2.449648135454168, "learning_rate": 1.8575368826170622e-06, "loss": 0.7897, "step": 53095 }, { "epoch": 0.6471427004497093, "grad_norm": 2.3489805780938195, "learning_rate": 1.8572161642078256e-06, "loss": 0.6816, "step": 53100 }, { "epoch": 0.6472036366738572, "grad_norm": 2.200639292547217, "learning_rate": 1.856895445798589e-06, "loss": 0.7456, "step": 53105 }, { "epoch": 0.647264572898005, "grad_norm": 2.8615141821506214, "learning_rate": 1.8565747273893523e-06, "loss": 0.7454, "step": 53110 }, { "epoch": 0.6473255091221528, "grad_norm": 2.1261240709439937, "learning_rate": 1.8562540089801157e-06, "loss": 0.774, "step": 53115 }, { "epoch": 0.6473864453463005, "grad_norm": 2.5441506324617564, "learning_rate": 1.8559332905708788e-06, "loss": 0.7022, "step": 53120 }, { "epoch": 0.6474473815704483, "grad_norm": 2.177783790775605, "learning_rate": 1.8556125721616422e-06, "loss": 0.7099, "step": 53125 }, { "epoch": 0.6475083177945962, "grad_norm": 2.833498559077052, "learning_rate": 1.8552918537524056e-06, "loss": 0.7611, "step": 53130 }, { "epoch": 0.647569254018744, "grad_norm": 2.434042176277285, "learning_rate": 1.8549711353431687e-06, "loss": 0.7077, "step": 53135 }, { "epoch": 0.6476301902428918, "grad_norm": 2.693785311923667, "learning_rate": 1.854650416933932e-06, "loss": 0.7936, "step": 53140 }, { "epoch": 0.6476911264670396, "grad_norm": 3.268471272210337, "learning_rate": 1.8543296985246955e-06, "loss": 0.7691, "step": 53145 }, { "epoch": 0.6477520626911875, "grad_norm": 2.088081601561792, "learning_rate": 1.8540089801154588e-06, "loss": 0.7363, "step": 53150 }, { "epoch": 0.6478129989153352, "grad_norm": 2.20545151112408, "learning_rate": 1.8536882617062222e-06, "loss": 0.8085, "step": 53155 }, { "epoch": 0.647873935139483, "grad_norm": 2.3890591123056937, "learning_rate": 1.8533675432969852e-06, "loss": 0.8447, "step": 53160 }, { "epoch": 0.6479348713636308, "grad_norm": 2.725329536040052, "learning_rate": 1.8530468248877487e-06, "loss": 0.7759, "step": 53165 }, { "epoch": 0.6479958075877786, "grad_norm": 2.3929726906524285, "learning_rate": 1.852726106478512e-06, "loss": 0.7424, "step": 53170 }, { "epoch": 0.6480567438119265, "grad_norm": 2.3855713300382253, "learning_rate": 1.8524053880692751e-06, "loss": 0.7534, "step": 53175 }, { "epoch": 0.6481176800360743, "grad_norm": 3.045921229838256, "learning_rate": 1.8520846696600386e-06, "loss": 0.7223, "step": 53180 }, { "epoch": 0.6481786162602221, "grad_norm": 2.385352278094427, "learning_rate": 1.851763951250802e-06, "loss": 0.7419, "step": 53185 }, { "epoch": 0.6482395524843698, "grad_norm": 2.5856674382394957, "learning_rate": 1.8514432328415652e-06, "loss": 0.7915, "step": 53190 }, { "epoch": 0.6483004887085176, "grad_norm": 2.208123201005676, "learning_rate": 1.8511225144323287e-06, "loss": 0.7335, "step": 53195 }, { "epoch": 0.6483614249326655, "grad_norm": 2.4941795534498206, "learning_rate": 1.8508017960230921e-06, "loss": 0.6986, "step": 53200 }, { "epoch": 0.6484223611568133, "grad_norm": 2.8364682623687965, "learning_rate": 1.8504810776138551e-06, "loss": 0.6576, "step": 53205 }, { "epoch": 0.6484832973809611, "grad_norm": 2.3231479725953363, "learning_rate": 1.8501603592046186e-06, "loss": 0.6777, "step": 53210 }, { "epoch": 0.6485442336051089, "grad_norm": 2.017194464533963, "learning_rate": 1.8498396407953818e-06, "loss": 0.7578, "step": 53215 }, { "epoch": 0.6486051698292566, "grad_norm": 2.1333805177062533, "learning_rate": 1.849518922386145e-06, "loss": 0.7922, "step": 53220 }, { "epoch": 0.6486661060534045, "grad_norm": 2.4479609668986866, "learning_rate": 1.8491982039769085e-06, "loss": 0.7538, "step": 53225 }, { "epoch": 0.6487270422775523, "grad_norm": 2.2747311276877675, "learning_rate": 1.8488774855676717e-06, "loss": 0.6953, "step": 53230 }, { "epoch": 0.6487879785017001, "grad_norm": 2.6429126116937414, "learning_rate": 1.8485567671584351e-06, "loss": 0.7198, "step": 53235 }, { "epoch": 0.6488489147258479, "grad_norm": 2.462800874625104, "learning_rate": 1.8482360487491986e-06, "loss": 0.7559, "step": 53240 }, { "epoch": 0.6489098509499958, "grad_norm": 2.988292911906311, "learning_rate": 1.8479153303399616e-06, "loss": 0.7919, "step": 53245 }, { "epoch": 0.6489707871741436, "grad_norm": 2.817113362039884, "learning_rate": 1.847594611930725e-06, "loss": 0.7525, "step": 53250 }, { "epoch": 0.6490317233982913, "grad_norm": 2.6433239024112223, "learning_rate": 1.8472738935214883e-06, "loss": 0.6833, "step": 53255 }, { "epoch": 0.6490926596224391, "grad_norm": 4.02126026369904, "learning_rate": 1.8469531751122515e-06, "loss": 0.7513, "step": 53260 }, { "epoch": 0.6491535958465869, "grad_norm": 2.3613425520409383, "learning_rate": 1.846632456703015e-06, "loss": 0.7569, "step": 53265 }, { "epoch": 0.6492145320707348, "grad_norm": 2.509012898305472, "learning_rate": 1.8463117382937782e-06, "loss": 0.7396, "step": 53270 }, { "epoch": 0.6492754682948826, "grad_norm": 3.2606291529110747, "learning_rate": 1.8459910198845416e-06, "loss": 0.7715, "step": 53275 }, { "epoch": 0.6493364045190304, "grad_norm": 2.649100480541536, "learning_rate": 1.845670301475305e-06, "loss": 0.7862, "step": 53280 }, { "epoch": 0.6493973407431782, "grad_norm": 2.3842420082963804, "learning_rate": 1.845349583066068e-06, "loss": 0.7709, "step": 53285 }, { "epoch": 0.6494582769673259, "grad_norm": 2.167716887401753, "learning_rate": 1.8450288646568315e-06, "loss": 0.7707, "step": 53290 }, { "epoch": 0.6495192131914738, "grad_norm": 2.1340507548118275, "learning_rate": 1.8447081462475947e-06, "loss": 0.7245, "step": 53295 }, { "epoch": 0.6495801494156216, "grad_norm": 2.5640717787148026, "learning_rate": 1.8443874278383582e-06, "loss": 0.7171, "step": 53300 }, { "epoch": 0.6496410856397694, "grad_norm": 2.8019476411897672, "learning_rate": 1.8440667094291214e-06, "loss": 0.7432, "step": 53305 }, { "epoch": 0.6497020218639172, "grad_norm": 2.3049954360281726, "learning_rate": 1.8437459910198846e-06, "loss": 0.693, "step": 53310 }, { "epoch": 0.6497629580880651, "grad_norm": 2.201595767284309, "learning_rate": 1.843425272610648e-06, "loss": 0.7476, "step": 53315 }, { "epoch": 0.6498238943122129, "grad_norm": 2.3601138942866817, "learning_rate": 1.8431045542014115e-06, "loss": 0.788, "step": 53320 }, { "epoch": 0.6498848305363606, "grad_norm": 2.4980770782046835, "learning_rate": 1.8427838357921745e-06, "loss": 0.7599, "step": 53325 }, { "epoch": 0.6499457667605084, "grad_norm": 2.7688551406857256, "learning_rate": 1.842463117382938e-06, "loss": 0.6872, "step": 53330 }, { "epoch": 0.6500067029846562, "grad_norm": 2.538452631777667, "learning_rate": 1.8421423989737012e-06, "loss": 0.7596, "step": 53335 }, { "epoch": 0.6500676392088041, "grad_norm": 2.005960051366549, "learning_rate": 1.8418216805644646e-06, "loss": 0.7009, "step": 53340 }, { "epoch": 0.6501285754329519, "grad_norm": 1.9631833211615015, "learning_rate": 1.8415009621552279e-06, "loss": 0.7489, "step": 53345 }, { "epoch": 0.6501895116570997, "grad_norm": 2.0547205662720383, "learning_rate": 1.841180243745991e-06, "loss": 0.7215, "step": 53350 }, { "epoch": 0.6502504478812475, "grad_norm": 2.496616560336218, "learning_rate": 1.8408595253367545e-06, "loss": 0.7604, "step": 53355 }, { "epoch": 0.6503113841053952, "grad_norm": 2.2057434920856283, "learning_rate": 1.840538806927518e-06, "loss": 0.7093, "step": 53360 }, { "epoch": 0.6503723203295431, "grad_norm": 2.9453057850722715, "learning_rate": 1.840218088518281e-06, "loss": 0.6645, "step": 53365 }, { "epoch": 0.6504332565536909, "grad_norm": 2.6468407355138512, "learning_rate": 1.8398973701090444e-06, "loss": 0.7646, "step": 53370 }, { "epoch": 0.6504941927778387, "grad_norm": 2.1194118159547144, "learning_rate": 1.8395766516998076e-06, "loss": 0.6295, "step": 53375 }, { "epoch": 0.6505551290019865, "grad_norm": 2.9072683098963106, "learning_rate": 1.839255933290571e-06, "loss": 0.7542, "step": 53380 }, { "epoch": 0.6506160652261344, "grad_norm": 2.6775651533323943, "learning_rate": 1.8389352148813343e-06, "loss": 0.8307, "step": 53385 }, { "epoch": 0.6506770014502822, "grad_norm": 2.469575090256617, "learning_rate": 1.8386144964720975e-06, "loss": 0.7381, "step": 53390 }, { "epoch": 0.6507379376744299, "grad_norm": 2.216982842100901, "learning_rate": 1.838293778062861e-06, "loss": 0.6996, "step": 53395 }, { "epoch": 0.6507988738985777, "grad_norm": 2.4689873189096074, "learning_rate": 1.8379730596536244e-06, "loss": 0.724, "step": 53400 }, { "epoch": 0.6508598101227255, "grad_norm": 2.5303449354443495, "learning_rate": 1.8376523412443874e-06, "loss": 0.7321, "step": 53405 }, { "epoch": 0.6509207463468734, "grad_norm": 2.482850309558558, "learning_rate": 1.8373316228351509e-06, "loss": 0.7298, "step": 53410 }, { "epoch": 0.6509816825710212, "grad_norm": 2.137900795357869, "learning_rate": 1.8370109044259141e-06, "loss": 0.6555, "step": 53415 }, { "epoch": 0.651042618795169, "grad_norm": 2.2676948148963474, "learning_rate": 1.8366901860166776e-06, "loss": 0.6798, "step": 53420 }, { "epoch": 0.6511035550193168, "grad_norm": 2.25536711541977, "learning_rate": 1.836369467607441e-06, "loss": 0.706, "step": 53425 }, { "epoch": 0.6511644912434645, "grad_norm": 2.3525442112781176, "learning_rate": 1.836048749198204e-06, "loss": 0.7528, "step": 53430 }, { "epoch": 0.6512254274676124, "grad_norm": 2.251657279309165, "learning_rate": 1.8357280307889675e-06, "loss": 0.7195, "step": 53435 }, { "epoch": 0.6512863636917602, "grad_norm": 2.377955535307694, "learning_rate": 1.8354073123797309e-06, "loss": 0.7526, "step": 53440 }, { "epoch": 0.651347299915908, "grad_norm": 2.216533867581534, "learning_rate": 1.835086593970494e-06, "loss": 0.7491, "step": 53445 }, { "epoch": 0.6514082361400558, "grad_norm": 2.4310030067980657, "learning_rate": 1.8347658755612573e-06, "loss": 0.7171, "step": 53450 }, { "epoch": 0.6514691723642037, "grad_norm": 2.3314265889496766, "learning_rate": 1.8344451571520206e-06, "loss": 0.7207, "step": 53455 }, { "epoch": 0.6515301085883515, "grad_norm": 2.309756948198666, "learning_rate": 1.834124438742784e-06, "loss": 0.71, "step": 53460 }, { "epoch": 0.6515910448124992, "grad_norm": 2.473827252316609, "learning_rate": 1.8338037203335475e-06, "loss": 0.7222, "step": 53465 }, { "epoch": 0.651651981036647, "grad_norm": 2.7779023533061853, "learning_rate": 1.8334830019243105e-06, "loss": 0.7553, "step": 53470 }, { "epoch": 0.6517129172607948, "grad_norm": 2.7417720511216803, "learning_rate": 1.833162283515074e-06, "loss": 0.6745, "step": 53475 }, { "epoch": 0.6517738534849427, "grad_norm": 3.555417567908662, "learning_rate": 1.8328415651058374e-06, "loss": 0.7506, "step": 53480 }, { "epoch": 0.6518347897090905, "grad_norm": 3.4379466776091965, "learning_rate": 1.8325208466966004e-06, "loss": 0.6702, "step": 53485 }, { "epoch": 0.6518957259332383, "grad_norm": 2.7938091288286127, "learning_rate": 1.8322001282873638e-06, "loss": 0.7105, "step": 53490 }, { "epoch": 0.6519566621573861, "grad_norm": 2.207982966460741, "learning_rate": 1.8318794098781273e-06, "loss": 0.761, "step": 53495 }, { "epoch": 0.6520175983815338, "grad_norm": 2.51204921580471, "learning_rate": 1.8315586914688905e-06, "loss": 0.8259, "step": 53500 }, { "epoch": 0.6520785346056817, "grad_norm": 2.439995083800442, "learning_rate": 1.831237973059654e-06, "loss": 0.7378, "step": 53505 }, { "epoch": 0.6521394708298295, "grad_norm": 2.6420686544252323, "learning_rate": 1.830917254650417e-06, "loss": 0.7176, "step": 53510 }, { "epoch": 0.6522004070539773, "grad_norm": 2.4075820816609204, "learning_rate": 1.8305965362411804e-06, "loss": 0.6885, "step": 53515 }, { "epoch": 0.6522613432781251, "grad_norm": 2.31178770542693, "learning_rate": 1.8302758178319438e-06, "loss": 0.7614, "step": 53520 }, { "epoch": 0.652322279502273, "grad_norm": 2.562506201190841, "learning_rate": 1.8299550994227068e-06, "loss": 0.7178, "step": 53525 }, { "epoch": 0.6523832157264208, "grad_norm": 2.8225327307151775, "learning_rate": 1.8296343810134703e-06, "loss": 0.7629, "step": 53530 }, { "epoch": 0.6524441519505685, "grad_norm": 2.5309285012143747, "learning_rate": 1.8293136626042337e-06, "loss": 0.6644, "step": 53535 }, { "epoch": 0.6525050881747163, "grad_norm": 2.760902263803306, "learning_rate": 1.828992944194997e-06, "loss": 0.7636, "step": 53540 }, { "epoch": 0.6525660243988641, "grad_norm": 2.1356475878133647, "learning_rate": 1.8286722257857604e-06, "loss": 0.6458, "step": 53545 }, { "epoch": 0.652626960623012, "grad_norm": 2.265819173527104, "learning_rate": 1.8283515073765234e-06, "loss": 0.7272, "step": 53550 }, { "epoch": 0.6526878968471598, "grad_norm": 2.2199919049081664, "learning_rate": 1.8280307889672868e-06, "loss": 0.7216, "step": 53555 }, { "epoch": 0.6527488330713076, "grad_norm": 2.496701109873142, "learning_rate": 1.8277100705580503e-06, "loss": 0.7368, "step": 53560 }, { "epoch": 0.6528097692954554, "grad_norm": 2.5395410049857565, "learning_rate": 1.8273893521488135e-06, "loss": 0.7737, "step": 53565 }, { "epoch": 0.6528707055196031, "grad_norm": 2.5409621215654137, "learning_rate": 1.8270686337395767e-06, "loss": 0.7245, "step": 53570 }, { "epoch": 0.652931641743751, "grad_norm": 2.2842964586489174, "learning_rate": 1.8267479153303402e-06, "loss": 0.6952, "step": 53575 }, { "epoch": 0.6529925779678988, "grad_norm": 2.6283852313935707, "learning_rate": 1.8264271969211034e-06, "loss": 0.7106, "step": 53580 }, { "epoch": 0.6530535141920466, "grad_norm": 3.086569073002301, "learning_rate": 1.8261064785118668e-06, "loss": 0.786, "step": 53585 }, { "epoch": 0.6531144504161944, "grad_norm": 2.816575578842752, "learning_rate": 1.8257857601026299e-06, "loss": 0.8006, "step": 53590 }, { "epoch": 0.6531753866403422, "grad_norm": 3.043069871640517, "learning_rate": 1.8254650416933933e-06, "loss": 0.6905, "step": 53595 }, { "epoch": 0.6532363228644901, "grad_norm": 4.037443798273215, "learning_rate": 1.8251443232841567e-06, "loss": 0.7341, "step": 53600 }, { "epoch": 0.6532972590886378, "grad_norm": 2.8632077388635615, "learning_rate": 1.82482360487492e-06, "loss": 0.7345, "step": 53605 }, { "epoch": 0.6533581953127856, "grad_norm": 2.184518759227439, "learning_rate": 1.8245028864656832e-06, "loss": 0.7231, "step": 53610 }, { "epoch": 0.6534191315369334, "grad_norm": 2.2815890491825748, "learning_rate": 1.8241821680564466e-06, "loss": 0.7851, "step": 53615 }, { "epoch": 0.6534800677610813, "grad_norm": 2.4679591337343014, "learning_rate": 1.8238614496472099e-06, "loss": 0.6917, "step": 53620 }, { "epoch": 0.6535410039852291, "grad_norm": 2.305369497999381, "learning_rate": 1.8235407312379733e-06, "loss": 0.8016, "step": 53625 }, { "epoch": 0.6536019402093769, "grad_norm": 3.084182466165951, "learning_rate": 1.8232200128287363e-06, "loss": 0.7706, "step": 53630 }, { "epoch": 0.6536628764335247, "grad_norm": 2.2209596341652347, "learning_rate": 1.8228992944194998e-06, "loss": 0.7163, "step": 53635 }, { "epoch": 0.6537238126576724, "grad_norm": 2.4254622763658076, "learning_rate": 1.8225785760102632e-06, "loss": 0.7331, "step": 53640 }, { "epoch": 0.6537847488818203, "grad_norm": 2.4269967721383714, "learning_rate": 1.8222578576010264e-06, "loss": 0.7488, "step": 53645 }, { "epoch": 0.6538456851059681, "grad_norm": 1.9673711493076187, "learning_rate": 1.8219371391917899e-06, "loss": 0.8206, "step": 53650 }, { "epoch": 0.6539066213301159, "grad_norm": 2.3625504991348008, "learning_rate": 1.8216164207825531e-06, "loss": 0.7283, "step": 53655 }, { "epoch": 0.6539675575542637, "grad_norm": 2.9530467606691166, "learning_rate": 1.8212957023733163e-06, "loss": 0.7136, "step": 53660 }, { "epoch": 0.6540284937784115, "grad_norm": 2.0984894687534634, "learning_rate": 1.8209749839640798e-06, "loss": 0.7058, "step": 53665 }, { "epoch": 0.6540894300025594, "grad_norm": 2.851662713336979, "learning_rate": 1.8206542655548428e-06, "loss": 0.7669, "step": 53670 }, { "epoch": 0.6541503662267071, "grad_norm": 2.4877274649984646, "learning_rate": 1.8203335471456062e-06, "loss": 0.7955, "step": 53675 }, { "epoch": 0.6542113024508549, "grad_norm": 2.2891894111950153, "learning_rate": 1.8200128287363697e-06, "loss": 0.7047, "step": 53680 }, { "epoch": 0.6542722386750027, "grad_norm": 2.1792696814869674, "learning_rate": 1.819692110327133e-06, "loss": 0.7839, "step": 53685 }, { "epoch": 0.6543331748991505, "grad_norm": 2.0594611244196668, "learning_rate": 1.8193713919178963e-06, "loss": 0.6496, "step": 53690 }, { "epoch": 0.6543941111232984, "grad_norm": 2.8362264915241626, "learning_rate": 1.8190506735086596e-06, "loss": 0.7674, "step": 53695 }, { "epoch": 0.6544550473474462, "grad_norm": 2.4838822855523848, "learning_rate": 1.8187299550994228e-06, "loss": 0.74, "step": 53700 }, { "epoch": 0.654515983571594, "grad_norm": 2.9283577136572125, "learning_rate": 1.8184092366901862e-06, "loss": 0.7691, "step": 53705 }, { "epoch": 0.6545769197957417, "grad_norm": 2.9162197932891862, "learning_rate": 1.8180885182809493e-06, "loss": 0.7032, "step": 53710 }, { "epoch": 0.6546378560198896, "grad_norm": 2.0288878955521006, "learning_rate": 1.8177677998717127e-06, "loss": 0.7874, "step": 53715 }, { "epoch": 0.6546987922440374, "grad_norm": 2.1738876231190143, "learning_rate": 1.8174470814624761e-06, "loss": 0.7178, "step": 53720 }, { "epoch": 0.6547597284681852, "grad_norm": 2.3984006368133675, "learning_rate": 1.8171263630532394e-06, "loss": 0.7223, "step": 53725 }, { "epoch": 0.654820664692333, "grad_norm": 2.633158641590456, "learning_rate": 1.8168056446440028e-06, "loss": 0.7174, "step": 53730 }, { "epoch": 0.6548816009164808, "grad_norm": 2.65882355849377, "learning_rate": 1.816484926234766e-06, "loss": 0.7493, "step": 53735 }, { "epoch": 0.6549425371406287, "grad_norm": 2.5265031037304073, "learning_rate": 1.8161642078255293e-06, "loss": 0.7052, "step": 53740 }, { "epoch": 0.6550034733647764, "grad_norm": 2.8805708037869393, "learning_rate": 1.8158434894162927e-06, "loss": 0.7115, "step": 53745 }, { "epoch": 0.6550644095889242, "grad_norm": 1.6687917379358308, "learning_rate": 1.8155227710070557e-06, "loss": 0.7408, "step": 53750 }, { "epoch": 0.655125345813072, "grad_norm": 2.7209298074189516, "learning_rate": 1.8152020525978192e-06, "loss": 0.7866, "step": 53755 }, { "epoch": 0.6551862820372198, "grad_norm": 2.377935827341508, "learning_rate": 1.8148813341885826e-06, "loss": 0.7134, "step": 53760 }, { "epoch": 0.6552472182613677, "grad_norm": 2.2884899762523183, "learning_rate": 1.8145606157793458e-06, "loss": 0.7652, "step": 53765 }, { "epoch": 0.6553081544855155, "grad_norm": 2.968101204398261, "learning_rate": 1.8142398973701093e-06, "loss": 0.7332, "step": 53770 }, { "epoch": 0.6553690907096633, "grad_norm": 2.0972520268910095, "learning_rate": 1.8139191789608727e-06, "loss": 0.69, "step": 53775 }, { "epoch": 0.655430026933811, "grad_norm": 2.9615882653445125, "learning_rate": 1.8135984605516357e-06, "loss": 0.7989, "step": 53780 }, { "epoch": 0.6554909631579589, "grad_norm": 2.063718111111413, "learning_rate": 1.8132777421423992e-06, "loss": 0.6967, "step": 53785 }, { "epoch": 0.6555518993821067, "grad_norm": 2.4021942467257285, "learning_rate": 1.8129570237331626e-06, "loss": 0.7281, "step": 53790 }, { "epoch": 0.6556128356062545, "grad_norm": 2.7756756800470646, "learning_rate": 1.8126363053239256e-06, "loss": 0.7604, "step": 53795 }, { "epoch": 0.6556737718304023, "grad_norm": 2.3916188497303836, "learning_rate": 1.812315586914689e-06, "loss": 0.7185, "step": 53800 }, { "epoch": 0.6557347080545501, "grad_norm": 2.6076336451833737, "learning_rate": 1.8119948685054523e-06, "loss": 0.8089, "step": 53805 }, { "epoch": 0.655795644278698, "grad_norm": 2.708132127067015, "learning_rate": 1.8116741500962157e-06, "loss": 0.769, "step": 53810 }, { "epoch": 0.6558565805028457, "grad_norm": 2.5098028392572633, "learning_rate": 1.8113534316869792e-06, "loss": 0.7359, "step": 53815 }, { "epoch": 0.6559175167269935, "grad_norm": 3.3378881899884503, "learning_rate": 1.8110327132777422e-06, "loss": 0.7624, "step": 53820 }, { "epoch": 0.6559784529511413, "grad_norm": 3.1920150942954004, "learning_rate": 1.8107119948685056e-06, "loss": 0.7021, "step": 53825 }, { "epoch": 0.6560393891752891, "grad_norm": 3.2202136206175833, "learning_rate": 1.810391276459269e-06, "loss": 0.8074, "step": 53830 }, { "epoch": 0.656100325399437, "grad_norm": 2.8489046650145, "learning_rate": 1.810070558050032e-06, "loss": 0.7184, "step": 53835 }, { "epoch": 0.6561612616235848, "grad_norm": 2.5017526263144916, "learning_rate": 1.8097498396407955e-06, "loss": 0.7362, "step": 53840 }, { "epoch": 0.6562221978477326, "grad_norm": 3.3103842384490023, "learning_rate": 1.8094291212315588e-06, "loss": 0.7291, "step": 53845 }, { "epoch": 0.6562831340718803, "grad_norm": 2.7720094215781392, "learning_rate": 1.8091084028223222e-06, "loss": 0.7157, "step": 53850 }, { "epoch": 0.6563440702960281, "grad_norm": 2.6122497761746803, "learning_rate": 1.8087876844130856e-06, "loss": 0.7751, "step": 53855 }, { "epoch": 0.656405006520176, "grad_norm": 2.6158539976814343, "learning_rate": 1.8084669660038487e-06, "loss": 0.736, "step": 53860 }, { "epoch": 0.6564659427443238, "grad_norm": 2.637839908021839, "learning_rate": 1.808146247594612e-06, "loss": 0.7387, "step": 53865 }, { "epoch": 0.6565268789684716, "grad_norm": 2.5819169869364575, "learning_rate": 1.8078255291853755e-06, "loss": 0.7819, "step": 53870 }, { "epoch": 0.6565878151926194, "grad_norm": 2.1146198862266723, "learning_rate": 1.8075048107761385e-06, "loss": 0.7064, "step": 53875 }, { "epoch": 0.6566487514167673, "grad_norm": 2.3597096786968925, "learning_rate": 1.807184092366902e-06, "loss": 0.6912, "step": 53880 }, { "epoch": 0.656709687640915, "grad_norm": 2.2933038440896927, "learning_rate": 1.8068633739576652e-06, "loss": 0.6703, "step": 53885 }, { "epoch": 0.6567706238650628, "grad_norm": 2.681833150675015, "learning_rate": 1.8065426555484287e-06, "loss": 0.666, "step": 53890 }, { "epoch": 0.6568315600892106, "grad_norm": 3.4254232605436323, "learning_rate": 1.806221937139192e-06, "loss": 0.8179, "step": 53895 }, { "epoch": 0.6568924963133584, "grad_norm": 2.7822763336220198, "learning_rate": 1.8059012187299551e-06, "loss": 0.7626, "step": 53900 }, { "epoch": 0.6569534325375063, "grad_norm": 2.223858838732501, "learning_rate": 1.8055805003207186e-06, "loss": 0.7411, "step": 53905 }, { "epoch": 0.6570143687616541, "grad_norm": 2.660675729320297, "learning_rate": 1.805259781911482e-06, "loss": 0.7189, "step": 53910 }, { "epoch": 0.6570753049858019, "grad_norm": 2.5257067856624786, "learning_rate": 1.8049390635022452e-06, "loss": 0.7675, "step": 53915 }, { "epoch": 0.6571362412099496, "grad_norm": 2.4263788194273204, "learning_rate": 1.8046183450930085e-06, "loss": 0.7421, "step": 53920 }, { "epoch": 0.6571971774340974, "grad_norm": 2.427491763742555, "learning_rate": 1.8042976266837717e-06, "loss": 0.7711, "step": 53925 }, { "epoch": 0.6572581136582453, "grad_norm": 4.763913778442543, "learning_rate": 1.8039769082745351e-06, "loss": 0.7238, "step": 53930 }, { "epoch": 0.6573190498823931, "grad_norm": 2.5796269957249076, "learning_rate": 1.8036561898652986e-06, "loss": 0.7308, "step": 53935 }, { "epoch": 0.6573799861065409, "grad_norm": 2.6629586224417943, "learning_rate": 1.8033354714560616e-06, "loss": 0.7596, "step": 53940 }, { "epoch": 0.6574409223306887, "grad_norm": 2.332174961309265, "learning_rate": 1.803014753046825e-06, "loss": 0.7572, "step": 53945 }, { "epoch": 0.6575018585548366, "grad_norm": 2.6996930578771465, "learning_rate": 1.8026940346375885e-06, "loss": 0.7454, "step": 53950 }, { "epoch": 0.6575627947789843, "grad_norm": 2.496656151846085, "learning_rate": 1.8023733162283517e-06, "loss": 0.769, "step": 53955 }, { "epoch": 0.6576237310031321, "grad_norm": 2.408248263273089, "learning_rate": 1.802052597819115e-06, "loss": 0.6424, "step": 53960 }, { "epoch": 0.6576846672272799, "grad_norm": 2.646365933181121, "learning_rate": 1.8017318794098781e-06, "loss": 0.7366, "step": 53965 }, { "epoch": 0.6577456034514277, "grad_norm": 2.2983950113197333, "learning_rate": 1.8014111610006416e-06, "loss": 0.7193, "step": 53970 }, { "epoch": 0.6578065396755756, "grad_norm": 2.3089023428258804, "learning_rate": 1.801090442591405e-06, "loss": 0.7053, "step": 53975 }, { "epoch": 0.6578674758997234, "grad_norm": 3.1839002843531863, "learning_rate": 1.800769724182168e-06, "loss": 0.7415, "step": 53980 }, { "epoch": 0.6579284121238712, "grad_norm": 3.6565532532174534, "learning_rate": 1.8004490057729315e-06, "loss": 0.7758, "step": 53985 }, { "epoch": 0.6579893483480189, "grad_norm": 2.4736565817206166, "learning_rate": 1.800128287363695e-06, "loss": 0.724, "step": 53990 }, { "epoch": 0.6580502845721667, "grad_norm": 3.1442584092462997, "learning_rate": 1.7998075689544582e-06, "loss": 0.6985, "step": 53995 }, { "epoch": 0.6581112207963146, "grad_norm": 2.7731653595116, "learning_rate": 1.7994868505452214e-06, "loss": 0.7193, "step": 54000 }, { "epoch": 0.6581721570204624, "grad_norm": 2.6980441040435954, "learning_rate": 1.7991661321359846e-06, "loss": 0.7646, "step": 54005 }, { "epoch": 0.6582330932446102, "grad_norm": 2.321318868816472, "learning_rate": 1.798845413726748e-06, "loss": 0.743, "step": 54010 }, { "epoch": 0.658294029468758, "grad_norm": 3.1129476793186956, "learning_rate": 1.7985246953175115e-06, "loss": 0.8019, "step": 54015 }, { "epoch": 0.6583549656929059, "grad_norm": 2.3261694259875783, "learning_rate": 1.7982039769082745e-06, "loss": 0.7672, "step": 54020 }, { "epoch": 0.6584159019170536, "grad_norm": 2.1103872282862284, "learning_rate": 1.797883258499038e-06, "loss": 0.7329, "step": 54025 }, { "epoch": 0.6584768381412014, "grad_norm": 2.1955204801019104, "learning_rate": 1.7975625400898014e-06, "loss": 0.7044, "step": 54030 }, { "epoch": 0.6585377743653492, "grad_norm": 2.2417896244784288, "learning_rate": 1.7972418216805646e-06, "loss": 0.7623, "step": 54035 }, { "epoch": 0.658598710589497, "grad_norm": 2.658737622837101, "learning_rate": 1.796921103271328e-06, "loss": 0.7569, "step": 54040 }, { "epoch": 0.6586596468136449, "grad_norm": 1.9505502131459074, "learning_rate": 1.796600384862091e-06, "loss": 0.7138, "step": 54045 }, { "epoch": 0.6587205830377927, "grad_norm": 2.6254306443390947, "learning_rate": 1.7962796664528545e-06, "loss": 0.7549, "step": 54050 }, { "epoch": 0.6587815192619405, "grad_norm": 2.568290144287845, "learning_rate": 1.795958948043618e-06, "loss": 0.7061, "step": 54055 }, { "epoch": 0.6588424554860882, "grad_norm": 2.523572308273842, "learning_rate": 1.795638229634381e-06, "loss": 0.7459, "step": 54060 }, { "epoch": 0.658903391710236, "grad_norm": 3.1081889805019407, "learning_rate": 1.7953175112251444e-06, "loss": 0.677, "step": 54065 }, { "epoch": 0.6589643279343839, "grad_norm": 2.5077013824580257, "learning_rate": 1.7949967928159079e-06, "loss": 0.8303, "step": 54070 }, { "epoch": 0.6590252641585317, "grad_norm": 2.3944630299553307, "learning_rate": 1.794676074406671e-06, "loss": 0.7693, "step": 54075 }, { "epoch": 0.6590862003826795, "grad_norm": 2.2371823429636155, "learning_rate": 1.7943553559974345e-06, "loss": 0.755, "step": 54080 }, { "epoch": 0.6591471366068273, "grad_norm": 2.3895197459382556, "learning_rate": 1.7940346375881977e-06, "loss": 0.6572, "step": 54085 }, { "epoch": 0.6592080728309752, "grad_norm": 2.631753958978807, "learning_rate": 1.793713919178961e-06, "loss": 0.7993, "step": 54090 }, { "epoch": 0.6592690090551229, "grad_norm": 1.8621018144585424, "learning_rate": 1.7933932007697244e-06, "loss": 0.7475, "step": 54095 }, { "epoch": 0.6593299452792707, "grad_norm": 2.591705612331172, "learning_rate": 1.7930724823604874e-06, "loss": 0.7285, "step": 54100 }, { "epoch": 0.6593908815034185, "grad_norm": 2.7276763301771054, "learning_rate": 1.7927517639512509e-06, "loss": 0.7462, "step": 54105 }, { "epoch": 0.6594518177275663, "grad_norm": 2.733995187839574, "learning_rate": 1.7924310455420143e-06, "loss": 0.7185, "step": 54110 }, { "epoch": 0.6595127539517142, "grad_norm": 2.027662197674209, "learning_rate": 1.7921103271327775e-06, "loss": 0.6951, "step": 54115 }, { "epoch": 0.659573690175862, "grad_norm": 2.4656450411914843, "learning_rate": 1.791789608723541e-06, "loss": 0.7105, "step": 54120 }, { "epoch": 0.6596346264000098, "grad_norm": 2.220159058038898, "learning_rate": 1.7914688903143044e-06, "loss": 0.7154, "step": 54125 }, { "epoch": 0.6596955626241575, "grad_norm": 2.637077814845905, "learning_rate": 1.7911481719050674e-06, "loss": 0.6919, "step": 54130 }, { "epoch": 0.6597564988483053, "grad_norm": 1.9552993834160959, "learning_rate": 1.7908274534958309e-06, "loss": 0.7018, "step": 54135 }, { "epoch": 0.6598174350724532, "grad_norm": 3.416869483192393, "learning_rate": 1.7905067350865941e-06, "loss": 0.766, "step": 54140 }, { "epoch": 0.659878371296601, "grad_norm": 2.385007118441968, "learning_rate": 1.7901860166773573e-06, "loss": 0.7434, "step": 54145 }, { "epoch": 0.6599393075207488, "grad_norm": 2.513097821867145, "learning_rate": 1.7898652982681208e-06, "loss": 0.8267, "step": 54150 }, { "epoch": 0.6600002437448966, "grad_norm": 2.2924243292546347, "learning_rate": 1.789544579858884e-06, "loss": 0.7527, "step": 54155 }, { "epoch": 0.6600611799690443, "grad_norm": 3.137899092730451, "learning_rate": 1.7892238614496474e-06, "loss": 0.7056, "step": 54160 }, { "epoch": 0.6601221161931922, "grad_norm": 2.627402661434551, "learning_rate": 1.7889031430404109e-06, "loss": 0.7398, "step": 54165 }, { "epoch": 0.66018305241734, "grad_norm": 2.667585834199965, "learning_rate": 1.788582424631174e-06, "loss": 0.682, "step": 54170 }, { "epoch": 0.6602439886414878, "grad_norm": 2.476030169696217, "learning_rate": 1.7882617062219373e-06, "loss": 0.7239, "step": 54175 }, { "epoch": 0.6603049248656356, "grad_norm": 2.4697312536335483, "learning_rate": 1.7879409878127006e-06, "loss": 0.7115, "step": 54180 }, { "epoch": 0.6603658610897835, "grad_norm": 4.806417367077274, "learning_rate": 1.7876202694034638e-06, "loss": 0.7867, "step": 54185 }, { "epoch": 0.6604267973139313, "grad_norm": 2.4275275426329563, "learning_rate": 1.7872995509942272e-06, "loss": 0.6959, "step": 54190 }, { "epoch": 0.660487733538079, "grad_norm": 2.721143863395712, "learning_rate": 1.7869788325849905e-06, "loss": 0.7318, "step": 54195 }, { "epoch": 0.6605486697622268, "grad_norm": 3.587988782906782, "learning_rate": 1.786658114175754e-06, "loss": 0.7177, "step": 54200 }, { "epoch": 0.6606096059863746, "grad_norm": 2.2187215932072832, "learning_rate": 1.7863373957665174e-06, "loss": 0.7967, "step": 54205 }, { "epoch": 0.6606705422105225, "grad_norm": 2.812157232691986, "learning_rate": 1.7860166773572804e-06, "loss": 0.7384, "step": 54210 }, { "epoch": 0.6607314784346703, "grad_norm": 2.4998846515943405, "learning_rate": 1.7856959589480438e-06, "loss": 0.6943, "step": 54215 }, { "epoch": 0.6607924146588181, "grad_norm": 2.8197323025871257, "learning_rate": 1.785375240538807e-06, "loss": 0.7339, "step": 54220 }, { "epoch": 0.6608533508829659, "grad_norm": 2.16674924485079, "learning_rate": 1.7850545221295703e-06, "loss": 0.6581, "step": 54225 }, { "epoch": 0.6609142871071136, "grad_norm": 2.856768698712992, "learning_rate": 1.7847338037203337e-06, "loss": 0.7576, "step": 54230 }, { "epoch": 0.6609752233312615, "grad_norm": 2.5462952297178894, "learning_rate": 1.784413085311097e-06, "loss": 0.8008, "step": 54235 }, { "epoch": 0.6610361595554093, "grad_norm": 3.297987216353527, "learning_rate": 1.7840923669018604e-06, "loss": 0.725, "step": 54240 }, { "epoch": 0.6610970957795571, "grad_norm": 2.376863957396165, "learning_rate": 1.7837716484926238e-06, "loss": 0.639, "step": 54245 }, { "epoch": 0.6611580320037049, "grad_norm": 3.3360177711718677, "learning_rate": 1.7834509300833868e-06, "loss": 0.7761, "step": 54250 }, { "epoch": 0.6612189682278528, "grad_norm": 2.3785939477795006, "learning_rate": 1.7831302116741503e-06, "loss": 0.7362, "step": 54255 }, { "epoch": 0.6612799044520006, "grad_norm": 2.3205232049030418, "learning_rate": 1.7828094932649135e-06, "loss": 0.8093, "step": 54260 }, { "epoch": 0.6613408406761483, "grad_norm": 3.23870030785935, "learning_rate": 1.782488774855677e-06, "loss": 0.8487, "step": 54265 }, { "epoch": 0.6614017769002961, "grad_norm": 2.3732879234093427, "learning_rate": 1.7821680564464402e-06, "loss": 0.6961, "step": 54270 }, { "epoch": 0.6614627131244439, "grad_norm": 2.4041215271843153, "learning_rate": 1.7818473380372034e-06, "loss": 0.6782, "step": 54275 }, { "epoch": 0.6615236493485918, "grad_norm": 2.7570587283291514, "learning_rate": 1.7815266196279668e-06, "loss": 0.7445, "step": 54280 }, { "epoch": 0.6615845855727396, "grad_norm": 1.7877689720900345, "learning_rate": 1.7812059012187303e-06, "loss": 0.6823, "step": 54285 }, { "epoch": 0.6616455217968874, "grad_norm": 2.8452647567256517, "learning_rate": 1.7808851828094933e-06, "loss": 0.7522, "step": 54290 }, { "epoch": 0.6617064580210352, "grad_norm": 2.5319483239699174, "learning_rate": 1.7805644644002567e-06, "loss": 0.6992, "step": 54295 }, { "epoch": 0.6617673942451829, "grad_norm": 2.399553647993972, "learning_rate": 1.78024374599102e-06, "loss": 0.7557, "step": 54300 }, { "epoch": 0.6618283304693308, "grad_norm": 2.7563527789723636, "learning_rate": 1.7799230275817834e-06, "loss": 0.8332, "step": 54305 }, { "epoch": 0.6618892666934786, "grad_norm": 2.676665343559781, "learning_rate": 1.7796023091725466e-06, "loss": 0.7229, "step": 54310 }, { "epoch": 0.6619502029176264, "grad_norm": 2.424912550854527, "learning_rate": 1.7792815907633099e-06, "loss": 0.709, "step": 54315 }, { "epoch": 0.6620111391417742, "grad_norm": 2.055628257477683, "learning_rate": 1.7789608723540733e-06, "loss": 0.6815, "step": 54320 }, { "epoch": 0.662072075365922, "grad_norm": 2.3811174504876016, "learning_rate": 1.7786401539448367e-06, "loss": 0.7411, "step": 54325 }, { "epoch": 0.6621330115900699, "grad_norm": 2.8124505023759494, "learning_rate": 1.7783194355355998e-06, "loss": 0.7453, "step": 54330 }, { "epoch": 0.6621939478142176, "grad_norm": 2.1746015963080736, "learning_rate": 1.7779987171263632e-06, "loss": 0.6473, "step": 54335 }, { "epoch": 0.6622548840383654, "grad_norm": 2.2919656929452996, "learning_rate": 1.7776779987171264e-06, "loss": 0.7502, "step": 54340 }, { "epoch": 0.6623158202625132, "grad_norm": 1.925831188519539, "learning_rate": 1.7773572803078899e-06, "loss": 0.698, "step": 54345 }, { "epoch": 0.6623767564866611, "grad_norm": 2.556971289104391, "learning_rate": 1.777036561898653e-06, "loss": 0.738, "step": 54350 }, { "epoch": 0.6624376927108089, "grad_norm": 3.162099311226851, "learning_rate": 1.7767158434894163e-06, "loss": 0.7719, "step": 54355 }, { "epoch": 0.6624986289349567, "grad_norm": 2.4621964722946657, "learning_rate": 1.7763951250801798e-06, "loss": 0.7032, "step": 54360 }, { "epoch": 0.6625595651591045, "grad_norm": 2.3121717672437243, "learning_rate": 1.7760744066709432e-06, "loss": 0.7481, "step": 54365 }, { "epoch": 0.6626205013832522, "grad_norm": 2.216958186490974, "learning_rate": 1.7757536882617062e-06, "loss": 0.7112, "step": 54370 }, { "epoch": 0.6626814376074001, "grad_norm": 2.903730199963244, "learning_rate": 1.7754329698524697e-06, "loss": 0.7538, "step": 54375 }, { "epoch": 0.6627423738315479, "grad_norm": 2.6956681019701993, "learning_rate": 1.775112251443233e-06, "loss": 0.7375, "step": 54380 }, { "epoch": 0.6628033100556957, "grad_norm": 2.4175767982536347, "learning_rate": 1.7747915330339963e-06, "loss": 0.7589, "step": 54385 }, { "epoch": 0.6628642462798435, "grad_norm": 2.598532656533878, "learning_rate": 1.7744708146247598e-06, "loss": 0.7212, "step": 54390 }, { "epoch": 0.6629251825039914, "grad_norm": 2.843065430101765, "learning_rate": 1.7741500962155228e-06, "loss": 0.6564, "step": 54395 }, { "epoch": 0.6629861187281392, "grad_norm": 2.3549165416220674, "learning_rate": 1.7738293778062862e-06, "loss": 0.6902, "step": 54400 }, { "epoch": 0.6630470549522869, "grad_norm": 2.365605786054902, "learning_rate": 1.7735086593970497e-06, "loss": 0.7224, "step": 54405 }, { "epoch": 0.6631079911764347, "grad_norm": 2.6857865769098734, "learning_rate": 1.7731879409878127e-06, "loss": 0.6755, "step": 54410 }, { "epoch": 0.6631689274005825, "grad_norm": 2.335879174175453, "learning_rate": 1.7728672225785761e-06, "loss": 0.6959, "step": 54415 }, { "epoch": 0.6632298636247304, "grad_norm": 2.2746913456364406, "learning_rate": 1.7725465041693396e-06, "loss": 0.8341, "step": 54420 }, { "epoch": 0.6632907998488782, "grad_norm": 2.717330525565925, "learning_rate": 1.7722257857601028e-06, "loss": 0.7193, "step": 54425 }, { "epoch": 0.663351736073026, "grad_norm": 2.3190287844540496, "learning_rate": 1.7719050673508662e-06, "loss": 0.7203, "step": 54430 }, { "epoch": 0.6634126722971738, "grad_norm": 2.7299037783302333, "learning_rate": 1.7715843489416293e-06, "loss": 0.7646, "step": 54435 }, { "epoch": 0.6634736085213215, "grad_norm": 3.1286972361435006, "learning_rate": 1.7712636305323927e-06, "loss": 0.7492, "step": 54440 }, { "epoch": 0.6635345447454694, "grad_norm": 1.9375800849311604, "learning_rate": 1.7709429121231561e-06, "loss": 0.7604, "step": 54445 }, { "epoch": 0.6635954809696172, "grad_norm": 2.2423461507781233, "learning_rate": 1.7706221937139191e-06, "loss": 0.7467, "step": 54450 }, { "epoch": 0.663656417193765, "grad_norm": 2.1049155829853485, "learning_rate": 1.7703014753046826e-06, "loss": 0.7533, "step": 54455 }, { "epoch": 0.6637173534179128, "grad_norm": 2.405292954527024, "learning_rate": 1.769980756895446e-06, "loss": 0.7215, "step": 54460 }, { "epoch": 0.6637782896420606, "grad_norm": 3.2805010403106616, "learning_rate": 1.7696600384862093e-06, "loss": 0.7807, "step": 54465 }, { "epoch": 0.6638392258662085, "grad_norm": 2.297729388272311, "learning_rate": 1.7693393200769727e-06, "loss": 0.7424, "step": 54470 }, { "epoch": 0.6639001620903562, "grad_norm": 2.4082449771506886, "learning_rate": 1.7690186016677357e-06, "loss": 0.7192, "step": 54475 }, { "epoch": 0.663961098314504, "grad_norm": 3.1472314242327752, "learning_rate": 1.7686978832584992e-06, "loss": 0.7647, "step": 54480 }, { "epoch": 0.6640220345386518, "grad_norm": 2.05211589227056, "learning_rate": 1.7683771648492626e-06, "loss": 0.6498, "step": 54485 }, { "epoch": 0.6640829707627997, "grad_norm": 2.35227233575935, "learning_rate": 1.7680564464400258e-06, "loss": 0.7938, "step": 54490 }, { "epoch": 0.6641439069869475, "grad_norm": 3.2872351411041194, "learning_rate": 1.767735728030789e-06, "loss": 0.6961, "step": 54495 }, { "epoch": 0.6642048432110953, "grad_norm": 2.354302066651377, "learning_rate": 1.7674150096215525e-06, "loss": 0.8393, "step": 54500 }, { "epoch": 0.6642657794352431, "grad_norm": 2.888613131494105, "learning_rate": 1.7670942912123157e-06, "loss": 0.752, "step": 54505 }, { "epoch": 0.6643267156593908, "grad_norm": 2.8223077193598862, "learning_rate": 1.7667735728030792e-06, "loss": 0.6982, "step": 54510 }, { "epoch": 0.6643876518835387, "grad_norm": 2.9787397424409896, "learning_rate": 1.7664528543938422e-06, "loss": 0.7798, "step": 54515 }, { "epoch": 0.6644485881076865, "grad_norm": 2.2127588695201066, "learning_rate": 1.7661321359846056e-06, "loss": 0.7061, "step": 54520 }, { "epoch": 0.6645095243318343, "grad_norm": 2.6414514416172965, "learning_rate": 1.765811417575369e-06, "loss": 0.7568, "step": 54525 }, { "epoch": 0.6645704605559821, "grad_norm": 2.9548155187660625, "learning_rate": 1.7654906991661323e-06, "loss": 0.7838, "step": 54530 }, { "epoch": 0.66463139678013, "grad_norm": 2.6266046477890916, "learning_rate": 1.7651699807568955e-06, "loss": 0.7779, "step": 54535 }, { "epoch": 0.6646923330042778, "grad_norm": 2.05100761220547, "learning_rate": 1.764849262347659e-06, "loss": 0.6833, "step": 54540 }, { "epoch": 0.6647532692284255, "grad_norm": 2.786338989493897, "learning_rate": 1.7645285439384222e-06, "loss": 0.7817, "step": 54545 }, { "epoch": 0.6648142054525733, "grad_norm": 2.130766842304409, "learning_rate": 1.7642078255291856e-06, "loss": 0.8001, "step": 54550 }, { "epoch": 0.6648751416767211, "grad_norm": 2.322817960216207, "learning_rate": 1.7638871071199486e-06, "loss": 0.7627, "step": 54555 }, { "epoch": 0.664936077900869, "grad_norm": 2.485374282701151, "learning_rate": 1.763566388710712e-06, "loss": 0.6691, "step": 54560 }, { "epoch": 0.6649970141250168, "grad_norm": 2.6534312543263514, "learning_rate": 1.7632456703014755e-06, "loss": 0.7125, "step": 54565 }, { "epoch": 0.6650579503491646, "grad_norm": 2.191188849678511, "learning_rate": 1.7629249518922388e-06, "loss": 0.6927, "step": 54570 }, { "epoch": 0.6651188865733124, "grad_norm": 2.9963673224379805, "learning_rate": 1.762604233483002e-06, "loss": 0.7821, "step": 54575 }, { "epoch": 0.6651798227974601, "grad_norm": 2.3381269841881247, "learning_rate": 1.7622835150737654e-06, "loss": 0.6773, "step": 54580 }, { "epoch": 0.665240759021608, "grad_norm": 2.6754414352972296, "learning_rate": 1.7619627966645286e-06, "loss": 0.6952, "step": 54585 }, { "epoch": 0.6653016952457558, "grad_norm": 3.2361504710717046, "learning_rate": 1.761642078255292e-06, "loss": 0.7385, "step": 54590 }, { "epoch": 0.6653626314699036, "grad_norm": 2.7330118211813654, "learning_rate": 1.761321359846055e-06, "loss": 0.7552, "step": 54595 }, { "epoch": 0.6654235676940514, "grad_norm": 2.3614824599975575, "learning_rate": 1.7610006414368185e-06, "loss": 0.7274, "step": 54600 }, { "epoch": 0.6654845039181992, "grad_norm": 2.6415654946635225, "learning_rate": 1.760679923027582e-06, "loss": 0.6841, "step": 54605 }, { "epoch": 0.6655454401423471, "grad_norm": 2.0460229594842274, "learning_rate": 1.7603592046183452e-06, "loss": 0.747, "step": 54610 }, { "epoch": 0.6656063763664948, "grad_norm": 2.685938247349457, "learning_rate": 1.7600384862091087e-06, "loss": 0.7976, "step": 54615 }, { "epoch": 0.6656673125906426, "grad_norm": 2.3764975045652545, "learning_rate": 1.7597177677998719e-06, "loss": 0.6687, "step": 54620 }, { "epoch": 0.6657282488147904, "grad_norm": 2.2810976453359992, "learning_rate": 1.7593970493906351e-06, "loss": 0.7176, "step": 54625 }, { "epoch": 0.6657891850389382, "grad_norm": 2.7638540424058347, "learning_rate": 1.7590763309813986e-06, "loss": 0.7005, "step": 54630 }, { "epoch": 0.6658501212630861, "grad_norm": 2.554616350097085, "learning_rate": 1.7587556125721616e-06, "loss": 0.6138, "step": 54635 }, { "epoch": 0.6659110574872339, "grad_norm": 2.255808635685914, "learning_rate": 1.758434894162925e-06, "loss": 0.7195, "step": 54640 }, { "epoch": 0.6659719937113817, "grad_norm": 2.6493112969249513, "learning_rate": 1.7581141757536884e-06, "loss": 0.6988, "step": 54645 }, { "epoch": 0.6660329299355294, "grad_norm": 2.302935519433298, "learning_rate": 1.7577934573444517e-06, "loss": 0.7497, "step": 54650 }, { "epoch": 0.6660938661596773, "grad_norm": 2.8436845101516064, "learning_rate": 1.7574727389352151e-06, "loss": 0.727, "step": 54655 }, { "epoch": 0.6661548023838251, "grad_norm": 2.4576401339886127, "learning_rate": 1.7571520205259783e-06, "loss": 0.6854, "step": 54660 }, { "epoch": 0.6662157386079729, "grad_norm": 2.5648445186270523, "learning_rate": 1.7568313021167416e-06, "loss": 0.7628, "step": 54665 }, { "epoch": 0.6662766748321207, "grad_norm": 2.555748521214452, "learning_rate": 1.756510583707505e-06, "loss": 0.7443, "step": 54670 }, { "epoch": 0.6663376110562685, "grad_norm": 2.6666446585871633, "learning_rate": 1.756189865298268e-06, "loss": 0.7256, "step": 54675 }, { "epoch": 0.6663985472804164, "grad_norm": 2.5547367554911142, "learning_rate": 1.7558691468890315e-06, "loss": 0.7525, "step": 54680 }, { "epoch": 0.6664594835045641, "grad_norm": 2.4721956386327757, "learning_rate": 1.755548428479795e-06, "loss": 0.682, "step": 54685 }, { "epoch": 0.6665204197287119, "grad_norm": 1.9662163501585066, "learning_rate": 1.7552277100705581e-06, "loss": 0.7841, "step": 54690 }, { "epoch": 0.6665813559528597, "grad_norm": 2.3515697112509124, "learning_rate": 1.7549069916613216e-06, "loss": 0.7087, "step": 54695 }, { "epoch": 0.6666422921770075, "grad_norm": 2.5102973785075187, "learning_rate": 1.7545862732520848e-06, "loss": 0.8314, "step": 54700 }, { "epoch": 0.6667032284011554, "grad_norm": 2.8291570783779623, "learning_rate": 1.754265554842848e-06, "loss": 0.7858, "step": 54705 }, { "epoch": 0.6667641646253032, "grad_norm": 2.502568737063561, "learning_rate": 1.7539448364336115e-06, "loss": 0.6885, "step": 54710 }, { "epoch": 0.666825100849451, "grad_norm": 3.5398907663536683, "learning_rate": 1.753624118024375e-06, "loss": 0.8585, "step": 54715 }, { "epoch": 0.6668860370735987, "grad_norm": 2.384866678205993, "learning_rate": 1.753303399615138e-06, "loss": 0.6967, "step": 54720 }, { "epoch": 0.6669469732977465, "grad_norm": 1.815761644585305, "learning_rate": 1.7529826812059014e-06, "loss": 0.7595, "step": 54725 }, { "epoch": 0.6670079095218944, "grad_norm": 2.484944601616666, "learning_rate": 1.7526619627966646e-06, "loss": 0.7919, "step": 54730 }, { "epoch": 0.6670688457460422, "grad_norm": 2.5757874046807117, "learning_rate": 1.752341244387428e-06, "loss": 0.6785, "step": 54735 }, { "epoch": 0.66712978197019, "grad_norm": 2.6534094693055845, "learning_rate": 1.7520205259781915e-06, "loss": 0.7488, "step": 54740 }, { "epoch": 0.6671907181943378, "grad_norm": 2.6999954973924303, "learning_rate": 1.7516998075689545e-06, "loss": 0.7702, "step": 54745 }, { "epoch": 0.6672516544184857, "grad_norm": 3.2004580158586005, "learning_rate": 1.751379089159718e-06, "loss": 0.7407, "step": 54750 }, { "epoch": 0.6673125906426334, "grad_norm": 2.156597526749729, "learning_rate": 1.7510583707504814e-06, "loss": 0.7266, "step": 54755 }, { "epoch": 0.6673735268667812, "grad_norm": 2.616744394389711, "learning_rate": 1.7507376523412444e-06, "loss": 0.7462, "step": 54760 }, { "epoch": 0.667434463090929, "grad_norm": 5.177175999444215, "learning_rate": 1.7504169339320078e-06, "loss": 0.8267, "step": 54765 }, { "epoch": 0.6674953993150768, "grad_norm": 3.4928482063934694, "learning_rate": 1.750096215522771e-06, "loss": 0.7913, "step": 54770 }, { "epoch": 0.6675563355392247, "grad_norm": 1.9757570622452858, "learning_rate": 1.7497754971135345e-06, "loss": 0.7398, "step": 54775 }, { "epoch": 0.6676172717633725, "grad_norm": 2.6989295259728463, "learning_rate": 1.749454778704298e-06, "loss": 0.7294, "step": 54780 }, { "epoch": 0.6676782079875203, "grad_norm": 2.6257197003639856, "learning_rate": 1.749134060295061e-06, "loss": 0.6961, "step": 54785 }, { "epoch": 0.667739144211668, "grad_norm": 2.411239724398205, "learning_rate": 1.7488133418858244e-06, "loss": 0.717, "step": 54790 }, { "epoch": 0.6678000804358158, "grad_norm": 2.6239970381401014, "learning_rate": 1.7484926234765878e-06, "loss": 0.7494, "step": 54795 }, { "epoch": 0.6678610166599637, "grad_norm": 2.163938758709984, "learning_rate": 1.7481719050673509e-06, "loss": 0.6774, "step": 54800 }, { "epoch": 0.6679219528841115, "grad_norm": 2.67207218551585, "learning_rate": 1.7478511866581143e-06, "loss": 0.7474, "step": 54805 }, { "epoch": 0.6679828891082593, "grad_norm": 2.5202284174673224, "learning_rate": 1.7475304682488775e-06, "loss": 0.7398, "step": 54810 }, { "epoch": 0.6680438253324071, "grad_norm": 2.4715524554179615, "learning_rate": 1.747209749839641e-06, "loss": 0.7867, "step": 54815 }, { "epoch": 0.668104761556555, "grad_norm": 2.2967735122855357, "learning_rate": 1.7468890314304044e-06, "loss": 0.6979, "step": 54820 }, { "epoch": 0.6681656977807027, "grad_norm": 2.795504763949965, "learning_rate": 1.7465683130211674e-06, "loss": 0.7009, "step": 54825 }, { "epoch": 0.6682266340048505, "grad_norm": 2.2087992517706745, "learning_rate": 1.7462475946119309e-06, "loss": 0.7404, "step": 54830 }, { "epoch": 0.6682875702289983, "grad_norm": 4.214861609865807, "learning_rate": 1.7459268762026943e-06, "loss": 0.7763, "step": 54835 }, { "epoch": 0.6683485064531461, "grad_norm": 2.671541596130308, "learning_rate": 1.7456061577934575e-06, "loss": 0.7187, "step": 54840 }, { "epoch": 0.668409442677294, "grad_norm": 2.741762849911037, "learning_rate": 1.7452854393842208e-06, "loss": 0.6876, "step": 54845 }, { "epoch": 0.6684703789014418, "grad_norm": 2.664660273054741, "learning_rate": 1.744964720974984e-06, "loss": 0.7094, "step": 54850 }, { "epoch": 0.6685313151255896, "grad_norm": 4.883055505765944, "learning_rate": 1.7446440025657474e-06, "loss": 0.8237, "step": 54855 }, { "epoch": 0.6685922513497373, "grad_norm": 2.5597082165843186, "learning_rate": 1.7443232841565109e-06, "loss": 0.7179, "step": 54860 }, { "epoch": 0.6686531875738851, "grad_norm": 2.294104297804365, "learning_rate": 1.7440025657472739e-06, "loss": 0.7152, "step": 54865 }, { "epoch": 0.668714123798033, "grad_norm": 2.6608827595652786, "learning_rate": 1.7436818473380373e-06, "loss": 0.7193, "step": 54870 }, { "epoch": 0.6687750600221808, "grad_norm": 2.1007928997624705, "learning_rate": 1.7433611289288008e-06, "loss": 0.6805, "step": 54875 }, { "epoch": 0.6688359962463286, "grad_norm": 2.237386523453887, "learning_rate": 1.743040410519564e-06, "loss": 0.7183, "step": 54880 }, { "epoch": 0.6688969324704764, "grad_norm": 1.950607146463091, "learning_rate": 1.7427196921103272e-06, "loss": 0.7652, "step": 54885 }, { "epoch": 0.6689578686946243, "grad_norm": 2.571465432698328, "learning_rate": 1.7423989737010905e-06, "loss": 0.7588, "step": 54890 }, { "epoch": 0.669018804918772, "grad_norm": 2.362827372542242, "learning_rate": 1.742078255291854e-06, "loss": 0.7572, "step": 54895 }, { "epoch": 0.6690797411429198, "grad_norm": 3.019135113646593, "learning_rate": 1.7417575368826173e-06, "loss": 0.7584, "step": 54900 }, { "epoch": 0.6691406773670676, "grad_norm": 2.27033460912575, "learning_rate": 1.7414368184733804e-06, "loss": 0.7615, "step": 54905 }, { "epoch": 0.6692016135912154, "grad_norm": 2.512394156588765, "learning_rate": 1.7411161000641438e-06, "loss": 0.6984, "step": 54910 }, { "epoch": 0.6692625498153633, "grad_norm": 2.186550925365761, "learning_rate": 1.7407953816549072e-06, "loss": 0.7424, "step": 54915 }, { "epoch": 0.6693234860395111, "grad_norm": 2.255942341885383, "learning_rate": 1.7404746632456705e-06, "loss": 0.7237, "step": 54920 }, { "epoch": 0.6693844222636589, "grad_norm": 2.283892955183747, "learning_rate": 1.7401539448364337e-06, "loss": 0.6478, "step": 54925 }, { "epoch": 0.6694453584878066, "grad_norm": 2.4689362958986805, "learning_rate": 1.739833226427197e-06, "loss": 0.6928, "step": 54930 }, { "epoch": 0.6695062947119544, "grad_norm": 2.727299004637704, "learning_rate": 1.7395125080179604e-06, "loss": 0.7496, "step": 54935 }, { "epoch": 0.6695672309361023, "grad_norm": 2.290602197886092, "learning_rate": 1.7391917896087238e-06, "loss": 0.5894, "step": 54940 }, { "epoch": 0.6696281671602501, "grad_norm": 2.5141929561822343, "learning_rate": 1.7388710711994868e-06, "loss": 0.7151, "step": 54945 }, { "epoch": 0.6696891033843979, "grad_norm": 2.5465863522133394, "learning_rate": 1.7385503527902503e-06, "loss": 0.7836, "step": 54950 }, { "epoch": 0.6697500396085457, "grad_norm": 2.2819834608493967, "learning_rate": 1.7382296343810137e-06, "loss": 0.7419, "step": 54955 }, { "epoch": 0.6698109758326936, "grad_norm": 4.79158453183217, "learning_rate": 1.737908915971777e-06, "loss": 0.7391, "step": 54960 }, { "epoch": 0.6698719120568413, "grad_norm": 2.1762497800919065, "learning_rate": 1.7375881975625404e-06, "loss": 0.7494, "step": 54965 }, { "epoch": 0.6699328482809891, "grad_norm": 5.4076171352611615, "learning_rate": 1.7372674791533034e-06, "loss": 0.7475, "step": 54970 }, { "epoch": 0.6699937845051369, "grad_norm": 2.2901009034111492, "learning_rate": 1.7369467607440668e-06, "loss": 0.7493, "step": 54975 }, { "epoch": 0.6700547207292847, "grad_norm": 2.258060434922424, "learning_rate": 1.7366260423348303e-06, "loss": 0.7612, "step": 54980 }, { "epoch": 0.6701156569534326, "grad_norm": 2.613499595623024, "learning_rate": 1.7363053239255933e-06, "loss": 0.6913, "step": 54985 }, { "epoch": 0.6701765931775804, "grad_norm": 2.362558666146263, "learning_rate": 1.7359846055163567e-06, "loss": 0.7042, "step": 54990 }, { "epoch": 0.6702375294017282, "grad_norm": 2.5159784373387684, "learning_rate": 1.7356638871071202e-06, "loss": 0.6919, "step": 54995 }, { "epoch": 0.6702984656258759, "grad_norm": 2.4033662528432687, "learning_rate": 1.7353431686978834e-06, "loss": 0.7145, "step": 55000 }, { "epoch": 0.6703594018500237, "grad_norm": 3.809763653174682, "learning_rate": 1.7350224502886468e-06, "loss": 0.7854, "step": 55005 }, { "epoch": 0.6704203380741716, "grad_norm": 2.805651974216761, "learning_rate": 1.73470173187941e-06, "loss": 0.7574, "step": 55010 }, { "epoch": 0.6704812742983194, "grad_norm": 3.098635707756865, "learning_rate": 1.7343810134701733e-06, "loss": 0.7837, "step": 55015 }, { "epoch": 0.6705422105224672, "grad_norm": 2.248411041331111, "learning_rate": 1.7340602950609367e-06, "loss": 0.7, "step": 55020 }, { "epoch": 0.670603146746615, "grad_norm": 2.1835519192469683, "learning_rate": 1.7337395766516997e-06, "loss": 0.6856, "step": 55025 }, { "epoch": 0.6706640829707629, "grad_norm": 3.1492145805450886, "learning_rate": 1.7334188582424632e-06, "loss": 0.7253, "step": 55030 }, { "epoch": 0.6707250191949106, "grad_norm": 2.3467023724423175, "learning_rate": 1.7330981398332266e-06, "loss": 0.713, "step": 55035 }, { "epoch": 0.6707859554190584, "grad_norm": 2.47349786188183, "learning_rate": 1.7327774214239899e-06, "loss": 0.7465, "step": 55040 }, { "epoch": 0.6708468916432062, "grad_norm": 2.5275599459575746, "learning_rate": 1.7324567030147533e-06, "loss": 0.727, "step": 55045 }, { "epoch": 0.670907827867354, "grad_norm": 2.9866327447872316, "learning_rate": 1.7321359846055165e-06, "loss": 0.7708, "step": 55050 }, { "epoch": 0.6709687640915019, "grad_norm": 2.761969119588432, "learning_rate": 1.7318152661962798e-06, "loss": 0.8161, "step": 55055 }, { "epoch": 0.6710297003156497, "grad_norm": 2.4949042188060218, "learning_rate": 1.7314945477870432e-06, "loss": 0.7172, "step": 55060 }, { "epoch": 0.6710906365397975, "grad_norm": 2.3224875109487666, "learning_rate": 1.7311738293778062e-06, "loss": 0.7563, "step": 55065 }, { "epoch": 0.6711515727639452, "grad_norm": 2.972490381766756, "learning_rate": 1.7308531109685697e-06, "loss": 0.7453, "step": 55070 }, { "epoch": 0.671212508988093, "grad_norm": 2.3318151196655412, "learning_rate": 1.730532392559333e-06, "loss": 0.7449, "step": 55075 }, { "epoch": 0.6712734452122409, "grad_norm": 2.9292635072709956, "learning_rate": 1.7302116741500963e-06, "loss": 0.6466, "step": 55080 }, { "epoch": 0.6713343814363887, "grad_norm": 2.654436874830971, "learning_rate": 1.7298909557408598e-06, "loss": 0.6889, "step": 55085 }, { "epoch": 0.6713953176605365, "grad_norm": 2.0404199096856526, "learning_rate": 1.7295702373316232e-06, "loss": 0.7054, "step": 55090 }, { "epoch": 0.6714562538846843, "grad_norm": 2.1488582168061625, "learning_rate": 1.7292495189223862e-06, "loss": 0.77, "step": 55095 }, { "epoch": 0.6715171901088322, "grad_norm": 2.4128646218007086, "learning_rate": 1.7289288005131497e-06, "loss": 0.7006, "step": 55100 }, { "epoch": 0.6715781263329799, "grad_norm": 2.1312156389594, "learning_rate": 1.7286080821039129e-06, "loss": 0.7254, "step": 55105 }, { "epoch": 0.6716390625571277, "grad_norm": 1.961881796241764, "learning_rate": 1.7282873636946761e-06, "loss": 0.6811, "step": 55110 }, { "epoch": 0.6716999987812755, "grad_norm": 2.1459201143390767, "learning_rate": 1.7279666452854396e-06, "loss": 0.7237, "step": 55115 }, { "epoch": 0.6717609350054233, "grad_norm": 2.782818822089372, "learning_rate": 1.7276459268762028e-06, "loss": 0.745, "step": 55120 }, { "epoch": 0.6718218712295712, "grad_norm": 2.035702847316375, "learning_rate": 1.7273252084669662e-06, "loss": 0.6771, "step": 55125 }, { "epoch": 0.671882807453719, "grad_norm": 2.2693865026764137, "learning_rate": 1.7270044900577297e-06, "loss": 0.677, "step": 55130 }, { "epoch": 0.6719437436778667, "grad_norm": 2.6324147778487847, "learning_rate": 1.7266837716484927e-06, "loss": 0.7195, "step": 55135 }, { "epoch": 0.6720046799020145, "grad_norm": 2.1230817533434854, "learning_rate": 1.7263630532392561e-06, "loss": 0.6946, "step": 55140 }, { "epoch": 0.6720656161261623, "grad_norm": 3.4292905766205624, "learning_rate": 1.7260423348300193e-06, "loss": 0.7559, "step": 55145 }, { "epoch": 0.6721265523503102, "grad_norm": 2.106329909940193, "learning_rate": 1.7257216164207826e-06, "loss": 0.7302, "step": 55150 }, { "epoch": 0.672187488574458, "grad_norm": 2.4656259013372392, "learning_rate": 1.725400898011546e-06, "loss": 0.6899, "step": 55155 }, { "epoch": 0.6722484247986058, "grad_norm": 3.1188057548267207, "learning_rate": 1.7250801796023092e-06, "loss": 0.7434, "step": 55160 }, { "epoch": 0.6723093610227536, "grad_norm": 2.4969642814441295, "learning_rate": 1.7247594611930727e-06, "loss": 0.7568, "step": 55165 }, { "epoch": 0.6723702972469013, "grad_norm": 1.8832671255919569, "learning_rate": 1.7244387427838361e-06, "loss": 0.6687, "step": 55170 }, { "epoch": 0.6724312334710492, "grad_norm": 3.0854954293640455, "learning_rate": 1.7241180243745991e-06, "loss": 0.8217, "step": 55175 }, { "epoch": 0.672492169695197, "grad_norm": 2.568795160376126, "learning_rate": 1.7237973059653626e-06, "loss": 0.7431, "step": 55180 }, { "epoch": 0.6725531059193448, "grad_norm": 2.4666705690165593, "learning_rate": 1.7234765875561258e-06, "loss": 0.6397, "step": 55185 }, { "epoch": 0.6726140421434926, "grad_norm": 2.9666077911736854, "learning_rate": 1.7231558691468893e-06, "loss": 0.7819, "step": 55190 }, { "epoch": 0.6726749783676405, "grad_norm": 2.318979291906525, "learning_rate": 1.7228351507376525e-06, "loss": 0.7509, "step": 55195 }, { "epoch": 0.6727359145917883, "grad_norm": 1.9224588955953101, "learning_rate": 1.7225144323284157e-06, "loss": 0.7894, "step": 55200 }, { "epoch": 0.672796850815936, "grad_norm": 2.4708874763536737, "learning_rate": 1.7221937139191792e-06, "loss": 0.75, "step": 55205 }, { "epoch": 0.6728577870400838, "grad_norm": 2.4499988715005103, "learning_rate": 1.7218729955099426e-06, "loss": 0.6465, "step": 55210 }, { "epoch": 0.6729187232642316, "grad_norm": 2.108370264484154, "learning_rate": 1.7215522771007056e-06, "loss": 0.6919, "step": 55215 }, { "epoch": 0.6729796594883795, "grad_norm": 2.9203446467610106, "learning_rate": 1.721231558691469e-06, "loss": 0.7365, "step": 55220 }, { "epoch": 0.6730405957125273, "grad_norm": 3.0291957183170246, "learning_rate": 1.7209108402822323e-06, "loss": 0.696, "step": 55225 }, { "epoch": 0.6731015319366751, "grad_norm": 2.5741295573847847, "learning_rate": 1.7205901218729957e-06, "loss": 0.7495, "step": 55230 }, { "epoch": 0.6731624681608229, "grad_norm": 2.460892144254999, "learning_rate": 1.720269403463759e-06, "loss": 0.6598, "step": 55235 }, { "epoch": 0.6732234043849706, "grad_norm": 2.1087916985955406, "learning_rate": 1.7199486850545222e-06, "loss": 0.808, "step": 55240 }, { "epoch": 0.6732843406091185, "grad_norm": 2.2271860850249885, "learning_rate": 1.7196279666452856e-06, "loss": 0.6945, "step": 55245 }, { "epoch": 0.6733452768332663, "grad_norm": 2.63490143509696, "learning_rate": 1.719307248236049e-06, "loss": 0.7316, "step": 55250 }, { "epoch": 0.6734062130574141, "grad_norm": 2.619693578653504, "learning_rate": 1.718986529826812e-06, "loss": 0.7207, "step": 55255 }, { "epoch": 0.6734671492815619, "grad_norm": 2.2563685255879604, "learning_rate": 1.7186658114175755e-06, "loss": 0.6645, "step": 55260 }, { "epoch": 0.6735280855057098, "grad_norm": 2.4769216730638624, "learning_rate": 1.7183450930083387e-06, "loss": 0.7267, "step": 55265 }, { "epoch": 0.6735890217298576, "grad_norm": 2.407448991076872, "learning_rate": 1.7180243745991022e-06, "loss": 0.6978, "step": 55270 }, { "epoch": 0.6736499579540053, "grad_norm": 2.7288716916126963, "learning_rate": 1.7177036561898654e-06, "loss": 0.7007, "step": 55275 }, { "epoch": 0.6737108941781531, "grad_norm": 2.0870951429760405, "learning_rate": 1.7173829377806286e-06, "loss": 0.7628, "step": 55280 }, { "epoch": 0.6737718304023009, "grad_norm": 2.655669558944582, "learning_rate": 1.717062219371392e-06, "loss": 0.7022, "step": 55285 }, { "epoch": 0.6738327666264488, "grad_norm": 2.9437918040490683, "learning_rate": 1.7167415009621555e-06, "loss": 0.7891, "step": 55290 }, { "epoch": 0.6738937028505966, "grad_norm": 2.1089105313619574, "learning_rate": 1.7164207825529185e-06, "loss": 0.7398, "step": 55295 }, { "epoch": 0.6739546390747444, "grad_norm": 2.345214862571129, "learning_rate": 1.716100064143682e-06, "loss": 0.7644, "step": 55300 }, { "epoch": 0.6740155752988922, "grad_norm": 2.267005419093096, "learning_rate": 1.7157793457344454e-06, "loss": 0.7738, "step": 55305 }, { "epoch": 0.6740765115230399, "grad_norm": 2.335604982972274, "learning_rate": 1.7154586273252086e-06, "loss": 0.7756, "step": 55310 }, { "epoch": 0.6741374477471878, "grad_norm": 2.5438466550747614, "learning_rate": 1.715137908915972e-06, "loss": 0.7652, "step": 55315 }, { "epoch": 0.6741983839713356, "grad_norm": 2.3023706504266648, "learning_rate": 1.714817190506735e-06, "loss": 0.7751, "step": 55320 }, { "epoch": 0.6742593201954834, "grad_norm": 2.291730624306704, "learning_rate": 1.7144964720974985e-06, "loss": 0.7118, "step": 55325 }, { "epoch": 0.6743202564196312, "grad_norm": 4.105673578524625, "learning_rate": 1.714175753688262e-06, "loss": 0.7999, "step": 55330 }, { "epoch": 0.674381192643779, "grad_norm": 2.247501686153987, "learning_rate": 1.713855035279025e-06, "loss": 0.7228, "step": 55335 }, { "epoch": 0.6744421288679269, "grad_norm": 2.5633838469379717, "learning_rate": 1.7135343168697884e-06, "loss": 0.7986, "step": 55340 }, { "epoch": 0.6745030650920746, "grad_norm": 2.317118616942666, "learning_rate": 1.7132135984605519e-06, "loss": 0.7051, "step": 55345 }, { "epoch": 0.6745640013162224, "grad_norm": 2.588547522249868, "learning_rate": 1.7128928800513151e-06, "loss": 0.6606, "step": 55350 }, { "epoch": 0.6746249375403702, "grad_norm": 2.527457601635358, "learning_rate": 1.7125721616420785e-06, "loss": 0.7236, "step": 55355 }, { "epoch": 0.674685873764518, "grad_norm": 2.422953819984765, "learning_rate": 1.7122514432328416e-06, "loss": 0.7201, "step": 55360 }, { "epoch": 0.6747468099886659, "grad_norm": 2.363488684046504, "learning_rate": 1.711930724823605e-06, "loss": 0.6993, "step": 55365 }, { "epoch": 0.6748077462128137, "grad_norm": 2.7318656596401594, "learning_rate": 1.7116100064143684e-06, "loss": 0.7594, "step": 55370 }, { "epoch": 0.6748686824369615, "grad_norm": 2.4643912663501735, "learning_rate": 1.7112892880051315e-06, "loss": 0.7149, "step": 55375 }, { "epoch": 0.6749296186611092, "grad_norm": 2.554352198791476, "learning_rate": 1.710968569595895e-06, "loss": 0.764, "step": 55380 }, { "epoch": 0.674990554885257, "grad_norm": 2.933159154573474, "learning_rate": 1.7106478511866583e-06, "loss": 0.6615, "step": 55385 }, { "epoch": 0.6750514911094049, "grad_norm": 2.468756923092906, "learning_rate": 1.7103271327774216e-06, "loss": 0.7438, "step": 55390 }, { "epoch": 0.6751124273335527, "grad_norm": 2.4108564652636395, "learning_rate": 1.710006414368185e-06, "loss": 0.7484, "step": 55395 }, { "epoch": 0.6751733635577005, "grad_norm": 3.528080916519759, "learning_rate": 1.709685695958948e-06, "loss": 0.6583, "step": 55400 }, { "epoch": 0.6752342997818483, "grad_norm": 1.9115457082877416, "learning_rate": 1.7093649775497115e-06, "loss": 0.7783, "step": 55405 }, { "epoch": 0.6752952360059962, "grad_norm": 3.0887132952657237, "learning_rate": 1.709044259140475e-06, "loss": 0.6848, "step": 55410 }, { "epoch": 0.6753561722301439, "grad_norm": 2.904426377972138, "learning_rate": 1.708723540731238e-06, "loss": 0.7236, "step": 55415 }, { "epoch": 0.6754171084542917, "grad_norm": 2.746233746894171, "learning_rate": 1.7084028223220014e-06, "loss": 0.7478, "step": 55420 }, { "epoch": 0.6754780446784395, "grad_norm": 2.8968791381051906, "learning_rate": 1.7080821039127648e-06, "loss": 0.7183, "step": 55425 }, { "epoch": 0.6755389809025873, "grad_norm": 2.525389550044302, "learning_rate": 1.707761385503528e-06, "loss": 0.787, "step": 55430 }, { "epoch": 0.6755999171267352, "grad_norm": 2.9823680646950192, "learning_rate": 1.7074406670942915e-06, "loss": 0.8709, "step": 55435 }, { "epoch": 0.675660853350883, "grad_norm": 2.2352110535608856, "learning_rate": 1.7071199486850545e-06, "loss": 0.7366, "step": 55440 }, { "epoch": 0.6757217895750308, "grad_norm": 2.2606612730719453, "learning_rate": 1.706799230275818e-06, "loss": 0.7302, "step": 55445 }, { "epoch": 0.6757827257991785, "grad_norm": 2.543421628441727, "learning_rate": 1.7064785118665814e-06, "loss": 0.6998, "step": 55450 }, { "epoch": 0.6758436620233264, "grad_norm": 3.0798798923255757, "learning_rate": 1.7061577934573446e-06, "loss": 0.737, "step": 55455 }, { "epoch": 0.6759045982474742, "grad_norm": 2.952060320161025, "learning_rate": 1.7058370750481078e-06, "loss": 0.7657, "step": 55460 }, { "epoch": 0.675965534471622, "grad_norm": 2.386862571713622, "learning_rate": 1.7055163566388713e-06, "loss": 0.7095, "step": 55465 }, { "epoch": 0.6760264706957698, "grad_norm": 2.2782246326451654, "learning_rate": 1.7051956382296345e-06, "loss": 0.7551, "step": 55470 }, { "epoch": 0.6760874069199176, "grad_norm": 2.136135587183736, "learning_rate": 1.704874919820398e-06, "loss": 0.6825, "step": 55475 }, { "epoch": 0.6761483431440655, "grad_norm": 2.3633592818359155, "learning_rate": 1.704554201411161e-06, "loss": 0.7988, "step": 55480 }, { "epoch": 0.6762092793682132, "grad_norm": 2.511399731583693, "learning_rate": 1.7042334830019244e-06, "loss": 0.7592, "step": 55485 }, { "epoch": 0.676270215592361, "grad_norm": 2.85356653164319, "learning_rate": 1.7039127645926878e-06, "loss": 0.7344, "step": 55490 }, { "epoch": 0.6763311518165088, "grad_norm": 2.656711344000587, "learning_rate": 1.703592046183451e-06, "loss": 0.7157, "step": 55495 }, { "epoch": 0.6763920880406566, "grad_norm": 2.922390874499204, "learning_rate": 1.7032713277742143e-06, "loss": 0.7212, "step": 55500 }, { "epoch": 0.6764530242648045, "grad_norm": 2.183818529612484, "learning_rate": 1.7029506093649777e-06, "loss": 0.7264, "step": 55505 }, { "epoch": 0.6765139604889523, "grad_norm": 2.342385087805259, "learning_rate": 1.702629890955741e-06, "loss": 0.7707, "step": 55510 }, { "epoch": 0.6765748967131001, "grad_norm": 2.360580469640214, "learning_rate": 1.7023091725465044e-06, "loss": 0.6556, "step": 55515 }, { "epoch": 0.6766358329372478, "grad_norm": 2.8521186740971882, "learning_rate": 1.7019884541372674e-06, "loss": 0.6945, "step": 55520 }, { "epoch": 0.6766967691613957, "grad_norm": 7.064113738330679, "learning_rate": 1.7016677357280309e-06, "loss": 0.7902, "step": 55525 }, { "epoch": 0.6767577053855435, "grad_norm": 2.3653406318904766, "learning_rate": 1.7013470173187943e-06, "loss": 0.7587, "step": 55530 }, { "epoch": 0.6768186416096913, "grad_norm": 2.3153467885737973, "learning_rate": 1.7010262989095575e-06, "loss": 0.6602, "step": 55535 }, { "epoch": 0.6768795778338391, "grad_norm": 2.992141816295201, "learning_rate": 1.700705580500321e-06, "loss": 0.8075, "step": 55540 }, { "epoch": 0.6769405140579869, "grad_norm": 2.2258743990143066, "learning_rate": 1.7003848620910842e-06, "loss": 0.7403, "step": 55545 }, { "epoch": 0.6770014502821348, "grad_norm": 2.3856687824884286, "learning_rate": 1.7000641436818474e-06, "loss": 0.6853, "step": 55550 }, { "epoch": 0.6770623865062825, "grad_norm": 2.3861822962014934, "learning_rate": 1.6997434252726109e-06, "loss": 0.7199, "step": 55555 }, { "epoch": 0.6771233227304303, "grad_norm": 2.141019170126935, "learning_rate": 1.6994227068633739e-06, "loss": 0.7839, "step": 55560 }, { "epoch": 0.6771842589545781, "grad_norm": 2.7085694874060855, "learning_rate": 1.6991019884541373e-06, "loss": 0.7418, "step": 55565 }, { "epoch": 0.677245195178726, "grad_norm": 2.799141298079315, "learning_rate": 1.6987812700449008e-06, "loss": 0.7228, "step": 55570 }, { "epoch": 0.6773061314028738, "grad_norm": 2.2254570570532173, "learning_rate": 1.698460551635664e-06, "loss": 0.7215, "step": 55575 }, { "epoch": 0.6773670676270216, "grad_norm": 2.5883382372145363, "learning_rate": 1.6981398332264274e-06, "loss": 0.7615, "step": 55580 }, { "epoch": 0.6774280038511694, "grad_norm": 2.8187502937898006, "learning_rate": 1.6978191148171907e-06, "loss": 0.7134, "step": 55585 }, { "epoch": 0.6774889400753171, "grad_norm": 2.6117404654522383, "learning_rate": 1.6974983964079539e-06, "loss": 0.7726, "step": 55590 }, { "epoch": 0.677549876299465, "grad_norm": 2.527375113293185, "learning_rate": 1.6971776779987173e-06, "loss": 0.7237, "step": 55595 }, { "epoch": 0.6776108125236128, "grad_norm": 1.911391267216389, "learning_rate": 1.6968569595894808e-06, "loss": 0.7278, "step": 55600 }, { "epoch": 0.6776717487477606, "grad_norm": 2.669718070366674, "learning_rate": 1.6965362411802438e-06, "loss": 0.7367, "step": 55605 }, { "epoch": 0.6777326849719084, "grad_norm": 2.099362603804477, "learning_rate": 1.6962155227710072e-06, "loss": 0.8082, "step": 55610 }, { "epoch": 0.6777936211960562, "grad_norm": 2.097555881871805, "learning_rate": 1.6958948043617705e-06, "loss": 0.6637, "step": 55615 }, { "epoch": 0.6778545574202041, "grad_norm": 2.406675200155001, "learning_rate": 1.695574085952534e-06, "loss": 0.6917, "step": 55620 }, { "epoch": 0.6779154936443518, "grad_norm": 2.3678503903275425, "learning_rate": 1.6952533675432971e-06, "loss": 0.7039, "step": 55625 }, { "epoch": 0.6779764298684996, "grad_norm": 2.500006934821808, "learning_rate": 1.6949326491340604e-06, "loss": 0.741, "step": 55630 }, { "epoch": 0.6780373660926474, "grad_norm": 2.3714223181227627, "learning_rate": 1.6946119307248238e-06, "loss": 0.7552, "step": 55635 }, { "epoch": 0.6780983023167952, "grad_norm": 2.493950075216967, "learning_rate": 1.6942912123155872e-06, "loss": 0.7492, "step": 55640 }, { "epoch": 0.6781592385409431, "grad_norm": 2.5318108367113217, "learning_rate": 1.6939704939063503e-06, "loss": 0.7168, "step": 55645 }, { "epoch": 0.6782201747650909, "grad_norm": 2.320112848960496, "learning_rate": 1.6936497754971137e-06, "loss": 0.6766, "step": 55650 }, { "epoch": 0.6782811109892387, "grad_norm": 3.0506619845842193, "learning_rate": 1.693329057087877e-06, "loss": 0.7603, "step": 55655 }, { "epoch": 0.6783420472133864, "grad_norm": 2.3587824865238396, "learning_rate": 1.6930083386786404e-06, "loss": 0.6752, "step": 55660 }, { "epoch": 0.6784029834375342, "grad_norm": 2.8567851021133976, "learning_rate": 1.6926876202694038e-06, "loss": 0.7069, "step": 55665 }, { "epoch": 0.6784639196616821, "grad_norm": 2.2283868671535267, "learning_rate": 1.6923669018601668e-06, "loss": 0.7889, "step": 55670 }, { "epoch": 0.6785248558858299, "grad_norm": 2.998221783185379, "learning_rate": 1.6920461834509303e-06, "loss": 0.8199, "step": 55675 }, { "epoch": 0.6785857921099777, "grad_norm": 2.3416820055750476, "learning_rate": 1.6917254650416937e-06, "loss": 0.7299, "step": 55680 }, { "epoch": 0.6786467283341255, "grad_norm": 2.3892652585304943, "learning_rate": 1.6914047466324567e-06, "loss": 0.7479, "step": 55685 }, { "epoch": 0.6787076645582734, "grad_norm": 2.602175153240951, "learning_rate": 1.6910840282232202e-06, "loss": 0.7766, "step": 55690 }, { "epoch": 0.6787686007824211, "grad_norm": 2.376274446178603, "learning_rate": 1.6907633098139834e-06, "loss": 0.7269, "step": 55695 }, { "epoch": 0.6788295370065689, "grad_norm": 2.480214239886696, "learning_rate": 1.6904425914047468e-06, "loss": 0.7318, "step": 55700 }, { "epoch": 0.6788904732307167, "grad_norm": 3.8694416376410268, "learning_rate": 1.6901218729955103e-06, "loss": 0.8503, "step": 55705 }, { "epoch": 0.6789514094548645, "grad_norm": 2.2942396647298615, "learning_rate": 1.6898011545862733e-06, "loss": 0.7028, "step": 55710 }, { "epoch": 0.6790123456790124, "grad_norm": 2.6525383962341933, "learning_rate": 1.6894804361770367e-06, "loss": 0.7291, "step": 55715 }, { "epoch": 0.6790732819031602, "grad_norm": 2.7505771688424585, "learning_rate": 1.6891597177678002e-06, "loss": 0.7313, "step": 55720 }, { "epoch": 0.679134218127308, "grad_norm": 2.5506084695979427, "learning_rate": 1.6888389993585632e-06, "loss": 0.6213, "step": 55725 }, { "epoch": 0.6791951543514557, "grad_norm": 2.6377914776580123, "learning_rate": 1.6885182809493266e-06, "loss": 0.7044, "step": 55730 }, { "epoch": 0.6792560905756035, "grad_norm": 2.5276371787446585, "learning_rate": 1.6881975625400898e-06, "loss": 0.7004, "step": 55735 }, { "epoch": 0.6793170267997514, "grad_norm": 2.4823563843562484, "learning_rate": 1.6878768441308533e-06, "loss": 0.818, "step": 55740 }, { "epoch": 0.6793779630238992, "grad_norm": 1.8218950992662097, "learning_rate": 1.6875561257216167e-06, "loss": 0.6875, "step": 55745 }, { "epoch": 0.679438899248047, "grad_norm": 2.1424983091628658, "learning_rate": 1.6872354073123797e-06, "loss": 0.6709, "step": 55750 }, { "epoch": 0.6794998354721948, "grad_norm": 2.663115882977802, "learning_rate": 1.6869146889031432e-06, "loss": 0.7097, "step": 55755 }, { "epoch": 0.6795607716963427, "grad_norm": 2.695306917161634, "learning_rate": 1.6865939704939066e-06, "loss": 0.7775, "step": 55760 }, { "epoch": 0.6796217079204904, "grad_norm": 2.4068786292890247, "learning_rate": 1.6862732520846696e-06, "loss": 0.7082, "step": 55765 }, { "epoch": 0.6796826441446382, "grad_norm": 2.4876083154266055, "learning_rate": 1.685952533675433e-06, "loss": 0.7446, "step": 55770 }, { "epoch": 0.679743580368786, "grad_norm": 2.9748679352740854, "learning_rate": 1.6856318152661963e-06, "loss": 0.7647, "step": 55775 }, { "epoch": 0.6798045165929338, "grad_norm": 2.2084419034515133, "learning_rate": 1.6853110968569597e-06, "loss": 0.7566, "step": 55780 }, { "epoch": 0.6798654528170817, "grad_norm": 2.718209307285031, "learning_rate": 1.6849903784477232e-06, "loss": 0.704, "step": 55785 }, { "epoch": 0.6799263890412295, "grad_norm": 2.5101245176864246, "learning_rate": 1.6846696600384862e-06, "loss": 0.7201, "step": 55790 }, { "epoch": 0.6799873252653773, "grad_norm": 3.2073224081235145, "learning_rate": 1.6843489416292496e-06, "loss": 0.6997, "step": 55795 }, { "epoch": 0.680048261489525, "grad_norm": 2.5471768940609922, "learning_rate": 1.684028223220013e-06, "loss": 0.7614, "step": 55800 }, { "epoch": 0.6801091977136728, "grad_norm": 2.616150752480177, "learning_rate": 1.6837075048107763e-06, "loss": 0.731, "step": 55805 }, { "epoch": 0.6801701339378207, "grad_norm": 3.892223065606388, "learning_rate": 1.6833867864015395e-06, "loss": 0.7014, "step": 55810 }, { "epoch": 0.6802310701619685, "grad_norm": 2.136822351953338, "learning_rate": 1.6830660679923028e-06, "loss": 0.7502, "step": 55815 }, { "epoch": 0.6802920063861163, "grad_norm": 2.413669031669656, "learning_rate": 1.6827453495830662e-06, "loss": 0.7231, "step": 55820 }, { "epoch": 0.6803529426102641, "grad_norm": 2.5722961108388853, "learning_rate": 1.6824246311738297e-06, "loss": 0.7569, "step": 55825 }, { "epoch": 0.680413878834412, "grad_norm": 2.1537364699434067, "learning_rate": 1.6821039127645927e-06, "loss": 0.7955, "step": 55830 }, { "epoch": 0.6804748150585597, "grad_norm": 2.246854165147695, "learning_rate": 1.6817831943553561e-06, "loss": 0.7092, "step": 55835 }, { "epoch": 0.6805357512827075, "grad_norm": 2.0157288367025274, "learning_rate": 1.6814624759461196e-06, "loss": 0.6871, "step": 55840 }, { "epoch": 0.6805966875068553, "grad_norm": 2.44163103177582, "learning_rate": 1.6811417575368828e-06, "loss": 0.796, "step": 55845 }, { "epoch": 0.6806576237310031, "grad_norm": 3.01882492705861, "learning_rate": 1.680821039127646e-06, "loss": 0.6688, "step": 55850 }, { "epoch": 0.680718559955151, "grad_norm": 1.9760522493225854, "learning_rate": 1.6805003207184092e-06, "loss": 0.7821, "step": 55855 }, { "epoch": 0.6807794961792988, "grad_norm": 2.4419624752510907, "learning_rate": 1.6801796023091727e-06, "loss": 0.6271, "step": 55860 }, { "epoch": 0.6808404324034466, "grad_norm": 2.8398996140723685, "learning_rate": 1.6798588838999361e-06, "loss": 0.7948, "step": 55865 }, { "epoch": 0.6809013686275943, "grad_norm": 2.463177736153319, "learning_rate": 1.6795381654906991e-06, "loss": 0.6963, "step": 55870 }, { "epoch": 0.6809623048517421, "grad_norm": 2.3795706438528894, "learning_rate": 1.6792174470814626e-06, "loss": 0.7454, "step": 55875 }, { "epoch": 0.68102324107589, "grad_norm": 2.2121309799160276, "learning_rate": 1.678896728672226e-06, "loss": 0.7266, "step": 55880 }, { "epoch": 0.6810841773000378, "grad_norm": 2.8136883381880806, "learning_rate": 1.6785760102629892e-06, "loss": 0.8029, "step": 55885 }, { "epoch": 0.6811451135241856, "grad_norm": 2.4321271741735964, "learning_rate": 1.6782552918537525e-06, "loss": 0.7052, "step": 55890 }, { "epoch": 0.6812060497483334, "grad_norm": 2.0496232893579562, "learning_rate": 1.677934573444516e-06, "loss": 0.7542, "step": 55895 }, { "epoch": 0.6812669859724813, "grad_norm": 2.052940173522507, "learning_rate": 1.6776138550352791e-06, "loss": 0.6499, "step": 55900 }, { "epoch": 0.681327922196629, "grad_norm": 2.071513530893845, "learning_rate": 1.6772931366260426e-06, "loss": 0.7448, "step": 55905 }, { "epoch": 0.6813888584207768, "grad_norm": 2.392444778072107, "learning_rate": 1.6769724182168056e-06, "loss": 0.73, "step": 55910 }, { "epoch": 0.6814497946449246, "grad_norm": 2.1451856224288552, "learning_rate": 1.676651699807569e-06, "loss": 0.705, "step": 55915 }, { "epoch": 0.6815107308690724, "grad_norm": 2.8057635200490103, "learning_rate": 1.6763309813983325e-06, "loss": 0.7526, "step": 55920 }, { "epoch": 0.6815716670932203, "grad_norm": 2.250620591485237, "learning_rate": 1.6760102629890957e-06, "loss": 0.7831, "step": 55925 }, { "epoch": 0.6816326033173681, "grad_norm": 2.4579083992626467, "learning_rate": 1.6756895445798591e-06, "loss": 0.725, "step": 55930 }, { "epoch": 0.6816935395415159, "grad_norm": 2.446642843665531, "learning_rate": 1.6753688261706224e-06, "loss": 0.7114, "step": 55935 }, { "epoch": 0.6817544757656636, "grad_norm": 2.2163617264129893, "learning_rate": 1.6750481077613856e-06, "loss": 0.6944, "step": 55940 }, { "epoch": 0.6818154119898114, "grad_norm": 2.3707168246667587, "learning_rate": 1.674727389352149e-06, "loss": 0.794, "step": 55945 }, { "epoch": 0.6818763482139593, "grad_norm": 1.929324359725557, "learning_rate": 1.674406670942912e-06, "loss": 0.7354, "step": 55950 }, { "epoch": 0.6819372844381071, "grad_norm": 2.5467707079054986, "learning_rate": 1.6740859525336755e-06, "loss": 0.7672, "step": 55955 }, { "epoch": 0.6819982206622549, "grad_norm": 2.986048602642856, "learning_rate": 1.673765234124439e-06, "loss": 0.7149, "step": 55960 }, { "epoch": 0.6820591568864027, "grad_norm": 2.1609604757523893, "learning_rate": 1.6734445157152022e-06, "loss": 0.7552, "step": 55965 }, { "epoch": 0.6821200931105506, "grad_norm": 2.6297082500755464, "learning_rate": 1.6731237973059656e-06, "loss": 0.8139, "step": 55970 }, { "epoch": 0.6821810293346983, "grad_norm": 3.933551718664778, "learning_rate": 1.6728030788967288e-06, "loss": 0.7446, "step": 55975 }, { "epoch": 0.6822419655588461, "grad_norm": 2.418654000284168, "learning_rate": 1.672482360487492e-06, "loss": 0.7395, "step": 55980 }, { "epoch": 0.6823029017829939, "grad_norm": 2.3924089314717323, "learning_rate": 1.6721616420782555e-06, "loss": 0.7242, "step": 55985 }, { "epoch": 0.6823638380071417, "grad_norm": 2.236652280606035, "learning_rate": 1.6718409236690185e-06, "loss": 0.7338, "step": 55990 }, { "epoch": 0.6824247742312896, "grad_norm": 2.653490951667674, "learning_rate": 1.671520205259782e-06, "loss": 0.6746, "step": 55995 }, { "epoch": 0.6824857104554374, "grad_norm": 2.7240585203457743, "learning_rate": 1.6711994868505454e-06, "loss": 0.7574, "step": 56000 }, { "epoch": 0.6825466466795852, "grad_norm": 2.431907423610703, "learning_rate": 1.6708787684413086e-06, "loss": 0.7232, "step": 56005 }, { "epoch": 0.6826075829037329, "grad_norm": 2.3591281008050955, "learning_rate": 1.670558050032072e-06, "loss": 0.7695, "step": 56010 }, { "epoch": 0.6826685191278807, "grad_norm": 2.5180920335678993, "learning_rate": 1.6702373316228355e-06, "loss": 0.6504, "step": 56015 }, { "epoch": 0.6827294553520286, "grad_norm": 3.239721354123163, "learning_rate": 1.6699166132135985e-06, "loss": 0.7654, "step": 56020 }, { "epoch": 0.6827903915761764, "grad_norm": 3.294985280547689, "learning_rate": 1.669595894804362e-06, "loss": 0.7227, "step": 56025 }, { "epoch": 0.6828513278003242, "grad_norm": 2.573937803295304, "learning_rate": 1.6692751763951252e-06, "loss": 0.7111, "step": 56030 }, { "epoch": 0.682912264024472, "grad_norm": 2.1603094652982002, "learning_rate": 1.6689544579858884e-06, "loss": 0.7067, "step": 56035 }, { "epoch": 0.6829732002486198, "grad_norm": 2.447458396209953, "learning_rate": 1.6686337395766519e-06, "loss": 0.7771, "step": 56040 }, { "epoch": 0.6830341364727676, "grad_norm": 2.5644393032567865, "learning_rate": 1.668313021167415e-06, "loss": 0.7513, "step": 56045 }, { "epoch": 0.6830950726969154, "grad_norm": 2.031609523099599, "learning_rate": 1.6679923027581785e-06, "loss": 0.7175, "step": 56050 }, { "epoch": 0.6831560089210632, "grad_norm": 2.218993749990862, "learning_rate": 1.667671584348942e-06, "loss": 0.7698, "step": 56055 }, { "epoch": 0.683216945145211, "grad_norm": 2.2203111688688164, "learning_rate": 1.667350865939705e-06, "loss": 0.7765, "step": 56060 }, { "epoch": 0.6832778813693589, "grad_norm": 2.27470920848685, "learning_rate": 1.6670301475304684e-06, "loss": 0.7404, "step": 56065 }, { "epoch": 0.6833388175935067, "grad_norm": 3.8642361857053036, "learning_rate": 1.6667094291212317e-06, "loss": 0.6842, "step": 56070 }, { "epoch": 0.6833997538176545, "grad_norm": 2.62561712001112, "learning_rate": 1.6663887107119949e-06, "loss": 0.748, "step": 56075 }, { "epoch": 0.6834606900418022, "grad_norm": 2.4265907358359673, "learning_rate": 1.6660679923027583e-06, "loss": 0.7816, "step": 56080 }, { "epoch": 0.68352162626595, "grad_norm": 2.3441552560983676, "learning_rate": 1.6657472738935216e-06, "loss": 0.6922, "step": 56085 }, { "epoch": 0.6835825624900979, "grad_norm": 2.657701947439411, "learning_rate": 1.665426555484285e-06, "loss": 0.7767, "step": 56090 }, { "epoch": 0.6836434987142457, "grad_norm": 2.5164880327176746, "learning_rate": 1.6651058370750484e-06, "loss": 0.7168, "step": 56095 }, { "epoch": 0.6837044349383935, "grad_norm": 3.3125789203741194, "learning_rate": 1.6647851186658115e-06, "loss": 0.7588, "step": 56100 }, { "epoch": 0.6837653711625413, "grad_norm": 2.0263535908897854, "learning_rate": 1.664464400256575e-06, "loss": 0.7724, "step": 56105 }, { "epoch": 0.683826307386689, "grad_norm": 2.388725107058125, "learning_rate": 1.6641436818473381e-06, "loss": 0.7252, "step": 56110 }, { "epoch": 0.6838872436108369, "grad_norm": 2.131284728987725, "learning_rate": 1.6638229634381014e-06, "loss": 0.6833, "step": 56115 }, { "epoch": 0.6839481798349847, "grad_norm": 2.0486811809547856, "learning_rate": 1.6635022450288648e-06, "loss": 0.6424, "step": 56120 }, { "epoch": 0.6840091160591325, "grad_norm": 2.4923027023945235, "learning_rate": 1.663181526619628e-06, "loss": 0.6998, "step": 56125 }, { "epoch": 0.6840700522832803, "grad_norm": 2.971324422686556, "learning_rate": 1.6628608082103915e-06, "loss": 0.756, "step": 56130 }, { "epoch": 0.6841309885074282, "grad_norm": 2.6314309710430996, "learning_rate": 1.662540089801155e-06, "loss": 0.6987, "step": 56135 }, { "epoch": 0.684191924731576, "grad_norm": 2.3003626202250036, "learning_rate": 1.662219371391918e-06, "loss": 0.7366, "step": 56140 }, { "epoch": 0.6842528609557237, "grad_norm": 2.6833114320075317, "learning_rate": 1.6618986529826814e-06, "loss": 0.7458, "step": 56145 }, { "epoch": 0.6843137971798715, "grad_norm": 2.245802071098176, "learning_rate": 1.6615779345734446e-06, "loss": 0.6877, "step": 56150 }, { "epoch": 0.6843747334040193, "grad_norm": 2.2961302977435407, "learning_rate": 1.661257216164208e-06, "loss": 0.7358, "step": 56155 }, { "epoch": 0.6844356696281672, "grad_norm": 2.477095544430048, "learning_rate": 1.6609364977549713e-06, "loss": 0.7712, "step": 56160 }, { "epoch": 0.684496605852315, "grad_norm": 2.4540787122656025, "learning_rate": 1.6606157793457345e-06, "loss": 0.685, "step": 56165 }, { "epoch": 0.6845575420764628, "grad_norm": 2.506393541894494, "learning_rate": 1.660295060936498e-06, "loss": 0.7242, "step": 56170 }, { "epoch": 0.6846184783006106, "grad_norm": 2.4790752966153966, "learning_rate": 1.6599743425272614e-06, "loss": 0.7467, "step": 56175 }, { "epoch": 0.6846794145247583, "grad_norm": 2.7748914131954767, "learning_rate": 1.6596536241180244e-06, "loss": 0.7118, "step": 56180 }, { "epoch": 0.6847403507489062, "grad_norm": 2.827165783829784, "learning_rate": 1.6593329057087878e-06, "loss": 0.7365, "step": 56185 }, { "epoch": 0.684801286973054, "grad_norm": 2.0387516392667333, "learning_rate": 1.659012187299551e-06, "loss": 0.7177, "step": 56190 }, { "epoch": 0.6848622231972018, "grad_norm": 2.0567451345302805, "learning_rate": 1.6586914688903145e-06, "loss": 0.7362, "step": 56195 }, { "epoch": 0.6849231594213496, "grad_norm": 2.9187809980363424, "learning_rate": 1.6583707504810777e-06, "loss": 0.7441, "step": 56200 }, { "epoch": 0.6849840956454974, "grad_norm": 2.880801711353988, "learning_rate": 1.658050032071841e-06, "loss": 0.7687, "step": 56205 }, { "epoch": 0.6850450318696453, "grad_norm": 2.309403246229778, "learning_rate": 1.6577293136626044e-06, "loss": 0.6331, "step": 56210 }, { "epoch": 0.685105968093793, "grad_norm": 2.4892403637581584, "learning_rate": 1.6574085952533678e-06, "loss": 0.7378, "step": 56215 }, { "epoch": 0.6851669043179408, "grad_norm": 2.3971565263509813, "learning_rate": 1.6570878768441308e-06, "loss": 0.7332, "step": 56220 }, { "epoch": 0.6852278405420886, "grad_norm": 2.8870078795371996, "learning_rate": 1.6567671584348943e-06, "loss": 0.7515, "step": 56225 }, { "epoch": 0.6852887767662365, "grad_norm": 2.427159713944658, "learning_rate": 1.6564464400256577e-06, "loss": 0.7035, "step": 56230 }, { "epoch": 0.6853497129903843, "grad_norm": 2.367837256032603, "learning_rate": 1.656125721616421e-06, "loss": 0.7344, "step": 56235 }, { "epoch": 0.6854106492145321, "grad_norm": 2.3827719758170334, "learning_rate": 1.6558050032071842e-06, "loss": 0.8085, "step": 56240 }, { "epoch": 0.6854715854386799, "grad_norm": 2.3788100890664508, "learning_rate": 1.6554842847979474e-06, "loss": 0.7356, "step": 56245 }, { "epoch": 0.6855325216628276, "grad_norm": 2.08348081676871, "learning_rate": 1.6551635663887109e-06, "loss": 0.7189, "step": 56250 }, { "epoch": 0.6855934578869755, "grad_norm": 2.255275478809079, "learning_rate": 1.6548428479794743e-06, "loss": 0.7513, "step": 56255 }, { "epoch": 0.6856543941111233, "grad_norm": 2.5123293977755767, "learning_rate": 1.6545221295702373e-06, "loss": 0.7027, "step": 56260 }, { "epoch": 0.6857153303352711, "grad_norm": 2.6736517501370147, "learning_rate": 1.6542014111610008e-06, "loss": 0.6965, "step": 56265 }, { "epoch": 0.6857762665594189, "grad_norm": 2.7701845922524435, "learning_rate": 1.6538806927517642e-06, "loss": 0.7199, "step": 56270 }, { "epoch": 0.6858372027835667, "grad_norm": 2.5216038021965854, "learning_rate": 1.6535599743425274e-06, "loss": 0.7559, "step": 56275 }, { "epoch": 0.6858981390077146, "grad_norm": 3.383146318180473, "learning_rate": 1.6532392559332909e-06, "loss": 0.7379, "step": 56280 }, { "epoch": 0.6859590752318623, "grad_norm": 2.5989505211620516, "learning_rate": 1.6529185375240539e-06, "loss": 0.7332, "step": 56285 }, { "epoch": 0.6860200114560101, "grad_norm": 2.577376181257041, "learning_rate": 1.6525978191148173e-06, "loss": 0.7581, "step": 56290 }, { "epoch": 0.6860809476801579, "grad_norm": 2.779224195669194, "learning_rate": 1.6522771007055808e-06, "loss": 0.6665, "step": 56295 }, { "epoch": 0.6861418839043057, "grad_norm": 2.2823735276863117, "learning_rate": 1.6519563822963438e-06, "loss": 0.7226, "step": 56300 }, { "epoch": 0.6862028201284536, "grad_norm": 3.8420262139986194, "learning_rate": 1.6516356638871072e-06, "loss": 0.7026, "step": 56305 }, { "epoch": 0.6862637563526014, "grad_norm": 2.7084356953255795, "learning_rate": 1.6513149454778707e-06, "loss": 0.6252, "step": 56310 }, { "epoch": 0.6863246925767492, "grad_norm": 2.7558956710471882, "learning_rate": 1.6509942270686339e-06, "loss": 0.735, "step": 56315 }, { "epoch": 0.6863856288008969, "grad_norm": 2.739112860106117, "learning_rate": 1.6506735086593973e-06, "loss": 0.7249, "step": 56320 }, { "epoch": 0.6864465650250448, "grad_norm": 2.6000904642521196, "learning_rate": 1.6503527902501603e-06, "loss": 0.7808, "step": 56325 }, { "epoch": 0.6865075012491926, "grad_norm": 2.3591720324395746, "learning_rate": 1.6500320718409238e-06, "loss": 0.7546, "step": 56330 }, { "epoch": 0.6865684374733404, "grad_norm": 3.0326378604655124, "learning_rate": 1.6497113534316872e-06, "loss": 0.7933, "step": 56335 }, { "epoch": 0.6866293736974882, "grad_norm": 3.492330670744462, "learning_rate": 1.6493906350224502e-06, "loss": 0.7625, "step": 56340 }, { "epoch": 0.686690309921636, "grad_norm": 2.5791529498357972, "learning_rate": 1.6490699166132137e-06, "loss": 0.7278, "step": 56345 }, { "epoch": 0.6867512461457839, "grad_norm": 2.0580315258692186, "learning_rate": 1.6487491982039771e-06, "loss": 0.7117, "step": 56350 }, { "epoch": 0.6868121823699316, "grad_norm": 2.003479874783246, "learning_rate": 1.6484284797947403e-06, "loss": 0.7528, "step": 56355 }, { "epoch": 0.6868731185940794, "grad_norm": 2.5680795562342524, "learning_rate": 1.6481077613855038e-06, "loss": 0.7229, "step": 56360 }, { "epoch": 0.6869340548182272, "grad_norm": 4.418263204340767, "learning_rate": 1.6477870429762668e-06, "loss": 0.7956, "step": 56365 }, { "epoch": 0.686994991042375, "grad_norm": 2.16361345209312, "learning_rate": 1.6474663245670302e-06, "loss": 0.6734, "step": 56370 }, { "epoch": 0.6870559272665229, "grad_norm": 2.999643746433652, "learning_rate": 1.6471456061577937e-06, "loss": 0.7254, "step": 56375 }, { "epoch": 0.6871168634906707, "grad_norm": 3.1657725417679443, "learning_rate": 1.646824887748557e-06, "loss": 0.752, "step": 56380 }, { "epoch": 0.6871777997148185, "grad_norm": 2.6558066519242622, "learning_rate": 1.6465041693393201e-06, "loss": 0.8064, "step": 56385 }, { "epoch": 0.6872387359389662, "grad_norm": 2.301375058744324, "learning_rate": 1.6461834509300836e-06, "loss": 0.7036, "step": 56390 }, { "epoch": 0.687299672163114, "grad_norm": 3.2266176815356205, "learning_rate": 1.6458627325208468e-06, "loss": 0.7216, "step": 56395 }, { "epoch": 0.6873606083872619, "grad_norm": 2.0733299801588783, "learning_rate": 1.6455420141116103e-06, "loss": 0.6809, "step": 56400 }, { "epoch": 0.6874215446114097, "grad_norm": 2.83870222011525, "learning_rate": 1.6452212957023733e-06, "loss": 0.77, "step": 56405 }, { "epoch": 0.6874824808355575, "grad_norm": 2.3119558573772134, "learning_rate": 1.6449005772931367e-06, "loss": 0.7387, "step": 56410 }, { "epoch": 0.6875434170597053, "grad_norm": 4.2275538139908, "learning_rate": 1.6445798588839001e-06, "loss": 0.7872, "step": 56415 }, { "epoch": 0.6876043532838532, "grad_norm": 2.863457020777118, "learning_rate": 1.6442591404746634e-06, "loss": 0.7911, "step": 56420 }, { "epoch": 0.6876652895080009, "grad_norm": 1.9040633859547595, "learning_rate": 1.6439384220654266e-06, "loss": 0.6887, "step": 56425 }, { "epoch": 0.6877262257321487, "grad_norm": 2.216100646795831, "learning_rate": 1.64361770365619e-06, "loss": 0.7265, "step": 56430 }, { "epoch": 0.6877871619562965, "grad_norm": 2.251679470584007, "learning_rate": 1.6432969852469533e-06, "loss": 0.7504, "step": 56435 }, { "epoch": 0.6878480981804443, "grad_norm": 2.4536853594860895, "learning_rate": 1.6429762668377167e-06, "loss": 0.7951, "step": 56440 }, { "epoch": 0.6879090344045922, "grad_norm": 2.494310383537088, "learning_rate": 1.6426555484284797e-06, "loss": 0.7273, "step": 56445 }, { "epoch": 0.68796997062874, "grad_norm": 2.3895935822004377, "learning_rate": 1.6423348300192432e-06, "loss": 0.7724, "step": 56450 }, { "epoch": 0.6880309068528878, "grad_norm": 2.189242164756568, "learning_rate": 1.6420141116100066e-06, "loss": 0.6507, "step": 56455 }, { "epoch": 0.6880918430770355, "grad_norm": 2.7122951252052574, "learning_rate": 1.6416933932007698e-06, "loss": 0.7496, "step": 56460 }, { "epoch": 0.6881527793011833, "grad_norm": 3.2443860553174204, "learning_rate": 1.641372674791533e-06, "loss": 0.7656, "step": 56465 }, { "epoch": 0.6882137155253312, "grad_norm": 2.4806135808889436, "learning_rate": 1.6410519563822965e-06, "loss": 0.7063, "step": 56470 }, { "epoch": 0.688274651749479, "grad_norm": 2.48674719924312, "learning_rate": 1.6407312379730597e-06, "loss": 0.7134, "step": 56475 }, { "epoch": 0.6883355879736268, "grad_norm": 2.6441583088436498, "learning_rate": 1.6404105195638232e-06, "loss": 0.7048, "step": 56480 }, { "epoch": 0.6883965241977746, "grad_norm": 4.494624898120333, "learning_rate": 1.6400898011545862e-06, "loss": 0.6786, "step": 56485 }, { "epoch": 0.6884574604219225, "grad_norm": 2.2660491544436563, "learning_rate": 1.6397690827453496e-06, "loss": 0.7924, "step": 56490 }, { "epoch": 0.6885183966460702, "grad_norm": 2.2308408184903357, "learning_rate": 1.639448364336113e-06, "loss": 0.7454, "step": 56495 }, { "epoch": 0.688579332870218, "grad_norm": 2.531118630060659, "learning_rate": 1.6391276459268763e-06, "loss": 0.7587, "step": 56500 }, { "epoch": 0.6886402690943658, "grad_norm": 2.4039370491184657, "learning_rate": 1.6388069275176397e-06, "loss": 0.779, "step": 56505 }, { "epoch": 0.6887012053185136, "grad_norm": 2.487335026636271, "learning_rate": 1.638486209108403e-06, "loss": 0.7798, "step": 56510 }, { "epoch": 0.6887621415426615, "grad_norm": 2.232534023858205, "learning_rate": 1.6381654906991662e-06, "loss": 0.7489, "step": 56515 }, { "epoch": 0.6888230777668093, "grad_norm": 2.4727759871170294, "learning_rate": 1.6378447722899296e-06, "loss": 0.766, "step": 56520 }, { "epoch": 0.6888840139909571, "grad_norm": 2.6084325286015018, "learning_rate": 1.637524053880693e-06, "loss": 0.7402, "step": 56525 }, { "epoch": 0.6889449502151048, "grad_norm": 2.58793448566489, "learning_rate": 1.637203335471456e-06, "loss": 0.6444, "step": 56530 }, { "epoch": 0.6890058864392526, "grad_norm": 1.919015471726367, "learning_rate": 1.6368826170622195e-06, "loss": 0.7238, "step": 56535 }, { "epoch": 0.6890668226634005, "grad_norm": 2.2030116890481084, "learning_rate": 1.6365618986529828e-06, "loss": 0.6796, "step": 56540 }, { "epoch": 0.6891277588875483, "grad_norm": 2.157827068914611, "learning_rate": 1.6362411802437462e-06, "loss": 0.6954, "step": 56545 }, { "epoch": 0.6891886951116961, "grad_norm": 2.4064407608195237, "learning_rate": 1.6359204618345094e-06, "loss": 0.769, "step": 56550 }, { "epoch": 0.6892496313358439, "grad_norm": 2.4389022778631886, "learning_rate": 1.6355997434252727e-06, "loss": 0.7111, "step": 56555 }, { "epoch": 0.6893105675599918, "grad_norm": 3.0095462230182366, "learning_rate": 1.6352790250160361e-06, "loss": 0.7467, "step": 56560 }, { "epoch": 0.6893715037841395, "grad_norm": 2.7516783108839515, "learning_rate": 1.6349583066067995e-06, "loss": 0.7913, "step": 56565 }, { "epoch": 0.6894324400082873, "grad_norm": 2.3180666087601955, "learning_rate": 1.6346375881975626e-06, "loss": 0.7219, "step": 56570 }, { "epoch": 0.6894933762324351, "grad_norm": 2.822173087791933, "learning_rate": 1.634316869788326e-06, "loss": 0.7319, "step": 56575 }, { "epoch": 0.6895543124565829, "grad_norm": 1.9187036036454617, "learning_rate": 1.6339961513790892e-06, "loss": 0.7326, "step": 56580 }, { "epoch": 0.6896152486807308, "grad_norm": 2.8499357639874097, "learning_rate": 1.6336754329698527e-06, "loss": 0.7047, "step": 56585 }, { "epoch": 0.6896761849048786, "grad_norm": 2.5767151513033735, "learning_rate": 1.633354714560616e-06, "loss": 0.7525, "step": 56590 }, { "epoch": 0.6897371211290264, "grad_norm": 2.591537391945988, "learning_rate": 1.6330339961513791e-06, "loss": 0.6998, "step": 56595 }, { "epoch": 0.6897980573531741, "grad_norm": 2.9038464904849897, "learning_rate": 1.6327132777421426e-06, "loss": 0.7257, "step": 56600 }, { "epoch": 0.6898589935773219, "grad_norm": 2.325566246531601, "learning_rate": 1.632392559332906e-06, "loss": 0.756, "step": 56605 }, { "epoch": 0.6899199298014698, "grad_norm": 2.910589303754972, "learning_rate": 1.632071840923669e-06, "loss": 0.7688, "step": 56610 }, { "epoch": 0.6899808660256176, "grad_norm": 2.9797531913224944, "learning_rate": 1.6317511225144325e-06, "loss": 0.8021, "step": 56615 }, { "epoch": 0.6900418022497654, "grad_norm": 2.4067149776226953, "learning_rate": 1.6314304041051957e-06, "loss": 0.8035, "step": 56620 }, { "epoch": 0.6901027384739132, "grad_norm": 3.188686797883385, "learning_rate": 1.6311096856959591e-06, "loss": 0.7453, "step": 56625 }, { "epoch": 0.6901636746980611, "grad_norm": 2.5547255093394408, "learning_rate": 1.6307889672867226e-06, "loss": 0.656, "step": 56630 }, { "epoch": 0.6902246109222088, "grad_norm": 3.086061201501139, "learning_rate": 1.6304682488774856e-06, "loss": 0.6981, "step": 56635 }, { "epoch": 0.6902855471463566, "grad_norm": 2.567792196846277, "learning_rate": 1.630147530468249e-06, "loss": 0.7117, "step": 56640 }, { "epoch": 0.6903464833705044, "grad_norm": 2.440756481687113, "learning_rate": 1.6298268120590125e-06, "loss": 0.6609, "step": 56645 }, { "epoch": 0.6904074195946522, "grad_norm": 2.721817729484157, "learning_rate": 1.6295060936497755e-06, "loss": 0.7291, "step": 56650 }, { "epoch": 0.6904683558188001, "grad_norm": 2.318676938758048, "learning_rate": 1.629185375240539e-06, "loss": 0.7073, "step": 56655 }, { "epoch": 0.6905292920429479, "grad_norm": 2.049106689836725, "learning_rate": 1.6288646568313022e-06, "loss": 0.7267, "step": 56660 }, { "epoch": 0.6905902282670957, "grad_norm": 2.897642380820264, "learning_rate": 1.6285439384220656e-06, "loss": 0.77, "step": 56665 }, { "epoch": 0.6906511644912434, "grad_norm": 3.0689615082941097, "learning_rate": 1.628223220012829e-06, "loss": 0.7598, "step": 56670 }, { "epoch": 0.6907121007153912, "grad_norm": 2.458132059588946, "learning_rate": 1.627902501603592e-06, "loss": 0.7783, "step": 56675 }, { "epoch": 0.6907730369395391, "grad_norm": 2.601676241359191, "learning_rate": 1.6275817831943555e-06, "loss": 0.7512, "step": 56680 }, { "epoch": 0.6908339731636869, "grad_norm": 3.268170319856707, "learning_rate": 1.627261064785119e-06, "loss": 0.68, "step": 56685 }, { "epoch": 0.6908949093878347, "grad_norm": 3.3145609874033073, "learning_rate": 1.626940346375882e-06, "loss": 0.7218, "step": 56690 }, { "epoch": 0.6909558456119825, "grad_norm": 2.313013834656481, "learning_rate": 1.6266196279666454e-06, "loss": 0.7628, "step": 56695 }, { "epoch": 0.6910167818361304, "grad_norm": 1.9685401047016826, "learning_rate": 1.6262989095574086e-06, "loss": 0.7709, "step": 56700 }, { "epoch": 0.6910777180602781, "grad_norm": 2.562775404098858, "learning_rate": 1.625978191148172e-06, "loss": 0.7279, "step": 56705 }, { "epoch": 0.6911386542844259, "grad_norm": 2.1230866949975487, "learning_rate": 1.6256574727389355e-06, "loss": 0.6658, "step": 56710 }, { "epoch": 0.6911995905085737, "grad_norm": 2.74620859730517, "learning_rate": 1.6253367543296985e-06, "loss": 0.6982, "step": 56715 }, { "epoch": 0.6912605267327215, "grad_norm": 2.3950564954129625, "learning_rate": 1.625016035920462e-06, "loss": 0.7203, "step": 56720 }, { "epoch": 0.6913214629568694, "grad_norm": 2.314270099724132, "learning_rate": 1.6246953175112254e-06, "loss": 0.7139, "step": 56725 }, { "epoch": 0.6913823991810172, "grad_norm": 2.892701059950646, "learning_rate": 1.6243745991019886e-06, "loss": 0.743, "step": 56730 }, { "epoch": 0.691443335405165, "grad_norm": 3.0201363937300734, "learning_rate": 1.6240538806927519e-06, "loss": 0.6861, "step": 56735 }, { "epoch": 0.6915042716293127, "grad_norm": 2.7200267849792783, "learning_rate": 1.623733162283515e-06, "loss": 0.7256, "step": 56740 }, { "epoch": 0.6915652078534605, "grad_norm": 2.5008043116598695, "learning_rate": 1.6234124438742785e-06, "loss": 0.7196, "step": 56745 }, { "epoch": 0.6916261440776084, "grad_norm": 2.513320872076185, "learning_rate": 1.623091725465042e-06, "loss": 0.7543, "step": 56750 }, { "epoch": 0.6916870803017562, "grad_norm": 2.6570179356639505, "learning_rate": 1.622771007055805e-06, "loss": 0.6663, "step": 56755 }, { "epoch": 0.691748016525904, "grad_norm": 2.868090664310053, "learning_rate": 1.6224502886465684e-06, "loss": 0.6639, "step": 56760 }, { "epoch": 0.6918089527500518, "grad_norm": 2.4073632451175744, "learning_rate": 1.6221295702373319e-06, "loss": 0.6939, "step": 56765 }, { "epoch": 0.6918698889741997, "grad_norm": 2.1216366846734926, "learning_rate": 1.621808851828095e-06, "loss": 0.7582, "step": 56770 }, { "epoch": 0.6919308251983474, "grad_norm": 2.224111215923055, "learning_rate": 1.6214881334188583e-06, "loss": 0.7227, "step": 56775 }, { "epoch": 0.6919917614224952, "grad_norm": 3.235409415440524, "learning_rate": 1.6211674150096216e-06, "loss": 0.7495, "step": 56780 }, { "epoch": 0.692052697646643, "grad_norm": 3.0602225060189285, "learning_rate": 1.620846696600385e-06, "loss": 0.6798, "step": 56785 }, { "epoch": 0.6921136338707908, "grad_norm": 2.8719859018933636, "learning_rate": 1.6205259781911484e-06, "loss": 0.6751, "step": 56790 }, { "epoch": 0.6921745700949387, "grad_norm": 2.612072905621919, "learning_rate": 1.6202052597819114e-06, "loss": 0.7379, "step": 56795 }, { "epoch": 0.6922355063190865, "grad_norm": 2.338915093730029, "learning_rate": 1.6198845413726749e-06, "loss": 0.6226, "step": 56800 }, { "epoch": 0.6922964425432343, "grad_norm": 2.2813834965302093, "learning_rate": 1.6195638229634383e-06, "loss": 0.7747, "step": 56805 }, { "epoch": 0.692357378767382, "grad_norm": 2.354764430874739, "learning_rate": 1.6192431045542016e-06, "loss": 0.7672, "step": 56810 }, { "epoch": 0.6924183149915298, "grad_norm": 2.732311708029809, "learning_rate": 1.6189223861449648e-06, "loss": 0.7139, "step": 56815 }, { "epoch": 0.6924792512156777, "grad_norm": 2.5450969680546716, "learning_rate": 1.6186016677357282e-06, "loss": 0.7422, "step": 56820 }, { "epoch": 0.6925401874398255, "grad_norm": 2.448374089968667, "learning_rate": 1.6182809493264915e-06, "loss": 0.7095, "step": 56825 }, { "epoch": 0.6926011236639733, "grad_norm": 2.6269927358680083, "learning_rate": 1.617960230917255e-06, "loss": 0.7504, "step": 56830 }, { "epoch": 0.6926620598881211, "grad_norm": 2.4094824146721643, "learning_rate": 1.617639512508018e-06, "loss": 0.7238, "step": 56835 }, { "epoch": 0.692722996112269, "grad_norm": 2.3699448195735893, "learning_rate": 1.6173187940987814e-06, "loss": 0.7192, "step": 56840 }, { "epoch": 0.6927839323364167, "grad_norm": 2.237226862187805, "learning_rate": 1.6169980756895448e-06, "loss": 0.7943, "step": 56845 }, { "epoch": 0.6928448685605645, "grad_norm": 2.386327724334484, "learning_rate": 1.616677357280308e-06, "loss": 0.71, "step": 56850 }, { "epoch": 0.6929058047847123, "grad_norm": 2.548500778134239, "learning_rate": 1.6163566388710715e-06, "loss": 0.7251, "step": 56855 }, { "epoch": 0.6929667410088601, "grad_norm": 2.2354526093526115, "learning_rate": 1.6160359204618347e-06, "loss": 0.7677, "step": 56860 }, { "epoch": 0.693027677233008, "grad_norm": 3.1274511017859035, "learning_rate": 1.615715202052598e-06, "loss": 0.725, "step": 56865 }, { "epoch": 0.6930886134571558, "grad_norm": 2.317489993083762, "learning_rate": 1.6153944836433614e-06, "loss": 0.7954, "step": 56870 }, { "epoch": 0.6931495496813036, "grad_norm": 2.554700163403112, "learning_rate": 1.6150737652341244e-06, "loss": 0.7296, "step": 56875 }, { "epoch": 0.6932104859054513, "grad_norm": 3.8590400530066065, "learning_rate": 1.6147530468248878e-06, "loss": 0.7156, "step": 56880 }, { "epoch": 0.6932714221295991, "grad_norm": 2.5540018046758433, "learning_rate": 1.6144323284156513e-06, "loss": 0.7608, "step": 56885 }, { "epoch": 0.693332358353747, "grad_norm": 2.1854942980970833, "learning_rate": 1.6141116100064145e-06, "loss": 0.7018, "step": 56890 }, { "epoch": 0.6933932945778948, "grad_norm": 3.263998781856589, "learning_rate": 1.613790891597178e-06, "loss": 0.7528, "step": 56895 }, { "epoch": 0.6934542308020426, "grad_norm": 2.470901997602236, "learning_rate": 1.6134701731879412e-06, "loss": 0.738, "step": 56900 }, { "epoch": 0.6935151670261904, "grad_norm": 2.4135817057681748, "learning_rate": 1.6131494547787044e-06, "loss": 0.7635, "step": 56905 }, { "epoch": 0.6935761032503382, "grad_norm": 2.6775805171110267, "learning_rate": 1.6128287363694678e-06, "loss": 0.7676, "step": 56910 }, { "epoch": 0.693637039474486, "grad_norm": 3.095986443066335, "learning_rate": 1.6125080179602308e-06, "loss": 0.7246, "step": 56915 }, { "epoch": 0.6936979756986338, "grad_norm": 2.1304932404409085, "learning_rate": 1.6121872995509943e-06, "loss": 0.7265, "step": 56920 }, { "epoch": 0.6937589119227816, "grad_norm": 2.8406516808573516, "learning_rate": 1.6118665811417577e-06, "loss": 0.7555, "step": 56925 }, { "epoch": 0.6938198481469294, "grad_norm": 3.1698633311108493, "learning_rate": 1.611545862732521e-06, "loss": 0.7816, "step": 56930 }, { "epoch": 0.6938807843710773, "grad_norm": 2.526485560235285, "learning_rate": 1.6112251443232844e-06, "loss": 0.6939, "step": 56935 }, { "epoch": 0.6939417205952251, "grad_norm": 2.940571892329082, "learning_rate": 1.6109044259140476e-06, "loss": 0.6773, "step": 56940 }, { "epoch": 0.6940026568193729, "grad_norm": 2.4319628803836015, "learning_rate": 1.6105837075048108e-06, "loss": 0.7831, "step": 56945 }, { "epoch": 0.6940635930435206, "grad_norm": 2.778810801201687, "learning_rate": 1.6102629890955743e-06, "loss": 0.7255, "step": 56950 }, { "epoch": 0.6941245292676684, "grad_norm": 2.3347113786300837, "learning_rate": 1.6099422706863373e-06, "loss": 0.6929, "step": 56955 }, { "epoch": 0.6941854654918163, "grad_norm": 2.4078740839385566, "learning_rate": 1.6096215522771007e-06, "loss": 0.7113, "step": 56960 }, { "epoch": 0.6942464017159641, "grad_norm": 3.375034994976199, "learning_rate": 1.6093008338678642e-06, "loss": 0.7975, "step": 56965 }, { "epoch": 0.6943073379401119, "grad_norm": 2.130572987409596, "learning_rate": 1.6089801154586274e-06, "loss": 0.6977, "step": 56970 }, { "epoch": 0.6943682741642597, "grad_norm": 2.462378993463973, "learning_rate": 1.6086593970493909e-06, "loss": 0.7271, "step": 56975 }, { "epoch": 0.6944292103884075, "grad_norm": 2.4671909066785496, "learning_rate": 1.6083386786401543e-06, "loss": 0.7061, "step": 56980 }, { "epoch": 0.6944901466125553, "grad_norm": 2.690839336924309, "learning_rate": 1.6080179602309173e-06, "loss": 0.7268, "step": 56985 }, { "epoch": 0.6945510828367031, "grad_norm": 2.806887967037074, "learning_rate": 1.6076972418216807e-06, "loss": 0.7556, "step": 56990 }, { "epoch": 0.6946120190608509, "grad_norm": 3.000610162287867, "learning_rate": 1.607376523412444e-06, "loss": 0.7462, "step": 56995 }, { "epoch": 0.6946729552849987, "grad_norm": 2.334959655413748, "learning_rate": 1.6070558050032072e-06, "loss": 0.6982, "step": 57000 }, { "epoch": 0.6947338915091466, "grad_norm": 2.4735946002771025, "learning_rate": 1.6067350865939706e-06, "loss": 0.7718, "step": 57005 }, { "epoch": 0.6947948277332944, "grad_norm": 2.354226131462631, "learning_rate": 1.6064143681847339e-06, "loss": 0.6997, "step": 57010 }, { "epoch": 0.6948557639574422, "grad_norm": 3.0033851604628503, "learning_rate": 1.6060936497754973e-06, "loss": 0.9036, "step": 57015 }, { "epoch": 0.6949167001815899, "grad_norm": 2.654116877548103, "learning_rate": 1.6057729313662608e-06, "loss": 0.7001, "step": 57020 }, { "epoch": 0.6949776364057377, "grad_norm": 2.303441307986489, "learning_rate": 1.6054522129570238e-06, "loss": 0.7385, "step": 57025 }, { "epoch": 0.6950385726298856, "grad_norm": 3.2174740594954754, "learning_rate": 1.6051314945477872e-06, "loss": 0.6797, "step": 57030 }, { "epoch": 0.6950995088540334, "grad_norm": 2.3509025154512817, "learning_rate": 1.6048107761385504e-06, "loss": 0.6972, "step": 57035 }, { "epoch": 0.6951604450781812, "grad_norm": 2.136913966541859, "learning_rate": 1.6044900577293137e-06, "loss": 0.7169, "step": 57040 }, { "epoch": 0.695221381302329, "grad_norm": 2.4155721385538764, "learning_rate": 1.6041693393200771e-06, "loss": 0.7439, "step": 57045 }, { "epoch": 0.6952823175264767, "grad_norm": 2.5694337067983515, "learning_rate": 1.6038486209108403e-06, "loss": 0.7723, "step": 57050 }, { "epoch": 0.6953432537506246, "grad_norm": 2.252053592349178, "learning_rate": 1.6035279025016038e-06, "loss": 0.747, "step": 57055 }, { "epoch": 0.6954041899747724, "grad_norm": 2.0662610529459675, "learning_rate": 1.6032071840923672e-06, "loss": 0.7756, "step": 57060 }, { "epoch": 0.6954651261989202, "grad_norm": 2.3119836830388594, "learning_rate": 1.6028864656831302e-06, "loss": 0.7596, "step": 57065 }, { "epoch": 0.695526062423068, "grad_norm": 3.0259012090001187, "learning_rate": 1.6025657472738937e-06, "loss": 0.7786, "step": 57070 }, { "epoch": 0.6955869986472158, "grad_norm": 2.0448121928946468, "learning_rate": 1.602245028864657e-06, "loss": 0.7577, "step": 57075 }, { "epoch": 0.6956479348713637, "grad_norm": 2.1899256512310235, "learning_rate": 1.6019243104554203e-06, "loss": 0.7556, "step": 57080 }, { "epoch": 0.6957088710955114, "grad_norm": 2.921333202786266, "learning_rate": 1.6016035920461836e-06, "loss": 0.7434, "step": 57085 }, { "epoch": 0.6957698073196592, "grad_norm": 3.059164571442734, "learning_rate": 1.6012828736369468e-06, "loss": 0.6865, "step": 57090 }, { "epoch": 0.695830743543807, "grad_norm": 3.216622632467619, "learning_rate": 1.6009621552277102e-06, "loss": 0.761, "step": 57095 }, { "epoch": 0.6958916797679549, "grad_norm": 2.4175712067816444, "learning_rate": 1.6006414368184737e-06, "loss": 0.7496, "step": 57100 }, { "epoch": 0.6959526159921027, "grad_norm": 2.8425933686193208, "learning_rate": 1.6003207184092367e-06, "loss": 0.7259, "step": 57105 }, { "epoch": 0.6960135522162505, "grad_norm": 2.5819469903261694, "learning_rate": 1.6000000000000001e-06, "loss": 0.6939, "step": 57110 }, { "epoch": 0.6960744884403983, "grad_norm": 3.0656078147820067, "learning_rate": 1.5996792815907636e-06, "loss": 0.76, "step": 57115 }, { "epoch": 0.696135424664546, "grad_norm": 2.703082094401006, "learning_rate": 1.5993585631815268e-06, "loss": 0.7319, "step": 57120 }, { "epoch": 0.6961963608886939, "grad_norm": 2.480171838219098, "learning_rate": 1.59903784477229e-06, "loss": 0.7027, "step": 57125 }, { "epoch": 0.6962572971128417, "grad_norm": 2.028014876120451, "learning_rate": 1.5987171263630533e-06, "loss": 0.7549, "step": 57130 }, { "epoch": 0.6963182333369895, "grad_norm": 3.90856537413757, "learning_rate": 1.5983964079538167e-06, "loss": 0.7639, "step": 57135 }, { "epoch": 0.6963791695611373, "grad_norm": 2.592163913736436, "learning_rate": 1.5980756895445801e-06, "loss": 0.6188, "step": 57140 }, { "epoch": 0.6964401057852851, "grad_norm": 2.489431441387434, "learning_rate": 1.5977549711353432e-06, "loss": 0.6886, "step": 57145 }, { "epoch": 0.696501042009433, "grad_norm": 2.213642646382818, "learning_rate": 1.5974342527261066e-06, "loss": 0.7768, "step": 57150 }, { "epoch": 0.6965619782335807, "grad_norm": 2.409700919835021, "learning_rate": 1.59711353431687e-06, "loss": 0.7246, "step": 57155 }, { "epoch": 0.6966229144577285, "grad_norm": 2.614949423678049, "learning_rate": 1.5967928159076333e-06, "loss": 0.7373, "step": 57160 }, { "epoch": 0.6966838506818763, "grad_norm": 2.449759263471321, "learning_rate": 1.5964720974983965e-06, "loss": 0.7157, "step": 57165 }, { "epoch": 0.6967447869060241, "grad_norm": 2.9107867558857734, "learning_rate": 1.5961513790891597e-06, "loss": 0.7941, "step": 57170 }, { "epoch": 0.696805723130172, "grad_norm": 2.398503081484955, "learning_rate": 1.5958306606799232e-06, "loss": 0.7201, "step": 57175 }, { "epoch": 0.6968666593543198, "grad_norm": 2.824745623340912, "learning_rate": 1.5955099422706866e-06, "loss": 0.7368, "step": 57180 }, { "epoch": 0.6969275955784676, "grad_norm": 2.2604169502728, "learning_rate": 1.5951892238614496e-06, "loss": 0.715, "step": 57185 }, { "epoch": 0.6969885318026153, "grad_norm": 2.3292273835480195, "learning_rate": 1.594868505452213e-06, "loss": 0.7071, "step": 57190 }, { "epoch": 0.6970494680267632, "grad_norm": 2.3121316640837954, "learning_rate": 1.5945477870429765e-06, "loss": 0.7965, "step": 57195 }, { "epoch": 0.697110404250911, "grad_norm": 2.301188657277512, "learning_rate": 1.5942270686337397e-06, "loss": 0.6757, "step": 57200 }, { "epoch": 0.6971713404750588, "grad_norm": 2.9964749486576876, "learning_rate": 1.5939063502245032e-06, "loss": 0.7277, "step": 57205 }, { "epoch": 0.6972322766992066, "grad_norm": 2.4397532458276827, "learning_rate": 1.5935856318152662e-06, "loss": 0.7387, "step": 57210 }, { "epoch": 0.6972932129233544, "grad_norm": 2.094601807125999, "learning_rate": 1.5932649134060296e-06, "loss": 0.753, "step": 57215 }, { "epoch": 0.6973541491475023, "grad_norm": 2.0611897313850513, "learning_rate": 1.592944194996793e-06, "loss": 0.7164, "step": 57220 }, { "epoch": 0.69741508537165, "grad_norm": 2.8451793730466024, "learning_rate": 1.592623476587556e-06, "loss": 0.7041, "step": 57225 }, { "epoch": 0.6974760215957978, "grad_norm": 2.2385594157192936, "learning_rate": 1.5923027581783195e-06, "loss": 0.7423, "step": 57230 }, { "epoch": 0.6975369578199456, "grad_norm": 2.348877093141845, "learning_rate": 1.591982039769083e-06, "loss": 0.7643, "step": 57235 }, { "epoch": 0.6975978940440934, "grad_norm": 2.943833134207229, "learning_rate": 1.5916613213598462e-06, "loss": 0.7665, "step": 57240 }, { "epoch": 0.6976588302682413, "grad_norm": 2.4931356897293555, "learning_rate": 1.5913406029506096e-06, "loss": 0.7217, "step": 57245 }, { "epoch": 0.6977197664923891, "grad_norm": 4.341629079812011, "learning_rate": 1.5910198845413727e-06, "loss": 0.6994, "step": 57250 }, { "epoch": 0.6977807027165369, "grad_norm": 3.084022317713293, "learning_rate": 1.590699166132136e-06, "loss": 0.7487, "step": 57255 }, { "epoch": 0.6978416389406846, "grad_norm": 2.6639570533475734, "learning_rate": 1.5903784477228995e-06, "loss": 0.7597, "step": 57260 }, { "epoch": 0.6979025751648325, "grad_norm": 3.076564689183599, "learning_rate": 1.5900577293136626e-06, "loss": 0.6444, "step": 57265 }, { "epoch": 0.6979635113889803, "grad_norm": 2.335614207707589, "learning_rate": 1.589737010904426e-06, "loss": 0.8014, "step": 57270 }, { "epoch": 0.6980244476131281, "grad_norm": 2.503113593636138, "learning_rate": 1.5894162924951894e-06, "loss": 0.6963, "step": 57275 }, { "epoch": 0.6980853838372759, "grad_norm": 4.122003085640102, "learning_rate": 1.5890955740859527e-06, "loss": 0.7878, "step": 57280 }, { "epoch": 0.6981463200614237, "grad_norm": 2.3136790956032764, "learning_rate": 1.588774855676716e-06, "loss": 0.6817, "step": 57285 }, { "epoch": 0.6982072562855716, "grad_norm": 2.7603164456783453, "learning_rate": 1.5884541372674791e-06, "loss": 0.7178, "step": 57290 }, { "epoch": 0.6982681925097193, "grad_norm": 2.5853929408211664, "learning_rate": 1.5881334188582426e-06, "loss": 0.8204, "step": 57295 }, { "epoch": 0.6983291287338671, "grad_norm": 2.5285282803026154, "learning_rate": 1.587812700449006e-06, "loss": 0.7065, "step": 57300 }, { "epoch": 0.6983900649580149, "grad_norm": 2.9999950663719614, "learning_rate": 1.587491982039769e-06, "loss": 0.7965, "step": 57305 }, { "epoch": 0.6984510011821627, "grad_norm": 2.0894747791530137, "learning_rate": 1.5871712636305325e-06, "loss": 0.7205, "step": 57310 }, { "epoch": 0.6985119374063106, "grad_norm": 2.4543376189315538, "learning_rate": 1.586850545221296e-06, "loss": 0.746, "step": 57315 }, { "epoch": 0.6985728736304584, "grad_norm": 2.3566211856546224, "learning_rate": 1.5865298268120591e-06, "loss": 0.7576, "step": 57320 }, { "epoch": 0.6986338098546062, "grad_norm": 2.318025859416399, "learning_rate": 1.5862091084028226e-06, "loss": 0.7387, "step": 57325 }, { "epoch": 0.6986947460787539, "grad_norm": 2.396273596549327, "learning_rate": 1.5858883899935856e-06, "loss": 0.7604, "step": 57330 }, { "epoch": 0.6987556823029017, "grad_norm": 2.4376307846589897, "learning_rate": 1.585567671584349e-06, "loss": 0.7725, "step": 57335 }, { "epoch": 0.6988166185270496, "grad_norm": 2.5190272719122806, "learning_rate": 1.5852469531751125e-06, "loss": 0.703, "step": 57340 }, { "epoch": 0.6988775547511974, "grad_norm": 2.49880384013375, "learning_rate": 1.5849262347658757e-06, "loss": 0.6851, "step": 57345 }, { "epoch": 0.6989384909753452, "grad_norm": 2.6445917837176784, "learning_rate": 1.584605516356639e-06, "loss": 0.6666, "step": 57350 }, { "epoch": 0.698999427199493, "grad_norm": 2.982493143429001, "learning_rate": 1.5842847979474024e-06, "loss": 0.7367, "step": 57355 }, { "epoch": 0.6990603634236409, "grad_norm": 3.157108732633575, "learning_rate": 1.5839640795381656e-06, "loss": 0.7061, "step": 57360 }, { "epoch": 0.6991212996477886, "grad_norm": 2.3709224751136326, "learning_rate": 1.583643361128929e-06, "loss": 0.7397, "step": 57365 }, { "epoch": 0.6991822358719364, "grad_norm": 2.5236677733346995, "learning_rate": 1.583322642719692e-06, "loss": 0.787, "step": 57370 }, { "epoch": 0.6992431720960842, "grad_norm": 2.34058349898563, "learning_rate": 1.5830019243104555e-06, "loss": 0.7063, "step": 57375 }, { "epoch": 0.699304108320232, "grad_norm": 2.736654116015282, "learning_rate": 1.582681205901219e-06, "loss": 0.6978, "step": 57380 }, { "epoch": 0.6993650445443799, "grad_norm": 3.0446146892754844, "learning_rate": 1.5823604874919822e-06, "loss": 0.6852, "step": 57385 }, { "epoch": 0.6994259807685277, "grad_norm": 2.237714852926698, "learning_rate": 1.5820397690827454e-06, "loss": 0.7407, "step": 57390 }, { "epoch": 0.6994869169926755, "grad_norm": 2.6862598190189297, "learning_rate": 1.5817190506735088e-06, "loss": 0.7521, "step": 57395 }, { "epoch": 0.6995478532168232, "grad_norm": 2.587133313648317, "learning_rate": 1.581398332264272e-06, "loss": 0.7729, "step": 57400 }, { "epoch": 0.699608789440971, "grad_norm": 2.479774643280182, "learning_rate": 1.5810776138550355e-06, "loss": 0.7484, "step": 57405 }, { "epoch": 0.6996697256651189, "grad_norm": 2.443404448703166, "learning_rate": 1.580756895445799e-06, "loss": 0.7525, "step": 57410 }, { "epoch": 0.6997306618892667, "grad_norm": 4.149377024637196, "learning_rate": 1.580436177036562e-06, "loss": 0.7419, "step": 57415 }, { "epoch": 0.6997915981134145, "grad_norm": 2.3182601823304485, "learning_rate": 1.5801154586273254e-06, "loss": 0.7256, "step": 57420 }, { "epoch": 0.6998525343375623, "grad_norm": 2.7505447701137924, "learning_rate": 1.5797947402180886e-06, "loss": 0.814, "step": 57425 }, { "epoch": 0.6999134705617102, "grad_norm": 3.6072483145909113, "learning_rate": 1.579474021808852e-06, "loss": 0.8354, "step": 57430 }, { "epoch": 0.6999744067858579, "grad_norm": 2.564333745509363, "learning_rate": 1.5791533033996153e-06, "loss": 0.6999, "step": 57435 }, { "epoch": 0.7000353430100057, "grad_norm": 2.4426507839822196, "learning_rate": 1.5788325849903785e-06, "loss": 0.6979, "step": 57440 }, { "epoch": 0.7000962792341535, "grad_norm": 2.2383887550679855, "learning_rate": 1.578511866581142e-06, "loss": 0.7709, "step": 57445 }, { "epoch": 0.7001572154583013, "grad_norm": 2.5450808627262997, "learning_rate": 1.5781911481719054e-06, "loss": 0.7088, "step": 57450 }, { "epoch": 0.7002181516824492, "grad_norm": 2.725625594062714, "learning_rate": 1.5778704297626684e-06, "loss": 0.7012, "step": 57455 }, { "epoch": 0.700279087906597, "grad_norm": 2.88656624062858, "learning_rate": 1.5775497113534319e-06, "loss": 0.7451, "step": 57460 }, { "epoch": 0.7003400241307448, "grad_norm": 2.689081945368233, "learning_rate": 1.577228992944195e-06, "loss": 0.7876, "step": 57465 }, { "epoch": 0.7004009603548925, "grad_norm": 2.287877363618339, "learning_rate": 1.5769082745349585e-06, "loss": 0.7669, "step": 57470 }, { "epoch": 0.7004618965790403, "grad_norm": 3.0533098898207927, "learning_rate": 1.5765875561257218e-06, "loss": 0.7074, "step": 57475 }, { "epoch": 0.7005228328031882, "grad_norm": 2.69232349026337, "learning_rate": 1.576266837716485e-06, "loss": 0.7304, "step": 57480 }, { "epoch": 0.700583769027336, "grad_norm": 3.1061984274825116, "learning_rate": 1.5759461193072484e-06, "loss": 0.7388, "step": 57485 }, { "epoch": 0.7006447052514838, "grad_norm": 2.8297960993128504, "learning_rate": 1.5756254008980119e-06, "loss": 0.7428, "step": 57490 }, { "epoch": 0.7007056414756316, "grad_norm": 2.1999597516998866, "learning_rate": 1.5753046824887749e-06, "loss": 0.7496, "step": 57495 }, { "epoch": 0.7007665776997795, "grad_norm": 3.044336792172517, "learning_rate": 1.5749839640795383e-06, "loss": 0.6804, "step": 57500 }, { "epoch": 0.7008275139239272, "grad_norm": 2.2510173091413734, "learning_rate": 1.5746632456703015e-06, "loss": 0.7381, "step": 57505 }, { "epoch": 0.700888450148075, "grad_norm": 2.8467016348379808, "learning_rate": 1.574342527261065e-06, "loss": 0.6571, "step": 57510 }, { "epoch": 0.7009493863722228, "grad_norm": 2.930142081828641, "learning_rate": 1.5740218088518282e-06, "loss": 0.7307, "step": 57515 }, { "epoch": 0.7010103225963706, "grad_norm": 3.0160194136748824, "learning_rate": 1.5737010904425914e-06, "loss": 0.7349, "step": 57520 }, { "epoch": 0.7010712588205185, "grad_norm": 2.042712159872649, "learning_rate": 1.5733803720333549e-06, "loss": 0.765, "step": 57525 }, { "epoch": 0.7011321950446663, "grad_norm": 2.0657631879877476, "learning_rate": 1.5730596536241183e-06, "loss": 0.7502, "step": 57530 }, { "epoch": 0.7011931312688141, "grad_norm": 2.51655426428372, "learning_rate": 1.5727389352148813e-06, "loss": 0.6977, "step": 57535 }, { "epoch": 0.7012540674929618, "grad_norm": 2.641447518948166, "learning_rate": 1.5724182168056448e-06, "loss": 0.7012, "step": 57540 }, { "epoch": 0.7013150037171096, "grad_norm": 2.5081445828688342, "learning_rate": 1.572097498396408e-06, "loss": 0.7164, "step": 57545 }, { "epoch": 0.7013759399412575, "grad_norm": 2.5210774457915988, "learning_rate": 1.5717767799871715e-06, "loss": 0.7524, "step": 57550 }, { "epoch": 0.7014368761654053, "grad_norm": 2.8129265349287116, "learning_rate": 1.5714560615779349e-06, "loss": 0.7248, "step": 57555 }, { "epoch": 0.7014978123895531, "grad_norm": 2.110708081652802, "learning_rate": 1.571135343168698e-06, "loss": 0.742, "step": 57560 }, { "epoch": 0.7015587486137009, "grad_norm": 1.9791531538124614, "learning_rate": 1.5708146247594613e-06, "loss": 0.7578, "step": 57565 }, { "epoch": 0.7016196848378488, "grad_norm": 2.124951593219998, "learning_rate": 1.5704939063502248e-06, "loss": 0.6972, "step": 57570 }, { "epoch": 0.7016806210619965, "grad_norm": 2.5710598800042535, "learning_rate": 1.5701731879409878e-06, "loss": 0.7416, "step": 57575 }, { "epoch": 0.7017415572861443, "grad_norm": 3.072575797300058, "learning_rate": 1.5698524695317512e-06, "loss": 0.6486, "step": 57580 }, { "epoch": 0.7018024935102921, "grad_norm": 2.080489541372447, "learning_rate": 1.5695317511225145e-06, "loss": 0.7108, "step": 57585 }, { "epoch": 0.7018634297344399, "grad_norm": 2.211872046313186, "learning_rate": 1.569211032713278e-06, "loss": 0.6998, "step": 57590 }, { "epoch": 0.7019243659585878, "grad_norm": 2.86663329134814, "learning_rate": 1.5688903143040414e-06, "loss": 0.7344, "step": 57595 }, { "epoch": 0.7019853021827356, "grad_norm": 2.6869473369484824, "learning_rate": 1.5685695958948044e-06, "loss": 0.7218, "step": 57600 }, { "epoch": 0.7020462384068834, "grad_norm": 3.2219434266717584, "learning_rate": 1.5682488774855678e-06, "loss": 0.7242, "step": 57605 }, { "epoch": 0.7021071746310311, "grad_norm": 2.324342676171835, "learning_rate": 1.5679281590763313e-06, "loss": 0.6934, "step": 57610 }, { "epoch": 0.7021681108551789, "grad_norm": 3.503204105175244, "learning_rate": 1.5676074406670943e-06, "loss": 0.7548, "step": 57615 }, { "epoch": 0.7022290470793268, "grad_norm": 4.409055732651158, "learning_rate": 1.5672867222578577e-06, "loss": 0.7682, "step": 57620 }, { "epoch": 0.7022899833034746, "grad_norm": 2.4457188486928683, "learning_rate": 1.566966003848621e-06, "loss": 0.8039, "step": 57625 }, { "epoch": 0.7023509195276224, "grad_norm": 2.5332441649930653, "learning_rate": 1.5666452854393844e-06, "loss": 0.7511, "step": 57630 }, { "epoch": 0.7024118557517702, "grad_norm": 2.645214278720749, "learning_rate": 1.5663245670301478e-06, "loss": 0.8086, "step": 57635 }, { "epoch": 0.702472791975918, "grad_norm": 2.3482763803432123, "learning_rate": 1.5660038486209108e-06, "loss": 0.649, "step": 57640 }, { "epoch": 0.7025337282000658, "grad_norm": 2.535110053270014, "learning_rate": 1.5656831302116743e-06, "loss": 0.7371, "step": 57645 }, { "epoch": 0.7025946644242136, "grad_norm": 2.23330769491593, "learning_rate": 1.5653624118024377e-06, "loss": 0.7546, "step": 57650 }, { "epoch": 0.7026556006483614, "grad_norm": 2.358074976995224, "learning_rate": 1.5650416933932007e-06, "loss": 0.7275, "step": 57655 }, { "epoch": 0.7027165368725092, "grad_norm": 2.588946168901457, "learning_rate": 1.5647209749839642e-06, "loss": 0.7516, "step": 57660 }, { "epoch": 0.7027774730966571, "grad_norm": 2.069022431965486, "learning_rate": 1.5644002565747274e-06, "loss": 0.6881, "step": 57665 }, { "epoch": 0.7028384093208049, "grad_norm": 2.806893392234529, "learning_rate": 1.5640795381654908e-06, "loss": 0.7889, "step": 57670 }, { "epoch": 0.7028993455449527, "grad_norm": 3.0627940870793937, "learning_rate": 1.5637588197562543e-06, "loss": 0.7529, "step": 57675 }, { "epoch": 0.7029602817691004, "grad_norm": 3.109022058385921, "learning_rate": 1.5634381013470173e-06, "loss": 0.7582, "step": 57680 }, { "epoch": 0.7030212179932482, "grad_norm": 2.3957078949941937, "learning_rate": 1.5631173829377807e-06, "loss": 0.7188, "step": 57685 }, { "epoch": 0.7030821542173961, "grad_norm": 2.3814385656409374, "learning_rate": 1.5627966645285442e-06, "loss": 0.6918, "step": 57690 }, { "epoch": 0.7031430904415439, "grad_norm": 2.921565133111606, "learning_rate": 1.5624759461193074e-06, "loss": 0.7384, "step": 57695 }, { "epoch": 0.7032040266656917, "grad_norm": 2.477218394782602, "learning_rate": 1.5621552277100706e-06, "loss": 0.7436, "step": 57700 }, { "epoch": 0.7032649628898395, "grad_norm": 3.9034051367476996, "learning_rate": 1.561834509300834e-06, "loss": 0.8229, "step": 57705 }, { "epoch": 0.7033258991139874, "grad_norm": 2.757030473612866, "learning_rate": 1.5615137908915973e-06, "loss": 0.739, "step": 57710 }, { "epoch": 0.7033868353381351, "grad_norm": 2.427015881709765, "learning_rate": 1.5611930724823607e-06, "loss": 0.6668, "step": 57715 }, { "epoch": 0.7034477715622829, "grad_norm": 2.5310887622091154, "learning_rate": 1.5608723540731238e-06, "loss": 0.7371, "step": 57720 }, { "epoch": 0.7035087077864307, "grad_norm": 2.5939729867953543, "learning_rate": 1.5605516356638872e-06, "loss": 0.7512, "step": 57725 }, { "epoch": 0.7035696440105785, "grad_norm": 2.327631264547607, "learning_rate": 1.5602309172546506e-06, "loss": 0.7514, "step": 57730 }, { "epoch": 0.7036305802347264, "grad_norm": 2.041403807249697, "learning_rate": 1.5599101988454139e-06, "loss": 0.7547, "step": 57735 }, { "epoch": 0.7036915164588742, "grad_norm": 2.325854346501472, "learning_rate": 1.559589480436177e-06, "loss": 0.6898, "step": 57740 }, { "epoch": 0.703752452683022, "grad_norm": 2.1935643113575196, "learning_rate": 1.5592687620269405e-06, "loss": 0.6829, "step": 57745 }, { "epoch": 0.7038133889071697, "grad_norm": 2.7947313022037394, "learning_rate": 1.5589480436177038e-06, "loss": 0.6655, "step": 57750 }, { "epoch": 0.7038743251313175, "grad_norm": 3.0382334264686746, "learning_rate": 1.5586273252084672e-06, "loss": 0.8017, "step": 57755 }, { "epoch": 0.7039352613554654, "grad_norm": 2.712191964613111, "learning_rate": 1.5583066067992302e-06, "loss": 0.6934, "step": 57760 }, { "epoch": 0.7039961975796132, "grad_norm": 2.5023407256397037, "learning_rate": 1.5579858883899937e-06, "loss": 0.831, "step": 57765 }, { "epoch": 0.704057133803761, "grad_norm": 2.2152684121135544, "learning_rate": 1.557665169980757e-06, "loss": 0.6772, "step": 57770 }, { "epoch": 0.7041180700279088, "grad_norm": 2.7933393246074663, "learning_rate": 1.5573444515715203e-06, "loss": 0.7521, "step": 57775 }, { "epoch": 0.7041790062520566, "grad_norm": 2.505417412688458, "learning_rate": 1.5570237331622836e-06, "loss": 0.6949, "step": 57780 }, { "epoch": 0.7042399424762044, "grad_norm": 3.349558507694369, "learning_rate": 1.556703014753047e-06, "loss": 0.6813, "step": 57785 }, { "epoch": 0.7043008787003522, "grad_norm": 2.1424878054332064, "learning_rate": 1.5563822963438102e-06, "loss": 0.7267, "step": 57790 }, { "epoch": 0.7043618149245, "grad_norm": 2.630570051576993, "learning_rate": 1.5560615779345737e-06, "loss": 0.7278, "step": 57795 }, { "epoch": 0.7044227511486478, "grad_norm": 2.2150971057813944, "learning_rate": 1.5557408595253367e-06, "loss": 0.6893, "step": 57800 }, { "epoch": 0.7044836873727957, "grad_norm": 2.242303521373948, "learning_rate": 1.5554201411161001e-06, "loss": 0.7264, "step": 57805 }, { "epoch": 0.7045446235969435, "grad_norm": 2.503248100520644, "learning_rate": 1.5550994227068636e-06, "loss": 0.6919, "step": 57810 }, { "epoch": 0.7046055598210913, "grad_norm": 2.2591632263888033, "learning_rate": 1.5547787042976268e-06, "loss": 0.7094, "step": 57815 }, { "epoch": 0.704666496045239, "grad_norm": 2.050275135460917, "learning_rate": 1.5544579858883902e-06, "loss": 0.7215, "step": 57820 }, { "epoch": 0.7047274322693868, "grad_norm": 2.5330073627488794, "learning_rate": 1.5541372674791535e-06, "loss": 0.7562, "step": 57825 }, { "epoch": 0.7047883684935347, "grad_norm": 3.485675198324539, "learning_rate": 1.5538165490699167e-06, "loss": 0.7991, "step": 57830 }, { "epoch": 0.7048493047176825, "grad_norm": 2.7608091518065776, "learning_rate": 1.5534958306606801e-06, "loss": 0.7005, "step": 57835 }, { "epoch": 0.7049102409418303, "grad_norm": 2.561785227526992, "learning_rate": 1.5531751122514432e-06, "loss": 0.7459, "step": 57840 }, { "epoch": 0.7049711771659781, "grad_norm": 2.347935994374485, "learning_rate": 1.5528543938422066e-06, "loss": 0.7591, "step": 57845 }, { "epoch": 0.705032113390126, "grad_norm": 2.7680837523420707, "learning_rate": 1.55253367543297e-06, "loss": 0.7609, "step": 57850 }, { "epoch": 0.7050930496142737, "grad_norm": 2.5288837930207135, "learning_rate": 1.5522129570237333e-06, "loss": 0.6751, "step": 57855 }, { "epoch": 0.7051539858384215, "grad_norm": 1.9990470792953394, "learning_rate": 1.5518922386144967e-06, "loss": 0.7224, "step": 57860 }, { "epoch": 0.7052149220625693, "grad_norm": 2.2883595617222623, "learning_rate": 1.55157152020526e-06, "loss": 0.7357, "step": 57865 }, { "epoch": 0.7052758582867171, "grad_norm": 2.1562967746382817, "learning_rate": 1.5512508017960232e-06, "loss": 0.7377, "step": 57870 }, { "epoch": 0.705336794510865, "grad_norm": 2.54120905407812, "learning_rate": 1.5509300833867866e-06, "loss": 0.7079, "step": 57875 }, { "epoch": 0.7053977307350128, "grad_norm": 2.7597383977944974, "learning_rate": 1.5506093649775496e-06, "loss": 0.7495, "step": 57880 }, { "epoch": 0.7054586669591606, "grad_norm": 2.2724497120530236, "learning_rate": 1.550288646568313e-06, "loss": 0.671, "step": 57885 }, { "epoch": 0.7055196031833083, "grad_norm": 2.7736961055113665, "learning_rate": 1.5499679281590765e-06, "loss": 0.7956, "step": 57890 }, { "epoch": 0.7055805394074561, "grad_norm": 2.304000424362384, "learning_rate": 1.5496472097498397e-06, "loss": 0.6833, "step": 57895 }, { "epoch": 0.705641475631604, "grad_norm": 2.231887060807785, "learning_rate": 1.5493264913406032e-06, "loss": 0.726, "step": 57900 }, { "epoch": 0.7057024118557518, "grad_norm": 2.087703804074507, "learning_rate": 1.5490057729313666e-06, "loss": 0.6928, "step": 57905 }, { "epoch": 0.7057633480798996, "grad_norm": 2.7085975698934384, "learning_rate": 1.5486850545221296e-06, "loss": 0.6884, "step": 57910 }, { "epoch": 0.7058242843040474, "grad_norm": 2.685571274340077, "learning_rate": 1.548364336112893e-06, "loss": 0.7931, "step": 57915 }, { "epoch": 0.7058852205281952, "grad_norm": 2.3967867593669707, "learning_rate": 1.5480436177036563e-06, "loss": 0.69, "step": 57920 }, { "epoch": 0.705946156752343, "grad_norm": 2.245521977034121, "learning_rate": 1.5477228992944195e-06, "loss": 0.7475, "step": 57925 }, { "epoch": 0.7060070929764908, "grad_norm": 3.298531819613045, "learning_rate": 1.547402180885183e-06, "loss": 0.707, "step": 57930 }, { "epoch": 0.7060680292006386, "grad_norm": 2.1601209588416026, "learning_rate": 1.5470814624759462e-06, "loss": 0.6902, "step": 57935 }, { "epoch": 0.7061289654247864, "grad_norm": 2.257198125248643, "learning_rate": 1.5467607440667096e-06, "loss": 0.7149, "step": 57940 }, { "epoch": 0.7061899016489342, "grad_norm": 3.5844765983023303, "learning_rate": 1.546440025657473e-06, "loss": 0.7684, "step": 57945 }, { "epoch": 0.7062508378730821, "grad_norm": 2.11916700029123, "learning_rate": 1.546119307248236e-06, "loss": 0.7271, "step": 57950 }, { "epoch": 0.7063117740972299, "grad_norm": 3.234648541501327, "learning_rate": 1.5457985888389995e-06, "loss": 0.7506, "step": 57955 }, { "epoch": 0.7063727103213776, "grad_norm": 2.6212761364300756, "learning_rate": 1.5454778704297628e-06, "loss": 0.7258, "step": 57960 }, { "epoch": 0.7064336465455254, "grad_norm": 2.497907754751553, "learning_rate": 1.545157152020526e-06, "loss": 0.7511, "step": 57965 }, { "epoch": 0.7064945827696733, "grad_norm": 2.563263197844093, "learning_rate": 1.5448364336112894e-06, "loss": 0.7451, "step": 57970 }, { "epoch": 0.7065555189938211, "grad_norm": 2.7892478763103856, "learning_rate": 1.5445157152020527e-06, "loss": 0.7933, "step": 57975 }, { "epoch": 0.7066164552179689, "grad_norm": 3.33180255907908, "learning_rate": 1.544194996792816e-06, "loss": 0.7344, "step": 57980 }, { "epoch": 0.7066773914421167, "grad_norm": 2.4219406981396774, "learning_rate": 1.5438742783835795e-06, "loss": 0.7638, "step": 57985 }, { "epoch": 0.7067383276662645, "grad_norm": 4.097337119470088, "learning_rate": 1.5435535599743425e-06, "loss": 0.7648, "step": 57990 }, { "epoch": 0.7067992638904123, "grad_norm": 2.4026655545441433, "learning_rate": 1.543232841565106e-06, "loss": 0.7277, "step": 57995 }, { "epoch": 0.7068602001145601, "grad_norm": 2.461969814428612, "learning_rate": 1.5429121231558692e-06, "loss": 0.6448, "step": 58000 }, { "epoch": 0.7069211363387079, "grad_norm": 3.1917121739000898, "learning_rate": 1.5425914047466324e-06, "loss": 0.683, "step": 58005 }, { "epoch": 0.7069820725628557, "grad_norm": 2.9362419540849705, "learning_rate": 1.5422706863373959e-06, "loss": 0.737, "step": 58010 }, { "epoch": 0.7070430087870035, "grad_norm": 2.5317254130975697, "learning_rate": 1.5419499679281591e-06, "loss": 0.7217, "step": 58015 }, { "epoch": 0.7071039450111514, "grad_norm": 2.3908125758279795, "learning_rate": 1.5416292495189226e-06, "loss": 0.82, "step": 58020 }, { "epoch": 0.7071648812352991, "grad_norm": 2.768905905725015, "learning_rate": 1.541308531109686e-06, "loss": 0.7431, "step": 58025 }, { "epoch": 0.7072258174594469, "grad_norm": 2.214533823733123, "learning_rate": 1.540987812700449e-06, "loss": 0.7845, "step": 58030 }, { "epoch": 0.7072867536835947, "grad_norm": 2.239509541654177, "learning_rate": 1.5406670942912125e-06, "loss": 0.7835, "step": 58035 }, { "epoch": 0.7073476899077425, "grad_norm": 2.7519016275466197, "learning_rate": 1.5403463758819759e-06, "loss": 0.7681, "step": 58040 }, { "epoch": 0.7074086261318904, "grad_norm": 2.4671016458743966, "learning_rate": 1.5400256574727391e-06, "loss": 0.689, "step": 58045 }, { "epoch": 0.7074695623560382, "grad_norm": 3.7683206825481084, "learning_rate": 1.5397049390635024e-06, "loss": 0.7028, "step": 58050 }, { "epoch": 0.707530498580186, "grad_norm": 2.14699165449103, "learning_rate": 1.5393842206542656e-06, "loss": 0.7496, "step": 58055 }, { "epoch": 0.7075914348043337, "grad_norm": 2.4634963830026777, "learning_rate": 1.539063502245029e-06, "loss": 0.7719, "step": 58060 }, { "epoch": 0.7076523710284816, "grad_norm": 2.701579942497539, "learning_rate": 1.5387427838357925e-06, "loss": 0.7098, "step": 58065 }, { "epoch": 0.7077133072526294, "grad_norm": 2.4422442369440893, "learning_rate": 1.5384220654265555e-06, "loss": 0.7158, "step": 58070 }, { "epoch": 0.7077742434767772, "grad_norm": 2.4117183159332884, "learning_rate": 1.538101347017319e-06, "loss": 0.7766, "step": 58075 }, { "epoch": 0.707835179700925, "grad_norm": 2.9898202840945087, "learning_rate": 1.5377806286080824e-06, "loss": 0.6785, "step": 58080 }, { "epoch": 0.7078961159250728, "grad_norm": 2.8556401424498734, "learning_rate": 1.5374599101988456e-06, "loss": 0.7146, "step": 58085 }, { "epoch": 0.7079570521492207, "grad_norm": 3.3763029603225982, "learning_rate": 1.5371391917896088e-06, "loss": 0.7391, "step": 58090 }, { "epoch": 0.7080179883733684, "grad_norm": 2.6118111700072846, "learning_rate": 1.536818473380372e-06, "loss": 0.7731, "step": 58095 }, { "epoch": 0.7080789245975162, "grad_norm": 2.1214592011567954, "learning_rate": 1.5364977549711355e-06, "loss": 0.7017, "step": 58100 }, { "epoch": 0.708139860821664, "grad_norm": 3.4407880471228385, "learning_rate": 1.536177036561899e-06, "loss": 0.7346, "step": 58105 }, { "epoch": 0.7082007970458118, "grad_norm": 2.341678942611237, "learning_rate": 1.535856318152662e-06, "loss": 0.7311, "step": 58110 }, { "epoch": 0.7082617332699597, "grad_norm": 2.658171609538031, "learning_rate": 1.5355355997434254e-06, "loss": 0.8022, "step": 58115 }, { "epoch": 0.7083226694941075, "grad_norm": 2.7873904789005173, "learning_rate": 1.5352148813341888e-06, "loss": 0.7337, "step": 58120 }, { "epoch": 0.7083836057182553, "grad_norm": 1.870416324531172, "learning_rate": 1.534894162924952e-06, "loss": 0.7186, "step": 58125 }, { "epoch": 0.708444541942403, "grad_norm": 1.8712062721678746, "learning_rate": 1.5345734445157153e-06, "loss": 0.7048, "step": 58130 }, { "epoch": 0.7085054781665509, "grad_norm": 2.2090697941695057, "learning_rate": 1.5342527261064785e-06, "loss": 0.7116, "step": 58135 }, { "epoch": 0.7085664143906987, "grad_norm": 2.4378942545606153, "learning_rate": 1.533932007697242e-06, "loss": 0.7418, "step": 58140 }, { "epoch": 0.7086273506148465, "grad_norm": 2.531977623526489, "learning_rate": 1.5336112892880054e-06, "loss": 0.723, "step": 58145 }, { "epoch": 0.7086882868389943, "grad_norm": 3.768558597226149, "learning_rate": 1.5332905708787684e-06, "loss": 0.8157, "step": 58150 }, { "epoch": 0.7087492230631421, "grad_norm": 2.020312471676409, "learning_rate": 1.5329698524695318e-06, "loss": 0.718, "step": 58155 }, { "epoch": 0.70881015928729, "grad_norm": 2.637396472384777, "learning_rate": 1.5326491340602953e-06, "loss": 0.7216, "step": 58160 }, { "epoch": 0.7088710955114377, "grad_norm": 2.93311020011788, "learning_rate": 1.5323284156510585e-06, "loss": 0.7278, "step": 58165 }, { "epoch": 0.7089320317355855, "grad_norm": 2.3895665515759372, "learning_rate": 1.532007697241822e-06, "loss": 0.7377, "step": 58170 }, { "epoch": 0.7089929679597333, "grad_norm": 3.368896300436091, "learning_rate": 1.531686978832585e-06, "loss": 0.72, "step": 58175 }, { "epoch": 0.7090539041838811, "grad_norm": 2.9078505808638186, "learning_rate": 1.5313662604233484e-06, "loss": 0.7781, "step": 58180 }, { "epoch": 0.709114840408029, "grad_norm": 2.412929329985393, "learning_rate": 1.5310455420141119e-06, "loss": 0.7063, "step": 58185 }, { "epoch": 0.7091757766321768, "grad_norm": 2.1471933482937566, "learning_rate": 1.5307248236048749e-06, "loss": 0.7651, "step": 58190 }, { "epoch": 0.7092367128563246, "grad_norm": 2.3977085527087483, "learning_rate": 1.5304041051956383e-06, "loss": 0.719, "step": 58195 }, { "epoch": 0.7092976490804723, "grad_norm": 2.3724035220037267, "learning_rate": 1.5300833867864017e-06, "loss": 0.6769, "step": 58200 }, { "epoch": 0.7093585853046201, "grad_norm": 2.314136444391021, "learning_rate": 1.529762668377165e-06, "loss": 0.7441, "step": 58205 }, { "epoch": 0.709419521528768, "grad_norm": 2.0875156846109393, "learning_rate": 1.5294419499679284e-06, "loss": 0.6823, "step": 58210 }, { "epoch": 0.7094804577529158, "grad_norm": 2.4390545090481957, "learning_rate": 1.5291212315586914e-06, "loss": 0.7884, "step": 58215 }, { "epoch": 0.7095413939770636, "grad_norm": 2.4017881101641136, "learning_rate": 1.5288005131494549e-06, "loss": 0.7327, "step": 58220 }, { "epoch": 0.7096023302012114, "grad_norm": 2.2489341268761183, "learning_rate": 1.5284797947402183e-06, "loss": 0.6891, "step": 58225 }, { "epoch": 0.7096632664253593, "grad_norm": 2.8993518121222173, "learning_rate": 1.5281590763309813e-06, "loss": 0.762, "step": 58230 }, { "epoch": 0.709724202649507, "grad_norm": 2.776327070054498, "learning_rate": 1.5278383579217448e-06, "loss": 0.7962, "step": 58235 }, { "epoch": 0.7097851388736548, "grad_norm": 2.437755610589727, "learning_rate": 1.5275176395125082e-06, "loss": 0.7295, "step": 58240 }, { "epoch": 0.7098460750978026, "grad_norm": 2.854754873029237, "learning_rate": 1.5271969211032714e-06, "loss": 0.6681, "step": 58245 }, { "epoch": 0.7099070113219504, "grad_norm": 2.6462281088997246, "learning_rate": 1.5268762026940349e-06, "loss": 0.6852, "step": 58250 }, { "epoch": 0.7099679475460983, "grad_norm": 2.9864633656268804, "learning_rate": 1.526555484284798e-06, "loss": 0.7463, "step": 58255 }, { "epoch": 0.7100288837702461, "grad_norm": 3.029245829685408, "learning_rate": 1.5262347658755613e-06, "loss": 0.6734, "step": 58260 }, { "epoch": 0.7100898199943939, "grad_norm": 3.1903239263948273, "learning_rate": 1.5259140474663248e-06, "loss": 0.705, "step": 58265 }, { "epoch": 0.7101507562185416, "grad_norm": 2.1298540542436313, "learning_rate": 1.525593329057088e-06, "loss": 0.682, "step": 58270 }, { "epoch": 0.7102116924426894, "grad_norm": 2.461281083083398, "learning_rate": 1.5252726106478512e-06, "loss": 0.7193, "step": 58275 }, { "epoch": 0.7102726286668373, "grad_norm": 3.1087992007554615, "learning_rate": 1.5249518922386147e-06, "loss": 0.7278, "step": 58280 }, { "epoch": 0.7103335648909851, "grad_norm": 2.1387761270566576, "learning_rate": 1.524631173829378e-06, "loss": 0.6658, "step": 58285 }, { "epoch": 0.7103945011151329, "grad_norm": 2.0538107662862055, "learning_rate": 1.5243104554201413e-06, "loss": 0.7313, "step": 58290 }, { "epoch": 0.7104554373392807, "grad_norm": 2.2622002752954384, "learning_rate": 1.5239897370109044e-06, "loss": 0.7713, "step": 58295 }, { "epoch": 0.7105163735634286, "grad_norm": 2.194452047050887, "learning_rate": 1.5236690186016678e-06, "loss": 0.7394, "step": 58300 }, { "epoch": 0.7105773097875763, "grad_norm": 2.4824358849822654, "learning_rate": 1.5233483001924312e-06, "loss": 0.5982, "step": 58305 }, { "epoch": 0.7106382460117241, "grad_norm": 2.6407364087714584, "learning_rate": 1.5230275817831945e-06, "loss": 0.7498, "step": 58310 }, { "epoch": 0.7106991822358719, "grad_norm": 2.3707679766014103, "learning_rate": 1.5227068633739577e-06, "loss": 0.6979, "step": 58315 }, { "epoch": 0.7107601184600197, "grad_norm": 2.790678105013251, "learning_rate": 1.5223861449647211e-06, "loss": 0.7103, "step": 58320 }, { "epoch": 0.7108210546841676, "grad_norm": 2.7534972616247355, "learning_rate": 1.5220654265554844e-06, "loss": 0.7235, "step": 58325 }, { "epoch": 0.7108819909083154, "grad_norm": 2.4477806767690526, "learning_rate": 1.5217447081462478e-06, "loss": 0.7177, "step": 58330 }, { "epoch": 0.7109429271324632, "grad_norm": 2.801351629088899, "learning_rate": 1.5214239897370112e-06, "loss": 0.7225, "step": 58335 }, { "epoch": 0.7110038633566109, "grad_norm": 2.3602840274652803, "learning_rate": 1.5211032713277743e-06, "loss": 0.7658, "step": 58340 }, { "epoch": 0.7110647995807587, "grad_norm": 3.3078709041902115, "learning_rate": 1.5207825529185377e-06, "loss": 0.8342, "step": 58345 }, { "epoch": 0.7111257358049066, "grad_norm": 2.6728484456262067, "learning_rate": 1.520461834509301e-06, "loss": 0.6897, "step": 58350 }, { "epoch": 0.7111866720290544, "grad_norm": 3.1722536556336496, "learning_rate": 1.5201411161000642e-06, "loss": 0.6824, "step": 58355 }, { "epoch": 0.7112476082532022, "grad_norm": 2.5700989030407038, "learning_rate": 1.5198203976908276e-06, "loss": 0.6277, "step": 58360 }, { "epoch": 0.71130854447735, "grad_norm": 1.9237580762624464, "learning_rate": 1.5194996792815908e-06, "loss": 0.7517, "step": 58365 }, { "epoch": 0.7113694807014979, "grad_norm": 2.5119579723979033, "learning_rate": 1.5191789608723543e-06, "loss": 0.692, "step": 58370 }, { "epoch": 0.7114304169256456, "grad_norm": 2.3573626137250403, "learning_rate": 1.5188582424631177e-06, "loss": 0.6764, "step": 58375 }, { "epoch": 0.7114913531497934, "grad_norm": 3.16704607168045, "learning_rate": 1.5185375240538807e-06, "loss": 0.8438, "step": 58380 }, { "epoch": 0.7115522893739412, "grad_norm": 2.58307379483025, "learning_rate": 1.5182168056446442e-06, "loss": 0.7313, "step": 58385 }, { "epoch": 0.711613225598089, "grad_norm": 2.6992273186105913, "learning_rate": 1.5178960872354074e-06, "loss": 0.689, "step": 58390 }, { "epoch": 0.7116741618222369, "grad_norm": 3.360897917314299, "learning_rate": 1.5175753688261708e-06, "loss": 0.7027, "step": 58395 }, { "epoch": 0.7117350980463847, "grad_norm": 2.222650091352564, "learning_rate": 1.517254650416934e-06, "loss": 0.7483, "step": 58400 }, { "epoch": 0.7117960342705325, "grad_norm": 3.142649444215901, "learning_rate": 1.5169339320076973e-06, "loss": 0.7504, "step": 58405 }, { "epoch": 0.7118569704946802, "grad_norm": 2.6189218560025065, "learning_rate": 1.5166132135984607e-06, "loss": 0.7337, "step": 58410 }, { "epoch": 0.711917906718828, "grad_norm": 2.441775076034765, "learning_rate": 1.5162924951892242e-06, "loss": 0.75, "step": 58415 }, { "epoch": 0.7119788429429759, "grad_norm": 2.7923738160367093, "learning_rate": 1.5159717767799872e-06, "loss": 0.7026, "step": 58420 }, { "epoch": 0.7120397791671237, "grad_norm": 2.6854738379756586, "learning_rate": 1.5156510583707506e-06, "loss": 0.7139, "step": 58425 }, { "epoch": 0.7121007153912715, "grad_norm": 2.611782772332096, "learning_rate": 1.5153303399615139e-06, "loss": 0.7299, "step": 58430 }, { "epoch": 0.7121616516154193, "grad_norm": 2.5728916655186, "learning_rate": 1.5150096215522773e-06, "loss": 0.7108, "step": 58435 }, { "epoch": 0.7122225878395672, "grad_norm": 2.4455003684237835, "learning_rate": 1.5146889031430405e-06, "loss": 0.7571, "step": 58440 }, { "epoch": 0.7122835240637149, "grad_norm": 2.9248902778982773, "learning_rate": 1.5143681847338038e-06, "loss": 0.782, "step": 58445 }, { "epoch": 0.7123444602878627, "grad_norm": 4.05854256714975, "learning_rate": 1.5140474663245672e-06, "loss": 0.7169, "step": 58450 }, { "epoch": 0.7124053965120105, "grad_norm": 2.6805656926021775, "learning_rate": 1.5137267479153306e-06, "loss": 0.7321, "step": 58455 }, { "epoch": 0.7124663327361583, "grad_norm": 3.238605152388739, "learning_rate": 1.5134060295060937e-06, "loss": 0.7406, "step": 58460 }, { "epoch": 0.7125272689603062, "grad_norm": 2.6044717988676065, "learning_rate": 1.513085311096857e-06, "loss": 0.7012, "step": 58465 }, { "epoch": 0.712588205184454, "grad_norm": 2.143074822860324, "learning_rate": 1.5127645926876203e-06, "loss": 0.6859, "step": 58470 }, { "epoch": 0.7126491414086018, "grad_norm": 2.5948139814423086, "learning_rate": 1.5124438742783838e-06, "loss": 0.8132, "step": 58475 }, { "epoch": 0.7127100776327495, "grad_norm": 2.5531001720068494, "learning_rate": 1.512123155869147e-06, "loss": 0.6441, "step": 58480 }, { "epoch": 0.7127710138568973, "grad_norm": 2.6119892251884322, "learning_rate": 1.5118024374599102e-06, "loss": 0.7366, "step": 58485 }, { "epoch": 0.7128319500810452, "grad_norm": 3.6260457887173847, "learning_rate": 1.5114817190506737e-06, "loss": 0.7671, "step": 58490 }, { "epoch": 0.712892886305193, "grad_norm": 3.204407499426421, "learning_rate": 1.511161000641437e-06, "loss": 0.688, "step": 58495 }, { "epoch": 0.7129538225293408, "grad_norm": 3.072420670908098, "learning_rate": 1.5108402822322001e-06, "loss": 0.7103, "step": 58500 }, { "epoch": 0.7130147587534886, "grad_norm": 2.4176850037504765, "learning_rate": 1.5105195638229636e-06, "loss": 0.7903, "step": 58505 }, { "epoch": 0.7130756949776365, "grad_norm": 2.3589901718920387, "learning_rate": 1.5101988454137268e-06, "loss": 0.6864, "step": 58510 }, { "epoch": 0.7131366312017842, "grad_norm": 2.4452853625166244, "learning_rate": 1.5098781270044902e-06, "loss": 0.7368, "step": 58515 }, { "epoch": 0.713197567425932, "grad_norm": 2.115054809985914, "learning_rate": 1.5095574085952537e-06, "loss": 0.6256, "step": 58520 }, { "epoch": 0.7132585036500798, "grad_norm": 2.2781932541848295, "learning_rate": 1.5092366901860167e-06, "loss": 0.6974, "step": 58525 }, { "epoch": 0.7133194398742276, "grad_norm": 2.166029254752752, "learning_rate": 1.5089159717767801e-06, "loss": 0.7228, "step": 58530 }, { "epoch": 0.7133803760983755, "grad_norm": 2.7083557022753046, "learning_rate": 1.5085952533675436e-06, "loss": 0.7864, "step": 58535 }, { "epoch": 0.7134413123225233, "grad_norm": 2.8181546942879208, "learning_rate": 1.5082745349583066e-06, "loss": 0.7301, "step": 58540 }, { "epoch": 0.7135022485466711, "grad_norm": 3.262177938234618, "learning_rate": 1.50795381654907e-06, "loss": 0.7605, "step": 58545 }, { "epoch": 0.7135631847708188, "grad_norm": 2.4868847556842786, "learning_rate": 1.5076330981398333e-06, "loss": 0.7219, "step": 58550 }, { "epoch": 0.7136241209949666, "grad_norm": 2.5376845524862244, "learning_rate": 1.5073123797305967e-06, "loss": 0.8802, "step": 58555 }, { "epoch": 0.7136850572191145, "grad_norm": 2.964453970886087, "learning_rate": 1.5069916613213601e-06, "loss": 0.7447, "step": 58560 }, { "epoch": 0.7137459934432623, "grad_norm": 4.296533374135128, "learning_rate": 1.5066709429121231e-06, "loss": 0.8187, "step": 58565 }, { "epoch": 0.7138069296674101, "grad_norm": 2.241532318410616, "learning_rate": 1.5063502245028866e-06, "loss": 0.7633, "step": 58570 }, { "epoch": 0.7138678658915579, "grad_norm": 3.0900432700478597, "learning_rate": 1.50602950609365e-06, "loss": 0.7412, "step": 58575 }, { "epoch": 0.7139288021157058, "grad_norm": 3.3888404521238464, "learning_rate": 1.505708787684413e-06, "loss": 0.7551, "step": 58580 }, { "epoch": 0.7139897383398535, "grad_norm": 3.08486358932676, "learning_rate": 1.5053880692751765e-06, "loss": 0.6765, "step": 58585 }, { "epoch": 0.7140506745640013, "grad_norm": 2.12098291867548, "learning_rate": 1.5050673508659397e-06, "loss": 0.6226, "step": 58590 }, { "epoch": 0.7141116107881491, "grad_norm": 2.052233929616671, "learning_rate": 1.5047466324567032e-06, "loss": 0.709, "step": 58595 }, { "epoch": 0.7141725470122969, "grad_norm": 2.6047513986686686, "learning_rate": 1.5044259140474666e-06, "loss": 0.6985, "step": 58600 }, { "epoch": 0.7142334832364448, "grad_norm": 2.333058688751736, "learning_rate": 1.5041051956382296e-06, "loss": 0.717, "step": 58605 }, { "epoch": 0.7142944194605926, "grad_norm": 2.3773497547915383, "learning_rate": 1.503784477228993e-06, "loss": 0.726, "step": 58610 }, { "epoch": 0.7143553556847404, "grad_norm": 2.8558066427797666, "learning_rate": 1.5034637588197565e-06, "loss": 0.7042, "step": 58615 }, { "epoch": 0.7144162919088881, "grad_norm": 3.01463677973223, "learning_rate": 1.5031430404105197e-06, "loss": 0.793, "step": 58620 }, { "epoch": 0.7144772281330359, "grad_norm": 2.8005677096894663, "learning_rate": 1.502822322001283e-06, "loss": 0.7817, "step": 58625 }, { "epoch": 0.7145381643571838, "grad_norm": 2.841168315523098, "learning_rate": 1.5025016035920464e-06, "loss": 0.7124, "step": 58630 }, { "epoch": 0.7145991005813316, "grad_norm": 2.423977146800525, "learning_rate": 1.5021808851828096e-06, "loss": 0.7266, "step": 58635 }, { "epoch": 0.7146600368054794, "grad_norm": 2.5983894518296258, "learning_rate": 1.501860166773573e-06, "loss": 0.7281, "step": 58640 }, { "epoch": 0.7147209730296272, "grad_norm": 2.842731324380239, "learning_rate": 1.501539448364336e-06, "loss": 0.7747, "step": 58645 }, { "epoch": 0.714781909253775, "grad_norm": 2.898531762947206, "learning_rate": 1.5012187299550995e-06, "loss": 0.6913, "step": 58650 }, { "epoch": 0.7148428454779228, "grad_norm": 2.0157199268118164, "learning_rate": 1.500898011545863e-06, "loss": 0.6806, "step": 58655 }, { "epoch": 0.7149037817020706, "grad_norm": 2.5915921775161364, "learning_rate": 1.5005772931366262e-06, "loss": 0.7736, "step": 58660 }, { "epoch": 0.7149647179262184, "grad_norm": 2.157646827320466, "learning_rate": 1.5002565747273894e-06, "loss": 0.7531, "step": 58665 }, { "epoch": 0.7150256541503662, "grad_norm": 2.5076428268628654, "learning_rate": 1.4999358563181529e-06, "loss": 0.779, "step": 58670 }, { "epoch": 0.715086590374514, "grad_norm": 2.627103232495889, "learning_rate": 1.499615137908916e-06, "loss": 0.7134, "step": 58675 }, { "epoch": 0.7151475265986619, "grad_norm": 2.885810694021646, "learning_rate": 1.4992944194996795e-06, "loss": 0.7693, "step": 58680 }, { "epoch": 0.7152084628228097, "grad_norm": 2.4335885789141742, "learning_rate": 1.4989737010904425e-06, "loss": 0.6907, "step": 58685 }, { "epoch": 0.7152693990469574, "grad_norm": 2.5621338503796616, "learning_rate": 1.498652982681206e-06, "loss": 0.7013, "step": 58690 }, { "epoch": 0.7153303352711052, "grad_norm": 2.24005549135812, "learning_rate": 1.4983322642719694e-06, "loss": 0.7271, "step": 58695 }, { "epoch": 0.7153912714952531, "grad_norm": 2.0617550898789525, "learning_rate": 1.4980115458627326e-06, "loss": 0.6736, "step": 58700 }, { "epoch": 0.7154522077194009, "grad_norm": 3.632195185004735, "learning_rate": 1.4976908274534959e-06, "loss": 0.781, "step": 58705 }, { "epoch": 0.7155131439435487, "grad_norm": 2.5036858899736973, "learning_rate": 1.4973701090442593e-06, "loss": 0.7107, "step": 58710 }, { "epoch": 0.7155740801676965, "grad_norm": 3.7776651574532227, "learning_rate": 1.4970493906350225e-06, "loss": 0.7024, "step": 58715 }, { "epoch": 0.7156350163918443, "grad_norm": 2.6089965586321124, "learning_rate": 1.496728672225786e-06, "loss": 0.7452, "step": 58720 }, { "epoch": 0.7156959526159921, "grad_norm": 2.4909025523261077, "learning_rate": 1.496407953816549e-06, "loss": 0.7321, "step": 58725 }, { "epoch": 0.7157568888401399, "grad_norm": 3.2021436348579373, "learning_rate": 1.4960872354073124e-06, "loss": 0.8251, "step": 58730 }, { "epoch": 0.7158178250642877, "grad_norm": 2.2947876355073493, "learning_rate": 1.4957665169980759e-06, "loss": 0.7614, "step": 58735 }, { "epoch": 0.7158787612884355, "grad_norm": 2.095388218024777, "learning_rate": 1.4954457985888391e-06, "loss": 0.7163, "step": 58740 }, { "epoch": 0.7159396975125834, "grad_norm": 2.076504057913314, "learning_rate": 1.4951250801796026e-06, "loss": 0.722, "step": 58745 }, { "epoch": 0.7160006337367312, "grad_norm": 2.9140976268809973, "learning_rate": 1.4948043617703658e-06, "loss": 0.7485, "step": 58750 }, { "epoch": 0.716061569960879, "grad_norm": 2.23290156215429, "learning_rate": 1.494483643361129e-06, "loss": 0.7961, "step": 58755 }, { "epoch": 0.7161225061850267, "grad_norm": 4.530124180135913, "learning_rate": 1.4941629249518924e-06, "loss": 0.7486, "step": 58760 }, { "epoch": 0.7161834424091745, "grad_norm": 2.8100697396393985, "learning_rate": 1.4938422065426555e-06, "loss": 0.7975, "step": 58765 }, { "epoch": 0.7162443786333224, "grad_norm": 4.41349042683286, "learning_rate": 1.493521488133419e-06, "loss": 0.7535, "step": 58770 }, { "epoch": 0.7163053148574702, "grad_norm": 2.1819054910378317, "learning_rate": 1.4932007697241823e-06, "loss": 0.6668, "step": 58775 }, { "epoch": 0.716366251081618, "grad_norm": 2.390698648222837, "learning_rate": 1.4928800513149456e-06, "loss": 0.7482, "step": 58780 }, { "epoch": 0.7164271873057658, "grad_norm": 2.60003416849732, "learning_rate": 1.492559332905709e-06, "loss": 0.7326, "step": 58785 }, { "epoch": 0.7164881235299136, "grad_norm": 2.6419760479380328, "learning_rate": 1.4922386144964722e-06, "loss": 0.7827, "step": 58790 }, { "epoch": 0.7165490597540614, "grad_norm": 2.636282526732174, "learning_rate": 1.4919178960872355e-06, "loss": 0.6455, "step": 58795 }, { "epoch": 0.7166099959782092, "grad_norm": 3.2585479318994417, "learning_rate": 1.491597177677999e-06, "loss": 0.7997, "step": 58800 }, { "epoch": 0.716670932202357, "grad_norm": 2.316456741637633, "learning_rate": 1.491276459268762e-06, "loss": 0.7169, "step": 58805 }, { "epoch": 0.7167318684265048, "grad_norm": 3.3018842231629506, "learning_rate": 1.4909557408595254e-06, "loss": 0.6489, "step": 58810 }, { "epoch": 0.7167928046506526, "grad_norm": 2.951001288777366, "learning_rate": 1.4906350224502888e-06, "loss": 0.6719, "step": 58815 }, { "epoch": 0.7168537408748005, "grad_norm": 2.777595847705651, "learning_rate": 1.490314304041052e-06, "loss": 0.6946, "step": 58820 }, { "epoch": 0.7169146770989483, "grad_norm": 2.1666596697430593, "learning_rate": 1.4899935856318155e-06, "loss": 0.7182, "step": 58825 }, { "epoch": 0.716975613323096, "grad_norm": 2.0477809509512017, "learning_rate": 1.4896728672225787e-06, "loss": 0.7554, "step": 58830 }, { "epoch": 0.7170365495472438, "grad_norm": 2.502082619108063, "learning_rate": 1.489352148813342e-06, "loss": 0.7299, "step": 58835 }, { "epoch": 0.7170974857713917, "grad_norm": 2.2742044675285373, "learning_rate": 1.4890314304041054e-06, "loss": 0.7476, "step": 58840 }, { "epoch": 0.7171584219955395, "grad_norm": 3.556491306592711, "learning_rate": 1.4887107119948684e-06, "loss": 0.7116, "step": 58845 }, { "epoch": 0.7172193582196873, "grad_norm": 2.0527456850286305, "learning_rate": 1.4883899935856318e-06, "loss": 0.7795, "step": 58850 }, { "epoch": 0.7172802944438351, "grad_norm": 2.1674054428945544, "learning_rate": 1.4880692751763953e-06, "loss": 0.7153, "step": 58855 }, { "epoch": 0.7173412306679829, "grad_norm": 2.3664848241697496, "learning_rate": 1.4877485567671585e-06, "loss": 0.726, "step": 58860 }, { "epoch": 0.7174021668921307, "grad_norm": 2.1091437399714827, "learning_rate": 1.487427838357922e-06, "loss": 0.7801, "step": 58865 }, { "epoch": 0.7174631031162785, "grad_norm": 2.3777069866023126, "learning_rate": 1.4871071199486854e-06, "loss": 0.7891, "step": 58870 }, { "epoch": 0.7175240393404263, "grad_norm": 2.4671253201869434, "learning_rate": 1.4867864015394484e-06, "loss": 0.7386, "step": 58875 }, { "epoch": 0.7175849755645741, "grad_norm": 2.7238757118384056, "learning_rate": 1.4864656831302118e-06, "loss": 0.7925, "step": 58880 }, { "epoch": 0.717645911788722, "grad_norm": 2.678028181613, "learning_rate": 1.486144964720975e-06, "loss": 0.7207, "step": 58885 }, { "epoch": 0.7177068480128698, "grad_norm": 2.3473047280899726, "learning_rate": 1.4858242463117383e-06, "loss": 0.7023, "step": 58890 }, { "epoch": 0.7177677842370176, "grad_norm": 2.30450351754141, "learning_rate": 1.4855035279025017e-06, "loss": 0.6938, "step": 58895 }, { "epoch": 0.7178287204611653, "grad_norm": 2.413422695432303, "learning_rate": 1.485182809493265e-06, "loss": 0.7401, "step": 58900 }, { "epoch": 0.7178896566853131, "grad_norm": 2.284015892201991, "learning_rate": 1.4848620910840284e-06, "loss": 0.6981, "step": 58905 }, { "epoch": 0.717950592909461, "grad_norm": 2.641582427571534, "learning_rate": 1.4845413726747918e-06, "loss": 0.768, "step": 58910 }, { "epoch": 0.7180115291336088, "grad_norm": 2.9136666771273827, "learning_rate": 1.4842206542655549e-06, "loss": 0.7596, "step": 58915 }, { "epoch": 0.7180724653577566, "grad_norm": 2.843368075398297, "learning_rate": 1.4838999358563183e-06, "loss": 0.7474, "step": 58920 }, { "epoch": 0.7181334015819044, "grad_norm": 3.510249650163063, "learning_rate": 1.4835792174470817e-06, "loss": 0.7034, "step": 58925 }, { "epoch": 0.7181943378060522, "grad_norm": 2.292829716771673, "learning_rate": 1.4832584990378448e-06, "loss": 0.7395, "step": 58930 }, { "epoch": 0.7182552740302, "grad_norm": 2.2983714601929996, "learning_rate": 1.4829377806286082e-06, "loss": 0.6614, "step": 58935 }, { "epoch": 0.7183162102543478, "grad_norm": 2.495753525233701, "learning_rate": 1.4826170622193714e-06, "loss": 0.7201, "step": 58940 }, { "epoch": 0.7183771464784956, "grad_norm": 2.415576128325486, "learning_rate": 1.4822963438101349e-06, "loss": 0.681, "step": 58945 }, { "epoch": 0.7184380827026434, "grad_norm": 2.5297085028468795, "learning_rate": 1.4819756254008983e-06, "loss": 0.6962, "step": 58950 }, { "epoch": 0.7184990189267912, "grad_norm": 2.516694976154019, "learning_rate": 1.4816549069916613e-06, "loss": 0.7877, "step": 58955 }, { "epoch": 0.7185599551509391, "grad_norm": 2.4459229543288035, "learning_rate": 1.4813341885824248e-06, "loss": 0.772, "step": 58960 }, { "epoch": 0.7186208913750868, "grad_norm": 2.7661807688417115, "learning_rate": 1.4810134701731882e-06, "loss": 0.7293, "step": 58965 }, { "epoch": 0.7186818275992346, "grad_norm": 2.8451582892377627, "learning_rate": 1.4806927517639514e-06, "loss": 0.7365, "step": 58970 }, { "epoch": 0.7187427638233824, "grad_norm": 2.3346832526684826, "learning_rate": 1.4803720333547147e-06, "loss": 0.6856, "step": 58975 }, { "epoch": 0.7188037000475302, "grad_norm": 2.73154035142005, "learning_rate": 1.4800513149454779e-06, "loss": 0.8121, "step": 58980 }, { "epoch": 0.7188646362716781, "grad_norm": 2.2596092578539246, "learning_rate": 1.4797305965362413e-06, "loss": 0.7269, "step": 58985 }, { "epoch": 0.7189255724958259, "grad_norm": 2.8163480108859837, "learning_rate": 1.4794098781270048e-06, "loss": 0.7247, "step": 58990 }, { "epoch": 0.7189865087199737, "grad_norm": 2.768906984777267, "learning_rate": 1.4790891597177678e-06, "loss": 0.7483, "step": 58995 }, { "epoch": 0.7190474449441214, "grad_norm": 2.1565561627005545, "learning_rate": 1.4787684413085312e-06, "loss": 0.6511, "step": 59000 }, { "epoch": 0.7191083811682693, "grad_norm": 2.177236526195593, "learning_rate": 1.4784477228992947e-06, "loss": 0.7107, "step": 59005 }, { "epoch": 0.7191693173924171, "grad_norm": 3.1589919849342967, "learning_rate": 1.478127004490058e-06, "loss": 0.7889, "step": 59010 }, { "epoch": 0.7192302536165649, "grad_norm": 2.7113132153020523, "learning_rate": 1.4778062860808211e-06, "loss": 0.6959, "step": 59015 }, { "epoch": 0.7192911898407127, "grad_norm": 2.498940238144874, "learning_rate": 1.4774855676715844e-06, "loss": 0.6318, "step": 59020 }, { "epoch": 0.7193521260648605, "grad_norm": 2.2677641447226886, "learning_rate": 1.4771648492623478e-06, "loss": 0.7253, "step": 59025 }, { "epoch": 0.7194130622890084, "grad_norm": 2.453823171373294, "learning_rate": 1.4768441308531112e-06, "loss": 0.7333, "step": 59030 }, { "epoch": 0.7194739985131561, "grad_norm": 2.901313296465961, "learning_rate": 1.4765234124438743e-06, "loss": 0.7146, "step": 59035 }, { "epoch": 0.7195349347373039, "grad_norm": 2.1987334098109046, "learning_rate": 1.4762026940346377e-06, "loss": 0.744, "step": 59040 }, { "epoch": 0.7195958709614517, "grad_norm": 2.777840507684233, "learning_rate": 1.4758819756254011e-06, "loss": 0.7006, "step": 59045 }, { "epoch": 0.7196568071855995, "grad_norm": 2.3207667387173534, "learning_rate": 1.4755612572161644e-06, "loss": 0.7663, "step": 59050 }, { "epoch": 0.7197177434097474, "grad_norm": 2.632625131905095, "learning_rate": 1.4752405388069276e-06, "loss": 0.7796, "step": 59055 }, { "epoch": 0.7197786796338952, "grad_norm": 2.7557160886603906, "learning_rate": 1.4749198203976908e-06, "loss": 0.8058, "step": 59060 }, { "epoch": 0.719839615858043, "grad_norm": 2.9382454947621626, "learning_rate": 1.4745991019884543e-06, "loss": 0.667, "step": 59065 }, { "epoch": 0.7199005520821907, "grad_norm": 2.86942365162214, "learning_rate": 1.4742783835792177e-06, "loss": 0.7013, "step": 59070 }, { "epoch": 0.7199614883063385, "grad_norm": 2.4646178572351736, "learning_rate": 1.4739576651699807e-06, "loss": 0.6877, "step": 59075 }, { "epoch": 0.7200224245304864, "grad_norm": 2.462394976803721, "learning_rate": 1.4736369467607442e-06, "loss": 0.6923, "step": 59080 }, { "epoch": 0.7200833607546342, "grad_norm": 2.4370244044496716, "learning_rate": 1.4733162283515076e-06, "loss": 0.6984, "step": 59085 }, { "epoch": 0.720144296978782, "grad_norm": 2.3854403129218436, "learning_rate": 1.4729955099422708e-06, "loss": 0.7925, "step": 59090 }, { "epoch": 0.7202052332029298, "grad_norm": 2.518475513911601, "learning_rate": 1.4726747915330343e-06, "loss": 0.7387, "step": 59095 }, { "epoch": 0.7202661694270777, "grad_norm": 2.9424335293101387, "learning_rate": 1.4723540731237973e-06, "loss": 0.7419, "step": 59100 }, { "epoch": 0.7203271056512254, "grad_norm": 2.139244681495111, "learning_rate": 1.4720333547145607e-06, "loss": 0.7379, "step": 59105 }, { "epoch": 0.7203880418753732, "grad_norm": 2.3968248933324063, "learning_rate": 1.4717126363053242e-06, "loss": 0.6664, "step": 59110 }, { "epoch": 0.720448978099521, "grad_norm": 2.306308914420859, "learning_rate": 1.4713919178960872e-06, "loss": 0.7733, "step": 59115 }, { "epoch": 0.7205099143236688, "grad_norm": 2.4344449773061783, "learning_rate": 1.4710711994868506e-06, "loss": 0.7447, "step": 59120 }, { "epoch": 0.7205708505478167, "grad_norm": 2.3849822522342077, "learning_rate": 1.470750481077614e-06, "loss": 0.7384, "step": 59125 }, { "epoch": 0.7206317867719645, "grad_norm": 2.720836211660135, "learning_rate": 1.4704297626683773e-06, "loss": 0.6926, "step": 59130 }, { "epoch": 0.7206927229961123, "grad_norm": 2.5013532707403514, "learning_rate": 1.4701090442591407e-06, "loss": 0.7907, "step": 59135 }, { "epoch": 0.72075365922026, "grad_norm": 4.481140661298618, "learning_rate": 1.4697883258499037e-06, "loss": 0.8061, "step": 59140 }, { "epoch": 0.7208145954444078, "grad_norm": 2.54608568347152, "learning_rate": 1.4694676074406672e-06, "loss": 0.7117, "step": 59145 }, { "epoch": 0.7208755316685557, "grad_norm": 2.3424978506039866, "learning_rate": 1.4691468890314306e-06, "loss": 0.6586, "step": 59150 }, { "epoch": 0.7209364678927035, "grad_norm": 2.313121811000467, "learning_rate": 1.4688261706221936e-06, "loss": 0.7149, "step": 59155 }, { "epoch": 0.7209974041168513, "grad_norm": 2.7352385167579913, "learning_rate": 1.468505452212957e-06, "loss": 0.6966, "step": 59160 }, { "epoch": 0.7210583403409991, "grad_norm": 2.2141320112704292, "learning_rate": 1.4681847338037205e-06, "loss": 0.7533, "step": 59165 }, { "epoch": 0.721119276565147, "grad_norm": 2.42733885311004, "learning_rate": 1.4678640153944838e-06, "loss": 0.7343, "step": 59170 }, { "epoch": 0.7211802127892947, "grad_norm": 2.2058693112345047, "learning_rate": 1.4675432969852472e-06, "loss": 0.7285, "step": 59175 }, { "epoch": 0.7212411490134425, "grad_norm": 2.1386131917851214, "learning_rate": 1.4672225785760102e-06, "loss": 0.7406, "step": 59180 }, { "epoch": 0.7213020852375903, "grad_norm": 2.193933568576336, "learning_rate": 1.4669018601667737e-06, "loss": 0.6795, "step": 59185 }, { "epoch": 0.7213630214617381, "grad_norm": 2.2328696132545582, "learning_rate": 1.466581141757537e-06, "loss": 0.7031, "step": 59190 }, { "epoch": 0.721423957685886, "grad_norm": 2.554512567788799, "learning_rate": 1.4662604233483001e-06, "loss": 0.6788, "step": 59195 }, { "epoch": 0.7214848939100338, "grad_norm": 2.662941103192533, "learning_rate": 1.4659397049390635e-06, "loss": 0.6846, "step": 59200 }, { "epoch": 0.7215458301341816, "grad_norm": 2.5053647484965875, "learning_rate": 1.465618986529827e-06, "loss": 0.7659, "step": 59205 }, { "epoch": 0.7216067663583293, "grad_norm": 2.7559748395143955, "learning_rate": 1.4652982681205902e-06, "loss": 0.7583, "step": 59210 }, { "epoch": 0.7216677025824771, "grad_norm": 1.8427480804557959, "learning_rate": 1.4649775497113537e-06, "loss": 0.7204, "step": 59215 }, { "epoch": 0.721728638806625, "grad_norm": 3.0498480246131403, "learning_rate": 1.464656831302117e-06, "loss": 0.8184, "step": 59220 }, { "epoch": 0.7217895750307728, "grad_norm": 2.6928807802484194, "learning_rate": 1.4643361128928801e-06, "loss": 0.7491, "step": 59225 }, { "epoch": 0.7218505112549206, "grad_norm": 2.055525441051212, "learning_rate": 1.4640153944836436e-06, "loss": 0.7052, "step": 59230 }, { "epoch": 0.7219114474790684, "grad_norm": 2.857774202540626, "learning_rate": 1.4636946760744068e-06, "loss": 0.7341, "step": 59235 }, { "epoch": 0.7219723837032163, "grad_norm": 2.3587165574286995, "learning_rate": 1.46337395766517e-06, "loss": 0.765, "step": 59240 }, { "epoch": 0.722033319927364, "grad_norm": 2.7218739497340896, "learning_rate": 1.4630532392559335e-06, "loss": 0.757, "step": 59245 }, { "epoch": 0.7220942561515118, "grad_norm": 2.673858622403918, "learning_rate": 1.4627325208466967e-06, "loss": 0.7158, "step": 59250 }, { "epoch": 0.7221551923756596, "grad_norm": 2.917359257458856, "learning_rate": 1.4624118024374601e-06, "loss": 0.7504, "step": 59255 }, { "epoch": 0.7222161285998074, "grad_norm": 2.370285292629086, "learning_rate": 1.4620910840282236e-06, "loss": 0.7119, "step": 59260 }, { "epoch": 0.7222770648239553, "grad_norm": 2.889924800508169, "learning_rate": 1.4617703656189866e-06, "loss": 0.7728, "step": 59265 }, { "epoch": 0.7223380010481031, "grad_norm": 2.8050782840322714, "learning_rate": 1.46144964720975e-06, "loss": 0.8081, "step": 59270 }, { "epoch": 0.7223989372722509, "grad_norm": 2.702004384129277, "learning_rate": 1.4611289288005132e-06, "loss": 0.7124, "step": 59275 }, { "epoch": 0.7224598734963986, "grad_norm": 3.2830308112282927, "learning_rate": 1.4608082103912765e-06, "loss": 0.6834, "step": 59280 }, { "epoch": 0.7225208097205464, "grad_norm": 2.4295713345500234, "learning_rate": 1.46048749198204e-06, "loss": 0.7921, "step": 59285 }, { "epoch": 0.7225817459446943, "grad_norm": 2.954272680270716, "learning_rate": 1.4601667735728031e-06, "loss": 0.6797, "step": 59290 }, { "epoch": 0.7226426821688421, "grad_norm": 2.837965935082943, "learning_rate": 1.4598460551635666e-06, "loss": 0.7214, "step": 59295 }, { "epoch": 0.7227036183929899, "grad_norm": 2.653203792335416, "learning_rate": 1.45952533675433e-06, "loss": 0.6637, "step": 59300 }, { "epoch": 0.7227645546171377, "grad_norm": 2.714427566584594, "learning_rate": 1.459204618345093e-06, "loss": 0.7054, "step": 59305 }, { "epoch": 0.7228254908412856, "grad_norm": 2.7745584367148632, "learning_rate": 1.4588838999358565e-06, "loss": 0.7533, "step": 59310 }, { "epoch": 0.7228864270654333, "grad_norm": 2.695529017644188, "learning_rate": 1.4585631815266197e-06, "loss": 0.6918, "step": 59315 }, { "epoch": 0.7229473632895811, "grad_norm": 2.382961433921296, "learning_rate": 1.4582424631173832e-06, "loss": 0.7673, "step": 59320 }, { "epoch": 0.7230082995137289, "grad_norm": 1.9741829024014823, "learning_rate": 1.4579217447081464e-06, "loss": 0.6925, "step": 59325 }, { "epoch": 0.7230692357378767, "grad_norm": 3.121998513704408, "learning_rate": 1.4576010262989096e-06, "loss": 0.7466, "step": 59330 }, { "epoch": 0.7231301719620246, "grad_norm": 2.945082262373936, "learning_rate": 1.457280307889673e-06, "loss": 0.7039, "step": 59335 }, { "epoch": 0.7231911081861724, "grad_norm": 2.6743556927928798, "learning_rate": 1.4569595894804365e-06, "loss": 0.7825, "step": 59340 }, { "epoch": 0.7232520444103202, "grad_norm": 2.0564263546050365, "learning_rate": 1.4566388710711995e-06, "loss": 0.6957, "step": 59345 }, { "epoch": 0.7233129806344679, "grad_norm": 2.2587844229453733, "learning_rate": 1.456318152661963e-06, "loss": 0.6176, "step": 59350 }, { "epoch": 0.7233739168586157, "grad_norm": 2.223879322078419, "learning_rate": 1.4559974342527262e-06, "loss": 0.7366, "step": 59355 }, { "epoch": 0.7234348530827636, "grad_norm": 1.9897335714611386, "learning_rate": 1.4556767158434896e-06, "loss": 0.7021, "step": 59360 }, { "epoch": 0.7234957893069114, "grad_norm": 2.714573599696404, "learning_rate": 1.4553559974342528e-06, "loss": 0.6985, "step": 59365 }, { "epoch": 0.7235567255310592, "grad_norm": 2.2942265073089096, "learning_rate": 1.455035279025016e-06, "loss": 0.7702, "step": 59370 }, { "epoch": 0.723617661755207, "grad_norm": 3.069533676577389, "learning_rate": 1.4547145606157795e-06, "loss": 0.6898, "step": 59375 }, { "epoch": 0.7236785979793549, "grad_norm": 3.5075233106427715, "learning_rate": 1.454393842206543e-06, "loss": 0.739, "step": 59380 }, { "epoch": 0.7237395342035026, "grad_norm": 2.433875384660764, "learning_rate": 1.454073123797306e-06, "loss": 0.7237, "step": 59385 }, { "epoch": 0.7238004704276504, "grad_norm": 2.805968771842103, "learning_rate": 1.4537524053880694e-06, "loss": 0.6695, "step": 59390 }, { "epoch": 0.7238614066517982, "grad_norm": 2.617612326524273, "learning_rate": 1.4534316869788326e-06, "loss": 0.7592, "step": 59395 }, { "epoch": 0.723922342875946, "grad_norm": 2.128583603770566, "learning_rate": 1.453110968569596e-06, "loss": 0.7222, "step": 59400 }, { "epoch": 0.7239832791000939, "grad_norm": 2.565045858090209, "learning_rate": 1.4527902501603593e-06, "loss": 0.7355, "step": 59405 }, { "epoch": 0.7240442153242417, "grad_norm": 2.6593030406498075, "learning_rate": 1.4524695317511225e-06, "loss": 0.6938, "step": 59410 }, { "epoch": 0.7241051515483895, "grad_norm": 1.951313174009009, "learning_rate": 1.452148813341886e-06, "loss": 0.7391, "step": 59415 }, { "epoch": 0.7241660877725372, "grad_norm": 2.331688226707966, "learning_rate": 1.4518280949326494e-06, "loss": 0.7273, "step": 59420 }, { "epoch": 0.724227023996685, "grad_norm": 2.4755813395020785, "learning_rate": 1.4515073765234124e-06, "loss": 0.6916, "step": 59425 }, { "epoch": 0.7242879602208329, "grad_norm": 2.52990765556926, "learning_rate": 1.4511866581141759e-06, "loss": 0.7049, "step": 59430 }, { "epoch": 0.7243488964449807, "grad_norm": 2.5270904762011583, "learning_rate": 1.450865939704939e-06, "loss": 0.7206, "step": 59435 }, { "epoch": 0.7244098326691285, "grad_norm": 2.619105278877154, "learning_rate": 1.4505452212957025e-06, "loss": 0.7157, "step": 59440 }, { "epoch": 0.7244707688932763, "grad_norm": 2.472157181651721, "learning_rate": 1.450224502886466e-06, "loss": 0.6987, "step": 59445 }, { "epoch": 0.7245317051174242, "grad_norm": 2.9794137892706942, "learning_rate": 1.449903784477229e-06, "loss": 0.7625, "step": 59450 }, { "epoch": 0.7245926413415719, "grad_norm": 2.7147314222998693, "learning_rate": 1.4495830660679924e-06, "loss": 0.6819, "step": 59455 }, { "epoch": 0.7246535775657197, "grad_norm": 1.8693098856141568, "learning_rate": 1.4492623476587559e-06, "loss": 0.7194, "step": 59460 }, { "epoch": 0.7247145137898675, "grad_norm": 2.5430404225511825, "learning_rate": 1.448941629249519e-06, "loss": 0.7757, "step": 59465 }, { "epoch": 0.7247754500140153, "grad_norm": 2.5737492032368112, "learning_rate": 1.4486209108402823e-06, "loss": 0.6658, "step": 59470 }, { "epoch": 0.7248363862381632, "grad_norm": 2.463872793250381, "learning_rate": 1.4483001924310456e-06, "loss": 0.7079, "step": 59475 }, { "epoch": 0.724897322462311, "grad_norm": 2.30138241868004, "learning_rate": 1.447979474021809e-06, "loss": 0.7244, "step": 59480 }, { "epoch": 0.7249582586864588, "grad_norm": 2.9876297731318795, "learning_rate": 1.4476587556125724e-06, "loss": 0.6578, "step": 59485 }, { "epoch": 0.7250191949106065, "grad_norm": 2.4472669325612992, "learning_rate": 1.4473380372033355e-06, "loss": 0.694, "step": 59490 }, { "epoch": 0.7250801311347543, "grad_norm": 2.4039161845643227, "learning_rate": 1.447017318794099e-06, "loss": 0.7631, "step": 59495 }, { "epoch": 0.7251410673589022, "grad_norm": 2.4130816102729544, "learning_rate": 1.4466966003848623e-06, "loss": 0.7091, "step": 59500 }, { "epoch": 0.72520200358305, "grad_norm": 2.673668382124178, "learning_rate": 1.4463758819756254e-06, "loss": 0.7415, "step": 59505 }, { "epoch": 0.7252629398071978, "grad_norm": 2.4928427975537653, "learning_rate": 1.4460551635663888e-06, "loss": 0.7759, "step": 59510 }, { "epoch": 0.7253238760313456, "grad_norm": 2.04370637283794, "learning_rate": 1.4457344451571522e-06, "loss": 0.7268, "step": 59515 }, { "epoch": 0.7253848122554934, "grad_norm": 2.3706211787194813, "learning_rate": 1.4454137267479155e-06, "loss": 0.6329, "step": 59520 }, { "epoch": 0.7254457484796412, "grad_norm": 2.8230076804918993, "learning_rate": 1.445093008338679e-06, "loss": 0.7176, "step": 59525 }, { "epoch": 0.725506684703789, "grad_norm": 2.080235393608714, "learning_rate": 1.444772289929442e-06, "loss": 0.7109, "step": 59530 }, { "epoch": 0.7255676209279368, "grad_norm": 2.0246944969429164, "learning_rate": 1.4444515715202054e-06, "loss": 0.734, "step": 59535 }, { "epoch": 0.7256285571520846, "grad_norm": 2.4770862084328193, "learning_rate": 1.4441308531109688e-06, "loss": 0.8121, "step": 59540 }, { "epoch": 0.7256894933762325, "grad_norm": 2.130748437139758, "learning_rate": 1.4438101347017318e-06, "loss": 0.6896, "step": 59545 }, { "epoch": 0.7257504296003803, "grad_norm": 3.1283681018799596, "learning_rate": 1.4434894162924953e-06, "loss": 0.6318, "step": 59550 }, { "epoch": 0.7258113658245281, "grad_norm": 2.6749901727443435, "learning_rate": 1.4431686978832587e-06, "loss": 0.6893, "step": 59555 }, { "epoch": 0.7258723020486758, "grad_norm": 3.5074288706975794, "learning_rate": 1.442847979474022e-06, "loss": 0.7489, "step": 59560 }, { "epoch": 0.7259332382728236, "grad_norm": 2.1202727530443517, "learning_rate": 1.4425272610647854e-06, "loss": 0.7123, "step": 59565 }, { "epoch": 0.7259941744969715, "grad_norm": 2.1481888010074055, "learning_rate": 1.4422065426555484e-06, "loss": 0.7352, "step": 59570 }, { "epoch": 0.7260551107211193, "grad_norm": 3.7669090607201445, "learning_rate": 1.4418858242463118e-06, "loss": 0.7948, "step": 59575 }, { "epoch": 0.7261160469452671, "grad_norm": 2.10123301447273, "learning_rate": 1.4415651058370753e-06, "loss": 0.6899, "step": 59580 }, { "epoch": 0.7261769831694149, "grad_norm": 2.0529190450981774, "learning_rate": 1.4412443874278385e-06, "loss": 0.7769, "step": 59585 }, { "epoch": 0.7262379193935627, "grad_norm": 2.2494503214749, "learning_rate": 1.4409236690186017e-06, "loss": 0.7085, "step": 59590 }, { "epoch": 0.7262988556177105, "grad_norm": 2.3846441621632177, "learning_rate": 1.4406029506093652e-06, "loss": 0.7435, "step": 59595 }, { "epoch": 0.7263597918418583, "grad_norm": 2.4429577102692828, "learning_rate": 1.4402822322001284e-06, "loss": 0.7427, "step": 59600 }, { "epoch": 0.7264207280660061, "grad_norm": 2.3107104288799722, "learning_rate": 1.4399615137908918e-06, "loss": 0.6964, "step": 59605 }, { "epoch": 0.7264816642901539, "grad_norm": 2.432329655705711, "learning_rate": 1.4396407953816549e-06, "loss": 0.6588, "step": 59610 }, { "epoch": 0.7265426005143018, "grad_norm": 2.4338699562586754, "learning_rate": 1.4393200769724183e-06, "loss": 0.7189, "step": 59615 }, { "epoch": 0.7266035367384496, "grad_norm": 2.348694503254242, "learning_rate": 1.4389993585631817e-06, "loss": 0.7294, "step": 59620 }, { "epoch": 0.7266644729625974, "grad_norm": 2.933905584598987, "learning_rate": 1.438678640153945e-06, "loss": 0.7715, "step": 59625 }, { "epoch": 0.7267254091867451, "grad_norm": 2.476924470629855, "learning_rate": 1.4383579217447082e-06, "loss": 0.799, "step": 59630 }, { "epoch": 0.7267863454108929, "grad_norm": 2.7902235544651655, "learning_rate": 1.4380372033354716e-06, "loss": 0.7194, "step": 59635 }, { "epoch": 0.7268472816350408, "grad_norm": 2.570844081469594, "learning_rate": 1.4377164849262349e-06, "loss": 0.7995, "step": 59640 }, { "epoch": 0.7269082178591886, "grad_norm": 2.307227658135863, "learning_rate": 1.4373957665169983e-06, "loss": 0.6939, "step": 59645 }, { "epoch": 0.7269691540833364, "grad_norm": 2.727576106417859, "learning_rate": 1.4370750481077613e-06, "loss": 0.7129, "step": 59650 }, { "epoch": 0.7270300903074842, "grad_norm": 2.960205819256933, "learning_rate": 1.4367543296985248e-06, "loss": 0.7544, "step": 59655 }, { "epoch": 0.727091026531632, "grad_norm": 2.4028150643711337, "learning_rate": 1.4364336112892882e-06, "loss": 0.7434, "step": 59660 }, { "epoch": 0.7271519627557798, "grad_norm": 2.142913263624622, "learning_rate": 1.4361128928800514e-06, "loss": 0.7045, "step": 59665 }, { "epoch": 0.7272128989799276, "grad_norm": 2.710179622402536, "learning_rate": 1.4357921744708147e-06, "loss": 0.7354, "step": 59670 }, { "epoch": 0.7272738352040754, "grad_norm": 2.3452945344808143, "learning_rate": 1.435471456061578e-06, "loss": 0.6851, "step": 59675 }, { "epoch": 0.7273347714282232, "grad_norm": 2.2395711276864074, "learning_rate": 1.4351507376523413e-06, "loss": 0.793, "step": 59680 }, { "epoch": 0.727395707652371, "grad_norm": 2.4932714902793767, "learning_rate": 1.4348300192431048e-06, "loss": 0.7504, "step": 59685 }, { "epoch": 0.7274566438765189, "grad_norm": 2.5236971899733804, "learning_rate": 1.4345093008338678e-06, "loss": 0.7145, "step": 59690 }, { "epoch": 0.7275175801006667, "grad_norm": 2.881127862207346, "learning_rate": 1.4341885824246312e-06, "loss": 0.796, "step": 59695 }, { "epoch": 0.7275785163248144, "grad_norm": 2.1027006381661404, "learning_rate": 1.4338678640153947e-06, "loss": 0.6423, "step": 59700 }, { "epoch": 0.7276394525489622, "grad_norm": 2.805475691294457, "learning_rate": 1.4335471456061579e-06, "loss": 0.784, "step": 59705 }, { "epoch": 0.72770038877311, "grad_norm": 2.1815026858655084, "learning_rate": 1.4332264271969213e-06, "loss": 0.7622, "step": 59710 }, { "epoch": 0.7277613249972579, "grad_norm": 2.414311397928978, "learning_rate": 1.4329057087876846e-06, "loss": 0.7256, "step": 59715 }, { "epoch": 0.7278222612214057, "grad_norm": 2.450960278740825, "learning_rate": 1.4325849903784478e-06, "loss": 0.7733, "step": 59720 }, { "epoch": 0.7278831974455535, "grad_norm": 2.3243484271482733, "learning_rate": 1.4322642719692112e-06, "loss": 0.7536, "step": 59725 }, { "epoch": 0.7279441336697013, "grad_norm": 2.144299013051616, "learning_rate": 1.4319435535599742e-06, "loss": 0.7514, "step": 59730 }, { "epoch": 0.728005069893849, "grad_norm": 2.4551062490342006, "learning_rate": 1.4316228351507377e-06, "loss": 0.7787, "step": 59735 }, { "epoch": 0.7280660061179969, "grad_norm": 2.5498182567429244, "learning_rate": 1.4313021167415011e-06, "loss": 0.7011, "step": 59740 }, { "epoch": 0.7281269423421447, "grad_norm": 2.1625553108011464, "learning_rate": 1.4309813983322644e-06, "loss": 0.7101, "step": 59745 }, { "epoch": 0.7281878785662925, "grad_norm": 2.4570393665459025, "learning_rate": 1.4306606799230278e-06, "loss": 0.6404, "step": 59750 }, { "epoch": 0.7282488147904403, "grad_norm": 2.501539072907796, "learning_rate": 1.430339961513791e-06, "loss": 0.729, "step": 59755 }, { "epoch": 0.7283097510145882, "grad_norm": 2.06264875438697, "learning_rate": 1.4300192431045542e-06, "loss": 0.6904, "step": 59760 }, { "epoch": 0.728370687238736, "grad_norm": 3.0672960089662737, "learning_rate": 1.4296985246953177e-06, "loss": 0.7321, "step": 59765 }, { "epoch": 0.7284316234628837, "grad_norm": 2.634089162900775, "learning_rate": 1.4293778062860807e-06, "loss": 0.6543, "step": 59770 }, { "epoch": 0.7284925596870315, "grad_norm": 2.561711830015053, "learning_rate": 1.4290570878768441e-06, "loss": 0.7365, "step": 59775 }, { "epoch": 0.7285534959111793, "grad_norm": 2.7126362392660672, "learning_rate": 1.4287363694676076e-06, "loss": 0.6915, "step": 59780 }, { "epoch": 0.7286144321353272, "grad_norm": 1.8537985995407744, "learning_rate": 1.4284156510583708e-06, "loss": 0.7329, "step": 59785 }, { "epoch": 0.728675368359475, "grad_norm": 2.7478479281948003, "learning_rate": 1.4280949326491343e-06, "loss": 0.7306, "step": 59790 }, { "epoch": 0.7287363045836228, "grad_norm": 2.4374005238287646, "learning_rate": 1.4277742142398977e-06, "loss": 0.738, "step": 59795 }, { "epoch": 0.7287972408077706, "grad_norm": 2.101399816934695, "learning_rate": 1.4274534958306607e-06, "loss": 0.756, "step": 59800 }, { "epoch": 0.7288581770319184, "grad_norm": 2.0673821992240193, "learning_rate": 1.4271327774214242e-06, "loss": 0.7355, "step": 59805 }, { "epoch": 0.7289191132560662, "grad_norm": 2.3245569169368636, "learning_rate": 1.4268120590121874e-06, "loss": 0.7956, "step": 59810 }, { "epoch": 0.728980049480214, "grad_norm": 2.780322775476431, "learning_rate": 1.4264913406029506e-06, "loss": 0.7238, "step": 59815 }, { "epoch": 0.7290409857043618, "grad_norm": 2.2923939983405446, "learning_rate": 1.426170622193714e-06, "loss": 0.7501, "step": 59820 }, { "epoch": 0.7291019219285096, "grad_norm": 2.248139983064029, "learning_rate": 1.4258499037844773e-06, "loss": 0.714, "step": 59825 }, { "epoch": 0.7291628581526575, "grad_norm": 2.0814408395649426, "learning_rate": 1.4255291853752407e-06, "loss": 0.7064, "step": 59830 }, { "epoch": 0.7292237943768053, "grad_norm": 2.493943518922062, "learning_rate": 1.4252084669660042e-06, "loss": 0.6923, "step": 59835 }, { "epoch": 0.729284730600953, "grad_norm": 2.1012914457739944, "learning_rate": 1.4248877485567672e-06, "loss": 0.7769, "step": 59840 }, { "epoch": 0.7293456668251008, "grad_norm": 2.135313053174759, "learning_rate": 1.4245670301475306e-06, "loss": 0.732, "step": 59845 }, { "epoch": 0.7294066030492486, "grad_norm": 3.3609556960191305, "learning_rate": 1.424246311738294e-06, "loss": 0.7571, "step": 59850 }, { "epoch": 0.7294675392733965, "grad_norm": 4.361551408759886, "learning_rate": 1.423925593329057e-06, "loss": 0.7187, "step": 59855 }, { "epoch": 0.7295284754975443, "grad_norm": 3.070371883831851, "learning_rate": 1.4236048749198205e-06, "loss": 0.7925, "step": 59860 }, { "epoch": 0.7295894117216921, "grad_norm": 1.9645246789729225, "learning_rate": 1.4232841565105837e-06, "loss": 0.6965, "step": 59865 }, { "epoch": 0.7296503479458399, "grad_norm": 2.8132062540194744, "learning_rate": 1.4229634381013472e-06, "loss": 0.8021, "step": 59870 }, { "epoch": 0.7297112841699877, "grad_norm": 2.798119707593599, "learning_rate": 1.4226427196921106e-06, "loss": 0.7482, "step": 59875 }, { "epoch": 0.7297722203941355, "grad_norm": 2.3621387672759147, "learning_rate": 1.4223220012828736e-06, "loss": 0.778, "step": 59880 }, { "epoch": 0.7298331566182833, "grad_norm": 2.212652983889399, "learning_rate": 1.422001282873637e-06, "loss": 0.7243, "step": 59885 }, { "epoch": 0.7298940928424311, "grad_norm": 2.319064946553784, "learning_rate": 1.4216805644644005e-06, "loss": 0.7208, "step": 59890 }, { "epoch": 0.7299550290665789, "grad_norm": 3.25112268990994, "learning_rate": 1.4213598460551635e-06, "loss": 0.7549, "step": 59895 }, { "epoch": 0.7300159652907268, "grad_norm": 2.1914294823514364, "learning_rate": 1.421039127645927e-06, "loss": 0.7088, "step": 59900 }, { "epoch": 0.7300769015148746, "grad_norm": 2.4262227089236865, "learning_rate": 1.4207184092366902e-06, "loss": 0.7382, "step": 59905 }, { "epoch": 0.7301378377390223, "grad_norm": 2.487039880214207, "learning_rate": 1.4203976908274536e-06, "loss": 0.7681, "step": 59910 }, { "epoch": 0.7301987739631701, "grad_norm": 2.4398155391300897, "learning_rate": 1.420076972418217e-06, "loss": 0.7163, "step": 59915 }, { "epoch": 0.730259710187318, "grad_norm": 2.349431519357196, "learning_rate": 1.41975625400898e-06, "loss": 0.7398, "step": 59920 }, { "epoch": 0.7303206464114658, "grad_norm": 2.2849390026794993, "learning_rate": 1.4194355355997435e-06, "loss": 0.7359, "step": 59925 }, { "epoch": 0.7303815826356136, "grad_norm": 2.1758600576077507, "learning_rate": 1.419114817190507e-06, "loss": 0.7474, "step": 59930 }, { "epoch": 0.7304425188597614, "grad_norm": 2.338193888548859, "learning_rate": 1.4187940987812702e-06, "loss": 0.7034, "step": 59935 }, { "epoch": 0.7305034550839091, "grad_norm": 2.3891738175210606, "learning_rate": 1.4184733803720334e-06, "loss": 0.6977, "step": 59940 }, { "epoch": 0.730564391308057, "grad_norm": 2.2510925016328196, "learning_rate": 1.4181526619627967e-06, "loss": 0.7254, "step": 59945 }, { "epoch": 0.7306253275322048, "grad_norm": 2.4936181523902365, "learning_rate": 1.4178319435535601e-06, "loss": 0.7267, "step": 59950 }, { "epoch": 0.7306862637563526, "grad_norm": 2.7518380521822343, "learning_rate": 1.4175112251443236e-06, "loss": 0.7328, "step": 59955 }, { "epoch": 0.7307471999805004, "grad_norm": 2.9874049622520373, "learning_rate": 1.4171905067350866e-06, "loss": 0.6852, "step": 59960 }, { "epoch": 0.7308081362046482, "grad_norm": 2.7752180111797093, "learning_rate": 1.41686978832585e-06, "loss": 0.7874, "step": 59965 }, { "epoch": 0.7308690724287961, "grad_norm": 2.671890832831904, "learning_rate": 1.4165490699166134e-06, "loss": 0.7161, "step": 59970 }, { "epoch": 0.7309300086529438, "grad_norm": 2.428722747189462, "learning_rate": 1.4162283515073767e-06, "loss": 0.7723, "step": 59975 }, { "epoch": 0.7309909448770916, "grad_norm": 2.9236475130581323, "learning_rate": 1.41590763309814e-06, "loss": 0.7221, "step": 59980 }, { "epoch": 0.7310518811012394, "grad_norm": 2.4096825350578914, "learning_rate": 1.4155869146889031e-06, "loss": 0.7346, "step": 59985 }, { "epoch": 0.7311128173253872, "grad_norm": 2.5922674879894547, "learning_rate": 1.4152661962796666e-06, "loss": 0.7932, "step": 59990 }, { "epoch": 0.7311737535495351, "grad_norm": 3.0786180314305387, "learning_rate": 1.41494547787043e-06, "loss": 0.769, "step": 59995 }, { "epoch": 0.7312346897736829, "grad_norm": 3.0633941720853266, "learning_rate": 1.414624759461193e-06, "loss": 0.7593, "step": 60000 }, { "epoch": 0.7312956259978307, "grad_norm": 2.8780626787603656, "learning_rate": 1.4143040410519565e-06, "loss": 0.7308, "step": 60005 }, { "epoch": 0.7313565622219784, "grad_norm": 2.5033622158162925, "learning_rate": 1.41398332264272e-06, "loss": 0.7154, "step": 60010 }, { "epoch": 0.7314174984461262, "grad_norm": 2.34419291412638, "learning_rate": 1.4136626042334831e-06, "loss": 0.7673, "step": 60015 }, { "epoch": 0.7314784346702741, "grad_norm": 2.3551552109023945, "learning_rate": 1.4133418858242464e-06, "loss": 0.7749, "step": 60020 }, { "epoch": 0.7315393708944219, "grad_norm": 2.574353972527878, "learning_rate": 1.4130211674150096e-06, "loss": 0.758, "step": 60025 }, { "epoch": 0.7316003071185697, "grad_norm": 2.276624744074029, "learning_rate": 1.412700449005773e-06, "loss": 0.7184, "step": 60030 }, { "epoch": 0.7316612433427175, "grad_norm": 2.4125979365087535, "learning_rate": 1.4123797305965365e-06, "loss": 0.763, "step": 60035 }, { "epoch": 0.7317221795668654, "grad_norm": 2.8516928227770326, "learning_rate": 1.4120590121872995e-06, "loss": 0.7681, "step": 60040 }, { "epoch": 0.7317831157910131, "grad_norm": 2.1189629765943274, "learning_rate": 1.411738293778063e-06, "loss": 0.6703, "step": 60045 }, { "epoch": 0.7318440520151609, "grad_norm": 2.863203824160164, "learning_rate": 1.4114175753688264e-06, "loss": 0.7519, "step": 60050 }, { "epoch": 0.7319049882393087, "grad_norm": 2.763143358798786, "learning_rate": 1.4110968569595896e-06, "loss": 0.6302, "step": 60055 }, { "epoch": 0.7319659244634565, "grad_norm": 3.068747951084621, "learning_rate": 1.410776138550353e-06, "loss": 0.7825, "step": 60060 }, { "epoch": 0.7320268606876044, "grad_norm": 2.4490503132000363, "learning_rate": 1.410455420141116e-06, "loss": 0.7144, "step": 60065 }, { "epoch": 0.7320877969117522, "grad_norm": 2.504976440345226, "learning_rate": 1.4101347017318795e-06, "loss": 0.7667, "step": 60070 }, { "epoch": 0.7321487331359, "grad_norm": 2.607078543726611, "learning_rate": 1.409813983322643e-06, "loss": 0.7296, "step": 60075 }, { "epoch": 0.7322096693600477, "grad_norm": 2.6925703038944255, "learning_rate": 1.409493264913406e-06, "loss": 0.7354, "step": 60080 }, { "epoch": 0.7322706055841955, "grad_norm": 2.346356261663195, "learning_rate": 1.4091725465041694e-06, "loss": 0.7139, "step": 60085 }, { "epoch": 0.7323315418083434, "grad_norm": 2.311088065275121, "learning_rate": 1.4088518280949328e-06, "loss": 0.7353, "step": 60090 }, { "epoch": 0.7323924780324912, "grad_norm": 2.431309935798283, "learning_rate": 1.408531109685696e-06, "loss": 0.6814, "step": 60095 }, { "epoch": 0.732453414256639, "grad_norm": 2.4756180108494696, "learning_rate": 1.4082103912764595e-06, "loss": 0.8096, "step": 60100 }, { "epoch": 0.7325143504807868, "grad_norm": 2.5746166639461845, "learning_rate": 1.4078896728672225e-06, "loss": 0.6389, "step": 60105 }, { "epoch": 0.7325752867049347, "grad_norm": 2.715067755482246, "learning_rate": 1.407568954457986e-06, "loss": 0.643, "step": 60110 }, { "epoch": 0.7326362229290824, "grad_norm": 2.754302613369321, "learning_rate": 1.4072482360487494e-06, "loss": 0.6706, "step": 60115 }, { "epoch": 0.7326971591532302, "grad_norm": 2.4649684791397517, "learning_rate": 1.4069275176395124e-06, "loss": 0.7192, "step": 60120 }, { "epoch": 0.732758095377378, "grad_norm": 2.1964078100891724, "learning_rate": 1.4066067992302759e-06, "loss": 0.6688, "step": 60125 }, { "epoch": 0.7328190316015258, "grad_norm": 2.380304261697456, "learning_rate": 1.4062860808210393e-06, "loss": 0.7389, "step": 60130 }, { "epoch": 0.7328799678256737, "grad_norm": 3.070085858164637, "learning_rate": 1.4059653624118025e-06, "loss": 0.7659, "step": 60135 }, { "epoch": 0.7329409040498215, "grad_norm": 2.4281286246393368, "learning_rate": 1.405644644002566e-06, "loss": 0.7212, "step": 60140 }, { "epoch": 0.7330018402739693, "grad_norm": 2.3569924205571158, "learning_rate": 1.4053239255933294e-06, "loss": 0.7274, "step": 60145 }, { "epoch": 0.733062776498117, "grad_norm": 2.509909953705884, "learning_rate": 1.4050032071840924e-06, "loss": 0.7161, "step": 60150 }, { "epoch": 0.7331237127222648, "grad_norm": 2.2754676842980963, "learning_rate": 1.4046824887748559e-06, "loss": 0.7136, "step": 60155 }, { "epoch": 0.7331846489464127, "grad_norm": 2.5586566853029438, "learning_rate": 1.404361770365619e-06, "loss": 0.6686, "step": 60160 }, { "epoch": 0.7332455851705605, "grad_norm": 2.221692758379626, "learning_rate": 1.4040410519563823e-06, "loss": 0.6401, "step": 60165 }, { "epoch": 0.7333065213947083, "grad_norm": 2.050054081354845, "learning_rate": 1.4037203335471458e-06, "loss": 0.7342, "step": 60170 }, { "epoch": 0.7333674576188561, "grad_norm": 2.9736725522060996, "learning_rate": 1.403399615137909e-06, "loss": 0.764, "step": 60175 }, { "epoch": 0.733428393843004, "grad_norm": 2.8489779874113834, "learning_rate": 1.4030788967286724e-06, "loss": 0.676, "step": 60180 }, { "epoch": 0.7334893300671517, "grad_norm": 2.799361065963253, "learning_rate": 1.4027581783194359e-06, "loss": 0.8178, "step": 60185 }, { "epoch": 0.7335502662912995, "grad_norm": 2.3965192234608983, "learning_rate": 1.4024374599101989e-06, "loss": 0.7213, "step": 60190 }, { "epoch": 0.7336112025154473, "grad_norm": 2.11843194773685, "learning_rate": 1.4021167415009623e-06, "loss": 0.7175, "step": 60195 }, { "epoch": 0.7336721387395951, "grad_norm": 2.4773460772555276, "learning_rate": 1.4017960230917256e-06, "loss": 0.702, "step": 60200 }, { "epoch": 0.733733074963743, "grad_norm": 2.6969353129256444, "learning_rate": 1.4014753046824888e-06, "loss": 0.7867, "step": 60205 }, { "epoch": 0.7337940111878908, "grad_norm": 2.592255393256819, "learning_rate": 1.4011545862732522e-06, "loss": 0.762, "step": 60210 }, { "epoch": 0.7338549474120386, "grad_norm": 2.5667400896394006, "learning_rate": 1.4008338678640155e-06, "loss": 0.7362, "step": 60215 }, { "epoch": 0.7339158836361863, "grad_norm": 2.2594485716954926, "learning_rate": 1.400513149454779e-06, "loss": 0.7312, "step": 60220 }, { "epoch": 0.7339768198603341, "grad_norm": 3.2203012020444723, "learning_rate": 1.4001924310455423e-06, "loss": 0.6927, "step": 60225 }, { "epoch": 0.734037756084482, "grad_norm": 2.164970211418535, "learning_rate": 1.3998717126363054e-06, "loss": 0.7148, "step": 60230 }, { "epoch": 0.7340986923086298, "grad_norm": 2.2618099148044033, "learning_rate": 1.3995509942270688e-06, "loss": 0.7045, "step": 60235 }, { "epoch": 0.7341596285327776, "grad_norm": 2.825231860305111, "learning_rate": 1.399230275817832e-06, "loss": 0.7504, "step": 60240 }, { "epoch": 0.7342205647569254, "grad_norm": 2.8341000345301572, "learning_rate": 1.3989095574085953e-06, "loss": 0.7315, "step": 60245 }, { "epoch": 0.7342815009810733, "grad_norm": 2.5630757645851543, "learning_rate": 1.3985888389993587e-06, "loss": 0.7627, "step": 60250 }, { "epoch": 0.734342437205221, "grad_norm": 2.0293453484865602, "learning_rate": 1.398268120590122e-06, "loss": 0.6964, "step": 60255 }, { "epoch": 0.7344033734293688, "grad_norm": 2.371913756724548, "learning_rate": 1.3979474021808854e-06, "loss": 0.6887, "step": 60260 }, { "epoch": 0.7344643096535166, "grad_norm": 2.9077857916920546, "learning_rate": 1.3976266837716488e-06, "loss": 0.7789, "step": 60265 }, { "epoch": 0.7345252458776644, "grad_norm": 1.953514873619218, "learning_rate": 1.3973059653624118e-06, "loss": 0.7427, "step": 60270 }, { "epoch": 0.7345861821018123, "grad_norm": 2.3432533803224436, "learning_rate": 1.3969852469531753e-06, "loss": 0.6982, "step": 60275 }, { "epoch": 0.7346471183259601, "grad_norm": 2.386297796253972, "learning_rate": 1.3966645285439385e-06, "loss": 0.7496, "step": 60280 }, { "epoch": 0.7347080545501079, "grad_norm": 2.0646743085691757, "learning_rate": 1.396343810134702e-06, "loss": 0.7481, "step": 60285 }, { "epoch": 0.7347689907742556, "grad_norm": 2.5927592043519954, "learning_rate": 1.3960230917254652e-06, "loss": 0.629, "step": 60290 }, { "epoch": 0.7348299269984034, "grad_norm": 2.530405203256553, "learning_rate": 1.3957023733162284e-06, "loss": 0.7114, "step": 60295 }, { "epoch": 0.7348908632225513, "grad_norm": 2.7193641650668456, "learning_rate": 1.3953816549069918e-06, "loss": 0.7056, "step": 60300 }, { "epoch": 0.7349517994466991, "grad_norm": 2.087388264837905, "learning_rate": 1.3950609364977553e-06, "loss": 0.7683, "step": 60305 }, { "epoch": 0.7350127356708469, "grad_norm": 2.1444468370408623, "learning_rate": 1.3947402180885183e-06, "loss": 0.7773, "step": 60310 }, { "epoch": 0.7350736718949947, "grad_norm": 3.09108819332541, "learning_rate": 1.3944194996792817e-06, "loss": 0.8428, "step": 60315 }, { "epoch": 0.7351346081191426, "grad_norm": 2.2325021809117302, "learning_rate": 1.394098781270045e-06, "loss": 0.7312, "step": 60320 }, { "epoch": 0.7351955443432903, "grad_norm": 2.8973053292938995, "learning_rate": 1.3937780628608084e-06, "loss": 0.7064, "step": 60325 }, { "epoch": 0.7352564805674381, "grad_norm": 2.6831702820959458, "learning_rate": 1.3934573444515716e-06, "loss": 0.7331, "step": 60330 }, { "epoch": 0.7353174167915859, "grad_norm": 2.472586347261164, "learning_rate": 1.3931366260423348e-06, "loss": 0.7781, "step": 60335 }, { "epoch": 0.7353783530157337, "grad_norm": 3.048097608152415, "learning_rate": 1.3928159076330983e-06, "loss": 0.763, "step": 60340 }, { "epoch": 0.7354392892398816, "grad_norm": 2.0948350310899286, "learning_rate": 1.3924951892238617e-06, "loss": 0.6555, "step": 60345 }, { "epoch": 0.7355002254640294, "grad_norm": 3.0980455107568683, "learning_rate": 1.3921744708146247e-06, "loss": 0.7031, "step": 60350 }, { "epoch": 0.7355611616881772, "grad_norm": 2.5145353115347278, "learning_rate": 1.3918537524053882e-06, "loss": 0.7312, "step": 60355 }, { "epoch": 0.7356220979123249, "grad_norm": 2.0555733864426067, "learning_rate": 1.3915330339961514e-06, "loss": 0.6341, "step": 60360 }, { "epoch": 0.7356830341364727, "grad_norm": 3.241807803283304, "learning_rate": 1.3912123155869149e-06, "loss": 0.7508, "step": 60365 }, { "epoch": 0.7357439703606206, "grad_norm": 2.4596223915773945, "learning_rate": 1.390891597177678e-06, "loss": 0.7123, "step": 60370 }, { "epoch": 0.7358049065847684, "grad_norm": 2.517119048923881, "learning_rate": 1.3905708787684413e-06, "loss": 0.7518, "step": 60375 }, { "epoch": 0.7358658428089162, "grad_norm": 2.434119686870651, "learning_rate": 1.3902501603592048e-06, "loss": 0.7197, "step": 60380 }, { "epoch": 0.735926779033064, "grad_norm": 2.4154062308380753, "learning_rate": 1.3899294419499682e-06, "loss": 0.6885, "step": 60385 }, { "epoch": 0.7359877152572118, "grad_norm": 2.8799702118311243, "learning_rate": 1.3896087235407312e-06, "loss": 0.7218, "step": 60390 }, { "epoch": 0.7360486514813596, "grad_norm": 2.8335493130134752, "learning_rate": 1.3892880051314946e-06, "loss": 0.7454, "step": 60395 }, { "epoch": 0.7361095877055074, "grad_norm": 2.5429261623696964, "learning_rate": 1.3889672867222579e-06, "loss": 0.7871, "step": 60400 }, { "epoch": 0.7361705239296552, "grad_norm": 3.1276528009841265, "learning_rate": 1.3886465683130213e-06, "loss": 0.7535, "step": 60405 }, { "epoch": 0.736231460153803, "grad_norm": 3.1791406768180526, "learning_rate": 1.3883258499037848e-06, "loss": 0.7555, "step": 60410 }, { "epoch": 0.7362923963779509, "grad_norm": 2.5858886004265584, "learning_rate": 1.3880051314945478e-06, "loss": 0.6473, "step": 60415 }, { "epoch": 0.7363533326020987, "grad_norm": 2.4532757165803005, "learning_rate": 1.3876844130853112e-06, "loss": 0.759, "step": 60420 }, { "epoch": 0.7364142688262465, "grad_norm": 2.1490188321736827, "learning_rate": 1.3873636946760747e-06, "loss": 0.721, "step": 60425 }, { "epoch": 0.7364752050503942, "grad_norm": 2.663086773683868, "learning_rate": 1.3870429762668377e-06, "loss": 0.6409, "step": 60430 }, { "epoch": 0.736536141274542, "grad_norm": 2.6196809934244336, "learning_rate": 1.3867222578576011e-06, "loss": 0.6875, "step": 60435 }, { "epoch": 0.7365970774986899, "grad_norm": 2.3162102113871037, "learning_rate": 1.3864015394483646e-06, "loss": 0.6614, "step": 60440 }, { "epoch": 0.7366580137228377, "grad_norm": 4.135976620248097, "learning_rate": 1.3860808210391278e-06, "loss": 0.7249, "step": 60445 }, { "epoch": 0.7367189499469855, "grad_norm": 2.919002930413734, "learning_rate": 1.3857601026298912e-06, "loss": 0.754, "step": 60450 }, { "epoch": 0.7367798861711333, "grad_norm": 3.135010765257266, "learning_rate": 1.3854393842206542e-06, "loss": 0.7119, "step": 60455 }, { "epoch": 0.7368408223952811, "grad_norm": 2.4460643177031085, "learning_rate": 1.3851186658114177e-06, "loss": 0.7356, "step": 60460 }, { "epoch": 0.7369017586194289, "grad_norm": 2.3336918248618157, "learning_rate": 1.3847979474021811e-06, "loss": 0.6773, "step": 60465 }, { "epoch": 0.7369626948435767, "grad_norm": 2.9055270015424712, "learning_rate": 1.3844772289929441e-06, "loss": 0.7214, "step": 60470 }, { "epoch": 0.7370236310677245, "grad_norm": 2.1698036237362235, "learning_rate": 1.3841565105837076e-06, "loss": 0.7007, "step": 60475 }, { "epoch": 0.7370845672918723, "grad_norm": 2.7501747289428016, "learning_rate": 1.383835792174471e-06, "loss": 0.7586, "step": 60480 }, { "epoch": 0.7371455035160202, "grad_norm": 2.7296794439678127, "learning_rate": 1.3835150737652342e-06, "loss": 0.8026, "step": 60485 }, { "epoch": 0.737206439740168, "grad_norm": 2.3402549141945883, "learning_rate": 1.3831943553559977e-06, "loss": 0.7074, "step": 60490 }, { "epoch": 0.7372673759643158, "grad_norm": 2.06993338831995, "learning_rate": 1.3828736369467607e-06, "loss": 0.6917, "step": 60495 }, { "epoch": 0.7373283121884635, "grad_norm": 2.6859650591003694, "learning_rate": 1.3825529185375241e-06, "loss": 0.7503, "step": 60500 }, { "epoch": 0.7373892484126113, "grad_norm": 2.2515396278519715, "learning_rate": 1.3822322001282876e-06, "loss": 0.6782, "step": 60505 }, { "epoch": 0.7374501846367592, "grad_norm": 2.357875568463405, "learning_rate": 1.3819114817190508e-06, "loss": 0.7234, "step": 60510 }, { "epoch": 0.737511120860907, "grad_norm": 2.1676353786770974, "learning_rate": 1.381590763309814e-06, "loss": 0.751, "step": 60515 }, { "epoch": 0.7375720570850548, "grad_norm": 2.41160158998727, "learning_rate": 1.3812700449005775e-06, "loss": 0.8027, "step": 60520 }, { "epoch": 0.7376329933092026, "grad_norm": 1.9584531620390542, "learning_rate": 1.3809493264913407e-06, "loss": 0.6975, "step": 60525 }, { "epoch": 0.7376939295333504, "grad_norm": 3.2277711085580476, "learning_rate": 1.3806286080821041e-06, "loss": 0.7463, "step": 60530 }, { "epoch": 0.7377548657574982, "grad_norm": 2.7345742298969324, "learning_rate": 1.3803078896728672e-06, "loss": 0.7671, "step": 60535 }, { "epoch": 0.737815801981646, "grad_norm": 2.6618926377715066, "learning_rate": 1.3799871712636306e-06, "loss": 0.7538, "step": 60540 }, { "epoch": 0.7378767382057938, "grad_norm": 3.2620805964500543, "learning_rate": 1.379666452854394e-06, "loss": 0.7242, "step": 60545 }, { "epoch": 0.7379376744299416, "grad_norm": 2.4193851787992458, "learning_rate": 1.3793457344451573e-06, "loss": 0.7027, "step": 60550 }, { "epoch": 0.7379986106540894, "grad_norm": 2.3222218312277305, "learning_rate": 1.3790250160359205e-06, "loss": 0.7537, "step": 60555 }, { "epoch": 0.7380595468782373, "grad_norm": 2.5872433971317217, "learning_rate": 1.378704297626684e-06, "loss": 0.6778, "step": 60560 }, { "epoch": 0.7381204831023851, "grad_norm": 2.563140844928594, "learning_rate": 1.3783835792174472e-06, "loss": 0.7036, "step": 60565 }, { "epoch": 0.7381814193265328, "grad_norm": 2.0598236618183976, "learning_rate": 1.3780628608082106e-06, "loss": 0.7543, "step": 60570 }, { "epoch": 0.7382423555506806, "grad_norm": 3.27448279183826, "learning_rate": 1.3777421423989736e-06, "loss": 0.7092, "step": 60575 }, { "epoch": 0.7383032917748285, "grad_norm": 2.4643525927604752, "learning_rate": 1.377421423989737e-06, "loss": 0.6757, "step": 60580 }, { "epoch": 0.7383642279989763, "grad_norm": 3.25525150886377, "learning_rate": 1.3771007055805005e-06, "loss": 0.688, "step": 60585 }, { "epoch": 0.7384251642231241, "grad_norm": 2.3143730184416365, "learning_rate": 1.3767799871712637e-06, "loss": 0.7365, "step": 60590 }, { "epoch": 0.7384861004472719, "grad_norm": 2.4731345690386277, "learning_rate": 1.376459268762027e-06, "loss": 0.743, "step": 60595 }, { "epoch": 0.7385470366714197, "grad_norm": 2.4864319719148402, "learning_rate": 1.3761385503527904e-06, "loss": 0.72, "step": 60600 }, { "epoch": 0.7386079728955675, "grad_norm": 3.0237176847688776, "learning_rate": 1.3758178319435536e-06, "loss": 0.7011, "step": 60605 }, { "epoch": 0.7386689091197153, "grad_norm": 2.536485548956506, "learning_rate": 1.375497113534317e-06, "loss": 0.7427, "step": 60610 }, { "epoch": 0.7387298453438631, "grad_norm": 2.4342187757526803, "learning_rate": 1.37517639512508e-06, "loss": 0.7079, "step": 60615 }, { "epoch": 0.7387907815680109, "grad_norm": 2.288567790912991, "learning_rate": 1.3748556767158435e-06, "loss": 0.6769, "step": 60620 }, { "epoch": 0.7388517177921587, "grad_norm": 2.2828182191807334, "learning_rate": 1.374534958306607e-06, "loss": 0.6487, "step": 60625 }, { "epoch": 0.7389126540163066, "grad_norm": 2.4761093288564293, "learning_rate": 1.3742142398973702e-06, "loss": 0.7463, "step": 60630 }, { "epoch": 0.7389735902404544, "grad_norm": 2.3369807548130352, "learning_rate": 1.3738935214881336e-06, "loss": 0.7275, "step": 60635 }, { "epoch": 0.7390345264646021, "grad_norm": 2.9767390923634554, "learning_rate": 1.3735728030788969e-06, "loss": 0.7462, "step": 60640 }, { "epoch": 0.7390954626887499, "grad_norm": 2.0900845273493287, "learning_rate": 1.37325208466966e-06, "loss": 0.7323, "step": 60645 }, { "epoch": 0.7391563989128977, "grad_norm": 3.0844526135672625, "learning_rate": 1.3729313662604235e-06, "loss": 0.7499, "step": 60650 }, { "epoch": 0.7392173351370456, "grad_norm": 3.1024370535208314, "learning_rate": 1.3726106478511866e-06, "loss": 0.7869, "step": 60655 }, { "epoch": 0.7392782713611934, "grad_norm": 2.6816598258253506, "learning_rate": 1.37228992944195e-06, "loss": 0.7217, "step": 60660 }, { "epoch": 0.7393392075853412, "grad_norm": 2.460926595399453, "learning_rate": 1.3719692110327134e-06, "loss": 0.674, "step": 60665 }, { "epoch": 0.739400143809489, "grad_norm": 2.494586919111076, "learning_rate": 1.3716484926234767e-06, "loss": 0.7398, "step": 60670 }, { "epoch": 0.7394610800336368, "grad_norm": 2.628569392103861, "learning_rate": 1.3713277742142401e-06, "loss": 0.703, "step": 60675 }, { "epoch": 0.7395220162577846, "grad_norm": 2.3919898920531204, "learning_rate": 1.3710070558050033e-06, "loss": 0.7208, "step": 60680 }, { "epoch": 0.7395829524819324, "grad_norm": 3.2880538486723343, "learning_rate": 1.3706863373957666e-06, "loss": 0.7618, "step": 60685 }, { "epoch": 0.7396438887060802, "grad_norm": 2.0256093188049644, "learning_rate": 1.37036561898653e-06, "loss": 0.7514, "step": 60690 }, { "epoch": 0.739704824930228, "grad_norm": 2.6742745908530976, "learning_rate": 1.370044900577293e-06, "loss": 0.7612, "step": 60695 }, { "epoch": 0.7397657611543759, "grad_norm": 3.0015738606237665, "learning_rate": 1.3697241821680565e-06, "loss": 0.7746, "step": 60700 }, { "epoch": 0.7398266973785237, "grad_norm": 2.6387979637948553, "learning_rate": 1.36940346375882e-06, "loss": 0.7825, "step": 60705 }, { "epoch": 0.7398876336026714, "grad_norm": 5.854646817717147, "learning_rate": 1.3690827453495831e-06, "loss": 0.7112, "step": 60710 }, { "epoch": 0.7399485698268192, "grad_norm": 2.875426480382152, "learning_rate": 1.3687620269403466e-06, "loss": 0.7704, "step": 60715 }, { "epoch": 0.740009506050967, "grad_norm": 2.7413499766319753, "learning_rate": 1.3684413085311098e-06, "loss": 0.75, "step": 60720 }, { "epoch": 0.7400704422751149, "grad_norm": 2.902402010654287, "learning_rate": 1.368120590121873e-06, "loss": 0.745, "step": 60725 }, { "epoch": 0.7401313784992627, "grad_norm": 3.0579160340836213, "learning_rate": 1.3677998717126365e-06, "loss": 0.7082, "step": 60730 }, { "epoch": 0.7401923147234105, "grad_norm": 2.2230381058237043, "learning_rate": 1.3674791533034e-06, "loss": 0.7462, "step": 60735 }, { "epoch": 0.7402532509475583, "grad_norm": 2.1657068449173695, "learning_rate": 1.367158434894163e-06, "loss": 0.7512, "step": 60740 }, { "epoch": 0.740314187171706, "grad_norm": 2.1837908624884768, "learning_rate": 1.3668377164849264e-06, "loss": 0.7159, "step": 60745 }, { "epoch": 0.7403751233958539, "grad_norm": 2.5386728850316445, "learning_rate": 1.3665169980756896e-06, "loss": 0.671, "step": 60750 }, { "epoch": 0.7404360596200017, "grad_norm": 2.69183976910996, "learning_rate": 1.366196279666453e-06, "loss": 0.6919, "step": 60755 }, { "epoch": 0.7404969958441495, "grad_norm": 2.7375479650218524, "learning_rate": 1.3658755612572165e-06, "loss": 0.7524, "step": 60760 }, { "epoch": 0.7405579320682973, "grad_norm": 2.778465535466215, "learning_rate": 1.3655548428479795e-06, "loss": 0.6944, "step": 60765 }, { "epoch": 0.7406188682924452, "grad_norm": 2.355213728134323, "learning_rate": 1.365234124438743e-06, "loss": 0.6773, "step": 60770 }, { "epoch": 0.740679804516593, "grad_norm": 2.4755705305057942, "learning_rate": 1.3649134060295064e-06, "loss": 0.7635, "step": 60775 }, { "epoch": 0.7407407407407407, "grad_norm": 2.0280354611319664, "learning_rate": 1.3645926876202694e-06, "loss": 0.7855, "step": 60780 }, { "epoch": 0.7408016769648885, "grad_norm": 2.295334991406356, "learning_rate": 1.3642719692110328e-06, "loss": 0.7017, "step": 60785 }, { "epoch": 0.7408626131890363, "grad_norm": 2.645199833447516, "learning_rate": 1.363951250801796e-06, "loss": 0.701, "step": 60790 }, { "epoch": 0.7409235494131842, "grad_norm": 2.3523582375912904, "learning_rate": 1.3636305323925595e-06, "loss": 0.6615, "step": 60795 }, { "epoch": 0.740984485637332, "grad_norm": 2.435210684561892, "learning_rate": 1.363309813983323e-06, "loss": 0.7315, "step": 60800 }, { "epoch": 0.7410454218614798, "grad_norm": 2.9257033980661027, "learning_rate": 1.362989095574086e-06, "loss": 0.7133, "step": 60805 }, { "epoch": 0.7411063580856276, "grad_norm": 4.008252871357874, "learning_rate": 1.3626683771648494e-06, "loss": 0.7184, "step": 60810 }, { "epoch": 0.7411672943097753, "grad_norm": 2.223907145022531, "learning_rate": 1.3623476587556128e-06, "loss": 0.7116, "step": 60815 }, { "epoch": 0.7412282305339232, "grad_norm": 2.7649854142620147, "learning_rate": 1.3620269403463759e-06, "loss": 0.73, "step": 60820 }, { "epoch": 0.741289166758071, "grad_norm": 2.804801979847047, "learning_rate": 1.3617062219371393e-06, "loss": 0.6905, "step": 60825 }, { "epoch": 0.7413501029822188, "grad_norm": 2.4782095514554676, "learning_rate": 1.3613855035279025e-06, "loss": 0.7198, "step": 60830 }, { "epoch": 0.7414110392063666, "grad_norm": 2.7340214346322362, "learning_rate": 1.361064785118666e-06, "loss": 0.7733, "step": 60835 }, { "epoch": 0.7414719754305145, "grad_norm": 3.1222803799895593, "learning_rate": 1.3607440667094294e-06, "loss": 0.7545, "step": 60840 }, { "epoch": 0.7415329116546623, "grad_norm": 2.196209409544147, "learning_rate": 1.3604233483001924e-06, "loss": 0.7736, "step": 60845 }, { "epoch": 0.74159384787881, "grad_norm": 2.4503390359073163, "learning_rate": 1.3601026298909559e-06, "loss": 0.7709, "step": 60850 }, { "epoch": 0.7416547841029578, "grad_norm": 2.4466555519822104, "learning_rate": 1.3597819114817193e-06, "loss": 0.7411, "step": 60855 }, { "epoch": 0.7417157203271056, "grad_norm": 2.7221325013860147, "learning_rate": 1.3594611930724825e-06, "loss": 0.731, "step": 60860 }, { "epoch": 0.7417766565512535, "grad_norm": 2.7233266579927187, "learning_rate": 1.3591404746632458e-06, "loss": 0.748, "step": 60865 }, { "epoch": 0.7418375927754013, "grad_norm": 2.613627398016951, "learning_rate": 1.358819756254009e-06, "loss": 0.7565, "step": 60870 }, { "epoch": 0.7418985289995491, "grad_norm": 2.5216587136576654, "learning_rate": 1.3584990378447724e-06, "loss": 0.6962, "step": 60875 }, { "epoch": 0.7419594652236968, "grad_norm": 2.7735835052297166, "learning_rate": 1.3581783194355359e-06, "loss": 0.7496, "step": 60880 }, { "epoch": 0.7420204014478446, "grad_norm": 2.0810381867241436, "learning_rate": 1.3578576010262989e-06, "loss": 0.7683, "step": 60885 }, { "epoch": 0.7420813376719925, "grad_norm": 2.4637731724040037, "learning_rate": 1.3575368826170623e-06, "loss": 0.7719, "step": 60890 }, { "epoch": 0.7421422738961403, "grad_norm": 2.6644446955851335, "learning_rate": 1.3572161642078258e-06, "loss": 0.7501, "step": 60895 }, { "epoch": 0.7422032101202881, "grad_norm": 3.3456520580183002, "learning_rate": 1.356895445798589e-06, "loss": 0.7593, "step": 60900 }, { "epoch": 0.7422641463444359, "grad_norm": 2.0331551763737568, "learning_rate": 1.3565747273893522e-06, "loss": 0.6456, "step": 60905 }, { "epoch": 0.7423250825685838, "grad_norm": 2.2875024246576263, "learning_rate": 1.3562540089801154e-06, "loss": 0.7724, "step": 60910 }, { "epoch": 0.7423860187927315, "grad_norm": 2.7339003514045084, "learning_rate": 1.3559332905708789e-06, "loss": 0.7182, "step": 60915 }, { "epoch": 0.7424469550168793, "grad_norm": 2.5437657998997802, "learning_rate": 1.3556125721616423e-06, "loss": 0.7532, "step": 60920 }, { "epoch": 0.7425078912410271, "grad_norm": 2.915789686336816, "learning_rate": 1.3552918537524053e-06, "loss": 0.7455, "step": 60925 }, { "epoch": 0.7425688274651749, "grad_norm": 2.5187276316932246, "learning_rate": 1.3549711353431688e-06, "loss": 0.6904, "step": 60930 }, { "epoch": 0.7426297636893228, "grad_norm": 2.3273747005020264, "learning_rate": 1.3546504169339322e-06, "loss": 0.7077, "step": 60935 }, { "epoch": 0.7426906999134706, "grad_norm": 3.287184042235002, "learning_rate": 1.3543296985246955e-06, "loss": 0.6721, "step": 60940 }, { "epoch": 0.7427516361376184, "grad_norm": 2.5851758697424723, "learning_rate": 1.3540089801154587e-06, "loss": 0.6801, "step": 60945 }, { "epoch": 0.7428125723617661, "grad_norm": 2.6082253581087755, "learning_rate": 1.353688261706222e-06, "loss": 0.763, "step": 60950 }, { "epoch": 0.7428735085859139, "grad_norm": 2.718904350640892, "learning_rate": 1.3533675432969854e-06, "loss": 0.7255, "step": 60955 }, { "epoch": 0.7429344448100618, "grad_norm": 2.1711717896130533, "learning_rate": 1.3530468248877488e-06, "loss": 0.6721, "step": 60960 }, { "epoch": 0.7429953810342096, "grad_norm": 2.9015531118253475, "learning_rate": 1.3527261064785118e-06, "loss": 0.8055, "step": 60965 }, { "epoch": 0.7430563172583574, "grad_norm": 2.330700576977922, "learning_rate": 1.3524053880692752e-06, "loss": 0.6982, "step": 60970 }, { "epoch": 0.7431172534825052, "grad_norm": 1.8992954177747512, "learning_rate": 1.3520846696600387e-06, "loss": 0.7029, "step": 60975 }, { "epoch": 0.7431781897066531, "grad_norm": 3.0364002015250966, "learning_rate": 1.351763951250802e-06, "loss": 0.7396, "step": 60980 }, { "epoch": 0.7432391259308008, "grad_norm": 3.1097908733302315, "learning_rate": 1.3514432328415654e-06, "loss": 0.741, "step": 60985 }, { "epoch": 0.7433000621549486, "grad_norm": 2.985324148964267, "learning_rate": 1.3511225144323284e-06, "loss": 0.7908, "step": 60990 }, { "epoch": 0.7433609983790964, "grad_norm": 2.5687442160592515, "learning_rate": 1.3508017960230918e-06, "loss": 0.7028, "step": 60995 }, { "epoch": 0.7434219346032442, "grad_norm": 2.1922151842267166, "learning_rate": 1.3504810776138553e-06, "loss": 0.7088, "step": 61000 }, { "epoch": 0.7434828708273921, "grad_norm": 2.2713728324748033, "learning_rate": 1.3501603592046183e-06, "loss": 0.7269, "step": 61005 }, { "epoch": 0.7435438070515399, "grad_norm": 2.4490182665925135, "learning_rate": 1.3498396407953817e-06, "loss": 0.6903, "step": 61010 }, { "epoch": 0.7436047432756877, "grad_norm": 3.406783392811351, "learning_rate": 1.3495189223861452e-06, "loss": 0.7662, "step": 61015 }, { "epoch": 0.7436656794998354, "grad_norm": 2.226412971459981, "learning_rate": 1.3491982039769084e-06, "loss": 0.7474, "step": 61020 }, { "epoch": 0.7437266157239832, "grad_norm": 2.5772559931062156, "learning_rate": 1.3488774855676718e-06, "loss": 0.7765, "step": 61025 }, { "epoch": 0.7437875519481311, "grad_norm": 2.4922051088513126, "learning_rate": 1.348556767158435e-06, "loss": 0.7259, "step": 61030 }, { "epoch": 0.7438484881722789, "grad_norm": 2.1425305296094597, "learning_rate": 1.3482360487491983e-06, "loss": 0.6961, "step": 61035 }, { "epoch": 0.7439094243964267, "grad_norm": 3.7411632240580657, "learning_rate": 1.3479153303399617e-06, "loss": 0.6897, "step": 61040 }, { "epoch": 0.7439703606205745, "grad_norm": 3.0997853520010694, "learning_rate": 1.3475946119307247e-06, "loss": 0.734, "step": 61045 }, { "epoch": 0.7440312968447224, "grad_norm": 3.517859533344, "learning_rate": 1.3472738935214882e-06, "loss": 0.7156, "step": 61050 }, { "epoch": 0.7440922330688701, "grad_norm": 2.85523632301689, "learning_rate": 1.3469531751122516e-06, "loss": 0.7542, "step": 61055 }, { "epoch": 0.7441531692930179, "grad_norm": 2.01735299253859, "learning_rate": 1.3466324567030148e-06, "loss": 0.6919, "step": 61060 }, { "epoch": 0.7442141055171657, "grad_norm": 2.735181295605741, "learning_rate": 1.3463117382937783e-06, "loss": 0.7687, "step": 61065 }, { "epoch": 0.7442750417413135, "grad_norm": 2.535436073640969, "learning_rate": 1.3459910198845415e-06, "loss": 0.7628, "step": 61070 }, { "epoch": 0.7443359779654614, "grad_norm": 3.0167785411391415, "learning_rate": 1.3456703014753047e-06, "loss": 0.7379, "step": 61075 }, { "epoch": 0.7443969141896092, "grad_norm": 2.332094751659843, "learning_rate": 1.3453495830660682e-06, "loss": 0.691, "step": 61080 }, { "epoch": 0.744457850413757, "grad_norm": 2.43856251758474, "learning_rate": 1.3450288646568312e-06, "loss": 0.7579, "step": 61085 }, { "epoch": 0.7445187866379047, "grad_norm": 2.818330776539066, "learning_rate": 1.3447081462475946e-06, "loss": 0.6642, "step": 61090 }, { "epoch": 0.7445797228620525, "grad_norm": 2.935563418309958, "learning_rate": 1.344387427838358e-06, "loss": 0.7143, "step": 61095 }, { "epoch": 0.7446406590862004, "grad_norm": 1.8669558560790707, "learning_rate": 1.3440667094291213e-06, "loss": 0.7225, "step": 61100 }, { "epoch": 0.7447015953103482, "grad_norm": 3.217487958326452, "learning_rate": 1.3437459910198847e-06, "loss": 0.7194, "step": 61105 }, { "epoch": 0.744762531534496, "grad_norm": 4.155157804174478, "learning_rate": 1.3434252726106482e-06, "loss": 0.7371, "step": 61110 }, { "epoch": 0.7448234677586438, "grad_norm": 2.310997773998048, "learning_rate": 1.3431045542014112e-06, "loss": 0.779, "step": 61115 }, { "epoch": 0.7448844039827917, "grad_norm": 2.244181722609912, "learning_rate": 1.3427838357921746e-06, "loss": 0.7434, "step": 61120 }, { "epoch": 0.7449453402069394, "grad_norm": 2.3235135549694528, "learning_rate": 1.3424631173829379e-06, "loss": 0.7624, "step": 61125 }, { "epoch": 0.7450062764310872, "grad_norm": 3.34607967056881, "learning_rate": 1.342142398973701e-06, "loss": 0.6951, "step": 61130 }, { "epoch": 0.745067212655235, "grad_norm": 2.130911452518784, "learning_rate": 1.3418216805644645e-06, "loss": 0.6889, "step": 61135 }, { "epoch": 0.7451281488793828, "grad_norm": 2.315363977851215, "learning_rate": 1.3415009621552278e-06, "loss": 0.7011, "step": 61140 }, { "epoch": 0.7451890851035307, "grad_norm": 2.6905383166809824, "learning_rate": 1.3411802437459912e-06, "loss": 0.742, "step": 61145 }, { "epoch": 0.7452500213276785, "grad_norm": 2.9413111008775963, "learning_rate": 1.3408595253367547e-06, "loss": 0.697, "step": 61150 }, { "epoch": 0.7453109575518263, "grad_norm": 2.4264678934717656, "learning_rate": 1.3405388069275177e-06, "loss": 0.6489, "step": 61155 }, { "epoch": 0.745371893775974, "grad_norm": 4.063022444489602, "learning_rate": 1.3402180885182811e-06, "loss": 0.7652, "step": 61160 }, { "epoch": 0.7454328300001218, "grad_norm": 2.5145010891176245, "learning_rate": 1.3398973701090443e-06, "loss": 0.7116, "step": 61165 }, { "epoch": 0.7454937662242697, "grad_norm": 2.6316663007783285, "learning_rate": 1.3395766516998076e-06, "loss": 0.7052, "step": 61170 }, { "epoch": 0.7455547024484175, "grad_norm": 2.2520797835771704, "learning_rate": 1.339255933290571e-06, "loss": 0.6679, "step": 61175 }, { "epoch": 0.7456156386725653, "grad_norm": 2.4981193570068014, "learning_rate": 1.3389352148813342e-06, "loss": 0.7243, "step": 61180 }, { "epoch": 0.7456765748967131, "grad_norm": 2.0366749922053495, "learning_rate": 1.3386144964720977e-06, "loss": 0.7305, "step": 61185 }, { "epoch": 0.745737511120861, "grad_norm": 1.9317251708027947, "learning_rate": 1.3382937780628611e-06, "loss": 0.755, "step": 61190 }, { "epoch": 0.7457984473450087, "grad_norm": 2.1164493969438016, "learning_rate": 1.3379730596536241e-06, "loss": 0.6874, "step": 61195 }, { "epoch": 0.7458593835691565, "grad_norm": 2.4805725982025524, "learning_rate": 1.3376523412443876e-06, "loss": 0.7778, "step": 61200 }, { "epoch": 0.7459203197933043, "grad_norm": 2.514841852026862, "learning_rate": 1.3373316228351508e-06, "loss": 0.7166, "step": 61205 }, { "epoch": 0.7459812560174521, "grad_norm": 2.5886211317519243, "learning_rate": 1.3370109044259142e-06, "loss": 0.7629, "step": 61210 }, { "epoch": 0.7460421922416, "grad_norm": 2.3220652200245437, "learning_rate": 1.3366901860166775e-06, "loss": 0.7024, "step": 61215 }, { "epoch": 0.7461031284657478, "grad_norm": 2.635094653403988, "learning_rate": 1.3363694676074407e-06, "loss": 0.7633, "step": 61220 }, { "epoch": 0.7461640646898956, "grad_norm": 1.92844690295024, "learning_rate": 1.3360487491982041e-06, "loss": 0.651, "step": 61225 }, { "epoch": 0.7462250009140433, "grad_norm": 2.383825181285419, "learning_rate": 1.3357280307889676e-06, "loss": 0.6587, "step": 61230 }, { "epoch": 0.7462859371381911, "grad_norm": 3.1028803292639315, "learning_rate": 1.3354073123797306e-06, "loss": 0.6996, "step": 61235 }, { "epoch": 0.746346873362339, "grad_norm": 2.4887688893182163, "learning_rate": 1.335086593970494e-06, "loss": 0.7652, "step": 61240 }, { "epoch": 0.7464078095864868, "grad_norm": 2.8164067312164858, "learning_rate": 1.3347658755612573e-06, "loss": 0.698, "step": 61245 }, { "epoch": 0.7464687458106346, "grad_norm": 2.5427493708830906, "learning_rate": 1.3344451571520207e-06, "loss": 0.6977, "step": 61250 }, { "epoch": 0.7465296820347824, "grad_norm": 1.9327302091618883, "learning_rate": 1.334124438742784e-06, "loss": 0.6948, "step": 61255 }, { "epoch": 0.7465906182589302, "grad_norm": 2.4727910075956236, "learning_rate": 1.3338037203335472e-06, "loss": 0.7442, "step": 61260 }, { "epoch": 0.746651554483078, "grad_norm": 2.2567192749296185, "learning_rate": 1.3334830019243106e-06, "loss": 0.7176, "step": 61265 }, { "epoch": 0.7467124907072258, "grad_norm": 2.4909825160297214, "learning_rate": 1.333162283515074e-06, "loss": 0.7693, "step": 61270 }, { "epoch": 0.7467734269313736, "grad_norm": 2.2632264454365325, "learning_rate": 1.332841565105837e-06, "loss": 0.7293, "step": 61275 }, { "epoch": 0.7468343631555214, "grad_norm": 2.303583729248536, "learning_rate": 1.3325208466966005e-06, "loss": 0.7461, "step": 61280 }, { "epoch": 0.7468952993796693, "grad_norm": 2.596273169708508, "learning_rate": 1.3322001282873637e-06, "loss": 0.7834, "step": 61285 }, { "epoch": 0.7469562356038171, "grad_norm": 2.5584630580635044, "learning_rate": 1.3318794098781272e-06, "loss": 0.7427, "step": 61290 }, { "epoch": 0.7470171718279649, "grad_norm": 2.145150100446654, "learning_rate": 1.3315586914688904e-06, "loss": 0.6901, "step": 61295 }, { "epoch": 0.7470781080521126, "grad_norm": 2.2153647256187443, "learning_rate": 1.3312379730596536e-06, "loss": 0.6967, "step": 61300 }, { "epoch": 0.7471390442762604, "grad_norm": 3.3863330329575945, "learning_rate": 1.330917254650417e-06, "loss": 0.6794, "step": 61305 }, { "epoch": 0.7471999805004083, "grad_norm": 2.0380873419867642, "learning_rate": 1.3305965362411805e-06, "loss": 0.7526, "step": 61310 }, { "epoch": 0.7472609167245561, "grad_norm": 2.5077240772316878, "learning_rate": 1.3302758178319435e-06, "loss": 0.8379, "step": 61315 }, { "epoch": 0.7473218529487039, "grad_norm": 2.3404990831887624, "learning_rate": 1.329955099422707e-06, "loss": 0.7172, "step": 61320 }, { "epoch": 0.7473827891728517, "grad_norm": 2.679612078800099, "learning_rate": 1.3296343810134702e-06, "loss": 0.7066, "step": 61325 }, { "epoch": 0.7474437253969995, "grad_norm": 4.430975851058749, "learning_rate": 1.3293136626042336e-06, "loss": 0.6822, "step": 61330 }, { "epoch": 0.7475046616211473, "grad_norm": 2.519104439140159, "learning_rate": 1.328992944194997e-06, "loss": 0.71, "step": 61335 }, { "epoch": 0.7475655978452951, "grad_norm": 2.3143648698803547, "learning_rate": 1.32867222578576e-06, "loss": 0.7461, "step": 61340 }, { "epoch": 0.7476265340694429, "grad_norm": 2.3918368296061687, "learning_rate": 1.3283515073765235e-06, "loss": 0.7493, "step": 61345 }, { "epoch": 0.7476874702935907, "grad_norm": 2.172068763963092, "learning_rate": 1.328030788967287e-06, "loss": 0.748, "step": 61350 }, { "epoch": 0.7477484065177386, "grad_norm": 2.777743982932784, "learning_rate": 1.32771007055805e-06, "loss": 0.7385, "step": 61355 }, { "epoch": 0.7478093427418864, "grad_norm": 2.999869198725781, "learning_rate": 1.3273893521488134e-06, "loss": 0.685, "step": 61360 }, { "epoch": 0.7478702789660342, "grad_norm": 2.659379540055145, "learning_rate": 1.3270686337395769e-06, "loss": 0.8199, "step": 61365 }, { "epoch": 0.7479312151901819, "grad_norm": 2.610621812727685, "learning_rate": 1.32674791533034e-06, "loss": 0.7082, "step": 61370 }, { "epoch": 0.7479921514143297, "grad_norm": 2.2726559262670754, "learning_rate": 1.3264271969211035e-06, "loss": 0.7242, "step": 61375 }, { "epoch": 0.7480530876384776, "grad_norm": 2.604296264126631, "learning_rate": 1.3261064785118666e-06, "loss": 0.7567, "step": 61380 }, { "epoch": 0.7481140238626254, "grad_norm": 2.127004880741312, "learning_rate": 1.32578576010263e-06, "loss": 0.6098, "step": 61385 }, { "epoch": 0.7481749600867732, "grad_norm": 3.5911767927635703, "learning_rate": 1.3254650416933934e-06, "loss": 0.7943, "step": 61390 }, { "epoch": 0.748235896310921, "grad_norm": 2.231860749567555, "learning_rate": 1.3251443232841565e-06, "loss": 0.6757, "step": 61395 }, { "epoch": 0.7482968325350688, "grad_norm": 2.54725448436787, "learning_rate": 1.3248236048749199e-06, "loss": 0.7372, "step": 61400 }, { "epoch": 0.7483577687592166, "grad_norm": 2.513206661850873, "learning_rate": 1.3245028864656833e-06, "loss": 0.7306, "step": 61405 }, { "epoch": 0.7484187049833644, "grad_norm": 2.6664215296907687, "learning_rate": 1.3241821680564466e-06, "loss": 0.6716, "step": 61410 }, { "epoch": 0.7484796412075122, "grad_norm": 2.790532804689896, "learning_rate": 1.32386144964721e-06, "loss": 0.7431, "step": 61415 }, { "epoch": 0.74854057743166, "grad_norm": 2.4149786680609076, "learning_rate": 1.323540731237973e-06, "loss": 0.7735, "step": 61420 }, { "epoch": 0.7486015136558078, "grad_norm": 2.3151550720817267, "learning_rate": 1.3232200128287365e-06, "loss": 0.8025, "step": 61425 }, { "epoch": 0.7486624498799557, "grad_norm": 2.657283106214289, "learning_rate": 1.3228992944195e-06, "loss": 0.7578, "step": 61430 }, { "epoch": 0.7487233861041035, "grad_norm": 2.3595557446500233, "learning_rate": 1.322578576010263e-06, "loss": 0.707, "step": 61435 }, { "epoch": 0.7487843223282512, "grad_norm": 2.758936379309383, "learning_rate": 1.3222578576010264e-06, "loss": 0.7213, "step": 61440 }, { "epoch": 0.748845258552399, "grad_norm": 2.636355572214012, "learning_rate": 1.3219371391917898e-06, "loss": 0.786, "step": 61445 }, { "epoch": 0.7489061947765469, "grad_norm": 2.26996346397113, "learning_rate": 1.321616420782553e-06, "loss": 0.7715, "step": 61450 }, { "epoch": 0.7489671310006947, "grad_norm": 3.0937687897875845, "learning_rate": 1.3212957023733165e-06, "loss": 0.6821, "step": 61455 }, { "epoch": 0.7490280672248425, "grad_norm": 2.2618518757371984, "learning_rate": 1.3209749839640795e-06, "loss": 0.7329, "step": 61460 }, { "epoch": 0.7490890034489903, "grad_norm": 2.6252933695294502, "learning_rate": 1.320654265554843e-06, "loss": 0.6736, "step": 61465 }, { "epoch": 0.7491499396731381, "grad_norm": 2.602335984094472, "learning_rate": 1.3203335471456064e-06, "loss": 0.6568, "step": 61470 }, { "epoch": 0.7492108758972859, "grad_norm": 2.3959723231193055, "learning_rate": 1.3200128287363696e-06, "loss": 0.7516, "step": 61475 }, { "epoch": 0.7492718121214337, "grad_norm": 2.9703228735941667, "learning_rate": 1.3196921103271328e-06, "loss": 0.6978, "step": 61480 }, { "epoch": 0.7493327483455815, "grad_norm": 2.809628858526891, "learning_rate": 1.3193713919178963e-06, "loss": 0.7474, "step": 61485 }, { "epoch": 0.7493936845697293, "grad_norm": 2.2693154997853746, "learning_rate": 1.3190506735086595e-06, "loss": 0.6976, "step": 61490 }, { "epoch": 0.7494546207938771, "grad_norm": 2.3632924261659647, "learning_rate": 1.318729955099423e-06, "loss": 0.6139, "step": 61495 }, { "epoch": 0.749515557018025, "grad_norm": 2.6558922430134344, "learning_rate": 1.318409236690186e-06, "loss": 0.7433, "step": 61500 }, { "epoch": 0.7495764932421728, "grad_norm": 2.4730465855751156, "learning_rate": 1.3180885182809494e-06, "loss": 0.7248, "step": 61505 }, { "epoch": 0.7496374294663205, "grad_norm": 3.4350724604102436, "learning_rate": 1.3177677998717128e-06, "loss": 0.7034, "step": 61510 }, { "epoch": 0.7496983656904683, "grad_norm": 2.988987140800498, "learning_rate": 1.317447081462476e-06, "loss": 0.7684, "step": 61515 }, { "epoch": 0.7497593019146161, "grad_norm": 2.186397906510244, "learning_rate": 1.3171263630532393e-06, "loss": 0.6821, "step": 61520 }, { "epoch": 0.749820238138764, "grad_norm": 2.3993472960900335, "learning_rate": 1.3168056446440027e-06, "loss": 0.7369, "step": 61525 }, { "epoch": 0.7498811743629118, "grad_norm": 2.814385149142544, "learning_rate": 1.316484926234766e-06, "loss": 0.7083, "step": 61530 }, { "epoch": 0.7499421105870596, "grad_norm": 2.735685776763372, "learning_rate": 1.3161642078255294e-06, "loss": 0.7997, "step": 61535 }, { "epoch": 0.7500030468112074, "grad_norm": 2.602199571790517, "learning_rate": 1.3158434894162924e-06, "loss": 0.7695, "step": 61540 }, { "epoch": 0.7500639830353552, "grad_norm": 2.6679451907347365, "learning_rate": 1.3155227710070558e-06, "loss": 0.6948, "step": 61545 }, { "epoch": 0.750124919259503, "grad_norm": 2.3951774495468454, "learning_rate": 1.3152020525978193e-06, "loss": 0.7343, "step": 61550 }, { "epoch": 0.7501858554836508, "grad_norm": 3.6972024512142023, "learning_rate": 1.3148813341885825e-06, "loss": 0.6889, "step": 61555 }, { "epoch": 0.7502467917077986, "grad_norm": 2.1710680228074746, "learning_rate": 1.3145606157793457e-06, "loss": 0.6651, "step": 61560 }, { "epoch": 0.7503077279319464, "grad_norm": 2.803169301449647, "learning_rate": 1.3142398973701092e-06, "loss": 0.7206, "step": 61565 }, { "epoch": 0.7503686641560943, "grad_norm": 2.3454787223815625, "learning_rate": 1.3139191789608724e-06, "loss": 0.7724, "step": 61570 }, { "epoch": 0.7504296003802421, "grad_norm": 2.8526611190449946, "learning_rate": 1.3135984605516359e-06, "loss": 0.7193, "step": 61575 }, { "epoch": 0.7504905366043898, "grad_norm": 3.318851631985172, "learning_rate": 1.3132777421423989e-06, "loss": 0.6943, "step": 61580 }, { "epoch": 0.7505514728285376, "grad_norm": 2.5943572299899404, "learning_rate": 1.3129570237331623e-06, "loss": 0.6834, "step": 61585 }, { "epoch": 0.7506124090526854, "grad_norm": 2.1726224503472786, "learning_rate": 1.3126363053239258e-06, "loss": 0.7363, "step": 61590 }, { "epoch": 0.7506733452768333, "grad_norm": 2.174179449627697, "learning_rate": 1.312315586914689e-06, "loss": 0.7551, "step": 61595 }, { "epoch": 0.7507342815009811, "grad_norm": 2.641600392611376, "learning_rate": 1.3119948685054524e-06, "loss": 0.8115, "step": 61600 }, { "epoch": 0.7507952177251289, "grad_norm": 1.8964785125646217, "learning_rate": 1.3116741500962156e-06, "loss": 0.8106, "step": 61605 }, { "epoch": 0.7508561539492767, "grad_norm": 4.817550834073835, "learning_rate": 1.3113534316869789e-06, "loss": 0.7844, "step": 61610 }, { "epoch": 0.7509170901734245, "grad_norm": 2.367505591821651, "learning_rate": 1.3110327132777423e-06, "loss": 0.7147, "step": 61615 }, { "epoch": 0.7509780263975723, "grad_norm": 2.5787207784690174, "learning_rate": 1.3107119948685053e-06, "loss": 0.7233, "step": 61620 }, { "epoch": 0.7510389626217201, "grad_norm": 5.138500056900199, "learning_rate": 1.3103912764592688e-06, "loss": 0.7192, "step": 61625 }, { "epoch": 0.7510998988458679, "grad_norm": 2.8454561683345707, "learning_rate": 1.3100705580500322e-06, "loss": 0.7158, "step": 61630 }, { "epoch": 0.7511608350700157, "grad_norm": 2.337517619634215, "learning_rate": 1.3097498396407954e-06, "loss": 0.677, "step": 61635 }, { "epoch": 0.7512217712941636, "grad_norm": 2.6375443407881543, "learning_rate": 1.3094291212315589e-06, "loss": 0.7213, "step": 61640 }, { "epoch": 0.7512827075183114, "grad_norm": 2.175627092133953, "learning_rate": 1.3091084028223221e-06, "loss": 0.6579, "step": 61645 }, { "epoch": 0.7513436437424591, "grad_norm": 3.2062116284376097, "learning_rate": 1.3087876844130853e-06, "loss": 0.6431, "step": 61650 }, { "epoch": 0.7514045799666069, "grad_norm": 2.6931095928498543, "learning_rate": 1.3084669660038488e-06, "loss": 0.7229, "step": 61655 }, { "epoch": 0.7514655161907547, "grad_norm": 2.077820292926999, "learning_rate": 1.3081462475946122e-06, "loss": 0.7536, "step": 61660 }, { "epoch": 0.7515264524149026, "grad_norm": 2.3345051810405435, "learning_rate": 1.3078255291853752e-06, "loss": 0.6935, "step": 61665 }, { "epoch": 0.7515873886390504, "grad_norm": 2.4779631242271294, "learning_rate": 1.3075048107761387e-06, "loss": 0.8133, "step": 61670 }, { "epoch": 0.7516483248631982, "grad_norm": 2.494433673022551, "learning_rate": 1.307184092366902e-06, "loss": 0.7562, "step": 61675 }, { "epoch": 0.751709261087346, "grad_norm": 2.692191096885574, "learning_rate": 1.3068633739576653e-06, "loss": 0.7146, "step": 61680 }, { "epoch": 0.7517701973114937, "grad_norm": 2.4649977383470487, "learning_rate": 1.3065426555484288e-06, "loss": 0.6547, "step": 61685 }, { "epoch": 0.7518311335356416, "grad_norm": 2.2313420284946806, "learning_rate": 1.3062219371391918e-06, "loss": 0.7813, "step": 61690 }, { "epoch": 0.7518920697597894, "grad_norm": 2.106275469021428, "learning_rate": 1.3059012187299552e-06, "loss": 0.762, "step": 61695 }, { "epoch": 0.7519530059839372, "grad_norm": 2.86315132169853, "learning_rate": 1.3055805003207187e-06, "loss": 0.7411, "step": 61700 }, { "epoch": 0.752013942208085, "grad_norm": 3.176059039311486, "learning_rate": 1.3052597819114817e-06, "loss": 0.7719, "step": 61705 }, { "epoch": 0.7520748784322329, "grad_norm": 2.2325856221664933, "learning_rate": 1.3049390635022451e-06, "loss": 0.6654, "step": 61710 }, { "epoch": 0.7521358146563807, "grad_norm": 4.4034324511847265, "learning_rate": 1.3046183450930084e-06, "loss": 0.7171, "step": 61715 }, { "epoch": 0.7521967508805284, "grad_norm": 4.54034324830159, "learning_rate": 1.3042976266837718e-06, "loss": 0.6646, "step": 61720 }, { "epoch": 0.7522576871046762, "grad_norm": 4.502118131254202, "learning_rate": 1.3039769082745353e-06, "loss": 0.7147, "step": 61725 }, { "epoch": 0.752318623328824, "grad_norm": 2.0051075157294265, "learning_rate": 1.3036561898652983e-06, "loss": 0.7422, "step": 61730 }, { "epoch": 0.7523795595529719, "grad_norm": 2.42124157033135, "learning_rate": 1.3033354714560617e-06, "loss": 0.6495, "step": 61735 }, { "epoch": 0.7524404957771197, "grad_norm": 2.8072562060656208, "learning_rate": 1.3030147530468251e-06, "loss": 0.8086, "step": 61740 }, { "epoch": 0.7525014320012675, "grad_norm": 1.9659844037788794, "learning_rate": 1.3026940346375882e-06, "loss": 0.6965, "step": 61745 }, { "epoch": 0.7525623682254153, "grad_norm": 3.292932462228801, "learning_rate": 1.3023733162283516e-06, "loss": 0.7613, "step": 61750 }, { "epoch": 0.752623304449563, "grad_norm": 2.1518560517992955, "learning_rate": 1.3020525978191148e-06, "loss": 0.6953, "step": 61755 }, { "epoch": 0.7526842406737109, "grad_norm": 3.7993688722283885, "learning_rate": 1.3017318794098783e-06, "loss": 0.7249, "step": 61760 }, { "epoch": 0.7527451768978587, "grad_norm": 2.67652618737315, "learning_rate": 1.3014111610006417e-06, "loss": 0.7328, "step": 61765 }, { "epoch": 0.7528061131220065, "grad_norm": 2.315445752778649, "learning_rate": 1.3010904425914047e-06, "loss": 0.7157, "step": 61770 }, { "epoch": 0.7528670493461543, "grad_norm": 2.9260559239601553, "learning_rate": 1.3007697241821682e-06, "loss": 0.7483, "step": 61775 }, { "epoch": 0.7529279855703022, "grad_norm": 3.0609760645781723, "learning_rate": 1.3004490057729316e-06, "loss": 0.7463, "step": 61780 }, { "epoch": 0.75298892179445, "grad_norm": 2.5074069029609065, "learning_rate": 1.3001282873636946e-06, "loss": 0.7177, "step": 61785 }, { "epoch": 0.7530498580185977, "grad_norm": 2.845921900399959, "learning_rate": 1.299807568954458e-06, "loss": 0.7531, "step": 61790 }, { "epoch": 0.7531107942427455, "grad_norm": 2.1604354040481555, "learning_rate": 1.2994868505452213e-06, "loss": 0.7686, "step": 61795 }, { "epoch": 0.7531717304668933, "grad_norm": 1.8709425914522293, "learning_rate": 1.2991661321359847e-06, "loss": 0.6546, "step": 61800 }, { "epoch": 0.7532326666910412, "grad_norm": 2.288451850853506, "learning_rate": 1.2988454137267482e-06, "loss": 0.7856, "step": 61805 }, { "epoch": 0.753293602915189, "grad_norm": 3.0656656381864296, "learning_rate": 1.2985246953175112e-06, "loss": 0.7287, "step": 61810 }, { "epoch": 0.7533545391393368, "grad_norm": 2.227270844412379, "learning_rate": 1.2982039769082746e-06, "loss": 0.716, "step": 61815 }, { "epoch": 0.7534154753634846, "grad_norm": 3.1503557766539947, "learning_rate": 1.297883258499038e-06, "loss": 0.669, "step": 61820 }, { "epoch": 0.7534764115876323, "grad_norm": 2.3685481289200085, "learning_rate": 1.2975625400898013e-06, "loss": 0.7452, "step": 61825 }, { "epoch": 0.7535373478117802, "grad_norm": 2.6461597555445824, "learning_rate": 1.2972418216805645e-06, "loss": 0.7581, "step": 61830 }, { "epoch": 0.753598284035928, "grad_norm": 3.1968240206106815, "learning_rate": 1.2969211032713278e-06, "loss": 0.6495, "step": 61835 }, { "epoch": 0.7536592202600758, "grad_norm": 2.4791188337120786, "learning_rate": 1.2966003848620912e-06, "loss": 0.6685, "step": 61840 }, { "epoch": 0.7537201564842236, "grad_norm": 2.481408238983216, "learning_rate": 1.2962796664528546e-06, "loss": 0.8271, "step": 61845 }, { "epoch": 0.7537810927083715, "grad_norm": 2.472853105953824, "learning_rate": 1.2959589480436177e-06, "loss": 0.7405, "step": 61850 }, { "epoch": 0.7538420289325192, "grad_norm": 2.359518763539414, "learning_rate": 1.295638229634381e-06, "loss": 0.7575, "step": 61855 }, { "epoch": 0.753902965156667, "grad_norm": 2.953309280360487, "learning_rate": 1.2953175112251445e-06, "loss": 0.7996, "step": 61860 }, { "epoch": 0.7539639013808148, "grad_norm": 2.3524371264256083, "learning_rate": 1.2949967928159078e-06, "loss": 0.6821, "step": 61865 }, { "epoch": 0.7540248376049626, "grad_norm": 2.2936993905118865, "learning_rate": 1.294676074406671e-06, "loss": 0.7506, "step": 61870 }, { "epoch": 0.7540857738291105, "grad_norm": 2.3580551167697115, "learning_rate": 1.2943553559974342e-06, "loss": 0.7138, "step": 61875 }, { "epoch": 0.7541467100532583, "grad_norm": 2.29761751308013, "learning_rate": 1.2940346375881977e-06, "loss": 0.6899, "step": 61880 }, { "epoch": 0.7542076462774061, "grad_norm": 2.6768835079168793, "learning_rate": 1.293713919178961e-06, "loss": 0.6636, "step": 61885 }, { "epoch": 0.7542685825015538, "grad_norm": 2.912993853329914, "learning_rate": 1.2933932007697241e-06, "loss": 0.6863, "step": 61890 }, { "epoch": 0.7543295187257016, "grad_norm": 2.503426144199369, "learning_rate": 1.2930724823604876e-06, "loss": 0.651, "step": 61895 }, { "epoch": 0.7543904549498495, "grad_norm": 1.8128787491364977, "learning_rate": 1.292751763951251e-06, "loss": 0.7051, "step": 61900 }, { "epoch": 0.7544513911739973, "grad_norm": 2.798654843217766, "learning_rate": 1.2924310455420142e-06, "loss": 0.8135, "step": 61905 }, { "epoch": 0.7545123273981451, "grad_norm": 2.378238985027714, "learning_rate": 1.2921103271327775e-06, "loss": 0.7082, "step": 61910 }, { "epoch": 0.7545732636222929, "grad_norm": 2.25587120761751, "learning_rate": 1.2917896087235407e-06, "loss": 0.6472, "step": 61915 }, { "epoch": 0.7546341998464408, "grad_norm": 3.021103681995277, "learning_rate": 1.2914688903143041e-06, "loss": 0.6536, "step": 61920 }, { "epoch": 0.7546951360705885, "grad_norm": 2.601703111900209, "learning_rate": 1.2911481719050676e-06, "loss": 0.7641, "step": 61925 }, { "epoch": 0.7547560722947363, "grad_norm": 2.391560021994367, "learning_rate": 1.2908274534958306e-06, "loss": 0.7067, "step": 61930 }, { "epoch": 0.7548170085188841, "grad_norm": 3.6409377714182263, "learning_rate": 1.290506735086594e-06, "loss": 0.7545, "step": 61935 }, { "epoch": 0.7548779447430319, "grad_norm": 2.6271512401158166, "learning_rate": 1.2901860166773575e-06, "loss": 0.6823, "step": 61940 }, { "epoch": 0.7549388809671798, "grad_norm": 2.32610021908982, "learning_rate": 1.2898652982681207e-06, "loss": 0.7187, "step": 61945 }, { "epoch": 0.7549998171913276, "grad_norm": 2.9942238147685423, "learning_rate": 1.2895445798588841e-06, "loss": 0.6827, "step": 61950 }, { "epoch": 0.7550607534154754, "grad_norm": 2.5143821340928083, "learning_rate": 1.2892238614496474e-06, "loss": 0.7074, "step": 61955 }, { "epoch": 0.7551216896396231, "grad_norm": 2.5511693301496723, "learning_rate": 1.2889031430404106e-06, "loss": 0.7452, "step": 61960 }, { "epoch": 0.7551826258637709, "grad_norm": 2.2311544856623997, "learning_rate": 1.288582424631174e-06, "loss": 0.7355, "step": 61965 }, { "epoch": 0.7552435620879188, "grad_norm": 2.313524270376501, "learning_rate": 1.288261706221937e-06, "loss": 0.7057, "step": 61970 }, { "epoch": 0.7553044983120666, "grad_norm": 2.1550949189500366, "learning_rate": 1.2879409878127005e-06, "loss": 0.6802, "step": 61975 }, { "epoch": 0.7553654345362144, "grad_norm": 2.9042387640670273, "learning_rate": 1.287620269403464e-06, "loss": 0.7921, "step": 61980 }, { "epoch": 0.7554263707603622, "grad_norm": 2.4134303914677253, "learning_rate": 1.2872995509942272e-06, "loss": 0.7822, "step": 61985 }, { "epoch": 0.75548730698451, "grad_norm": 2.6125214855041126, "learning_rate": 1.2869788325849906e-06, "loss": 0.7469, "step": 61990 }, { "epoch": 0.7555482432086578, "grad_norm": 2.2701271422430622, "learning_rate": 1.2866581141757538e-06, "loss": 0.7616, "step": 61995 }, { "epoch": 0.7556091794328056, "grad_norm": 2.4556639500710093, "learning_rate": 1.286337395766517e-06, "loss": 0.7398, "step": 62000 }, { "epoch": 0.7556701156569534, "grad_norm": 2.302445205679534, "learning_rate": 1.2860166773572805e-06, "loss": 0.6549, "step": 62005 }, { "epoch": 0.7557310518811012, "grad_norm": 2.2211418915848737, "learning_rate": 1.2856959589480435e-06, "loss": 0.6341, "step": 62010 }, { "epoch": 0.7557919881052491, "grad_norm": 2.6028952669049463, "learning_rate": 1.285375240538807e-06, "loss": 0.688, "step": 62015 }, { "epoch": 0.7558529243293969, "grad_norm": 2.1886435036635525, "learning_rate": 1.2850545221295704e-06, "loss": 0.6544, "step": 62020 }, { "epoch": 0.7559138605535447, "grad_norm": 2.3966854000176676, "learning_rate": 1.2847338037203336e-06, "loss": 0.7774, "step": 62025 }, { "epoch": 0.7559747967776924, "grad_norm": 3.1590224187637936, "learning_rate": 1.284413085311097e-06, "loss": 0.7448, "step": 62030 }, { "epoch": 0.7560357330018402, "grad_norm": 2.8897066087934253, "learning_rate": 1.2840923669018605e-06, "loss": 0.7174, "step": 62035 }, { "epoch": 0.7560966692259881, "grad_norm": 2.1620403227597427, "learning_rate": 1.2837716484926235e-06, "loss": 0.6695, "step": 62040 }, { "epoch": 0.7561576054501359, "grad_norm": 2.6132551878990093, "learning_rate": 1.283450930083387e-06, "loss": 0.7317, "step": 62045 }, { "epoch": 0.7562185416742837, "grad_norm": 2.5582643031962293, "learning_rate": 1.2831302116741502e-06, "loss": 0.7843, "step": 62050 }, { "epoch": 0.7562794778984315, "grad_norm": 2.7228618761807697, "learning_rate": 1.2828094932649134e-06, "loss": 0.7119, "step": 62055 }, { "epoch": 0.7563404141225794, "grad_norm": 2.829773938577751, "learning_rate": 1.2824887748556769e-06, "loss": 0.7783, "step": 62060 }, { "epoch": 0.7564013503467271, "grad_norm": 2.1704888488721488, "learning_rate": 1.28216805644644e-06, "loss": 0.7603, "step": 62065 }, { "epoch": 0.7564622865708749, "grad_norm": 2.7350481179645048, "learning_rate": 1.2818473380372035e-06, "loss": 0.7451, "step": 62070 }, { "epoch": 0.7565232227950227, "grad_norm": 2.7685752618917077, "learning_rate": 1.281526619627967e-06, "loss": 0.7415, "step": 62075 }, { "epoch": 0.7565841590191705, "grad_norm": 3.6273582768131103, "learning_rate": 1.28120590121873e-06, "loss": 0.8333, "step": 62080 }, { "epoch": 0.7566450952433184, "grad_norm": 2.4699093150317206, "learning_rate": 1.2808851828094934e-06, "loss": 0.7332, "step": 62085 }, { "epoch": 0.7567060314674662, "grad_norm": 3.110760007542088, "learning_rate": 1.2805644644002567e-06, "loss": 0.7236, "step": 62090 }, { "epoch": 0.756766967691614, "grad_norm": 3.5551845799965203, "learning_rate": 1.2802437459910199e-06, "loss": 0.7127, "step": 62095 }, { "epoch": 0.7568279039157617, "grad_norm": 2.45070007743504, "learning_rate": 1.2799230275817833e-06, "loss": 0.7488, "step": 62100 }, { "epoch": 0.7568888401399095, "grad_norm": 2.7176558939793063, "learning_rate": 1.2796023091725465e-06, "loss": 0.7349, "step": 62105 }, { "epoch": 0.7569497763640574, "grad_norm": 2.6808458492297427, "learning_rate": 1.27928159076331e-06, "loss": 0.8028, "step": 62110 }, { "epoch": 0.7570107125882052, "grad_norm": 2.6337880591635736, "learning_rate": 1.2789608723540734e-06, "loss": 0.7296, "step": 62115 }, { "epoch": 0.757071648812353, "grad_norm": 2.060274512569235, "learning_rate": 1.2786401539448364e-06, "loss": 0.6966, "step": 62120 }, { "epoch": 0.7571325850365008, "grad_norm": 2.7136622416948177, "learning_rate": 1.2783194355355999e-06, "loss": 0.682, "step": 62125 }, { "epoch": 0.7571935212606486, "grad_norm": 2.6292663530761278, "learning_rate": 1.2779987171263631e-06, "loss": 0.6533, "step": 62130 }, { "epoch": 0.7572544574847964, "grad_norm": 2.4328930301989353, "learning_rate": 1.2776779987171263e-06, "loss": 0.8062, "step": 62135 }, { "epoch": 0.7573153937089442, "grad_norm": 2.692679343744625, "learning_rate": 1.2773572803078898e-06, "loss": 0.7446, "step": 62140 }, { "epoch": 0.757376329933092, "grad_norm": 2.118625838504026, "learning_rate": 1.277036561898653e-06, "loss": 0.6799, "step": 62145 }, { "epoch": 0.7574372661572398, "grad_norm": 2.2855271254488034, "learning_rate": 1.2767158434894165e-06, "loss": 0.6201, "step": 62150 }, { "epoch": 0.7574982023813877, "grad_norm": 2.4531078066046894, "learning_rate": 1.2763951250801799e-06, "loss": 0.7091, "step": 62155 }, { "epoch": 0.7575591386055355, "grad_norm": 2.6682761257331906, "learning_rate": 1.276074406670943e-06, "loss": 0.6875, "step": 62160 }, { "epoch": 0.7576200748296833, "grad_norm": 2.3570910024345024, "learning_rate": 1.2757536882617063e-06, "loss": 0.7103, "step": 62165 }, { "epoch": 0.757681011053831, "grad_norm": 3.440883922112045, "learning_rate": 1.2754329698524696e-06, "loss": 0.8234, "step": 62170 }, { "epoch": 0.7577419472779788, "grad_norm": 2.60525316332045, "learning_rate": 1.275112251443233e-06, "loss": 0.7197, "step": 62175 }, { "epoch": 0.7578028835021267, "grad_norm": 2.5198132495613255, "learning_rate": 1.2747915330339962e-06, "loss": 0.6887, "step": 62180 }, { "epoch": 0.7578638197262745, "grad_norm": 2.0856298014552626, "learning_rate": 1.2744708146247595e-06, "loss": 0.6752, "step": 62185 }, { "epoch": 0.7579247559504223, "grad_norm": 2.9904134700083946, "learning_rate": 1.274150096215523e-06, "loss": 0.7458, "step": 62190 }, { "epoch": 0.7579856921745701, "grad_norm": 2.384216350605218, "learning_rate": 1.2738293778062864e-06, "loss": 0.753, "step": 62195 }, { "epoch": 0.758046628398718, "grad_norm": 2.1789061677825106, "learning_rate": 1.2735086593970494e-06, "loss": 0.7722, "step": 62200 }, { "epoch": 0.7581075646228657, "grad_norm": 2.4558578724948696, "learning_rate": 1.2731879409878128e-06, "loss": 0.7002, "step": 62205 }, { "epoch": 0.7581685008470135, "grad_norm": 2.5459285944752743, "learning_rate": 1.272867222578576e-06, "loss": 0.6955, "step": 62210 }, { "epoch": 0.7582294370711613, "grad_norm": 2.324438890576255, "learning_rate": 1.2725465041693395e-06, "loss": 0.7972, "step": 62215 }, { "epoch": 0.7582903732953091, "grad_norm": 2.5377327132419243, "learning_rate": 1.2722257857601027e-06, "loss": 0.763, "step": 62220 }, { "epoch": 0.758351309519457, "grad_norm": 3.253786036345107, "learning_rate": 1.271905067350866e-06, "loss": 0.7869, "step": 62225 }, { "epoch": 0.7584122457436048, "grad_norm": 2.1404867236187006, "learning_rate": 1.2715843489416294e-06, "loss": 0.6988, "step": 62230 }, { "epoch": 0.7584731819677526, "grad_norm": 2.5892018658902467, "learning_rate": 1.2712636305323928e-06, "loss": 0.737, "step": 62235 }, { "epoch": 0.7585341181919003, "grad_norm": 2.426407289952113, "learning_rate": 1.2709429121231558e-06, "loss": 0.7062, "step": 62240 }, { "epoch": 0.7585950544160481, "grad_norm": 2.503545577041097, "learning_rate": 1.2706221937139193e-06, "loss": 0.7115, "step": 62245 }, { "epoch": 0.758655990640196, "grad_norm": 2.2413198540699586, "learning_rate": 1.2703014753046827e-06, "loss": 0.7124, "step": 62250 }, { "epoch": 0.7587169268643438, "grad_norm": 2.7505869412896993, "learning_rate": 1.269980756895446e-06, "loss": 0.6847, "step": 62255 }, { "epoch": 0.7587778630884916, "grad_norm": 2.1713891038507747, "learning_rate": 1.2696600384862092e-06, "loss": 0.8065, "step": 62260 }, { "epoch": 0.7588387993126394, "grad_norm": 2.8433609625452942, "learning_rate": 1.2693393200769724e-06, "loss": 0.6824, "step": 62265 }, { "epoch": 0.7588997355367872, "grad_norm": 2.327707682267709, "learning_rate": 1.2690186016677358e-06, "loss": 0.7805, "step": 62270 }, { "epoch": 0.758960671760935, "grad_norm": 2.6188113309900682, "learning_rate": 1.2686978832584993e-06, "loss": 0.7718, "step": 62275 }, { "epoch": 0.7590216079850828, "grad_norm": 2.511697761408058, "learning_rate": 1.2683771648492623e-06, "loss": 0.7486, "step": 62280 }, { "epoch": 0.7590825442092306, "grad_norm": 2.630432613974141, "learning_rate": 1.2680564464400257e-06, "loss": 0.6969, "step": 62285 }, { "epoch": 0.7591434804333784, "grad_norm": 2.4483010788023973, "learning_rate": 1.2677357280307892e-06, "loss": 0.6482, "step": 62290 }, { "epoch": 0.7592044166575262, "grad_norm": 2.3947945503817585, "learning_rate": 1.2674150096215524e-06, "loss": 0.6865, "step": 62295 }, { "epoch": 0.7592653528816741, "grad_norm": 3.2008239518717017, "learning_rate": 1.2670942912123158e-06, "loss": 0.7437, "step": 62300 }, { "epoch": 0.7593262891058219, "grad_norm": 2.018475061125003, "learning_rate": 1.2667735728030789e-06, "loss": 0.7441, "step": 62305 }, { "epoch": 0.7593872253299696, "grad_norm": 2.5579809881314, "learning_rate": 1.2664528543938423e-06, "loss": 0.7036, "step": 62310 }, { "epoch": 0.7594481615541174, "grad_norm": 2.3540042855234318, "learning_rate": 1.2661321359846057e-06, "loss": 0.7958, "step": 62315 }, { "epoch": 0.7595090977782653, "grad_norm": 3.836235879168877, "learning_rate": 1.2658114175753688e-06, "loss": 0.6547, "step": 62320 }, { "epoch": 0.7595700340024131, "grad_norm": 2.25779522800436, "learning_rate": 1.2654906991661322e-06, "loss": 0.7235, "step": 62325 }, { "epoch": 0.7596309702265609, "grad_norm": 2.6651473809702946, "learning_rate": 1.2651699807568956e-06, "loss": 0.7687, "step": 62330 }, { "epoch": 0.7596919064507087, "grad_norm": 2.306177512155098, "learning_rate": 1.2648492623476589e-06, "loss": 0.7375, "step": 62335 }, { "epoch": 0.7597528426748565, "grad_norm": 2.3238388269171493, "learning_rate": 1.2645285439384223e-06, "loss": 0.6606, "step": 62340 }, { "epoch": 0.7598137788990043, "grad_norm": 2.8419166723304228, "learning_rate": 1.2642078255291853e-06, "loss": 0.7065, "step": 62345 }, { "epoch": 0.7598747151231521, "grad_norm": 2.6236585361365448, "learning_rate": 1.2638871071199488e-06, "loss": 0.7455, "step": 62350 }, { "epoch": 0.7599356513472999, "grad_norm": 2.770935140092045, "learning_rate": 1.2635663887107122e-06, "loss": 0.6893, "step": 62355 }, { "epoch": 0.7599965875714477, "grad_norm": 2.316033668386037, "learning_rate": 1.2632456703014752e-06, "loss": 0.7454, "step": 62360 }, { "epoch": 0.7600575237955955, "grad_norm": 2.1129638412144005, "learning_rate": 1.2629249518922387e-06, "loss": 0.7797, "step": 62365 }, { "epoch": 0.7601184600197434, "grad_norm": 2.8044545299130075, "learning_rate": 1.2626042334830021e-06, "loss": 0.7153, "step": 62370 }, { "epoch": 0.7601793962438912, "grad_norm": 2.2819729171384586, "learning_rate": 1.2622835150737653e-06, "loss": 0.6595, "step": 62375 }, { "epoch": 0.7602403324680389, "grad_norm": 2.2685714094727762, "learning_rate": 1.2619627966645288e-06, "loss": 0.7088, "step": 62380 }, { "epoch": 0.7603012686921867, "grad_norm": 2.421985119216476, "learning_rate": 1.2616420782552918e-06, "loss": 0.7592, "step": 62385 }, { "epoch": 0.7603622049163345, "grad_norm": 2.869931091796745, "learning_rate": 1.2613213598460552e-06, "loss": 0.7261, "step": 62390 }, { "epoch": 0.7604231411404824, "grad_norm": 2.192555483967302, "learning_rate": 1.2610006414368187e-06, "loss": 0.6548, "step": 62395 }, { "epoch": 0.7604840773646302, "grad_norm": 3.061542059460092, "learning_rate": 1.260679923027582e-06, "loss": 0.7865, "step": 62400 }, { "epoch": 0.760545013588778, "grad_norm": 2.923985621345745, "learning_rate": 1.2603592046183451e-06, "loss": 0.6867, "step": 62405 }, { "epoch": 0.7606059498129258, "grad_norm": 2.834129106093926, "learning_rate": 1.2600384862091086e-06, "loss": 0.6376, "step": 62410 }, { "epoch": 0.7606668860370736, "grad_norm": 2.2463927580454732, "learning_rate": 1.2597177677998718e-06, "loss": 0.7471, "step": 62415 }, { "epoch": 0.7607278222612214, "grad_norm": 2.6616038112720104, "learning_rate": 1.2593970493906352e-06, "loss": 0.7689, "step": 62420 }, { "epoch": 0.7607887584853692, "grad_norm": 2.789727136934145, "learning_rate": 1.2590763309813983e-06, "loss": 0.7594, "step": 62425 }, { "epoch": 0.760849694709517, "grad_norm": 2.4371976172929304, "learning_rate": 1.2587556125721617e-06, "loss": 0.7389, "step": 62430 }, { "epoch": 0.7609106309336648, "grad_norm": 2.517658131608643, "learning_rate": 1.2584348941629251e-06, "loss": 0.6744, "step": 62435 }, { "epoch": 0.7609715671578127, "grad_norm": 3.072018281401503, "learning_rate": 1.2581141757536884e-06, "loss": 0.7044, "step": 62440 }, { "epoch": 0.7610325033819605, "grad_norm": 2.7563839524133043, "learning_rate": 1.2577934573444516e-06, "loss": 0.7336, "step": 62445 }, { "epoch": 0.7610934396061082, "grad_norm": 2.726011679496643, "learning_rate": 1.257472738935215e-06, "loss": 0.7294, "step": 62450 }, { "epoch": 0.761154375830256, "grad_norm": 2.628110733652667, "learning_rate": 1.2571520205259783e-06, "loss": 0.6891, "step": 62455 }, { "epoch": 0.7612153120544038, "grad_norm": 2.24046730238184, "learning_rate": 1.2568313021167417e-06, "loss": 0.7303, "step": 62460 }, { "epoch": 0.7612762482785517, "grad_norm": 2.444580928537954, "learning_rate": 1.2565105837075047e-06, "loss": 0.7345, "step": 62465 }, { "epoch": 0.7613371845026995, "grad_norm": 1.808807542375854, "learning_rate": 1.2561898652982682e-06, "loss": 0.6908, "step": 62470 }, { "epoch": 0.7613981207268473, "grad_norm": 2.3934845027407996, "learning_rate": 1.2558691468890316e-06, "loss": 0.7137, "step": 62475 }, { "epoch": 0.7614590569509951, "grad_norm": 2.4931818406249615, "learning_rate": 1.2555484284797948e-06, "loss": 0.7352, "step": 62480 }, { "epoch": 0.7615199931751429, "grad_norm": 2.991028265786859, "learning_rate": 1.255227710070558e-06, "loss": 0.7024, "step": 62485 }, { "epoch": 0.7615809293992907, "grad_norm": 2.9228339621343595, "learning_rate": 1.2549069916613215e-06, "loss": 0.7156, "step": 62490 }, { "epoch": 0.7616418656234385, "grad_norm": 2.1702920651295736, "learning_rate": 1.2545862732520847e-06, "loss": 0.6932, "step": 62495 }, { "epoch": 0.7617028018475863, "grad_norm": 2.5718983114466654, "learning_rate": 1.2542655548428482e-06, "loss": 0.6831, "step": 62500 }, { "epoch": 0.7617637380717341, "grad_norm": 2.3413147236455063, "learning_rate": 1.2539448364336112e-06, "loss": 0.7271, "step": 62505 }, { "epoch": 0.761824674295882, "grad_norm": 2.503175212567611, "learning_rate": 1.2536241180243746e-06, "loss": 0.7585, "step": 62510 }, { "epoch": 0.7618856105200298, "grad_norm": 2.2826919846761653, "learning_rate": 1.253303399615138e-06, "loss": 0.6898, "step": 62515 }, { "epoch": 0.7619465467441775, "grad_norm": 2.32706819924545, "learning_rate": 1.2529826812059013e-06, "loss": 0.7156, "step": 62520 }, { "epoch": 0.7620074829683253, "grad_norm": 2.5784021028072104, "learning_rate": 1.2526619627966647e-06, "loss": 0.7376, "step": 62525 }, { "epoch": 0.7620684191924731, "grad_norm": 3.5126217126952244, "learning_rate": 1.252341244387428e-06, "loss": 0.7569, "step": 62530 }, { "epoch": 0.762129355416621, "grad_norm": 2.7755659507768686, "learning_rate": 1.2520205259781912e-06, "loss": 0.762, "step": 62535 }, { "epoch": 0.7621902916407688, "grad_norm": 2.30103379989962, "learning_rate": 1.2516998075689546e-06, "loss": 0.6947, "step": 62540 }, { "epoch": 0.7622512278649166, "grad_norm": 2.064318894792921, "learning_rate": 1.251379089159718e-06, "loss": 0.7914, "step": 62545 }, { "epoch": 0.7623121640890644, "grad_norm": 2.5353886617630375, "learning_rate": 1.251058370750481e-06, "loss": 0.705, "step": 62550 }, { "epoch": 0.7623731003132121, "grad_norm": 2.5381046985971065, "learning_rate": 1.2507376523412445e-06, "loss": 0.7238, "step": 62555 }, { "epoch": 0.76243403653736, "grad_norm": 2.1415528326346043, "learning_rate": 1.2504169339320078e-06, "loss": 0.678, "step": 62560 }, { "epoch": 0.7624949727615078, "grad_norm": 2.829354403859278, "learning_rate": 1.2500962155227712e-06, "loss": 0.8547, "step": 62565 }, { "epoch": 0.7625559089856556, "grad_norm": 2.152506014051376, "learning_rate": 1.2497754971135344e-06, "loss": 0.6772, "step": 62570 }, { "epoch": 0.7626168452098034, "grad_norm": 2.86096532068649, "learning_rate": 1.2494547787042979e-06, "loss": 0.7127, "step": 62575 }, { "epoch": 0.7626777814339513, "grad_norm": 2.8018138153805845, "learning_rate": 1.249134060295061e-06, "loss": 0.714, "step": 62580 }, { "epoch": 0.7627387176580991, "grad_norm": 2.304952626566469, "learning_rate": 1.2488133418858243e-06, "loss": 0.77, "step": 62585 }, { "epoch": 0.7627996538822468, "grad_norm": 2.675710109880836, "learning_rate": 1.2484926234765876e-06, "loss": 0.7638, "step": 62590 }, { "epoch": 0.7628605901063946, "grad_norm": 2.055763795977882, "learning_rate": 1.248171905067351e-06, "loss": 0.7135, "step": 62595 }, { "epoch": 0.7629215263305424, "grad_norm": 2.494369353309153, "learning_rate": 1.2478511866581142e-06, "loss": 0.6549, "step": 62600 }, { "epoch": 0.7629824625546903, "grad_norm": 3.002224053779353, "learning_rate": 1.2475304682488777e-06, "loss": 0.7365, "step": 62605 }, { "epoch": 0.7630433987788381, "grad_norm": 4.639397042897315, "learning_rate": 1.2472097498396409e-06, "loss": 0.6987, "step": 62610 }, { "epoch": 0.7631043350029859, "grad_norm": 2.3500668422825557, "learning_rate": 1.2468890314304043e-06, "loss": 0.7082, "step": 62615 }, { "epoch": 0.7631652712271337, "grad_norm": 2.4741809746675525, "learning_rate": 1.2465683130211676e-06, "loss": 0.6605, "step": 62620 }, { "epoch": 0.7632262074512814, "grad_norm": 2.4484684759295976, "learning_rate": 1.2462475946119308e-06, "loss": 0.7356, "step": 62625 }, { "epoch": 0.7632871436754293, "grad_norm": 3.0755870608174862, "learning_rate": 1.245926876202694e-06, "loss": 0.7304, "step": 62630 }, { "epoch": 0.7633480798995771, "grad_norm": 2.8079000884658596, "learning_rate": 1.2456061577934575e-06, "loss": 0.769, "step": 62635 }, { "epoch": 0.7634090161237249, "grad_norm": 2.400880029223427, "learning_rate": 1.2452854393842207e-06, "loss": 0.7566, "step": 62640 }, { "epoch": 0.7634699523478727, "grad_norm": 2.058514146661208, "learning_rate": 1.2449647209749841e-06, "loss": 0.7229, "step": 62645 }, { "epoch": 0.7635308885720206, "grad_norm": 2.716247715812676, "learning_rate": 1.2446440025657474e-06, "loss": 0.6721, "step": 62650 }, { "epoch": 0.7635918247961684, "grad_norm": 1.9859489942755606, "learning_rate": 1.2443232841565108e-06, "loss": 0.5994, "step": 62655 }, { "epoch": 0.7636527610203161, "grad_norm": 2.9415140654404297, "learning_rate": 1.244002565747274e-06, "loss": 0.7702, "step": 62660 }, { "epoch": 0.7637136972444639, "grad_norm": 1.9327604095503295, "learning_rate": 1.2436818473380373e-06, "loss": 0.7282, "step": 62665 }, { "epoch": 0.7637746334686117, "grad_norm": 2.3697974834417024, "learning_rate": 1.2433611289288005e-06, "loss": 0.644, "step": 62670 }, { "epoch": 0.7638355696927596, "grad_norm": 2.354929861891784, "learning_rate": 1.243040410519564e-06, "loss": 0.7651, "step": 62675 }, { "epoch": 0.7638965059169074, "grad_norm": 2.515438770427912, "learning_rate": 1.2427196921103271e-06, "loss": 0.7069, "step": 62680 }, { "epoch": 0.7639574421410552, "grad_norm": 2.502071363009405, "learning_rate": 1.2423989737010906e-06, "loss": 0.7908, "step": 62685 }, { "epoch": 0.764018378365203, "grad_norm": 3.019306231313722, "learning_rate": 1.2420782552918538e-06, "loss": 0.682, "step": 62690 }, { "epoch": 0.7640793145893507, "grad_norm": 1.9822576792552948, "learning_rate": 1.2417575368826173e-06, "loss": 0.701, "step": 62695 }, { "epoch": 0.7641402508134986, "grad_norm": 2.447556294480872, "learning_rate": 1.2414368184733805e-06, "loss": 0.7625, "step": 62700 }, { "epoch": 0.7642011870376464, "grad_norm": 2.393879458657644, "learning_rate": 1.2411161000641437e-06, "loss": 0.6742, "step": 62705 }, { "epoch": 0.7642621232617942, "grad_norm": 2.391425917967867, "learning_rate": 1.240795381654907e-06, "loss": 0.6968, "step": 62710 }, { "epoch": 0.764323059485942, "grad_norm": 3.207786238711789, "learning_rate": 1.2404746632456704e-06, "loss": 0.6921, "step": 62715 }, { "epoch": 0.7643839957100899, "grad_norm": 2.774920569486656, "learning_rate": 1.2401539448364338e-06, "loss": 0.707, "step": 62720 }, { "epoch": 0.7644449319342377, "grad_norm": 4.398513756340257, "learning_rate": 1.239833226427197e-06, "loss": 0.7302, "step": 62725 }, { "epoch": 0.7645058681583854, "grad_norm": 2.154667572987276, "learning_rate": 1.2395125080179603e-06, "loss": 0.7318, "step": 62730 }, { "epoch": 0.7645668043825332, "grad_norm": 2.5243938894910642, "learning_rate": 1.2391917896087237e-06, "loss": 0.7644, "step": 62735 }, { "epoch": 0.764627740606681, "grad_norm": 2.284237734824547, "learning_rate": 1.238871071199487e-06, "loss": 0.754, "step": 62740 }, { "epoch": 0.7646886768308289, "grad_norm": 2.7992950268409036, "learning_rate": 1.2385503527902502e-06, "loss": 0.7343, "step": 62745 }, { "epoch": 0.7647496130549767, "grad_norm": 2.951361478095452, "learning_rate": 1.2382296343810136e-06, "loss": 0.7183, "step": 62750 }, { "epoch": 0.7648105492791245, "grad_norm": 2.495381953357497, "learning_rate": 1.2379089159717768e-06, "loss": 0.6954, "step": 62755 }, { "epoch": 0.7648714855032723, "grad_norm": 2.538649496927516, "learning_rate": 1.2375881975625403e-06, "loss": 0.6945, "step": 62760 }, { "epoch": 0.76493242172742, "grad_norm": 2.5315684439070774, "learning_rate": 1.2372674791533035e-06, "loss": 0.7542, "step": 62765 }, { "epoch": 0.7649933579515679, "grad_norm": 2.5771501706546496, "learning_rate": 1.2369467607440667e-06, "loss": 0.6246, "step": 62770 }, { "epoch": 0.7650542941757157, "grad_norm": 2.6224154668849375, "learning_rate": 1.2366260423348302e-06, "loss": 0.7295, "step": 62775 }, { "epoch": 0.7651152303998635, "grad_norm": 2.5411862121347926, "learning_rate": 1.2363053239255934e-06, "loss": 0.7201, "step": 62780 }, { "epoch": 0.7651761666240113, "grad_norm": 2.2888652169956587, "learning_rate": 1.2359846055163566e-06, "loss": 0.7788, "step": 62785 }, { "epoch": 0.7652371028481592, "grad_norm": 2.840301544709531, "learning_rate": 1.23566388710712e-06, "loss": 0.7072, "step": 62790 }, { "epoch": 0.7652980390723069, "grad_norm": 4.2652383211015605, "learning_rate": 1.2353431686978833e-06, "loss": 0.7569, "step": 62795 }, { "epoch": 0.7653589752964547, "grad_norm": 2.497769368102169, "learning_rate": 1.2350224502886467e-06, "loss": 0.6954, "step": 62800 }, { "epoch": 0.7654199115206025, "grad_norm": 2.7550160420788403, "learning_rate": 1.23470173187941e-06, "loss": 0.716, "step": 62805 }, { "epoch": 0.7654808477447503, "grad_norm": 2.7617520722727655, "learning_rate": 1.2343810134701732e-06, "loss": 0.6832, "step": 62810 }, { "epoch": 0.7655417839688982, "grad_norm": 2.3880275947616925, "learning_rate": 1.2340602950609366e-06, "loss": 0.7171, "step": 62815 }, { "epoch": 0.765602720193046, "grad_norm": 2.626465431275185, "learning_rate": 1.2337395766516999e-06, "loss": 0.7692, "step": 62820 }, { "epoch": 0.7656636564171938, "grad_norm": 2.5151853955146333, "learning_rate": 1.233418858242463e-06, "loss": 0.635, "step": 62825 }, { "epoch": 0.7657245926413415, "grad_norm": 2.279663776224857, "learning_rate": 1.2330981398332265e-06, "loss": 0.6591, "step": 62830 }, { "epoch": 0.7657855288654893, "grad_norm": 2.3379940582441523, "learning_rate": 1.2327774214239898e-06, "loss": 0.7772, "step": 62835 }, { "epoch": 0.7658464650896372, "grad_norm": 2.9635926768980645, "learning_rate": 1.2324567030147532e-06, "loss": 0.7623, "step": 62840 }, { "epoch": 0.765907401313785, "grad_norm": 2.0131488058756526, "learning_rate": 1.2321359846055164e-06, "loss": 0.7433, "step": 62845 }, { "epoch": 0.7659683375379328, "grad_norm": 2.6985213894615745, "learning_rate": 1.2318152661962799e-06, "loss": 0.6901, "step": 62850 }, { "epoch": 0.7660292737620806, "grad_norm": 2.853299482201836, "learning_rate": 1.2314945477870431e-06, "loss": 0.7405, "step": 62855 }, { "epoch": 0.7660902099862285, "grad_norm": 2.743857756635091, "learning_rate": 1.2311738293778063e-06, "loss": 0.7194, "step": 62860 }, { "epoch": 0.7661511462103762, "grad_norm": 2.3767163511967606, "learning_rate": 1.2308531109685696e-06, "loss": 0.7535, "step": 62865 }, { "epoch": 0.766212082434524, "grad_norm": 2.7233089689069887, "learning_rate": 1.230532392559333e-06, "loss": 0.6615, "step": 62870 }, { "epoch": 0.7662730186586718, "grad_norm": 2.5952269113549904, "learning_rate": 1.2302116741500964e-06, "loss": 0.8012, "step": 62875 }, { "epoch": 0.7663339548828196, "grad_norm": 2.311389377949575, "learning_rate": 1.2298909557408597e-06, "loss": 0.6626, "step": 62880 }, { "epoch": 0.7663948911069675, "grad_norm": 2.9491646844542942, "learning_rate": 1.229570237331623e-06, "loss": 0.7318, "step": 62885 }, { "epoch": 0.7664558273311153, "grad_norm": 2.1533446674373526, "learning_rate": 1.2292495189223863e-06, "loss": 0.7395, "step": 62890 }, { "epoch": 0.7665167635552631, "grad_norm": 5.652306878994179, "learning_rate": 1.2289288005131496e-06, "loss": 0.6878, "step": 62895 }, { "epoch": 0.7665776997794108, "grad_norm": 2.2222020733238077, "learning_rate": 1.2286080821039128e-06, "loss": 0.6946, "step": 62900 }, { "epoch": 0.7666386360035586, "grad_norm": 3.022654272574028, "learning_rate": 1.228287363694676e-06, "loss": 0.7414, "step": 62905 }, { "epoch": 0.7666995722277065, "grad_norm": 2.2368646828793826, "learning_rate": 1.2279666452854395e-06, "loss": 0.7568, "step": 62910 }, { "epoch": 0.7667605084518543, "grad_norm": 2.2719396634898237, "learning_rate": 1.227645926876203e-06, "loss": 0.6953, "step": 62915 }, { "epoch": 0.7668214446760021, "grad_norm": 2.1520897910453187, "learning_rate": 1.2273252084669661e-06, "loss": 0.7086, "step": 62920 }, { "epoch": 0.7668823809001499, "grad_norm": 2.8849357248188783, "learning_rate": 1.2270044900577294e-06, "loss": 0.663, "step": 62925 }, { "epoch": 0.7669433171242978, "grad_norm": 3.2214437857268012, "learning_rate": 1.2266837716484928e-06, "loss": 0.7887, "step": 62930 }, { "epoch": 0.7670042533484455, "grad_norm": 2.301061210892112, "learning_rate": 1.226363053239256e-06, "loss": 0.7449, "step": 62935 }, { "epoch": 0.7670651895725933, "grad_norm": 2.2334793759098956, "learning_rate": 1.2260423348300193e-06, "loss": 0.6971, "step": 62940 }, { "epoch": 0.7671261257967411, "grad_norm": 3.38395863084685, "learning_rate": 1.2257216164207827e-06, "loss": 0.761, "step": 62945 }, { "epoch": 0.7671870620208889, "grad_norm": 2.4335686714579805, "learning_rate": 1.225400898011546e-06, "loss": 0.6425, "step": 62950 }, { "epoch": 0.7672479982450368, "grad_norm": 2.923227204935619, "learning_rate": 1.2250801796023094e-06, "loss": 0.8263, "step": 62955 }, { "epoch": 0.7673089344691846, "grad_norm": 2.536660312864129, "learning_rate": 1.2247594611930726e-06, "loss": 0.6824, "step": 62960 }, { "epoch": 0.7673698706933324, "grad_norm": 2.150038251693745, "learning_rate": 1.2244387427838358e-06, "loss": 0.7982, "step": 62965 }, { "epoch": 0.7674308069174801, "grad_norm": 2.2811181171701307, "learning_rate": 1.2241180243745993e-06, "loss": 0.7439, "step": 62970 }, { "epoch": 0.7674917431416279, "grad_norm": 2.8598464069993317, "learning_rate": 1.2237973059653625e-06, "loss": 0.7466, "step": 62975 }, { "epoch": 0.7675526793657758, "grad_norm": 3.0596284654134216, "learning_rate": 1.2234765875561257e-06, "loss": 0.6857, "step": 62980 }, { "epoch": 0.7676136155899236, "grad_norm": 2.512023651255015, "learning_rate": 1.2231558691468892e-06, "loss": 0.6663, "step": 62985 }, { "epoch": 0.7676745518140714, "grad_norm": 2.5504630647308018, "learning_rate": 1.2228351507376524e-06, "loss": 0.7244, "step": 62990 }, { "epoch": 0.7677354880382192, "grad_norm": 2.5954839589567014, "learning_rate": 1.2225144323284158e-06, "loss": 0.7346, "step": 62995 }, { "epoch": 0.767796424262367, "grad_norm": 2.9298179134476263, "learning_rate": 1.222193713919179e-06, "loss": 0.7319, "step": 63000 }, { "epoch": 0.7678573604865148, "grad_norm": 2.449080644858895, "learning_rate": 1.2218729955099423e-06, "loss": 0.7409, "step": 63005 }, { "epoch": 0.7679182967106626, "grad_norm": 2.8532760983761944, "learning_rate": 1.2215522771007057e-06, "loss": 0.7116, "step": 63010 }, { "epoch": 0.7679792329348104, "grad_norm": 2.141257502368498, "learning_rate": 1.221231558691469e-06, "loss": 0.6919, "step": 63015 }, { "epoch": 0.7680401691589582, "grad_norm": 2.1723034509501398, "learning_rate": 1.2209108402822322e-06, "loss": 0.7133, "step": 63020 }, { "epoch": 0.768101105383106, "grad_norm": 2.5971272851936815, "learning_rate": 1.2205901218729956e-06, "loss": 0.788, "step": 63025 }, { "epoch": 0.7681620416072539, "grad_norm": 2.311912876210138, "learning_rate": 1.2202694034637589e-06, "loss": 0.7594, "step": 63030 }, { "epoch": 0.7682229778314017, "grad_norm": 2.4947394667827694, "learning_rate": 1.2199486850545223e-06, "loss": 0.6694, "step": 63035 }, { "epoch": 0.7682839140555494, "grad_norm": 2.3144803916094903, "learning_rate": 1.2196279666452855e-06, "loss": 0.6839, "step": 63040 }, { "epoch": 0.7683448502796972, "grad_norm": 2.919803344693197, "learning_rate": 1.2193072482360488e-06, "loss": 0.764, "step": 63045 }, { "epoch": 0.7684057865038451, "grad_norm": 2.598214085762596, "learning_rate": 1.2189865298268122e-06, "loss": 0.8029, "step": 63050 }, { "epoch": 0.7684667227279929, "grad_norm": 2.4989058346751403, "learning_rate": 1.2186658114175754e-06, "loss": 0.7567, "step": 63055 }, { "epoch": 0.7685276589521407, "grad_norm": 2.431904487946369, "learning_rate": 1.2183450930083387e-06, "loss": 0.749, "step": 63060 }, { "epoch": 0.7685885951762885, "grad_norm": 2.5966141785280055, "learning_rate": 1.218024374599102e-06, "loss": 0.7773, "step": 63065 }, { "epoch": 0.7686495314004363, "grad_norm": 2.3862765189751753, "learning_rate": 1.2177036561898655e-06, "loss": 0.7579, "step": 63070 }, { "epoch": 0.7687104676245841, "grad_norm": 2.6003331891752968, "learning_rate": 1.2173829377806288e-06, "loss": 0.7048, "step": 63075 }, { "epoch": 0.7687714038487319, "grad_norm": 2.347408947472881, "learning_rate": 1.217062219371392e-06, "loss": 0.7377, "step": 63080 }, { "epoch": 0.7688323400728797, "grad_norm": 2.5520207351512942, "learning_rate": 1.2167415009621552e-06, "loss": 0.7545, "step": 63085 }, { "epoch": 0.7688932762970275, "grad_norm": 2.663628972199802, "learning_rate": 1.2164207825529187e-06, "loss": 0.7144, "step": 63090 }, { "epoch": 0.7689542125211754, "grad_norm": 2.018068500925716, "learning_rate": 1.2161000641436819e-06, "loss": 0.6855, "step": 63095 }, { "epoch": 0.7690151487453232, "grad_norm": 2.658532288390174, "learning_rate": 1.2157793457344451e-06, "loss": 0.7164, "step": 63100 }, { "epoch": 0.769076084969471, "grad_norm": 2.6106179030649637, "learning_rate": 1.2154586273252086e-06, "loss": 0.6895, "step": 63105 }, { "epoch": 0.7691370211936187, "grad_norm": 1.9194881750200934, "learning_rate": 1.215137908915972e-06, "loss": 0.7189, "step": 63110 }, { "epoch": 0.7691979574177665, "grad_norm": 2.4515396017346593, "learning_rate": 1.2148171905067352e-06, "loss": 0.6712, "step": 63115 }, { "epoch": 0.7692588936419144, "grad_norm": 2.5515627717958673, "learning_rate": 1.2144964720974985e-06, "loss": 0.7587, "step": 63120 }, { "epoch": 0.7693198298660622, "grad_norm": 2.1583245813364558, "learning_rate": 1.2141757536882617e-06, "loss": 0.7772, "step": 63125 }, { "epoch": 0.76938076609021, "grad_norm": 2.2858018708118757, "learning_rate": 1.2138550352790251e-06, "loss": 0.7922, "step": 63130 }, { "epoch": 0.7694417023143578, "grad_norm": 2.5648893398173644, "learning_rate": 1.2135343168697884e-06, "loss": 0.6043, "step": 63135 }, { "epoch": 0.7695026385385056, "grad_norm": 2.086734400892326, "learning_rate": 1.2132135984605518e-06, "loss": 0.6827, "step": 63140 }, { "epoch": 0.7695635747626534, "grad_norm": 2.1833983699105506, "learning_rate": 1.212892880051315e-06, "loss": 0.6994, "step": 63145 }, { "epoch": 0.7696245109868012, "grad_norm": 2.2128751336664063, "learning_rate": 1.2125721616420785e-06, "loss": 0.6774, "step": 63150 }, { "epoch": 0.769685447210949, "grad_norm": 2.0495511171786926, "learning_rate": 1.2122514432328417e-06, "loss": 0.7462, "step": 63155 }, { "epoch": 0.7697463834350968, "grad_norm": 3.113907864248507, "learning_rate": 1.211930724823605e-06, "loss": 0.7597, "step": 63160 }, { "epoch": 0.7698073196592446, "grad_norm": 2.619781025341101, "learning_rate": 1.2116100064143684e-06, "loss": 0.7078, "step": 63165 }, { "epoch": 0.7698682558833925, "grad_norm": 2.1558925926479677, "learning_rate": 1.2112892880051316e-06, "loss": 0.6418, "step": 63170 }, { "epoch": 0.7699291921075403, "grad_norm": 2.29130541510513, "learning_rate": 1.2109685695958948e-06, "loss": 0.7873, "step": 63175 }, { "epoch": 0.769990128331688, "grad_norm": 2.4622196162007333, "learning_rate": 1.2106478511866583e-06, "loss": 0.7923, "step": 63180 }, { "epoch": 0.7700510645558358, "grad_norm": 2.652618540822148, "learning_rate": 1.2103271327774215e-06, "loss": 0.8127, "step": 63185 }, { "epoch": 0.7701120007799837, "grad_norm": 2.607540717560217, "learning_rate": 1.210006414368185e-06, "loss": 0.777, "step": 63190 }, { "epoch": 0.7701729370041315, "grad_norm": 2.2754070292917237, "learning_rate": 1.2096856959589482e-06, "loss": 0.7, "step": 63195 }, { "epoch": 0.7702338732282793, "grad_norm": 2.372786589479103, "learning_rate": 1.2093649775497114e-06, "loss": 0.7864, "step": 63200 }, { "epoch": 0.7702948094524271, "grad_norm": 2.4794374616585757, "learning_rate": 1.2090442591404748e-06, "loss": 0.7675, "step": 63205 }, { "epoch": 0.7703557456765749, "grad_norm": 2.7894053069538853, "learning_rate": 1.208723540731238e-06, "loss": 0.7459, "step": 63210 }, { "epoch": 0.7704166819007227, "grad_norm": 3.251958549814105, "learning_rate": 1.2084028223220013e-06, "loss": 0.7869, "step": 63215 }, { "epoch": 0.7704776181248705, "grad_norm": 2.7108611126442086, "learning_rate": 1.2080821039127647e-06, "loss": 0.7714, "step": 63220 }, { "epoch": 0.7705385543490183, "grad_norm": 2.7234675851466674, "learning_rate": 1.2077613855035282e-06, "loss": 0.6647, "step": 63225 }, { "epoch": 0.7705994905731661, "grad_norm": 3.0869041978567977, "learning_rate": 1.2074406670942914e-06, "loss": 0.7345, "step": 63230 }, { "epoch": 0.770660426797314, "grad_norm": 2.565295481370773, "learning_rate": 1.2071199486850546e-06, "loss": 0.7454, "step": 63235 }, { "epoch": 0.7707213630214618, "grad_norm": 2.168659412176226, "learning_rate": 1.2067992302758178e-06, "loss": 0.6799, "step": 63240 }, { "epoch": 0.7707822992456096, "grad_norm": 2.2049349570585988, "learning_rate": 1.2064785118665813e-06, "loss": 0.6981, "step": 63245 }, { "epoch": 0.7708432354697573, "grad_norm": 2.5212808737212082, "learning_rate": 1.2061577934573445e-06, "loss": 0.7679, "step": 63250 }, { "epoch": 0.7709041716939051, "grad_norm": 3.2244684430457573, "learning_rate": 1.2058370750481077e-06, "loss": 0.6738, "step": 63255 }, { "epoch": 0.770965107918053, "grad_norm": 2.74939172978668, "learning_rate": 1.2055163566388712e-06, "loss": 0.7248, "step": 63260 }, { "epoch": 0.7710260441422008, "grad_norm": 2.492200641653029, "learning_rate": 1.2051956382296346e-06, "loss": 0.7254, "step": 63265 }, { "epoch": 0.7710869803663486, "grad_norm": 2.3605612272157046, "learning_rate": 1.2048749198203979e-06, "loss": 0.7294, "step": 63270 }, { "epoch": 0.7711479165904964, "grad_norm": 2.684424262869684, "learning_rate": 1.204554201411161e-06, "loss": 0.6576, "step": 63275 }, { "epoch": 0.7712088528146442, "grad_norm": 2.624658664545721, "learning_rate": 1.2042334830019243e-06, "loss": 0.7749, "step": 63280 }, { "epoch": 0.771269789038792, "grad_norm": 2.1524296430115775, "learning_rate": 1.2039127645926878e-06, "loss": 0.7481, "step": 63285 }, { "epoch": 0.7713307252629398, "grad_norm": 2.046174044088817, "learning_rate": 1.203592046183451e-06, "loss": 0.7335, "step": 63290 }, { "epoch": 0.7713916614870876, "grad_norm": 2.6467233039034967, "learning_rate": 1.2032713277742144e-06, "loss": 0.746, "step": 63295 }, { "epoch": 0.7714525977112354, "grad_norm": 2.147245982953829, "learning_rate": 1.2029506093649777e-06, "loss": 0.7154, "step": 63300 }, { "epoch": 0.7715135339353832, "grad_norm": 2.2222458024173157, "learning_rate": 1.202629890955741e-06, "loss": 0.7438, "step": 63305 }, { "epoch": 0.7715744701595311, "grad_norm": 2.564007620201285, "learning_rate": 1.2023091725465043e-06, "loss": 0.7154, "step": 63310 }, { "epoch": 0.7716354063836789, "grad_norm": 2.217480996409083, "learning_rate": 1.2019884541372675e-06, "loss": 0.7675, "step": 63315 }, { "epoch": 0.7716963426078266, "grad_norm": 3.7071914363885115, "learning_rate": 1.2016677357280308e-06, "loss": 0.7278, "step": 63320 }, { "epoch": 0.7717572788319744, "grad_norm": 2.984542107886613, "learning_rate": 1.2013470173187942e-06, "loss": 0.7537, "step": 63325 }, { "epoch": 0.7718182150561222, "grad_norm": 2.356688130121924, "learning_rate": 1.2010262989095574e-06, "loss": 0.6472, "step": 63330 }, { "epoch": 0.7718791512802701, "grad_norm": 2.2494643591421735, "learning_rate": 1.2007055805003209e-06, "loss": 0.7307, "step": 63335 }, { "epoch": 0.7719400875044179, "grad_norm": 2.818654462815613, "learning_rate": 1.2003848620910841e-06, "loss": 0.7157, "step": 63340 }, { "epoch": 0.7720010237285657, "grad_norm": 2.430824264349977, "learning_rate": 1.2000641436818476e-06, "loss": 0.737, "step": 63345 }, { "epoch": 0.7720619599527135, "grad_norm": 1.7938517038139812, "learning_rate": 1.1997434252726108e-06, "loss": 0.661, "step": 63350 }, { "epoch": 0.7721228961768613, "grad_norm": 2.602332176102616, "learning_rate": 1.199422706863374e-06, "loss": 0.7691, "step": 63355 }, { "epoch": 0.7721838324010091, "grad_norm": 2.0432750221722804, "learning_rate": 1.1991019884541372e-06, "loss": 0.6616, "step": 63360 }, { "epoch": 0.7722447686251569, "grad_norm": 3.007248612259652, "learning_rate": 1.1987812700449007e-06, "loss": 0.7023, "step": 63365 }, { "epoch": 0.7723057048493047, "grad_norm": 2.7297626256412224, "learning_rate": 1.198460551635664e-06, "loss": 0.6868, "step": 63370 }, { "epoch": 0.7723666410734525, "grad_norm": 2.1099746892828826, "learning_rate": 1.1981398332264273e-06, "loss": 0.6749, "step": 63375 }, { "epoch": 0.7724275772976004, "grad_norm": 2.2977348144801724, "learning_rate": 1.1978191148171906e-06, "loss": 0.7395, "step": 63380 }, { "epoch": 0.7724885135217482, "grad_norm": 2.844777057701892, "learning_rate": 1.197498396407954e-06, "loss": 0.7052, "step": 63385 }, { "epoch": 0.7725494497458959, "grad_norm": 2.3331126589333206, "learning_rate": 1.1971776779987172e-06, "loss": 0.6696, "step": 63390 }, { "epoch": 0.7726103859700437, "grad_norm": 2.112894389712619, "learning_rate": 1.1968569595894805e-06, "loss": 0.731, "step": 63395 }, { "epoch": 0.7726713221941915, "grad_norm": 2.461501421213769, "learning_rate": 1.1965362411802437e-06, "loss": 0.7366, "step": 63400 }, { "epoch": 0.7727322584183394, "grad_norm": 2.050232237594292, "learning_rate": 1.1962155227710071e-06, "loss": 0.7569, "step": 63405 }, { "epoch": 0.7727931946424872, "grad_norm": 2.400559863590997, "learning_rate": 1.1958948043617704e-06, "loss": 0.7633, "step": 63410 }, { "epoch": 0.772854130866635, "grad_norm": 2.1745884354451657, "learning_rate": 1.1955740859525338e-06, "loss": 0.7065, "step": 63415 }, { "epoch": 0.7729150670907828, "grad_norm": 2.5416638893388295, "learning_rate": 1.195253367543297e-06, "loss": 0.7066, "step": 63420 }, { "epoch": 0.7729760033149305, "grad_norm": 2.614527961824636, "learning_rate": 1.1949326491340605e-06, "loss": 0.6938, "step": 63425 }, { "epoch": 0.7730369395390784, "grad_norm": 3.176010462473335, "learning_rate": 1.1946119307248237e-06, "loss": 0.7008, "step": 63430 }, { "epoch": 0.7730978757632262, "grad_norm": 2.26128212113596, "learning_rate": 1.194291212315587e-06, "loss": 0.7055, "step": 63435 }, { "epoch": 0.773158811987374, "grad_norm": 2.4162630498076534, "learning_rate": 1.1939704939063502e-06, "loss": 0.6942, "step": 63440 }, { "epoch": 0.7732197482115218, "grad_norm": 2.347693150482852, "learning_rate": 1.1936497754971136e-06, "loss": 0.694, "step": 63445 }, { "epoch": 0.7732806844356697, "grad_norm": 2.987343752956312, "learning_rate": 1.1933290570878768e-06, "loss": 0.6931, "step": 63450 }, { "epoch": 0.7733416206598175, "grad_norm": 2.403001104636043, "learning_rate": 1.1930083386786403e-06, "loss": 0.707, "step": 63455 }, { "epoch": 0.7734025568839652, "grad_norm": 2.3978811554990247, "learning_rate": 1.1926876202694037e-06, "loss": 0.6847, "step": 63460 }, { "epoch": 0.773463493108113, "grad_norm": 2.6066699495813683, "learning_rate": 1.192366901860167e-06, "loss": 0.7139, "step": 63465 }, { "epoch": 0.7735244293322608, "grad_norm": 2.9147599916949747, "learning_rate": 1.1920461834509302e-06, "loss": 0.7124, "step": 63470 }, { "epoch": 0.7735853655564087, "grad_norm": 2.7382579167456944, "learning_rate": 1.1917254650416934e-06, "loss": 0.8139, "step": 63475 }, { "epoch": 0.7736463017805565, "grad_norm": 2.2288847556074938, "learning_rate": 1.1914047466324568e-06, "loss": 0.7123, "step": 63480 }, { "epoch": 0.7737072380047043, "grad_norm": 3.9292932794684448, "learning_rate": 1.19108402822322e-06, "loss": 0.7091, "step": 63485 }, { "epoch": 0.7737681742288521, "grad_norm": 2.1339074639504485, "learning_rate": 1.1907633098139835e-06, "loss": 0.7628, "step": 63490 }, { "epoch": 0.7738291104529998, "grad_norm": 2.50884914927649, "learning_rate": 1.1904425914047467e-06, "loss": 0.778, "step": 63495 }, { "epoch": 0.7738900466771477, "grad_norm": 1.9859276029247785, "learning_rate": 1.1901218729955102e-06, "loss": 0.6575, "step": 63500 }, { "epoch": 0.7739509829012955, "grad_norm": 3.167927548269634, "learning_rate": 1.1898011545862734e-06, "loss": 0.7553, "step": 63505 }, { "epoch": 0.7740119191254433, "grad_norm": 2.365849197709997, "learning_rate": 1.1894804361770366e-06, "loss": 0.7329, "step": 63510 }, { "epoch": 0.7740728553495911, "grad_norm": 2.1818680670077906, "learning_rate": 1.1891597177677999e-06, "loss": 0.6748, "step": 63515 }, { "epoch": 0.774133791573739, "grad_norm": 2.5658295048103574, "learning_rate": 1.1888389993585633e-06, "loss": 0.6998, "step": 63520 }, { "epoch": 0.7741947277978868, "grad_norm": 2.457096746204426, "learning_rate": 1.1885182809493265e-06, "loss": 0.722, "step": 63525 }, { "epoch": 0.7742556640220345, "grad_norm": 2.741725776323572, "learning_rate": 1.18819756254009e-06, "loss": 0.7295, "step": 63530 }, { "epoch": 0.7743166002461823, "grad_norm": 2.2509838809793887, "learning_rate": 1.1878768441308532e-06, "loss": 0.6856, "step": 63535 }, { "epoch": 0.7743775364703301, "grad_norm": 2.3132878874485607, "learning_rate": 1.1875561257216166e-06, "loss": 0.6897, "step": 63540 }, { "epoch": 0.774438472694478, "grad_norm": 2.3901671853033752, "learning_rate": 1.1872354073123799e-06, "loss": 0.6645, "step": 63545 }, { "epoch": 0.7744994089186258, "grad_norm": 2.7589775730038846, "learning_rate": 1.186914688903143e-06, "loss": 0.7485, "step": 63550 }, { "epoch": 0.7745603451427736, "grad_norm": 2.5771132920578994, "learning_rate": 1.1865939704939063e-06, "loss": 0.7381, "step": 63555 }, { "epoch": 0.7746212813669214, "grad_norm": 2.4548984628356902, "learning_rate": 1.1862732520846698e-06, "loss": 0.7595, "step": 63560 }, { "epoch": 0.7746822175910691, "grad_norm": 3.1779749975601344, "learning_rate": 1.185952533675433e-06, "loss": 0.7354, "step": 63565 }, { "epoch": 0.774743153815217, "grad_norm": 2.718836630998776, "learning_rate": 1.1856318152661964e-06, "loss": 0.742, "step": 63570 }, { "epoch": 0.7748040900393648, "grad_norm": 4.48243534847039, "learning_rate": 1.1853110968569597e-06, "loss": 0.7609, "step": 63575 }, { "epoch": 0.7748650262635126, "grad_norm": 2.8889877517154834, "learning_rate": 1.1849903784477231e-06, "loss": 0.7325, "step": 63580 }, { "epoch": 0.7749259624876604, "grad_norm": 2.214873196416027, "learning_rate": 1.1846696600384863e-06, "loss": 0.7123, "step": 63585 }, { "epoch": 0.7749868987118083, "grad_norm": 3.810624266548454, "learning_rate": 1.1843489416292496e-06, "loss": 0.7021, "step": 63590 }, { "epoch": 0.7750478349359561, "grad_norm": 3.6601772031934217, "learning_rate": 1.1840282232200128e-06, "loss": 0.7211, "step": 63595 }, { "epoch": 0.7751087711601038, "grad_norm": 2.892911131570815, "learning_rate": 1.1837075048107762e-06, "loss": 0.7104, "step": 63600 }, { "epoch": 0.7751697073842516, "grad_norm": 2.4334154702374633, "learning_rate": 1.1833867864015395e-06, "loss": 0.766, "step": 63605 }, { "epoch": 0.7752306436083994, "grad_norm": 2.942784209318979, "learning_rate": 1.183066067992303e-06, "loss": 0.6905, "step": 63610 }, { "epoch": 0.7752915798325473, "grad_norm": 2.8607112326219184, "learning_rate": 1.1827453495830661e-06, "loss": 0.6061, "step": 63615 }, { "epoch": 0.7753525160566951, "grad_norm": 2.1646484237994637, "learning_rate": 1.1824246311738296e-06, "loss": 0.6788, "step": 63620 }, { "epoch": 0.7754134522808429, "grad_norm": 2.424621160078215, "learning_rate": 1.1821039127645928e-06, "loss": 0.671, "step": 63625 }, { "epoch": 0.7754743885049907, "grad_norm": 2.5434206696717743, "learning_rate": 1.181783194355356e-06, "loss": 0.7206, "step": 63630 }, { "epoch": 0.7755353247291384, "grad_norm": 2.1863041021471337, "learning_rate": 1.1814624759461193e-06, "loss": 0.7295, "step": 63635 }, { "epoch": 0.7755962609532863, "grad_norm": 3.245998290783577, "learning_rate": 1.1811417575368827e-06, "loss": 0.7316, "step": 63640 }, { "epoch": 0.7756571971774341, "grad_norm": 2.4127158279744623, "learning_rate": 1.1808210391276461e-06, "loss": 0.7691, "step": 63645 }, { "epoch": 0.7757181334015819, "grad_norm": 2.7476144473097426, "learning_rate": 1.1805003207184094e-06, "loss": 0.6756, "step": 63650 }, { "epoch": 0.7757790696257297, "grad_norm": 2.1255859801131995, "learning_rate": 1.1801796023091726e-06, "loss": 0.6572, "step": 63655 }, { "epoch": 0.7758400058498776, "grad_norm": 2.49985989547932, "learning_rate": 1.179858883899936e-06, "loss": 0.7606, "step": 63660 }, { "epoch": 0.7759009420740254, "grad_norm": 1.9440949584332428, "learning_rate": 1.1795381654906993e-06, "loss": 0.6904, "step": 63665 }, { "epoch": 0.7759618782981731, "grad_norm": 2.404586170018207, "learning_rate": 1.1792174470814625e-06, "loss": 0.7857, "step": 63670 }, { "epoch": 0.7760228145223209, "grad_norm": 2.5177195930242964, "learning_rate": 1.1788967286722257e-06, "loss": 0.7375, "step": 63675 }, { "epoch": 0.7760837507464687, "grad_norm": 2.0537357574739468, "learning_rate": 1.1785760102629892e-06, "loss": 0.7506, "step": 63680 }, { "epoch": 0.7761446869706166, "grad_norm": 2.5363395892753062, "learning_rate": 1.1782552918537526e-06, "loss": 0.704, "step": 63685 }, { "epoch": 0.7762056231947644, "grad_norm": 1.9678642723900748, "learning_rate": 1.1779345734445158e-06, "loss": 0.7418, "step": 63690 }, { "epoch": 0.7762665594189122, "grad_norm": 2.9713096296243484, "learning_rate": 1.177613855035279e-06, "loss": 0.7632, "step": 63695 }, { "epoch": 0.77632749564306, "grad_norm": 2.1383857827638915, "learning_rate": 1.1772931366260425e-06, "loss": 0.7018, "step": 63700 }, { "epoch": 0.7763884318672077, "grad_norm": 3.223891364701128, "learning_rate": 1.1769724182168057e-06, "loss": 0.8035, "step": 63705 }, { "epoch": 0.7764493680913556, "grad_norm": 2.7211538322877753, "learning_rate": 1.176651699807569e-06, "loss": 0.7163, "step": 63710 }, { "epoch": 0.7765103043155034, "grad_norm": 2.349258939130802, "learning_rate": 1.1763309813983324e-06, "loss": 0.7496, "step": 63715 }, { "epoch": 0.7765712405396512, "grad_norm": 2.3840454541134863, "learning_rate": 1.1760102629890956e-06, "loss": 0.6435, "step": 63720 }, { "epoch": 0.776632176763799, "grad_norm": 2.6071115306367547, "learning_rate": 1.175689544579859e-06, "loss": 0.7751, "step": 63725 }, { "epoch": 0.7766931129879469, "grad_norm": 2.9685102687601828, "learning_rate": 1.1753688261706223e-06, "loss": 0.7602, "step": 63730 }, { "epoch": 0.7767540492120947, "grad_norm": 2.197105954428671, "learning_rate": 1.1750481077613855e-06, "loss": 0.6642, "step": 63735 }, { "epoch": 0.7768149854362424, "grad_norm": 2.877478125362928, "learning_rate": 1.174727389352149e-06, "loss": 0.6643, "step": 63740 }, { "epoch": 0.7768759216603902, "grad_norm": 2.497297109764227, "learning_rate": 1.1744066709429122e-06, "loss": 0.7283, "step": 63745 }, { "epoch": 0.776936857884538, "grad_norm": 2.986260659109347, "learning_rate": 1.1740859525336754e-06, "loss": 0.7315, "step": 63750 }, { "epoch": 0.7769977941086859, "grad_norm": 2.334436063832622, "learning_rate": 1.1737652341244389e-06, "loss": 0.7165, "step": 63755 }, { "epoch": 0.7770587303328337, "grad_norm": 2.3490069785277012, "learning_rate": 1.173444515715202e-06, "loss": 0.732, "step": 63760 }, { "epoch": 0.7771196665569815, "grad_norm": 2.8701376442290045, "learning_rate": 1.1731237973059655e-06, "loss": 0.7417, "step": 63765 }, { "epoch": 0.7771806027811292, "grad_norm": 3.375528842603815, "learning_rate": 1.1728030788967288e-06, "loss": 0.6863, "step": 63770 }, { "epoch": 0.777241539005277, "grad_norm": 2.3977832954402807, "learning_rate": 1.1724823604874922e-06, "loss": 0.7114, "step": 63775 }, { "epoch": 0.7773024752294249, "grad_norm": 2.733748496333701, "learning_rate": 1.1721616420782554e-06, "loss": 0.7478, "step": 63780 }, { "epoch": 0.7773634114535727, "grad_norm": 2.4929084578141607, "learning_rate": 1.1718409236690187e-06, "loss": 0.7271, "step": 63785 }, { "epoch": 0.7774243476777205, "grad_norm": 2.201681703081598, "learning_rate": 1.1715202052597819e-06, "loss": 0.7085, "step": 63790 }, { "epoch": 0.7774852839018683, "grad_norm": 2.3375951077076778, "learning_rate": 1.1711994868505453e-06, "loss": 0.735, "step": 63795 }, { "epoch": 0.7775462201260162, "grad_norm": 2.3580006063754397, "learning_rate": 1.1708787684413086e-06, "loss": 0.719, "step": 63800 }, { "epoch": 0.7776071563501639, "grad_norm": 2.818035710744093, "learning_rate": 1.170558050032072e-06, "loss": 0.7117, "step": 63805 }, { "epoch": 0.7776680925743117, "grad_norm": 2.7105035555948676, "learning_rate": 1.1702373316228352e-06, "loss": 0.7421, "step": 63810 }, { "epoch": 0.7777290287984595, "grad_norm": 3.065597656341958, "learning_rate": 1.1699166132135987e-06, "loss": 0.6255, "step": 63815 }, { "epoch": 0.7777899650226073, "grad_norm": 3.455262680959808, "learning_rate": 1.1695958948043619e-06, "loss": 0.7473, "step": 63820 }, { "epoch": 0.7778509012467552, "grad_norm": 3.4073128479450774, "learning_rate": 1.1692751763951251e-06, "loss": 0.7523, "step": 63825 }, { "epoch": 0.777911837470903, "grad_norm": 3.2777754947874556, "learning_rate": 1.1689544579858883e-06, "loss": 0.7411, "step": 63830 }, { "epoch": 0.7779727736950508, "grad_norm": 2.1709631276478727, "learning_rate": 1.1686337395766518e-06, "loss": 0.7149, "step": 63835 }, { "epoch": 0.7780337099191985, "grad_norm": 2.39828240125163, "learning_rate": 1.1683130211674152e-06, "loss": 0.7118, "step": 63840 }, { "epoch": 0.7780946461433463, "grad_norm": 2.753620965204782, "learning_rate": 1.1679923027581785e-06, "loss": 0.7365, "step": 63845 }, { "epoch": 0.7781555823674942, "grad_norm": 2.3800573176216773, "learning_rate": 1.1676715843489417e-06, "loss": 0.6408, "step": 63850 }, { "epoch": 0.778216518591642, "grad_norm": 2.809310731444927, "learning_rate": 1.1673508659397051e-06, "loss": 0.7144, "step": 63855 }, { "epoch": 0.7782774548157898, "grad_norm": 2.4387309429786, "learning_rate": 1.1670301475304684e-06, "loss": 0.6977, "step": 63860 }, { "epoch": 0.7783383910399376, "grad_norm": 2.3426739947059447, "learning_rate": 1.1667094291212316e-06, "loss": 0.8208, "step": 63865 }, { "epoch": 0.7783993272640854, "grad_norm": 2.443249231549865, "learning_rate": 1.166388710711995e-06, "loss": 0.7048, "step": 63870 }, { "epoch": 0.7784602634882332, "grad_norm": 2.793238760992281, "learning_rate": 1.1660679923027582e-06, "loss": 0.6638, "step": 63875 }, { "epoch": 0.778521199712381, "grad_norm": 2.1478153670541076, "learning_rate": 1.1657472738935217e-06, "loss": 0.7238, "step": 63880 }, { "epoch": 0.7785821359365288, "grad_norm": 2.744344798765803, "learning_rate": 1.165426555484285e-06, "loss": 0.7687, "step": 63885 }, { "epoch": 0.7786430721606766, "grad_norm": 2.282769113650472, "learning_rate": 1.1651058370750481e-06, "loss": 0.7975, "step": 63890 }, { "epoch": 0.7787040083848245, "grad_norm": 3.1262347265854458, "learning_rate": 1.1647851186658116e-06, "loss": 0.6757, "step": 63895 }, { "epoch": 0.7787649446089723, "grad_norm": 2.2266098400083076, "learning_rate": 1.1644644002565748e-06, "loss": 0.7776, "step": 63900 }, { "epoch": 0.7788258808331201, "grad_norm": 2.22941983485241, "learning_rate": 1.164143681847338e-06, "loss": 0.7338, "step": 63905 }, { "epoch": 0.7788868170572678, "grad_norm": 2.2034245610664103, "learning_rate": 1.1638229634381015e-06, "loss": 0.7377, "step": 63910 }, { "epoch": 0.7789477532814156, "grad_norm": 2.6376546072932396, "learning_rate": 1.1635022450288647e-06, "loss": 0.7015, "step": 63915 }, { "epoch": 0.7790086895055635, "grad_norm": 2.1989055622654514, "learning_rate": 1.1631815266196282e-06, "loss": 0.7006, "step": 63920 }, { "epoch": 0.7790696257297113, "grad_norm": 2.6024072061608323, "learning_rate": 1.1628608082103914e-06, "loss": 0.6698, "step": 63925 }, { "epoch": 0.7791305619538591, "grad_norm": 2.831317544665704, "learning_rate": 1.1625400898011546e-06, "loss": 0.7534, "step": 63930 }, { "epoch": 0.7791914981780069, "grad_norm": 2.170401189289899, "learning_rate": 1.162219371391918e-06, "loss": 0.6358, "step": 63935 }, { "epoch": 0.7792524344021547, "grad_norm": 2.652059994711764, "learning_rate": 1.1618986529826813e-06, "loss": 0.7272, "step": 63940 }, { "epoch": 0.7793133706263025, "grad_norm": 2.660705797870961, "learning_rate": 1.1615779345734445e-06, "loss": 0.7517, "step": 63945 }, { "epoch": 0.7793743068504503, "grad_norm": 2.301098867801621, "learning_rate": 1.161257216164208e-06, "loss": 0.6562, "step": 63950 }, { "epoch": 0.7794352430745981, "grad_norm": 2.4911622576972414, "learning_rate": 1.1609364977549712e-06, "loss": 0.7361, "step": 63955 }, { "epoch": 0.7794961792987459, "grad_norm": 2.243805658089406, "learning_rate": 1.1606157793457346e-06, "loss": 0.713, "step": 63960 }, { "epoch": 0.7795571155228938, "grad_norm": 2.2925665233834067, "learning_rate": 1.1602950609364978e-06, "loss": 0.7398, "step": 63965 }, { "epoch": 0.7796180517470416, "grad_norm": 2.459214945834029, "learning_rate": 1.159974342527261e-06, "loss": 0.8145, "step": 63970 }, { "epoch": 0.7796789879711894, "grad_norm": 2.45925532218836, "learning_rate": 1.1596536241180245e-06, "loss": 0.7217, "step": 63975 }, { "epoch": 0.7797399241953371, "grad_norm": 2.260580479362577, "learning_rate": 1.1593329057087877e-06, "loss": 0.7522, "step": 63980 }, { "epoch": 0.7798008604194849, "grad_norm": 2.475040001274598, "learning_rate": 1.159012187299551e-06, "loss": 0.6971, "step": 63985 }, { "epoch": 0.7798617966436328, "grad_norm": 1.9352476447031588, "learning_rate": 1.1586914688903144e-06, "loss": 0.6782, "step": 63990 }, { "epoch": 0.7799227328677806, "grad_norm": 2.1532579013640105, "learning_rate": 1.1583707504810779e-06, "loss": 0.6863, "step": 63995 }, { "epoch": 0.7799836690919284, "grad_norm": 2.9604145845136784, "learning_rate": 1.158050032071841e-06, "loss": 0.7845, "step": 64000 }, { "epoch": 0.7800446053160762, "grad_norm": 2.2763634579961294, "learning_rate": 1.1577293136626043e-06, "loss": 0.6494, "step": 64005 }, { "epoch": 0.780105541540224, "grad_norm": 2.427209945401523, "learning_rate": 1.1574085952533675e-06, "loss": 0.7053, "step": 64010 }, { "epoch": 0.7801664777643718, "grad_norm": 2.5324439090761124, "learning_rate": 1.157087876844131e-06, "loss": 0.6605, "step": 64015 }, { "epoch": 0.7802274139885196, "grad_norm": 2.3205348380399884, "learning_rate": 1.1567671584348942e-06, "loss": 0.6636, "step": 64020 }, { "epoch": 0.7802883502126674, "grad_norm": 2.018779481540009, "learning_rate": 1.1564464400256574e-06, "loss": 0.7476, "step": 64025 }, { "epoch": 0.7803492864368152, "grad_norm": 2.6240605000000086, "learning_rate": 1.1561257216164209e-06, "loss": 0.7051, "step": 64030 }, { "epoch": 0.780410222660963, "grad_norm": 2.714922218315194, "learning_rate": 1.1558050032071843e-06, "loss": 0.6512, "step": 64035 }, { "epoch": 0.7804711588851109, "grad_norm": 3.186918269073547, "learning_rate": 1.1554842847979475e-06, "loss": 0.6978, "step": 64040 }, { "epoch": 0.7805320951092587, "grad_norm": 2.819355214466182, "learning_rate": 1.1551635663887108e-06, "loss": 0.7201, "step": 64045 }, { "epoch": 0.7805930313334064, "grad_norm": 2.165787100178128, "learning_rate": 1.1548428479794742e-06, "loss": 0.6742, "step": 64050 }, { "epoch": 0.7806539675575542, "grad_norm": 2.4675095079913363, "learning_rate": 1.1545221295702374e-06, "loss": 0.7034, "step": 64055 }, { "epoch": 0.780714903781702, "grad_norm": 2.2283260195426955, "learning_rate": 1.1542014111610007e-06, "loss": 0.6425, "step": 64060 }, { "epoch": 0.7807758400058499, "grad_norm": 3.170581388833427, "learning_rate": 1.1538806927517641e-06, "loss": 0.7495, "step": 64065 }, { "epoch": 0.7808367762299977, "grad_norm": 2.9460245953938844, "learning_rate": 1.1535599743425273e-06, "loss": 0.6941, "step": 64070 }, { "epoch": 0.7808977124541455, "grad_norm": 2.860853276399928, "learning_rate": 1.1532392559332908e-06, "loss": 0.683, "step": 64075 }, { "epoch": 0.7809586486782933, "grad_norm": 2.310322348474092, "learning_rate": 1.152918537524054e-06, "loss": 0.7991, "step": 64080 }, { "epoch": 0.781019584902441, "grad_norm": 2.4565514072899637, "learning_rate": 1.1525978191148172e-06, "loss": 0.6659, "step": 64085 }, { "epoch": 0.7810805211265889, "grad_norm": 2.2447736671667595, "learning_rate": 1.1522771007055807e-06, "loss": 0.6656, "step": 64090 }, { "epoch": 0.7811414573507367, "grad_norm": 2.3363501010501544, "learning_rate": 1.151956382296344e-06, "loss": 0.7108, "step": 64095 }, { "epoch": 0.7812023935748845, "grad_norm": 2.3465447072204313, "learning_rate": 1.1516356638871071e-06, "loss": 0.7821, "step": 64100 }, { "epoch": 0.7812633297990323, "grad_norm": 4.182621465921319, "learning_rate": 1.1513149454778706e-06, "loss": 0.7241, "step": 64105 }, { "epoch": 0.7813242660231802, "grad_norm": 2.110746561933251, "learning_rate": 1.1509942270686338e-06, "loss": 0.7153, "step": 64110 }, { "epoch": 0.781385202247328, "grad_norm": 3.952586255596764, "learning_rate": 1.1506735086593972e-06, "loss": 0.6589, "step": 64115 }, { "epoch": 0.7814461384714757, "grad_norm": 2.3874206381492944, "learning_rate": 1.1503527902501605e-06, "loss": 0.7579, "step": 64120 }, { "epoch": 0.7815070746956235, "grad_norm": 1.9842320487957072, "learning_rate": 1.1500320718409237e-06, "loss": 0.7032, "step": 64125 }, { "epoch": 0.7815680109197713, "grad_norm": 2.2370298608975414, "learning_rate": 1.1497113534316871e-06, "loss": 0.7042, "step": 64130 }, { "epoch": 0.7816289471439192, "grad_norm": 2.478904396990391, "learning_rate": 1.1493906350224504e-06, "loss": 0.757, "step": 64135 }, { "epoch": 0.781689883368067, "grad_norm": 2.4106718845621993, "learning_rate": 1.1490699166132136e-06, "loss": 0.7239, "step": 64140 }, { "epoch": 0.7817508195922148, "grad_norm": 2.486900988755891, "learning_rate": 1.148749198203977e-06, "loss": 0.7562, "step": 64145 }, { "epoch": 0.7818117558163626, "grad_norm": 2.9801025674994883, "learning_rate": 1.1484284797947403e-06, "loss": 0.7162, "step": 64150 }, { "epoch": 0.7818726920405104, "grad_norm": 2.5420607479280557, "learning_rate": 1.1481077613855037e-06, "loss": 0.634, "step": 64155 }, { "epoch": 0.7819336282646582, "grad_norm": 2.560754172037828, "learning_rate": 1.147787042976267e-06, "loss": 0.7817, "step": 64160 }, { "epoch": 0.781994564488806, "grad_norm": 2.3880985571813422, "learning_rate": 1.1474663245670302e-06, "loss": 0.6165, "step": 64165 }, { "epoch": 0.7820555007129538, "grad_norm": 2.928022156421715, "learning_rate": 1.1471456061577936e-06, "loss": 0.6989, "step": 64170 }, { "epoch": 0.7821164369371016, "grad_norm": 2.8172558937874186, "learning_rate": 1.1468248877485568e-06, "loss": 0.687, "step": 64175 }, { "epoch": 0.7821773731612495, "grad_norm": 4.624050069866228, "learning_rate": 1.14650416933932e-06, "loss": 0.6726, "step": 64180 }, { "epoch": 0.7822383093853973, "grad_norm": 2.2245158141150116, "learning_rate": 1.1461834509300835e-06, "loss": 0.6607, "step": 64185 }, { "epoch": 0.782299245609545, "grad_norm": 2.6647252201212352, "learning_rate": 1.145862732520847e-06, "loss": 0.7265, "step": 64190 }, { "epoch": 0.7823601818336928, "grad_norm": 2.6183770301577014, "learning_rate": 1.1455420141116102e-06, "loss": 0.6934, "step": 64195 }, { "epoch": 0.7824211180578406, "grad_norm": 2.4003407699309203, "learning_rate": 1.1452212957023734e-06, "loss": 0.7574, "step": 64200 }, { "epoch": 0.7824820542819885, "grad_norm": 2.286274189974622, "learning_rate": 1.1449005772931366e-06, "loss": 0.6745, "step": 64205 }, { "epoch": 0.7825429905061363, "grad_norm": 2.628784753220128, "learning_rate": 1.1445798588839e-06, "loss": 0.6915, "step": 64210 }, { "epoch": 0.7826039267302841, "grad_norm": 2.3745952842444837, "learning_rate": 1.1442591404746633e-06, "loss": 0.7259, "step": 64215 }, { "epoch": 0.7826648629544319, "grad_norm": 2.3680899938516924, "learning_rate": 1.1439384220654265e-06, "loss": 0.7627, "step": 64220 }, { "epoch": 0.7827257991785797, "grad_norm": 2.4766473448720947, "learning_rate": 1.14361770365619e-06, "loss": 0.6791, "step": 64225 }, { "epoch": 0.7827867354027275, "grad_norm": 3.2222482338685072, "learning_rate": 1.1432969852469534e-06, "loss": 0.7044, "step": 64230 }, { "epoch": 0.7828476716268753, "grad_norm": 3.354806022813682, "learning_rate": 1.1429762668377166e-06, "loss": 0.7052, "step": 64235 }, { "epoch": 0.7829086078510231, "grad_norm": 2.2582471686766232, "learning_rate": 1.1426555484284799e-06, "loss": 0.7173, "step": 64240 }, { "epoch": 0.7829695440751709, "grad_norm": 1.9593030876406214, "learning_rate": 1.142334830019243e-06, "loss": 0.6779, "step": 64245 }, { "epoch": 0.7830304802993188, "grad_norm": 2.0724467284789787, "learning_rate": 1.1420141116100065e-06, "loss": 0.7389, "step": 64250 }, { "epoch": 0.7830914165234666, "grad_norm": 2.618870748009023, "learning_rate": 1.1416933932007698e-06, "loss": 0.7464, "step": 64255 }, { "epoch": 0.7831523527476143, "grad_norm": 3.1403612864758648, "learning_rate": 1.1413726747915332e-06, "loss": 0.7368, "step": 64260 }, { "epoch": 0.7832132889717621, "grad_norm": 2.4685052561404066, "learning_rate": 1.1410519563822964e-06, "loss": 0.7228, "step": 64265 }, { "epoch": 0.78327422519591, "grad_norm": 4.605524783861352, "learning_rate": 1.1407312379730599e-06, "loss": 0.7403, "step": 64270 }, { "epoch": 0.7833351614200578, "grad_norm": 2.2973926322726603, "learning_rate": 1.140410519563823e-06, "loss": 0.7294, "step": 64275 }, { "epoch": 0.7833960976442056, "grad_norm": 2.5845536941788647, "learning_rate": 1.1400898011545863e-06, "loss": 0.7221, "step": 64280 }, { "epoch": 0.7834570338683534, "grad_norm": 3.170736470602611, "learning_rate": 1.1397690827453496e-06, "loss": 0.7219, "step": 64285 }, { "epoch": 0.7835179700925012, "grad_norm": 2.5065263165378235, "learning_rate": 1.139448364336113e-06, "loss": 0.6569, "step": 64290 }, { "epoch": 0.783578906316649, "grad_norm": 2.169187099699112, "learning_rate": 1.1391276459268762e-06, "loss": 0.7596, "step": 64295 }, { "epoch": 0.7836398425407968, "grad_norm": 2.1406085218941504, "learning_rate": 1.1388069275176397e-06, "loss": 0.7553, "step": 64300 }, { "epoch": 0.7837007787649446, "grad_norm": 2.1567059643109494, "learning_rate": 1.1384862091084029e-06, "loss": 0.7308, "step": 64305 }, { "epoch": 0.7837617149890924, "grad_norm": 2.432668751358482, "learning_rate": 1.1381654906991663e-06, "loss": 0.6686, "step": 64310 }, { "epoch": 0.7838226512132402, "grad_norm": 2.344260325636954, "learning_rate": 1.1378447722899296e-06, "loss": 0.7399, "step": 64315 }, { "epoch": 0.7838835874373881, "grad_norm": 3.247940971711448, "learning_rate": 1.1375240538806928e-06, "loss": 0.7169, "step": 64320 }, { "epoch": 0.7839445236615359, "grad_norm": 2.7107970586177843, "learning_rate": 1.137203335471456e-06, "loss": 0.743, "step": 64325 }, { "epoch": 0.7840054598856836, "grad_norm": 3.0756974275140148, "learning_rate": 1.1368826170622195e-06, "loss": 0.7046, "step": 64330 }, { "epoch": 0.7840663961098314, "grad_norm": 2.608200331358776, "learning_rate": 1.1365618986529827e-06, "loss": 0.7065, "step": 64335 }, { "epoch": 0.7841273323339792, "grad_norm": 2.1202217096072373, "learning_rate": 1.1362411802437461e-06, "loss": 0.7279, "step": 64340 }, { "epoch": 0.7841882685581271, "grad_norm": 2.467149227855537, "learning_rate": 1.1359204618345094e-06, "loss": 0.7392, "step": 64345 }, { "epoch": 0.7842492047822749, "grad_norm": 2.950600817081903, "learning_rate": 1.1355997434252728e-06, "loss": 0.7436, "step": 64350 }, { "epoch": 0.7843101410064227, "grad_norm": 2.7108684352381958, "learning_rate": 1.135279025016036e-06, "loss": 0.7496, "step": 64355 }, { "epoch": 0.7843710772305705, "grad_norm": 2.5701674355526825, "learning_rate": 1.1349583066067993e-06, "loss": 0.6985, "step": 64360 }, { "epoch": 0.7844320134547182, "grad_norm": 2.792568306624074, "learning_rate": 1.1346375881975627e-06, "loss": 0.7803, "step": 64365 }, { "epoch": 0.7844929496788661, "grad_norm": 2.4524967818881627, "learning_rate": 1.134316869788326e-06, "loss": 0.7256, "step": 64370 }, { "epoch": 0.7845538859030139, "grad_norm": 2.481864788792726, "learning_rate": 1.1339961513790891e-06, "loss": 0.7352, "step": 64375 }, { "epoch": 0.7846148221271617, "grad_norm": 2.2419861404491095, "learning_rate": 1.1336754329698526e-06, "loss": 0.7663, "step": 64380 }, { "epoch": 0.7846757583513095, "grad_norm": 2.5775551990673535, "learning_rate": 1.133354714560616e-06, "loss": 0.7162, "step": 64385 }, { "epoch": 0.7847366945754574, "grad_norm": 2.1370524936239015, "learning_rate": 1.1330339961513793e-06, "loss": 0.7029, "step": 64390 }, { "epoch": 0.7847976307996052, "grad_norm": 2.7695085092194365, "learning_rate": 1.1327132777421425e-06, "loss": 0.6588, "step": 64395 }, { "epoch": 0.7848585670237529, "grad_norm": 2.4250965518599368, "learning_rate": 1.1323925593329057e-06, "loss": 0.6823, "step": 64400 }, { "epoch": 0.7849195032479007, "grad_norm": 2.413687221739242, "learning_rate": 1.1320718409236692e-06, "loss": 0.7187, "step": 64405 }, { "epoch": 0.7849804394720485, "grad_norm": 2.9592428646442235, "learning_rate": 1.1317511225144324e-06, "loss": 0.725, "step": 64410 }, { "epoch": 0.7850413756961964, "grad_norm": 2.40788606710119, "learning_rate": 1.1314304041051958e-06, "loss": 0.7912, "step": 64415 }, { "epoch": 0.7851023119203442, "grad_norm": 3.640642086688293, "learning_rate": 1.131109685695959e-06, "loss": 0.7479, "step": 64420 }, { "epoch": 0.785163248144492, "grad_norm": 2.3513823023880405, "learning_rate": 1.1307889672867225e-06, "loss": 0.6691, "step": 64425 }, { "epoch": 0.7852241843686398, "grad_norm": 4.691525385957585, "learning_rate": 1.1304682488774857e-06, "loss": 0.7403, "step": 64430 }, { "epoch": 0.7852851205927875, "grad_norm": 2.5460320437811133, "learning_rate": 1.130147530468249e-06, "loss": 0.8012, "step": 64435 }, { "epoch": 0.7853460568169354, "grad_norm": 2.49395536876321, "learning_rate": 1.1298268120590122e-06, "loss": 0.7422, "step": 64440 }, { "epoch": 0.7854069930410832, "grad_norm": 2.7504808964033294, "learning_rate": 1.1295060936497756e-06, "loss": 0.7771, "step": 64445 }, { "epoch": 0.785467929265231, "grad_norm": 2.2047521729479325, "learning_rate": 1.1291853752405388e-06, "loss": 0.6902, "step": 64450 }, { "epoch": 0.7855288654893788, "grad_norm": 2.411364784362266, "learning_rate": 1.1288646568313023e-06, "loss": 0.8109, "step": 64455 }, { "epoch": 0.7855898017135267, "grad_norm": 2.693395747387867, "learning_rate": 1.1285439384220655e-06, "loss": 0.7468, "step": 64460 }, { "epoch": 0.7856507379376745, "grad_norm": 3.097607659405549, "learning_rate": 1.128223220012829e-06, "loss": 0.7672, "step": 64465 }, { "epoch": 0.7857116741618222, "grad_norm": 2.479225982728002, "learning_rate": 1.1279025016035922e-06, "loss": 0.609, "step": 64470 }, { "epoch": 0.78577261038597, "grad_norm": 2.164931362146586, "learning_rate": 1.1275817831943554e-06, "loss": 0.7358, "step": 64475 }, { "epoch": 0.7858335466101178, "grad_norm": 3.094457190830263, "learning_rate": 1.1272610647851186e-06, "loss": 0.7136, "step": 64480 }, { "epoch": 0.7858944828342657, "grad_norm": 3.3567322317738513, "learning_rate": 1.126940346375882e-06, "loss": 0.7893, "step": 64485 }, { "epoch": 0.7859554190584135, "grad_norm": 3.4781999316115173, "learning_rate": 1.1266196279666453e-06, "loss": 0.732, "step": 64490 }, { "epoch": 0.7860163552825613, "grad_norm": 2.357130697845455, "learning_rate": 1.1262989095574088e-06, "loss": 0.7872, "step": 64495 }, { "epoch": 0.7860772915067091, "grad_norm": 2.3215040071845516, "learning_rate": 1.125978191148172e-06, "loss": 0.7581, "step": 64500 }, { "epoch": 0.7861382277308568, "grad_norm": 2.6020745461547232, "learning_rate": 1.1256574727389354e-06, "loss": 0.6501, "step": 64505 }, { "epoch": 0.7861991639550047, "grad_norm": 2.8101643351007897, "learning_rate": 1.1253367543296986e-06, "loss": 0.6974, "step": 64510 }, { "epoch": 0.7862601001791525, "grad_norm": 3.3125207722239676, "learning_rate": 1.1250160359204619e-06, "loss": 0.7275, "step": 64515 }, { "epoch": 0.7863210364033003, "grad_norm": 2.5228303610989107, "learning_rate": 1.1246953175112251e-06, "loss": 0.7408, "step": 64520 }, { "epoch": 0.7863819726274481, "grad_norm": 2.8669843029172877, "learning_rate": 1.1243745991019885e-06, "loss": 0.6652, "step": 64525 }, { "epoch": 0.786442908851596, "grad_norm": 2.410158025838282, "learning_rate": 1.1240538806927518e-06, "loss": 0.7218, "step": 64530 }, { "epoch": 0.7865038450757438, "grad_norm": 2.770890289100873, "learning_rate": 1.1237331622835152e-06, "loss": 0.7088, "step": 64535 }, { "epoch": 0.7865647812998915, "grad_norm": 2.112979438563271, "learning_rate": 1.1234124438742784e-06, "loss": 0.6983, "step": 64540 }, { "epoch": 0.7866257175240393, "grad_norm": 2.8477866684954587, "learning_rate": 1.1230917254650419e-06, "loss": 0.6892, "step": 64545 }, { "epoch": 0.7866866537481871, "grad_norm": 2.926849939266267, "learning_rate": 1.1227710070558051e-06, "loss": 0.7087, "step": 64550 }, { "epoch": 0.786747589972335, "grad_norm": 2.5538710518386827, "learning_rate": 1.1224502886465683e-06, "loss": 0.6911, "step": 64555 }, { "epoch": 0.7868085261964828, "grad_norm": 2.2548189576465116, "learning_rate": 1.1221295702373316e-06, "loss": 0.6786, "step": 64560 }, { "epoch": 0.7868694624206306, "grad_norm": 2.3153932268443387, "learning_rate": 1.121808851828095e-06, "loss": 0.7217, "step": 64565 }, { "epoch": 0.7869303986447784, "grad_norm": 2.4992660413205363, "learning_rate": 1.1214881334188582e-06, "loss": 0.7957, "step": 64570 }, { "epoch": 0.7869913348689261, "grad_norm": 2.2232343342042062, "learning_rate": 1.1211674150096217e-06, "loss": 0.7059, "step": 64575 }, { "epoch": 0.787052271093074, "grad_norm": 2.9811464630402487, "learning_rate": 1.120846696600385e-06, "loss": 0.7123, "step": 64580 }, { "epoch": 0.7871132073172218, "grad_norm": 1.9095192407931112, "learning_rate": 1.1205259781911483e-06, "loss": 0.6907, "step": 64585 }, { "epoch": 0.7871741435413696, "grad_norm": 2.745947875335695, "learning_rate": 1.1202052597819116e-06, "loss": 0.7467, "step": 64590 }, { "epoch": 0.7872350797655174, "grad_norm": 2.6605008287075593, "learning_rate": 1.1198845413726748e-06, "loss": 0.7107, "step": 64595 }, { "epoch": 0.7872960159896653, "grad_norm": 2.2099461938112808, "learning_rate": 1.119563822963438e-06, "loss": 0.6931, "step": 64600 }, { "epoch": 0.7873569522138131, "grad_norm": 2.8062205048284357, "learning_rate": 1.1192431045542015e-06, "loss": 0.7715, "step": 64605 }, { "epoch": 0.7874178884379608, "grad_norm": 2.7829588834385772, "learning_rate": 1.118922386144965e-06, "loss": 0.7083, "step": 64610 }, { "epoch": 0.7874788246621086, "grad_norm": 3.8421505878641677, "learning_rate": 1.1186016677357281e-06, "loss": 0.7087, "step": 64615 }, { "epoch": 0.7875397608862564, "grad_norm": 2.3336242765611277, "learning_rate": 1.1182809493264914e-06, "loss": 0.6959, "step": 64620 }, { "epoch": 0.7876006971104043, "grad_norm": 2.0145068968921933, "learning_rate": 1.1179602309172548e-06, "loss": 0.6727, "step": 64625 }, { "epoch": 0.7876616333345521, "grad_norm": 2.641206947243442, "learning_rate": 1.117639512508018e-06, "loss": 0.7003, "step": 64630 }, { "epoch": 0.7877225695586999, "grad_norm": 3.012093092613098, "learning_rate": 1.1173187940987813e-06, "loss": 0.741, "step": 64635 }, { "epoch": 0.7877835057828477, "grad_norm": 2.120531787535162, "learning_rate": 1.1169980756895447e-06, "loss": 0.7151, "step": 64640 }, { "epoch": 0.7878444420069954, "grad_norm": 2.800243411540173, "learning_rate": 1.116677357280308e-06, "loss": 0.6875, "step": 64645 }, { "epoch": 0.7879053782311433, "grad_norm": 3.270257701795969, "learning_rate": 1.1163566388710714e-06, "loss": 0.722, "step": 64650 }, { "epoch": 0.7879663144552911, "grad_norm": 2.685681706925829, "learning_rate": 1.1160359204618346e-06, "loss": 0.7788, "step": 64655 }, { "epoch": 0.7880272506794389, "grad_norm": 2.495166281091326, "learning_rate": 1.115715202052598e-06, "loss": 0.691, "step": 64660 }, { "epoch": 0.7880881869035867, "grad_norm": 2.3141979650599565, "learning_rate": 1.1153944836433613e-06, "loss": 0.7123, "step": 64665 }, { "epoch": 0.7881491231277346, "grad_norm": 2.6068353968576687, "learning_rate": 1.1150737652341245e-06, "loss": 0.7584, "step": 64670 }, { "epoch": 0.7882100593518824, "grad_norm": 2.3635958974995797, "learning_rate": 1.1147530468248877e-06, "loss": 0.7061, "step": 64675 }, { "epoch": 0.7882709955760301, "grad_norm": 2.4022726608754392, "learning_rate": 1.1144323284156512e-06, "loss": 0.6992, "step": 64680 }, { "epoch": 0.7883319318001779, "grad_norm": 2.491628851430404, "learning_rate": 1.1141116100064144e-06, "loss": 0.7258, "step": 64685 }, { "epoch": 0.7883928680243257, "grad_norm": 2.240119699337228, "learning_rate": 1.1137908915971778e-06, "loss": 0.6928, "step": 64690 }, { "epoch": 0.7884538042484736, "grad_norm": 2.3660549570717566, "learning_rate": 1.113470173187941e-06, "loss": 0.7501, "step": 64695 }, { "epoch": 0.7885147404726214, "grad_norm": 2.7252565782204683, "learning_rate": 1.1131494547787045e-06, "loss": 0.683, "step": 64700 }, { "epoch": 0.7885756766967692, "grad_norm": 2.381684852115711, "learning_rate": 1.1128287363694677e-06, "loss": 0.6858, "step": 64705 }, { "epoch": 0.7886366129209169, "grad_norm": 2.5745803728190406, "learning_rate": 1.112508017960231e-06, "loss": 0.803, "step": 64710 }, { "epoch": 0.7886975491450647, "grad_norm": 2.5295605155010046, "learning_rate": 1.1121872995509942e-06, "loss": 0.7424, "step": 64715 }, { "epoch": 0.7887584853692126, "grad_norm": 2.0694451262073787, "learning_rate": 1.1118665811417576e-06, "loss": 0.6991, "step": 64720 }, { "epoch": 0.7888194215933604, "grad_norm": 2.496751914678997, "learning_rate": 1.1115458627325209e-06, "loss": 0.7511, "step": 64725 }, { "epoch": 0.7888803578175082, "grad_norm": 2.4930270343344314, "learning_rate": 1.1112251443232843e-06, "loss": 0.6299, "step": 64730 }, { "epoch": 0.788941294041656, "grad_norm": 2.729365811507553, "learning_rate": 1.1109044259140475e-06, "loss": 0.7341, "step": 64735 }, { "epoch": 0.7890022302658038, "grad_norm": 2.708144074260456, "learning_rate": 1.110583707504811e-06, "loss": 0.773, "step": 64740 }, { "epoch": 0.7890631664899516, "grad_norm": 3.7110513459615087, "learning_rate": 1.1102629890955742e-06, "loss": 0.6994, "step": 64745 }, { "epoch": 0.7891241027140994, "grad_norm": 2.299657733207294, "learning_rate": 1.1099422706863374e-06, "loss": 0.6728, "step": 64750 }, { "epoch": 0.7891850389382472, "grad_norm": 2.1005314593120326, "learning_rate": 1.1096215522771007e-06, "loss": 0.7402, "step": 64755 }, { "epoch": 0.789245975162395, "grad_norm": 3.0061134952007813, "learning_rate": 1.109300833867864e-06, "loss": 0.7289, "step": 64760 }, { "epoch": 0.7893069113865429, "grad_norm": 3.9107149393397984, "learning_rate": 1.1089801154586275e-06, "loss": 0.7828, "step": 64765 }, { "epoch": 0.7893678476106907, "grad_norm": 2.061139810647795, "learning_rate": 1.1086593970493908e-06, "loss": 0.7102, "step": 64770 }, { "epoch": 0.7894287838348385, "grad_norm": 2.2274773811296114, "learning_rate": 1.108338678640154e-06, "loss": 0.7223, "step": 64775 }, { "epoch": 0.7894897200589862, "grad_norm": 3.5075622983662917, "learning_rate": 1.1080179602309174e-06, "loss": 0.728, "step": 64780 }, { "epoch": 0.789550656283134, "grad_norm": 2.3136107867501456, "learning_rate": 1.1076972418216807e-06, "loss": 0.6662, "step": 64785 }, { "epoch": 0.7896115925072819, "grad_norm": 2.7244433330062225, "learning_rate": 1.1073765234124439e-06, "loss": 0.6929, "step": 64790 }, { "epoch": 0.7896725287314297, "grad_norm": 3.6501951646826662, "learning_rate": 1.1070558050032071e-06, "loss": 0.6799, "step": 64795 }, { "epoch": 0.7897334649555775, "grad_norm": 3.681679234143439, "learning_rate": 1.1067350865939706e-06, "loss": 0.7802, "step": 64800 }, { "epoch": 0.7897944011797253, "grad_norm": 2.6899578006301375, "learning_rate": 1.106414368184734e-06, "loss": 0.696, "step": 64805 }, { "epoch": 0.7898553374038731, "grad_norm": 2.201205896311666, "learning_rate": 1.1060936497754972e-06, "loss": 0.7464, "step": 64810 }, { "epoch": 0.7899162736280209, "grad_norm": 2.452735852411856, "learning_rate": 1.1057729313662605e-06, "loss": 0.7492, "step": 64815 }, { "epoch": 0.7899772098521687, "grad_norm": 4.207027616113833, "learning_rate": 1.105452212957024e-06, "loss": 0.7396, "step": 64820 }, { "epoch": 0.7900381460763165, "grad_norm": 3.046775111897989, "learning_rate": 1.1051314945477871e-06, "loss": 0.713, "step": 64825 }, { "epoch": 0.7900990823004643, "grad_norm": 2.196873204898224, "learning_rate": 1.1048107761385504e-06, "loss": 0.712, "step": 64830 }, { "epoch": 0.7901600185246122, "grad_norm": 2.7449852071732104, "learning_rate": 1.1044900577293138e-06, "loss": 0.6647, "step": 64835 }, { "epoch": 0.79022095474876, "grad_norm": 2.260090563818607, "learning_rate": 1.104169339320077e-06, "loss": 0.7823, "step": 64840 }, { "epoch": 0.7902818909729078, "grad_norm": 2.267354381585145, "learning_rate": 1.1038486209108405e-06, "loss": 0.6516, "step": 64845 }, { "epoch": 0.7903428271970555, "grad_norm": 2.2092237457257835, "learning_rate": 1.1035279025016037e-06, "loss": 0.7091, "step": 64850 }, { "epoch": 0.7904037634212033, "grad_norm": 3.043878828002355, "learning_rate": 1.103207184092367e-06, "loss": 0.8092, "step": 64855 }, { "epoch": 0.7904646996453512, "grad_norm": 2.2831060444109252, "learning_rate": 1.1028864656831304e-06, "loss": 0.7598, "step": 64860 }, { "epoch": 0.790525635869499, "grad_norm": 2.24964548525009, "learning_rate": 1.1025657472738936e-06, "loss": 0.7141, "step": 64865 }, { "epoch": 0.7905865720936468, "grad_norm": 2.3178516024454123, "learning_rate": 1.1022450288646568e-06, "loss": 0.8106, "step": 64870 }, { "epoch": 0.7906475083177946, "grad_norm": 2.3463652384367735, "learning_rate": 1.1019243104554203e-06, "loss": 0.6976, "step": 64875 }, { "epoch": 0.7907084445419424, "grad_norm": 3.1202030310837023, "learning_rate": 1.1016035920461835e-06, "loss": 0.7653, "step": 64880 }, { "epoch": 0.7907693807660902, "grad_norm": 2.328169739458168, "learning_rate": 1.101282873636947e-06, "loss": 0.7974, "step": 64885 }, { "epoch": 0.790830316990238, "grad_norm": 2.5545810631156876, "learning_rate": 1.1009621552277102e-06, "loss": 0.7135, "step": 64890 }, { "epoch": 0.7908912532143858, "grad_norm": 2.0386428694727257, "learning_rate": 1.1006414368184734e-06, "loss": 0.7243, "step": 64895 }, { "epoch": 0.7909521894385336, "grad_norm": 3.127020838820833, "learning_rate": 1.1003207184092368e-06, "loss": 0.7395, "step": 64900 }, { "epoch": 0.7910131256626814, "grad_norm": 1.8926276168226859, "learning_rate": 1.1e-06, "loss": 0.7795, "step": 64905 }, { "epoch": 0.7910740618868293, "grad_norm": 2.3696315634396163, "learning_rate": 1.0996792815907633e-06, "loss": 0.6748, "step": 64910 }, { "epoch": 0.7911349981109771, "grad_norm": 2.3377598990593667, "learning_rate": 1.0993585631815267e-06, "loss": 0.731, "step": 64915 }, { "epoch": 0.7911959343351248, "grad_norm": 2.4883543924698714, "learning_rate": 1.09903784477229e-06, "loss": 0.7749, "step": 64920 }, { "epoch": 0.7912568705592726, "grad_norm": 2.427916681461966, "learning_rate": 1.0987171263630534e-06, "loss": 0.6651, "step": 64925 }, { "epoch": 0.7913178067834205, "grad_norm": 3.3727191335605626, "learning_rate": 1.0983964079538166e-06, "loss": 0.6912, "step": 64930 }, { "epoch": 0.7913787430075683, "grad_norm": 2.455470763192362, "learning_rate": 1.0980756895445799e-06, "loss": 0.6418, "step": 64935 }, { "epoch": 0.7914396792317161, "grad_norm": 2.3596408261668103, "learning_rate": 1.0977549711353433e-06, "loss": 0.7637, "step": 64940 }, { "epoch": 0.7915006154558639, "grad_norm": 2.692268156893427, "learning_rate": 1.0974342527261065e-06, "loss": 0.7954, "step": 64945 }, { "epoch": 0.7915615516800117, "grad_norm": 2.4675790116384912, "learning_rate": 1.0971135343168697e-06, "loss": 0.7427, "step": 64950 }, { "epoch": 0.7916224879041595, "grad_norm": 2.4432956512840884, "learning_rate": 1.0967928159076332e-06, "loss": 0.7621, "step": 64955 }, { "epoch": 0.7916834241283073, "grad_norm": 2.8931686686200067, "learning_rate": 1.0964720974983966e-06, "loss": 0.7257, "step": 64960 }, { "epoch": 0.7917443603524551, "grad_norm": 2.5910617342239624, "learning_rate": 1.0961513790891599e-06, "loss": 0.754, "step": 64965 }, { "epoch": 0.7918052965766029, "grad_norm": 2.699422221943552, "learning_rate": 1.095830660679923e-06, "loss": 0.8002, "step": 64970 }, { "epoch": 0.7918662328007507, "grad_norm": 3.0734733714832188, "learning_rate": 1.0955099422706865e-06, "loss": 0.6802, "step": 64975 }, { "epoch": 0.7919271690248986, "grad_norm": 2.9690478935725304, "learning_rate": 1.0951892238614498e-06, "loss": 0.6961, "step": 64980 }, { "epoch": 0.7919881052490464, "grad_norm": 2.477578961793725, "learning_rate": 1.094868505452213e-06, "loss": 0.6772, "step": 64985 }, { "epoch": 0.7920490414731941, "grad_norm": 2.594457911393059, "learning_rate": 1.0945477870429762e-06, "loss": 0.6982, "step": 64990 }, { "epoch": 0.7921099776973419, "grad_norm": 2.4657528377119484, "learning_rate": 1.0942270686337397e-06, "loss": 0.7743, "step": 64995 }, { "epoch": 0.7921709139214897, "grad_norm": 2.2508705861049902, "learning_rate": 1.093906350224503e-06, "loss": 0.7934, "step": 65000 }, { "epoch": 0.7922318501456376, "grad_norm": 2.5700388202369733, "learning_rate": 1.0935856318152663e-06, "loss": 0.6986, "step": 65005 }, { "epoch": 0.7922927863697854, "grad_norm": 3.044993096534547, "learning_rate": 1.0932649134060295e-06, "loss": 0.743, "step": 65010 }, { "epoch": 0.7923537225939332, "grad_norm": 2.1486275404526656, "learning_rate": 1.092944194996793e-06, "loss": 0.663, "step": 65015 }, { "epoch": 0.792414658818081, "grad_norm": 3.268145062740158, "learning_rate": 1.0926234765875562e-06, "loss": 0.6889, "step": 65020 }, { "epoch": 0.7924755950422288, "grad_norm": 2.6691689302746626, "learning_rate": 1.0923027581783194e-06, "loss": 0.6525, "step": 65025 }, { "epoch": 0.7925365312663766, "grad_norm": 2.1584758659075423, "learning_rate": 1.0919820397690829e-06, "loss": 0.6964, "step": 65030 }, { "epoch": 0.7925974674905244, "grad_norm": 2.493305988346229, "learning_rate": 1.0916613213598461e-06, "loss": 0.7282, "step": 65035 }, { "epoch": 0.7926584037146722, "grad_norm": 3.717595436909868, "learning_rate": 1.0913406029506096e-06, "loss": 0.7552, "step": 65040 }, { "epoch": 0.79271933993882, "grad_norm": 3.1934658643337346, "learning_rate": 1.0910198845413728e-06, "loss": 0.6896, "step": 65045 }, { "epoch": 0.7927802761629679, "grad_norm": 2.6763300360493654, "learning_rate": 1.090699166132136e-06, "loss": 0.8067, "step": 65050 }, { "epoch": 0.7928412123871157, "grad_norm": 1.8737759723379805, "learning_rate": 1.0903784477228995e-06, "loss": 0.712, "step": 65055 }, { "epoch": 0.7929021486112634, "grad_norm": 3.195900659023956, "learning_rate": 1.0900577293136627e-06, "loss": 0.7277, "step": 65060 }, { "epoch": 0.7929630848354112, "grad_norm": 2.4011702943133737, "learning_rate": 1.089737010904426e-06, "loss": 0.7562, "step": 65065 }, { "epoch": 0.793024021059559, "grad_norm": 3.4378642707310085, "learning_rate": 1.0894162924951894e-06, "loss": 0.7179, "step": 65070 }, { "epoch": 0.7930849572837069, "grad_norm": 2.766843192411246, "learning_rate": 1.0890955740859526e-06, "loss": 0.7693, "step": 65075 }, { "epoch": 0.7931458935078547, "grad_norm": 2.171815434070964, "learning_rate": 1.088774855676716e-06, "loss": 0.7243, "step": 65080 }, { "epoch": 0.7932068297320025, "grad_norm": 2.31247862054098, "learning_rate": 1.0884541372674792e-06, "loss": 0.6772, "step": 65085 }, { "epoch": 0.7932677659561503, "grad_norm": 2.479368882640507, "learning_rate": 1.0881334188582425e-06, "loss": 0.6737, "step": 65090 }, { "epoch": 0.793328702180298, "grad_norm": 2.1060480778422055, "learning_rate": 1.087812700449006e-06, "loss": 0.6777, "step": 65095 }, { "epoch": 0.7933896384044459, "grad_norm": 3.2034572776903607, "learning_rate": 1.0874919820397691e-06, "loss": 0.6936, "step": 65100 }, { "epoch": 0.7934505746285937, "grad_norm": 2.4523131927902595, "learning_rate": 1.0871712636305324e-06, "loss": 0.7089, "step": 65105 }, { "epoch": 0.7935115108527415, "grad_norm": 3.0990999857560726, "learning_rate": 1.0868505452212958e-06, "loss": 0.6697, "step": 65110 }, { "epoch": 0.7935724470768893, "grad_norm": 3.0026084003612, "learning_rate": 1.0865298268120593e-06, "loss": 0.7473, "step": 65115 }, { "epoch": 0.7936333833010372, "grad_norm": 2.340839055207168, "learning_rate": 1.0862091084028225e-06, "loss": 0.7432, "step": 65120 }, { "epoch": 0.793694319525185, "grad_norm": 2.4076190860926316, "learning_rate": 1.0858883899935857e-06, "loss": 0.701, "step": 65125 }, { "epoch": 0.7937552557493327, "grad_norm": 2.1677513630818055, "learning_rate": 1.085567671584349e-06, "loss": 0.691, "step": 65130 }, { "epoch": 0.7938161919734805, "grad_norm": 2.258213889940009, "learning_rate": 1.0852469531751124e-06, "loss": 0.7612, "step": 65135 }, { "epoch": 0.7938771281976283, "grad_norm": 2.268700417086637, "learning_rate": 1.0849262347658756e-06, "loss": 0.6883, "step": 65140 }, { "epoch": 0.7939380644217762, "grad_norm": 2.454116386435355, "learning_rate": 1.0846055163566388e-06, "loss": 0.6968, "step": 65145 }, { "epoch": 0.793999000645924, "grad_norm": 2.2160795733536025, "learning_rate": 1.0842847979474023e-06, "loss": 0.6766, "step": 65150 }, { "epoch": 0.7940599368700718, "grad_norm": 2.1544188421109682, "learning_rate": 1.0839640795381657e-06, "loss": 0.7241, "step": 65155 }, { "epoch": 0.7941208730942196, "grad_norm": 2.5911748343944976, "learning_rate": 1.083643361128929e-06, "loss": 0.7053, "step": 65160 }, { "epoch": 0.7941818093183673, "grad_norm": 3.4142317641201823, "learning_rate": 1.0833226427196922e-06, "loss": 0.738, "step": 65165 }, { "epoch": 0.7942427455425152, "grad_norm": 2.408853285051627, "learning_rate": 1.0830019243104554e-06, "loss": 0.7105, "step": 65170 }, { "epoch": 0.794303681766663, "grad_norm": 3.0038584292874098, "learning_rate": 1.0826812059012188e-06, "loss": 0.7405, "step": 65175 }, { "epoch": 0.7943646179908108, "grad_norm": 2.6224135361502925, "learning_rate": 1.082360487491982e-06, "loss": 0.7713, "step": 65180 }, { "epoch": 0.7944255542149586, "grad_norm": 2.6685236157699803, "learning_rate": 1.0820397690827455e-06, "loss": 0.6971, "step": 65185 }, { "epoch": 0.7944864904391065, "grad_norm": 2.7909074101920712, "learning_rate": 1.0817190506735087e-06, "loss": 0.7104, "step": 65190 }, { "epoch": 0.7945474266632543, "grad_norm": 2.9707064798946994, "learning_rate": 1.0813983322642722e-06, "loss": 0.7606, "step": 65195 }, { "epoch": 0.794608362887402, "grad_norm": 2.3552265702182478, "learning_rate": 1.0810776138550354e-06, "loss": 0.7062, "step": 65200 }, { "epoch": 0.7946692991115498, "grad_norm": 2.3988050487266652, "learning_rate": 1.0807568954457986e-06, "loss": 0.7291, "step": 65205 }, { "epoch": 0.7947302353356976, "grad_norm": 2.1776589655753207, "learning_rate": 1.0804361770365619e-06, "loss": 0.7473, "step": 65210 }, { "epoch": 0.7947911715598455, "grad_norm": 2.817637119095983, "learning_rate": 1.0801154586273253e-06, "loss": 0.7834, "step": 65215 }, { "epoch": 0.7948521077839933, "grad_norm": 2.979936604891487, "learning_rate": 1.0797947402180885e-06, "loss": 0.6805, "step": 65220 }, { "epoch": 0.7949130440081411, "grad_norm": 2.607303553381537, "learning_rate": 1.079474021808852e-06, "loss": 0.7349, "step": 65225 }, { "epoch": 0.7949739802322889, "grad_norm": 2.494592776597388, "learning_rate": 1.0791533033996152e-06, "loss": 0.7397, "step": 65230 }, { "epoch": 0.7950349164564366, "grad_norm": 2.774977880731262, "learning_rate": 1.0788325849903786e-06, "loss": 0.7582, "step": 65235 }, { "epoch": 0.7950958526805845, "grad_norm": 2.1535997971543015, "learning_rate": 1.0785118665811419e-06, "loss": 0.7263, "step": 65240 }, { "epoch": 0.7951567889047323, "grad_norm": 2.4729724722679407, "learning_rate": 1.078191148171905e-06, "loss": 0.7346, "step": 65245 }, { "epoch": 0.7952177251288801, "grad_norm": 2.222891463811535, "learning_rate": 1.0778704297626683e-06, "loss": 0.7532, "step": 65250 }, { "epoch": 0.7952786613530279, "grad_norm": 2.700546724652706, "learning_rate": 1.0775497113534318e-06, "loss": 0.7322, "step": 65255 }, { "epoch": 0.7953395975771758, "grad_norm": 2.767887001327789, "learning_rate": 1.077228992944195e-06, "loss": 0.6836, "step": 65260 }, { "epoch": 0.7954005338013236, "grad_norm": 2.4593055019864094, "learning_rate": 1.0769082745349584e-06, "loss": 0.7119, "step": 65265 }, { "epoch": 0.7954614700254713, "grad_norm": 2.6138852969889657, "learning_rate": 1.0765875561257217e-06, "loss": 0.6903, "step": 65270 }, { "epoch": 0.7955224062496191, "grad_norm": 2.6573451455689274, "learning_rate": 1.0762668377164851e-06, "loss": 0.6174, "step": 65275 }, { "epoch": 0.7955833424737669, "grad_norm": 2.085088140193009, "learning_rate": 1.0759461193072483e-06, "loss": 0.6731, "step": 65280 }, { "epoch": 0.7956442786979148, "grad_norm": 2.211545936124753, "learning_rate": 1.0756254008980116e-06, "loss": 0.7283, "step": 65285 }, { "epoch": 0.7957052149220626, "grad_norm": 3.2433776176313747, "learning_rate": 1.075304682488775e-06, "loss": 0.7246, "step": 65290 }, { "epoch": 0.7957661511462104, "grad_norm": 1.9860402852802999, "learning_rate": 1.0749839640795382e-06, "loss": 0.698, "step": 65295 }, { "epoch": 0.7958270873703582, "grad_norm": 2.869290479446236, "learning_rate": 1.0746632456703015e-06, "loss": 0.7237, "step": 65300 }, { "epoch": 0.7958880235945059, "grad_norm": 1.970042679227159, "learning_rate": 1.074342527261065e-06, "loss": 0.6805, "step": 65305 }, { "epoch": 0.7959489598186538, "grad_norm": 2.288055773702824, "learning_rate": 1.0740218088518283e-06, "loss": 0.6709, "step": 65310 }, { "epoch": 0.7960098960428016, "grad_norm": 2.7026778463153533, "learning_rate": 1.0737010904425916e-06, "loss": 0.7901, "step": 65315 }, { "epoch": 0.7960708322669494, "grad_norm": 2.5497525662236202, "learning_rate": 1.0733803720333548e-06, "loss": 0.6966, "step": 65320 }, { "epoch": 0.7961317684910972, "grad_norm": 3.228223667872513, "learning_rate": 1.073059653624118e-06, "loss": 0.6784, "step": 65325 }, { "epoch": 0.7961927047152451, "grad_norm": 2.3665004958913696, "learning_rate": 1.0727389352148815e-06, "loss": 0.8014, "step": 65330 }, { "epoch": 0.7962536409393929, "grad_norm": 2.0552563271161683, "learning_rate": 1.0724182168056447e-06, "loss": 0.7269, "step": 65335 }, { "epoch": 0.7963145771635406, "grad_norm": 2.983287701008765, "learning_rate": 1.072097498396408e-06, "loss": 0.7495, "step": 65340 }, { "epoch": 0.7963755133876884, "grad_norm": 2.2838338859258314, "learning_rate": 1.0717767799871714e-06, "loss": 0.7329, "step": 65345 }, { "epoch": 0.7964364496118362, "grad_norm": 3.2452978169068842, "learning_rate": 1.0714560615779348e-06, "loss": 0.7672, "step": 65350 }, { "epoch": 0.7964973858359841, "grad_norm": 2.512183950986697, "learning_rate": 1.071135343168698e-06, "loss": 0.7045, "step": 65355 }, { "epoch": 0.7965583220601319, "grad_norm": 2.1849717717165293, "learning_rate": 1.0708146247594613e-06, "loss": 0.704, "step": 65360 }, { "epoch": 0.7966192582842797, "grad_norm": 2.3263120497216336, "learning_rate": 1.0704939063502245e-06, "loss": 0.738, "step": 65365 }, { "epoch": 0.7966801945084275, "grad_norm": 2.517339375292495, "learning_rate": 1.070173187940988e-06, "loss": 0.7443, "step": 65370 }, { "epoch": 0.7967411307325752, "grad_norm": 2.665069794465946, "learning_rate": 1.0698524695317512e-06, "loss": 0.7297, "step": 65375 }, { "epoch": 0.7968020669567231, "grad_norm": 2.598781652735856, "learning_rate": 1.0695317511225146e-06, "loss": 0.6978, "step": 65380 }, { "epoch": 0.7968630031808709, "grad_norm": 3.7317692125690693, "learning_rate": 1.0692110327132778e-06, "loss": 0.7654, "step": 65385 }, { "epoch": 0.7969239394050187, "grad_norm": 2.506021357257216, "learning_rate": 1.0688903143040413e-06, "loss": 0.7, "step": 65390 }, { "epoch": 0.7969848756291665, "grad_norm": 3.1887471021735156, "learning_rate": 1.0685695958948045e-06, "loss": 0.7637, "step": 65395 }, { "epoch": 0.7970458118533144, "grad_norm": 3.4114512778991273, "learning_rate": 1.0682488774855677e-06, "loss": 0.6995, "step": 65400 }, { "epoch": 0.7971067480774622, "grad_norm": 2.276766844036485, "learning_rate": 1.067928159076331e-06, "loss": 0.7083, "step": 65405 }, { "epoch": 0.7971676843016099, "grad_norm": 2.6334251627304224, "learning_rate": 1.0676074406670944e-06, "loss": 0.7436, "step": 65410 }, { "epoch": 0.7972286205257577, "grad_norm": 3.118249298189932, "learning_rate": 1.0672867222578576e-06, "loss": 0.6899, "step": 65415 }, { "epoch": 0.7972895567499055, "grad_norm": 3.369291974587285, "learning_rate": 1.066966003848621e-06, "loss": 0.7135, "step": 65420 }, { "epoch": 0.7973504929740534, "grad_norm": 2.5999310382898915, "learning_rate": 1.0666452854393843e-06, "loss": 0.7441, "step": 65425 }, { "epoch": 0.7974114291982012, "grad_norm": 2.0272810604333533, "learning_rate": 1.0663245670301477e-06, "loss": 0.7676, "step": 65430 }, { "epoch": 0.797472365422349, "grad_norm": 2.414408707098818, "learning_rate": 1.066003848620911e-06, "loss": 0.6943, "step": 65435 }, { "epoch": 0.7975333016464968, "grad_norm": 2.401899713049386, "learning_rate": 1.0656831302116742e-06, "loss": 0.7288, "step": 65440 }, { "epoch": 0.7975942378706445, "grad_norm": 2.3847196775407027, "learning_rate": 1.0653624118024374e-06, "loss": 0.7444, "step": 65445 }, { "epoch": 0.7976551740947924, "grad_norm": 2.6316262606251826, "learning_rate": 1.0650416933932009e-06, "loss": 0.8274, "step": 65450 }, { "epoch": 0.7977161103189402, "grad_norm": 3.0853328617197655, "learning_rate": 1.064720974983964e-06, "loss": 0.7065, "step": 65455 }, { "epoch": 0.797777046543088, "grad_norm": 2.699749659059491, "learning_rate": 1.0644002565747275e-06, "loss": 0.7301, "step": 65460 }, { "epoch": 0.7978379827672358, "grad_norm": 2.468755737898456, "learning_rate": 1.0640795381654908e-06, "loss": 0.7575, "step": 65465 }, { "epoch": 0.7978989189913837, "grad_norm": 2.4538100636412494, "learning_rate": 1.0637588197562542e-06, "loss": 0.7328, "step": 65470 }, { "epoch": 0.7979598552155315, "grad_norm": 2.252718978363733, "learning_rate": 1.0634381013470174e-06, "loss": 0.7096, "step": 65475 }, { "epoch": 0.7980207914396792, "grad_norm": 2.6489185755334517, "learning_rate": 1.0631173829377807e-06, "loss": 0.6275, "step": 65480 }, { "epoch": 0.798081727663827, "grad_norm": 2.4715414307995807, "learning_rate": 1.0627966645285439e-06, "loss": 0.7158, "step": 65485 }, { "epoch": 0.7981426638879748, "grad_norm": 2.268403974831216, "learning_rate": 1.0624759461193073e-06, "loss": 0.7407, "step": 65490 }, { "epoch": 0.7982036001121227, "grad_norm": 2.6762468335584533, "learning_rate": 1.0621552277100706e-06, "loss": 0.759, "step": 65495 }, { "epoch": 0.7982645363362705, "grad_norm": 2.175337096844255, "learning_rate": 1.061834509300834e-06, "loss": 0.789, "step": 65500 }, { "epoch": 0.7983254725604183, "grad_norm": 3.293885382739598, "learning_rate": 1.0615137908915972e-06, "loss": 0.7173, "step": 65505 }, { "epoch": 0.7983864087845661, "grad_norm": 2.4121675575909847, "learning_rate": 1.0611930724823607e-06, "loss": 0.6408, "step": 65510 }, { "epoch": 0.7984473450087138, "grad_norm": 2.3173873671435867, "learning_rate": 1.0608723540731239e-06, "loss": 0.7232, "step": 65515 }, { "epoch": 0.7985082812328617, "grad_norm": 1.9950955031738404, "learning_rate": 1.0605516356638871e-06, "loss": 0.6763, "step": 65520 }, { "epoch": 0.7985692174570095, "grad_norm": 2.763780356057737, "learning_rate": 1.0602309172546503e-06, "loss": 0.7267, "step": 65525 }, { "epoch": 0.7986301536811573, "grad_norm": 2.877843368285334, "learning_rate": 1.0599101988454138e-06, "loss": 0.7884, "step": 65530 }, { "epoch": 0.7986910899053051, "grad_norm": 4.0756414275906305, "learning_rate": 1.0595894804361772e-06, "loss": 0.7039, "step": 65535 }, { "epoch": 0.798752026129453, "grad_norm": 2.759659117072868, "learning_rate": 1.0592687620269405e-06, "loss": 0.6798, "step": 65540 }, { "epoch": 0.7988129623536008, "grad_norm": 2.687260989614498, "learning_rate": 1.0589480436177037e-06, "loss": 0.716, "step": 65545 }, { "epoch": 0.7988738985777485, "grad_norm": 2.3320187599372275, "learning_rate": 1.0586273252084671e-06, "loss": 0.6268, "step": 65550 }, { "epoch": 0.7989348348018963, "grad_norm": 2.0065044780397274, "learning_rate": 1.0583066067992304e-06, "loss": 0.6552, "step": 65555 }, { "epoch": 0.7989957710260441, "grad_norm": 2.3235123460148537, "learning_rate": 1.0579858883899936e-06, "loss": 0.6822, "step": 65560 }, { "epoch": 0.799056707250192, "grad_norm": 2.5688863577763907, "learning_rate": 1.057665169980757e-06, "loss": 0.6876, "step": 65565 }, { "epoch": 0.7991176434743398, "grad_norm": 2.2195502165742216, "learning_rate": 1.0573444515715203e-06, "loss": 0.7354, "step": 65570 }, { "epoch": 0.7991785796984876, "grad_norm": 3.713591314868066, "learning_rate": 1.0570237331622837e-06, "loss": 0.7178, "step": 65575 }, { "epoch": 0.7992395159226354, "grad_norm": 2.5281301190638894, "learning_rate": 1.056703014753047e-06, "loss": 0.7182, "step": 65580 }, { "epoch": 0.7993004521467831, "grad_norm": 2.1380905719643857, "learning_rate": 1.0563822963438104e-06, "loss": 0.6976, "step": 65585 }, { "epoch": 0.799361388370931, "grad_norm": 2.2914415996919097, "learning_rate": 1.0560615779345736e-06, "loss": 0.7285, "step": 65590 }, { "epoch": 0.7994223245950788, "grad_norm": 3.4537856327308627, "learning_rate": 1.0557408595253368e-06, "loss": 0.689, "step": 65595 }, { "epoch": 0.7994832608192266, "grad_norm": 2.9912497403488834, "learning_rate": 1.0554201411161e-06, "loss": 0.7079, "step": 65600 }, { "epoch": 0.7995441970433744, "grad_norm": 2.297169520495871, "learning_rate": 1.0550994227068635e-06, "loss": 0.7334, "step": 65605 }, { "epoch": 0.7996051332675223, "grad_norm": 2.9917771594300167, "learning_rate": 1.0547787042976267e-06, "loss": 0.7675, "step": 65610 }, { "epoch": 0.7996660694916701, "grad_norm": 2.7699377859109884, "learning_rate": 1.0544579858883902e-06, "loss": 0.7569, "step": 65615 }, { "epoch": 0.7997270057158178, "grad_norm": 2.3549217803630236, "learning_rate": 1.0541372674791534e-06, "loss": 0.7536, "step": 65620 }, { "epoch": 0.7997879419399656, "grad_norm": 2.6199339419230845, "learning_rate": 1.0538165490699168e-06, "loss": 0.7051, "step": 65625 }, { "epoch": 0.7998488781641134, "grad_norm": 2.9928785248247345, "learning_rate": 1.05349583066068e-06, "loss": 0.615, "step": 65630 }, { "epoch": 0.7999098143882613, "grad_norm": 2.267992123926992, "learning_rate": 1.0531751122514433e-06, "loss": 0.6602, "step": 65635 }, { "epoch": 0.7999707506124091, "grad_norm": 2.4243490265024934, "learning_rate": 1.0528543938422065e-06, "loss": 0.7233, "step": 65640 }, { "epoch": 0.8000316868365569, "grad_norm": 2.8390084616947293, "learning_rate": 1.05253367543297e-06, "loss": 0.7707, "step": 65645 }, { "epoch": 0.8000926230607047, "grad_norm": 3.123992529158378, "learning_rate": 1.0522129570237332e-06, "loss": 0.714, "step": 65650 }, { "epoch": 0.8001535592848524, "grad_norm": 2.3099263315015994, "learning_rate": 1.0518922386144966e-06, "loss": 0.7161, "step": 65655 }, { "epoch": 0.8002144955090003, "grad_norm": 2.214228576931731, "learning_rate": 1.0515715202052598e-06, "loss": 0.7052, "step": 65660 }, { "epoch": 0.8002754317331481, "grad_norm": 2.3532837033069973, "learning_rate": 1.0512508017960233e-06, "loss": 0.7738, "step": 65665 }, { "epoch": 0.8003363679572959, "grad_norm": 2.8085061583475017, "learning_rate": 1.0509300833867865e-06, "loss": 0.766, "step": 65670 }, { "epoch": 0.8003973041814437, "grad_norm": 2.063303578984609, "learning_rate": 1.0506093649775497e-06, "loss": 0.6717, "step": 65675 }, { "epoch": 0.8004582404055915, "grad_norm": 2.4086266119152264, "learning_rate": 1.050288646568313e-06, "loss": 0.7663, "step": 65680 }, { "epoch": 0.8005191766297393, "grad_norm": 2.369891645595042, "learning_rate": 1.0499679281590764e-06, "loss": 0.7151, "step": 65685 }, { "epoch": 0.8005801128538871, "grad_norm": 2.411828112409181, "learning_rate": 1.0496472097498396e-06, "loss": 0.6826, "step": 65690 }, { "epoch": 0.8006410490780349, "grad_norm": 2.667493480659271, "learning_rate": 1.049326491340603e-06, "loss": 0.7031, "step": 65695 }, { "epoch": 0.8007019853021827, "grad_norm": 2.4192412617471275, "learning_rate": 1.0490057729313663e-06, "loss": 0.6797, "step": 65700 }, { "epoch": 0.8007629215263306, "grad_norm": 2.2123376311013176, "learning_rate": 1.0486850545221298e-06, "loss": 0.6859, "step": 65705 }, { "epoch": 0.8008238577504784, "grad_norm": 2.54875935242008, "learning_rate": 1.048364336112893e-06, "loss": 0.724, "step": 65710 }, { "epoch": 0.8008847939746262, "grad_norm": 3.307126556887661, "learning_rate": 1.0480436177036562e-06, "loss": 0.8164, "step": 65715 }, { "epoch": 0.8009457301987739, "grad_norm": 2.2933152900044442, "learning_rate": 1.0477228992944194e-06, "loss": 0.6731, "step": 65720 }, { "epoch": 0.8010066664229217, "grad_norm": 2.497883679554843, "learning_rate": 1.0474021808851829e-06, "loss": 0.6997, "step": 65725 }, { "epoch": 0.8010676026470696, "grad_norm": 3.5361549358404565, "learning_rate": 1.0470814624759463e-06, "loss": 0.7322, "step": 65730 }, { "epoch": 0.8011285388712174, "grad_norm": 4.269570917604763, "learning_rate": 1.0467607440667095e-06, "loss": 0.7549, "step": 65735 }, { "epoch": 0.8011894750953652, "grad_norm": 2.4294386279836124, "learning_rate": 1.0464400256574728e-06, "loss": 0.71, "step": 65740 }, { "epoch": 0.801250411319513, "grad_norm": 2.516089224362842, "learning_rate": 1.0461193072482362e-06, "loss": 0.7024, "step": 65745 }, { "epoch": 0.8013113475436608, "grad_norm": 2.7222710203522755, "learning_rate": 1.0457985888389994e-06, "loss": 0.7109, "step": 65750 }, { "epoch": 0.8013722837678086, "grad_norm": 2.676701443432324, "learning_rate": 1.0454778704297627e-06, "loss": 0.691, "step": 65755 }, { "epoch": 0.8014332199919564, "grad_norm": 2.4843412652227297, "learning_rate": 1.0451571520205261e-06, "loss": 0.6643, "step": 65760 }, { "epoch": 0.8014941562161042, "grad_norm": 2.3415379196246118, "learning_rate": 1.0448364336112893e-06, "loss": 0.7231, "step": 65765 }, { "epoch": 0.801555092440252, "grad_norm": 2.6807204942610214, "learning_rate": 1.0445157152020528e-06, "loss": 0.7636, "step": 65770 }, { "epoch": 0.8016160286643998, "grad_norm": 4.340808671272121, "learning_rate": 1.044194996792816e-06, "loss": 0.7631, "step": 65775 }, { "epoch": 0.8016769648885477, "grad_norm": 2.6222946760763217, "learning_rate": 1.0438742783835792e-06, "loss": 0.6959, "step": 65780 }, { "epoch": 0.8017379011126955, "grad_norm": 2.6040281000299177, "learning_rate": 1.0435535599743427e-06, "loss": 0.7246, "step": 65785 }, { "epoch": 0.8017988373368432, "grad_norm": 2.3097665766177338, "learning_rate": 1.043232841565106e-06, "loss": 0.7163, "step": 65790 }, { "epoch": 0.801859773560991, "grad_norm": 2.7388170044839364, "learning_rate": 1.0429121231558691e-06, "loss": 0.7856, "step": 65795 }, { "epoch": 0.8019207097851389, "grad_norm": 4.75254276040928, "learning_rate": 1.0425914047466326e-06, "loss": 0.6887, "step": 65800 }, { "epoch": 0.8019816460092867, "grad_norm": 2.229727315600332, "learning_rate": 1.0422706863373958e-06, "loss": 0.6526, "step": 65805 }, { "epoch": 0.8020425822334345, "grad_norm": 2.2452582943603887, "learning_rate": 1.0419499679281592e-06, "loss": 0.7375, "step": 65810 }, { "epoch": 0.8021035184575823, "grad_norm": 2.3328988547446468, "learning_rate": 1.0416292495189225e-06, "loss": 0.6835, "step": 65815 }, { "epoch": 0.8021644546817301, "grad_norm": 2.3350334029458297, "learning_rate": 1.0413085311096857e-06, "loss": 0.7085, "step": 65820 }, { "epoch": 0.8022253909058779, "grad_norm": 2.604110073419286, "learning_rate": 1.0409878127004491e-06, "loss": 0.6814, "step": 65825 }, { "epoch": 0.8022863271300257, "grad_norm": 6.566053135894386, "learning_rate": 1.0406670942912124e-06, "loss": 0.7474, "step": 65830 }, { "epoch": 0.8023472633541735, "grad_norm": 2.6438216078664762, "learning_rate": 1.0403463758819756e-06, "loss": 0.7755, "step": 65835 }, { "epoch": 0.8024081995783213, "grad_norm": 1.953591312363976, "learning_rate": 1.040025657472739e-06, "loss": 0.6542, "step": 65840 }, { "epoch": 0.8024691358024691, "grad_norm": 2.1972259327990957, "learning_rate": 1.0397049390635023e-06, "loss": 0.6632, "step": 65845 }, { "epoch": 0.802530072026617, "grad_norm": 2.4220763240660363, "learning_rate": 1.0393842206542657e-06, "loss": 0.7372, "step": 65850 }, { "epoch": 0.8025910082507648, "grad_norm": 2.755528810200542, "learning_rate": 1.039063502245029e-06, "loss": 0.6802, "step": 65855 }, { "epoch": 0.8026519444749125, "grad_norm": 2.1775448576499743, "learning_rate": 1.0387427838357924e-06, "loss": 0.7523, "step": 65860 }, { "epoch": 0.8027128806990603, "grad_norm": 2.3295041809006514, "learning_rate": 1.0384220654265556e-06, "loss": 0.6869, "step": 65865 }, { "epoch": 0.8027738169232081, "grad_norm": 3.2259335385166263, "learning_rate": 1.0381013470173188e-06, "loss": 0.6805, "step": 65870 }, { "epoch": 0.802834753147356, "grad_norm": 3.2256913165159555, "learning_rate": 1.037780628608082e-06, "loss": 0.7449, "step": 65875 }, { "epoch": 0.8028956893715038, "grad_norm": 2.7052994066580847, "learning_rate": 1.0374599101988455e-06, "loss": 0.6988, "step": 65880 }, { "epoch": 0.8029566255956516, "grad_norm": 1.9862161095668098, "learning_rate": 1.037139191789609e-06, "loss": 0.656, "step": 65885 }, { "epoch": 0.8030175618197994, "grad_norm": 2.252596355029255, "learning_rate": 1.0368184733803722e-06, "loss": 0.721, "step": 65890 }, { "epoch": 0.8030784980439472, "grad_norm": 2.5378263709837205, "learning_rate": 1.0364977549711354e-06, "loss": 0.7607, "step": 65895 }, { "epoch": 0.803139434268095, "grad_norm": 2.620707716019439, "learning_rate": 1.0361770365618988e-06, "loss": 0.733, "step": 65900 }, { "epoch": 0.8032003704922428, "grad_norm": 3.6011802948373797, "learning_rate": 1.035856318152662e-06, "loss": 0.7308, "step": 65905 }, { "epoch": 0.8032613067163906, "grad_norm": 4.648801498933274, "learning_rate": 1.0355355997434253e-06, "loss": 0.7928, "step": 65910 }, { "epoch": 0.8033222429405384, "grad_norm": 2.873397104915113, "learning_rate": 1.0352148813341885e-06, "loss": 0.7183, "step": 65915 }, { "epoch": 0.8033831791646863, "grad_norm": 2.8471677829804714, "learning_rate": 1.034894162924952e-06, "loss": 0.6985, "step": 65920 }, { "epoch": 0.8034441153888341, "grad_norm": 2.828662948436689, "learning_rate": 1.0345734445157154e-06, "loss": 0.7533, "step": 65925 }, { "epoch": 0.8035050516129818, "grad_norm": 2.5246513093624685, "learning_rate": 1.0342527261064786e-06, "loss": 0.7117, "step": 65930 }, { "epoch": 0.8035659878371296, "grad_norm": 2.8992424953358, "learning_rate": 1.0339320076972419e-06, "loss": 0.7185, "step": 65935 }, { "epoch": 0.8036269240612774, "grad_norm": 2.9819559627208387, "learning_rate": 1.0336112892880053e-06, "loss": 0.692, "step": 65940 }, { "epoch": 0.8036878602854253, "grad_norm": 2.3030138178419084, "learning_rate": 1.0332905708787685e-06, "loss": 0.7972, "step": 65945 }, { "epoch": 0.8037487965095731, "grad_norm": 2.447901982680771, "learning_rate": 1.0329698524695318e-06, "loss": 0.7425, "step": 65950 }, { "epoch": 0.8038097327337209, "grad_norm": 2.1727386948708913, "learning_rate": 1.0326491340602952e-06, "loss": 0.7837, "step": 65955 }, { "epoch": 0.8038706689578687, "grad_norm": 2.5212970523567297, "learning_rate": 1.0323284156510584e-06, "loss": 0.6559, "step": 65960 }, { "epoch": 0.8039316051820165, "grad_norm": 2.345806143257602, "learning_rate": 1.0320076972418219e-06, "loss": 0.7771, "step": 65965 }, { "epoch": 0.8039925414061643, "grad_norm": 2.1765287904687356, "learning_rate": 1.031686978832585e-06, "loss": 0.731, "step": 65970 }, { "epoch": 0.8040534776303121, "grad_norm": 2.7510668685014945, "learning_rate": 1.0313662604233483e-06, "loss": 0.6633, "step": 65975 }, { "epoch": 0.8041144138544599, "grad_norm": 2.0673559547847495, "learning_rate": 1.0310455420141118e-06, "loss": 0.7957, "step": 65980 }, { "epoch": 0.8041753500786077, "grad_norm": 2.377435844040414, "learning_rate": 1.030724823604875e-06, "loss": 0.7471, "step": 65985 }, { "epoch": 0.8042362863027556, "grad_norm": 2.655993102606498, "learning_rate": 1.0304041051956382e-06, "loss": 0.6783, "step": 65990 }, { "epoch": 0.8042972225269034, "grad_norm": 2.084443951955122, "learning_rate": 1.0300833867864017e-06, "loss": 0.677, "step": 65995 }, { "epoch": 0.8043581587510511, "grad_norm": 4.809629887851739, "learning_rate": 1.0297626683771649e-06, "loss": 0.7674, "step": 66000 }, { "epoch": 0.8044190949751989, "grad_norm": 2.289752535100736, "learning_rate": 1.0294419499679283e-06, "loss": 0.7097, "step": 66005 }, { "epoch": 0.8044800311993467, "grad_norm": 2.284622277359023, "learning_rate": 1.0291212315586916e-06, "loss": 0.7438, "step": 66010 }, { "epoch": 0.8045409674234946, "grad_norm": 4.118917908688283, "learning_rate": 1.0288005131494548e-06, "loss": 0.7583, "step": 66015 }, { "epoch": 0.8046019036476424, "grad_norm": 2.673476315558098, "learning_rate": 1.0284797947402182e-06, "loss": 0.6907, "step": 66020 }, { "epoch": 0.8046628398717902, "grad_norm": 5.442137523438746, "learning_rate": 1.0281590763309815e-06, "loss": 0.78, "step": 66025 }, { "epoch": 0.804723776095938, "grad_norm": 3.230314537020189, "learning_rate": 1.0278383579217447e-06, "loss": 0.7935, "step": 66030 }, { "epoch": 0.8047847123200857, "grad_norm": 2.191391979882014, "learning_rate": 1.0275176395125081e-06, "loss": 0.6997, "step": 66035 }, { "epoch": 0.8048456485442336, "grad_norm": 2.429732702276647, "learning_rate": 1.0271969211032714e-06, "loss": 0.7675, "step": 66040 }, { "epoch": 0.8049065847683814, "grad_norm": 2.2923832139859988, "learning_rate": 1.0268762026940348e-06, "loss": 0.6658, "step": 66045 }, { "epoch": 0.8049675209925292, "grad_norm": 2.697012140370637, "learning_rate": 1.026555484284798e-06, "loss": 0.7877, "step": 66050 }, { "epoch": 0.805028457216677, "grad_norm": 2.815847370580616, "learning_rate": 1.0262347658755613e-06, "loss": 0.645, "step": 66055 }, { "epoch": 0.8050893934408249, "grad_norm": 2.5201605155991706, "learning_rate": 1.0259140474663247e-06, "loss": 0.7483, "step": 66060 }, { "epoch": 0.8051503296649727, "grad_norm": 3.7951499696984246, "learning_rate": 1.025593329057088e-06, "loss": 0.7085, "step": 66065 }, { "epoch": 0.8052112658891204, "grad_norm": 2.239369772002401, "learning_rate": 1.0252726106478512e-06, "loss": 0.7974, "step": 66070 }, { "epoch": 0.8052722021132682, "grad_norm": 2.481610596393289, "learning_rate": 1.0249518922386146e-06, "loss": 0.7094, "step": 66075 }, { "epoch": 0.805333138337416, "grad_norm": 3.3120990646888466, "learning_rate": 1.024631173829378e-06, "loss": 0.7222, "step": 66080 }, { "epoch": 0.8053940745615639, "grad_norm": 2.783813738064753, "learning_rate": 1.0243104554201413e-06, "loss": 0.7159, "step": 66085 }, { "epoch": 0.8054550107857117, "grad_norm": 2.3751370657083557, "learning_rate": 1.0239897370109045e-06, "loss": 0.669, "step": 66090 }, { "epoch": 0.8055159470098595, "grad_norm": 2.7334635398481035, "learning_rate": 1.0236690186016677e-06, "loss": 0.6607, "step": 66095 }, { "epoch": 0.8055768832340073, "grad_norm": 2.2423313582273345, "learning_rate": 1.0233483001924312e-06, "loss": 0.6943, "step": 66100 }, { "epoch": 0.805637819458155, "grad_norm": 2.818897687810618, "learning_rate": 1.0230275817831944e-06, "loss": 0.6642, "step": 66105 }, { "epoch": 0.8056987556823029, "grad_norm": 2.462402576311654, "learning_rate": 1.0227068633739576e-06, "loss": 0.7358, "step": 66110 }, { "epoch": 0.8057596919064507, "grad_norm": 3.028341168923593, "learning_rate": 1.022386144964721e-06, "loss": 0.7688, "step": 66115 }, { "epoch": 0.8058206281305985, "grad_norm": 2.2222971795086934, "learning_rate": 1.0220654265554845e-06, "loss": 0.7389, "step": 66120 }, { "epoch": 0.8058815643547463, "grad_norm": 2.7331373140640256, "learning_rate": 1.0217447081462477e-06, "loss": 0.6862, "step": 66125 }, { "epoch": 0.8059425005788942, "grad_norm": 2.0828057311495782, "learning_rate": 1.021423989737011e-06, "loss": 0.7362, "step": 66130 }, { "epoch": 0.806003436803042, "grad_norm": 2.0358118810244683, "learning_rate": 1.0211032713277742e-06, "loss": 0.7185, "step": 66135 }, { "epoch": 0.8060643730271897, "grad_norm": 2.2021567970793128, "learning_rate": 1.0207825529185376e-06, "loss": 0.659, "step": 66140 }, { "epoch": 0.8061253092513375, "grad_norm": 3.030489558049744, "learning_rate": 1.0204618345093008e-06, "loss": 0.7149, "step": 66145 }, { "epoch": 0.8061862454754853, "grad_norm": 3.3327945612377428, "learning_rate": 1.0201411161000643e-06, "loss": 0.6986, "step": 66150 }, { "epoch": 0.8062471816996332, "grad_norm": 2.636786413215845, "learning_rate": 1.0198203976908275e-06, "loss": 0.7727, "step": 66155 }, { "epoch": 0.806308117923781, "grad_norm": 2.0780731002640405, "learning_rate": 1.019499679281591e-06, "loss": 0.7091, "step": 66160 }, { "epoch": 0.8063690541479288, "grad_norm": 2.841777175774939, "learning_rate": 1.0191789608723542e-06, "loss": 0.7048, "step": 66165 }, { "epoch": 0.8064299903720766, "grad_norm": 2.5794728871775154, "learning_rate": 1.0188582424631174e-06, "loss": 0.7386, "step": 66170 }, { "epoch": 0.8064909265962243, "grad_norm": 3.064503024150228, "learning_rate": 1.0185375240538809e-06, "loss": 0.7143, "step": 66175 }, { "epoch": 0.8065518628203722, "grad_norm": 2.0239003791432846, "learning_rate": 1.018216805644644e-06, "loss": 0.7149, "step": 66180 }, { "epoch": 0.80661279904452, "grad_norm": 2.3454756772088303, "learning_rate": 1.0178960872354073e-06, "loss": 0.7746, "step": 66185 }, { "epoch": 0.8066737352686678, "grad_norm": 2.7706951069251584, "learning_rate": 1.0175753688261708e-06, "loss": 0.7006, "step": 66190 }, { "epoch": 0.8067346714928156, "grad_norm": 2.3223941524571035, "learning_rate": 1.017254650416934e-06, "loss": 0.7041, "step": 66195 }, { "epoch": 0.8067956077169635, "grad_norm": 3.1926677002634865, "learning_rate": 1.0169339320076974e-06, "loss": 0.7721, "step": 66200 }, { "epoch": 0.8068565439411113, "grad_norm": 2.67447228123115, "learning_rate": 1.0166132135984607e-06, "loss": 0.6876, "step": 66205 }, { "epoch": 0.806917480165259, "grad_norm": 2.3857102852475855, "learning_rate": 1.0162924951892239e-06, "loss": 0.7006, "step": 66210 }, { "epoch": 0.8069784163894068, "grad_norm": 2.323217264420524, "learning_rate": 1.0159717767799873e-06, "loss": 0.729, "step": 66215 }, { "epoch": 0.8070393526135546, "grad_norm": 2.5444996931109682, "learning_rate": 1.0156510583707505e-06, "loss": 0.6863, "step": 66220 }, { "epoch": 0.8071002888377025, "grad_norm": 2.456793547570499, "learning_rate": 1.0153303399615138e-06, "loss": 0.7717, "step": 66225 }, { "epoch": 0.8071612250618503, "grad_norm": 2.5217902792210998, "learning_rate": 1.0150096215522772e-06, "loss": 0.7227, "step": 66230 }, { "epoch": 0.8072221612859981, "grad_norm": 2.8516790778470886, "learning_rate": 1.0146889031430407e-06, "loss": 0.7404, "step": 66235 }, { "epoch": 0.8072830975101459, "grad_norm": 2.1400731353053764, "learning_rate": 1.0143681847338039e-06, "loss": 0.7355, "step": 66240 }, { "epoch": 0.8073440337342936, "grad_norm": 2.791183337311952, "learning_rate": 1.0140474663245671e-06, "loss": 0.7338, "step": 66245 }, { "epoch": 0.8074049699584415, "grad_norm": 2.5328948987118807, "learning_rate": 1.0137267479153303e-06, "loss": 0.7515, "step": 66250 }, { "epoch": 0.8074659061825893, "grad_norm": 2.448970043627317, "learning_rate": 1.0134060295060938e-06, "loss": 0.6933, "step": 66255 }, { "epoch": 0.8075268424067371, "grad_norm": 2.1160323380229635, "learning_rate": 1.013085311096857e-06, "loss": 0.6977, "step": 66260 }, { "epoch": 0.8075877786308849, "grad_norm": 2.358268723737095, "learning_rate": 1.0127645926876202e-06, "loss": 0.696, "step": 66265 }, { "epoch": 0.8076487148550328, "grad_norm": 2.7488962211544865, "learning_rate": 1.0124438742783837e-06, "loss": 0.7572, "step": 66270 }, { "epoch": 0.8077096510791806, "grad_norm": 2.838066418521605, "learning_rate": 1.0121231558691471e-06, "loss": 0.7818, "step": 66275 }, { "epoch": 0.8077705873033283, "grad_norm": 3.469620586075875, "learning_rate": 1.0118024374599103e-06, "loss": 0.7466, "step": 66280 }, { "epoch": 0.8078315235274761, "grad_norm": 2.6753411119768726, "learning_rate": 1.0114817190506736e-06, "loss": 0.6853, "step": 66285 }, { "epoch": 0.8078924597516239, "grad_norm": 2.698106999350541, "learning_rate": 1.0111610006414368e-06, "loss": 0.7604, "step": 66290 }, { "epoch": 0.8079533959757718, "grad_norm": 2.362477570891358, "learning_rate": 1.0108402822322002e-06, "loss": 0.7038, "step": 66295 }, { "epoch": 0.8080143321999196, "grad_norm": 2.7296164210735916, "learning_rate": 1.0105195638229635e-06, "loss": 0.74, "step": 66300 }, { "epoch": 0.8080752684240674, "grad_norm": 2.360617354911016, "learning_rate": 1.010198845413727e-06, "loss": 0.7008, "step": 66305 }, { "epoch": 0.8081362046482152, "grad_norm": 2.805377805683778, "learning_rate": 1.0098781270044901e-06, "loss": 0.7148, "step": 66310 }, { "epoch": 0.8081971408723629, "grad_norm": 2.4897480539965375, "learning_rate": 1.0095574085952536e-06, "loss": 0.7642, "step": 66315 }, { "epoch": 0.8082580770965108, "grad_norm": 2.814936125711732, "learning_rate": 1.0092366901860168e-06, "loss": 0.728, "step": 66320 }, { "epoch": 0.8083190133206586, "grad_norm": 2.124466244468124, "learning_rate": 1.00891597177678e-06, "loss": 0.64, "step": 66325 }, { "epoch": 0.8083799495448064, "grad_norm": 2.8421132421481095, "learning_rate": 1.0085952533675433e-06, "loss": 0.6884, "step": 66330 }, { "epoch": 0.8084408857689542, "grad_norm": 2.2932702473604882, "learning_rate": 1.0082745349583067e-06, "loss": 0.715, "step": 66335 }, { "epoch": 0.808501821993102, "grad_norm": 2.8081361083838754, "learning_rate": 1.00795381654907e-06, "loss": 0.7737, "step": 66340 }, { "epoch": 0.8085627582172499, "grad_norm": 5.455657970346674, "learning_rate": 1.0076330981398334e-06, "loss": 0.7457, "step": 66345 }, { "epoch": 0.8086236944413976, "grad_norm": 2.2481079521876524, "learning_rate": 1.0073123797305966e-06, "loss": 0.7228, "step": 66350 }, { "epoch": 0.8086846306655454, "grad_norm": 2.918220930228079, "learning_rate": 1.00699166132136e-06, "loss": 0.6805, "step": 66355 }, { "epoch": 0.8087455668896932, "grad_norm": 2.714805473914434, "learning_rate": 1.0066709429121233e-06, "loss": 0.7444, "step": 66360 }, { "epoch": 0.8088065031138411, "grad_norm": 2.576690463279906, "learning_rate": 1.0063502245028865e-06, "loss": 0.7436, "step": 66365 }, { "epoch": 0.8088674393379889, "grad_norm": 2.381995605635183, "learning_rate": 1.0060295060936497e-06, "loss": 0.7777, "step": 66370 }, { "epoch": 0.8089283755621367, "grad_norm": 3.422533208923055, "learning_rate": 1.0057087876844132e-06, "loss": 0.7585, "step": 66375 }, { "epoch": 0.8089893117862845, "grad_norm": 2.3359488361093463, "learning_rate": 1.0053880692751764e-06, "loss": 0.7105, "step": 66380 }, { "epoch": 0.8090502480104322, "grad_norm": 3.616752466611515, "learning_rate": 1.0050673508659398e-06, "loss": 0.7109, "step": 66385 }, { "epoch": 0.8091111842345801, "grad_norm": 2.55888351968531, "learning_rate": 1.004746632456703e-06, "loss": 0.6767, "step": 66390 }, { "epoch": 0.8091721204587279, "grad_norm": 2.1961889784968815, "learning_rate": 1.0044259140474665e-06, "loss": 0.6752, "step": 66395 }, { "epoch": 0.8092330566828757, "grad_norm": 2.6163717685773156, "learning_rate": 1.0041051956382297e-06, "loss": 0.7315, "step": 66400 }, { "epoch": 0.8092939929070235, "grad_norm": 2.0603182272858485, "learning_rate": 1.003784477228993e-06, "loss": 0.703, "step": 66405 }, { "epoch": 0.8093549291311714, "grad_norm": 3.0765600891246914, "learning_rate": 1.0034637588197562e-06, "loss": 0.6866, "step": 66410 }, { "epoch": 0.8094158653553192, "grad_norm": 4.57960493312128, "learning_rate": 1.0031430404105196e-06, "loss": 0.711, "step": 66415 }, { "epoch": 0.8094768015794669, "grad_norm": 2.708307055895967, "learning_rate": 1.0028223220012829e-06, "loss": 0.7366, "step": 66420 }, { "epoch": 0.8095377378036147, "grad_norm": 2.7051628074554306, "learning_rate": 1.0025016035920463e-06, "loss": 0.6849, "step": 66425 }, { "epoch": 0.8095986740277625, "grad_norm": 2.1402757786795776, "learning_rate": 1.0021808851828095e-06, "loss": 0.6492, "step": 66430 }, { "epoch": 0.8096596102519104, "grad_norm": 2.7763668401422716, "learning_rate": 1.001860166773573e-06, "loss": 0.6911, "step": 66435 }, { "epoch": 0.8097205464760582, "grad_norm": 2.1538947313310257, "learning_rate": 1.0015394483643362e-06, "loss": 0.7412, "step": 66440 }, { "epoch": 0.809781482700206, "grad_norm": 2.2526449757751132, "learning_rate": 1.0012187299550994e-06, "loss": 0.6382, "step": 66445 }, { "epoch": 0.8098424189243538, "grad_norm": 2.267773733150997, "learning_rate": 1.0008980115458627e-06, "loss": 0.7427, "step": 66450 }, { "epoch": 0.8099033551485015, "grad_norm": 2.5053354408709967, "learning_rate": 1.000577293136626e-06, "loss": 0.7573, "step": 66455 }, { "epoch": 0.8099642913726494, "grad_norm": 2.639959574948548, "learning_rate": 1.0002565747273893e-06, "loss": 0.8269, "step": 66460 }, { "epoch": 0.8100252275967972, "grad_norm": 2.643345446330705, "learning_rate": 9.999358563181528e-07, "loss": 0.6789, "step": 66465 }, { "epoch": 0.810086163820945, "grad_norm": 2.7287993837037794, "learning_rate": 9.996151379089162e-07, "loss": 0.7166, "step": 66470 }, { "epoch": 0.8101471000450928, "grad_norm": 2.430088857235586, "learning_rate": 9.992944194996794e-07, "loss": 0.6365, "step": 66475 }, { "epoch": 0.8102080362692407, "grad_norm": 4.269865866040985, "learning_rate": 9.989737010904427e-07, "loss": 0.8492, "step": 66480 }, { "epoch": 0.8102689724933885, "grad_norm": 2.410503228189515, "learning_rate": 9.98652982681206e-07, "loss": 0.7542, "step": 66485 }, { "epoch": 0.8103299087175362, "grad_norm": 3.1352662904354647, "learning_rate": 9.983322642719693e-07, "loss": 0.7783, "step": 66490 }, { "epoch": 0.810390844941684, "grad_norm": 2.739550909414545, "learning_rate": 9.980115458627326e-07, "loss": 0.6877, "step": 66495 }, { "epoch": 0.8104517811658318, "grad_norm": 2.3446280134343334, "learning_rate": 9.97690827453496e-07, "loss": 0.6945, "step": 66500 }, { "epoch": 0.8105127173899797, "grad_norm": 2.69691195763896, "learning_rate": 9.973701090442592e-07, "loss": 0.7784, "step": 66505 }, { "epoch": 0.8105736536141275, "grad_norm": 2.8067209477432553, "learning_rate": 9.970493906350227e-07, "loss": 0.6754, "step": 66510 }, { "epoch": 0.8106345898382753, "grad_norm": 2.5985683696725506, "learning_rate": 9.96728672225786e-07, "loss": 0.6815, "step": 66515 }, { "epoch": 0.8106955260624231, "grad_norm": 2.3471898102785502, "learning_rate": 9.964079538165491e-07, "loss": 0.7396, "step": 66520 }, { "epoch": 0.8107564622865708, "grad_norm": 2.256608008568907, "learning_rate": 9.960872354073124e-07, "loss": 0.7555, "step": 66525 }, { "epoch": 0.8108173985107187, "grad_norm": 2.4096513952164957, "learning_rate": 9.957665169980758e-07, "loss": 0.7225, "step": 66530 }, { "epoch": 0.8108783347348665, "grad_norm": 2.196828630784951, "learning_rate": 9.95445798588839e-07, "loss": 0.6769, "step": 66535 }, { "epoch": 0.8109392709590143, "grad_norm": 2.1771734546370047, "learning_rate": 9.951250801796025e-07, "loss": 0.6768, "step": 66540 }, { "epoch": 0.8110002071831621, "grad_norm": 2.5108784961352635, "learning_rate": 9.948043617703657e-07, "loss": 0.7146, "step": 66545 }, { "epoch": 0.81106114340731, "grad_norm": 2.2501741943627285, "learning_rate": 9.944836433611291e-07, "loss": 0.6828, "step": 66550 }, { "epoch": 0.8111220796314578, "grad_norm": 2.3758271251499608, "learning_rate": 9.941629249518924e-07, "loss": 0.7542, "step": 66555 }, { "epoch": 0.8111830158556055, "grad_norm": 2.4314085826498246, "learning_rate": 9.938422065426556e-07, "loss": 0.7657, "step": 66560 }, { "epoch": 0.8112439520797533, "grad_norm": 2.4344156460790125, "learning_rate": 9.935214881334188e-07, "loss": 0.7381, "step": 66565 }, { "epoch": 0.8113048883039011, "grad_norm": 2.769263992495322, "learning_rate": 9.932007697241823e-07, "loss": 0.8197, "step": 66570 }, { "epoch": 0.811365824528049, "grad_norm": 3.5112703318015743, "learning_rate": 9.928800513149455e-07, "loss": 0.7742, "step": 66575 }, { "epoch": 0.8114267607521968, "grad_norm": 2.507180352061418, "learning_rate": 9.92559332905709e-07, "loss": 0.7733, "step": 66580 }, { "epoch": 0.8114876969763446, "grad_norm": 4.850661684847888, "learning_rate": 9.922386144964722e-07, "loss": 0.7245, "step": 66585 }, { "epoch": 0.8115486332004924, "grad_norm": 2.3319458352316995, "learning_rate": 9.919178960872356e-07, "loss": 0.6769, "step": 66590 }, { "epoch": 0.8116095694246401, "grad_norm": 2.3358924413943654, "learning_rate": 9.915971776779988e-07, "loss": 0.6882, "step": 66595 }, { "epoch": 0.811670505648788, "grad_norm": 2.383536021109581, "learning_rate": 9.91276459268762e-07, "loss": 0.7604, "step": 66600 }, { "epoch": 0.8117314418729358, "grad_norm": 2.1707143586245943, "learning_rate": 9.909557408595253e-07, "loss": 0.744, "step": 66605 }, { "epoch": 0.8117923780970836, "grad_norm": 2.019049421211836, "learning_rate": 9.906350224502887e-07, "loss": 0.7339, "step": 66610 }, { "epoch": 0.8118533143212314, "grad_norm": 2.5046168349580458, "learning_rate": 9.90314304041052e-07, "loss": 0.6984, "step": 66615 }, { "epoch": 0.8119142505453792, "grad_norm": 2.467212030982211, "learning_rate": 9.899935856318154e-07, "loss": 0.7308, "step": 66620 }, { "epoch": 0.811975186769527, "grad_norm": 2.908321131355749, "learning_rate": 9.896728672225786e-07, "loss": 0.6383, "step": 66625 }, { "epoch": 0.8120361229936748, "grad_norm": 2.2289697755107927, "learning_rate": 9.89352148813342e-07, "loss": 0.759, "step": 66630 }, { "epoch": 0.8120970592178226, "grad_norm": 2.6506696546869324, "learning_rate": 9.890314304041053e-07, "loss": 0.7342, "step": 66635 }, { "epoch": 0.8121579954419704, "grad_norm": 2.1219117712195765, "learning_rate": 9.887107119948685e-07, "loss": 0.6967, "step": 66640 }, { "epoch": 0.8122189316661182, "grad_norm": 2.4874263141762696, "learning_rate": 9.883899935856317e-07, "loss": 0.7166, "step": 66645 }, { "epoch": 0.8122798678902661, "grad_norm": 2.5496272650461513, "learning_rate": 9.880692751763952e-07, "loss": 0.7184, "step": 66650 }, { "epoch": 0.8123408041144139, "grad_norm": 4.21175484460809, "learning_rate": 9.877485567671586e-07, "loss": 0.6482, "step": 66655 }, { "epoch": 0.8124017403385616, "grad_norm": 2.220678366296348, "learning_rate": 9.874278383579219e-07, "loss": 0.7207, "step": 66660 }, { "epoch": 0.8124626765627094, "grad_norm": 4.4184214662762695, "learning_rate": 9.87107119948685e-07, "loss": 0.6952, "step": 66665 }, { "epoch": 0.8125236127868573, "grad_norm": 2.1522908365401356, "learning_rate": 9.867864015394485e-07, "loss": 0.7285, "step": 66670 }, { "epoch": 0.8125845490110051, "grad_norm": 2.6694552559230718, "learning_rate": 9.864656831302118e-07, "loss": 0.7294, "step": 66675 }, { "epoch": 0.8126454852351529, "grad_norm": 2.922978543499505, "learning_rate": 9.86144964720975e-07, "loss": 0.7858, "step": 66680 }, { "epoch": 0.8127064214593007, "grad_norm": 2.2707642579914866, "learning_rate": 9.858242463117382e-07, "loss": 0.7229, "step": 66685 }, { "epoch": 0.8127673576834485, "grad_norm": 2.450360979128499, "learning_rate": 9.855035279025017e-07, "loss": 0.7488, "step": 66690 }, { "epoch": 0.8128282939075963, "grad_norm": 2.5385346667641, "learning_rate": 9.85182809493265e-07, "loss": 0.6989, "step": 66695 }, { "epoch": 0.8128892301317441, "grad_norm": 3.3972418700019555, "learning_rate": 9.848620910840283e-07, "loss": 0.7105, "step": 66700 }, { "epoch": 0.8129501663558919, "grad_norm": 2.6326544776026313, "learning_rate": 9.845413726747916e-07, "loss": 0.6878, "step": 66705 }, { "epoch": 0.8130111025800397, "grad_norm": 2.654229910812892, "learning_rate": 9.84220654265555e-07, "loss": 0.7636, "step": 66710 }, { "epoch": 0.8130720388041875, "grad_norm": 2.506441863811556, "learning_rate": 9.838999358563182e-07, "loss": 0.7589, "step": 66715 }, { "epoch": 0.8131329750283354, "grad_norm": 2.791787390616325, "learning_rate": 9.835792174470814e-07, "loss": 0.7484, "step": 66720 }, { "epoch": 0.8131939112524832, "grad_norm": 2.6366101360371412, "learning_rate": 9.832584990378449e-07, "loss": 0.7515, "step": 66725 }, { "epoch": 0.8132548474766309, "grad_norm": 2.1889659005579123, "learning_rate": 9.829377806286081e-07, "loss": 0.7227, "step": 66730 }, { "epoch": 0.8133157837007787, "grad_norm": 2.3471448091077005, "learning_rate": 9.826170622193716e-07, "loss": 0.7096, "step": 66735 }, { "epoch": 0.8133767199249266, "grad_norm": 2.637277575722966, "learning_rate": 9.822963438101348e-07, "loss": 0.7195, "step": 66740 }, { "epoch": 0.8134376561490744, "grad_norm": 3.229470551076171, "learning_rate": 9.81975625400898e-07, "loss": 0.7027, "step": 66745 }, { "epoch": 0.8134985923732222, "grad_norm": 2.244085488436612, "learning_rate": 9.816549069916615e-07, "loss": 0.6706, "step": 66750 }, { "epoch": 0.81355952859737, "grad_norm": 2.76518073829049, "learning_rate": 9.813341885824247e-07, "loss": 0.6167, "step": 66755 }, { "epoch": 0.8136204648215178, "grad_norm": 2.420844048351067, "learning_rate": 9.81013470173188e-07, "loss": 0.8193, "step": 66760 }, { "epoch": 0.8136814010456656, "grad_norm": 3.306006029059431, "learning_rate": 9.806927517639514e-07, "loss": 0.7409, "step": 66765 }, { "epoch": 0.8137423372698134, "grad_norm": 2.918726031762288, "learning_rate": 9.803720333547146e-07, "loss": 0.7152, "step": 66770 }, { "epoch": 0.8138032734939612, "grad_norm": 2.630834449570551, "learning_rate": 9.80051314945478e-07, "loss": 0.7171, "step": 66775 }, { "epoch": 0.813864209718109, "grad_norm": 2.127382450199981, "learning_rate": 9.797305965362412e-07, "loss": 0.7054, "step": 66780 }, { "epoch": 0.8139251459422568, "grad_norm": 2.5947285451712623, "learning_rate": 9.794098781270047e-07, "loss": 0.7396, "step": 66785 }, { "epoch": 0.8139860821664047, "grad_norm": 2.4361020607145316, "learning_rate": 9.79089159717768e-07, "loss": 0.7403, "step": 66790 }, { "epoch": 0.8140470183905525, "grad_norm": 2.0287285110885414, "learning_rate": 9.787684413085311e-07, "loss": 0.6265, "step": 66795 }, { "epoch": 0.8141079546147002, "grad_norm": 2.277092702654607, "learning_rate": 9.784477228992944e-07, "loss": 0.72, "step": 66800 }, { "epoch": 0.814168890838848, "grad_norm": 2.605940212520995, "learning_rate": 9.781270044900578e-07, "loss": 0.6697, "step": 66805 }, { "epoch": 0.8142298270629958, "grad_norm": 2.0935553622567546, "learning_rate": 9.77806286080821e-07, "loss": 0.7385, "step": 66810 }, { "epoch": 0.8142907632871437, "grad_norm": 2.4084602187742385, "learning_rate": 9.774855676715845e-07, "loss": 0.6887, "step": 66815 }, { "epoch": 0.8143516995112915, "grad_norm": 2.2558924745039777, "learning_rate": 9.771648492623477e-07, "loss": 0.7814, "step": 66820 }, { "epoch": 0.8144126357354393, "grad_norm": 2.1956552822673046, "learning_rate": 9.768441308531112e-07, "loss": 0.7411, "step": 66825 }, { "epoch": 0.8144735719595871, "grad_norm": 2.9951241269527324, "learning_rate": 9.765234124438744e-07, "loss": 0.7977, "step": 66830 }, { "epoch": 0.8145345081837349, "grad_norm": 2.7490667519895515, "learning_rate": 9.762026940346376e-07, "loss": 0.7378, "step": 66835 }, { "epoch": 0.8145954444078827, "grad_norm": 2.2661177100795458, "learning_rate": 9.758819756254008e-07, "loss": 0.6878, "step": 66840 }, { "epoch": 0.8146563806320305, "grad_norm": 2.558115489882854, "learning_rate": 9.755612572161643e-07, "loss": 0.7506, "step": 66845 }, { "epoch": 0.8147173168561783, "grad_norm": 3.3542513860101386, "learning_rate": 9.752405388069277e-07, "loss": 0.7134, "step": 66850 }, { "epoch": 0.8147782530803261, "grad_norm": 2.858838089842505, "learning_rate": 9.74919820397691e-07, "loss": 0.7116, "step": 66855 }, { "epoch": 0.814839189304474, "grad_norm": 2.9430866761318892, "learning_rate": 9.745991019884542e-07, "loss": 0.7194, "step": 66860 }, { "epoch": 0.8149001255286218, "grad_norm": 2.6418427084556093, "learning_rate": 9.742783835792176e-07, "loss": 0.73, "step": 66865 }, { "epoch": 0.8149610617527695, "grad_norm": 2.43500303874016, "learning_rate": 9.739576651699808e-07, "loss": 0.7666, "step": 66870 }, { "epoch": 0.8150219979769173, "grad_norm": 1.9942619580515026, "learning_rate": 9.73636946760744e-07, "loss": 0.7071, "step": 66875 }, { "epoch": 0.8150829342010651, "grad_norm": 2.3280358335165086, "learning_rate": 9.733162283515073e-07, "loss": 0.7162, "step": 66880 }, { "epoch": 0.815143870425213, "grad_norm": 1.8307080299315315, "learning_rate": 9.729955099422707e-07, "loss": 0.7077, "step": 66885 }, { "epoch": 0.8152048066493608, "grad_norm": 2.374912565429933, "learning_rate": 9.726747915330342e-07, "loss": 0.7609, "step": 66890 }, { "epoch": 0.8152657428735086, "grad_norm": 2.4219342065689324, "learning_rate": 9.723540731237974e-07, "loss": 0.7083, "step": 66895 }, { "epoch": 0.8153266790976564, "grad_norm": 2.50749874676266, "learning_rate": 9.720333547145606e-07, "loss": 0.789, "step": 66900 }, { "epoch": 0.8153876153218041, "grad_norm": 2.1393084606500152, "learning_rate": 9.71712636305324e-07, "loss": 0.7336, "step": 66905 }, { "epoch": 0.815448551545952, "grad_norm": 3.349959859696303, "learning_rate": 9.713919178960873e-07, "loss": 0.7558, "step": 66910 }, { "epoch": 0.8155094877700998, "grad_norm": 2.095661086301291, "learning_rate": 9.710711994868505e-07, "loss": 0.7448, "step": 66915 }, { "epoch": 0.8155704239942476, "grad_norm": 3.188884676248198, "learning_rate": 9.70750481077614e-07, "loss": 0.7364, "step": 66920 }, { "epoch": 0.8156313602183954, "grad_norm": 3.0156852285783904, "learning_rate": 9.704297626683772e-07, "loss": 0.7493, "step": 66925 }, { "epoch": 0.8156922964425433, "grad_norm": 2.953631382247709, "learning_rate": 9.701090442591406e-07, "loss": 0.6495, "step": 66930 }, { "epoch": 0.8157532326666911, "grad_norm": 2.5532550254442308, "learning_rate": 9.697883258499039e-07, "loss": 0.7611, "step": 66935 }, { "epoch": 0.8158141688908388, "grad_norm": 2.5063715249186567, "learning_rate": 9.69467607440667e-07, "loss": 0.748, "step": 66940 }, { "epoch": 0.8158751051149866, "grad_norm": 2.7239811366644546, "learning_rate": 9.691468890314305e-07, "loss": 0.6896, "step": 66945 }, { "epoch": 0.8159360413391344, "grad_norm": 2.033166235186956, "learning_rate": 9.688261706221938e-07, "loss": 0.6448, "step": 66950 }, { "epoch": 0.8159969775632823, "grad_norm": 2.8758733369230485, "learning_rate": 9.68505452212957e-07, "loss": 0.7218, "step": 66955 }, { "epoch": 0.8160579137874301, "grad_norm": 2.235077983730683, "learning_rate": 9.681847338037204e-07, "loss": 0.654, "step": 66960 }, { "epoch": 0.8161188500115779, "grad_norm": 3.2121873488390076, "learning_rate": 9.678640153944837e-07, "loss": 0.7273, "step": 66965 }, { "epoch": 0.8161797862357257, "grad_norm": 2.1037889732641704, "learning_rate": 9.675432969852471e-07, "loss": 0.7214, "step": 66970 }, { "epoch": 0.8162407224598734, "grad_norm": 2.4679469589250695, "learning_rate": 9.672225785760103e-07, "loss": 0.6974, "step": 66975 }, { "epoch": 0.8163016586840213, "grad_norm": 2.5488859144275584, "learning_rate": 9.669018601667736e-07, "loss": 0.7462, "step": 66980 }, { "epoch": 0.8163625949081691, "grad_norm": 3.0681449736004986, "learning_rate": 9.66581141757537e-07, "loss": 0.7015, "step": 66985 }, { "epoch": 0.8164235311323169, "grad_norm": 3.0863635835482515, "learning_rate": 9.662604233483002e-07, "loss": 0.7211, "step": 66990 }, { "epoch": 0.8164844673564647, "grad_norm": 3.997010191504067, "learning_rate": 9.659397049390635e-07, "loss": 0.7533, "step": 66995 }, { "epoch": 0.8165454035806126, "grad_norm": 2.1444874899582334, "learning_rate": 9.65618986529827e-07, "loss": 0.6195, "step": 67000 }, { "epoch": 0.8166063398047604, "grad_norm": 3.0464668072502206, "learning_rate": 9.652982681205903e-07, "loss": 0.7276, "step": 67005 }, { "epoch": 0.8166672760289081, "grad_norm": 2.3246350034321472, "learning_rate": 9.649775497113536e-07, "loss": 0.6743, "step": 67010 }, { "epoch": 0.8167282122530559, "grad_norm": 2.592705611820434, "learning_rate": 9.646568313021168e-07, "loss": 0.6858, "step": 67015 }, { "epoch": 0.8167891484772037, "grad_norm": 3.3053523996337395, "learning_rate": 9.6433611289288e-07, "loss": 0.7634, "step": 67020 }, { "epoch": 0.8168500847013516, "grad_norm": 2.5197671080488475, "learning_rate": 9.640153944836435e-07, "loss": 0.7174, "step": 67025 }, { "epoch": 0.8169110209254994, "grad_norm": 2.2806695554420466, "learning_rate": 9.636946760744067e-07, "loss": 0.697, "step": 67030 }, { "epoch": 0.8169719571496472, "grad_norm": 2.369264104306292, "learning_rate": 9.6337395766517e-07, "loss": 0.7673, "step": 67035 }, { "epoch": 0.817032893373795, "grad_norm": 2.1230064351707987, "learning_rate": 9.630532392559334e-07, "loss": 0.7186, "step": 67040 }, { "epoch": 0.8170938295979427, "grad_norm": 2.216201761412461, "learning_rate": 9.627325208466968e-07, "loss": 0.7377, "step": 67045 }, { "epoch": 0.8171547658220906, "grad_norm": 2.813585073358265, "learning_rate": 9.6241180243746e-07, "loss": 0.7078, "step": 67050 }, { "epoch": 0.8172157020462384, "grad_norm": 2.2042929590218976, "learning_rate": 9.620910840282233e-07, "loss": 0.6626, "step": 67055 }, { "epoch": 0.8172766382703862, "grad_norm": 5.342020055059336, "learning_rate": 9.617703656189865e-07, "loss": 0.7445, "step": 67060 }, { "epoch": 0.817337574494534, "grad_norm": 3.20606810835011, "learning_rate": 9.6144964720975e-07, "loss": 0.7429, "step": 67065 }, { "epoch": 0.8173985107186819, "grad_norm": 2.6411494599351397, "learning_rate": 9.611289288005132e-07, "loss": 0.7151, "step": 67070 }, { "epoch": 0.8174594469428297, "grad_norm": 3.7714871896006463, "learning_rate": 9.608082103912766e-07, "loss": 0.7544, "step": 67075 }, { "epoch": 0.8175203831669774, "grad_norm": 2.782545029254416, "learning_rate": 9.604874919820398e-07, "loss": 0.7399, "step": 67080 }, { "epoch": 0.8175813193911252, "grad_norm": 2.597545535487235, "learning_rate": 9.601667735728033e-07, "loss": 0.6744, "step": 67085 }, { "epoch": 0.817642255615273, "grad_norm": 2.376898333644019, "learning_rate": 9.598460551635665e-07, "loss": 0.7914, "step": 67090 }, { "epoch": 0.8177031918394209, "grad_norm": 2.8940995005430348, "learning_rate": 9.595253367543297e-07, "loss": 0.7093, "step": 67095 }, { "epoch": 0.8177641280635687, "grad_norm": 1.751591709006775, "learning_rate": 9.592046183450932e-07, "loss": 0.7375, "step": 67100 }, { "epoch": 0.8178250642877165, "grad_norm": 2.2150335352287494, "learning_rate": 9.588838999358564e-07, "loss": 0.6553, "step": 67105 }, { "epoch": 0.8178860005118643, "grad_norm": 2.9160237931348196, "learning_rate": 9.585631815266196e-07, "loss": 0.7503, "step": 67110 }, { "epoch": 0.817946936736012, "grad_norm": 2.5125042761604752, "learning_rate": 9.58242463117383e-07, "loss": 0.7036, "step": 67115 }, { "epoch": 0.8180078729601599, "grad_norm": 3.068314605597554, "learning_rate": 9.579217447081463e-07, "loss": 0.7643, "step": 67120 }, { "epoch": 0.8180688091843077, "grad_norm": 2.2878857003690354, "learning_rate": 9.576010262989097e-07, "loss": 0.6856, "step": 67125 }, { "epoch": 0.8181297454084555, "grad_norm": 2.702166570503728, "learning_rate": 9.57280307889673e-07, "loss": 0.7623, "step": 67130 }, { "epoch": 0.8181906816326033, "grad_norm": 2.6559910399564055, "learning_rate": 9.569595894804362e-07, "loss": 0.7365, "step": 67135 }, { "epoch": 0.8182516178567512, "grad_norm": 2.4253403764414405, "learning_rate": 9.566388710711996e-07, "loss": 0.6669, "step": 67140 }, { "epoch": 0.818312554080899, "grad_norm": 2.2554993756806203, "learning_rate": 9.563181526619629e-07, "loss": 0.7265, "step": 67145 }, { "epoch": 0.8183734903050467, "grad_norm": 2.5048545465760155, "learning_rate": 9.55997434252726e-07, "loss": 0.7362, "step": 67150 }, { "epoch": 0.8184344265291945, "grad_norm": 2.7705978421825175, "learning_rate": 9.556767158434895e-07, "loss": 0.6838, "step": 67155 }, { "epoch": 0.8184953627533423, "grad_norm": 2.2525972193498696, "learning_rate": 9.553559974342528e-07, "loss": 0.6931, "step": 67160 }, { "epoch": 0.8185562989774902, "grad_norm": 2.1479636276738185, "learning_rate": 9.550352790250162e-07, "loss": 0.7525, "step": 67165 }, { "epoch": 0.818617235201638, "grad_norm": 2.698937860398385, "learning_rate": 9.547145606157794e-07, "loss": 0.7764, "step": 67170 }, { "epoch": 0.8186781714257858, "grad_norm": 2.3204220537629636, "learning_rate": 9.543938422065427e-07, "loss": 0.738, "step": 67175 }, { "epoch": 0.8187391076499336, "grad_norm": 2.503069353682955, "learning_rate": 9.54073123797306e-07, "loss": 0.6923, "step": 67180 }, { "epoch": 0.8188000438740813, "grad_norm": 2.676458443599035, "learning_rate": 9.537524053880693e-07, "loss": 0.7615, "step": 67185 }, { "epoch": 0.8188609800982292, "grad_norm": 2.3173918965897546, "learning_rate": 9.534316869788327e-07, "loss": 0.7713, "step": 67190 }, { "epoch": 0.818921916322377, "grad_norm": 2.2349437238533185, "learning_rate": 9.531109685695959e-07, "loss": 0.7566, "step": 67195 }, { "epoch": 0.8189828525465248, "grad_norm": 2.1069697635906146, "learning_rate": 9.527902501603593e-07, "loss": 0.7367, "step": 67200 }, { "epoch": 0.8190437887706726, "grad_norm": 2.458369134417258, "learning_rate": 9.524695317511227e-07, "loss": 0.7375, "step": 67205 }, { "epoch": 0.8191047249948205, "grad_norm": 1.952677438854369, "learning_rate": 9.521488133418859e-07, "loss": 0.6991, "step": 67210 }, { "epoch": 0.8191656612189683, "grad_norm": 2.488189674304916, "learning_rate": 9.518280949326491e-07, "loss": 0.7541, "step": 67215 }, { "epoch": 0.819226597443116, "grad_norm": 3.2076168009385264, "learning_rate": 9.515073765234126e-07, "loss": 0.7562, "step": 67220 }, { "epoch": 0.8192875336672638, "grad_norm": 2.8624625567180533, "learning_rate": 9.511866581141759e-07, "loss": 0.7603, "step": 67225 }, { "epoch": 0.8193484698914116, "grad_norm": 3.128313372759758, "learning_rate": 9.508659397049391e-07, "loss": 0.7719, "step": 67230 }, { "epoch": 0.8194094061155595, "grad_norm": 2.7094337358336222, "learning_rate": 9.505452212957024e-07, "loss": 0.7521, "step": 67235 }, { "epoch": 0.8194703423397073, "grad_norm": 2.680002779004052, "learning_rate": 9.502245028864658e-07, "loss": 0.7076, "step": 67240 }, { "epoch": 0.8195312785638551, "grad_norm": 2.4589023698400276, "learning_rate": 9.499037844772291e-07, "loss": 0.6381, "step": 67245 }, { "epoch": 0.8195922147880029, "grad_norm": 2.174201573907638, "learning_rate": 9.495830660679924e-07, "loss": 0.6794, "step": 67250 }, { "epoch": 0.8196531510121506, "grad_norm": 2.458844699703637, "learning_rate": 9.492623476587556e-07, "loss": 0.6937, "step": 67255 }, { "epoch": 0.8197140872362985, "grad_norm": 2.8626018044368418, "learning_rate": 9.48941629249519e-07, "loss": 0.7166, "step": 67260 }, { "epoch": 0.8197750234604463, "grad_norm": 5.477030373163367, "learning_rate": 9.486209108402824e-07, "loss": 0.6695, "step": 67265 }, { "epoch": 0.8198359596845941, "grad_norm": 2.816175460105419, "learning_rate": 9.483001924310456e-07, "loss": 0.7543, "step": 67270 }, { "epoch": 0.8198968959087419, "grad_norm": 3.047625855999363, "learning_rate": 9.479794740218089e-07, "loss": 0.7858, "step": 67275 }, { "epoch": 0.8199578321328898, "grad_norm": 2.35917577925544, "learning_rate": 9.476587556125723e-07, "loss": 0.6926, "step": 67280 }, { "epoch": 0.8200187683570376, "grad_norm": 1.8722933846481817, "learning_rate": 9.473380372033356e-07, "loss": 0.7049, "step": 67285 }, { "epoch": 0.8200797045811853, "grad_norm": 1.995655564263677, "learning_rate": 9.470173187940988e-07, "loss": 0.7343, "step": 67290 }, { "epoch": 0.8201406408053331, "grad_norm": 2.5989500976266005, "learning_rate": 9.466966003848622e-07, "loss": 0.7622, "step": 67295 }, { "epoch": 0.8202015770294809, "grad_norm": 3.2731200695661498, "learning_rate": 9.463758819756255e-07, "loss": 0.7242, "step": 67300 }, { "epoch": 0.8202625132536288, "grad_norm": 3.1443454276209994, "learning_rate": 9.460551635663888e-07, "loss": 0.735, "step": 67305 }, { "epoch": 0.8203234494777766, "grad_norm": 2.4786329357750745, "learning_rate": 9.45734445157152e-07, "loss": 0.7347, "step": 67310 }, { "epoch": 0.8203843857019244, "grad_norm": 2.350054171581267, "learning_rate": 9.454137267479154e-07, "loss": 0.7055, "step": 67315 }, { "epoch": 0.8204453219260722, "grad_norm": 3.1576511229342987, "learning_rate": 9.450930083386787e-07, "loss": 0.6938, "step": 67320 }, { "epoch": 0.8205062581502199, "grad_norm": 2.975183638502958, "learning_rate": 9.447722899294421e-07, "loss": 0.7816, "step": 67325 }, { "epoch": 0.8205671943743678, "grad_norm": 2.1682573297113747, "learning_rate": 9.444515715202053e-07, "loss": 0.7341, "step": 67330 }, { "epoch": 0.8206281305985156, "grad_norm": 2.2329997056969506, "learning_rate": 9.441308531109686e-07, "loss": 0.8044, "step": 67335 }, { "epoch": 0.8206890668226634, "grad_norm": 2.5328550358461768, "learning_rate": 9.43810134701732e-07, "loss": 0.7075, "step": 67340 }, { "epoch": 0.8207500030468112, "grad_norm": 1.9344855382383752, "learning_rate": 9.434894162924953e-07, "loss": 0.7547, "step": 67345 }, { "epoch": 0.820810939270959, "grad_norm": 2.537334531341477, "learning_rate": 9.431686978832585e-07, "loss": 0.7478, "step": 67350 }, { "epoch": 0.8208718754951069, "grad_norm": 2.455419010852479, "learning_rate": 9.428479794740218e-07, "loss": 0.7166, "step": 67355 }, { "epoch": 0.8209328117192546, "grad_norm": 2.670762629037783, "learning_rate": 9.425272610647852e-07, "loss": 0.767, "step": 67360 }, { "epoch": 0.8209937479434024, "grad_norm": 2.1914181537563433, "learning_rate": 9.422065426555485e-07, "loss": 0.7255, "step": 67365 }, { "epoch": 0.8210546841675502, "grad_norm": 2.541294994422307, "learning_rate": 9.418858242463117e-07, "loss": 0.7243, "step": 67370 }, { "epoch": 0.821115620391698, "grad_norm": 2.4124158965746343, "learning_rate": 9.415651058370752e-07, "loss": 0.7134, "step": 67375 }, { "epoch": 0.8211765566158459, "grad_norm": 2.43575008372678, "learning_rate": 9.412443874278385e-07, "loss": 0.7269, "step": 67380 }, { "epoch": 0.8212374928399937, "grad_norm": 2.4688461739592387, "learning_rate": 9.409236690186017e-07, "loss": 0.6422, "step": 67385 }, { "epoch": 0.8212984290641415, "grad_norm": 2.3926740298156433, "learning_rate": 9.40602950609365e-07, "loss": 0.734, "step": 67390 }, { "epoch": 0.8213593652882892, "grad_norm": 2.4671463139413388, "learning_rate": 9.402822322001284e-07, "loss": 0.7024, "step": 67395 }, { "epoch": 0.8214203015124371, "grad_norm": 3.0019354120712767, "learning_rate": 9.399615137908918e-07, "loss": 0.81, "step": 67400 }, { "epoch": 0.8214812377365849, "grad_norm": 2.5444260584551714, "learning_rate": 9.39640795381655e-07, "loss": 0.7066, "step": 67405 }, { "epoch": 0.8215421739607327, "grad_norm": 2.3161472418024664, "learning_rate": 9.393200769724182e-07, "loss": 0.6639, "step": 67410 }, { "epoch": 0.8216031101848805, "grad_norm": 2.9464870242719403, "learning_rate": 9.389993585631816e-07, "loss": 0.6939, "step": 67415 }, { "epoch": 0.8216640464090283, "grad_norm": 3.2590564328900653, "learning_rate": 9.38678640153945e-07, "loss": 0.7053, "step": 67420 }, { "epoch": 0.8217249826331762, "grad_norm": 2.396942929979849, "learning_rate": 9.383579217447082e-07, "loss": 0.7242, "step": 67425 }, { "epoch": 0.8217859188573239, "grad_norm": 2.442545965644817, "learning_rate": 9.380372033354714e-07, "loss": 0.7926, "step": 67430 }, { "epoch": 0.8218468550814717, "grad_norm": 4.171599333533202, "learning_rate": 9.377164849262349e-07, "loss": 0.7131, "step": 67435 }, { "epoch": 0.8219077913056195, "grad_norm": 3.0384100527874476, "learning_rate": 9.373957665169982e-07, "loss": 0.6876, "step": 67440 }, { "epoch": 0.8219687275297674, "grad_norm": 2.320373185229289, "learning_rate": 9.370750481077614e-07, "loss": 0.7565, "step": 67445 }, { "epoch": 0.8220296637539152, "grad_norm": 2.9037814366682193, "learning_rate": 9.367543296985248e-07, "loss": 0.6969, "step": 67450 }, { "epoch": 0.822090599978063, "grad_norm": 2.4239850845636113, "learning_rate": 9.364336112892881e-07, "loss": 0.7063, "step": 67455 }, { "epoch": 0.8221515362022108, "grad_norm": 2.951690366875839, "learning_rate": 9.361128928800514e-07, "loss": 0.7112, "step": 67460 }, { "epoch": 0.8222124724263585, "grad_norm": 2.36147156434543, "learning_rate": 9.357921744708147e-07, "loss": 0.7456, "step": 67465 }, { "epoch": 0.8222734086505064, "grad_norm": 2.6584711111638066, "learning_rate": 9.35471456061578e-07, "loss": 0.759, "step": 67470 }, { "epoch": 0.8223343448746542, "grad_norm": 2.6388281342637803, "learning_rate": 9.351507376523413e-07, "loss": 0.6976, "step": 67475 }, { "epoch": 0.822395281098802, "grad_norm": 3.31198013270782, "learning_rate": 9.348300192431047e-07, "loss": 0.8119, "step": 67480 }, { "epoch": 0.8224562173229498, "grad_norm": 2.027258686342131, "learning_rate": 9.345093008338679e-07, "loss": 0.6639, "step": 67485 }, { "epoch": 0.8225171535470976, "grad_norm": 2.5526774395806098, "learning_rate": 9.341885824246312e-07, "loss": 0.6807, "step": 67490 }, { "epoch": 0.8225780897712455, "grad_norm": 2.395872276954553, "learning_rate": 9.338678640153946e-07, "loss": 0.7388, "step": 67495 }, { "epoch": 0.8226390259953932, "grad_norm": 2.227810314232604, "learning_rate": 9.335471456061579e-07, "loss": 0.7432, "step": 67500 }, { "epoch": 0.822699962219541, "grad_norm": 2.237639090563749, "learning_rate": 9.332264271969211e-07, "loss": 0.7187, "step": 67505 }, { "epoch": 0.8227608984436888, "grad_norm": 2.598421058528419, "learning_rate": 9.329057087876845e-07, "loss": 0.714, "step": 67510 }, { "epoch": 0.8228218346678366, "grad_norm": 2.402062311244396, "learning_rate": 9.325849903784478e-07, "loss": 0.7432, "step": 67515 }, { "epoch": 0.8228827708919845, "grad_norm": 2.268400940532049, "learning_rate": 9.322642719692111e-07, "loss": 0.6329, "step": 67520 }, { "epoch": 0.8229437071161323, "grad_norm": 2.7379497887905773, "learning_rate": 9.319435535599744e-07, "loss": 0.7127, "step": 67525 }, { "epoch": 0.8230046433402801, "grad_norm": 2.392176428741753, "learning_rate": 9.316228351507377e-07, "loss": 0.7511, "step": 67530 }, { "epoch": 0.8230655795644278, "grad_norm": 2.5997876106790105, "learning_rate": 9.31302116741501e-07, "loss": 0.6608, "step": 67535 }, { "epoch": 0.8231265157885757, "grad_norm": 3.2014979455210075, "learning_rate": 9.309813983322644e-07, "loss": 0.8463, "step": 67540 }, { "epoch": 0.8231874520127235, "grad_norm": 2.7373866441972448, "learning_rate": 9.306606799230276e-07, "loss": 0.6788, "step": 67545 }, { "epoch": 0.8232483882368713, "grad_norm": 3.2926942933402197, "learning_rate": 9.303399615137909e-07, "loss": 0.734, "step": 67550 }, { "epoch": 0.8233093244610191, "grad_norm": 2.424518065054488, "learning_rate": 9.300192431045544e-07, "loss": 0.6859, "step": 67555 }, { "epoch": 0.8233702606851669, "grad_norm": 2.663319324273462, "learning_rate": 9.296985246953176e-07, "loss": 0.7109, "step": 67560 }, { "epoch": 0.8234311969093148, "grad_norm": 2.7280620247313245, "learning_rate": 9.293778062860808e-07, "loss": 0.6704, "step": 67565 }, { "epoch": 0.8234921331334625, "grad_norm": 3.5608074783585013, "learning_rate": 9.290570878768442e-07, "loss": 0.7159, "step": 67570 }, { "epoch": 0.8235530693576103, "grad_norm": 2.0544710779707223, "learning_rate": 9.287363694676076e-07, "loss": 0.7869, "step": 67575 }, { "epoch": 0.8236140055817581, "grad_norm": 2.1302741259180182, "learning_rate": 9.284156510583708e-07, "loss": 0.7263, "step": 67580 }, { "epoch": 0.823674941805906, "grad_norm": 2.258380644304083, "learning_rate": 9.280949326491341e-07, "loss": 0.718, "step": 67585 }, { "epoch": 0.8237358780300538, "grad_norm": 2.2908039052652933, "learning_rate": 9.277742142398974e-07, "loss": 0.7302, "step": 67590 }, { "epoch": 0.8237968142542016, "grad_norm": 2.1359572611411815, "learning_rate": 9.274534958306608e-07, "loss": 0.6533, "step": 67595 }, { "epoch": 0.8238577504783493, "grad_norm": 2.5121322122746514, "learning_rate": 9.271327774214241e-07, "loss": 0.7071, "step": 67600 }, { "epoch": 0.8239186867024971, "grad_norm": 2.3762711926186473, "learning_rate": 9.268120590121873e-07, "loss": 0.6864, "step": 67605 }, { "epoch": 0.823979622926645, "grad_norm": 2.5041759801991894, "learning_rate": 9.264913406029506e-07, "loss": 0.745, "step": 67610 }, { "epoch": 0.8240405591507928, "grad_norm": 2.564572482357087, "learning_rate": 9.261706221937141e-07, "loss": 0.6989, "step": 67615 }, { "epoch": 0.8241014953749406, "grad_norm": 2.375955853714714, "learning_rate": 9.258499037844773e-07, "loss": 0.6937, "step": 67620 }, { "epoch": 0.8241624315990884, "grad_norm": 2.667442465592986, "learning_rate": 9.255291853752406e-07, "loss": 0.6898, "step": 67625 }, { "epoch": 0.8242233678232362, "grad_norm": 2.3029807721812774, "learning_rate": 9.252084669660039e-07, "loss": 0.7153, "step": 67630 }, { "epoch": 0.824284304047384, "grad_norm": 2.8330421145660964, "learning_rate": 9.248877485567673e-07, "loss": 0.7064, "step": 67635 }, { "epoch": 0.8243452402715318, "grad_norm": 3.0221751160506996, "learning_rate": 9.245670301475305e-07, "loss": 0.7405, "step": 67640 }, { "epoch": 0.8244061764956796, "grad_norm": 3.004629057259358, "learning_rate": 9.242463117382939e-07, "loss": 0.6667, "step": 67645 }, { "epoch": 0.8244671127198274, "grad_norm": 3.3099844795848394, "learning_rate": 9.239255933290571e-07, "loss": 0.7367, "step": 67650 }, { "epoch": 0.8245280489439752, "grad_norm": 2.8844850476095925, "learning_rate": 9.236048749198205e-07, "loss": 0.7362, "step": 67655 }, { "epoch": 0.8245889851681231, "grad_norm": 2.5532775597165616, "learning_rate": 9.232841565105838e-07, "loss": 0.7162, "step": 67660 }, { "epoch": 0.8246499213922709, "grad_norm": 2.347287382522164, "learning_rate": 9.229634381013471e-07, "loss": 0.7268, "step": 67665 }, { "epoch": 0.8247108576164186, "grad_norm": 3.2881078222265416, "learning_rate": 9.226427196921103e-07, "loss": 0.7993, "step": 67670 }, { "epoch": 0.8247717938405664, "grad_norm": 2.409074136097903, "learning_rate": 9.223220012828738e-07, "loss": 0.7699, "step": 67675 }, { "epoch": 0.8248327300647142, "grad_norm": 2.1795377136388403, "learning_rate": 9.22001282873637e-07, "loss": 0.8043, "step": 67680 }, { "epoch": 0.8248936662888621, "grad_norm": 2.579912992040156, "learning_rate": 9.216805644644003e-07, "loss": 0.7992, "step": 67685 }, { "epoch": 0.8249546025130099, "grad_norm": 3.567720274521226, "learning_rate": 9.213598460551637e-07, "loss": 0.6967, "step": 67690 }, { "epoch": 0.8250155387371577, "grad_norm": 2.2338300040156813, "learning_rate": 9.21039127645927e-07, "loss": 0.6616, "step": 67695 }, { "epoch": 0.8250764749613055, "grad_norm": 2.82478158780662, "learning_rate": 9.207184092366902e-07, "loss": 0.6888, "step": 67700 }, { "epoch": 0.8251374111854533, "grad_norm": 2.6081384092269864, "learning_rate": 9.203976908274536e-07, "loss": 0.7254, "step": 67705 }, { "epoch": 0.8251983474096011, "grad_norm": 2.751429807682759, "learning_rate": 9.200769724182169e-07, "loss": 0.7301, "step": 67710 }, { "epoch": 0.8252592836337489, "grad_norm": 2.9914613470520344, "learning_rate": 9.197562540089802e-07, "loss": 0.7272, "step": 67715 }, { "epoch": 0.8253202198578967, "grad_norm": 3.5421224088211924, "learning_rate": 9.194355355997435e-07, "loss": 0.7617, "step": 67720 }, { "epoch": 0.8253811560820445, "grad_norm": 2.4877081205237896, "learning_rate": 9.191148171905068e-07, "loss": 0.7379, "step": 67725 }, { "epoch": 0.8254420923061924, "grad_norm": 2.9526512570113352, "learning_rate": 9.187940987812702e-07, "loss": 0.7264, "step": 67730 }, { "epoch": 0.8255030285303402, "grad_norm": 3.3924808996870763, "learning_rate": 9.184733803720335e-07, "loss": 0.7023, "step": 67735 }, { "epoch": 0.8255639647544879, "grad_norm": 2.156090205795728, "learning_rate": 9.181526619627967e-07, "loss": 0.6718, "step": 67740 }, { "epoch": 0.8256249009786357, "grad_norm": 3.125874009599377, "learning_rate": 9.1783194355356e-07, "loss": 0.7453, "step": 67745 }, { "epoch": 0.8256858372027835, "grad_norm": 2.496833635284323, "learning_rate": 9.175112251443235e-07, "loss": 0.6815, "step": 67750 }, { "epoch": 0.8257467734269314, "grad_norm": 2.500097445484025, "learning_rate": 9.171905067350867e-07, "loss": 0.7212, "step": 67755 }, { "epoch": 0.8258077096510792, "grad_norm": 3.009291047266713, "learning_rate": 9.168697883258499e-07, "loss": 0.7916, "step": 67760 }, { "epoch": 0.825868645875227, "grad_norm": 2.620239697080495, "learning_rate": 9.165490699166133e-07, "loss": 0.7075, "step": 67765 }, { "epoch": 0.8259295820993748, "grad_norm": 2.534803173426367, "learning_rate": 9.162283515073767e-07, "loss": 0.7334, "step": 67770 }, { "epoch": 0.8259905183235225, "grad_norm": 2.219988909094211, "learning_rate": 9.159076330981399e-07, "loss": 0.6855, "step": 67775 }, { "epoch": 0.8260514545476704, "grad_norm": 2.176485954589465, "learning_rate": 9.155869146889032e-07, "loss": 0.6443, "step": 67780 }, { "epoch": 0.8261123907718182, "grad_norm": 2.3010366587579116, "learning_rate": 9.152661962796665e-07, "loss": 0.7656, "step": 67785 }, { "epoch": 0.826173326995966, "grad_norm": 3.1136578609509167, "learning_rate": 9.149454778704299e-07, "loss": 0.7755, "step": 67790 }, { "epoch": 0.8262342632201138, "grad_norm": 2.4579175557103055, "learning_rate": 9.146247594611932e-07, "loss": 0.7721, "step": 67795 }, { "epoch": 0.8262951994442617, "grad_norm": 2.352633687380701, "learning_rate": 9.143040410519565e-07, "loss": 0.7185, "step": 67800 }, { "epoch": 0.8263561356684095, "grad_norm": 2.8632719511995974, "learning_rate": 9.139833226427197e-07, "loss": 0.708, "step": 67805 }, { "epoch": 0.8264170718925572, "grad_norm": 2.2275751465654516, "learning_rate": 9.136626042334832e-07, "loss": 0.7341, "step": 67810 }, { "epoch": 0.826478008116705, "grad_norm": 2.1972129145105694, "learning_rate": 9.133418858242464e-07, "loss": 0.7028, "step": 67815 }, { "epoch": 0.8265389443408528, "grad_norm": 3.5749868246475853, "learning_rate": 9.130211674150097e-07, "loss": 0.7158, "step": 67820 }, { "epoch": 0.8265998805650007, "grad_norm": 2.3303095816667923, "learning_rate": 9.12700449005773e-07, "loss": 0.7362, "step": 67825 }, { "epoch": 0.8266608167891485, "grad_norm": 2.9974963589637253, "learning_rate": 9.123797305965364e-07, "loss": 0.6986, "step": 67830 }, { "epoch": 0.8267217530132963, "grad_norm": 2.3053892279197084, "learning_rate": 9.120590121872996e-07, "loss": 0.6886, "step": 67835 }, { "epoch": 0.8267826892374441, "grad_norm": 2.652481056017254, "learning_rate": 9.11738293778063e-07, "loss": 0.8059, "step": 67840 }, { "epoch": 0.8268436254615918, "grad_norm": 2.145312867768713, "learning_rate": 9.114175753688262e-07, "loss": 0.7757, "step": 67845 }, { "epoch": 0.8269045616857397, "grad_norm": 2.505709083061329, "learning_rate": 9.110968569595896e-07, "loss": 0.7162, "step": 67850 }, { "epoch": 0.8269654979098875, "grad_norm": 2.5996206537448394, "learning_rate": 9.107761385503529e-07, "loss": 0.6876, "step": 67855 }, { "epoch": 0.8270264341340353, "grad_norm": 2.2674978369202052, "learning_rate": 9.104554201411162e-07, "loss": 0.7701, "step": 67860 }, { "epoch": 0.8270873703581831, "grad_norm": 2.260621544351538, "learning_rate": 9.101347017318794e-07, "loss": 0.7466, "step": 67865 }, { "epoch": 0.827148306582331, "grad_norm": 2.152708960722752, "learning_rate": 9.098139833226429e-07, "loss": 0.6879, "step": 67870 }, { "epoch": 0.8272092428064788, "grad_norm": 2.3096035611085144, "learning_rate": 9.094932649134061e-07, "loss": 0.706, "step": 67875 }, { "epoch": 0.8272701790306265, "grad_norm": 2.461762649663698, "learning_rate": 9.091725465041694e-07, "loss": 0.7439, "step": 67880 }, { "epoch": 0.8273311152547743, "grad_norm": 2.5150211395761737, "learning_rate": 9.088518280949326e-07, "loss": 0.7075, "step": 67885 }, { "epoch": 0.8273920514789221, "grad_norm": 2.292437022338418, "learning_rate": 9.085311096856961e-07, "loss": 0.7023, "step": 67890 }, { "epoch": 0.82745298770307, "grad_norm": 2.9238755501441887, "learning_rate": 9.082103912764593e-07, "loss": 0.71, "step": 67895 }, { "epoch": 0.8275139239272178, "grad_norm": 2.604002007219363, "learning_rate": 9.078896728672227e-07, "loss": 0.6921, "step": 67900 }, { "epoch": 0.8275748601513656, "grad_norm": 2.1400287442143, "learning_rate": 9.075689544579859e-07, "loss": 0.7217, "step": 67905 }, { "epoch": 0.8276357963755134, "grad_norm": 3.2303846834338534, "learning_rate": 9.072482360487493e-07, "loss": 0.6679, "step": 67910 }, { "epoch": 0.8276967325996611, "grad_norm": 2.1081419109214643, "learning_rate": 9.069275176395126e-07, "loss": 0.7206, "step": 67915 }, { "epoch": 0.827757668823809, "grad_norm": 4.043814844221396, "learning_rate": 9.066067992302759e-07, "loss": 0.6093, "step": 67920 }, { "epoch": 0.8278186050479568, "grad_norm": 2.1466909366584046, "learning_rate": 9.062860808210391e-07, "loss": 0.726, "step": 67925 }, { "epoch": 0.8278795412721046, "grad_norm": 1.9730883648762456, "learning_rate": 9.059653624118026e-07, "loss": 0.671, "step": 67930 }, { "epoch": 0.8279404774962524, "grad_norm": 2.155894742624399, "learning_rate": 9.056446440025658e-07, "loss": 0.7412, "step": 67935 }, { "epoch": 0.8280014137204003, "grad_norm": 2.3832289391606847, "learning_rate": 9.053239255933291e-07, "loss": 0.7525, "step": 67940 }, { "epoch": 0.8280623499445481, "grad_norm": 2.3725266140556562, "learning_rate": 9.050032071840923e-07, "loss": 0.7769, "step": 67945 }, { "epoch": 0.8281232861686958, "grad_norm": 3.1279333448857916, "learning_rate": 9.046824887748558e-07, "loss": 0.7371, "step": 67950 }, { "epoch": 0.8281842223928436, "grad_norm": 2.2790436050795675, "learning_rate": 9.04361770365619e-07, "loss": 0.7683, "step": 67955 }, { "epoch": 0.8282451586169914, "grad_norm": 2.661884963123946, "learning_rate": 9.040410519563823e-07, "loss": 0.6905, "step": 67960 }, { "epoch": 0.8283060948411393, "grad_norm": 2.1319883074037693, "learning_rate": 9.037203335471456e-07, "loss": 0.6675, "step": 67965 }, { "epoch": 0.8283670310652871, "grad_norm": 3.9245156600976094, "learning_rate": 9.03399615137909e-07, "loss": 0.6934, "step": 67970 }, { "epoch": 0.8284279672894349, "grad_norm": 2.9811198088187796, "learning_rate": 9.030788967286724e-07, "loss": 0.7266, "step": 67975 }, { "epoch": 0.8284889035135827, "grad_norm": 2.031015920676164, "learning_rate": 9.027581783194356e-07, "loss": 0.6682, "step": 67980 }, { "epoch": 0.8285498397377304, "grad_norm": 2.345153427505585, "learning_rate": 9.02437459910199e-07, "loss": 0.7116, "step": 67985 }, { "epoch": 0.8286107759618783, "grad_norm": 2.5422888328655193, "learning_rate": 9.021167415009622e-07, "loss": 0.6927, "step": 67990 }, { "epoch": 0.8286717121860261, "grad_norm": 2.325715252781502, "learning_rate": 9.017960230917256e-07, "loss": 0.7288, "step": 67995 }, { "epoch": 0.8287326484101739, "grad_norm": 2.0582920256874004, "learning_rate": 9.014753046824888e-07, "loss": 0.7412, "step": 68000 }, { "epoch": 0.8287935846343217, "grad_norm": 2.290111428078559, "learning_rate": 9.011545862732523e-07, "loss": 0.7133, "step": 68005 }, { "epoch": 0.8288545208584696, "grad_norm": 2.8074838120993375, "learning_rate": 9.008338678640155e-07, "loss": 0.7235, "step": 68010 }, { "epoch": 0.8289154570826174, "grad_norm": 2.54709026468071, "learning_rate": 9.005131494547788e-07, "loss": 0.6939, "step": 68015 }, { "epoch": 0.8289763933067651, "grad_norm": 2.5464029048710586, "learning_rate": 9.00192431045542e-07, "loss": 0.6557, "step": 68020 }, { "epoch": 0.8290373295309129, "grad_norm": 2.3803477260191177, "learning_rate": 8.998717126363055e-07, "loss": 0.6994, "step": 68025 }, { "epoch": 0.8290982657550607, "grad_norm": 2.6987490036515602, "learning_rate": 8.995509942270687e-07, "loss": 0.614, "step": 68030 }, { "epoch": 0.8291592019792086, "grad_norm": 2.2117807195041777, "learning_rate": 8.99230275817832e-07, "loss": 0.6605, "step": 68035 }, { "epoch": 0.8292201382033564, "grad_norm": 2.1339004735215843, "learning_rate": 8.989095574085953e-07, "loss": 0.7131, "step": 68040 }, { "epoch": 0.8292810744275042, "grad_norm": 2.1333401009985318, "learning_rate": 8.985888389993587e-07, "loss": 0.7657, "step": 68045 }, { "epoch": 0.829342010651652, "grad_norm": 2.06545683777824, "learning_rate": 8.982681205901219e-07, "loss": 0.6703, "step": 68050 }, { "epoch": 0.8294029468757997, "grad_norm": 3.155463580673451, "learning_rate": 8.979474021808853e-07, "loss": 0.7204, "step": 68055 }, { "epoch": 0.8294638830999476, "grad_norm": 2.1330057936855034, "learning_rate": 8.976266837716485e-07, "loss": 0.7092, "step": 68060 }, { "epoch": 0.8295248193240954, "grad_norm": 2.5165957131581056, "learning_rate": 8.97305965362412e-07, "loss": 0.7055, "step": 68065 }, { "epoch": 0.8295857555482432, "grad_norm": 2.802795026832859, "learning_rate": 8.969852469531752e-07, "loss": 0.7353, "step": 68070 }, { "epoch": 0.829646691772391, "grad_norm": 2.5719482027201233, "learning_rate": 8.966645285439385e-07, "loss": 0.6208, "step": 68075 }, { "epoch": 0.8297076279965389, "grad_norm": 2.7551132452451337, "learning_rate": 8.963438101347017e-07, "loss": 0.7826, "step": 68080 }, { "epoch": 0.8297685642206867, "grad_norm": 2.3595999045534413, "learning_rate": 8.960230917254652e-07, "loss": 0.7286, "step": 68085 }, { "epoch": 0.8298295004448344, "grad_norm": 2.4079356762388073, "learning_rate": 8.957023733162284e-07, "loss": 0.6776, "step": 68090 }, { "epoch": 0.8298904366689822, "grad_norm": 2.8343273259926445, "learning_rate": 8.953816549069917e-07, "loss": 0.7277, "step": 68095 }, { "epoch": 0.82995137289313, "grad_norm": 3.037791573940491, "learning_rate": 8.95060936497755e-07, "loss": 0.6566, "step": 68100 }, { "epoch": 0.8300123091172779, "grad_norm": 2.8809758159644296, "learning_rate": 8.947402180885184e-07, "loss": 0.7022, "step": 68105 }, { "epoch": 0.8300732453414257, "grad_norm": 2.8834839521330466, "learning_rate": 8.944194996792816e-07, "loss": 0.6961, "step": 68110 }, { "epoch": 0.8301341815655735, "grad_norm": 2.4742626897833673, "learning_rate": 8.94098781270045e-07, "loss": 0.6607, "step": 68115 }, { "epoch": 0.8301951177897213, "grad_norm": 2.119450924586928, "learning_rate": 8.937780628608082e-07, "loss": 0.7043, "step": 68120 }, { "epoch": 0.830256054013869, "grad_norm": 2.5055711291545006, "learning_rate": 8.934573444515716e-07, "loss": 0.7679, "step": 68125 }, { "epoch": 0.8303169902380169, "grad_norm": 2.972228787571923, "learning_rate": 8.931366260423349e-07, "loss": 0.6793, "step": 68130 }, { "epoch": 0.8303779264621647, "grad_norm": 3.843047978703156, "learning_rate": 8.928159076330982e-07, "loss": 0.7558, "step": 68135 }, { "epoch": 0.8304388626863125, "grad_norm": 2.6856633170323714, "learning_rate": 8.924951892238614e-07, "loss": 0.7613, "step": 68140 }, { "epoch": 0.8304997989104603, "grad_norm": 3.239521698558014, "learning_rate": 8.921744708146249e-07, "loss": 0.6979, "step": 68145 }, { "epoch": 0.8305607351346082, "grad_norm": 2.069731971499118, "learning_rate": 8.918537524053882e-07, "loss": 0.6498, "step": 68150 }, { "epoch": 0.830621671358756, "grad_norm": 2.340768535509239, "learning_rate": 8.915330339961514e-07, "loss": 0.654, "step": 68155 }, { "epoch": 0.8306826075829037, "grad_norm": 2.0233276518503858, "learning_rate": 8.912123155869147e-07, "loss": 0.7481, "step": 68160 }, { "epoch": 0.8307435438070515, "grad_norm": 3.0283452915780837, "learning_rate": 8.908915971776781e-07, "loss": 0.7623, "step": 68165 }, { "epoch": 0.8308044800311993, "grad_norm": 2.6209585472656927, "learning_rate": 8.905708787684414e-07, "loss": 0.674, "step": 68170 }, { "epoch": 0.8308654162553472, "grad_norm": 2.5669059542085924, "learning_rate": 8.902501603592047e-07, "loss": 0.7583, "step": 68175 }, { "epoch": 0.830926352479495, "grad_norm": 3.25408791973438, "learning_rate": 8.899294419499679e-07, "loss": 0.7509, "step": 68180 }, { "epoch": 0.8309872887036428, "grad_norm": 2.3688943322165787, "learning_rate": 8.896087235407313e-07, "loss": 0.7545, "step": 68185 }, { "epoch": 0.8310482249277906, "grad_norm": 3.0470410204293255, "learning_rate": 8.892880051314947e-07, "loss": 0.7252, "step": 68190 }, { "epoch": 0.8311091611519383, "grad_norm": 3.1165258259127597, "learning_rate": 8.889672867222579e-07, "loss": 0.7298, "step": 68195 }, { "epoch": 0.8311700973760862, "grad_norm": 3.131034243158076, "learning_rate": 8.886465683130211e-07, "loss": 0.7368, "step": 68200 }, { "epoch": 0.831231033600234, "grad_norm": 2.3471755919198434, "learning_rate": 8.883258499037846e-07, "loss": 0.7061, "step": 68205 }, { "epoch": 0.8312919698243818, "grad_norm": 2.4957810620918326, "learning_rate": 8.880051314945479e-07, "loss": 0.6964, "step": 68210 }, { "epoch": 0.8313529060485296, "grad_norm": 3.261544127664857, "learning_rate": 8.876844130853111e-07, "loss": 0.7864, "step": 68215 }, { "epoch": 0.8314138422726775, "grad_norm": 2.3378655964477018, "learning_rate": 8.873636946760745e-07, "loss": 0.7379, "step": 68220 }, { "epoch": 0.8314747784968253, "grad_norm": 2.8816140810974633, "learning_rate": 8.870429762668378e-07, "loss": 0.7552, "step": 68225 }, { "epoch": 0.831535714720973, "grad_norm": 2.242393920580102, "learning_rate": 8.867222578576011e-07, "loss": 0.6483, "step": 68230 }, { "epoch": 0.8315966509451208, "grad_norm": 2.950874338302673, "learning_rate": 8.864015394483644e-07, "loss": 0.7069, "step": 68235 }, { "epoch": 0.8316575871692686, "grad_norm": 2.351384736732433, "learning_rate": 8.860808210391277e-07, "loss": 0.6895, "step": 68240 }, { "epoch": 0.8317185233934165, "grad_norm": 2.6510768834394356, "learning_rate": 8.85760102629891e-07, "loss": 0.723, "step": 68245 }, { "epoch": 0.8317794596175643, "grad_norm": 2.2240499719794244, "learning_rate": 8.854393842206544e-07, "loss": 0.6712, "step": 68250 }, { "epoch": 0.8318403958417121, "grad_norm": 2.5256938964129216, "learning_rate": 8.851186658114176e-07, "loss": 0.7306, "step": 68255 }, { "epoch": 0.8319013320658599, "grad_norm": 4.405882888687032, "learning_rate": 8.847979474021809e-07, "loss": 0.7674, "step": 68260 }, { "epoch": 0.8319622682900076, "grad_norm": 1.9396934440855098, "learning_rate": 8.844772289929443e-07, "loss": 0.6987, "step": 68265 }, { "epoch": 0.8320232045141555, "grad_norm": 3.8230115476345885, "learning_rate": 8.841565105837076e-07, "loss": 0.7124, "step": 68270 }, { "epoch": 0.8320841407383033, "grad_norm": 2.6113312540578146, "learning_rate": 8.838357921744708e-07, "loss": 0.7059, "step": 68275 }, { "epoch": 0.8321450769624511, "grad_norm": 2.493989593638243, "learning_rate": 8.835150737652343e-07, "loss": 0.6852, "step": 68280 }, { "epoch": 0.8322060131865989, "grad_norm": 2.6136374601309798, "learning_rate": 8.831943553559975e-07, "loss": 0.8097, "step": 68285 }, { "epoch": 0.8322669494107467, "grad_norm": 2.55550831540161, "learning_rate": 8.828736369467608e-07, "loss": 0.7974, "step": 68290 }, { "epoch": 0.8323278856348946, "grad_norm": 2.377777383830356, "learning_rate": 8.825529185375241e-07, "loss": 0.7293, "step": 68295 }, { "epoch": 0.8323888218590423, "grad_norm": 2.8246732495036526, "learning_rate": 8.822322001282875e-07, "loss": 0.7149, "step": 68300 }, { "epoch": 0.8324497580831901, "grad_norm": 2.5535823565451254, "learning_rate": 8.819114817190507e-07, "loss": 0.7352, "step": 68305 }, { "epoch": 0.8325106943073379, "grad_norm": 1.8153621912655111, "learning_rate": 8.815907633098141e-07, "loss": 0.637, "step": 68310 }, { "epoch": 0.8325716305314858, "grad_norm": 2.37296748721989, "learning_rate": 8.812700449005773e-07, "loss": 0.6967, "step": 68315 }, { "epoch": 0.8326325667556336, "grad_norm": 2.8081328115208173, "learning_rate": 8.809493264913407e-07, "loss": 0.7288, "step": 68320 }, { "epoch": 0.8326935029797814, "grad_norm": 3.123394743221727, "learning_rate": 8.806286080821041e-07, "loss": 0.7185, "step": 68325 }, { "epoch": 0.8327544392039292, "grad_norm": 2.670966934682668, "learning_rate": 8.803078896728673e-07, "loss": 0.7086, "step": 68330 }, { "epoch": 0.8328153754280769, "grad_norm": 2.015598441964018, "learning_rate": 8.799871712636305e-07, "loss": 0.7968, "step": 68335 }, { "epoch": 0.8328763116522248, "grad_norm": 2.626777710251419, "learning_rate": 8.79666452854394e-07, "loss": 0.7669, "step": 68340 }, { "epoch": 0.8329372478763726, "grad_norm": 2.4005081288540677, "learning_rate": 8.793457344451573e-07, "loss": 0.7767, "step": 68345 }, { "epoch": 0.8329981841005204, "grad_norm": 2.676994657486277, "learning_rate": 8.790250160359205e-07, "loss": 0.7121, "step": 68350 }, { "epoch": 0.8330591203246682, "grad_norm": 2.921361326236697, "learning_rate": 8.787042976266838e-07, "loss": 0.6947, "step": 68355 }, { "epoch": 0.833120056548816, "grad_norm": 2.6610441302429524, "learning_rate": 8.783835792174472e-07, "loss": 0.7232, "step": 68360 }, { "epoch": 0.8331809927729639, "grad_norm": 2.8847771650026, "learning_rate": 8.780628608082105e-07, "loss": 0.7076, "step": 68365 }, { "epoch": 0.8332419289971116, "grad_norm": 4.098452473972096, "learning_rate": 8.777421423989738e-07, "loss": 0.7527, "step": 68370 }, { "epoch": 0.8333028652212594, "grad_norm": 2.6989506516248545, "learning_rate": 8.77421423989737e-07, "loss": 0.8199, "step": 68375 }, { "epoch": 0.8333638014454072, "grad_norm": 2.869226527899654, "learning_rate": 8.771007055805004e-07, "loss": 0.6475, "step": 68380 }, { "epoch": 0.833424737669555, "grad_norm": 2.86196478114673, "learning_rate": 8.767799871712638e-07, "loss": 0.6745, "step": 68385 }, { "epoch": 0.8334856738937029, "grad_norm": 2.3513434562018527, "learning_rate": 8.76459268762027e-07, "loss": 0.7112, "step": 68390 }, { "epoch": 0.8335466101178507, "grad_norm": 2.2915605077284877, "learning_rate": 8.761385503527903e-07, "loss": 0.7143, "step": 68395 }, { "epoch": 0.8336075463419985, "grad_norm": 2.609174827933448, "learning_rate": 8.758178319435537e-07, "loss": 0.7165, "step": 68400 }, { "epoch": 0.8336684825661462, "grad_norm": 2.361457083087555, "learning_rate": 8.75497113534317e-07, "loss": 0.7218, "step": 68405 }, { "epoch": 0.833729418790294, "grad_norm": 2.329214864182755, "learning_rate": 8.751763951250802e-07, "loss": 0.6749, "step": 68410 }, { "epoch": 0.8337903550144419, "grad_norm": 2.5929356349155057, "learning_rate": 8.748556767158436e-07, "loss": 0.7774, "step": 68415 }, { "epoch": 0.8338512912385897, "grad_norm": 2.6836309821718873, "learning_rate": 8.745349583066069e-07, "loss": 0.7304, "step": 68420 }, { "epoch": 0.8339122274627375, "grad_norm": 3.318723850850165, "learning_rate": 8.742142398973702e-07, "loss": 0.7186, "step": 68425 }, { "epoch": 0.8339731636868853, "grad_norm": 2.0291507259972175, "learning_rate": 8.738935214881335e-07, "loss": 0.683, "step": 68430 }, { "epoch": 0.8340340999110332, "grad_norm": 2.6716765021747206, "learning_rate": 8.735728030788968e-07, "loss": 0.6913, "step": 68435 }, { "epoch": 0.8340950361351809, "grad_norm": 2.768574381930845, "learning_rate": 8.732520846696601e-07, "loss": 0.8268, "step": 68440 }, { "epoch": 0.8341559723593287, "grad_norm": 2.051177745878202, "learning_rate": 8.729313662604235e-07, "loss": 0.7109, "step": 68445 }, { "epoch": 0.8342169085834765, "grad_norm": 2.413935599622399, "learning_rate": 8.726106478511867e-07, "loss": 0.6692, "step": 68450 }, { "epoch": 0.8342778448076243, "grad_norm": 2.3150215791614444, "learning_rate": 8.7228992944195e-07, "loss": 0.7613, "step": 68455 }, { "epoch": 0.8343387810317722, "grad_norm": 2.1909468169581263, "learning_rate": 8.719692110327134e-07, "loss": 0.7512, "step": 68460 }, { "epoch": 0.83439971725592, "grad_norm": 1.9792616557307967, "learning_rate": 8.716484926234767e-07, "loss": 0.6563, "step": 68465 }, { "epoch": 0.8344606534800678, "grad_norm": 2.255254696688571, "learning_rate": 8.713277742142399e-07, "loss": 0.6653, "step": 68470 }, { "epoch": 0.8345215897042155, "grad_norm": 2.1082890899165165, "learning_rate": 8.710070558050033e-07, "loss": 0.6386, "step": 68475 }, { "epoch": 0.8345825259283634, "grad_norm": 2.491718371588786, "learning_rate": 8.706863373957666e-07, "loss": 0.7203, "step": 68480 }, { "epoch": 0.8346434621525112, "grad_norm": 2.51024154586063, "learning_rate": 8.703656189865299e-07, "loss": 0.6998, "step": 68485 }, { "epoch": 0.834704398376659, "grad_norm": 2.2787112904978777, "learning_rate": 8.700449005772931e-07, "loss": 0.6862, "step": 68490 }, { "epoch": 0.8347653346008068, "grad_norm": 2.7874604963069456, "learning_rate": 8.697241821680565e-07, "loss": 0.7737, "step": 68495 }, { "epoch": 0.8348262708249546, "grad_norm": 2.762710521743989, "learning_rate": 8.694034637588199e-07, "loss": 0.8207, "step": 68500 }, { "epoch": 0.8348872070491025, "grad_norm": 2.5314088460080044, "learning_rate": 8.690827453495832e-07, "loss": 0.7259, "step": 68505 }, { "epoch": 0.8349481432732502, "grad_norm": 2.9655895122627998, "learning_rate": 8.687620269403464e-07, "loss": 0.7247, "step": 68510 }, { "epoch": 0.835009079497398, "grad_norm": 2.8910559488061573, "learning_rate": 8.684413085311097e-07, "loss": 0.7489, "step": 68515 }, { "epoch": 0.8350700157215458, "grad_norm": 2.6171298977404054, "learning_rate": 8.681205901218732e-07, "loss": 0.7104, "step": 68520 }, { "epoch": 0.8351309519456936, "grad_norm": 3.2129822631182936, "learning_rate": 8.677998717126364e-07, "loss": 0.7141, "step": 68525 }, { "epoch": 0.8351918881698415, "grad_norm": 2.813891666148527, "learning_rate": 8.674791533033996e-07, "loss": 0.7373, "step": 68530 }, { "epoch": 0.8352528243939893, "grad_norm": 3.2275840855717015, "learning_rate": 8.671584348941629e-07, "loss": 0.6803, "step": 68535 }, { "epoch": 0.835313760618137, "grad_norm": 2.079170159828591, "learning_rate": 8.668377164849264e-07, "loss": 0.7372, "step": 68540 }, { "epoch": 0.8353746968422848, "grad_norm": 2.9532745938403266, "learning_rate": 8.665169980756896e-07, "loss": 0.7232, "step": 68545 }, { "epoch": 0.8354356330664326, "grad_norm": 2.4082459791561033, "learning_rate": 8.661962796664528e-07, "loss": 0.728, "step": 68550 }, { "epoch": 0.8354965692905805, "grad_norm": 3.1340300209011853, "learning_rate": 8.658755612572162e-07, "loss": 0.7161, "step": 68555 }, { "epoch": 0.8355575055147283, "grad_norm": 2.451947637494099, "learning_rate": 8.655548428479796e-07, "loss": 0.8538, "step": 68560 }, { "epoch": 0.8356184417388761, "grad_norm": 1.9533378609128837, "learning_rate": 8.652341244387428e-07, "loss": 0.6796, "step": 68565 }, { "epoch": 0.8356793779630239, "grad_norm": 2.556699269046405, "learning_rate": 8.649134060295062e-07, "loss": 0.6909, "step": 68570 }, { "epoch": 0.8357403141871717, "grad_norm": 2.787746494442647, "learning_rate": 8.645926876202694e-07, "loss": 0.7244, "step": 68575 }, { "epoch": 0.8358012504113195, "grad_norm": 3.1592770432321977, "learning_rate": 8.642719692110329e-07, "loss": 0.6785, "step": 68580 }, { "epoch": 0.8358621866354673, "grad_norm": 2.5858936418416523, "learning_rate": 8.639512508017961e-07, "loss": 0.6821, "step": 68585 }, { "epoch": 0.8359231228596151, "grad_norm": 2.254898080873582, "learning_rate": 8.636305323925594e-07, "loss": 0.6399, "step": 68590 }, { "epoch": 0.8359840590837629, "grad_norm": 2.3245736136845467, "learning_rate": 8.633098139833227e-07, "loss": 0.6841, "step": 68595 }, { "epoch": 0.8360449953079108, "grad_norm": 4.460189502445428, "learning_rate": 8.629890955740861e-07, "loss": 0.7606, "step": 68600 }, { "epoch": 0.8361059315320586, "grad_norm": 2.166659628725092, "learning_rate": 8.626683771648493e-07, "loss": 0.7724, "step": 68605 }, { "epoch": 0.8361668677562063, "grad_norm": 2.584487780132711, "learning_rate": 8.623476587556126e-07, "loss": 0.7236, "step": 68610 }, { "epoch": 0.8362278039803541, "grad_norm": 3.6826401255324255, "learning_rate": 8.62026940346376e-07, "loss": 0.7292, "step": 68615 }, { "epoch": 0.836288740204502, "grad_norm": 2.1109186004603475, "learning_rate": 8.617062219371393e-07, "loss": 0.675, "step": 68620 }, { "epoch": 0.8363496764286498, "grad_norm": 2.492967569210042, "learning_rate": 8.613855035279025e-07, "loss": 0.6532, "step": 68625 }, { "epoch": 0.8364106126527976, "grad_norm": 2.479846975865965, "learning_rate": 8.610647851186659e-07, "loss": 0.6882, "step": 68630 }, { "epoch": 0.8364715488769454, "grad_norm": 2.4364761100214953, "learning_rate": 8.607440667094292e-07, "loss": 0.7453, "step": 68635 }, { "epoch": 0.8365324851010932, "grad_norm": 2.265494316066564, "learning_rate": 8.604233483001925e-07, "loss": 0.672, "step": 68640 }, { "epoch": 0.836593421325241, "grad_norm": 2.386073197620558, "learning_rate": 8.601026298909558e-07, "loss": 0.6894, "step": 68645 }, { "epoch": 0.8366543575493888, "grad_norm": 2.2804028620193093, "learning_rate": 8.597819114817191e-07, "loss": 0.7421, "step": 68650 }, { "epoch": 0.8367152937735366, "grad_norm": 2.5857450678591367, "learning_rate": 8.594611930724824e-07, "loss": 0.7428, "step": 68655 }, { "epoch": 0.8367762299976844, "grad_norm": 2.5037224274716685, "learning_rate": 8.591404746632458e-07, "loss": 0.7351, "step": 68660 }, { "epoch": 0.8368371662218322, "grad_norm": 2.7542830100170845, "learning_rate": 8.58819756254009e-07, "loss": 0.7989, "step": 68665 }, { "epoch": 0.8368981024459801, "grad_norm": 2.4211265058507823, "learning_rate": 8.584990378447723e-07, "loss": 0.6855, "step": 68670 }, { "epoch": 0.8369590386701279, "grad_norm": 2.4566571397796646, "learning_rate": 8.581783194355358e-07, "loss": 0.7616, "step": 68675 }, { "epoch": 0.8370199748942756, "grad_norm": 2.939559496046353, "learning_rate": 8.57857601026299e-07, "loss": 0.71, "step": 68680 }, { "epoch": 0.8370809111184234, "grad_norm": 2.8423770198612415, "learning_rate": 8.575368826170622e-07, "loss": 0.7173, "step": 68685 }, { "epoch": 0.8371418473425712, "grad_norm": 2.033417772671209, "learning_rate": 8.572161642078256e-07, "loss": 0.6431, "step": 68690 }, { "epoch": 0.8372027835667191, "grad_norm": 2.4781455053131087, "learning_rate": 8.56895445798589e-07, "loss": 0.6823, "step": 68695 }, { "epoch": 0.8372637197908669, "grad_norm": 2.3537906303182883, "learning_rate": 8.565747273893522e-07, "loss": 0.6796, "step": 68700 }, { "epoch": 0.8373246560150147, "grad_norm": 2.603371087233869, "learning_rate": 8.562540089801155e-07, "loss": 0.699, "step": 68705 }, { "epoch": 0.8373855922391625, "grad_norm": 1.9506281151762848, "learning_rate": 8.559332905708788e-07, "loss": 0.6524, "step": 68710 }, { "epoch": 0.8374465284633102, "grad_norm": 2.7859735976068456, "learning_rate": 8.556125721616422e-07, "loss": 0.718, "step": 68715 }, { "epoch": 0.8375074646874581, "grad_norm": 2.4249945105822373, "learning_rate": 8.552918537524055e-07, "loss": 0.7186, "step": 68720 }, { "epoch": 0.8375684009116059, "grad_norm": 2.7276461868279536, "learning_rate": 8.549711353431687e-07, "loss": 0.7893, "step": 68725 }, { "epoch": 0.8376293371357537, "grad_norm": 2.2374390354295692, "learning_rate": 8.54650416933932e-07, "loss": 0.7519, "step": 68730 }, { "epoch": 0.8376902733599015, "grad_norm": 1.9428195137655762, "learning_rate": 8.543296985246955e-07, "loss": 0.6944, "step": 68735 }, { "epoch": 0.8377512095840494, "grad_norm": 2.346370273288357, "learning_rate": 8.540089801154587e-07, "loss": 0.6861, "step": 68740 }, { "epoch": 0.8378121458081972, "grad_norm": 2.380824756407677, "learning_rate": 8.53688261706222e-07, "loss": 0.6598, "step": 68745 }, { "epoch": 0.8378730820323449, "grad_norm": 2.5205227394309015, "learning_rate": 8.533675432969853e-07, "loss": 0.7337, "step": 68750 }, { "epoch": 0.8379340182564927, "grad_norm": 3.424393879561758, "learning_rate": 8.530468248877487e-07, "loss": 0.7168, "step": 68755 }, { "epoch": 0.8379949544806405, "grad_norm": 2.441122870565609, "learning_rate": 8.527261064785119e-07, "loss": 0.7397, "step": 68760 }, { "epoch": 0.8380558907047884, "grad_norm": 2.316697110761656, "learning_rate": 8.524053880692753e-07, "loss": 0.7386, "step": 68765 }, { "epoch": 0.8381168269289362, "grad_norm": 2.7446712515103466, "learning_rate": 8.520846696600385e-07, "loss": 0.678, "step": 68770 }, { "epoch": 0.838177763153084, "grad_norm": 2.5628173611995932, "learning_rate": 8.517639512508019e-07, "loss": 0.6933, "step": 68775 }, { "epoch": 0.8382386993772318, "grad_norm": 3.003881467636877, "learning_rate": 8.514432328415652e-07, "loss": 0.7927, "step": 68780 }, { "epoch": 0.8382996356013795, "grad_norm": 2.5571798835977635, "learning_rate": 8.511225144323285e-07, "loss": 0.6702, "step": 68785 }, { "epoch": 0.8383605718255274, "grad_norm": 2.9296060833168953, "learning_rate": 8.508017960230917e-07, "loss": 0.7424, "step": 68790 }, { "epoch": 0.8384215080496752, "grad_norm": 2.479180826475556, "learning_rate": 8.504810776138552e-07, "loss": 0.7709, "step": 68795 }, { "epoch": 0.838482444273823, "grad_norm": 2.485059122676618, "learning_rate": 8.501603592046184e-07, "loss": 0.7206, "step": 68800 }, { "epoch": 0.8385433804979708, "grad_norm": 2.7656091440102086, "learning_rate": 8.498396407953817e-07, "loss": 0.7731, "step": 68805 }, { "epoch": 0.8386043167221187, "grad_norm": 2.416143276676119, "learning_rate": 8.49518922386145e-07, "loss": 0.7854, "step": 68810 }, { "epoch": 0.8386652529462665, "grad_norm": 2.6204990044157648, "learning_rate": 8.491982039769084e-07, "loss": 0.7326, "step": 68815 }, { "epoch": 0.8387261891704142, "grad_norm": 2.2951816668645826, "learning_rate": 8.488774855676716e-07, "loss": 0.6782, "step": 68820 }, { "epoch": 0.838787125394562, "grad_norm": 2.3480112883080597, "learning_rate": 8.48556767158435e-07, "loss": 0.8035, "step": 68825 }, { "epoch": 0.8388480616187098, "grad_norm": 2.3583129064444663, "learning_rate": 8.482360487491982e-07, "loss": 0.7243, "step": 68830 }, { "epoch": 0.8389089978428577, "grad_norm": 3.6681819594218927, "learning_rate": 8.479153303399616e-07, "loss": 0.6832, "step": 68835 }, { "epoch": 0.8389699340670055, "grad_norm": 2.4634389876010983, "learning_rate": 8.475946119307249e-07, "loss": 0.6713, "step": 68840 }, { "epoch": 0.8390308702911533, "grad_norm": 2.9069136132441384, "learning_rate": 8.472738935214882e-07, "loss": 0.6963, "step": 68845 }, { "epoch": 0.8390918065153011, "grad_norm": 2.412284871093627, "learning_rate": 8.469531751122514e-07, "loss": 0.6827, "step": 68850 }, { "epoch": 0.8391527427394488, "grad_norm": 2.4963893522421907, "learning_rate": 8.466324567030149e-07, "loss": 0.7845, "step": 68855 }, { "epoch": 0.8392136789635967, "grad_norm": 2.2215966724495715, "learning_rate": 8.463117382937781e-07, "loss": 0.6792, "step": 68860 }, { "epoch": 0.8392746151877445, "grad_norm": 2.6888884197565845, "learning_rate": 8.459910198845414e-07, "loss": 0.756, "step": 68865 }, { "epoch": 0.8393355514118923, "grad_norm": 2.5382620787216315, "learning_rate": 8.456703014753047e-07, "loss": 0.7441, "step": 68870 }, { "epoch": 0.8393964876360401, "grad_norm": 2.4593049063563495, "learning_rate": 8.453495830660681e-07, "loss": 0.7875, "step": 68875 }, { "epoch": 0.839457423860188, "grad_norm": 2.6644672306661104, "learning_rate": 8.450288646568313e-07, "loss": 0.7119, "step": 68880 }, { "epoch": 0.8395183600843358, "grad_norm": 2.7964231154209127, "learning_rate": 8.447081462475947e-07, "loss": 0.7691, "step": 68885 }, { "epoch": 0.8395792963084835, "grad_norm": 2.313452597114868, "learning_rate": 8.443874278383581e-07, "loss": 0.6835, "step": 68890 }, { "epoch": 0.8396402325326313, "grad_norm": 2.5271350550325544, "learning_rate": 8.440667094291213e-07, "loss": 0.7951, "step": 68895 }, { "epoch": 0.8397011687567791, "grad_norm": 2.1059027594653656, "learning_rate": 8.437459910198846e-07, "loss": 0.7279, "step": 68900 }, { "epoch": 0.839762104980927, "grad_norm": 2.042698540695207, "learning_rate": 8.434252726106479e-07, "loss": 0.6596, "step": 68905 }, { "epoch": 0.8398230412050748, "grad_norm": 2.5503653031438125, "learning_rate": 8.431045542014113e-07, "loss": 0.738, "step": 68910 }, { "epoch": 0.8398839774292226, "grad_norm": 2.3925988709117405, "learning_rate": 8.427838357921746e-07, "loss": 0.6752, "step": 68915 }, { "epoch": 0.8399449136533704, "grad_norm": 2.9113380652243235, "learning_rate": 8.424631173829379e-07, "loss": 0.7571, "step": 68920 }, { "epoch": 0.8400058498775181, "grad_norm": 2.4840505825093713, "learning_rate": 8.421423989737011e-07, "loss": 0.6936, "step": 68925 }, { "epoch": 0.840066786101666, "grad_norm": 2.247524278378319, "learning_rate": 8.418216805644646e-07, "loss": 0.7483, "step": 68930 }, { "epoch": 0.8401277223258138, "grad_norm": 2.4458383488250472, "learning_rate": 8.415009621552278e-07, "loss": 0.7509, "step": 68935 }, { "epoch": 0.8401886585499616, "grad_norm": 2.3488216366406918, "learning_rate": 8.411802437459911e-07, "loss": 0.6785, "step": 68940 }, { "epoch": 0.8402495947741094, "grad_norm": 2.767585477571255, "learning_rate": 8.408595253367544e-07, "loss": 0.7241, "step": 68945 }, { "epoch": 0.8403105309982573, "grad_norm": 2.843105382977346, "learning_rate": 8.405388069275178e-07, "loss": 0.739, "step": 68950 }, { "epoch": 0.8403714672224051, "grad_norm": 2.3651937164306873, "learning_rate": 8.40218088518281e-07, "loss": 0.6942, "step": 68955 }, { "epoch": 0.8404324034465528, "grad_norm": 2.1362515035729333, "learning_rate": 8.398973701090444e-07, "loss": 0.768, "step": 68960 }, { "epoch": 0.8404933396707006, "grad_norm": 2.5873702737443858, "learning_rate": 8.395766516998076e-07, "loss": 0.6851, "step": 68965 }, { "epoch": 0.8405542758948484, "grad_norm": 2.4699824276487887, "learning_rate": 8.39255933290571e-07, "loss": 0.7098, "step": 68970 }, { "epoch": 0.8406152121189963, "grad_norm": 3.6320338631983216, "learning_rate": 8.389352148813343e-07, "loss": 0.7584, "step": 68975 }, { "epoch": 0.8406761483431441, "grad_norm": 2.965145287574181, "learning_rate": 8.386144964720976e-07, "loss": 0.7604, "step": 68980 }, { "epoch": 0.8407370845672919, "grad_norm": 3.1633427570762827, "learning_rate": 8.382937780628608e-07, "loss": 0.7042, "step": 68985 }, { "epoch": 0.8407980207914397, "grad_norm": 2.8518178067410695, "learning_rate": 8.379730596536243e-07, "loss": 0.6856, "step": 68990 }, { "epoch": 0.8408589570155874, "grad_norm": 2.694252944840495, "learning_rate": 8.376523412443875e-07, "loss": 0.7071, "step": 68995 }, { "epoch": 0.8409198932397353, "grad_norm": 2.744362581803758, "learning_rate": 8.373316228351508e-07, "loss": 0.6703, "step": 69000 }, { "epoch": 0.8409808294638831, "grad_norm": 2.4344223239985823, "learning_rate": 8.37010904425914e-07, "loss": 0.7512, "step": 69005 }, { "epoch": 0.8410417656880309, "grad_norm": 2.612366291926994, "learning_rate": 8.366901860166775e-07, "loss": 0.7469, "step": 69010 }, { "epoch": 0.8411027019121787, "grad_norm": 2.664171345006734, "learning_rate": 8.363694676074407e-07, "loss": 0.6504, "step": 69015 }, { "epoch": 0.8411636381363266, "grad_norm": 1.9454465791264925, "learning_rate": 8.360487491982041e-07, "loss": 0.6797, "step": 69020 }, { "epoch": 0.8412245743604744, "grad_norm": 2.6167258862116203, "learning_rate": 8.357280307889673e-07, "loss": 0.7108, "step": 69025 }, { "epoch": 0.8412855105846221, "grad_norm": 2.291713718795671, "learning_rate": 8.354073123797307e-07, "loss": 0.7008, "step": 69030 }, { "epoch": 0.8413464468087699, "grad_norm": 2.4838972681016402, "learning_rate": 8.35086593970494e-07, "loss": 0.7751, "step": 69035 }, { "epoch": 0.8414073830329177, "grad_norm": 2.455723324732614, "learning_rate": 8.347658755612573e-07, "loss": 0.7234, "step": 69040 }, { "epoch": 0.8414683192570656, "grad_norm": 2.5277578853553258, "learning_rate": 8.344451571520205e-07, "loss": 0.7111, "step": 69045 }, { "epoch": 0.8415292554812134, "grad_norm": 2.87806581309349, "learning_rate": 8.34124438742784e-07, "loss": 0.7624, "step": 69050 }, { "epoch": 0.8415901917053612, "grad_norm": 2.621048328129328, "learning_rate": 8.338037203335472e-07, "loss": 0.7007, "step": 69055 }, { "epoch": 0.841651127929509, "grad_norm": 2.3329491515440686, "learning_rate": 8.334830019243105e-07, "loss": 0.7044, "step": 69060 }, { "epoch": 0.8417120641536567, "grad_norm": 2.153522882964694, "learning_rate": 8.331622835150737e-07, "loss": 0.7378, "step": 69065 }, { "epoch": 0.8417730003778046, "grad_norm": 3.1385928885187617, "learning_rate": 8.328415651058372e-07, "loss": 0.7697, "step": 69070 }, { "epoch": 0.8418339366019524, "grad_norm": 2.30377240159261, "learning_rate": 8.325208466966004e-07, "loss": 0.7026, "step": 69075 }, { "epoch": 0.8418948728261002, "grad_norm": 2.863794456005773, "learning_rate": 8.322001282873638e-07, "loss": 0.6671, "step": 69080 }, { "epoch": 0.841955809050248, "grad_norm": 2.472025341303988, "learning_rate": 8.31879409878127e-07, "loss": 0.7129, "step": 69085 }, { "epoch": 0.8420167452743959, "grad_norm": 1.7049887709121494, "learning_rate": 8.315586914688904e-07, "loss": 0.6327, "step": 69090 }, { "epoch": 0.8420776814985437, "grad_norm": 2.314606518919848, "learning_rate": 8.312379730596538e-07, "loss": 0.7266, "step": 69095 }, { "epoch": 0.8421386177226914, "grad_norm": 2.503934357291869, "learning_rate": 8.30917254650417e-07, "loss": 0.7429, "step": 69100 }, { "epoch": 0.8421995539468392, "grad_norm": 2.466347012420971, "learning_rate": 8.305965362411802e-07, "loss": 0.682, "step": 69105 }, { "epoch": 0.842260490170987, "grad_norm": 3.566841720696656, "learning_rate": 8.302758178319437e-07, "loss": 0.6448, "step": 69110 }, { "epoch": 0.8423214263951349, "grad_norm": 2.446039397856778, "learning_rate": 8.29955099422707e-07, "loss": 0.6501, "step": 69115 }, { "epoch": 0.8423823626192827, "grad_norm": 3.0121292309649044, "learning_rate": 8.296343810134702e-07, "loss": 0.7173, "step": 69120 }, { "epoch": 0.8424432988434305, "grad_norm": 2.3811236229859336, "learning_rate": 8.293136626042334e-07, "loss": 0.7093, "step": 69125 }, { "epoch": 0.8425042350675783, "grad_norm": 2.402428720270514, "learning_rate": 8.289929441949969e-07, "loss": 0.7152, "step": 69130 }, { "epoch": 0.842565171291726, "grad_norm": 2.64438783124761, "learning_rate": 8.286722257857602e-07, "loss": 0.7396, "step": 69135 }, { "epoch": 0.8426261075158739, "grad_norm": 2.547266998004949, "learning_rate": 8.283515073765234e-07, "loss": 0.6569, "step": 69140 }, { "epoch": 0.8426870437400217, "grad_norm": 2.157084347167501, "learning_rate": 8.280307889672867e-07, "loss": 0.6896, "step": 69145 }, { "epoch": 0.8427479799641695, "grad_norm": 2.424685518237245, "learning_rate": 8.277100705580501e-07, "loss": 0.6559, "step": 69150 }, { "epoch": 0.8428089161883173, "grad_norm": 2.238546075528217, "learning_rate": 8.273893521488134e-07, "loss": 0.7069, "step": 69155 }, { "epoch": 0.8428698524124651, "grad_norm": 3.3997957166066946, "learning_rate": 8.270686337395767e-07, "loss": 0.7637, "step": 69160 }, { "epoch": 0.842930788636613, "grad_norm": 2.3092363486546823, "learning_rate": 8.2674791533034e-07, "loss": 0.6848, "step": 69165 }, { "epoch": 0.8429917248607607, "grad_norm": 2.716184387495599, "learning_rate": 8.264271969211033e-07, "loss": 0.7257, "step": 69170 }, { "epoch": 0.8430526610849085, "grad_norm": 3.0246141608045685, "learning_rate": 8.261064785118667e-07, "loss": 0.7774, "step": 69175 }, { "epoch": 0.8431135973090563, "grad_norm": 2.1978581687560537, "learning_rate": 8.257857601026299e-07, "loss": 0.6394, "step": 69180 }, { "epoch": 0.8431745335332042, "grad_norm": 2.280220813553911, "learning_rate": 8.254650416933934e-07, "loss": 0.6442, "step": 69185 }, { "epoch": 0.843235469757352, "grad_norm": 2.7446130222489264, "learning_rate": 8.251443232841566e-07, "loss": 0.77, "step": 69190 }, { "epoch": 0.8432964059814998, "grad_norm": 2.6871758297457555, "learning_rate": 8.248236048749199e-07, "loss": 0.681, "step": 69195 }, { "epoch": 0.8433573422056476, "grad_norm": 2.2759669146277144, "learning_rate": 8.245028864656831e-07, "loss": 0.7404, "step": 69200 }, { "epoch": 0.8434182784297953, "grad_norm": 3.5462179838688757, "learning_rate": 8.241821680564466e-07, "loss": 0.7585, "step": 69205 }, { "epoch": 0.8434792146539432, "grad_norm": 2.9877629931003646, "learning_rate": 8.238614496472098e-07, "loss": 0.7002, "step": 69210 }, { "epoch": 0.843540150878091, "grad_norm": 2.7045609342392116, "learning_rate": 8.235407312379731e-07, "loss": 0.7194, "step": 69215 }, { "epoch": 0.8436010871022388, "grad_norm": 2.2917829328056665, "learning_rate": 8.232200128287364e-07, "loss": 0.7594, "step": 69220 }, { "epoch": 0.8436620233263866, "grad_norm": 2.1383098821800037, "learning_rate": 8.228992944194998e-07, "loss": 0.6808, "step": 69225 }, { "epoch": 0.8437229595505344, "grad_norm": 2.5837910757501943, "learning_rate": 8.22578576010263e-07, "loss": 0.735, "step": 69230 }, { "epoch": 0.8437838957746823, "grad_norm": 1.8149066115813317, "learning_rate": 8.222578576010264e-07, "loss": 0.7132, "step": 69235 }, { "epoch": 0.84384483199883, "grad_norm": 2.2866767252968145, "learning_rate": 8.219371391917896e-07, "loss": 0.6321, "step": 69240 }, { "epoch": 0.8439057682229778, "grad_norm": 2.7930136532105387, "learning_rate": 8.21616420782553e-07, "loss": 0.7291, "step": 69245 }, { "epoch": 0.8439667044471256, "grad_norm": 2.393189038836514, "learning_rate": 8.212957023733163e-07, "loss": 0.6498, "step": 69250 }, { "epoch": 0.8440276406712734, "grad_norm": 2.554736137241258, "learning_rate": 8.209749839640796e-07, "loss": 0.7015, "step": 69255 }, { "epoch": 0.8440885768954213, "grad_norm": 3.0959861486870324, "learning_rate": 8.206542655548428e-07, "loss": 0.716, "step": 69260 }, { "epoch": 0.8441495131195691, "grad_norm": 2.554007332118459, "learning_rate": 8.203335471456063e-07, "loss": 0.7403, "step": 69265 }, { "epoch": 0.8442104493437169, "grad_norm": 2.4193480006994665, "learning_rate": 8.200128287363696e-07, "loss": 0.7366, "step": 69270 }, { "epoch": 0.8442713855678646, "grad_norm": 2.3052865774907083, "learning_rate": 8.196921103271328e-07, "loss": 0.7543, "step": 69275 }, { "epoch": 0.8443323217920125, "grad_norm": 3.059002908321026, "learning_rate": 8.193713919178961e-07, "loss": 0.7406, "step": 69280 }, { "epoch": 0.8443932580161603, "grad_norm": 2.402386014586707, "learning_rate": 8.190506735086595e-07, "loss": 0.7585, "step": 69285 }, { "epoch": 0.8444541942403081, "grad_norm": 2.747225059990042, "learning_rate": 8.187299550994228e-07, "loss": 0.7223, "step": 69290 }, { "epoch": 0.8445151304644559, "grad_norm": 2.164219251273893, "learning_rate": 8.184092366901861e-07, "loss": 0.6496, "step": 69295 }, { "epoch": 0.8445760666886037, "grad_norm": 2.8761926261589306, "learning_rate": 8.180885182809493e-07, "loss": 0.7684, "step": 69300 }, { "epoch": 0.8446370029127516, "grad_norm": 2.5246189350384536, "learning_rate": 8.177677998717127e-07, "loss": 0.7497, "step": 69305 }, { "epoch": 0.8446979391368993, "grad_norm": 2.275572193392693, "learning_rate": 8.174470814624761e-07, "loss": 0.6931, "step": 69310 }, { "epoch": 0.8447588753610471, "grad_norm": 2.470669876882375, "learning_rate": 8.171263630532393e-07, "loss": 0.8336, "step": 69315 }, { "epoch": 0.8448198115851949, "grad_norm": 2.6289241111265382, "learning_rate": 8.168056446440025e-07, "loss": 0.6932, "step": 69320 }, { "epoch": 0.8448807478093427, "grad_norm": 2.3200173821172974, "learning_rate": 8.16484926234766e-07, "loss": 0.6897, "step": 69325 }, { "epoch": 0.8449416840334906, "grad_norm": 2.479812093674393, "learning_rate": 8.161642078255293e-07, "loss": 0.6403, "step": 69330 }, { "epoch": 0.8450026202576384, "grad_norm": 2.2877272893898284, "learning_rate": 8.158434894162925e-07, "loss": 0.7496, "step": 69335 }, { "epoch": 0.8450635564817862, "grad_norm": 2.1014327286257197, "learning_rate": 8.155227710070559e-07, "loss": 0.6942, "step": 69340 }, { "epoch": 0.8451244927059339, "grad_norm": 2.521345053827281, "learning_rate": 8.152020525978192e-07, "loss": 0.6663, "step": 69345 }, { "epoch": 0.8451854289300818, "grad_norm": 2.645502967344771, "learning_rate": 8.148813341885825e-07, "loss": 0.7553, "step": 69350 }, { "epoch": 0.8452463651542296, "grad_norm": 2.396625893608495, "learning_rate": 8.145606157793458e-07, "loss": 0.756, "step": 69355 }, { "epoch": 0.8453073013783774, "grad_norm": 2.019888845261268, "learning_rate": 8.142398973701091e-07, "loss": 0.7278, "step": 69360 }, { "epoch": 0.8453682376025252, "grad_norm": 3.911236544919196, "learning_rate": 8.139191789608724e-07, "loss": 0.6924, "step": 69365 }, { "epoch": 0.845429173826673, "grad_norm": 3.0538329544529352, "learning_rate": 8.135984605516358e-07, "loss": 0.7272, "step": 69370 }, { "epoch": 0.8454901100508209, "grad_norm": 3.0394377368774195, "learning_rate": 8.13277742142399e-07, "loss": 0.6985, "step": 69375 }, { "epoch": 0.8455510462749686, "grad_norm": 2.497349830171297, "learning_rate": 8.129570237331623e-07, "loss": 0.6963, "step": 69380 }, { "epoch": 0.8456119824991164, "grad_norm": 2.186043509988817, "learning_rate": 8.126363053239257e-07, "loss": 0.6439, "step": 69385 }, { "epoch": 0.8456729187232642, "grad_norm": 2.7558758852038494, "learning_rate": 8.12315586914689e-07, "loss": 0.6239, "step": 69390 }, { "epoch": 0.845733854947412, "grad_norm": 3.0792875442805134, "learning_rate": 8.119948685054522e-07, "loss": 0.7946, "step": 69395 }, { "epoch": 0.8457947911715599, "grad_norm": 2.5769461972639527, "learning_rate": 8.116741500962156e-07, "loss": 0.7812, "step": 69400 }, { "epoch": 0.8458557273957077, "grad_norm": 2.4212386614064623, "learning_rate": 8.113534316869789e-07, "loss": 0.7124, "step": 69405 }, { "epoch": 0.8459166636198555, "grad_norm": 2.319696824010445, "learning_rate": 8.110327132777422e-07, "loss": 0.7097, "step": 69410 }, { "epoch": 0.8459775998440032, "grad_norm": 2.149431159799512, "learning_rate": 8.107119948685055e-07, "loss": 0.6869, "step": 69415 }, { "epoch": 0.846038536068151, "grad_norm": 2.33302495307048, "learning_rate": 8.103912764592688e-07, "loss": 0.7405, "step": 69420 }, { "epoch": 0.8460994722922989, "grad_norm": 2.4938256477358376, "learning_rate": 8.100705580500321e-07, "loss": 0.7433, "step": 69425 }, { "epoch": 0.8461604085164467, "grad_norm": 2.295906850804997, "learning_rate": 8.097498396407955e-07, "loss": 0.7296, "step": 69430 }, { "epoch": 0.8462213447405945, "grad_norm": 2.527307788322963, "learning_rate": 8.094291212315587e-07, "loss": 0.7021, "step": 69435 }, { "epoch": 0.8462822809647423, "grad_norm": 2.161312828657728, "learning_rate": 8.09108402822322e-07, "loss": 0.6801, "step": 69440 }, { "epoch": 0.8463432171888902, "grad_norm": 2.7454292436294403, "learning_rate": 8.087876844130855e-07, "loss": 0.6854, "step": 69445 }, { "epoch": 0.8464041534130379, "grad_norm": 3.1325839496771564, "learning_rate": 8.084669660038487e-07, "loss": 0.6824, "step": 69450 }, { "epoch": 0.8464650896371857, "grad_norm": 2.570319200269122, "learning_rate": 8.081462475946119e-07, "loss": 0.7103, "step": 69455 }, { "epoch": 0.8465260258613335, "grad_norm": 2.636393220982342, "learning_rate": 8.078255291853753e-07, "loss": 0.7171, "step": 69460 }, { "epoch": 0.8465869620854813, "grad_norm": 2.471666693988169, "learning_rate": 8.075048107761387e-07, "loss": 0.6802, "step": 69465 }, { "epoch": 0.8466478983096292, "grad_norm": 2.082362793463948, "learning_rate": 8.071840923669019e-07, "loss": 0.6641, "step": 69470 }, { "epoch": 0.846708834533777, "grad_norm": 2.9993965993174934, "learning_rate": 8.068633739576652e-07, "loss": 0.7288, "step": 69475 }, { "epoch": 0.8467697707579248, "grad_norm": 2.2507283166381935, "learning_rate": 8.065426555484285e-07, "loss": 0.7103, "step": 69480 }, { "epoch": 0.8468307069820725, "grad_norm": 4.021330244341244, "learning_rate": 8.062219371391919e-07, "loss": 0.7138, "step": 69485 }, { "epoch": 0.8468916432062203, "grad_norm": 2.177715747489099, "learning_rate": 8.059012187299552e-07, "loss": 0.7511, "step": 69490 }, { "epoch": 0.8469525794303682, "grad_norm": 3.1934063584536694, "learning_rate": 8.055805003207184e-07, "loss": 0.7193, "step": 69495 }, { "epoch": 0.847013515654516, "grad_norm": 2.447834089160728, "learning_rate": 8.052597819114818e-07, "loss": 0.6727, "step": 69500 }, { "epoch": 0.8470744518786638, "grad_norm": 2.9421223077264838, "learning_rate": 8.049390635022452e-07, "loss": 0.6788, "step": 69505 }, { "epoch": 0.8471353881028116, "grad_norm": 2.6475629307631237, "learning_rate": 8.046183450930084e-07, "loss": 0.7222, "step": 69510 }, { "epoch": 0.8471963243269593, "grad_norm": 2.0834412965859954, "learning_rate": 8.042976266837717e-07, "loss": 0.6579, "step": 69515 }, { "epoch": 0.8472572605511072, "grad_norm": 2.399118422653714, "learning_rate": 8.039769082745351e-07, "loss": 0.7706, "step": 69520 }, { "epoch": 0.847318196775255, "grad_norm": 2.658320630885498, "learning_rate": 8.036561898652984e-07, "loss": 0.6213, "step": 69525 }, { "epoch": 0.8473791329994028, "grad_norm": 6.662176897112972, "learning_rate": 8.033354714560616e-07, "loss": 0.7352, "step": 69530 }, { "epoch": 0.8474400692235506, "grad_norm": 2.507107270515866, "learning_rate": 8.03014753046825e-07, "loss": 0.7918, "step": 69535 }, { "epoch": 0.8475010054476985, "grad_norm": 2.498889746770374, "learning_rate": 8.026940346375883e-07, "loss": 0.7543, "step": 69540 }, { "epoch": 0.8475619416718463, "grad_norm": 2.6822474349300265, "learning_rate": 8.023733162283516e-07, "loss": 0.6927, "step": 69545 }, { "epoch": 0.847622877895994, "grad_norm": 2.7153439518678835, "learning_rate": 8.020525978191149e-07, "loss": 0.7695, "step": 69550 }, { "epoch": 0.8476838141201418, "grad_norm": 2.264802710855302, "learning_rate": 8.017318794098782e-07, "loss": 0.7542, "step": 69555 }, { "epoch": 0.8477447503442896, "grad_norm": 2.4725169433661076, "learning_rate": 8.014111610006415e-07, "loss": 0.6581, "step": 69560 }, { "epoch": 0.8478056865684375, "grad_norm": 2.1827250761659664, "learning_rate": 8.010904425914049e-07, "loss": 0.7207, "step": 69565 }, { "epoch": 0.8478666227925853, "grad_norm": 2.9596124068036747, "learning_rate": 8.007697241821681e-07, "loss": 0.6787, "step": 69570 }, { "epoch": 0.8479275590167331, "grad_norm": 2.2288135263346143, "learning_rate": 8.004490057729314e-07, "loss": 0.6716, "step": 69575 }, { "epoch": 0.8479884952408809, "grad_norm": 3.0569394636843024, "learning_rate": 8.001282873636948e-07, "loss": 0.692, "step": 69580 }, { "epoch": 0.8480494314650286, "grad_norm": 2.3050312285064534, "learning_rate": 7.998075689544581e-07, "loss": 0.7015, "step": 69585 }, { "epoch": 0.8481103676891765, "grad_norm": 2.51500213802214, "learning_rate": 7.994868505452213e-07, "loss": 0.7045, "step": 69590 }, { "epoch": 0.8481713039133243, "grad_norm": 1.644102640394889, "learning_rate": 7.991661321359847e-07, "loss": 0.6442, "step": 69595 }, { "epoch": 0.8482322401374721, "grad_norm": 1.9924503400903508, "learning_rate": 7.98845413726748e-07, "loss": 0.7556, "step": 69600 }, { "epoch": 0.8482931763616199, "grad_norm": 2.118602924543552, "learning_rate": 7.985246953175113e-07, "loss": 0.6675, "step": 69605 }, { "epoch": 0.8483541125857678, "grad_norm": 2.9993550624117113, "learning_rate": 7.982039769082746e-07, "loss": 0.7416, "step": 69610 }, { "epoch": 0.8484150488099156, "grad_norm": 3.130335642555111, "learning_rate": 7.978832584990379e-07, "loss": 0.7475, "step": 69615 }, { "epoch": 0.8484759850340633, "grad_norm": 2.073944736770089, "learning_rate": 7.975625400898013e-07, "loss": 0.6848, "step": 69620 }, { "epoch": 0.8485369212582111, "grad_norm": 2.0299155551625194, "learning_rate": 7.972418216805646e-07, "loss": 0.6909, "step": 69625 }, { "epoch": 0.8485978574823589, "grad_norm": 2.116562187388217, "learning_rate": 7.969211032713278e-07, "loss": 0.7064, "step": 69630 }, { "epoch": 0.8486587937065068, "grad_norm": 2.992119048160773, "learning_rate": 7.966003848620911e-07, "loss": 0.7614, "step": 69635 }, { "epoch": 0.8487197299306546, "grad_norm": 2.9308252825036267, "learning_rate": 7.962796664528546e-07, "loss": 0.6983, "step": 69640 }, { "epoch": 0.8487806661548024, "grad_norm": 2.8575308556643493, "learning_rate": 7.959589480436178e-07, "loss": 0.7205, "step": 69645 }, { "epoch": 0.8488416023789502, "grad_norm": 5.134499622584162, "learning_rate": 7.95638229634381e-07, "loss": 0.7186, "step": 69650 }, { "epoch": 0.848902538603098, "grad_norm": 2.6856438458279106, "learning_rate": 7.953175112251443e-07, "loss": 0.7298, "step": 69655 }, { "epoch": 0.8489634748272458, "grad_norm": 2.526735624190644, "learning_rate": 7.949967928159078e-07, "loss": 0.7365, "step": 69660 }, { "epoch": 0.8490244110513936, "grad_norm": 2.3620883423765355, "learning_rate": 7.94676074406671e-07, "loss": 0.7557, "step": 69665 }, { "epoch": 0.8490853472755414, "grad_norm": 3.288036849274414, "learning_rate": 7.943553559974342e-07, "loss": 0.7504, "step": 69670 }, { "epoch": 0.8491462834996892, "grad_norm": 2.2397347392303786, "learning_rate": 7.940346375881976e-07, "loss": 0.7796, "step": 69675 }, { "epoch": 0.8492072197238371, "grad_norm": 2.5050406205540523, "learning_rate": 7.93713919178961e-07, "loss": 0.6776, "step": 69680 }, { "epoch": 0.8492681559479849, "grad_norm": 2.4324114585776266, "learning_rate": 7.933932007697243e-07, "loss": 0.7507, "step": 69685 }, { "epoch": 0.8493290921721326, "grad_norm": 2.835820437668577, "learning_rate": 7.930724823604876e-07, "loss": 0.8446, "step": 69690 }, { "epoch": 0.8493900283962804, "grad_norm": 3.491759674400122, "learning_rate": 7.927517639512508e-07, "loss": 0.6874, "step": 69695 }, { "epoch": 0.8494509646204282, "grad_norm": 3.344561643084615, "learning_rate": 7.924310455420143e-07, "loss": 0.7038, "step": 69700 }, { "epoch": 0.8495119008445761, "grad_norm": 3.4322622366708355, "learning_rate": 7.921103271327775e-07, "loss": 0.6667, "step": 69705 }, { "epoch": 0.8495728370687239, "grad_norm": 2.0140050568930676, "learning_rate": 7.917896087235408e-07, "loss": 0.6747, "step": 69710 }, { "epoch": 0.8496337732928717, "grad_norm": 2.6825301295843027, "learning_rate": 7.91468890314304e-07, "loss": 0.6819, "step": 69715 }, { "epoch": 0.8496947095170195, "grad_norm": 3.5623979061968676, "learning_rate": 7.911481719050675e-07, "loss": 0.7684, "step": 69720 }, { "epoch": 0.8497556457411672, "grad_norm": 2.216576491440463, "learning_rate": 7.908274534958307e-07, "loss": 0.7382, "step": 69725 }, { "epoch": 0.8498165819653151, "grad_norm": 2.635888822211797, "learning_rate": 7.90506735086594e-07, "loss": 0.7203, "step": 69730 }, { "epoch": 0.8498775181894629, "grad_norm": 2.3347389928029547, "learning_rate": 7.901860166773573e-07, "loss": 0.6565, "step": 69735 }, { "epoch": 0.8499384544136107, "grad_norm": 2.1510666468799173, "learning_rate": 7.898652982681207e-07, "loss": 0.6859, "step": 69740 }, { "epoch": 0.8499993906377585, "grad_norm": 2.2538138200477427, "learning_rate": 7.895445798588839e-07, "loss": 0.6828, "step": 69745 }, { "epoch": 0.8500603268619064, "grad_norm": 2.344532665060866, "learning_rate": 7.892238614496473e-07, "loss": 0.7204, "step": 69750 }, { "epoch": 0.8501212630860542, "grad_norm": 2.8244931571602043, "learning_rate": 7.889031430404105e-07, "loss": 0.8026, "step": 69755 }, { "epoch": 0.8501821993102019, "grad_norm": 2.602496471932544, "learning_rate": 7.88582424631174e-07, "loss": 0.7022, "step": 69760 }, { "epoch": 0.8502431355343497, "grad_norm": 2.7632918330069702, "learning_rate": 7.882617062219372e-07, "loss": 0.7499, "step": 69765 }, { "epoch": 0.8503040717584975, "grad_norm": 3.109128523334872, "learning_rate": 7.879409878127005e-07, "loss": 0.7032, "step": 69770 }, { "epoch": 0.8503650079826454, "grad_norm": 2.6555784450856796, "learning_rate": 7.876202694034637e-07, "loss": 0.7693, "step": 69775 }, { "epoch": 0.8504259442067932, "grad_norm": 2.8563671413077234, "learning_rate": 7.872995509942272e-07, "loss": 0.6902, "step": 69780 }, { "epoch": 0.850486880430941, "grad_norm": 2.5789460259583534, "learning_rate": 7.869788325849904e-07, "loss": 0.7869, "step": 69785 }, { "epoch": 0.8505478166550888, "grad_norm": 2.4099225514209617, "learning_rate": 7.866581141757537e-07, "loss": 0.7237, "step": 69790 }, { "epoch": 0.8506087528792365, "grad_norm": 2.230368673751701, "learning_rate": 7.863373957665171e-07, "loss": 0.735, "step": 69795 }, { "epoch": 0.8506696891033844, "grad_norm": 2.48422610347627, "learning_rate": 7.860166773572804e-07, "loss": 0.7275, "step": 69800 }, { "epoch": 0.8507306253275322, "grad_norm": 2.3551609320353144, "learning_rate": 7.856959589480436e-07, "loss": 0.7562, "step": 69805 }, { "epoch": 0.85079156155168, "grad_norm": 2.735935736332824, "learning_rate": 7.85375240538807e-07, "loss": 0.7046, "step": 69810 }, { "epoch": 0.8508524977758278, "grad_norm": 2.4079955383242573, "learning_rate": 7.850545221295704e-07, "loss": 0.6733, "step": 69815 }, { "epoch": 0.8509134339999757, "grad_norm": 2.1745540046842717, "learning_rate": 7.847338037203336e-07, "loss": 0.708, "step": 69820 }, { "epoch": 0.8509743702241235, "grad_norm": 2.327718203405709, "learning_rate": 7.844130853110969e-07, "loss": 0.7149, "step": 69825 }, { "epoch": 0.8510353064482712, "grad_norm": 2.356412429052598, "learning_rate": 7.840923669018602e-07, "loss": 0.7319, "step": 69830 }, { "epoch": 0.851096242672419, "grad_norm": 2.3806221536153633, "learning_rate": 7.837716484926236e-07, "loss": 0.6966, "step": 69835 }, { "epoch": 0.8511571788965668, "grad_norm": 2.649533074682495, "learning_rate": 7.834509300833869e-07, "loss": 0.6947, "step": 69840 }, { "epoch": 0.8512181151207147, "grad_norm": 2.112632707402404, "learning_rate": 7.831302116741501e-07, "loss": 0.898, "step": 69845 }, { "epoch": 0.8512790513448625, "grad_norm": 2.0572077871000394, "learning_rate": 7.828094932649134e-07, "loss": 0.7184, "step": 69850 }, { "epoch": 0.8513399875690103, "grad_norm": 2.1712193172844296, "learning_rate": 7.824887748556769e-07, "loss": 0.6973, "step": 69855 }, { "epoch": 0.8514009237931581, "grad_norm": 2.372019037403135, "learning_rate": 7.821680564464401e-07, "loss": 0.6846, "step": 69860 }, { "epoch": 0.8514618600173058, "grad_norm": 2.5323281949202596, "learning_rate": 7.818473380372034e-07, "loss": 0.7155, "step": 69865 }, { "epoch": 0.8515227962414537, "grad_norm": 3.5200042032559025, "learning_rate": 7.815266196279667e-07, "loss": 0.7208, "step": 69870 }, { "epoch": 0.8515837324656015, "grad_norm": 2.1371961845261342, "learning_rate": 7.812059012187301e-07, "loss": 0.6029, "step": 69875 }, { "epoch": 0.8516446686897493, "grad_norm": 2.5193811203674583, "learning_rate": 7.808851828094933e-07, "loss": 0.6436, "step": 69880 }, { "epoch": 0.8517056049138971, "grad_norm": 2.543881442794267, "learning_rate": 7.805644644002567e-07, "loss": 0.7705, "step": 69885 }, { "epoch": 0.851766541138045, "grad_norm": 2.792025044801583, "learning_rate": 7.802437459910199e-07, "loss": 0.6549, "step": 69890 }, { "epoch": 0.8518274773621928, "grad_norm": 3.3332598277457577, "learning_rate": 7.799230275817833e-07, "loss": 0.7096, "step": 69895 }, { "epoch": 0.8518884135863405, "grad_norm": 3.9391348651829294, "learning_rate": 7.796023091725466e-07, "loss": 0.6753, "step": 69900 }, { "epoch": 0.8519493498104883, "grad_norm": 4.541892216260448, "learning_rate": 7.792815907633099e-07, "loss": 0.6714, "step": 69905 }, { "epoch": 0.8520102860346361, "grad_norm": 2.578107374599531, "learning_rate": 7.789608723540731e-07, "loss": 0.7389, "step": 69910 }, { "epoch": 0.852071222258784, "grad_norm": 2.625216019487963, "learning_rate": 7.786401539448366e-07, "loss": 0.7458, "step": 69915 }, { "epoch": 0.8521321584829318, "grad_norm": 2.07352450526126, "learning_rate": 7.783194355355998e-07, "loss": 0.7231, "step": 69920 }, { "epoch": 0.8521930947070796, "grad_norm": 2.310453608893669, "learning_rate": 7.779987171263631e-07, "loss": 0.6448, "step": 69925 }, { "epoch": 0.8522540309312274, "grad_norm": 2.677111986099444, "learning_rate": 7.776779987171264e-07, "loss": 0.7104, "step": 69930 }, { "epoch": 0.8523149671553751, "grad_norm": 2.7260160232153483, "learning_rate": 7.773572803078898e-07, "loss": 0.7886, "step": 69935 }, { "epoch": 0.852375903379523, "grad_norm": 2.3873064356360247, "learning_rate": 7.77036561898653e-07, "loss": 0.7535, "step": 69940 }, { "epoch": 0.8524368396036708, "grad_norm": 3.4057112929511146, "learning_rate": 7.767158434894164e-07, "loss": 0.6665, "step": 69945 }, { "epoch": 0.8524977758278186, "grad_norm": 2.5308695620167225, "learning_rate": 7.763951250801796e-07, "loss": 0.6642, "step": 69950 }, { "epoch": 0.8525587120519664, "grad_norm": 2.6078861819514523, "learning_rate": 7.76074406670943e-07, "loss": 0.7084, "step": 69955 }, { "epoch": 0.8526196482761143, "grad_norm": 2.456641636841377, "learning_rate": 7.757536882617063e-07, "loss": 0.7473, "step": 69960 }, { "epoch": 0.8526805845002621, "grad_norm": 2.365866903166141, "learning_rate": 7.754329698524696e-07, "loss": 0.736, "step": 69965 }, { "epoch": 0.8527415207244098, "grad_norm": 2.6352665767077004, "learning_rate": 7.751122514432328e-07, "loss": 0.7703, "step": 69970 }, { "epoch": 0.8528024569485576, "grad_norm": 1.821769557717171, "learning_rate": 7.747915330339963e-07, "loss": 0.7358, "step": 69975 }, { "epoch": 0.8528633931727054, "grad_norm": 3.179393032106632, "learning_rate": 7.744708146247595e-07, "loss": 0.6799, "step": 69980 }, { "epoch": 0.8529243293968533, "grad_norm": 2.490493892080416, "learning_rate": 7.741500962155228e-07, "loss": 0.7669, "step": 69985 }, { "epoch": 0.8529852656210011, "grad_norm": 2.504348266751505, "learning_rate": 7.738293778062861e-07, "loss": 0.6898, "step": 69990 }, { "epoch": 0.8530462018451489, "grad_norm": 2.8960773299853018, "learning_rate": 7.735086593970495e-07, "loss": 0.7106, "step": 69995 }, { "epoch": 0.8531071380692967, "grad_norm": 2.4453411247979377, "learning_rate": 7.731879409878127e-07, "loss": 0.7758, "step": 70000 }, { "epoch": 0.8531680742934444, "grad_norm": 2.44627602463711, "learning_rate": 7.728672225785761e-07, "loss": 0.7444, "step": 70005 }, { "epoch": 0.8532290105175923, "grad_norm": 2.1502230512445455, "learning_rate": 7.725465041693393e-07, "loss": 0.6786, "step": 70010 }, { "epoch": 0.8532899467417401, "grad_norm": 3.495590346658272, "learning_rate": 7.722257857601027e-07, "loss": 0.7111, "step": 70015 }, { "epoch": 0.8533508829658879, "grad_norm": 3.8329399777725737, "learning_rate": 7.71905067350866e-07, "loss": 0.82, "step": 70020 }, { "epoch": 0.8534118191900357, "grad_norm": 2.4161728455151668, "learning_rate": 7.715843489416293e-07, "loss": 0.664, "step": 70025 }, { "epoch": 0.8534727554141835, "grad_norm": 3.6474027181027227, "learning_rate": 7.712636305323925e-07, "loss": 0.6679, "step": 70030 }, { "epoch": 0.8535336916383314, "grad_norm": 2.4396717142917885, "learning_rate": 7.70942912123156e-07, "loss": 0.7369, "step": 70035 }, { "epoch": 0.8535946278624791, "grad_norm": 2.2543219590809747, "learning_rate": 7.706221937139193e-07, "loss": 0.6979, "step": 70040 }, { "epoch": 0.8536555640866269, "grad_norm": 2.205888579219953, "learning_rate": 7.703014753046825e-07, "loss": 0.7289, "step": 70045 }, { "epoch": 0.8537165003107747, "grad_norm": 2.6197576854208626, "learning_rate": 7.699807568954458e-07, "loss": 0.6573, "step": 70050 }, { "epoch": 0.8537774365349226, "grad_norm": 2.409205368482219, "learning_rate": 7.696600384862092e-07, "loss": 0.7674, "step": 70055 }, { "epoch": 0.8538383727590704, "grad_norm": 2.452029336242238, "learning_rate": 7.693393200769725e-07, "loss": 0.6859, "step": 70060 }, { "epoch": 0.8538993089832182, "grad_norm": 2.5116955852992855, "learning_rate": 7.690186016677358e-07, "loss": 0.7153, "step": 70065 }, { "epoch": 0.853960245207366, "grad_norm": 2.419676219340091, "learning_rate": 7.68697883258499e-07, "loss": 0.7815, "step": 70070 }, { "epoch": 0.8540211814315137, "grad_norm": 2.2381881647166675, "learning_rate": 7.683771648492624e-07, "loss": 0.7079, "step": 70075 }, { "epoch": 0.8540821176556616, "grad_norm": 2.2228672650020638, "learning_rate": 7.680564464400258e-07, "loss": 0.7391, "step": 70080 }, { "epoch": 0.8541430538798094, "grad_norm": 2.8466244953308446, "learning_rate": 7.67735728030789e-07, "loss": 0.7842, "step": 70085 }, { "epoch": 0.8542039901039572, "grad_norm": 2.4047593330474424, "learning_rate": 7.674150096215524e-07, "loss": 0.6914, "step": 70090 }, { "epoch": 0.854264926328105, "grad_norm": 2.5139267815597215, "learning_rate": 7.670942912123157e-07, "loss": 0.747, "step": 70095 }, { "epoch": 0.8543258625522528, "grad_norm": 2.1445595592271216, "learning_rate": 7.66773572803079e-07, "loss": 0.6628, "step": 70100 }, { "epoch": 0.8543867987764007, "grad_norm": 2.388012439975601, "learning_rate": 7.664528543938422e-07, "loss": 0.6867, "step": 70105 }, { "epoch": 0.8544477350005484, "grad_norm": 2.2916326215365705, "learning_rate": 7.661321359846057e-07, "loss": 0.7375, "step": 70110 }, { "epoch": 0.8545086712246962, "grad_norm": 2.326965087785584, "learning_rate": 7.658114175753689e-07, "loss": 0.706, "step": 70115 }, { "epoch": 0.854569607448844, "grad_norm": 2.4831008679923303, "learning_rate": 7.654906991661322e-07, "loss": 0.7685, "step": 70120 }, { "epoch": 0.8546305436729918, "grad_norm": 3.3692875669244473, "learning_rate": 7.651699807568955e-07, "loss": 0.7289, "step": 70125 }, { "epoch": 0.8546914798971397, "grad_norm": 3.0430858413573385, "learning_rate": 7.648492623476589e-07, "loss": 0.6482, "step": 70130 }, { "epoch": 0.8547524161212875, "grad_norm": 2.360738775985557, "learning_rate": 7.645285439384221e-07, "loss": 0.7442, "step": 70135 }, { "epoch": 0.8548133523454353, "grad_norm": 2.6088938709193736, "learning_rate": 7.642078255291855e-07, "loss": 0.7666, "step": 70140 }, { "epoch": 0.854874288569583, "grad_norm": 2.283195721864601, "learning_rate": 7.638871071199487e-07, "loss": 0.7042, "step": 70145 }, { "epoch": 0.8549352247937309, "grad_norm": 2.3023794141409106, "learning_rate": 7.635663887107121e-07, "loss": 0.6821, "step": 70150 }, { "epoch": 0.8549961610178787, "grad_norm": 2.6896026277405216, "learning_rate": 7.632456703014754e-07, "loss": 0.7244, "step": 70155 }, { "epoch": 0.8550570972420265, "grad_norm": 2.7134080315479574, "learning_rate": 7.629249518922387e-07, "loss": 0.7007, "step": 70160 }, { "epoch": 0.8551180334661743, "grad_norm": 2.628373853577708, "learning_rate": 7.626042334830019e-07, "loss": 0.7149, "step": 70165 }, { "epoch": 0.8551789696903221, "grad_norm": 2.5074055105397353, "learning_rate": 7.622835150737654e-07, "loss": 0.7378, "step": 70170 }, { "epoch": 0.85523990591447, "grad_norm": 2.432843095508229, "learning_rate": 7.619627966645286e-07, "loss": 0.6756, "step": 70175 }, { "epoch": 0.8553008421386177, "grad_norm": 2.7546903038221275, "learning_rate": 7.616420782552919e-07, "loss": 0.759, "step": 70180 }, { "epoch": 0.8553617783627655, "grad_norm": 2.4748489359595784, "learning_rate": 7.613213598460552e-07, "loss": 0.8227, "step": 70185 }, { "epoch": 0.8554227145869133, "grad_norm": 2.516308278509504, "learning_rate": 7.610006414368186e-07, "loss": 0.7638, "step": 70190 }, { "epoch": 0.8554836508110611, "grad_norm": 2.4702955013071564, "learning_rate": 7.606799230275818e-07, "loss": 0.7581, "step": 70195 }, { "epoch": 0.855544587035209, "grad_norm": 3.1622206100617505, "learning_rate": 7.603592046183452e-07, "loss": 0.6918, "step": 70200 }, { "epoch": 0.8556055232593568, "grad_norm": 2.2233005914965385, "learning_rate": 7.600384862091084e-07, "loss": 0.7013, "step": 70205 }, { "epoch": 0.8556664594835046, "grad_norm": 2.426662884710349, "learning_rate": 7.597177677998718e-07, "loss": 0.6799, "step": 70210 }, { "epoch": 0.8557273957076523, "grad_norm": 2.160480997414614, "learning_rate": 7.593970493906352e-07, "loss": 0.7081, "step": 70215 }, { "epoch": 0.8557883319318002, "grad_norm": 2.4842294798226052, "learning_rate": 7.590763309813984e-07, "loss": 0.7469, "step": 70220 }, { "epoch": 0.855849268155948, "grad_norm": 2.7509715837335533, "learning_rate": 7.587556125721616e-07, "loss": 0.7121, "step": 70225 }, { "epoch": 0.8559102043800958, "grad_norm": 2.4358787308583967, "learning_rate": 7.584348941629251e-07, "loss": 0.7375, "step": 70230 }, { "epoch": 0.8559711406042436, "grad_norm": 2.978145864532266, "learning_rate": 7.581141757536884e-07, "loss": 0.7578, "step": 70235 }, { "epoch": 0.8560320768283914, "grad_norm": 2.126812207392727, "learning_rate": 7.577934573444516e-07, "loss": 0.7022, "step": 70240 }, { "epoch": 0.8560930130525393, "grad_norm": 2.694250390151618, "learning_rate": 7.574727389352148e-07, "loss": 0.7475, "step": 70245 }, { "epoch": 0.856153949276687, "grad_norm": 2.74792885716449, "learning_rate": 7.571520205259783e-07, "loss": 0.7852, "step": 70250 }, { "epoch": 0.8562148855008348, "grad_norm": 2.3912797436558013, "learning_rate": 7.568313021167416e-07, "loss": 0.7128, "step": 70255 }, { "epoch": 0.8562758217249826, "grad_norm": 2.5712885987287146, "learning_rate": 7.565105837075048e-07, "loss": 0.7182, "step": 70260 }, { "epoch": 0.8563367579491304, "grad_norm": 2.493054769140365, "learning_rate": 7.561898652982681e-07, "loss": 0.6747, "step": 70265 }, { "epoch": 0.8563976941732783, "grad_norm": 2.768456540773658, "learning_rate": 7.558691468890315e-07, "loss": 0.7584, "step": 70270 }, { "epoch": 0.8564586303974261, "grad_norm": 2.576329088488017, "learning_rate": 7.555484284797949e-07, "loss": 0.7012, "step": 70275 }, { "epoch": 0.8565195666215739, "grad_norm": 2.2301089082853047, "learning_rate": 7.552277100705581e-07, "loss": 0.764, "step": 70280 }, { "epoch": 0.8565805028457216, "grad_norm": 2.9695427873206017, "learning_rate": 7.549069916613214e-07, "loss": 0.7654, "step": 70285 }, { "epoch": 0.8566414390698694, "grad_norm": 3.492716031675098, "learning_rate": 7.545862732520847e-07, "loss": 0.6865, "step": 70290 }, { "epoch": 0.8567023752940173, "grad_norm": 2.3086639552516277, "learning_rate": 7.542655548428481e-07, "loss": 0.7499, "step": 70295 }, { "epoch": 0.8567633115181651, "grad_norm": 2.600953113215381, "learning_rate": 7.539448364336113e-07, "loss": 0.7422, "step": 70300 }, { "epoch": 0.8568242477423129, "grad_norm": 2.5007100257562382, "learning_rate": 7.536241180243746e-07, "loss": 0.6439, "step": 70305 }, { "epoch": 0.8568851839664607, "grad_norm": 2.1384700399811156, "learning_rate": 7.53303399615138e-07, "loss": 0.6965, "step": 70310 }, { "epoch": 0.8569461201906086, "grad_norm": 2.047307028841255, "learning_rate": 7.529826812059013e-07, "loss": 0.7169, "step": 70315 }, { "epoch": 0.8570070564147563, "grad_norm": 2.560797256490232, "learning_rate": 7.526619627966645e-07, "loss": 0.7175, "step": 70320 }, { "epoch": 0.8570679926389041, "grad_norm": 3.4128753892343173, "learning_rate": 7.523412443874279e-07, "loss": 0.754, "step": 70325 }, { "epoch": 0.8571289288630519, "grad_norm": 3.47550446819376, "learning_rate": 7.520205259781912e-07, "loss": 0.6952, "step": 70330 }, { "epoch": 0.8571898650871997, "grad_norm": 2.222035479939978, "learning_rate": 7.516998075689545e-07, "loss": 0.6742, "step": 70335 }, { "epoch": 0.8572508013113476, "grad_norm": 3.54833668124281, "learning_rate": 7.513790891597178e-07, "loss": 0.8518, "step": 70340 }, { "epoch": 0.8573117375354954, "grad_norm": 2.5058349478988666, "learning_rate": 7.510583707504811e-07, "loss": 0.6972, "step": 70345 }, { "epoch": 0.8573726737596432, "grad_norm": 2.6086886786713426, "learning_rate": 7.507376523412444e-07, "loss": 0.7883, "step": 70350 }, { "epoch": 0.8574336099837909, "grad_norm": 2.520337057249807, "learning_rate": 7.504169339320078e-07, "loss": 0.6751, "step": 70355 }, { "epoch": 0.8574945462079387, "grad_norm": 2.3382627688240563, "learning_rate": 7.50096215522771e-07, "loss": 0.6862, "step": 70360 }, { "epoch": 0.8575554824320866, "grad_norm": 2.0304505403404196, "learning_rate": 7.497754971135343e-07, "loss": 0.6824, "step": 70365 }, { "epoch": 0.8576164186562344, "grad_norm": 2.611109230600328, "learning_rate": 7.494547787042977e-07, "loss": 0.6488, "step": 70370 }, { "epoch": 0.8576773548803822, "grad_norm": 3.0971963937186593, "learning_rate": 7.49134060295061e-07, "loss": 0.6706, "step": 70375 }, { "epoch": 0.85773829110453, "grad_norm": 2.3569443748921395, "learning_rate": 7.488133418858242e-07, "loss": 0.7072, "step": 70380 }, { "epoch": 0.8577992273286779, "grad_norm": 2.6582226267786795, "learning_rate": 7.484926234765876e-07, "loss": 0.6784, "step": 70385 }, { "epoch": 0.8578601635528256, "grad_norm": 2.9594278161878598, "learning_rate": 7.48171905067351e-07, "loss": 0.7153, "step": 70390 }, { "epoch": 0.8579210997769734, "grad_norm": 2.2601442748543676, "learning_rate": 7.478511866581142e-07, "loss": 0.8147, "step": 70395 }, { "epoch": 0.8579820360011212, "grad_norm": 3.047169176839946, "learning_rate": 7.475304682488775e-07, "loss": 0.7295, "step": 70400 }, { "epoch": 0.858042972225269, "grad_norm": 2.4015020993731513, "learning_rate": 7.472097498396409e-07, "loss": 0.6981, "step": 70405 }, { "epoch": 0.8581039084494169, "grad_norm": 4.206071784135467, "learning_rate": 7.468890314304042e-07, "loss": 0.6713, "step": 70410 }, { "epoch": 0.8581648446735647, "grad_norm": 2.2136851814220364, "learning_rate": 7.465683130211675e-07, "loss": 0.7168, "step": 70415 }, { "epoch": 0.8582257808977125, "grad_norm": 2.3566346922929036, "learning_rate": 7.462475946119307e-07, "loss": 0.7175, "step": 70420 }, { "epoch": 0.8582867171218602, "grad_norm": 2.271403131778132, "learning_rate": 7.459268762026941e-07, "loss": 0.7665, "step": 70425 }, { "epoch": 0.858347653346008, "grad_norm": 2.509469514006412, "learning_rate": 7.456061577934575e-07, "loss": 0.7181, "step": 70430 }, { "epoch": 0.8584085895701559, "grad_norm": 2.8062781486383845, "learning_rate": 7.452854393842207e-07, "loss": 0.77, "step": 70435 }, { "epoch": 0.8584695257943037, "grad_norm": 2.440431063808936, "learning_rate": 7.449647209749839e-07, "loss": 0.6626, "step": 70440 }, { "epoch": 0.8585304620184515, "grad_norm": 3.038127665459619, "learning_rate": 7.446440025657474e-07, "loss": 0.6873, "step": 70445 }, { "epoch": 0.8585913982425993, "grad_norm": 2.857808597762591, "learning_rate": 7.443232841565107e-07, "loss": 0.6728, "step": 70450 }, { "epoch": 0.858652334466747, "grad_norm": 2.263691262659779, "learning_rate": 7.440025657472739e-07, "loss": 0.6483, "step": 70455 }, { "epoch": 0.8587132706908949, "grad_norm": 2.228587297452798, "learning_rate": 7.436818473380373e-07, "loss": 0.7222, "step": 70460 }, { "epoch": 0.8587742069150427, "grad_norm": 2.74638990874622, "learning_rate": 7.433611289288006e-07, "loss": 0.6608, "step": 70465 }, { "epoch": 0.8588351431391905, "grad_norm": 2.5563554702302533, "learning_rate": 7.430404105195639e-07, "loss": 0.7789, "step": 70470 }, { "epoch": 0.8588960793633383, "grad_norm": 2.843945493299573, "learning_rate": 7.427196921103272e-07, "loss": 0.6755, "step": 70475 }, { "epoch": 0.8589570155874862, "grad_norm": 2.201788376131899, "learning_rate": 7.423989737010905e-07, "loss": 0.7129, "step": 70480 }, { "epoch": 0.859017951811634, "grad_norm": 2.6181515028026503, "learning_rate": 7.420782552918538e-07, "loss": 0.6966, "step": 70485 }, { "epoch": 0.8590788880357817, "grad_norm": 2.729255505617527, "learning_rate": 7.417575368826172e-07, "loss": 0.7517, "step": 70490 }, { "epoch": 0.8591398242599295, "grad_norm": 2.7892419478981783, "learning_rate": 7.414368184733804e-07, "loss": 0.7798, "step": 70495 }, { "epoch": 0.8592007604840773, "grad_norm": 2.528175048869269, "learning_rate": 7.411161000641437e-07, "loss": 0.7062, "step": 70500 }, { "epoch": 0.8592616967082252, "grad_norm": 2.362749899969486, "learning_rate": 7.407953816549071e-07, "loss": 0.7445, "step": 70505 }, { "epoch": 0.859322632932373, "grad_norm": 3.0709833110542166, "learning_rate": 7.404746632456704e-07, "loss": 0.7322, "step": 70510 }, { "epoch": 0.8593835691565208, "grad_norm": 2.3442522190995487, "learning_rate": 7.401539448364336e-07, "loss": 0.6473, "step": 70515 }, { "epoch": 0.8594445053806686, "grad_norm": 2.861092577443972, "learning_rate": 7.39833226427197e-07, "loss": 0.7296, "step": 70520 }, { "epoch": 0.8595054416048163, "grad_norm": 2.1586433477822644, "learning_rate": 7.395125080179603e-07, "loss": 0.7385, "step": 70525 }, { "epoch": 0.8595663778289642, "grad_norm": 2.8407931323701483, "learning_rate": 7.391917896087236e-07, "loss": 0.7318, "step": 70530 }, { "epoch": 0.859627314053112, "grad_norm": 2.03615069044869, "learning_rate": 7.388710711994869e-07, "loss": 0.7007, "step": 70535 }, { "epoch": 0.8596882502772598, "grad_norm": 2.361464055293893, "learning_rate": 7.385503527902502e-07, "loss": 0.7388, "step": 70540 }, { "epoch": 0.8597491865014076, "grad_norm": 2.3850035136436145, "learning_rate": 7.382296343810135e-07, "loss": 0.7402, "step": 70545 }, { "epoch": 0.8598101227255555, "grad_norm": 2.7099610930096545, "learning_rate": 7.379089159717769e-07, "loss": 0.7106, "step": 70550 }, { "epoch": 0.8598710589497033, "grad_norm": 2.045292222950289, "learning_rate": 7.375881975625401e-07, "loss": 0.6738, "step": 70555 }, { "epoch": 0.859931995173851, "grad_norm": 2.3896444600105187, "learning_rate": 7.372674791533034e-07, "loss": 0.6623, "step": 70560 }, { "epoch": 0.8599929313979988, "grad_norm": 1.9129742267483498, "learning_rate": 7.369467607440669e-07, "loss": 0.6749, "step": 70565 }, { "epoch": 0.8600538676221466, "grad_norm": 1.9120437482445585, "learning_rate": 7.366260423348301e-07, "loss": 0.7565, "step": 70570 }, { "epoch": 0.8601148038462945, "grad_norm": 2.232583276247904, "learning_rate": 7.363053239255933e-07, "loss": 0.6963, "step": 70575 }, { "epoch": 0.8601757400704423, "grad_norm": 2.6673099714583626, "learning_rate": 7.359846055163567e-07, "loss": 0.7227, "step": 70580 }, { "epoch": 0.8602366762945901, "grad_norm": 2.2824816577261515, "learning_rate": 7.356638871071201e-07, "loss": 0.7646, "step": 70585 }, { "epoch": 0.8602976125187379, "grad_norm": 2.6141045854342644, "learning_rate": 7.353431686978833e-07, "loss": 0.7038, "step": 70590 }, { "epoch": 0.8603585487428856, "grad_norm": 2.170090242773011, "learning_rate": 7.350224502886466e-07, "loss": 0.7094, "step": 70595 }, { "epoch": 0.8604194849670335, "grad_norm": 2.796573978751406, "learning_rate": 7.347017318794099e-07, "loss": 0.7396, "step": 70600 }, { "epoch": 0.8604804211911813, "grad_norm": 2.4223442688498498, "learning_rate": 7.343810134701733e-07, "loss": 0.756, "step": 70605 }, { "epoch": 0.8605413574153291, "grad_norm": 2.7855172100075922, "learning_rate": 7.340602950609366e-07, "loss": 0.7234, "step": 70610 }, { "epoch": 0.8606022936394769, "grad_norm": 2.530478345055044, "learning_rate": 7.337395766516998e-07, "loss": 0.712, "step": 70615 }, { "epoch": 0.8606632298636248, "grad_norm": 3.027482210112803, "learning_rate": 7.334188582424631e-07, "loss": 0.7736, "step": 70620 }, { "epoch": 0.8607241660877726, "grad_norm": 2.4673680148974277, "learning_rate": 7.330981398332266e-07, "loss": 0.7, "step": 70625 }, { "epoch": 0.8607851023119203, "grad_norm": 2.5663849935402214, "learning_rate": 7.327774214239898e-07, "loss": 0.7358, "step": 70630 }, { "epoch": 0.8608460385360681, "grad_norm": 4.094667810931531, "learning_rate": 7.324567030147531e-07, "loss": 0.661, "step": 70635 }, { "epoch": 0.8609069747602159, "grad_norm": 3.5823820281298935, "learning_rate": 7.321359846055164e-07, "loss": 0.6397, "step": 70640 }, { "epoch": 0.8609679109843638, "grad_norm": 2.2500217546985612, "learning_rate": 7.318152661962798e-07, "loss": 0.6934, "step": 70645 }, { "epoch": 0.8610288472085116, "grad_norm": 1.9655874255821315, "learning_rate": 7.31494547787043e-07, "loss": 0.7626, "step": 70650 }, { "epoch": 0.8610897834326594, "grad_norm": 2.580505978710967, "learning_rate": 7.311738293778064e-07, "loss": 0.7476, "step": 70655 }, { "epoch": 0.8611507196568072, "grad_norm": 2.250463657831586, "learning_rate": 7.308531109685696e-07, "loss": 0.6759, "step": 70660 }, { "epoch": 0.8612116558809549, "grad_norm": 2.133090898074865, "learning_rate": 7.30532392559333e-07, "loss": 0.657, "step": 70665 }, { "epoch": 0.8612725921051028, "grad_norm": 2.2109065111236563, "learning_rate": 7.302116741500963e-07, "loss": 0.7319, "step": 70670 }, { "epoch": 0.8613335283292506, "grad_norm": 2.0177773038386717, "learning_rate": 7.298909557408596e-07, "loss": 0.6241, "step": 70675 }, { "epoch": 0.8613944645533984, "grad_norm": 2.307522542615936, "learning_rate": 7.295702373316228e-07, "loss": 0.6432, "step": 70680 }, { "epoch": 0.8614554007775462, "grad_norm": 2.1855396472258746, "learning_rate": 7.292495189223863e-07, "loss": 0.7139, "step": 70685 }, { "epoch": 0.861516337001694, "grad_norm": 2.8841017862920144, "learning_rate": 7.289288005131495e-07, "loss": 0.7316, "step": 70690 }, { "epoch": 0.8615772732258419, "grad_norm": 2.539952813809296, "learning_rate": 7.286080821039128e-07, "loss": 0.719, "step": 70695 }, { "epoch": 0.8616382094499896, "grad_norm": 3.51594220780105, "learning_rate": 7.282873636946762e-07, "loss": 0.7126, "step": 70700 }, { "epoch": 0.8616991456741374, "grad_norm": 2.4237065889816565, "learning_rate": 7.279666452854395e-07, "loss": 0.7747, "step": 70705 }, { "epoch": 0.8617600818982852, "grad_norm": 2.006773143727734, "learning_rate": 7.276459268762027e-07, "loss": 0.6395, "step": 70710 }, { "epoch": 0.8618210181224331, "grad_norm": 2.283054086512976, "learning_rate": 7.273252084669661e-07, "loss": 0.7252, "step": 70715 }, { "epoch": 0.8618819543465809, "grad_norm": 3.251363181715118, "learning_rate": 7.270044900577294e-07, "loss": 0.7512, "step": 70720 }, { "epoch": 0.8619428905707287, "grad_norm": 2.3912879285223756, "learning_rate": 7.266837716484927e-07, "loss": 0.7385, "step": 70725 }, { "epoch": 0.8620038267948765, "grad_norm": 2.8297008321680366, "learning_rate": 7.26363053239256e-07, "loss": 0.6747, "step": 70730 }, { "epoch": 0.8620647630190242, "grad_norm": 2.5965576204833414, "learning_rate": 7.260423348300193e-07, "loss": 0.702, "step": 70735 }, { "epoch": 0.8621256992431721, "grad_norm": 2.666248459667338, "learning_rate": 7.257216164207826e-07, "loss": 0.7497, "step": 70740 }, { "epoch": 0.8621866354673199, "grad_norm": 5.471360119449706, "learning_rate": 7.25400898011546e-07, "loss": 0.8121, "step": 70745 }, { "epoch": 0.8622475716914677, "grad_norm": 2.1994486357026783, "learning_rate": 7.250801796023092e-07, "loss": 0.7533, "step": 70750 }, { "epoch": 0.8623085079156155, "grad_norm": 2.4988670692208355, "learning_rate": 7.247594611930725e-07, "loss": 0.6686, "step": 70755 }, { "epoch": 0.8623694441397634, "grad_norm": 2.834765115479082, "learning_rate": 7.24438742783836e-07, "loss": 0.7348, "step": 70760 }, { "epoch": 0.8624303803639112, "grad_norm": 2.351779406617705, "learning_rate": 7.241180243745992e-07, "loss": 0.7537, "step": 70765 }, { "epoch": 0.8624913165880589, "grad_norm": 2.428399276968222, "learning_rate": 7.237973059653624e-07, "loss": 0.7033, "step": 70770 }, { "epoch": 0.8625522528122067, "grad_norm": 3.344419849776435, "learning_rate": 7.234765875561258e-07, "loss": 0.7998, "step": 70775 }, { "epoch": 0.8626131890363545, "grad_norm": 2.59131147556671, "learning_rate": 7.231558691468892e-07, "loss": 0.7048, "step": 70780 }, { "epoch": 0.8626741252605024, "grad_norm": 2.5895020169952407, "learning_rate": 7.228351507376524e-07, "loss": 0.6399, "step": 70785 }, { "epoch": 0.8627350614846502, "grad_norm": 2.2228881109460463, "learning_rate": 7.225144323284157e-07, "loss": 0.6786, "step": 70790 }, { "epoch": 0.862795997708798, "grad_norm": 2.6156212202241025, "learning_rate": 7.22193713919179e-07, "loss": 0.7771, "step": 70795 }, { "epoch": 0.8628569339329458, "grad_norm": 2.6139963012627834, "learning_rate": 7.218729955099424e-07, "loss": 0.6866, "step": 70800 }, { "epoch": 0.8629178701570935, "grad_norm": 2.307725779915422, "learning_rate": 7.215522771007057e-07, "loss": 0.7031, "step": 70805 }, { "epoch": 0.8629788063812414, "grad_norm": 2.4149279165065076, "learning_rate": 7.21231558691469e-07, "loss": 0.6936, "step": 70810 }, { "epoch": 0.8630397426053892, "grad_norm": 3.048212257520086, "learning_rate": 7.209108402822322e-07, "loss": 0.7472, "step": 70815 }, { "epoch": 0.863100678829537, "grad_norm": 2.4710511624560776, "learning_rate": 7.205901218729957e-07, "loss": 0.6988, "step": 70820 }, { "epoch": 0.8631616150536848, "grad_norm": 3.665474500110321, "learning_rate": 7.202694034637589e-07, "loss": 0.7167, "step": 70825 }, { "epoch": 0.8632225512778327, "grad_norm": 2.270822917469751, "learning_rate": 7.199486850545222e-07, "loss": 0.7468, "step": 70830 }, { "epoch": 0.8632834875019805, "grad_norm": 1.9775246071739165, "learning_rate": 7.196279666452854e-07, "loss": 0.7119, "step": 70835 }, { "epoch": 0.8633444237261282, "grad_norm": 3.029617093289065, "learning_rate": 7.193072482360489e-07, "loss": 0.6996, "step": 70840 }, { "epoch": 0.863405359950276, "grad_norm": 2.901546513287231, "learning_rate": 7.189865298268121e-07, "loss": 0.7736, "step": 70845 }, { "epoch": 0.8634662961744238, "grad_norm": 2.2972747680107424, "learning_rate": 7.186658114175755e-07, "loss": 0.6781, "step": 70850 }, { "epoch": 0.8635272323985717, "grad_norm": 3.308961501261841, "learning_rate": 7.183450930083387e-07, "loss": 0.7048, "step": 70855 }, { "epoch": 0.8635881686227195, "grad_norm": 2.445899840211673, "learning_rate": 7.180243745991021e-07, "loss": 0.7476, "step": 70860 }, { "epoch": 0.8636491048468673, "grad_norm": 3.3350685253745502, "learning_rate": 7.177036561898653e-07, "loss": 0.6994, "step": 70865 }, { "epoch": 0.8637100410710151, "grad_norm": 2.739790612560961, "learning_rate": 7.173829377806287e-07, "loss": 0.7265, "step": 70870 }, { "epoch": 0.8637709772951628, "grad_norm": 2.3208203742220563, "learning_rate": 7.170622193713919e-07, "loss": 0.7156, "step": 70875 }, { "epoch": 0.8638319135193107, "grad_norm": 2.6379549686004844, "learning_rate": 7.167415009621554e-07, "loss": 0.6912, "step": 70880 }, { "epoch": 0.8638928497434585, "grad_norm": 2.436687779192056, "learning_rate": 7.164207825529186e-07, "loss": 0.7121, "step": 70885 }, { "epoch": 0.8639537859676063, "grad_norm": 2.5890016652917756, "learning_rate": 7.161000641436819e-07, "loss": 0.7028, "step": 70890 }, { "epoch": 0.8640147221917541, "grad_norm": 2.312144604350158, "learning_rate": 7.157793457344451e-07, "loss": 0.6951, "step": 70895 }, { "epoch": 0.864075658415902, "grad_norm": 2.497242447199813, "learning_rate": 7.154586273252086e-07, "loss": 0.6652, "step": 70900 }, { "epoch": 0.8641365946400498, "grad_norm": 2.379740246687141, "learning_rate": 7.151379089159718e-07, "loss": 0.7101, "step": 70905 }, { "epoch": 0.8641975308641975, "grad_norm": 2.690171094238681, "learning_rate": 7.148171905067351e-07, "loss": 0.6624, "step": 70910 }, { "epoch": 0.8642584670883453, "grad_norm": 2.9970355081927775, "learning_rate": 7.144964720974984e-07, "loss": 0.7489, "step": 70915 }, { "epoch": 0.8643194033124931, "grad_norm": 2.5290281568263766, "learning_rate": 7.141757536882618e-07, "loss": 0.6889, "step": 70920 }, { "epoch": 0.864380339536641, "grad_norm": 2.212437291763742, "learning_rate": 7.13855035279025e-07, "loss": 0.8246, "step": 70925 }, { "epoch": 0.8644412757607888, "grad_norm": 2.440821779125065, "learning_rate": 7.135343168697884e-07, "loss": 0.721, "step": 70930 }, { "epoch": 0.8645022119849366, "grad_norm": 3.2123180975141112, "learning_rate": 7.132135984605516e-07, "loss": 0.6786, "step": 70935 }, { "epoch": 0.8645631482090844, "grad_norm": 2.8136474856999736, "learning_rate": 7.12892880051315e-07, "loss": 0.6817, "step": 70940 }, { "epoch": 0.8646240844332321, "grad_norm": 2.270814941914051, "learning_rate": 7.125721616420783e-07, "loss": 0.6461, "step": 70945 }, { "epoch": 0.86468502065738, "grad_norm": 2.7401439397366545, "learning_rate": 7.122514432328416e-07, "loss": 0.6422, "step": 70950 }, { "epoch": 0.8647459568815278, "grad_norm": 2.2779714501001673, "learning_rate": 7.119307248236048e-07, "loss": 0.721, "step": 70955 }, { "epoch": 0.8648068931056756, "grad_norm": 2.6461325901167636, "learning_rate": 7.116100064143683e-07, "loss": 0.731, "step": 70960 }, { "epoch": 0.8648678293298234, "grad_norm": 2.5095840251607684, "learning_rate": 7.112892880051315e-07, "loss": 0.6807, "step": 70965 }, { "epoch": 0.8649287655539712, "grad_norm": 3.9681188634884053, "learning_rate": 7.109685695958948e-07, "loss": 0.723, "step": 70970 }, { "epoch": 0.8649897017781191, "grad_norm": 2.6014089609350983, "learning_rate": 7.106478511866581e-07, "loss": 0.6685, "step": 70975 }, { "epoch": 0.8650506380022668, "grad_norm": 2.243219297995181, "learning_rate": 7.103271327774215e-07, "loss": 0.6839, "step": 70980 }, { "epoch": 0.8651115742264146, "grad_norm": 2.7766553054490477, "learning_rate": 7.100064143681848e-07, "loss": 0.7382, "step": 70985 }, { "epoch": 0.8651725104505624, "grad_norm": 2.6820017624584547, "learning_rate": 7.096856959589481e-07, "loss": 0.687, "step": 70990 }, { "epoch": 0.8652334466747102, "grad_norm": 2.379652753923558, "learning_rate": 7.093649775497113e-07, "loss": 0.7103, "step": 70995 }, { "epoch": 0.8652943828988581, "grad_norm": 2.449006722551416, "learning_rate": 7.090442591404747e-07, "loss": 0.7118, "step": 71000 }, { "epoch": 0.8653553191230059, "grad_norm": 3.285127225871425, "learning_rate": 7.087235407312381e-07, "loss": 0.7144, "step": 71005 }, { "epoch": 0.8654162553471537, "grad_norm": 1.704212932803043, "learning_rate": 7.084028223220013e-07, "loss": 0.7081, "step": 71010 }, { "epoch": 0.8654771915713014, "grad_norm": 2.6802051110032243, "learning_rate": 7.080821039127647e-07, "loss": 0.643, "step": 71015 }, { "epoch": 0.8655381277954493, "grad_norm": 3.0055611525588177, "learning_rate": 7.07761385503528e-07, "loss": 0.7353, "step": 71020 }, { "epoch": 0.8655990640195971, "grad_norm": 2.2510499004093982, "learning_rate": 7.074406670942913e-07, "loss": 0.6942, "step": 71025 }, { "epoch": 0.8656600002437449, "grad_norm": 2.683783848547264, "learning_rate": 7.071199486850545e-07, "loss": 0.6985, "step": 71030 }, { "epoch": 0.8657209364678927, "grad_norm": 2.349911619167041, "learning_rate": 7.06799230275818e-07, "loss": 0.7408, "step": 71035 }, { "epoch": 0.8657818726920405, "grad_norm": 2.22771290839658, "learning_rate": 7.064785118665812e-07, "loss": 0.6876, "step": 71040 }, { "epoch": 0.8658428089161884, "grad_norm": 2.459568100196795, "learning_rate": 7.061577934573445e-07, "loss": 0.6342, "step": 71045 }, { "epoch": 0.8659037451403361, "grad_norm": 2.423463387236638, "learning_rate": 7.058370750481078e-07, "loss": 0.7259, "step": 71050 }, { "epoch": 0.8659646813644839, "grad_norm": 2.0103689702296137, "learning_rate": 7.055163566388712e-07, "loss": 0.6931, "step": 71055 }, { "epoch": 0.8660256175886317, "grad_norm": 2.6436193099719425, "learning_rate": 7.051956382296344e-07, "loss": 0.7009, "step": 71060 }, { "epoch": 0.8660865538127795, "grad_norm": 2.918594520543871, "learning_rate": 7.048749198203978e-07, "loss": 0.7071, "step": 71065 }, { "epoch": 0.8661474900369274, "grad_norm": 2.5710407274111224, "learning_rate": 7.04554201411161e-07, "loss": 0.6912, "step": 71070 }, { "epoch": 0.8662084262610752, "grad_norm": 2.149130749110074, "learning_rate": 7.042334830019244e-07, "loss": 0.7591, "step": 71075 }, { "epoch": 0.866269362485223, "grad_norm": 2.3284402089898317, "learning_rate": 7.039127645926877e-07, "loss": 0.6646, "step": 71080 }, { "epoch": 0.8663302987093707, "grad_norm": 2.417531779953316, "learning_rate": 7.03592046183451e-07, "loss": 0.7086, "step": 71085 }, { "epoch": 0.8663912349335186, "grad_norm": 2.299249649097661, "learning_rate": 7.032713277742142e-07, "loss": 0.6964, "step": 71090 }, { "epoch": 0.8664521711576664, "grad_norm": 2.1425187720889576, "learning_rate": 7.029506093649777e-07, "loss": 0.7412, "step": 71095 }, { "epoch": 0.8665131073818142, "grad_norm": 2.595707889993005, "learning_rate": 7.026298909557409e-07, "loss": 0.7767, "step": 71100 }, { "epoch": 0.866574043605962, "grad_norm": 2.4292793133767554, "learning_rate": 7.023091725465042e-07, "loss": 0.7268, "step": 71105 }, { "epoch": 0.8666349798301098, "grad_norm": 2.3565140374897884, "learning_rate": 7.019884541372675e-07, "loss": 0.7353, "step": 71110 }, { "epoch": 0.8666959160542577, "grad_norm": 2.172111741296881, "learning_rate": 7.016677357280309e-07, "loss": 0.7472, "step": 71115 }, { "epoch": 0.8667568522784054, "grad_norm": 2.1256776613993744, "learning_rate": 7.013470173187941e-07, "loss": 0.7129, "step": 71120 }, { "epoch": 0.8668177885025532, "grad_norm": 2.8020156544667802, "learning_rate": 7.010262989095575e-07, "loss": 0.7487, "step": 71125 }, { "epoch": 0.866878724726701, "grad_norm": 2.4959641763238984, "learning_rate": 7.007055805003207e-07, "loss": 0.7879, "step": 71130 }, { "epoch": 0.8669396609508488, "grad_norm": 2.271735870390729, "learning_rate": 7.003848620910841e-07, "loss": 0.6711, "step": 71135 }, { "epoch": 0.8670005971749967, "grad_norm": 2.522017214819748, "learning_rate": 7.000641436818474e-07, "loss": 0.7053, "step": 71140 }, { "epoch": 0.8670615333991445, "grad_norm": 2.2363723932353556, "learning_rate": 6.997434252726107e-07, "loss": 0.6315, "step": 71145 }, { "epoch": 0.8671224696232923, "grad_norm": 2.316839232534852, "learning_rate": 6.994227068633739e-07, "loss": 0.7229, "step": 71150 }, { "epoch": 0.86718340584744, "grad_norm": 2.7040167148981333, "learning_rate": 6.991019884541374e-07, "loss": 0.7345, "step": 71155 }, { "epoch": 0.8672443420715878, "grad_norm": 3.273244952690671, "learning_rate": 6.987812700449007e-07, "loss": 0.6543, "step": 71160 }, { "epoch": 0.8673052782957357, "grad_norm": 2.8490889826552355, "learning_rate": 6.984605516356639e-07, "loss": 0.7606, "step": 71165 }, { "epoch": 0.8673662145198835, "grad_norm": 2.7756094985991524, "learning_rate": 6.981398332264272e-07, "loss": 0.7057, "step": 71170 }, { "epoch": 0.8674271507440313, "grad_norm": 2.57753679389734, "learning_rate": 6.978191148171906e-07, "loss": 0.7181, "step": 71175 }, { "epoch": 0.8674880869681791, "grad_norm": 2.112668153596091, "learning_rate": 6.974983964079539e-07, "loss": 0.796, "step": 71180 }, { "epoch": 0.867549023192327, "grad_norm": 2.832040647207072, "learning_rate": 6.971776779987172e-07, "loss": 0.7556, "step": 71185 }, { "epoch": 0.8676099594164747, "grad_norm": 2.9229593458015897, "learning_rate": 6.968569595894804e-07, "loss": 0.7131, "step": 71190 }, { "epoch": 0.8676708956406225, "grad_norm": 3.1896479763392107, "learning_rate": 6.965362411802438e-07, "loss": 0.6598, "step": 71195 }, { "epoch": 0.8677318318647703, "grad_norm": 2.6456394513021912, "learning_rate": 6.962155227710072e-07, "loss": 0.7657, "step": 71200 }, { "epoch": 0.8677927680889181, "grad_norm": 2.0627087595453175, "learning_rate": 6.958948043617704e-07, "loss": 0.649, "step": 71205 }, { "epoch": 0.867853704313066, "grad_norm": 2.629605359887001, "learning_rate": 6.955740859525336e-07, "loss": 0.6508, "step": 71210 }, { "epoch": 0.8679146405372138, "grad_norm": 2.7787552157269033, "learning_rate": 6.952533675432971e-07, "loss": 0.7033, "step": 71215 }, { "epoch": 0.8679755767613616, "grad_norm": 2.661773588298789, "learning_rate": 6.949326491340604e-07, "loss": 0.6984, "step": 71220 }, { "epoch": 0.8680365129855093, "grad_norm": 2.5454995773555615, "learning_rate": 6.946119307248236e-07, "loss": 0.7782, "step": 71225 }, { "epoch": 0.8680974492096571, "grad_norm": 3.9231417768300463, "learning_rate": 6.94291212315587e-07, "loss": 0.6578, "step": 71230 }, { "epoch": 0.868158385433805, "grad_norm": 2.38542124345321, "learning_rate": 6.939704939063503e-07, "loss": 0.7256, "step": 71235 }, { "epoch": 0.8682193216579528, "grad_norm": 3.446730070637057, "learning_rate": 6.936497754971136e-07, "loss": 0.7519, "step": 71240 }, { "epoch": 0.8682802578821006, "grad_norm": 3.487847614428397, "learning_rate": 6.933290570878769e-07, "loss": 0.7395, "step": 71245 }, { "epoch": 0.8683411941062484, "grad_norm": 3.454817347338232, "learning_rate": 6.930083386786402e-07, "loss": 0.711, "step": 71250 }, { "epoch": 0.8684021303303963, "grad_norm": 2.2772537658304213, "learning_rate": 6.926876202694035e-07, "loss": 0.802, "step": 71255 }, { "epoch": 0.868463066554544, "grad_norm": 2.2621535859548083, "learning_rate": 6.923669018601669e-07, "loss": 0.6804, "step": 71260 }, { "epoch": 0.8685240027786918, "grad_norm": 3.3945115683699, "learning_rate": 6.920461834509301e-07, "loss": 0.6721, "step": 71265 }, { "epoch": 0.8685849390028396, "grad_norm": 2.6664134959514754, "learning_rate": 6.917254650416934e-07, "loss": 0.6671, "step": 71270 }, { "epoch": 0.8686458752269874, "grad_norm": 2.8100842706389733, "learning_rate": 6.914047466324568e-07, "loss": 0.6976, "step": 71275 }, { "epoch": 0.8687068114511353, "grad_norm": 4.798269433145637, "learning_rate": 6.910840282232201e-07, "loss": 0.724, "step": 71280 }, { "epoch": 0.8687677476752831, "grad_norm": 2.4145038892856445, "learning_rate": 6.907633098139833e-07, "loss": 0.7325, "step": 71285 }, { "epoch": 0.8688286838994309, "grad_norm": 2.0775527703620873, "learning_rate": 6.904425914047467e-07, "loss": 0.6912, "step": 71290 }, { "epoch": 0.8688896201235786, "grad_norm": 2.557703464388005, "learning_rate": 6.9012187299551e-07, "loss": 0.8005, "step": 71295 }, { "epoch": 0.8689505563477264, "grad_norm": 2.497751447506644, "learning_rate": 6.898011545862733e-07, "loss": 0.6834, "step": 71300 }, { "epoch": 0.8690114925718743, "grad_norm": 2.265475625974636, "learning_rate": 6.894804361770366e-07, "loss": 0.7693, "step": 71305 }, { "epoch": 0.8690724287960221, "grad_norm": 2.2096463927201206, "learning_rate": 6.891597177678e-07, "loss": 0.7593, "step": 71310 }, { "epoch": 0.8691333650201699, "grad_norm": 2.377713280884153, "learning_rate": 6.888389993585632e-07, "loss": 0.6769, "step": 71315 }, { "epoch": 0.8691943012443177, "grad_norm": 2.3572897237943713, "learning_rate": 6.885182809493266e-07, "loss": 0.7343, "step": 71320 }, { "epoch": 0.8692552374684656, "grad_norm": 2.3791649834719575, "learning_rate": 6.881975625400898e-07, "loss": 0.8056, "step": 71325 }, { "epoch": 0.8693161736926133, "grad_norm": 3.3210203139383623, "learning_rate": 6.878768441308532e-07, "loss": 0.6988, "step": 71330 }, { "epoch": 0.8693771099167611, "grad_norm": 2.0594281153019507, "learning_rate": 6.875561257216166e-07, "loss": 0.7299, "step": 71335 }, { "epoch": 0.8694380461409089, "grad_norm": 2.229535822556713, "learning_rate": 6.872354073123798e-07, "loss": 0.61, "step": 71340 }, { "epoch": 0.8694989823650567, "grad_norm": 2.8059171715017315, "learning_rate": 6.86914688903143e-07, "loss": 0.7774, "step": 71345 }, { "epoch": 0.8695599185892046, "grad_norm": 2.336387951988472, "learning_rate": 6.865939704939065e-07, "loss": 0.7259, "step": 71350 }, { "epoch": 0.8696208548133524, "grad_norm": 2.4746227003347134, "learning_rate": 6.862732520846698e-07, "loss": 0.7608, "step": 71355 }, { "epoch": 0.8696817910375002, "grad_norm": 3.6301879474798318, "learning_rate": 6.85952533675433e-07, "loss": 0.7414, "step": 71360 }, { "epoch": 0.8697427272616479, "grad_norm": 2.648360040371147, "learning_rate": 6.856318152661962e-07, "loss": 0.7615, "step": 71365 }, { "epoch": 0.8698036634857957, "grad_norm": 2.4098593910452055, "learning_rate": 6.853110968569597e-07, "loss": 0.6577, "step": 71370 }, { "epoch": 0.8698645997099436, "grad_norm": 2.560548170065591, "learning_rate": 6.84990378447723e-07, "loss": 0.7379, "step": 71375 }, { "epoch": 0.8699255359340914, "grad_norm": 2.2092036855968207, "learning_rate": 6.846696600384863e-07, "loss": 0.662, "step": 71380 }, { "epoch": 0.8699864721582392, "grad_norm": 3.0853508006955748, "learning_rate": 6.843489416292495e-07, "loss": 0.6715, "step": 71385 }, { "epoch": 0.870047408382387, "grad_norm": 2.413111178767273, "learning_rate": 6.840282232200129e-07, "loss": 0.7184, "step": 71390 }, { "epoch": 0.8701083446065349, "grad_norm": 2.293212097593116, "learning_rate": 6.837075048107763e-07, "loss": 0.6766, "step": 71395 }, { "epoch": 0.8701692808306826, "grad_norm": 2.3208896724919637, "learning_rate": 6.833867864015395e-07, "loss": 0.7028, "step": 71400 }, { "epoch": 0.8702302170548304, "grad_norm": 2.8595365035655917, "learning_rate": 6.830660679923028e-07, "loss": 0.7515, "step": 71405 }, { "epoch": 0.8702911532789782, "grad_norm": 2.783162531370282, "learning_rate": 6.827453495830662e-07, "loss": 0.6817, "step": 71410 }, { "epoch": 0.870352089503126, "grad_norm": 2.712785228341685, "learning_rate": 6.824246311738295e-07, "loss": 0.8428, "step": 71415 }, { "epoch": 0.8704130257272739, "grad_norm": 2.5584908289441013, "learning_rate": 6.821039127645927e-07, "loss": 0.6677, "step": 71420 }, { "epoch": 0.8704739619514217, "grad_norm": 2.709895673880574, "learning_rate": 6.81783194355356e-07, "loss": 0.7236, "step": 71425 }, { "epoch": 0.8705348981755694, "grad_norm": 3.3110016330435283, "learning_rate": 6.814624759461194e-07, "loss": 0.773, "step": 71430 }, { "epoch": 0.8705958343997172, "grad_norm": 2.383663337616332, "learning_rate": 6.811417575368827e-07, "loss": 0.6663, "step": 71435 }, { "epoch": 0.870656770623865, "grad_norm": 3.146165107739139, "learning_rate": 6.80821039127646e-07, "loss": 0.7792, "step": 71440 }, { "epoch": 0.8707177068480129, "grad_norm": 2.776988105355453, "learning_rate": 6.805003207184093e-07, "loss": 0.7486, "step": 71445 }, { "epoch": 0.8707786430721607, "grad_norm": 2.519074869123377, "learning_rate": 6.801796023091726e-07, "loss": 0.7322, "step": 71450 }, { "epoch": 0.8708395792963085, "grad_norm": 2.4347520179778317, "learning_rate": 6.79858883899936e-07, "loss": 0.7638, "step": 71455 }, { "epoch": 0.8709005155204563, "grad_norm": 2.2269421070260744, "learning_rate": 6.795381654906992e-07, "loss": 0.7686, "step": 71460 }, { "epoch": 0.870961451744604, "grad_norm": 2.9314738457805416, "learning_rate": 6.792174470814625e-07, "loss": 0.715, "step": 71465 }, { "epoch": 0.8710223879687519, "grad_norm": 2.014923678813882, "learning_rate": 6.788967286722258e-07, "loss": 0.6886, "step": 71470 }, { "epoch": 0.8710833241928997, "grad_norm": 2.668388400095518, "learning_rate": 6.785760102629892e-07, "loss": 0.7307, "step": 71475 }, { "epoch": 0.8711442604170475, "grad_norm": 2.1735732711403855, "learning_rate": 6.782552918537524e-07, "loss": 0.7426, "step": 71480 }, { "epoch": 0.8712051966411953, "grad_norm": 3.3687810667836153, "learning_rate": 6.779345734445157e-07, "loss": 0.7632, "step": 71485 }, { "epoch": 0.8712661328653432, "grad_norm": 2.2395581744889963, "learning_rate": 6.776138550352791e-07, "loss": 0.7656, "step": 71490 }, { "epoch": 0.871327069089491, "grad_norm": 2.372391921414122, "learning_rate": 6.772931366260424e-07, "loss": 0.6756, "step": 71495 }, { "epoch": 0.8713880053136387, "grad_norm": 2.6899719799315656, "learning_rate": 6.769724182168056e-07, "loss": 0.78, "step": 71500 }, { "epoch": 0.8714489415377865, "grad_norm": 2.6631620796099633, "learning_rate": 6.76651699807569e-07, "loss": 0.7662, "step": 71505 }, { "epoch": 0.8715098777619343, "grad_norm": 2.4365001995924436, "learning_rate": 6.763309813983324e-07, "loss": 0.7187, "step": 71510 }, { "epoch": 0.8715708139860822, "grad_norm": 2.4885258836826387, "learning_rate": 6.760102629890956e-07, "loss": 0.7556, "step": 71515 }, { "epoch": 0.87163175021023, "grad_norm": 2.659628885808821, "learning_rate": 6.756895445798589e-07, "loss": 0.6935, "step": 71520 }, { "epoch": 0.8716926864343778, "grad_norm": 2.2963461498739774, "learning_rate": 6.753688261706222e-07, "loss": 0.7499, "step": 71525 }, { "epoch": 0.8717536226585256, "grad_norm": 3.3473316943459106, "learning_rate": 6.750481077613856e-07, "loss": 0.7235, "step": 71530 }, { "epoch": 0.8718145588826733, "grad_norm": 2.695456517658513, "learning_rate": 6.747273893521489e-07, "loss": 0.7582, "step": 71535 }, { "epoch": 0.8718754951068212, "grad_norm": 3.1292068960242, "learning_rate": 6.744066709429121e-07, "loss": 0.7419, "step": 71540 }, { "epoch": 0.871936431330969, "grad_norm": 2.51041355688047, "learning_rate": 6.740859525336754e-07, "loss": 0.7092, "step": 71545 }, { "epoch": 0.8719973675551168, "grad_norm": 2.645996373335453, "learning_rate": 6.737652341244389e-07, "loss": 0.7242, "step": 71550 }, { "epoch": 0.8720583037792646, "grad_norm": 2.1501874820650198, "learning_rate": 6.734445157152021e-07, "loss": 0.7427, "step": 71555 }, { "epoch": 0.8721192400034125, "grad_norm": 2.9905865508209026, "learning_rate": 6.731237973059653e-07, "loss": 0.6542, "step": 71560 }, { "epoch": 0.8721801762275603, "grad_norm": 2.116234463755068, "learning_rate": 6.728030788967287e-07, "loss": 0.6752, "step": 71565 }, { "epoch": 0.872241112451708, "grad_norm": 2.3789914258696543, "learning_rate": 6.724823604874921e-07, "loss": 0.7762, "step": 71570 }, { "epoch": 0.8723020486758558, "grad_norm": 3.611488566540613, "learning_rate": 6.721616420782553e-07, "loss": 0.7295, "step": 71575 }, { "epoch": 0.8723629849000036, "grad_norm": 2.0097612906706663, "learning_rate": 6.718409236690187e-07, "loss": 0.6649, "step": 71580 }, { "epoch": 0.8724239211241515, "grad_norm": 2.423084081052384, "learning_rate": 6.715202052597819e-07, "loss": 0.7336, "step": 71585 }, { "epoch": 0.8724848573482993, "grad_norm": 2.5050349066974005, "learning_rate": 6.711994868505453e-07, "loss": 0.7443, "step": 71590 }, { "epoch": 0.8725457935724471, "grad_norm": 2.1955019671422398, "learning_rate": 6.708787684413086e-07, "loss": 0.6767, "step": 71595 }, { "epoch": 0.8726067297965949, "grad_norm": 2.4849475489426407, "learning_rate": 6.705580500320719e-07, "loss": 0.653, "step": 71600 }, { "epoch": 0.8726676660207426, "grad_norm": 2.5337758196492595, "learning_rate": 6.702373316228352e-07, "loss": 0.6849, "step": 71605 }, { "epoch": 0.8727286022448905, "grad_norm": 2.8613183226604586, "learning_rate": 6.699166132135986e-07, "loss": 0.8047, "step": 71610 }, { "epoch": 0.8727895384690383, "grad_norm": 2.3387862961008303, "learning_rate": 6.695958948043618e-07, "loss": 0.663, "step": 71615 }, { "epoch": 0.8728504746931861, "grad_norm": 2.8737533100187345, "learning_rate": 6.692751763951251e-07, "loss": 0.7254, "step": 71620 }, { "epoch": 0.8729114109173339, "grad_norm": 2.0725917064073713, "learning_rate": 6.689544579858885e-07, "loss": 0.7065, "step": 71625 }, { "epoch": 0.8729723471414818, "grad_norm": 2.5131680598929393, "learning_rate": 6.686337395766518e-07, "loss": 0.752, "step": 71630 }, { "epoch": 0.8730332833656296, "grad_norm": 2.4628667340777173, "learning_rate": 6.68313021167415e-07, "loss": 0.7135, "step": 71635 }, { "epoch": 0.8730942195897773, "grad_norm": 2.35216027829319, "learning_rate": 6.679923027581784e-07, "loss": 0.7023, "step": 71640 }, { "epoch": 0.8731551558139251, "grad_norm": 2.7558214648828345, "learning_rate": 6.676715843489417e-07, "loss": 0.7028, "step": 71645 }, { "epoch": 0.8732160920380729, "grad_norm": 2.5602540424717795, "learning_rate": 6.67350865939705e-07, "loss": 0.6857, "step": 71650 }, { "epoch": 0.8732770282622208, "grad_norm": 2.3530385849954008, "learning_rate": 6.670301475304683e-07, "loss": 0.7217, "step": 71655 }, { "epoch": 0.8733379644863686, "grad_norm": 2.3192810357135727, "learning_rate": 6.667094291212316e-07, "loss": 0.6904, "step": 71660 }, { "epoch": 0.8733989007105164, "grad_norm": 3.8187827232565237, "learning_rate": 6.663887107119949e-07, "loss": 0.7232, "step": 71665 }, { "epoch": 0.8734598369346642, "grad_norm": 2.492201420237133, "learning_rate": 6.660679923027583e-07, "loss": 0.704, "step": 71670 }, { "epoch": 0.8735207731588119, "grad_norm": 2.3483023386467696, "learning_rate": 6.657472738935215e-07, "loss": 0.7344, "step": 71675 }, { "epoch": 0.8735817093829598, "grad_norm": 2.473178192081735, "learning_rate": 6.654265554842848e-07, "loss": 0.6627, "step": 71680 }, { "epoch": 0.8736426456071076, "grad_norm": 2.5783942630406536, "learning_rate": 6.651058370750482e-07, "loss": 0.7129, "step": 71685 }, { "epoch": 0.8737035818312554, "grad_norm": 2.36402907848057, "learning_rate": 6.647851186658115e-07, "loss": 0.6775, "step": 71690 }, { "epoch": 0.8737645180554032, "grad_norm": 2.070403625440897, "learning_rate": 6.644644002565747e-07, "loss": 0.6867, "step": 71695 }, { "epoch": 0.873825454279551, "grad_norm": 2.9534689542594132, "learning_rate": 6.641436818473381e-07, "loss": 0.6948, "step": 71700 }, { "epoch": 0.8738863905036989, "grad_norm": 2.3478000297221118, "learning_rate": 6.638229634381015e-07, "loss": 0.7539, "step": 71705 }, { "epoch": 0.8739473267278466, "grad_norm": 3.3029666601357706, "learning_rate": 6.635022450288647e-07, "loss": 0.694, "step": 71710 }, { "epoch": 0.8740082629519944, "grad_norm": 3.091448195468043, "learning_rate": 6.63181526619628e-07, "loss": 0.7222, "step": 71715 }, { "epoch": 0.8740691991761422, "grad_norm": 2.0593578676936413, "learning_rate": 6.628608082103913e-07, "loss": 0.7401, "step": 71720 }, { "epoch": 0.87413013540029, "grad_norm": 2.3162714330059324, "learning_rate": 6.625400898011547e-07, "loss": 0.676, "step": 71725 }, { "epoch": 0.8741910716244379, "grad_norm": 3.271692411856957, "learning_rate": 6.62219371391918e-07, "loss": 0.7076, "step": 71730 }, { "epoch": 0.8742520078485857, "grad_norm": 2.1657288614712074, "learning_rate": 6.618986529826812e-07, "loss": 0.6734, "step": 71735 }, { "epoch": 0.8743129440727335, "grad_norm": 2.6975825753687444, "learning_rate": 6.615779345734445e-07, "loss": 0.7058, "step": 71740 }, { "epoch": 0.8743738802968812, "grad_norm": 4.578794396843487, "learning_rate": 6.61257216164208e-07, "loss": 0.6876, "step": 71745 }, { "epoch": 0.8744348165210291, "grad_norm": 2.5883284524568246, "learning_rate": 6.609364977549712e-07, "loss": 0.7378, "step": 71750 }, { "epoch": 0.8744957527451769, "grad_norm": 2.6753572536125034, "learning_rate": 6.606157793457345e-07, "loss": 0.6224, "step": 71755 }, { "epoch": 0.8745566889693247, "grad_norm": 2.1605255021596097, "learning_rate": 6.602950609364978e-07, "loss": 0.6716, "step": 71760 }, { "epoch": 0.8746176251934725, "grad_norm": 2.6059565472783706, "learning_rate": 6.599743425272612e-07, "loss": 0.6767, "step": 71765 }, { "epoch": 0.8746785614176203, "grad_norm": 2.4270320222572073, "learning_rate": 6.596536241180244e-07, "loss": 0.6673, "step": 71770 }, { "epoch": 0.8747394976417682, "grad_norm": 2.662009956039124, "learning_rate": 6.593329057087878e-07, "loss": 0.7241, "step": 71775 }, { "epoch": 0.8748004338659159, "grad_norm": 2.2329513939885355, "learning_rate": 6.59012187299551e-07, "loss": 0.6775, "step": 71780 }, { "epoch": 0.8748613700900637, "grad_norm": 2.7875045716087783, "learning_rate": 6.586914688903144e-07, "loss": 0.7825, "step": 71785 }, { "epoch": 0.8749223063142115, "grad_norm": 2.8939142321862192, "learning_rate": 6.583707504810777e-07, "loss": 0.6989, "step": 71790 }, { "epoch": 0.8749832425383594, "grad_norm": 2.146756179563116, "learning_rate": 6.58050032071841e-07, "loss": 0.7381, "step": 71795 }, { "epoch": 0.8750441787625072, "grad_norm": 2.310779394234243, "learning_rate": 6.577293136626042e-07, "loss": 0.74, "step": 71800 }, { "epoch": 0.875105114986655, "grad_norm": 2.320030243158018, "learning_rate": 6.574085952533677e-07, "loss": 0.7631, "step": 71805 }, { "epoch": 0.8751660512108028, "grad_norm": 2.1084111785362993, "learning_rate": 6.570878768441309e-07, "loss": 0.7108, "step": 71810 }, { "epoch": 0.8752269874349505, "grad_norm": 2.4884772935478754, "learning_rate": 6.567671584348942e-07, "loss": 0.7562, "step": 71815 }, { "epoch": 0.8752879236590984, "grad_norm": 3.0155986232378624, "learning_rate": 6.564464400256575e-07, "loss": 0.7103, "step": 71820 }, { "epoch": 0.8753488598832462, "grad_norm": 2.081360872310059, "learning_rate": 6.561257216164209e-07, "loss": 0.7521, "step": 71825 }, { "epoch": 0.875409796107394, "grad_norm": 4.473646299441397, "learning_rate": 6.558050032071841e-07, "loss": 0.6535, "step": 71830 }, { "epoch": 0.8754707323315418, "grad_norm": 3.0944388213378025, "learning_rate": 6.554842847979475e-07, "loss": 0.7209, "step": 71835 }, { "epoch": 0.8755316685556896, "grad_norm": 2.1485803171539684, "learning_rate": 6.551635663887107e-07, "loss": 0.7389, "step": 71840 }, { "epoch": 0.8755926047798375, "grad_norm": 2.2869288869691164, "learning_rate": 6.548428479794741e-07, "loss": 0.7399, "step": 71845 }, { "epoch": 0.8756535410039852, "grad_norm": 2.9434185637121275, "learning_rate": 6.545221295702374e-07, "loss": 0.6755, "step": 71850 }, { "epoch": 0.875714477228133, "grad_norm": 2.677653158593933, "learning_rate": 6.542014111610007e-07, "loss": 0.7274, "step": 71855 }, { "epoch": 0.8757754134522808, "grad_norm": 2.697428375353786, "learning_rate": 6.538806927517639e-07, "loss": 0.7541, "step": 71860 }, { "epoch": 0.8758363496764286, "grad_norm": 2.8498010975352748, "learning_rate": 6.535599743425274e-07, "loss": 0.728, "step": 71865 }, { "epoch": 0.8758972859005765, "grad_norm": 2.634965113656421, "learning_rate": 6.532392559332906e-07, "loss": 0.7135, "step": 71870 }, { "epoch": 0.8759582221247243, "grad_norm": 3.3560912482568974, "learning_rate": 6.529185375240539e-07, "loss": 0.7545, "step": 71875 }, { "epoch": 0.8760191583488721, "grad_norm": 2.7752439931519217, "learning_rate": 6.525978191148172e-07, "loss": 0.6588, "step": 71880 }, { "epoch": 0.8760800945730198, "grad_norm": 2.3515192890087473, "learning_rate": 6.522771007055806e-07, "loss": 0.7356, "step": 71885 }, { "epoch": 0.8761410307971677, "grad_norm": 2.5971647989199163, "learning_rate": 6.519563822963438e-07, "loss": 0.7522, "step": 71890 }, { "epoch": 0.8762019670213155, "grad_norm": 2.855482370660879, "learning_rate": 6.516356638871072e-07, "loss": 0.747, "step": 71895 }, { "epoch": 0.8762629032454633, "grad_norm": 2.7232632077244463, "learning_rate": 6.513149454778704e-07, "loss": 0.6838, "step": 71900 }, { "epoch": 0.8763238394696111, "grad_norm": 2.6389345984778516, "learning_rate": 6.509942270686338e-07, "loss": 0.688, "step": 71905 }, { "epoch": 0.8763847756937589, "grad_norm": 2.9634857295004218, "learning_rate": 6.506735086593971e-07, "loss": 0.7254, "step": 71910 }, { "epoch": 0.8764457119179068, "grad_norm": 3.16880908247564, "learning_rate": 6.503527902501604e-07, "loss": 0.7647, "step": 71915 }, { "epoch": 0.8765066481420545, "grad_norm": 2.367397182747761, "learning_rate": 6.500320718409238e-07, "loss": 0.7382, "step": 71920 }, { "epoch": 0.8765675843662023, "grad_norm": 1.9840078780117576, "learning_rate": 6.497113534316871e-07, "loss": 0.6826, "step": 71925 }, { "epoch": 0.8766285205903501, "grad_norm": 2.9640516312121385, "learning_rate": 6.493906350224504e-07, "loss": 0.6803, "step": 71930 }, { "epoch": 0.876689456814498, "grad_norm": 2.6857852202912875, "learning_rate": 6.490699166132136e-07, "loss": 0.7451, "step": 71935 }, { "epoch": 0.8767503930386458, "grad_norm": 2.7393950349130143, "learning_rate": 6.487491982039771e-07, "loss": 0.7805, "step": 71940 }, { "epoch": 0.8768113292627936, "grad_norm": 2.3704612582478397, "learning_rate": 6.484284797947403e-07, "loss": 0.6724, "step": 71945 }, { "epoch": 0.8768722654869414, "grad_norm": 2.7531360484494987, "learning_rate": 6.481077613855036e-07, "loss": 0.7875, "step": 71950 }, { "epoch": 0.8769332017110891, "grad_norm": 2.665673597289869, "learning_rate": 6.477870429762669e-07, "loss": 0.7299, "step": 71955 }, { "epoch": 0.876994137935237, "grad_norm": 2.873804661796291, "learning_rate": 6.474663245670303e-07, "loss": 0.803, "step": 71960 }, { "epoch": 0.8770550741593848, "grad_norm": 3.1925291587481412, "learning_rate": 6.471456061577935e-07, "loss": 0.7533, "step": 71965 }, { "epoch": 0.8771160103835326, "grad_norm": 2.3802327033980157, "learning_rate": 6.468248877485569e-07, "loss": 0.7901, "step": 71970 }, { "epoch": 0.8771769466076804, "grad_norm": 2.4474254157881243, "learning_rate": 6.465041693393201e-07, "loss": 0.6751, "step": 71975 }, { "epoch": 0.8772378828318282, "grad_norm": 2.5016465449453884, "learning_rate": 6.461834509300835e-07, "loss": 0.6662, "step": 71980 }, { "epoch": 0.8772988190559761, "grad_norm": 2.224621540757306, "learning_rate": 6.458627325208468e-07, "loss": 0.6771, "step": 71985 }, { "epoch": 0.8773597552801238, "grad_norm": 2.4029770453016046, "learning_rate": 6.455420141116101e-07, "loss": 0.7436, "step": 71990 }, { "epoch": 0.8774206915042716, "grad_norm": 2.7699397306397477, "learning_rate": 6.452212957023733e-07, "loss": 0.6893, "step": 71995 }, { "epoch": 0.8774816277284194, "grad_norm": 2.479291400153094, "learning_rate": 6.449005772931368e-07, "loss": 0.7817, "step": 72000 }, { "epoch": 0.8775425639525672, "grad_norm": 3.9658800538083856, "learning_rate": 6.445798588839e-07, "loss": 0.7305, "step": 72005 }, { "epoch": 0.8776035001767151, "grad_norm": 2.6415323240142787, "learning_rate": 6.442591404746633e-07, "loss": 0.7286, "step": 72010 }, { "epoch": 0.8776644364008629, "grad_norm": 3.1626188235351456, "learning_rate": 6.439384220654265e-07, "loss": 0.7261, "step": 72015 }, { "epoch": 0.8777253726250107, "grad_norm": 2.870919425910916, "learning_rate": 6.4361770365619e-07, "loss": 0.685, "step": 72020 }, { "epoch": 0.8777863088491584, "grad_norm": 2.50282958839602, "learning_rate": 6.432969852469532e-07, "loss": 0.7169, "step": 72025 }, { "epoch": 0.8778472450733062, "grad_norm": 3.332590055688595, "learning_rate": 6.429762668377165e-07, "loss": 0.6625, "step": 72030 }, { "epoch": 0.8779081812974541, "grad_norm": 3.013692933180518, "learning_rate": 6.426555484284798e-07, "loss": 0.7271, "step": 72035 }, { "epoch": 0.8779691175216019, "grad_norm": 2.403057871469693, "learning_rate": 6.423348300192432e-07, "loss": 0.7111, "step": 72040 }, { "epoch": 0.8780300537457497, "grad_norm": 2.366009106542903, "learning_rate": 6.420141116100064e-07, "loss": 0.7406, "step": 72045 }, { "epoch": 0.8780909899698975, "grad_norm": 2.6906396804373665, "learning_rate": 6.416933932007698e-07, "loss": 0.6938, "step": 72050 }, { "epoch": 0.8781519261940454, "grad_norm": 3.2530953116392793, "learning_rate": 6.41372674791533e-07, "loss": 0.7721, "step": 72055 }, { "epoch": 0.8782128624181931, "grad_norm": 2.7649913803212813, "learning_rate": 6.410519563822965e-07, "loss": 0.7, "step": 72060 }, { "epoch": 0.8782737986423409, "grad_norm": 2.636383904322382, "learning_rate": 6.407312379730597e-07, "loss": 0.6248, "step": 72065 }, { "epoch": 0.8783347348664887, "grad_norm": 2.294755843203074, "learning_rate": 6.40410519563823e-07, "loss": 0.7165, "step": 72070 }, { "epoch": 0.8783956710906365, "grad_norm": 2.1056854208338383, "learning_rate": 6.400898011545862e-07, "loss": 0.716, "step": 72075 }, { "epoch": 0.8784566073147844, "grad_norm": 3.300290951548431, "learning_rate": 6.397690827453497e-07, "loss": 0.7439, "step": 72080 }, { "epoch": 0.8785175435389322, "grad_norm": 2.3287885093480183, "learning_rate": 6.394483643361129e-07, "loss": 0.8166, "step": 72085 }, { "epoch": 0.87857847976308, "grad_norm": 3.6951524175438912, "learning_rate": 6.391276459268762e-07, "loss": 0.7239, "step": 72090 }, { "epoch": 0.8786394159872277, "grad_norm": 3.7015469775677787, "learning_rate": 6.388069275176395e-07, "loss": 0.7007, "step": 72095 }, { "epoch": 0.8787003522113755, "grad_norm": 2.6267628327862815, "learning_rate": 6.384862091084029e-07, "loss": 0.761, "step": 72100 }, { "epoch": 0.8787612884355234, "grad_norm": 2.248273830262639, "learning_rate": 6.381654906991662e-07, "loss": 0.6549, "step": 72105 }, { "epoch": 0.8788222246596712, "grad_norm": 2.491967205526838, "learning_rate": 6.378447722899295e-07, "loss": 0.7323, "step": 72110 }, { "epoch": 0.878883160883819, "grad_norm": 2.8679607247147954, "learning_rate": 6.375240538806927e-07, "loss": 0.7249, "step": 72115 }, { "epoch": 0.8789440971079668, "grad_norm": 2.619006237766401, "learning_rate": 6.372033354714561e-07, "loss": 0.7269, "step": 72120 }, { "epoch": 0.8790050333321147, "grad_norm": 2.1588282593145283, "learning_rate": 6.368826170622195e-07, "loss": 0.7444, "step": 72125 }, { "epoch": 0.8790659695562624, "grad_norm": 3.5057274283926256, "learning_rate": 6.365618986529827e-07, "loss": 0.674, "step": 72130 }, { "epoch": 0.8791269057804102, "grad_norm": 2.336012767487436, "learning_rate": 6.362411802437459e-07, "loss": 0.7016, "step": 72135 }, { "epoch": 0.879187842004558, "grad_norm": 2.5696125930860325, "learning_rate": 6.359204618345094e-07, "loss": 0.6547, "step": 72140 }, { "epoch": 0.8792487782287058, "grad_norm": 2.5423392562682654, "learning_rate": 6.355997434252727e-07, "loss": 0.692, "step": 72145 }, { "epoch": 0.8793097144528537, "grad_norm": 2.2552283643653825, "learning_rate": 6.352790250160359e-07, "loss": 0.6804, "step": 72150 }, { "epoch": 0.8793706506770015, "grad_norm": 2.4116015550826297, "learning_rate": 6.349583066067992e-07, "loss": 0.7619, "step": 72155 }, { "epoch": 0.8794315869011493, "grad_norm": 2.28992107476897, "learning_rate": 6.346375881975626e-07, "loss": 0.7328, "step": 72160 }, { "epoch": 0.879492523125297, "grad_norm": 2.328819830768758, "learning_rate": 6.343168697883259e-07, "loss": 0.6998, "step": 72165 }, { "epoch": 0.8795534593494448, "grad_norm": 2.6768697162169968, "learning_rate": 6.339961513790892e-07, "loss": 0.6834, "step": 72170 }, { "epoch": 0.8796143955735927, "grad_norm": 2.4552867337968367, "learning_rate": 6.336754329698525e-07, "loss": 0.7618, "step": 72175 }, { "epoch": 0.8796753317977405, "grad_norm": 2.402441732912492, "learning_rate": 6.333547145606158e-07, "loss": 0.6702, "step": 72180 }, { "epoch": 0.8797362680218883, "grad_norm": 2.448647694507934, "learning_rate": 6.330339961513792e-07, "loss": 0.7447, "step": 72185 }, { "epoch": 0.8797972042460361, "grad_norm": 2.0788527093625984, "learning_rate": 6.327132777421424e-07, "loss": 0.7188, "step": 72190 }, { "epoch": 0.879858140470184, "grad_norm": 2.2753912673549803, "learning_rate": 6.323925593329057e-07, "loss": 0.7342, "step": 72195 }, { "epoch": 0.8799190766943317, "grad_norm": 2.435912837810526, "learning_rate": 6.320718409236691e-07, "loss": 0.664, "step": 72200 }, { "epoch": 0.8799800129184795, "grad_norm": 2.4351126175205984, "learning_rate": 6.317511225144324e-07, "loss": 0.7133, "step": 72205 }, { "epoch": 0.8800409491426273, "grad_norm": 2.552122807662946, "learning_rate": 6.314304041051956e-07, "loss": 0.7175, "step": 72210 }, { "epoch": 0.8801018853667751, "grad_norm": 2.3973404935212255, "learning_rate": 6.311096856959591e-07, "loss": 0.6788, "step": 72215 }, { "epoch": 0.880162821590923, "grad_norm": 3.5379170525112005, "learning_rate": 6.307889672867223e-07, "loss": 0.8489, "step": 72220 }, { "epoch": 0.8802237578150708, "grad_norm": 2.5576587799427686, "learning_rate": 6.304682488774856e-07, "loss": 0.6754, "step": 72225 }, { "epoch": 0.8802846940392186, "grad_norm": 2.562376746439344, "learning_rate": 6.301475304682489e-07, "loss": 0.7727, "step": 72230 }, { "epoch": 0.8803456302633663, "grad_norm": 2.2105513526901723, "learning_rate": 6.298268120590123e-07, "loss": 0.7777, "step": 72235 }, { "epoch": 0.8804065664875141, "grad_norm": 2.654234057815029, "learning_rate": 6.295060936497755e-07, "loss": 0.7316, "step": 72240 }, { "epoch": 0.880467502711662, "grad_norm": 2.323045585079548, "learning_rate": 6.291853752405389e-07, "loss": 0.723, "step": 72245 }, { "epoch": 0.8805284389358098, "grad_norm": 2.3587264627266316, "learning_rate": 6.288646568313021e-07, "loss": 0.6927, "step": 72250 }, { "epoch": 0.8805893751599576, "grad_norm": 2.375808035637013, "learning_rate": 6.285439384220655e-07, "loss": 0.7229, "step": 72255 }, { "epoch": 0.8806503113841054, "grad_norm": 2.4945171150812064, "learning_rate": 6.282232200128288e-07, "loss": 0.7117, "step": 72260 }, { "epoch": 0.8807112476082533, "grad_norm": 2.178235597615727, "learning_rate": 6.279025016035921e-07, "loss": 0.7365, "step": 72265 }, { "epoch": 0.880772183832401, "grad_norm": 3.080771244159634, "learning_rate": 6.275817831943553e-07, "loss": 0.7368, "step": 72270 }, { "epoch": 0.8808331200565488, "grad_norm": 4.068296456767621, "learning_rate": 6.272610647851188e-07, "loss": 0.7682, "step": 72275 }, { "epoch": 0.8808940562806966, "grad_norm": 2.3357427082407707, "learning_rate": 6.269403463758821e-07, "loss": 0.6845, "step": 72280 }, { "epoch": 0.8809549925048444, "grad_norm": 2.318929918146068, "learning_rate": 6.266196279666453e-07, "loss": 0.6755, "step": 72285 }, { "epoch": 0.8810159287289923, "grad_norm": 2.2348310783568652, "learning_rate": 6.262989095574086e-07, "loss": 0.6521, "step": 72290 }, { "epoch": 0.8810768649531401, "grad_norm": 2.6986151853670477, "learning_rate": 6.25978191148172e-07, "loss": 0.739, "step": 72295 }, { "epoch": 0.8811378011772879, "grad_norm": 2.4338668689982943, "learning_rate": 6.256574727389353e-07, "loss": 0.7161, "step": 72300 }, { "epoch": 0.8811987374014356, "grad_norm": 3.0495182349512935, "learning_rate": 6.253367543296986e-07, "loss": 0.7115, "step": 72305 }, { "epoch": 0.8812596736255834, "grad_norm": 2.199018531963257, "learning_rate": 6.250160359204618e-07, "loss": 0.7073, "step": 72310 }, { "epoch": 0.8813206098497313, "grad_norm": 2.3158771568904153, "learning_rate": 6.246953175112252e-07, "loss": 0.739, "step": 72315 }, { "epoch": 0.8813815460738791, "grad_norm": 2.0545403543254546, "learning_rate": 6.243745991019886e-07, "loss": 0.696, "step": 72320 }, { "epoch": 0.8814424822980269, "grad_norm": 1.9935785115159708, "learning_rate": 6.240538806927518e-07, "loss": 0.7968, "step": 72325 }, { "epoch": 0.8815034185221747, "grad_norm": 2.4156739023516574, "learning_rate": 6.237331622835151e-07, "loss": 0.7438, "step": 72330 }, { "epoch": 0.8815643547463226, "grad_norm": 1.906930751108153, "learning_rate": 6.234124438742785e-07, "loss": 0.6385, "step": 72335 }, { "epoch": 0.8816252909704703, "grad_norm": 2.34079018947231, "learning_rate": 6.230917254650418e-07, "loss": 0.7166, "step": 72340 }, { "epoch": 0.8816862271946181, "grad_norm": 2.7318263783922214, "learning_rate": 6.22771007055805e-07, "loss": 0.6861, "step": 72345 }, { "epoch": 0.8817471634187659, "grad_norm": 2.6660235932101535, "learning_rate": 6.224502886465684e-07, "loss": 0.6924, "step": 72350 }, { "epoch": 0.8818080996429137, "grad_norm": 2.8428356335640284, "learning_rate": 6.221295702373317e-07, "loss": 0.782, "step": 72355 }, { "epoch": 0.8818690358670616, "grad_norm": 2.9454732598493996, "learning_rate": 6.21808851828095e-07, "loss": 0.7601, "step": 72360 }, { "epoch": 0.8819299720912094, "grad_norm": 6.354392619135419, "learning_rate": 6.214881334188583e-07, "loss": 0.619, "step": 72365 }, { "epoch": 0.8819909083153572, "grad_norm": 2.0442911348683444, "learning_rate": 6.211674150096216e-07, "loss": 0.7041, "step": 72370 }, { "epoch": 0.8820518445395049, "grad_norm": 2.5617575340821204, "learning_rate": 6.208466966003849e-07, "loss": 0.7492, "step": 72375 }, { "epoch": 0.8821127807636527, "grad_norm": 2.2114403059711356, "learning_rate": 6.205259781911483e-07, "loss": 0.7181, "step": 72380 }, { "epoch": 0.8821737169878006, "grad_norm": 3.015510278489338, "learning_rate": 6.202052597819115e-07, "loss": 0.7439, "step": 72385 }, { "epoch": 0.8822346532119484, "grad_norm": 2.4770928097425498, "learning_rate": 6.198845413726748e-07, "loss": 0.7254, "step": 72390 }, { "epoch": 0.8822955894360962, "grad_norm": 2.1424956195510934, "learning_rate": 6.195638229634382e-07, "loss": 0.7674, "step": 72395 }, { "epoch": 0.882356525660244, "grad_norm": 2.3439940376342316, "learning_rate": 6.192431045542015e-07, "loss": 0.6479, "step": 72400 }, { "epoch": 0.8824174618843917, "grad_norm": 2.4664082937236005, "learning_rate": 6.189223861449647e-07, "loss": 0.7197, "step": 72405 }, { "epoch": 0.8824783981085396, "grad_norm": 1.9909377027665232, "learning_rate": 6.186016677357281e-07, "loss": 0.682, "step": 72410 }, { "epoch": 0.8825393343326874, "grad_norm": 2.0922460250447332, "learning_rate": 6.182809493264914e-07, "loss": 0.7282, "step": 72415 }, { "epoch": 0.8826002705568352, "grad_norm": 2.758378469907212, "learning_rate": 6.179602309172547e-07, "loss": 0.7018, "step": 72420 }, { "epoch": 0.882661206780983, "grad_norm": 2.691156035305814, "learning_rate": 6.17639512508018e-07, "loss": 0.7359, "step": 72425 }, { "epoch": 0.8827221430051309, "grad_norm": 2.3738700629155365, "learning_rate": 6.173187940987813e-07, "loss": 0.7143, "step": 72430 }, { "epoch": 0.8827830792292787, "grad_norm": 2.3735035827514395, "learning_rate": 6.169980756895446e-07, "loss": 0.6716, "step": 72435 }, { "epoch": 0.8828440154534264, "grad_norm": 2.232076543440578, "learning_rate": 6.16677357280308e-07, "loss": 0.7669, "step": 72440 }, { "epoch": 0.8829049516775742, "grad_norm": 3.2556290800834002, "learning_rate": 6.163566388710712e-07, "loss": 0.7499, "step": 72445 }, { "epoch": 0.882965887901722, "grad_norm": 2.9043070594707916, "learning_rate": 6.160359204618345e-07, "loss": 0.6062, "step": 72450 }, { "epoch": 0.8830268241258699, "grad_norm": 2.390852954371698, "learning_rate": 6.157152020525979e-07, "loss": 0.7444, "step": 72455 }, { "epoch": 0.8830877603500177, "grad_norm": 2.1561141897436893, "learning_rate": 6.153944836433612e-07, "loss": 0.6727, "step": 72460 }, { "epoch": 0.8831486965741655, "grad_norm": 3.1008090674438336, "learning_rate": 6.150737652341244e-07, "loss": 0.7355, "step": 72465 }, { "epoch": 0.8832096327983133, "grad_norm": 2.1366597534831704, "learning_rate": 6.147530468248878e-07, "loss": 0.7544, "step": 72470 }, { "epoch": 0.883270569022461, "grad_norm": 2.076014703987209, "learning_rate": 6.144323284156511e-07, "loss": 0.6482, "step": 72475 }, { "epoch": 0.8833315052466089, "grad_norm": 3.1595145768935766, "learning_rate": 6.141116100064144e-07, "loss": 0.8022, "step": 72480 }, { "epoch": 0.8833924414707567, "grad_norm": 1.8931282390316992, "learning_rate": 6.137908915971777e-07, "loss": 0.6764, "step": 72485 }, { "epoch": 0.8834533776949045, "grad_norm": 2.9803453332444265, "learning_rate": 6.134701731879411e-07, "loss": 0.7629, "step": 72490 }, { "epoch": 0.8835143139190523, "grad_norm": 2.2373613094388896, "learning_rate": 6.131494547787043e-07, "loss": 0.7234, "step": 72495 }, { "epoch": 0.8835752501432002, "grad_norm": 2.3962633059909475, "learning_rate": 6.128287363694677e-07, "loss": 0.6348, "step": 72500 }, { "epoch": 0.883636186367348, "grad_norm": 2.4615194863404697, "learning_rate": 6.125080179602309e-07, "loss": 0.7024, "step": 72505 }, { "epoch": 0.8836971225914957, "grad_norm": 2.4727651918015767, "learning_rate": 6.121872995509943e-07, "loss": 0.6931, "step": 72510 }, { "epoch": 0.8837580588156435, "grad_norm": 2.297111089377291, "learning_rate": 6.118665811417577e-07, "loss": 0.7814, "step": 72515 }, { "epoch": 0.8838189950397913, "grad_norm": 2.4361572945733783, "learning_rate": 6.115458627325209e-07, "loss": 0.6851, "step": 72520 }, { "epoch": 0.8838799312639392, "grad_norm": 2.6520970432495075, "learning_rate": 6.112251443232842e-07, "loss": 0.756, "step": 72525 }, { "epoch": 0.883940867488087, "grad_norm": 2.6123026413340105, "learning_rate": 6.109044259140476e-07, "loss": 0.7154, "step": 72530 }, { "epoch": 0.8840018037122348, "grad_norm": 3.266765841716261, "learning_rate": 6.105837075048109e-07, "loss": 0.709, "step": 72535 }, { "epoch": 0.8840627399363826, "grad_norm": 2.3640745391906925, "learning_rate": 6.102629890955741e-07, "loss": 0.7129, "step": 72540 }, { "epoch": 0.8841236761605303, "grad_norm": 2.294156491745804, "learning_rate": 6.099422706863375e-07, "loss": 0.6942, "step": 72545 }, { "epoch": 0.8841846123846782, "grad_norm": 3.926164251692698, "learning_rate": 6.096215522771008e-07, "loss": 0.7473, "step": 72550 }, { "epoch": 0.884245548608826, "grad_norm": 3.0600628271571493, "learning_rate": 6.093008338678641e-07, "loss": 0.7283, "step": 72555 }, { "epoch": 0.8843064848329738, "grad_norm": 2.676333203437597, "learning_rate": 6.089801154586274e-07, "loss": 0.7508, "step": 72560 }, { "epoch": 0.8843674210571216, "grad_norm": 3.105904596000583, "learning_rate": 6.086593970493907e-07, "loss": 0.7453, "step": 72565 }, { "epoch": 0.8844283572812695, "grad_norm": 2.5354491777147734, "learning_rate": 6.08338678640154e-07, "loss": 0.6669, "step": 72570 }, { "epoch": 0.8844892935054173, "grad_norm": 2.180323019551995, "learning_rate": 6.080179602309174e-07, "loss": 0.6611, "step": 72575 }, { "epoch": 0.884550229729565, "grad_norm": 2.3889829519144543, "learning_rate": 6.076972418216806e-07, "loss": 0.7185, "step": 72580 }, { "epoch": 0.8846111659537128, "grad_norm": 2.297276358309894, "learning_rate": 6.073765234124439e-07, "loss": 0.6932, "step": 72585 }, { "epoch": 0.8846721021778606, "grad_norm": 2.7856814030151633, "learning_rate": 6.070558050032073e-07, "loss": 0.712, "step": 72590 }, { "epoch": 0.8847330384020085, "grad_norm": 2.5981948012255893, "learning_rate": 6.067350865939706e-07, "loss": 0.7166, "step": 72595 }, { "epoch": 0.8847939746261563, "grad_norm": 2.3578835042215416, "learning_rate": 6.064143681847338e-07, "loss": 0.7957, "step": 72600 }, { "epoch": 0.8848549108503041, "grad_norm": 2.752348115356399, "learning_rate": 6.060936497754971e-07, "loss": 0.7024, "step": 72605 }, { "epoch": 0.8849158470744519, "grad_norm": 2.4358026972589997, "learning_rate": 6.057729313662605e-07, "loss": 0.7927, "step": 72610 }, { "epoch": 0.8849767832985996, "grad_norm": 3.0030639267009147, "learning_rate": 6.054522129570238e-07, "loss": 0.7179, "step": 72615 }, { "epoch": 0.8850377195227475, "grad_norm": 2.7730117505308223, "learning_rate": 6.05131494547787e-07, "loss": 0.7556, "step": 72620 }, { "epoch": 0.8850986557468953, "grad_norm": 2.516387860835535, "learning_rate": 6.048107761385504e-07, "loss": 0.693, "step": 72625 }, { "epoch": 0.8851595919710431, "grad_norm": 2.787607588983434, "learning_rate": 6.044900577293137e-07, "loss": 0.666, "step": 72630 }, { "epoch": 0.8852205281951909, "grad_norm": 2.7000466541036423, "learning_rate": 6.04169339320077e-07, "loss": 0.7177, "step": 72635 }, { "epoch": 0.8852814644193387, "grad_norm": 2.2121589461142595, "learning_rate": 6.038486209108403e-07, "loss": 0.7396, "step": 72640 }, { "epoch": 0.8853424006434866, "grad_norm": 2.1486375274542655, "learning_rate": 6.035279025016036e-07, "loss": 0.6736, "step": 72645 }, { "epoch": 0.8854033368676343, "grad_norm": 2.8928949208688928, "learning_rate": 6.032071840923669e-07, "loss": 0.7151, "step": 72650 }, { "epoch": 0.8854642730917821, "grad_norm": 2.407353816975187, "learning_rate": 6.028864656831303e-07, "loss": 0.6699, "step": 72655 }, { "epoch": 0.8855252093159299, "grad_norm": 3.1164623978938537, "learning_rate": 6.025657472738935e-07, "loss": 0.6723, "step": 72660 }, { "epoch": 0.8855861455400778, "grad_norm": 2.2745267339722828, "learning_rate": 6.02245028864657e-07, "loss": 0.6669, "step": 72665 }, { "epoch": 0.8856470817642256, "grad_norm": 3.626751722145028, "learning_rate": 6.019243104554202e-07, "loss": 0.7863, "step": 72670 }, { "epoch": 0.8857080179883734, "grad_norm": 2.0343441270988105, "learning_rate": 6.016035920461835e-07, "loss": 0.725, "step": 72675 }, { "epoch": 0.8857689542125212, "grad_norm": 2.1400533001493933, "learning_rate": 6.012828736369467e-07, "loss": 0.6515, "step": 72680 }, { "epoch": 0.8858298904366689, "grad_norm": 2.299634721549294, "learning_rate": 6.009621552277102e-07, "loss": 0.8135, "step": 72685 }, { "epoch": 0.8858908266608168, "grad_norm": 2.613530369445542, "learning_rate": 6.006414368184734e-07, "loss": 0.649, "step": 72690 }, { "epoch": 0.8859517628849646, "grad_norm": 3.021679464101044, "learning_rate": 6.003207184092367e-07, "loss": 0.7471, "step": 72695 }, { "epoch": 0.8860126991091124, "grad_norm": 2.3073047020759447, "learning_rate": 6.000000000000001e-07, "loss": 0.7147, "step": 72700 }, { "epoch": 0.8860736353332602, "grad_norm": 3.0058121114886727, "learning_rate": 5.996792815907634e-07, "loss": 0.7076, "step": 72705 }, { "epoch": 0.886134571557408, "grad_norm": 2.5558950678317203, "learning_rate": 5.993585631815266e-07, "loss": 0.7837, "step": 72710 }, { "epoch": 0.8861955077815559, "grad_norm": 2.3499868983636345, "learning_rate": 5.9903784477229e-07, "loss": 0.7221, "step": 72715 }, { "epoch": 0.8862564440057036, "grad_norm": 2.588467697750636, "learning_rate": 5.987171263630533e-07, "loss": 0.732, "step": 72720 }, { "epoch": 0.8863173802298514, "grad_norm": 3.075524066898237, "learning_rate": 5.983964079538166e-07, "loss": 0.7747, "step": 72725 }, { "epoch": 0.8863783164539992, "grad_norm": 2.08572593902349, "learning_rate": 5.980756895445799e-07, "loss": 0.6769, "step": 72730 }, { "epoch": 0.886439252678147, "grad_norm": 2.9549049656312527, "learning_rate": 5.977549711353432e-07, "loss": 0.7166, "step": 72735 }, { "epoch": 0.8865001889022949, "grad_norm": 2.2313494302498698, "learning_rate": 5.974342527261065e-07, "loss": 0.7374, "step": 72740 }, { "epoch": 0.8865611251264427, "grad_norm": 2.6342521936233214, "learning_rate": 5.971135343168699e-07, "loss": 0.7392, "step": 72745 }, { "epoch": 0.8866220613505905, "grad_norm": 2.387183337239981, "learning_rate": 5.967928159076331e-07, "loss": 0.7189, "step": 72750 }, { "epoch": 0.8866829975747382, "grad_norm": 2.5026702987389053, "learning_rate": 5.964720974983964e-07, "loss": 0.7214, "step": 72755 }, { "epoch": 0.886743933798886, "grad_norm": 2.258515559927505, "learning_rate": 5.961513790891598e-07, "loss": 0.6955, "step": 72760 }, { "epoch": 0.8868048700230339, "grad_norm": 2.468946801495067, "learning_rate": 5.958306606799231e-07, "loss": 0.7612, "step": 72765 }, { "epoch": 0.8868658062471817, "grad_norm": 2.218874923475416, "learning_rate": 5.955099422706863e-07, "loss": 0.7441, "step": 72770 }, { "epoch": 0.8869267424713295, "grad_norm": 2.0839628098662253, "learning_rate": 5.951892238614497e-07, "loss": 0.6805, "step": 72775 }, { "epoch": 0.8869876786954773, "grad_norm": 2.3467893065535557, "learning_rate": 5.94868505452213e-07, "loss": 0.7162, "step": 72780 }, { "epoch": 0.8870486149196252, "grad_norm": 4.323427135322531, "learning_rate": 5.945477870429763e-07, "loss": 0.6581, "step": 72785 }, { "epoch": 0.8871095511437729, "grad_norm": 3.05442328085377, "learning_rate": 5.942270686337396e-07, "loss": 0.6751, "step": 72790 }, { "epoch": 0.8871704873679207, "grad_norm": 2.5384234226451317, "learning_rate": 5.939063502245029e-07, "loss": 0.7494, "step": 72795 }, { "epoch": 0.8872314235920685, "grad_norm": 2.7688108192587655, "learning_rate": 5.935856318152662e-07, "loss": 0.7558, "step": 72800 }, { "epoch": 0.8872923598162163, "grad_norm": 2.5563615319381867, "learning_rate": 5.932649134060296e-07, "loss": 0.6996, "step": 72805 }, { "epoch": 0.8873532960403642, "grad_norm": 2.2785904741684155, "learning_rate": 5.929441949967929e-07, "loss": 0.6323, "step": 72810 }, { "epoch": 0.887414232264512, "grad_norm": 2.841455694847207, "learning_rate": 5.926234765875561e-07, "loss": 0.727, "step": 72815 }, { "epoch": 0.8874751684886598, "grad_norm": 2.5146654363570775, "learning_rate": 5.923027581783195e-07, "loss": 0.6924, "step": 72820 }, { "epoch": 0.8875361047128075, "grad_norm": 2.8689260565224677, "learning_rate": 5.919820397690828e-07, "loss": 0.7722, "step": 72825 }, { "epoch": 0.8875970409369554, "grad_norm": 2.2649966159417665, "learning_rate": 5.916613213598461e-07, "loss": 0.791, "step": 72830 }, { "epoch": 0.8876579771611032, "grad_norm": 2.4906118609944956, "learning_rate": 5.913406029506094e-07, "loss": 0.7544, "step": 72835 }, { "epoch": 0.887718913385251, "grad_norm": 2.528751991159419, "learning_rate": 5.910198845413728e-07, "loss": 0.7014, "step": 72840 }, { "epoch": 0.8877798496093988, "grad_norm": 2.621447050970572, "learning_rate": 5.90699166132136e-07, "loss": 0.6965, "step": 72845 }, { "epoch": 0.8878407858335466, "grad_norm": 2.3485648143314144, "learning_rate": 5.903784477228994e-07, "loss": 0.6818, "step": 72850 }, { "epoch": 0.8879017220576945, "grad_norm": 2.4426484177018306, "learning_rate": 5.900577293136626e-07, "loss": 0.7479, "step": 72855 }, { "epoch": 0.8879626582818422, "grad_norm": 2.139493326228574, "learning_rate": 5.89737010904426e-07, "loss": 0.6134, "step": 72860 }, { "epoch": 0.88802359450599, "grad_norm": 1.953158066724232, "learning_rate": 5.894162924951893e-07, "loss": 0.6958, "step": 72865 }, { "epoch": 0.8880845307301378, "grad_norm": 2.213418146976434, "learning_rate": 5.890955740859526e-07, "loss": 0.7322, "step": 72870 }, { "epoch": 0.8881454669542856, "grad_norm": 3.0077472308559923, "learning_rate": 5.887748556767159e-07, "loss": 0.7035, "step": 72875 }, { "epoch": 0.8882064031784335, "grad_norm": 2.9614508970238047, "learning_rate": 5.884541372674793e-07, "loss": 0.6949, "step": 72880 }, { "epoch": 0.8882673394025813, "grad_norm": 2.0473050180051273, "learning_rate": 5.881334188582425e-07, "loss": 0.7424, "step": 72885 }, { "epoch": 0.8883282756267291, "grad_norm": 2.6710632996644237, "learning_rate": 5.878127004490058e-07, "loss": 0.7266, "step": 72890 }, { "epoch": 0.8883892118508768, "grad_norm": 2.216630805164564, "learning_rate": 5.874919820397692e-07, "loss": 0.6536, "step": 72895 }, { "epoch": 0.8884501480750246, "grad_norm": 2.3629029307703395, "learning_rate": 5.871712636305325e-07, "loss": 0.7327, "step": 72900 }, { "epoch": 0.8885110842991725, "grad_norm": 2.1832980435567855, "learning_rate": 5.868505452212957e-07, "loss": 0.6606, "step": 72905 }, { "epoch": 0.8885720205233203, "grad_norm": 2.6671039381009702, "learning_rate": 5.865298268120591e-07, "loss": 0.6645, "step": 72910 }, { "epoch": 0.8886329567474681, "grad_norm": 2.806583657331513, "learning_rate": 5.862091084028224e-07, "loss": 0.6736, "step": 72915 }, { "epoch": 0.8886938929716159, "grad_norm": 1.9963121725800843, "learning_rate": 5.858883899935857e-07, "loss": 0.7423, "step": 72920 }, { "epoch": 0.8887548291957638, "grad_norm": 2.3665432349819584, "learning_rate": 5.85567671584349e-07, "loss": 0.7003, "step": 72925 }, { "epoch": 0.8888157654199115, "grad_norm": 2.760619668320608, "learning_rate": 5.852469531751123e-07, "loss": 0.7014, "step": 72930 }, { "epoch": 0.8888767016440593, "grad_norm": 3.6308638965519524, "learning_rate": 5.849262347658756e-07, "loss": 0.6988, "step": 72935 }, { "epoch": 0.8889376378682071, "grad_norm": 2.9732272813616647, "learning_rate": 5.84605516356639e-07, "loss": 0.7182, "step": 72940 }, { "epoch": 0.8889985740923549, "grad_norm": 2.837531389726055, "learning_rate": 5.842847979474022e-07, "loss": 0.75, "step": 72945 }, { "epoch": 0.8890595103165028, "grad_norm": 2.6202482943097376, "learning_rate": 5.839640795381655e-07, "loss": 0.8199, "step": 72950 }, { "epoch": 0.8891204465406506, "grad_norm": 3.441054940600352, "learning_rate": 5.836433611289289e-07, "loss": 0.6739, "step": 72955 }, { "epoch": 0.8891813827647984, "grad_norm": 2.6525210773224814, "learning_rate": 5.833226427196922e-07, "loss": 0.736, "step": 72960 }, { "epoch": 0.8892423189889461, "grad_norm": 2.4252558008525598, "learning_rate": 5.830019243104554e-07, "loss": 0.6511, "step": 72965 }, { "epoch": 0.889303255213094, "grad_norm": 2.2612855333062085, "learning_rate": 5.826812059012188e-07, "loss": 0.7424, "step": 72970 }, { "epoch": 0.8893641914372418, "grad_norm": 2.1884529291396655, "learning_rate": 5.823604874919821e-07, "loss": 0.6759, "step": 72975 }, { "epoch": 0.8894251276613896, "grad_norm": 2.5789988189666446, "learning_rate": 5.820397690827454e-07, "loss": 0.7144, "step": 72980 }, { "epoch": 0.8894860638855374, "grad_norm": 2.973240198039257, "learning_rate": 5.817190506735087e-07, "loss": 0.6907, "step": 72985 }, { "epoch": 0.8895470001096852, "grad_norm": 2.3579024211990007, "learning_rate": 5.81398332264272e-07, "loss": 0.7339, "step": 72990 }, { "epoch": 0.8896079363338331, "grad_norm": 2.04901978027673, "learning_rate": 5.810776138550353e-07, "loss": 0.677, "step": 72995 }, { "epoch": 0.8896688725579808, "grad_norm": 3.167902962011468, "learning_rate": 5.807568954457987e-07, "loss": 0.7143, "step": 73000 }, { "epoch": 0.8897298087821286, "grad_norm": 2.729888993204399, "learning_rate": 5.804361770365619e-07, "loss": 0.7029, "step": 73005 }, { "epoch": 0.8897907450062764, "grad_norm": 5.4437750413965995, "learning_rate": 5.801154586273252e-07, "loss": 0.7406, "step": 73010 }, { "epoch": 0.8898516812304242, "grad_norm": 3.266837568580869, "learning_rate": 5.797947402180886e-07, "loss": 0.7634, "step": 73015 }, { "epoch": 0.8899126174545721, "grad_norm": 3.8316148712698355, "learning_rate": 5.794740218088519e-07, "loss": 0.7854, "step": 73020 }, { "epoch": 0.8899735536787199, "grad_norm": 2.669894732328307, "learning_rate": 5.791533033996151e-07, "loss": 0.717, "step": 73025 }, { "epoch": 0.8900344899028677, "grad_norm": 2.771260486916655, "learning_rate": 5.788325849903785e-07, "loss": 0.6568, "step": 73030 }, { "epoch": 0.8900954261270154, "grad_norm": 2.622084096034204, "learning_rate": 5.785118665811418e-07, "loss": 0.7016, "step": 73035 }, { "epoch": 0.8901563623511632, "grad_norm": 2.515972143750958, "learning_rate": 5.781911481719051e-07, "loss": 0.7092, "step": 73040 }, { "epoch": 0.8902172985753111, "grad_norm": 2.596644619507147, "learning_rate": 5.778704297626684e-07, "loss": 0.7137, "step": 73045 }, { "epoch": 0.8902782347994589, "grad_norm": 3.1174464536882596, "learning_rate": 5.775497113534318e-07, "loss": 0.746, "step": 73050 }, { "epoch": 0.8903391710236067, "grad_norm": 2.822735709173467, "learning_rate": 5.77228992944195e-07, "loss": 0.7783, "step": 73055 }, { "epoch": 0.8904001072477545, "grad_norm": 2.828163147616261, "learning_rate": 5.769082745349584e-07, "loss": 0.7305, "step": 73060 }, { "epoch": 0.8904610434719024, "grad_norm": 2.419389760903534, "learning_rate": 5.765875561257216e-07, "loss": 0.6766, "step": 73065 }, { "epoch": 0.8905219796960501, "grad_norm": 2.4436014641039563, "learning_rate": 5.76266837716485e-07, "loss": 0.7307, "step": 73070 }, { "epoch": 0.8905829159201979, "grad_norm": 2.021498975252245, "learning_rate": 5.759461193072483e-07, "loss": 0.7125, "step": 73075 }, { "epoch": 0.8906438521443457, "grad_norm": 4.8700288673181955, "learning_rate": 5.756254008980116e-07, "loss": 0.7488, "step": 73080 }, { "epoch": 0.8907047883684935, "grad_norm": 2.3065839841118705, "learning_rate": 5.753046824887749e-07, "loss": 0.5943, "step": 73085 }, { "epoch": 0.8907657245926414, "grad_norm": 2.3249828789936835, "learning_rate": 5.749839640795383e-07, "loss": 0.7199, "step": 73090 }, { "epoch": 0.8908266608167892, "grad_norm": 2.2923018765842116, "learning_rate": 5.746632456703015e-07, "loss": 0.6963, "step": 73095 }, { "epoch": 0.890887597040937, "grad_norm": 2.2096726566462492, "learning_rate": 5.743425272610648e-07, "loss": 0.7396, "step": 73100 }, { "epoch": 0.8909485332650847, "grad_norm": 2.1993117973503935, "learning_rate": 5.740218088518282e-07, "loss": 0.7701, "step": 73105 }, { "epoch": 0.8910094694892325, "grad_norm": 2.9541603131057004, "learning_rate": 5.737010904425915e-07, "loss": 0.6767, "step": 73110 }, { "epoch": 0.8910704057133804, "grad_norm": 2.3384047122596856, "learning_rate": 5.733803720333548e-07, "loss": 0.6727, "step": 73115 }, { "epoch": 0.8911313419375282, "grad_norm": 2.2915401405857105, "learning_rate": 5.73059653624118e-07, "loss": 0.7196, "step": 73120 }, { "epoch": 0.891192278161676, "grad_norm": 2.713763192411969, "learning_rate": 5.727389352148814e-07, "loss": 0.7776, "step": 73125 }, { "epoch": 0.8912532143858238, "grad_norm": 2.48964144023601, "learning_rate": 5.724182168056447e-07, "loss": 0.6874, "step": 73130 }, { "epoch": 0.8913141506099717, "grad_norm": 2.1357713423705103, "learning_rate": 5.720974983964081e-07, "loss": 0.7725, "step": 73135 }, { "epoch": 0.8913750868341194, "grad_norm": 2.93512870445562, "learning_rate": 5.717767799871713e-07, "loss": 0.67, "step": 73140 }, { "epoch": 0.8914360230582672, "grad_norm": 2.2566216801950043, "learning_rate": 5.714560615779346e-07, "loss": 0.6365, "step": 73145 }, { "epoch": 0.891496959282415, "grad_norm": 2.5954423680161605, "learning_rate": 5.71135343168698e-07, "loss": 0.7397, "step": 73150 }, { "epoch": 0.8915578955065628, "grad_norm": 2.5372680107565055, "learning_rate": 5.708146247594613e-07, "loss": 0.678, "step": 73155 }, { "epoch": 0.8916188317307107, "grad_norm": 3.5946671628992144, "learning_rate": 5.704939063502245e-07, "loss": 0.7127, "step": 73160 }, { "epoch": 0.8916797679548585, "grad_norm": 2.574793913220345, "learning_rate": 5.701731879409878e-07, "loss": 0.7382, "step": 73165 }, { "epoch": 0.8917407041790063, "grad_norm": 2.242871463009489, "learning_rate": 5.698524695317512e-07, "loss": 0.6854, "step": 73170 }, { "epoch": 0.891801640403154, "grad_norm": 2.4492166226071195, "learning_rate": 5.695317511225145e-07, "loss": 0.6881, "step": 73175 }, { "epoch": 0.8918625766273018, "grad_norm": 2.4423377653508993, "learning_rate": 5.692110327132777e-07, "loss": 0.6743, "step": 73180 }, { "epoch": 0.8919235128514497, "grad_norm": 2.5986207819094567, "learning_rate": 5.688903143040411e-07, "loss": 0.7317, "step": 73185 }, { "epoch": 0.8919844490755975, "grad_norm": 2.5878643632322293, "learning_rate": 5.685695958948044e-07, "loss": 0.6798, "step": 73190 }, { "epoch": 0.8920453852997453, "grad_norm": 2.2723144967187303, "learning_rate": 5.682488774855678e-07, "loss": 0.7488, "step": 73195 }, { "epoch": 0.8921063215238931, "grad_norm": 2.494784759802897, "learning_rate": 5.67928159076331e-07, "loss": 0.726, "step": 73200 }, { "epoch": 0.892167257748041, "grad_norm": 2.2495087128855724, "learning_rate": 5.676074406670943e-07, "loss": 0.6762, "step": 73205 }, { "epoch": 0.8922281939721887, "grad_norm": 2.5906877093262697, "learning_rate": 5.672867222578576e-07, "loss": 0.6949, "step": 73210 }, { "epoch": 0.8922891301963365, "grad_norm": 2.0799463890592316, "learning_rate": 5.66966003848621e-07, "loss": 0.5994, "step": 73215 }, { "epoch": 0.8923500664204843, "grad_norm": 2.779833963469829, "learning_rate": 5.666452854393842e-07, "loss": 0.6697, "step": 73220 }, { "epoch": 0.8924110026446321, "grad_norm": 2.7744778427563497, "learning_rate": 5.663245670301477e-07, "loss": 0.659, "step": 73225 }, { "epoch": 0.89247193886878, "grad_norm": 2.74301799595038, "learning_rate": 5.660038486209109e-07, "loss": 0.6967, "step": 73230 }, { "epoch": 0.8925328750929278, "grad_norm": 2.303767180075033, "learning_rate": 5.656831302116742e-07, "loss": 0.7214, "step": 73235 }, { "epoch": 0.8925938113170756, "grad_norm": 2.0600849474218546, "learning_rate": 5.653624118024374e-07, "loss": 0.719, "step": 73240 }, { "epoch": 0.8926547475412233, "grad_norm": 2.2588280874108424, "learning_rate": 5.650416933932009e-07, "loss": 0.6304, "step": 73245 }, { "epoch": 0.8927156837653711, "grad_norm": 3.379798024953491, "learning_rate": 5.647209749839641e-07, "loss": 0.7511, "step": 73250 }, { "epoch": 0.892776619989519, "grad_norm": 2.2961757672923535, "learning_rate": 5.644002565747274e-07, "loss": 0.6802, "step": 73255 }, { "epoch": 0.8928375562136668, "grad_norm": 2.804847647296184, "learning_rate": 5.640795381654908e-07, "loss": 0.6766, "step": 73260 }, { "epoch": 0.8928984924378146, "grad_norm": 2.62297036165599, "learning_rate": 5.637588197562541e-07, "loss": 0.7778, "step": 73265 }, { "epoch": 0.8929594286619624, "grad_norm": 2.761263166985846, "learning_rate": 5.634381013470173e-07, "loss": 0.7482, "step": 73270 }, { "epoch": 0.8930203648861103, "grad_norm": 2.8842505519711867, "learning_rate": 5.631173829377807e-07, "loss": 0.7026, "step": 73275 }, { "epoch": 0.893081301110258, "grad_norm": 2.778344373461053, "learning_rate": 5.62796664528544e-07, "loss": 0.7475, "step": 73280 }, { "epoch": 0.8931422373344058, "grad_norm": 2.8812118077034174, "learning_rate": 5.624759461193073e-07, "loss": 0.7162, "step": 73285 }, { "epoch": 0.8932031735585536, "grad_norm": 2.7354684117683963, "learning_rate": 5.621552277100706e-07, "loss": 0.7453, "step": 73290 }, { "epoch": 0.8932641097827014, "grad_norm": 2.1643675023703723, "learning_rate": 5.618345093008339e-07, "loss": 0.7101, "step": 73295 }, { "epoch": 0.8933250460068493, "grad_norm": 3.541067925075952, "learning_rate": 5.615137908915972e-07, "loss": 0.782, "step": 73300 }, { "epoch": 0.8933859822309971, "grad_norm": 2.305177657400107, "learning_rate": 5.611930724823606e-07, "loss": 0.7943, "step": 73305 }, { "epoch": 0.8934469184551449, "grad_norm": 2.057838158984747, "learning_rate": 5.608723540731238e-07, "loss": 0.7099, "step": 73310 }, { "epoch": 0.8935078546792926, "grad_norm": 3.525208948940039, "learning_rate": 5.605516356638871e-07, "loss": 0.7093, "step": 73315 }, { "epoch": 0.8935687909034404, "grad_norm": 2.673674403393285, "learning_rate": 5.602309172546505e-07, "loss": 0.7809, "step": 73320 }, { "epoch": 0.8936297271275883, "grad_norm": 2.3670322461446154, "learning_rate": 5.599101988454138e-07, "loss": 0.7482, "step": 73325 }, { "epoch": 0.8936906633517361, "grad_norm": 2.339443633401619, "learning_rate": 5.59589480436177e-07, "loss": 0.6793, "step": 73330 }, { "epoch": 0.8937515995758839, "grad_norm": 2.3390198469786188, "learning_rate": 5.592687620269404e-07, "loss": 0.7493, "step": 73335 }, { "epoch": 0.8938125358000317, "grad_norm": 2.529344732463578, "learning_rate": 5.589480436177037e-07, "loss": 0.8583, "step": 73340 }, { "epoch": 0.8938734720241794, "grad_norm": 2.769200943721157, "learning_rate": 5.58627325208467e-07, "loss": 0.6823, "step": 73345 }, { "epoch": 0.8939344082483273, "grad_norm": 2.3430375822253393, "learning_rate": 5.583066067992303e-07, "loss": 0.7104, "step": 73350 }, { "epoch": 0.8939953444724751, "grad_norm": 2.089469800351457, "learning_rate": 5.579858883899936e-07, "loss": 0.7533, "step": 73355 }, { "epoch": 0.8940562806966229, "grad_norm": 2.51290569703882, "learning_rate": 5.576651699807569e-07, "loss": 0.6464, "step": 73360 }, { "epoch": 0.8941172169207707, "grad_norm": 3.3134748578724817, "learning_rate": 5.573444515715203e-07, "loss": 0.7, "step": 73365 }, { "epoch": 0.8941781531449186, "grad_norm": 2.25085551685027, "learning_rate": 5.570237331622835e-07, "loss": 0.7563, "step": 73370 }, { "epoch": 0.8942390893690664, "grad_norm": 2.6788592719122475, "learning_rate": 5.567030147530468e-07, "loss": 0.7006, "step": 73375 }, { "epoch": 0.8943000255932141, "grad_norm": 2.0532605157610258, "learning_rate": 5.563822963438102e-07, "loss": 0.6623, "step": 73380 }, { "epoch": 0.8943609618173619, "grad_norm": 2.4402541366094437, "learning_rate": 5.560615779345735e-07, "loss": 0.6497, "step": 73385 }, { "epoch": 0.8944218980415097, "grad_norm": 2.3117624533949312, "learning_rate": 5.557408595253367e-07, "loss": 0.7154, "step": 73390 }, { "epoch": 0.8944828342656576, "grad_norm": 2.489986760959243, "learning_rate": 5.554201411161001e-07, "loss": 0.7374, "step": 73395 }, { "epoch": 0.8945437704898054, "grad_norm": 2.500830560492145, "learning_rate": 5.550994227068634e-07, "loss": 0.6988, "step": 73400 }, { "epoch": 0.8946047067139532, "grad_norm": 2.064081442025378, "learning_rate": 5.547787042976267e-07, "loss": 0.752, "step": 73405 }, { "epoch": 0.894665642938101, "grad_norm": 2.233365651258023, "learning_rate": 5.5445798588839e-07, "loss": 0.6867, "step": 73410 }, { "epoch": 0.8947265791622487, "grad_norm": 2.3684509879414697, "learning_rate": 5.541372674791533e-07, "loss": 0.6997, "step": 73415 }, { "epoch": 0.8947875153863966, "grad_norm": 2.3089613444370554, "learning_rate": 5.538165490699167e-07, "loss": 0.6736, "step": 73420 }, { "epoch": 0.8948484516105444, "grad_norm": 2.498120647859194, "learning_rate": 5.5349583066068e-07, "loss": 0.7019, "step": 73425 }, { "epoch": 0.8949093878346922, "grad_norm": 2.423255908815685, "learning_rate": 5.531751122514433e-07, "loss": 0.7002, "step": 73430 }, { "epoch": 0.89497032405884, "grad_norm": 2.708696598183909, "learning_rate": 5.528543938422066e-07, "loss": 0.7333, "step": 73435 }, { "epoch": 0.8950312602829879, "grad_norm": 2.458344403963257, "learning_rate": 5.5253367543297e-07, "loss": 0.7093, "step": 73440 }, { "epoch": 0.8950921965071357, "grad_norm": 2.8175690142699854, "learning_rate": 5.522129570237332e-07, "loss": 0.7572, "step": 73445 }, { "epoch": 0.8951531327312834, "grad_norm": 3.4078767598667556, "learning_rate": 5.518922386144965e-07, "loss": 0.7445, "step": 73450 }, { "epoch": 0.8952140689554312, "grad_norm": 3.6036756971872967, "learning_rate": 5.515715202052599e-07, "loss": 0.6556, "step": 73455 }, { "epoch": 0.895275005179579, "grad_norm": 2.4104129826200777, "learning_rate": 5.512508017960232e-07, "loss": 0.7557, "step": 73460 }, { "epoch": 0.8953359414037269, "grad_norm": 2.8279222096621655, "learning_rate": 5.509300833867864e-07, "loss": 0.7338, "step": 73465 }, { "epoch": 0.8953968776278747, "grad_norm": 2.7268436252426147, "learning_rate": 5.506093649775498e-07, "loss": 0.7868, "step": 73470 }, { "epoch": 0.8954578138520225, "grad_norm": 2.705845553639639, "learning_rate": 5.502886465683131e-07, "loss": 0.6562, "step": 73475 }, { "epoch": 0.8955187500761703, "grad_norm": 2.4867628600169556, "learning_rate": 5.499679281590764e-07, "loss": 0.6533, "step": 73480 }, { "epoch": 0.895579686300318, "grad_norm": 2.5666128905993566, "learning_rate": 5.496472097498397e-07, "loss": 0.8011, "step": 73485 }, { "epoch": 0.8956406225244659, "grad_norm": 2.375965808569693, "learning_rate": 5.49326491340603e-07, "loss": 0.7101, "step": 73490 }, { "epoch": 0.8957015587486137, "grad_norm": 2.458793665096007, "learning_rate": 5.490057729313663e-07, "loss": 0.7403, "step": 73495 }, { "epoch": 0.8957624949727615, "grad_norm": 4.176947315996527, "learning_rate": 5.486850545221297e-07, "loss": 0.6952, "step": 73500 }, { "epoch": 0.8958234311969093, "grad_norm": 3.1002606377188244, "learning_rate": 5.483643361128929e-07, "loss": 0.6999, "step": 73505 }, { "epoch": 0.8958843674210571, "grad_norm": 2.87772909940321, "learning_rate": 5.480436177036562e-07, "loss": 0.7481, "step": 73510 }, { "epoch": 0.895945303645205, "grad_norm": 2.6627288543283947, "learning_rate": 5.477228992944196e-07, "loss": 0.6883, "step": 73515 }, { "epoch": 0.8960062398693527, "grad_norm": 3.3104424031261064, "learning_rate": 5.474021808851829e-07, "loss": 0.6904, "step": 73520 }, { "epoch": 0.8960671760935005, "grad_norm": 2.1748738078353007, "learning_rate": 5.470814624759461e-07, "loss": 0.7226, "step": 73525 }, { "epoch": 0.8961281123176483, "grad_norm": 2.553349469422919, "learning_rate": 5.467607440667095e-07, "loss": 0.7308, "step": 73530 }, { "epoch": 0.8961890485417962, "grad_norm": 2.363620765052069, "learning_rate": 5.464400256574728e-07, "loss": 0.7696, "step": 73535 }, { "epoch": 0.896249984765944, "grad_norm": 2.5085828140213655, "learning_rate": 5.461193072482361e-07, "loss": 0.6602, "step": 73540 }, { "epoch": 0.8963109209900918, "grad_norm": 2.459225169587037, "learning_rate": 5.457985888389994e-07, "loss": 0.6807, "step": 73545 }, { "epoch": 0.8963718572142396, "grad_norm": 2.3437079066263866, "learning_rate": 5.454778704297627e-07, "loss": 0.6954, "step": 73550 }, { "epoch": 0.8964327934383873, "grad_norm": 2.198087269954479, "learning_rate": 5.45157152020526e-07, "loss": 0.7269, "step": 73555 }, { "epoch": 0.8964937296625352, "grad_norm": 2.1809916050446705, "learning_rate": 5.448364336112894e-07, "loss": 0.6934, "step": 73560 }, { "epoch": 0.896554665886683, "grad_norm": 2.4353159342094615, "learning_rate": 5.445157152020526e-07, "loss": 0.6655, "step": 73565 }, { "epoch": 0.8966156021108308, "grad_norm": 2.645232006022298, "learning_rate": 5.441949967928159e-07, "loss": 0.6846, "step": 73570 }, { "epoch": 0.8966765383349786, "grad_norm": 2.404547279035965, "learning_rate": 5.438742783835793e-07, "loss": 0.7589, "step": 73575 }, { "epoch": 0.8967374745591264, "grad_norm": 4.859797545668976, "learning_rate": 5.435535599743426e-07, "loss": 0.7343, "step": 73580 }, { "epoch": 0.8967984107832743, "grad_norm": 3.247913378064294, "learning_rate": 5.432328415651058e-07, "loss": 0.6621, "step": 73585 }, { "epoch": 0.896859347007422, "grad_norm": 2.5702983428394965, "learning_rate": 5.429121231558692e-07, "loss": 0.7193, "step": 73590 }, { "epoch": 0.8969202832315698, "grad_norm": 2.9782998963754137, "learning_rate": 5.425914047466325e-07, "loss": 0.7036, "step": 73595 }, { "epoch": 0.8969812194557176, "grad_norm": 3.5664273961874216, "learning_rate": 5.422706863373958e-07, "loss": 0.6986, "step": 73600 }, { "epoch": 0.8970421556798654, "grad_norm": 2.5335467633574993, "learning_rate": 5.419499679281591e-07, "loss": 0.7229, "step": 73605 }, { "epoch": 0.8971030919040133, "grad_norm": 2.351471675917104, "learning_rate": 5.416292495189225e-07, "loss": 0.6317, "step": 73610 }, { "epoch": 0.8971640281281611, "grad_norm": 2.476484043184491, "learning_rate": 5.413085311096857e-07, "loss": 0.748, "step": 73615 }, { "epoch": 0.8972249643523089, "grad_norm": 6.7033755828260455, "learning_rate": 5.409878127004491e-07, "loss": 0.7042, "step": 73620 }, { "epoch": 0.8972859005764566, "grad_norm": 2.2968234122689433, "learning_rate": 5.406670942912123e-07, "loss": 0.7011, "step": 73625 }, { "epoch": 0.8973468368006045, "grad_norm": 2.2200549903590456, "learning_rate": 5.403463758819757e-07, "loss": 0.7082, "step": 73630 }, { "epoch": 0.8974077730247523, "grad_norm": 3.190593013586102, "learning_rate": 5.40025657472739e-07, "loss": 0.7346, "step": 73635 }, { "epoch": 0.8974687092489001, "grad_norm": 2.7373616145580377, "learning_rate": 5.397049390635023e-07, "loss": 0.7088, "step": 73640 }, { "epoch": 0.8975296454730479, "grad_norm": 2.4094854274411865, "learning_rate": 5.393842206542656e-07, "loss": 0.7264, "step": 73645 }, { "epoch": 0.8975905816971957, "grad_norm": 3.206575769352526, "learning_rate": 5.39063502245029e-07, "loss": 0.7335, "step": 73650 }, { "epoch": 0.8976515179213436, "grad_norm": 2.1649944295541443, "learning_rate": 5.387427838357922e-07, "loss": 0.6957, "step": 73655 }, { "epoch": 0.8977124541454913, "grad_norm": 2.9073560710989264, "learning_rate": 5.384220654265555e-07, "loss": 0.7337, "step": 73660 }, { "epoch": 0.8977733903696391, "grad_norm": 2.6927448532241383, "learning_rate": 5.381013470173189e-07, "loss": 0.7092, "step": 73665 }, { "epoch": 0.8978343265937869, "grad_norm": 2.431219876775895, "learning_rate": 5.377806286080822e-07, "loss": 0.7403, "step": 73670 }, { "epoch": 0.8978952628179347, "grad_norm": 3.042829213883179, "learning_rate": 5.374599101988454e-07, "loss": 0.7269, "step": 73675 }, { "epoch": 0.8979561990420826, "grad_norm": 2.7935500815972376, "learning_rate": 5.371391917896088e-07, "loss": 0.724, "step": 73680 }, { "epoch": 0.8980171352662304, "grad_norm": 2.4314298908988183, "learning_rate": 5.368184733803721e-07, "loss": 0.7598, "step": 73685 }, { "epoch": 0.8980780714903782, "grad_norm": 2.466770891823043, "learning_rate": 5.364977549711354e-07, "loss": 0.7062, "step": 73690 }, { "epoch": 0.8981390077145259, "grad_norm": 2.5019831819921556, "learning_rate": 5.361770365618987e-07, "loss": 0.7195, "step": 73695 }, { "epoch": 0.8981999439386738, "grad_norm": 3.227897182479963, "learning_rate": 5.35856318152662e-07, "loss": 0.8183, "step": 73700 }, { "epoch": 0.8982608801628216, "grad_norm": 2.600266592466481, "learning_rate": 5.355355997434253e-07, "loss": 0.6909, "step": 73705 }, { "epoch": 0.8983218163869694, "grad_norm": 3.737854341604512, "learning_rate": 5.352148813341887e-07, "loss": 0.6508, "step": 73710 }, { "epoch": 0.8983827526111172, "grad_norm": 3.793196473322551, "learning_rate": 5.348941629249519e-07, "loss": 0.6928, "step": 73715 }, { "epoch": 0.898443688835265, "grad_norm": 2.2112467408898038, "learning_rate": 5.345734445157152e-07, "loss": 0.7407, "step": 73720 }, { "epoch": 0.8985046250594129, "grad_norm": 3.085143981555986, "learning_rate": 5.342527261064786e-07, "loss": 0.7072, "step": 73725 }, { "epoch": 0.8985655612835606, "grad_norm": 2.6836871921213294, "learning_rate": 5.339320076972419e-07, "loss": 0.6578, "step": 73730 }, { "epoch": 0.8986264975077084, "grad_norm": 1.8795251159262218, "learning_rate": 5.336112892880052e-07, "loss": 0.7005, "step": 73735 }, { "epoch": 0.8986874337318562, "grad_norm": 3.8698258076159773, "learning_rate": 5.332905708787684e-07, "loss": 0.7127, "step": 73740 }, { "epoch": 0.898748369956004, "grad_norm": 2.2161595991828404, "learning_rate": 5.329698524695318e-07, "loss": 0.7115, "step": 73745 }, { "epoch": 0.8988093061801519, "grad_norm": 2.313841023158755, "learning_rate": 5.326491340602951e-07, "loss": 0.7408, "step": 73750 }, { "epoch": 0.8988702424042997, "grad_norm": 2.1941510488060505, "learning_rate": 5.323284156510585e-07, "loss": 0.7786, "step": 73755 }, { "epoch": 0.8989311786284475, "grad_norm": 2.5217649292649886, "learning_rate": 5.320076972418217e-07, "loss": 0.7613, "step": 73760 }, { "epoch": 0.8989921148525952, "grad_norm": 3.321139811641904, "learning_rate": 5.31686978832585e-07, "loss": 0.7467, "step": 73765 }, { "epoch": 0.899053051076743, "grad_norm": 2.6746365906713865, "learning_rate": 5.313662604233483e-07, "loss": 0.6618, "step": 73770 }, { "epoch": 0.8991139873008909, "grad_norm": 2.366500139594564, "learning_rate": 5.310455420141117e-07, "loss": 0.7006, "step": 73775 }, { "epoch": 0.8991749235250387, "grad_norm": 2.540332788307711, "learning_rate": 5.307248236048749e-07, "loss": 0.7115, "step": 73780 }, { "epoch": 0.8992358597491865, "grad_norm": 2.3688453333234145, "learning_rate": 5.304041051956384e-07, "loss": 0.6656, "step": 73785 }, { "epoch": 0.8992967959733343, "grad_norm": 3.109791977965321, "learning_rate": 5.300833867864016e-07, "loss": 0.7268, "step": 73790 }, { "epoch": 0.8993577321974822, "grad_norm": 3.0161492701681683, "learning_rate": 5.297626683771649e-07, "loss": 0.7743, "step": 73795 }, { "epoch": 0.8994186684216299, "grad_norm": 2.1070118013313652, "learning_rate": 5.294419499679281e-07, "loss": 0.6399, "step": 73800 }, { "epoch": 0.8994796046457777, "grad_norm": 3.192223462748077, "learning_rate": 5.291212315586916e-07, "loss": 0.7473, "step": 73805 }, { "epoch": 0.8995405408699255, "grad_norm": 2.6478242703338744, "learning_rate": 5.288005131494548e-07, "loss": 0.7292, "step": 73810 }, { "epoch": 0.8996014770940733, "grad_norm": 3.3977259917380698, "learning_rate": 5.284797947402181e-07, "loss": 0.7341, "step": 73815 }, { "epoch": 0.8996624133182212, "grad_norm": 2.5611011281543994, "learning_rate": 5.281590763309815e-07, "loss": 0.7518, "step": 73820 }, { "epoch": 0.899723349542369, "grad_norm": 2.206100198351378, "learning_rate": 5.278383579217448e-07, "loss": 0.792, "step": 73825 }, { "epoch": 0.8997842857665168, "grad_norm": 3.1705460310700073, "learning_rate": 5.27517639512508e-07, "loss": 0.7743, "step": 73830 }, { "epoch": 0.8998452219906645, "grad_norm": 2.596625184231993, "learning_rate": 5.271969211032714e-07, "loss": 0.7442, "step": 73835 }, { "epoch": 0.8999061582148123, "grad_norm": 2.345242061047058, "learning_rate": 5.268762026940347e-07, "loss": 0.7028, "step": 73840 }, { "epoch": 0.8999670944389602, "grad_norm": 2.4395830072260987, "learning_rate": 5.26555484284798e-07, "loss": 0.5821, "step": 73845 }, { "epoch": 0.900028030663108, "grad_norm": 4.205148962437231, "learning_rate": 5.262347658755613e-07, "loss": 0.7181, "step": 73850 }, { "epoch": 0.9000889668872558, "grad_norm": 2.5141800225926474, "learning_rate": 5.259140474663246e-07, "loss": 0.7098, "step": 73855 }, { "epoch": 0.9001499031114036, "grad_norm": 2.40358132184488, "learning_rate": 5.255933290570879e-07, "loss": 0.728, "step": 73860 }, { "epoch": 0.9002108393355515, "grad_norm": 2.6816498162010296, "learning_rate": 5.252726106478513e-07, "loss": 0.7382, "step": 73865 }, { "epoch": 0.9002717755596992, "grad_norm": 2.1995934364245002, "learning_rate": 5.249518922386145e-07, "loss": 0.7345, "step": 73870 }, { "epoch": 0.900332711783847, "grad_norm": 2.440481960133198, "learning_rate": 5.246311738293778e-07, "loss": 0.7591, "step": 73875 }, { "epoch": 0.9003936480079948, "grad_norm": 2.855288422991087, "learning_rate": 5.243104554201412e-07, "loss": 0.6258, "step": 73880 }, { "epoch": 0.9004545842321426, "grad_norm": 2.221038713617708, "learning_rate": 5.239897370109045e-07, "loss": 0.7764, "step": 73885 }, { "epoch": 0.9005155204562905, "grad_norm": 2.5172218160182767, "learning_rate": 5.236690186016677e-07, "loss": 0.6289, "step": 73890 }, { "epoch": 0.9005764566804383, "grad_norm": 2.478097032593514, "learning_rate": 5.233483001924311e-07, "loss": 0.7265, "step": 73895 }, { "epoch": 0.9006373929045861, "grad_norm": 2.125379878290536, "learning_rate": 5.230275817831944e-07, "loss": 0.669, "step": 73900 }, { "epoch": 0.9006983291287338, "grad_norm": 2.429670006021786, "learning_rate": 5.227068633739577e-07, "loss": 0.7082, "step": 73905 }, { "epoch": 0.9007592653528816, "grad_norm": 2.866516888637587, "learning_rate": 5.22386144964721e-07, "loss": 0.7383, "step": 73910 }, { "epoch": 0.9008202015770295, "grad_norm": 2.4397438223049477, "learning_rate": 5.220654265554843e-07, "loss": 0.7298, "step": 73915 }, { "epoch": 0.9008811378011773, "grad_norm": 2.701972495430139, "learning_rate": 5.217447081462476e-07, "loss": 0.7408, "step": 73920 }, { "epoch": 0.9009420740253251, "grad_norm": 2.094600103583202, "learning_rate": 5.21423989737011e-07, "loss": 0.6589, "step": 73925 }, { "epoch": 0.9010030102494729, "grad_norm": 2.575006892113507, "learning_rate": 5.211032713277742e-07, "loss": 0.6614, "step": 73930 }, { "epoch": 0.9010639464736208, "grad_norm": 2.431996227973051, "learning_rate": 5.207825529185375e-07, "loss": 0.7035, "step": 73935 }, { "epoch": 0.9011248826977685, "grad_norm": 2.250064762408811, "learning_rate": 5.204618345093009e-07, "loss": 0.7049, "step": 73940 }, { "epoch": 0.9011858189219163, "grad_norm": 2.5361575801439242, "learning_rate": 5.201411161000642e-07, "loss": 0.6847, "step": 73945 }, { "epoch": 0.9012467551460641, "grad_norm": 2.674994809221987, "learning_rate": 5.198203976908274e-07, "loss": 0.7111, "step": 73950 }, { "epoch": 0.9013076913702119, "grad_norm": 2.866097683074841, "learning_rate": 5.194996792815908e-07, "loss": 0.7432, "step": 73955 }, { "epoch": 0.9013686275943598, "grad_norm": 2.0868429126925827, "learning_rate": 5.191789608723541e-07, "loss": 0.6888, "step": 73960 }, { "epoch": 0.9014295638185076, "grad_norm": 3.1251345220213764, "learning_rate": 5.188582424631174e-07, "loss": 0.714, "step": 73965 }, { "epoch": 0.9014905000426554, "grad_norm": 2.2892978453960535, "learning_rate": 5.185375240538807e-07, "loss": 0.6939, "step": 73970 }, { "epoch": 0.9015514362668031, "grad_norm": 2.574530378218425, "learning_rate": 5.18216805644644e-07, "loss": 0.7609, "step": 73975 }, { "epoch": 0.9016123724909509, "grad_norm": 2.5095082223131664, "learning_rate": 5.178960872354073e-07, "loss": 0.6842, "step": 73980 }, { "epoch": 0.9016733087150988, "grad_norm": 3.2654226102384523, "learning_rate": 5.175753688261707e-07, "loss": 0.7568, "step": 73985 }, { "epoch": 0.9017342449392466, "grad_norm": 2.7858872736194913, "learning_rate": 5.172546504169339e-07, "loss": 0.7293, "step": 73990 }, { "epoch": 0.9017951811633944, "grad_norm": 2.7613463622555554, "learning_rate": 5.169339320076973e-07, "loss": 0.6932, "step": 73995 }, { "epoch": 0.9018561173875422, "grad_norm": 2.60994545860747, "learning_rate": 5.166132135984606e-07, "loss": 0.7275, "step": 74000 }, { "epoch": 0.9019170536116901, "grad_norm": 2.4550984299472915, "learning_rate": 5.162924951892239e-07, "loss": 0.7709, "step": 74005 }, { "epoch": 0.9019779898358378, "grad_norm": 2.3002704107999765, "learning_rate": 5.159717767799871e-07, "loss": 0.6821, "step": 74010 }, { "epoch": 0.9020389260599856, "grad_norm": 2.129049459086151, "learning_rate": 5.156510583707506e-07, "loss": 0.6585, "step": 74015 }, { "epoch": 0.9020998622841334, "grad_norm": 2.4765856610542425, "learning_rate": 5.153303399615139e-07, "loss": 0.7158, "step": 74020 }, { "epoch": 0.9021607985082812, "grad_norm": 3.191446707290185, "learning_rate": 5.150096215522771e-07, "loss": 0.7456, "step": 74025 }, { "epoch": 0.9022217347324291, "grad_norm": 2.9411609505297474, "learning_rate": 5.146889031430405e-07, "loss": 0.7737, "step": 74030 }, { "epoch": 0.9022826709565769, "grad_norm": 2.6377495573532235, "learning_rate": 5.143681847338038e-07, "loss": 0.647, "step": 74035 }, { "epoch": 0.9023436071807247, "grad_norm": 3.1807784880804815, "learning_rate": 5.140474663245671e-07, "loss": 0.6852, "step": 74040 }, { "epoch": 0.9024045434048724, "grad_norm": 2.3983715657770652, "learning_rate": 5.137267479153304e-07, "loss": 0.7998, "step": 74045 }, { "epoch": 0.9024654796290202, "grad_norm": 2.7088334859773324, "learning_rate": 5.134060295060937e-07, "loss": 0.7143, "step": 74050 }, { "epoch": 0.9025264158531681, "grad_norm": 2.4378222878319904, "learning_rate": 5.13085311096857e-07, "loss": 0.7213, "step": 74055 }, { "epoch": 0.9025873520773159, "grad_norm": 3.2087077634109993, "learning_rate": 5.127645926876204e-07, "loss": 0.7443, "step": 74060 }, { "epoch": 0.9026482883014637, "grad_norm": 2.795634476960718, "learning_rate": 5.124438742783836e-07, "loss": 0.7198, "step": 74065 }, { "epoch": 0.9027092245256115, "grad_norm": 2.2451066252244267, "learning_rate": 5.121231558691469e-07, "loss": 0.7142, "step": 74070 }, { "epoch": 0.9027701607497594, "grad_norm": 2.7825292987333126, "learning_rate": 5.118024374599103e-07, "loss": 0.6824, "step": 74075 }, { "epoch": 0.9028310969739071, "grad_norm": 2.417290818175633, "learning_rate": 5.114817190506736e-07, "loss": 0.6832, "step": 74080 }, { "epoch": 0.9028920331980549, "grad_norm": 2.3397691209392235, "learning_rate": 5.111610006414368e-07, "loss": 0.7239, "step": 74085 }, { "epoch": 0.9029529694222027, "grad_norm": 2.390803571961016, "learning_rate": 5.108402822322002e-07, "loss": 0.754, "step": 74090 }, { "epoch": 0.9030139056463505, "grad_norm": 5.204010479677515, "learning_rate": 5.105195638229635e-07, "loss": 0.6589, "step": 74095 }, { "epoch": 0.9030748418704984, "grad_norm": 2.516008288846806, "learning_rate": 5.101988454137268e-07, "loss": 0.663, "step": 74100 }, { "epoch": 0.9031357780946462, "grad_norm": 2.3805708957736162, "learning_rate": 5.098781270044901e-07, "loss": 0.6844, "step": 74105 }, { "epoch": 0.903196714318794, "grad_norm": 1.9458508839233342, "learning_rate": 5.095574085952534e-07, "loss": 0.6493, "step": 74110 }, { "epoch": 0.9032576505429417, "grad_norm": 2.5691840147002263, "learning_rate": 5.092366901860167e-07, "loss": 0.6577, "step": 74115 }, { "epoch": 0.9033185867670895, "grad_norm": 2.054143145217014, "learning_rate": 5.089159717767801e-07, "loss": 0.728, "step": 74120 }, { "epoch": 0.9033795229912374, "grad_norm": 2.308550745984355, "learning_rate": 5.085952533675433e-07, "loss": 0.7321, "step": 74125 }, { "epoch": 0.9034404592153852, "grad_norm": 2.0074568722391883, "learning_rate": 5.082745349583066e-07, "loss": 0.6329, "step": 74130 }, { "epoch": 0.903501395439533, "grad_norm": 2.840280606730584, "learning_rate": 5.0795381654907e-07, "loss": 0.7172, "step": 74135 }, { "epoch": 0.9035623316636808, "grad_norm": 2.757487062462688, "learning_rate": 5.076330981398333e-07, "loss": 0.6819, "step": 74140 }, { "epoch": 0.9036232678878287, "grad_norm": 2.7435456476091815, "learning_rate": 5.073123797305965e-07, "loss": 0.6447, "step": 74145 }, { "epoch": 0.9036842041119764, "grad_norm": 3.304886830837671, "learning_rate": 5.069916613213599e-07, "loss": 0.7269, "step": 74150 }, { "epoch": 0.9037451403361242, "grad_norm": 2.37303164980306, "learning_rate": 5.066709429121232e-07, "loss": 0.7649, "step": 74155 }, { "epoch": 0.903806076560272, "grad_norm": 2.6868599908921187, "learning_rate": 5.063502245028865e-07, "loss": 0.722, "step": 74160 }, { "epoch": 0.9038670127844198, "grad_norm": 2.9525334238137093, "learning_rate": 5.060295060936498e-07, "loss": 0.7075, "step": 74165 }, { "epoch": 0.9039279490085677, "grad_norm": 2.201419798743817, "learning_rate": 5.057087876844132e-07, "loss": 0.7294, "step": 74170 }, { "epoch": 0.9039888852327155, "grad_norm": 2.1268562525787797, "learning_rate": 5.053880692751764e-07, "loss": 0.6918, "step": 74175 }, { "epoch": 0.9040498214568633, "grad_norm": 2.3210131070673277, "learning_rate": 5.050673508659398e-07, "loss": 0.7559, "step": 74180 }, { "epoch": 0.904110757681011, "grad_norm": 2.6017804878204913, "learning_rate": 5.04746632456703e-07, "loss": 0.6833, "step": 74185 }, { "epoch": 0.9041716939051588, "grad_norm": 2.4167348466331204, "learning_rate": 5.044259140474664e-07, "loss": 0.6279, "step": 74190 }, { "epoch": 0.9042326301293067, "grad_norm": 3.3967687018718737, "learning_rate": 5.041051956382297e-07, "loss": 0.6874, "step": 74195 }, { "epoch": 0.9042935663534545, "grad_norm": 2.8401149718902365, "learning_rate": 5.03784477228993e-07, "loss": 0.7072, "step": 74200 }, { "epoch": 0.9043545025776023, "grad_norm": 2.528438889536104, "learning_rate": 5.034637588197563e-07, "loss": 0.6594, "step": 74205 }, { "epoch": 0.9044154388017501, "grad_norm": 2.169789843093869, "learning_rate": 5.031430404105197e-07, "loss": 0.6968, "step": 74210 }, { "epoch": 0.904476375025898, "grad_norm": 2.5165927036052085, "learning_rate": 5.028223220012829e-07, "loss": 0.6551, "step": 74215 }, { "epoch": 0.9045373112500457, "grad_norm": 2.68654410913398, "learning_rate": 5.025016035920462e-07, "loss": 0.7305, "step": 74220 }, { "epoch": 0.9045982474741935, "grad_norm": 2.4278273535117574, "learning_rate": 5.021808851828096e-07, "loss": 0.7086, "step": 74225 }, { "epoch": 0.9046591836983413, "grad_norm": 2.4634763588538457, "learning_rate": 5.018601667735729e-07, "loss": 0.6753, "step": 74230 }, { "epoch": 0.9047201199224891, "grad_norm": 2.4393089658889986, "learning_rate": 5.015394483643361e-07, "loss": 0.7509, "step": 74235 }, { "epoch": 0.904781056146637, "grad_norm": 2.484450748547762, "learning_rate": 5.012187299550995e-07, "loss": 0.7508, "step": 74240 }, { "epoch": 0.9048419923707848, "grad_norm": 2.822043920699717, "learning_rate": 5.008980115458628e-07, "loss": 0.749, "step": 74245 }, { "epoch": 0.9049029285949326, "grad_norm": 1.9758817678133, "learning_rate": 5.005772931366261e-07, "loss": 0.6557, "step": 74250 }, { "epoch": 0.9049638648190803, "grad_norm": 2.4104088379865516, "learning_rate": 5.002565747273894e-07, "loss": 0.6338, "step": 74255 }, { "epoch": 0.9050248010432281, "grad_norm": 2.673815655467153, "learning_rate": 4.999358563181527e-07, "loss": 0.7349, "step": 74260 }, { "epoch": 0.905085737267376, "grad_norm": 2.6798751816996425, "learning_rate": 4.99615137908916e-07, "loss": 0.6883, "step": 74265 }, { "epoch": 0.9051466734915238, "grad_norm": 3.105964077246625, "learning_rate": 4.992944194996794e-07, "loss": 0.7277, "step": 74270 }, { "epoch": 0.9052076097156716, "grad_norm": 2.4205012629928913, "learning_rate": 4.989737010904426e-07, "loss": 0.7333, "step": 74275 }, { "epoch": 0.9052685459398194, "grad_norm": 2.947986797480213, "learning_rate": 4.986529826812059e-07, "loss": 0.706, "step": 74280 }, { "epoch": 0.9053294821639672, "grad_norm": 2.2643708044802033, "learning_rate": 4.983322642719693e-07, "loss": 0.7554, "step": 74285 }, { "epoch": 0.905390418388115, "grad_norm": 2.342324376762972, "learning_rate": 4.980115458627326e-07, "loss": 0.8142, "step": 74290 }, { "epoch": 0.9054513546122628, "grad_norm": 2.676409411557491, "learning_rate": 4.976908274534958e-07, "loss": 0.6801, "step": 74295 }, { "epoch": 0.9055122908364106, "grad_norm": 3.0270182392706717, "learning_rate": 4.973701090442592e-07, "loss": 0.7254, "step": 74300 }, { "epoch": 0.9055732270605584, "grad_norm": 2.3207091259353376, "learning_rate": 4.970493906350225e-07, "loss": 0.6317, "step": 74305 }, { "epoch": 0.9056341632847063, "grad_norm": 2.3961356621343795, "learning_rate": 4.967286722257858e-07, "loss": 0.7138, "step": 74310 }, { "epoch": 0.9056950995088541, "grad_norm": 2.4436769275095815, "learning_rate": 4.96407953816549e-07, "loss": 0.6322, "step": 74315 }, { "epoch": 0.9057560357330018, "grad_norm": 2.3348226003070844, "learning_rate": 4.960872354073124e-07, "loss": 0.7505, "step": 74320 }, { "epoch": 0.9058169719571496, "grad_norm": 1.9243103503613994, "learning_rate": 4.957665169980757e-07, "loss": 0.6942, "step": 74325 }, { "epoch": 0.9058779081812974, "grad_norm": 1.922689711061103, "learning_rate": 4.95445798588839e-07, "loss": 0.7057, "step": 74330 }, { "epoch": 0.9059388444054453, "grad_norm": 3.004865453305153, "learning_rate": 4.951250801796024e-07, "loss": 0.7411, "step": 74335 }, { "epoch": 0.9059997806295931, "grad_norm": 2.299127671893687, "learning_rate": 4.948043617703656e-07, "loss": 0.7066, "step": 74340 }, { "epoch": 0.9060607168537409, "grad_norm": 2.72408550356686, "learning_rate": 4.94483643361129e-07, "loss": 0.6906, "step": 74345 }, { "epoch": 0.9061216530778887, "grad_norm": 2.4973952544370546, "learning_rate": 4.941629249518923e-07, "loss": 0.7535, "step": 74350 }, { "epoch": 0.9061825893020364, "grad_norm": 2.506962840890618, "learning_rate": 4.938422065426556e-07, "loss": 0.692, "step": 74355 }, { "epoch": 0.9062435255261843, "grad_norm": 2.44969087793488, "learning_rate": 4.935214881334188e-07, "loss": 0.7119, "step": 74360 }, { "epoch": 0.9063044617503321, "grad_norm": 2.523910350363338, "learning_rate": 4.932007697241823e-07, "loss": 0.7503, "step": 74365 }, { "epoch": 0.9063653979744799, "grad_norm": 2.401581765208398, "learning_rate": 4.928800513149455e-07, "loss": 0.8002, "step": 74370 }, { "epoch": 0.9064263341986277, "grad_norm": 3.5058916996227003, "learning_rate": 4.925593329057088e-07, "loss": 0.6969, "step": 74375 }, { "epoch": 0.9064872704227755, "grad_norm": 2.6530354170480694, "learning_rate": 4.922386144964722e-07, "loss": 0.7436, "step": 74380 }, { "epoch": 0.9065482066469234, "grad_norm": 2.3810542835921056, "learning_rate": 4.919178960872355e-07, "loss": 0.6516, "step": 74385 }, { "epoch": 0.9066091428710711, "grad_norm": 2.171796070226211, "learning_rate": 4.915971776779987e-07, "loss": 0.684, "step": 74390 }, { "epoch": 0.9066700790952189, "grad_norm": 1.8679746877757122, "learning_rate": 4.912764592687621e-07, "loss": 0.7015, "step": 74395 }, { "epoch": 0.9067310153193667, "grad_norm": 2.235106304359452, "learning_rate": 4.909557408595254e-07, "loss": 0.6593, "step": 74400 }, { "epoch": 0.9067919515435146, "grad_norm": 2.8999602777771316, "learning_rate": 4.906350224502887e-07, "loss": 0.7029, "step": 74405 }, { "epoch": 0.9068528877676624, "grad_norm": 2.3509086349061943, "learning_rate": 4.90314304041052e-07, "loss": 0.6268, "step": 74410 }, { "epoch": 0.9069138239918102, "grad_norm": 3.449532638047288, "learning_rate": 4.899935856318153e-07, "loss": 0.7068, "step": 74415 }, { "epoch": 0.906974760215958, "grad_norm": 3.3612318356071356, "learning_rate": 4.896728672225786e-07, "loss": 0.7895, "step": 74420 }, { "epoch": 0.9070356964401057, "grad_norm": 2.3380649369835833, "learning_rate": 4.89352148813342e-07, "loss": 0.6827, "step": 74425 }, { "epoch": 0.9070966326642536, "grad_norm": 2.5156540987142373, "learning_rate": 4.890314304041052e-07, "loss": 0.6709, "step": 74430 }, { "epoch": 0.9071575688884014, "grad_norm": 1.8492435406355767, "learning_rate": 4.887107119948685e-07, "loss": 0.6899, "step": 74435 }, { "epoch": 0.9072185051125492, "grad_norm": 2.051818538897579, "learning_rate": 4.883899935856319e-07, "loss": 0.7274, "step": 74440 }, { "epoch": 0.907279441336697, "grad_norm": 5.8250042281809, "learning_rate": 4.880692751763952e-07, "loss": 0.7201, "step": 74445 }, { "epoch": 0.9073403775608448, "grad_norm": 2.4250902486418644, "learning_rate": 4.877485567671584e-07, "loss": 0.7517, "step": 74450 }, { "epoch": 0.9074013137849927, "grad_norm": 2.1027555436606415, "learning_rate": 4.874278383579218e-07, "loss": 0.6747, "step": 74455 }, { "epoch": 0.9074622500091404, "grad_norm": 2.151336585864554, "learning_rate": 4.871071199486851e-07, "loss": 0.6384, "step": 74460 }, { "epoch": 0.9075231862332882, "grad_norm": 2.5131319768033302, "learning_rate": 4.867864015394484e-07, "loss": 0.7431, "step": 74465 }, { "epoch": 0.907584122457436, "grad_norm": 2.511792752161031, "learning_rate": 4.864656831302117e-07, "loss": 0.7288, "step": 74470 }, { "epoch": 0.9076450586815838, "grad_norm": 2.3975660512899064, "learning_rate": 4.86144964720975e-07, "loss": 0.6619, "step": 74475 }, { "epoch": 0.9077059949057317, "grad_norm": 2.2471428511027876, "learning_rate": 4.858242463117383e-07, "loss": 0.7547, "step": 74480 }, { "epoch": 0.9077669311298795, "grad_norm": 2.572517462069633, "learning_rate": 4.855035279025017e-07, "loss": 0.6775, "step": 74485 }, { "epoch": 0.9078278673540273, "grad_norm": 2.7663735948296977, "learning_rate": 4.851828094932649e-07, "loss": 0.7183, "step": 74490 }, { "epoch": 0.907888803578175, "grad_norm": 2.586715260967192, "learning_rate": 4.848620910840282e-07, "loss": 0.6209, "step": 74495 }, { "epoch": 0.9079497398023229, "grad_norm": 2.469799175037203, "learning_rate": 4.845413726747916e-07, "loss": 0.7547, "step": 74500 }, { "epoch": 0.9080106760264707, "grad_norm": 2.943913295415256, "learning_rate": 4.842206542655549e-07, "loss": 0.6755, "step": 74505 }, { "epoch": 0.9080716122506185, "grad_norm": 2.530334310016989, "learning_rate": 4.838999358563181e-07, "loss": 0.7238, "step": 74510 }, { "epoch": 0.9081325484747663, "grad_norm": 2.538631527411898, "learning_rate": 4.835792174470815e-07, "loss": 0.6628, "step": 74515 }, { "epoch": 0.9081934846989141, "grad_norm": 2.4172386845330234, "learning_rate": 4.832584990378448e-07, "loss": 0.7405, "step": 74520 }, { "epoch": 0.908254420923062, "grad_norm": 1.9443252329496667, "learning_rate": 4.829377806286081e-07, "loss": 0.7001, "step": 74525 }, { "epoch": 0.9083153571472097, "grad_norm": 2.737867868465688, "learning_rate": 4.826170622193714e-07, "loss": 0.7067, "step": 74530 }, { "epoch": 0.9083762933713575, "grad_norm": 1.7672250911912486, "learning_rate": 4.822963438101347e-07, "loss": 0.6114, "step": 74535 }, { "epoch": 0.9084372295955053, "grad_norm": 2.741878371649692, "learning_rate": 4.81975625400898e-07, "loss": 0.7341, "step": 74540 }, { "epoch": 0.9084981658196531, "grad_norm": 3.2218477556191063, "learning_rate": 4.816549069916614e-07, "loss": 0.6548, "step": 74545 }, { "epoch": 0.908559102043801, "grad_norm": 2.5144043267440828, "learning_rate": 4.813341885824246e-07, "loss": 0.6967, "step": 74550 }, { "epoch": 0.9086200382679488, "grad_norm": 2.2394126923089646, "learning_rate": 4.81013470173188e-07, "loss": 0.679, "step": 74555 }, { "epoch": 0.9086809744920966, "grad_norm": 2.421402824839856, "learning_rate": 4.806927517639513e-07, "loss": 0.7366, "step": 74560 }, { "epoch": 0.9087419107162443, "grad_norm": 2.576842364758172, "learning_rate": 4.803720333547146e-07, "loss": 0.6881, "step": 74565 }, { "epoch": 0.9088028469403922, "grad_norm": 2.92228344037224, "learning_rate": 4.800513149454778e-07, "loss": 0.6734, "step": 74570 }, { "epoch": 0.90886378316454, "grad_norm": 2.5907529458671825, "learning_rate": 4.797305965362413e-07, "loss": 0.6506, "step": 74575 }, { "epoch": 0.9089247193886878, "grad_norm": 2.4832268627770717, "learning_rate": 4.794098781270045e-07, "loss": 0.697, "step": 74580 }, { "epoch": 0.9089856556128356, "grad_norm": 2.4202416790760752, "learning_rate": 4.790891597177678e-07, "loss": 0.7216, "step": 74585 }, { "epoch": 0.9090465918369834, "grad_norm": 2.5491929723284286, "learning_rate": 4.787684413085312e-07, "loss": 0.6708, "step": 74590 }, { "epoch": 0.9091075280611313, "grad_norm": 2.2689419402889204, "learning_rate": 4.784477228992945e-07, "loss": 0.6932, "step": 74595 }, { "epoch": 0.909168464285279, "grad_norm": 2.964832050737843, "learning_rate": 4.781270044900577e-07, "loss": 0.7151, "step": 74600 }, { "epoch": 0.9092294005094268, "grad_norm": 2.4373786828749133, "learning_rate": 4.778062860808211e-07, "loss": 0.7041, "step": 74605 }, { "epoch": 0.9092903367335746, "grad_norm": 3.2974310586023607, "learning_rate": 4.774855676715844e-07, "loss": 0.6763, "step": 74610 }, { "epoch": 0.9093512729577224, "grad_norm": 2.213159611187901, "learning_rate": 4.771648492623477e-07, "loss": 0.7034, "step": 74615 }, { "epoch": 0.9094122091818703, "grad_norm": 2.5351185732887753, "learning_rate": 4.76844130853111e-07, "loss": 0.7607, "step": 74620 }, { "epoch": 0.9094731454060181, "grad_norm": 2.455222495478162, "learning_rate": 4.765234124438743e-07, "loss": 0.7463, "step": 74625 }, { "epoch": 0.9095340816301659, "grad_norm": 6.024308260103688, "learning_rate": 4.762026940346377e-07, "loss": 0.6272, "step": 74630 }, { "epoch": 0.9095950178543136, "grad_norm": 2.5927220474815966, "learning_rate": 4.758819756254009e-07, "loss": 0.6871, "step": 74635 }, { "epoch": 0.9096559540784614, "grad_norm": 2.574489309821621, "learning_rate": 4.755612572161643e-07, "loss": 0.6912, "step": 74640 }, { "epoch": 0.9097168903026093, "grad_norm": 2.680410014976757, "learning_rate": 4.7524053880692753e-07, "loss": 0.6873, "step": 74645 }, { "epoch": 0.9097778265267571, "grad_norm": 2.3449293005247043, "learning_rate": 4.749198203976909e-07, "loss": 0.674, "step": 74650 }, { "epoch": 0.9098387627509049, "grad_norm": 2.083934130207349, "learning_rate": 4.7459910198845415e-07, "loss": 0.755, "step": 74655 }, { "epoch": 0.9098996989750527, "grad_norm": 3.112124807330796, "learning_rate": 4.7427838357921753e-07, "loss": 0.7769, "step": 74660 }, { "epoch": 0.9099606351992006, "grad_norm": 2.5153246507575435, "learning_rate": 4.739576651699808e-07, "loss": 0.7404, "step": 74665 }, { "epoch": 0.9100215714233483, "grad_norm": 2.481422515838497, "learning_rate": 4.7363694676074415e-07, "loss": 0.6813, "step": 74670 }, { "epoch": 0.9100825076474961, "grad_norm": 3.04932221605849, "learning_rate": 4.7331622835150743e-07, "loss": 0.69, "step": 74675 }, { "epoch": 0.9101434438716439, "grad_norm": 2.987073617373093, "learning_rate": 4.7299550994227077e-07, "loss": 0.693, "step": 74680 }, { "epoch": 0.9102043800957917, "grad_norm": 3.874831310036004, "learning_rate": 4.7267479153303405e-07, "loss": 0.6691, "step": 74685 }, { "epoch": 0.9102653163199396, "grad_norm": 2.441434497315268, "learning_rate": 4.723540731237974e-07, "loss": 0.7609, "step": 74690 }, { "epoch": 0.9103262525440874, "grad_norm": 2.643971538183827, "learning_rate": 4.7203335471456066e-07, "loss": 0.7415, "step": 74695 }, { "epoch": 0.9103871887682352, "grad_norm": 2.291936483048033, "learning_rate": 4.71712636305324e-07, "loss": 0.659, "step": 74700 }, { "epoch": 0.9104481249923829, "grad_norm": 2.8989325848825955, "learning_rate": 4.713919178960873e-07, "loss": 0.7022, "step": 74705 }, { "epoch": 0.9105090612165307, "grad_norm": 2.514398527061446, "learning_rate": 4.710711994868506e-07, "loss": 0.7531, "step": 74710 }, { "epoch": 0.9105699974406786, "grad_norm": 2.5358386297575284, "learning_rate": 4.707504810776139e-07, "loss": 0.7118, "step": 74715 }, { "epoch": 0.9106309336648264, "grad_norm": 2.313108972229892, "learning_rate": 4.7042976266837723e-07, "loss": 0.6646, "step": 74720 }, { "epoch": 0.9106918698889742, "grad_norm": 2.3132944296383733, "learning_rate": 4.701090442591405e-07, "loss": 0.7265, "step": 74725 }, { "epoch": 0.910752806113122, "grad_norm": 2.8750906764759634, "learning_rate": 4.6978832584990385e-07, "loss": 0.7621, "step": 74730 }, { "epoch": 0.9108137423372699, "grad_norm": 3.685417095767064, "learning_rate": 4.6946760744066713e-07, "loss": 0.7698, "step": 74735 }, { "epoch": 0.9108746785614176, "grad_norm": 2.5481316508653413, "learning_rate": 4.6914688903143046e-07, "loss": 0.644, "step": 74740 }, { "epoch": 0.9109356147855654, "grad_norm": 1.961011974724956, "learning_rate": 4.6882617062219374e-07, "loss": 0.6986, "step": 74745 }, { "epoch": 0.9109965510097132, "grad_norm": 2.549383647905471, "learning_rate": 4.685054522129571e-07, "loss": 0.7046, "step": 74750 }, { "epoch": 0.911057487233861, "grad_norm": 2.3723818010177546, "learning_rate": 4.6818473380372036e-07, "loss": 0.6277, "step": 74755 }, { "epoch": 0.9111184234580089, "grad_norm": 3.6949901806409007, "learning_rate": 4.678640153944837e-07, "loss": 0.7413, "step": 74760 }, { "epoch": 0.9111793596821567, "grad_norm": 2.8506250989483504, "learning_rate": 4.67543296985247e-07, "loss": 0.7185, "step": 74765 }, { "epoch": 0.9112402959063045, "grad_norm": 2.0777591183683715, "learning_rate": 4.672225785760103e-07, "loss": 0.7099, "step": 74770 }, { "epoch": 0.9113012321304522, "grad_norm": 2.4830126267112713, "learning_rate": 4.669018601667736e-07, "loss": 0.6877, "step": 74775 }, { "epoch": 0.9113621683546, "grad_norm": 2.183735435853133, "learning_rate": 4.665811417575369e-07, "loss": 0.736, "step": 74780 }, { "epoch": 0.9114231045787479, "grad_norm": 2.89027742741854, "learning_rate": 4.662604233483002e-07, "loss": 0.7682, "step": 74785 }, { "epoch": 0.9114840408028957, "grad_norm": 2.7557470800610546, "learning_rate": 4.6593970493906354e-07, "loss": 0.698, "step": 74790 }, { "epoch": 0.9115449770270435, "grad_norm": 2.02230730119286, "learning_rate": 4.656189865298268e-07, "loss": 0.6797, "step": 74795 }, { "epoch": 0.9116059132511913, "grad_norm": 2.367810607880172, "learning_rate": 4.6529826812059016e-07, "loss": 0.7655, "step": 74800 }, { "epoch": 0.9116668494753392, "grad_norm": 2.221774628556317, "learning_rate": 4.6497754971135344e-07, "loss": 0.6527, "step": 74805 }, { "epoch": 0.9117277856994869, "grad_norm": 3.6057180935782576, "learning_rate": 4.6465683130211677e-07, "loss": 0.7253, "step": 74810 }, { "epoch": 0.9117887219236347, "grad_norm": 5.221076844861467, "learning_rate": 4.6433611289288005e-07, "loss": 0.7528, "step": 74815 }, { "epoch": 0.9118496581477825, "grad_norm": 2.8941536242581978, "learning_rate": 4.640153944836434e-07, "loss": 0.7005, "step": 74820 }, { "epoch": 0.9119105943719303, "grad_norm": 4.4169211671580575, "learning_rate": 4.6369467607440667e-07, "loss": 0.7037, "step": 74825 }, { "epoch": 0.9119715305960782, "grad_norm": 2.326341504663347, "learning_rate": 4.6337395766517e-07, "loss": 0.6791, "step": 74830 }, { "epoch": 0.912032466820226, "grad_norm": 2.177940294034775, "learning_rate": 4.630532392559333e-07, "loss": 0.7142, "step": 74835 }, { "epoch": 0.9120934030443738, "grad_norm": 2.2562123172769586, "learning_rate": 4.6273252084669667e-07, "loss": 0.7153, "step": 74840 }, { "epoch": 0.9121543392685215, "grad_norm": 2.128251072908, "learning_rate": 4.624118024374599e-07, "loss": 0.6909, "step": 74845 }, { "epoch": 0.9122152754926693, "grad_norm": 2.1850997020611493, "learning_rate": 4.620910840282233e-07, "loss": 0.7133, "step": 74850 }, { "epoch": 0.9122762117168172, "grad_norm": 2.5526725789622295, "learning_rate": 4.617703656189865e-07, "loss": 0.7102, "step": 74855 }, { "epoch": 0.912337147940965, "grad_norm": 2.4757304214908538, "learning_rate": 4.614496472097499e-07, "loss": 0.7636, "step": 74860 }, { "epoch": 0.9123980841651128, "grad_norm": 2.1457806360550795, "learning_rate": 4.6112892880051313e-07, "loss": 0.7379, "step": 74865 }, { "epoch": 0.9124590203892606, "grad_norm": 2.7970939239442623, "learning_rate": 4.608082103912765e-07, "loss": 0.7339, "step": 74870 }, { "epoch": 0.9125199566134085, "grad_norm": 3.0106251424286374, "learning_rate": 4.604874919820398e-07, "loss": 0.8151, "step": 74875 }, { "epoch": 0.9125808928375562, "grad_norm": 2.5092270878003573, "learning_rate": 4.6016677357280314e-07, "loss": 0.7756, "step": 74880 }, { "epoch": 0.912641829061704, "grad_norm": 2.702044212580317, "learning_rate": 4.598460551635664e-07, "loss": 0.6974, "step": 74885 }, { "epoch": 0.9127027652858518, "grad_norm": 2.238769818593151, "learning_rate": 4.5952533675432975e-07, "loss": 0.7531, "step": 74890 }, { "epoch": 0.9127637015099996, "grad_norm": 3.903316048348651, "learning_rate": 4.5920461834509303e-07, "loss": 0.7443, "step": 74895 }, { "epoch": 0.9128246377341475, "grad_norm": 2.8362238488259517, "learning_rate": 4.5888389993585637e-07, "loss": 0.7431, "step": 74900 }, { "epoch": 0.9128855739582953, "grad_norm": 2.1079206998833007, "learning_rate": 4.5856318152661965e-07, "loss": 0.7543, "step": 74905 }, { "epoch": 0.9129465101824431, "grad_norm": 2.4690461679837967, "learning_rate": 4.58242463117383e-07, "loss": 0.6479, "step": 74910 }, { "epoch": 0.9130074464065908, "grad_norm": 2.66968221632954, "learning_rate": 4.5792174470814627e-07, "loss": 0.7193, "step": 74915 }, { "epoch": 0.9130683826307386, "grad_norm": 3.165727910848034, "learning_rate": 4.576010262989096e-07, "loss": 0.7989, "step": 74920 }, { "epoch": 0.9131293188548865, "grad_norm": 2.5985183908281, "learning_rate": 4.5728030788967293e-07, "loss": 0.7578, "step": 74925 }, { "epoch": 0.9131902550790343, "grad_norm": 2.5413973059562043, "learning_rate": 4.569595894804362e-07, "loss": 0.7683, "step": 74930 }, { "epoch": 0.9132511913031821, "grad_norm": 1.9951827569440514, "learning_rate": 4.5663887107119955e-07, "loss": 0.6009, "step": 74935 }, { "epoch": 0.9133121275273299, "grad_norm": 2.666276553877675, "learning_rate": 4.5631815266196283e-07, "loss": 0.7692, "step": 74940 }, { "epoch": 0.9133730637514778, "grad_norm": 2.397539548530579, "learning_rate": 4.5599743425272617e-07, "loss": 0.7196, "step": 74945 }, { "epoch": 0.9134339999756255, "grad_norm": 1.9880629002953905, "learning_rate": 4.5567671584348945e-07, "loss": 0.6798, "step": 74950 }, { "epoch": 0.9134949361997733, "grad_norm": 2.297524529956151, "learning_rate": 4.553559974342528e-07, "loss": 0.7262, "step": 74955 }, { "epoch": 0.9135558724239211, "grad_norm": 2.488452430179477, "learning_rate": 4.5503527902501606e-07, "loss": 0.734, "step": 74960 }, { "epoch": 0.9136168086480689, "grad_norm": 2.9911501417471666, "learning_rate": 4.547145606157794e-07, "loss": 0.7198, "step": 74965 }, { "epoch": 0.9136777448722168, "grad_norm": 2.4292863302995795, "learning_rate": 4.543938422065427e-07, "loss": 0.7769, "step": 74970 }, { "epoch": 0.9137386810963646, "grad_norm": 2.8023254565160443, "learning_rate": 4.54073123797306e-07, "loss": 0.7012, "step": 74975 }, { "epoch": 0.9137996173205124, "grad_norm": 2.4366939405400236, "learning_rate": 4.537524053880693e-07, "loss": 0.7292, "step": 74980 }, { "epoch": 0.9138605535446601, "grad_norm": 2.920199859512733, "learning_rate": 4.5343168697883263e-07, "loss": 0.6805, "step": 74985 }, { "epoch": 0.9139214897688079, "grad_norm": 2.675300360151054, "learning_rate": 4.531109685695959e-07, "loss": 0.7467, "step": 74990 }, { "epoch": 0.9139824259929558, "grad_norm": 2.669250883578298, "learning_rate": 4.5279025016035925e-07, "loss": 0.7284, "step": 74995 }, { "epoch": 0.9140433622171036, "grad_norm": 2.5516524659989255, "learning_rate": 4.5246953175112253e-07, "loss": 0.6892, "step": 75000 }, { "epoch": 0.9141042984412514, "grad_norm": 2.2041297425940254, "learning_rate": 4.5214881334188586e-07, "loss": 0.6518, "step": 75005 }, { "epoch": 0.9141652346653992, "grad_norm": 2.5215217956234226, "learning_rate": 4.5182809493264914e-07, "loss": 0.7197, "step": 75010 }, { "epoch": 0.914226170889547, "grad_norm": 2.3269869992926484, "learning_rate": 4.5150737652341253e-07, "loss": 0.6984, "step": 75015 }, { "epoch": 0.9142871071136948, "grad_norm": 2.432437490966155, "learning_rate": 4.5118665811417576e-07, "loss": 0.6348, "step": 75020 }, { "epoch": 0.9143480433378426, "grad_norm": 2.2937103011828155, "learning_rate": 4.5086593970493915e-07, "loss": 0.741, "step": 75025 }, { "epoch": 0.9144089795619904, "grad_norm": 2.8730903857982057, "learning_rate": 4.505452212957024e-07, "loss": 0.7865, "step": 75030 }, { "epoch": 0.9144699157861382, "grad_norm": 2.206468568787552, "learning_rate": 4.5022450288646576e-07, "loss": 0.7764, "step": 75035 }, { "epoch": 0.914530852010286, "grad_norm": 2.4861690893346364, "learning_rate": 4.49903784477229e-07, "loss": 0.6908, "step": 75040 }, { "epoch": 0.9145917882344339, "grad_norm": 2.751532178884095, "learning_rate": 4.495830660679924e-07, "loss": 0.7317, "step": 75045 }, { "epoch": 0.9146527244585817, "grad_norm": 2.220080525863649, "learning_rate": 4.4926234765875566e-07, "loss": 0.7669, "step": 75050 }, { "epoch": 0.9147136606827294, "grad_norm": 2.3518100335919487, "learning_rate": 4.48941629249519e-07, "loss": 0.7107, "step": 75055 }, { "epoch": 0.9147745969068772, "grad_norm": 2.835586993287377, "learning_rate": 4.486209108402823e-07, "loss": 0.7794, "step": 75060 }, { "epoch": 0.9148355331310251, "grad_norm": 2.3310513863462856, "learning_rate": 4.483001924310456e-07, "loss": 0.6923, "step": 75065 }, { "epoch": 0.9148964693551729, "grad_norm": 2.499762239920655, "learning_rate": 4.479794740218089e-07, "loss": 0.6582, "step": 75070 }, { "epoch": 0.9149574055793207, "grad_norm": 2.254021056781153, "learning_rate": 4.476587556125722e-07, "loss": 0.6781, "step": 75075 }, { "epoch": 0.9150183418034685, "grad_norm": 2.5722788699604364, "learning_rate": 4.473380372033355e-07, "loss": 0.7494, "step": 75080 }, { "epoch": 0.9150792780276163, "grad_norm": 2.0505173974971234, "learning_rate": 4.4701731879409884e-07, "loss": 0.7184, "step": 75085 }, { "epoch": 0.9151402142517641, "grad_norm": 2.426539138642407, "learning_rate": 4.466966003848621e-07, "loss": 0.7243, "step": 75090 }, { "epoch": 0.9152011504759119, "grad_norm": 2.6362416384310294, "learning_rate": 4.4637588197562546e-07, "loss": 0.7135, "step": 75095 }, { "epoch": 0.9152620867000597, "grad_norm": 4.557986540057934, "learning_rate": 4.4605516356638874e-07, "loss": 0.7068, "step": 75100 }, { "epoch": 0.9153230229242075, "grad_norm": 2.852690009534867, "learning_rate": 4.4573444515715207e-07, "loss": 0.7531, "step": 75105 }, { "epoch": 0.9153839591483554, "grad_norm": 2.386995003230738, "learning_rate": 4.4541372674791535e-07, "loss": 0.7552, "step": 75110 }, { "epoch": 0.9154448953725032, "grad_norm": 2.441039459103039, "learning_rate": 4.450930083386787e-07, "loss": 0.7441, "step": 75115 }, { "epoch": 0.915505831596651, "grad_norm": 3.0411708393430645, "learning_rate": 4.4477228992944197e-07, "loss": 0.6729, "step": 75120 }, { "epoch": 0.9155667678207987, "grad_norm": 2.22121362945975, "learning_rate": 4.444515715202053e-07, "loss": 0.6678, "step": 75125 }, { "epoch": 0.9156277040449465, "grad_norm": 2.08118385349535, "learning_rate": 4.441308531109686e-07, "loss": 0.7175, "step": 75130 }, { "epoch": 0.9156886402690944, "grad_norm": 2.8520525040544578, "learning_rate": 4.438101347017319e-07, "loss": 0.6607, "step": 75135 }, { "epoch": 0.9157495764932422, "grad_norm": 2.245850203515474, "learning_rate": 4.434894162924952e-07, "loss": 0.693, "step": 75140 }, { "epoch": 0.91581051271739, "grad_norm": 2.3211374452269693, "learning_rate": 4.4316869788325854e-07, "loss": 0.7621, "step": 75145 }, { "epoch": 0.9158714489415378, "grad_norm": 2.6826612729761483, "learning_rate": 4.428479794740218e-07, "loss": 0.7003, "step": 75150 }, { "epoch": 0.9159323851656856, "grad_norm": 2.482309336291415, "learning_rate": 4.4252726106478515e-07, "loss": 0.6881, "step": 75155 }, { "epoch": 0.9159933213898334, "grad_norm": 3.1300174226929154, "learning_rate": 4.4220654265554843e-07, "loss": 0.7095, "step": 75160 }, { "epoch": 0.9160542576139812, "grad_norm": 3.1461351434313722, "learning_rate": 4.4188582424631177e-07, "loss": 0.732, "step": 75165 }, { "epoch": 0.916115193838129, "grad_norm": 2.845064399425084, "learning_rate": 4.4156510583707505e-07, "loss": 0.7454, "step": 75170 }, { "epoch": 0.9161761300622768, "grad_norm": 3.3024741115515206, "learning_rate": 4.412443874278384e-07, "loss": 0.6635, "step": 75175 }, { "epoch": 0.9162370662864247, "grad_norm": 2.328436722229763, "learning_rate": 4.4092366901860167e-07, "loss": 0.7567, "step": 75180 }, { "epoch": 0.9162980025105725, "grad_norm": 2.105687988206033, "learning_rate": 4.40602950609365e-07, "loss": 0.6484, "step": 75185 }, { "epoch": 0.9163589387347203, "grad_norm": 3.3722115594764426, "learning_rate": 4.402822322001283e-07, "loss": 0.7431, "step": 75190 }, { "epoch": 0.916419874958868, "grad_norm": 2.472327278550293, "learning_rate": 4.399615137908916e-07, "loss": 0.7273, "step": 75195 }, { "epoch": 0.9164808111830158, "grad_norm": 2.520723669320648, "learning_rate": 4.396407953816549e-07, "loss": 0.6777, "step": 75200 }, { "epoch": 0.9165417474071637, "grad_norm": 2.2318191299013317, "learning_rate": 4.3932007697241823e-07, "loss": 0.7088, "step": 75205 }, { "epoch": 0.9166026836313115, "grad_norm": 2.7244015121754517, "learning_rate": 4.389993585631815e-07, "loss": 0.65, "step": 75210 }, { "epoch": 0.9166636198554593, "grad_norm": 2.4071754868226605, "learning_rate": 4.3867864015394485e-07, "loss": 0.7255, "step": 75215 }, { "epoch": 0.9167245560796071, "grad_norm": 3.245719588871178, "learning_rate": 4.3835792174470813e-07, "loss": 0.774, "step": 75220 }, { "epoch": 0.916785492303755, "grad_norm": 2.5213919007311945, "learning_rate": 4.380372033354715e-07, "loss": 0.7378, "step": 75225 }, { "epoch": 0.9168464285279027, "grad_norm": 2.1173731961806483, "learning_rate": 4.3771648492623485e-07, "loss": 0.7584, "step": 75230 }, { "epoch": 0.9169073647520505, "grad_norm": 2.130485078892219, "learning_rate": 4.3739576651699813e-07, "loss": 0.7077, "step": 75235 }, { "epoch": 0.9169683009761983, "grad_norm": 2.5381595152221963, "learning_rate": 4.3707504810776147e-07, "loss": 0.7579, "step": 75240 }, { "epoch": 0.9170292372003461, "grad_norm": 2.925947486408773, "learning_rate": 4.3675432969852475e-07, "loss": 0.6295, "step": 75245 }, { "epoch": 0.917090173424494, "grad_norm": 2.958806474385472, "learning_rate": 4.364336112892881e-07, "loss": 0.7537, "step": 75250 }, { "epoch": 0.9171511096486418, "grad_norm": 3.200128275268916, "learning_rate": 4.3611289288005136e-07, "loss": 0.6747, "step": 75255 }, { "epoch": 0.9172120458727895, "grad_norm": 1.8033862111086116, "learning_rate": 4.357921744708147e-07, "loss": 0.7159, "step": 75260 }, { "epoch": 0.9172729820969373, "grad_norm": 2.6420065726516855, "learning_rate": 4.35471456061578e-07, "loss": 0.7291, "step": 75265 }, { "epoch": 0.9173339183210851, "grad_norm": 3.058736603403392, "learning_rate": 4.351507376523413e-07, "loss": 0.7072, "step": 75270 }, { "epoch": 0.917394854545233, "grad_norm": 2.4001580428584206, "learning_rate": 4.348300192431046e-07, "loss": 0.6678, "step": 75275 }, { "epoch": 0.9174557907693808, "grad_norm": 2.4973670390796223, "learning_rate": 4.3450930083386793e-07, "loss": 0.7694, "step": 75280 }, { "epoch": 0.9175167269935286, "grad_norm": 2.2331861371979764, "learning_rate": 4.341885824246312e-07, "loss": 0.6945, "step": 75285 }, { "epoch": 0.9175776632176764, "grad_norm": 2.752936279067423, "learning_rate": 4.3386786401539455e-07, "loss": 0.708, "step": 75290 }, { "epoch": 0.9176385994418241, "grad_norm": 2.729298393376369, "learning_rate": 4.3354714560615783e-07, "loss": 0.6969, "step": 75295 }, { "epoch": 0.917699535665972, "grad_norm": 2.2026394774685043, "learning_rate": 4.3322642719692116e-07, "loss": 0.7388, "step": 75300 }, { "epoch": 0.9177604718901198, "grad_norm": 2.1057588306565487, "learning_rate": 4.3290570878768444e-07, "loss": 0.6705, "step": 75305 }, { "epoch": 0.9178214081142676, "grad_norm": 2.8710492686961735, "learning_rate": 4.325849903784478e-07, "loss": 0.7262, "step": 75310 }, { "epoch": 0.9178823443384154, "grad_norm": 2.46123723589629, "learning_rate": 4.3226427196921106e-07, "loss": 0.7457, "step": 75315 }, { "epoch": 0.9179432805625632, "grad_norm": 3.328510356073932, "learning_rate": 4.319435535599744e-07, "loss": 0.7068, "step": 75320 }, { "epoch": 0.9180042167867111, "grad_norm": 2.6686318149539754, "learning_rate": 4.316228351507377e-07, "loss": 0.6023, "step": 75325 }, { "epoch": 0.9180651530108588, "grad_norm": 2.528126486808314, "learning_rate": 4.31302116741501e-07, "loss": 0.7096, "step": 75330 }, { "epoch": 0.9181260892350066, "grad_norm": 2.621642027889952, "learning_rate": 4.309813983322643e-07, "loss": 0.7029, "step": 75335 }, { "epoch": 0.9181870254591544, "grad_norm": 2.402771607562862, "learning_rate": 4.306606799230276e-07, "loss": 0.6916, "step": 75340 }, { "epoch": 0.9182479616833022, "grad_norm": 2.4324096064000815, "learning_rate": 4.303399615137909e-07, "loss": 0.7202, "step": 75345 }, { "epoch": 0.9183088979074501, "grad_norm": 1.9698404067401452, "learning_rate": 4.3001924310455424e-07, "loss": 0.5856, "step": 75350 }, { "epoch": 0.9183698341315979, "grad_norm": 2.225312361212136, "learning_rate": 4.296985246953175e-07, "loss": 0.7492, "step": 75355 }, { "epoch": 0.9184307703557457, "grad_norm": 3.5809290523202497, "learning_rate": 4.2937780628608086e-07, "loss": 0.7059, "step": 75360 }, { "epoch": 0.9184917065798934, "grad_norm": 2.415428615532759, "learning_rate": 4.2905708787684414e-07, "loss": 0.82, "step": 75365 }, { "epoch": 0.9185526428040413, "grad_norm": 2.488287611816699, "learning_rate": 4.287363694676075e-07, "loss": 0.7285, "step": 75370 }, { "epoch": 0.9186135790281891, "grad_norm": 2.415579410485473, "learning_rate": 4.2841565105837076e-07, "loss": 0.6304, "step": 75375 }, { "epoch": 0.9186745152523369, "grad_norm": 2.3189293370488673, "learning_rate": 4.280949326491341e-07, "loss": 0.7364, "step": 75380 }, { "epoch": 0.9187354514764847, "grad_norm": 2.2776958077632474, "learning_rate": 4.2777421423989737e-07, "loss": 0.7561, "step": 75385 }, { "epoch": 0.9187963877006325, "grad_norm": 2.3733853686512454, "learning_rate": 4.274534958306607e-07, "loss": 0.6616, "step": 75390 }, { "epoch": 0.9188573239247804, "grad_norm": 2.7629153690352903, "learning_rate": 4.27132777421424e-07, "loss": 0.6784, "step": 75395 }, { "epoch": 0.9189182601489281, "grad_norm": 2.333484566146661, "learning_rate": 4.268120590121874e-07, "loss": 0.6814, "step": 75400 }, { "epoch": 0.9189791963730759, "grad_norm": 2.285178938878037, "learning_rate": 4.264913406029506e-07, "loss": 0.6184, "step": 75405 }, { "epoch": 0.9190401325972237, "grad_norm": 2.494732965985156, "learning_rate": 4.26170622193714e-07, "loss": 0.6581, "step": 75410 }, { "epoch": 0.9191010688213715, "grad_norm": 2.9500656310114235, "learning_rate": 4.258499037844772e-07, "loss": 0.697, "step": 75415 }, { "epoch": 0.9191620050455194, "grad_norm": 2.6878370499246484, "learning_rate": 4.255291853752406e-07, "loss": 0.7241, "step": 75420 }, { "epoch": 0.9192229412696672, "grad_norm": 2.2121768801018726, "learning_rate": 4.2520846696600383e-07, "loss": 0.6925, "step": 75425 }, { "epoch": 0.919283877493815, "grad_norm": 2.451415015799348, "learning_rate": 4.248877485567672e-07, "loss": 0.6468, "step": 75430 }, { "epoch": 0.9193448137179627, "grad_norm": 2.413687490085307, "learning_rate": 4.245670301475305e-07, "loss": 0.6959, "step": 75435 }, { "epoch": 0.9194057499421106, "grad_norm": 2.6609880194726143, "learning_rate": 4.2424631173829384e-07, "loss": 0.6738, "step": 75440 }, { "epoch": 0.9194666861662584, "grad_norm": 2.49514631221866, "learning_rate": 4.239255933290571e-07, "loss": 0.7292, "step": 75445 }, { "epoch": 0.9195276223904062, "grad_norm": 2.7638706705311424, "learning_rate": 4.2360487491982045e-07, "loss": 0.7635, "step": 75450 }, { "epoch": 0.919588558614554, "grad_norm": 3.4002512998030188, "learning_rate": 4.2328415651058374e-07, "loss": 0.7235, "step": 75455 }, { "epoch": 0.9196494948387018, "grad_norm": 2.401493146715304, "learning_rate": 4.2296343810134707e-07, "loss": 0.6915, "step": 75460 }, { "epoch": 0.9197104310628497, "grad_norm": 2.5520787885891347, "learning_rate": 4.2264271969211035e-07, "loss": 0.7722, "step": 75465 }, { "epoch": 0.9197713672869974, "grad_norm": 2.329089877372336, "learning_rate": 4.223220012828737e-07, "loss": 0.753, "step": 75470 }, { "epoch": 0.9198323035111452, "grad_norm": 2.194639876236992, "learning_rate": 4.2200128287363697e-07, "loss": 0.6638, "step": 75475 }, { "epoch": 0.919893239735293, "grad_norm": 4.746884235713024, "learning_rate": 4.216805644644003e-07, "loss": 0.8099, "step": 75480 }, { "epoch": 0.9199541759594408, "grad_norm": 2.4524769772704764, "learning_rate": 4.213598460551636e-07, "loss": 0.6846, "step": 75485 }, { "epoch": 0.9200151121835887, "grad_norm": 2.6794467118463507, "learning_rate": 4.210391276459269e-07, "loss": 0.7034, "step": 75490 }, { "epoch": 0.9200760484077365, "grad_norm": 2.3035917363130176, "learning_rate": 4.207184092366902e-07, "loss": 0.7546, "step": 75495 }, { "epoch": 0.9201369846318843, "grad_norm": 2.562109786686738, "learning_rate": 4.2039769082745353e-07, "loss": 0.7051, "step": 75500 }, { "epoch": 0.920197920856032, "grad_norm": 2.8856244690314155, "learning_rate": 4.200769724182168e-07, "loss": 0.7477, "step": 75505 }, { "epoch": 0.9202588570801798, "grad_norm": 3.2892913039907894, "learning_rate": 4.1975625400898015e-07, "loss": 0.7462, "step": 75510 }, { "epoch": 0.9203197933043277, "grad_norm": 2.675910806299564, "learning_rate": 4.1943553559974343e-07, "loss": 0.7917, "step": 75515 }, { "epoch": 0.9203807295284755, "grad_norm": 2.2992347908725534, "learning_rate": 4.1911481719050676e-07, "loss": 0.6855, "step": 75520 }, { "epoch": 0.9204416657526233, "grad_norm": 3.2054075663765422, "learning_rate": 4.1879409878127005e-07, "loss": 0.6321, "step": 75525 }, { "epoch": 0.9205026019767711, "grad_norm": 2.3576780142345304, "learning_rate": 4.184733803720334e-07, "loss": 0.6793, "step": 75530 }, { "epoch": 0.920563538200919, "grad_norm": 3.1675546744747343, "learning_rate": 4.181526619627967e-07, "loss": 0.712, "step": 75535 }, { "epoch": 0.9206244744250667, "grad_norm": 2.492871900195609, "learning_rate": 4.1783194355356e-07, "loss": 0.6914, "step": 75540 }, { "epoch": 0.9206854106492145, "grad_norm": 2.256870334354645, "learning_rate": 4.1751122514432333e-07, "loss": 0.7611, "step": 75545 }, { "epoch": 0.9207463468733623, "grad_norm": 2.9172355057996118, "learning_rate": 4.171905067350866e-07, "loss": 0.7214, "step": 75550 }, { "epoch": 0.9208072830975101, "grad_norm": 2.395435210413219, "learning_rate": 4.1686978832584995e-07, "loss": 0.6613, "step": 75555 }, { "epoch": 0.920868219321658, "grad_norm": 2.823852120724572, "learning_rate": 4.1654906991661323e-07, "loss": 0.7146, "step": 75560 }, { "epoch": 0.9209291555458058, "grad_norm": 2.1471100443501543, "learning_rate": 4.1622835150737656e-07, "loss": 0.6739, "step": 75565 }, { "epoch": 0.9209900917699536, "grad_norm": 2.760215586680165, "learning_rate": 4.1590763309813984e-07, "loss": 0.7556, "step": 75570 }, { "epoch": 0.9210510279941013, "grad_norm": 2.8214439443567922, "learning_rate": 4.1558691468890323e-07, "loss": 0.6241, "step": 75575 }, { "epoch": 0.9211119642182491, "grad_norm": 2.9579533667942344, "learning_rate": 4.1526619627966646e-07, "loss": 0.7581, "step": 75580 }, { "epoch": 0.921172900442397, "grad_norm": 2.220858617402152, "learning_rate": 4.1494547787042985e-07, "loss": 0.6775, "step": 75585 }, { "epoch": 0.9212338366665448, "grad_norm": 3.3515597533877033, "learning_rate": 4.146247594611931e-07, "loss": 0.673, "step": 75590 }, { "epoch": 0.9212947728906926, "grad_norm": 2.613430252354799, "learning_rate": 4.1430404105195646e-07, "loss": 0.701, "step": 75595 }, { "epoch": 0.9213557091148404, "grad_norm": 2.365543040363089, "learning_rate": 4.139833226427197e-07, "loss": 0.7461, "step": 75600 }, { "epoch": 0.9214166453389883, "grad_norm": 2.314083490495841, "learning_rate": 4.136626042334831e-07, "loss": 0.7226, "step": 75605 }, { "epoch": 0.921477581563136, "grad_norm": 2.7153617598531667, "learning_rate": 4.1334188582424636e-07, "loss": 0.7063, "step": 75610 }, { "epoch": 0.9215385177872838, "grad_norm": 2.1104537659141815, "learning_rate": 4.130211674150097e-07, "loss": 0.649, "step": 75615 }, { "epoch": 0.9215994540114316, "grad_norm": 1.9377718205234267, "learning_rate": 4.12700449005773e-07, "loss": 0.6824, "step": 75620 }, { "epoch": 0.9216603902355794, "grad_norm": 2.283304921211555, "learning_rate": 4.123797305965363e-07, "loss": 0.6005, "step": 75625 }, { "epoch": 0.9217213264597273, "grad_norm": 2.999629407533403, "learning_rate": 4.120590121872996e-07, "loss": 0.6944, "step": 75630 }, { "epoch": 0.9217822626838751, "grad_norm": 2.397712571461247, "learning_rate": 4.1173829377806293e-07, "loss": 0.6741, "step": 75635 }, { "epoch": 0.9218431989080229, "grad_norm": 2.400985251114525, "learning_rate": 4.114175753688262e-07, "loss": 0.7249, "step": 75640 }, { "epoch": 0.9219041351321706, "grad_norm": 3.0330648117452728, "learning_rate": 4.1109685695958954e-07, "loss": 0.7406, "step": 75645 }, { "epoch": 0.9219650713563184, "grad_norm": 3.1483786581388933, "learning_rate": 4.107761385503528e-07, "loss": 0.6907, "step": 75650 }, { "epoch": 0.9220260075804663, "grad_norm": 3.081840369006584, "learning_rate": 4.1045542014111616e-07, "loss": 0.7069, "step": 75655 }, { "epoch": 0.9220869438046141, "grad_norm": 3.0721397107813018, "learning_rate": 4.1013470173187944e-07, "loss": 0.75, "step": 75660 }, { "epoch": 0.9221478800287619, "grad_norm": 2.7546404376708447, "learning_rate": 4.098139833226428e-07, "loss": 0.7444, "step": 75665 }, { "epoch": 0.9222088162529097, "grad_norm": 3.1812364458697524, "learning_rate": 4.0949326491340606e-07, "loss": 0.7101, "step": 75670 }, { "epoch": 0.9222697524770576, "grad_norm": 2.4656013811348783, "learning_rate": 4.091725465041694e-07, "loss": 0.6978, "step": 75675 }, { "epoch": 0.9223306887012053, "grad_norm": 3.6943757002968423, "learning_rate": 4.0885182809493267e-07, "loss": 0.705, "step": 75680 }, { "epoch": 0.9223916249253531, "grad_norm": 2.6225249103234627, "learning_rate": 4.08531109685696e-07, "loss": 0.7201, "step": 75685 }, { "epoch": 0.9224525611495009, "grad_norm": 2.3228006387570166, "learning_rate": 4.082103912764593e-07, "loss": 0.7009, "step": 75690 }, { "epoch": 0.9225134973736487, "grad_norm": 1.9198209178990706, "learning_rate": 4.078896728672226e-07, "loss": 0.7138, "step": 75695 }, { "epoch": 0.9225744335977966, "grad_norm": 2.3413230863465753, "learning_rate": 4.075689544579859e-07, "loss": 0.7012, "step": 75700 }, { "epoch": 0.9226353698219444, "grad_norm": 2.574361306559665, "learning_rate": 4.0724823604874924e-07, "loss": 0.7095, "step": 75705 }, { "epoch": 0.9226963060460922, "grad_norm": 2.701191613366126, "learning_rate": 4.069275176395125e-07, "loss": 0.7299, "step": 75710 }, { "epoch": 0.9227572422702399, "grad_norm": 2.8673755309204063, "learning_rate": 4.0660679923027585e-07, "loss": 0.7206, "step": 75715 }, { "epoch": 0.9228181784943877, "grad_norm": 2.1096895423021653, "learning_rate": 4.0628608082103914e-07, "loss": 0.6713, "step": 75720 }, { "epoch": 0.9228791147185356, "grad_norm": 2.077611192567133, "learning_rate": 4.0596536241180247e-07, "loss": 0.6516, "step": 75725 }, { "epoch": 0.9229400509426834, "grad_norm": 3.136597293713886, "learning_rate": 4.0564464400256575e-07, "loss": 0.7105, "step": 75730 }, { "epoch": 0.9230009871668312, "grad_norm": 3.1157597910720267, "learning_rate": 4.053239255933291e-07, "loss": 0.7477, "step": 75735 }, { "epoch": 0.923061923390979, "grad_norm": 2.5585790664262067, "learning_rate": 4.0500320718409237e-07, "loss": 0.7232, "step": 75740 }, { "epoch": 0.9231228596151269, "grad_norm": 2.4672053373516554, "learning_rate": 4.046824887748557e-07, "loss": 0.7472, "step": 75745 }, { "epoch": 0.9231837958392746, "grad_norm": 2.9928311273030257, "learning_rate": 4.04361770365619e-07, "loss": 0.7034, "step": 75750 }, { "epoch": 0.9232447320634224, "grad_norm": 2.1688632684287876, "learning_rate": 4.040410519563823e-07, "loss": 0.7553, "step": 75755 }, { "epoch": 0.9233056682875702, "grad_norm": 2.462289292882147, "learning_rate": 4.037203335471456e-07, "loss": 0.7002, "step": 75760 }, { "epoch": 0.923366604511718, "grad_norm": 2.394727904209992, "learning_rate": 4.0339961513790893e-07, "loss": 0.7108, "step": 75765 }, { "epoch": 0.9234275407358659, "grad_norm": 2.5316705021217447, "learning_rate": 4.030788967286722e-07, "loss": 0.645, "step": 75770 }, { "epoch": 0.9234884769600137, "grad_norm": 2.412248683465593, "learning_rate": 4.0275817831943555e-07, "loss": 0.7052, "step": 75775 }, { "epoch": 0.9235494131841615, "grad_norm": 2.088063949152179, "learning_rate": 4.0243745991019883e-07, "loss": 0.7722, "step": 75780 }, { "epoch": 0.9236103494083092, "grad_norm": 2.573272896626526, "learning_rate": 4.021167415009622e-07, "loss": 0.7339, "step": 75785 }, { "epoch": 0.923671285632457, "grad_norm": 2.395738829166575, "learning_rate": 4.0179602309172545e-07, "loss": 0.6684, "step": 75790 }, { "epoch": 0.9237322218566049, "grad_norm": 2.5232407063052267, "learning_rate": 4.0147530468248883e-07, "loss": 0.7175, "step": 75795 }, { "epoch": 0.9237931580807527, "grad_norm": 3.0181415573059884, "learning_rate": 4.0115458627325206e-07, "loss": 0.7332, "step": 75800 }, { "epoch": 0.9238540943049005, "grad_norm": 2.740652625011262, "learning_rate": 4.0083386786401545e-07, "loss": 0.696, "step": 75805 }, { "epoch": 0.9239150305290483, "grad_norm": 2.502575107845618, "learning_rate": 4.005131494547787e-07, "loss": 0.6875, "step": 75810 }, { "epoch": 0.9239759667531962, "grad_norm": 3.4168872749873325, "learning_rate": 4.0019243104554207e-07, "loss": 0.7658, "step": 75815 }, { "epoch": 0.9240369029773439, "grad_norm": 2.9867990355694336, "learning_rate": 3.9987171263630535e-07, "loss": 0.7407, "step": 75820 }, { "epoch": 0.9240978392014917, "grad_norm": 2.5714993350275743, "learning_rate": 3.995509942270687e-07, "loss": 0.7233, "step": 75825 }, { "epoch": 0.9241587754256395, "grad_norm": 2.537671528483247, "learning_rate": 3.9923027581783196e-07, "loss": 0.7703, "step": 75830 }, { "epoch": 0.9242197116497873, "grad_norm": 2.5255387268919063, "learning_rate": 3.989095574085953e-07, "loss": 0.7434, "step": 75835 }, { "epoch": 0.9242806478739352, "grad_norm": 2.560481962075527, "learning_rate": 3.9858883899935863e-07, "loss": 0.7007, "step": 75840 }, { "epoch": 0.924341584098083, "grad_norm": 2.964190818700658, "learning_rate": 3.982681205901219e-07, "loss": 0.7354, "step": 75845 }, { "epoch": 0.9244025203222308, "grad_norm": 2.5568147715749316, "learning_rate": 3.9794740218088525e-07, "loss": 0.6662, "step": 75850 }, { "epoch": 0.9244634565463785, "grad_norm": 2.934005105105734, "learning_rate": 3.9762668377164853e-07, "loss": 0.7447, "step": 75855 }, { "epoch": 0.9245243927705263, "grad_norm": 2.5395553353821416, "learning_rate": 3.9730596536241186e-07, "loss": 0.6879, "step": 75860 }, { "epoch": 0.9245853289946742, "grad_norm": 2.411771661512256, "learning_rate": 3.9698524695317515e-07, "loss": 0.6637, "step": 75865 }, { "epoch": 0.924646265218822, "grad_norm": 2.45970526178455, "learning_rate": 3.966645285439385e-07, "loss": 0.6704, "step": 75870 }, { "epoch": 0.9247072014429698, "grad_norm": 2.288461429377542, "learning_rate": 3.9634381013470176e-07, "loss": 0.7054, "step": 75875 }, { "epoch": 0.9247681376671176, "grad_norm": 2.247829869557551, "learning_rate": 3.960230917254651e-07, "loss": 0.7806, "step": 75880 }, { "epoch": 0.9248290738912655, "grad_norm": 2.0643770209997347, "learning_rate": 3.957023733162284e-07, "loss": 0.6696, "step": 75885 }, { "epoch": 0.9248900101154132, "grad_norm": 2.6696071150987475, "learning_rate": 3.953816549069917e-07, "loss": 0.754, "step": 75890 }, { "epoch": 0.924950946339561, "grad_norm": 2.3576458668611697, "learning_rate": 3.95060936497755e-07, "loss": 0.7333, "step": 75895 }, { "epoch": 0.9250118825637088, "grad_norm": 2.3952685395426516, "learning_rate": 3.9474021808851833e-07, "loss": 0.7296, "step": 75900 }, { "epoch": 0.9250728187878566, "grad_norm": 2.8331828206625995, "learning_rate": 3.944194996792816e-07, "loss": 0.7317, "step": 75905 }, { "epoch": 0.9251337550120045, "grad_norm": 2.6037244395819203, "learning_rate": 3.9409878127004494e-07, "loss": 0.7453, "step": 75910 }, { "epoch": 0.9251946912361523, "grad_norm": 2.578009686000317, "learning_rate": 3.937780628608082e-07, "loss": 0.7123, "step": 75915 }, { "epoch": 0.9252556274603001, "grad_norm": 2.4732077495642812, "learning_rate": 3.9345734445157156e-07, "loss": 0.7525, "step": 75920 }, { "epoch": 0.9253165636844478, "grad_norm": 2.4993696239412553, "learning_rate": 3.9313662604233484e-07, "loss": 0.6826, "step": 75925 }, { "epoch": 0.9253774999085956, "grad_norm": 2.4755563250624415, "learning_rate": 3.928159076330982e-07, "loss": 0.7334, "step": 75930 }, { "epoch": 0.9254384361327435, "grad_norm": 2.4691705474066223, "learning_rate": 3.9249518922386146e-07, "loss": 0.6967, "step": 75935 }, { "epoch": 0.9254993723568913, "grad_norm": 2.8632112769043787, "learning_rate": 3.921744708146248e-07, "loss": 0.7101, "step": 75940 }, { "epoch": 0.9255603085810391, "grad_norm": 3.0566833493262227, "learning_rate": 3.9185375240538807e-07, "loss": 0.7209, "step": 75945 }, { "epoch": 0.9256212448051869, "grad_norm": 2.259669144320295, "learning_rate": 3.915330339961514e-07, "loss": 0.6967, "step": 75950 }, { "epoch": 0.9256821810293347, "grad_norm": 2.504276285235259, "learning_rate": 3.912123155869147e-07, "loss": 0.6351, "step": 75955 }, { "epoch": 0.9257431172534825, "grad_norm": 3.1835391903983115, "learning_rate": 3.908915971776781e-07, "loss": 0.6536, "step": 75960 }, { "epoch": 0.9258040534776303, "grad_norm": 3.0334440391844124, "learning_rate": 3.905708787684413e-07, "loss": 0.6839, "step": 75965 }, { "epoch": 0.9258649897017781, "grad_norm": 2.635416486014732, "learning_rate": 3.902501603592047e-07, "loss": 0.7189, "step": 75970 }, { "epoch": 0.9259259259259259, "grad_norm": 2.4492060658142725, "learning_rate": 3.899294419499679e-07, "loss": 0.7576, "step": 75975 }, { "epoch": 0.9259868621500738, "grad_norm": 2.586589228145052, "learning_rate": 3.896087235407313e-07, "loss": 0.7836, "step": 75980 }, { "epoch": 0.9260477983742216, "grad_norm": 2.579419437539582, "learning_rate": 3.8928800513149454e-07, "loss": 0.7246, "step": 75985 }, { "epoch": 0.9261087345983694, "grad_norm": 2.625711676186707, "learning_rate": 3.889672867222579e-07, "loss": 0.7383, "step": 75990 }, { "epoch": 0.9261696708225171, "grad_norm": 2.804919762042465, "learning_rate": 3.886465683130212e-07, "loss": 0.7375, "step": 75995 }, { "epoch": 0.9262306070466649, "grad_norm": 2.5403849075937144, "learning_rate": 3.8832584990378454e-07, "loss": 0.7143, "step": 76000 }, { "epoch": 0.9262915432708128, "grad_norm": 3.1072870303325355, "learning_rate": 3.880051314945478e-07, "loss": 0.773, "step": 76005 }, { "epoch": 0.9263524794949606, "grad_norm": 2.333072108523667, "learning_rate": 3.8768441308531116e-07, "loss": 0.6999, "step": 76010 }, { "epoch": 0.9264134157191084, "grad_norm": 2.9743900389975626, "learning_rate": 3.8736369467607444e-07, "loss": 0.785, "step": 76015 }, { "epoch": 0.9264743519432562, "grad_norm": 2.9532337912126265, "learning_rate": 3.8704297626683777e-07, "loss": 0.7139, "step": 76020 }, { "epoch": 0.926535288167404, "grad_norm": 3.1538499544306107, "learning_rate": 3.8672225785760105e-07, "loss": 0.7092, "step": 76025 }, { "epoch": 0.9265962243915518, "grad_norm": 2.578274446708381, "learning_rate": 3.864015394483644e-07, "loss": 0.7282, "step": 76030 }, { "epoch": 0.9266571606156996, "grad_norm": 2.110354570044919, "learning_rate": 3.8608082103912767e-07, "loss": 0.6162, "step": 76035 }, { "epoch": 0.9267180968398474, "grad_norm": 2.2207505550354725, "learning_rate": 3.85760102629891e-07, "loss": 0.6961, "step": 76040 }, { "epoch": 0.9267790330639952, "grad_norm": 2.3485414067059147, "learning_rate": 3.854393842206543e-07, "loss": 0.6438, "step": 76045 }, { "epoch": 0.926839969288143, "grad_norm": 2.9647066374313087, "learning_rate": 3.851186658114176e-07, "loss": 0.7642, "step": 76050 }, { "epoch": 0.9269009055122909, "grad_norm": 2.226883031944763, "learning_rate": 3.847979474021809e-07, "loss": 0.6656, "step": 76055 }, { "epoch": 0.9269618417364387, "grad_norm": 3.0656887325801008, "learning_rate": 3.8447722899294423e-07, "loss": 0.8105, "step": 76060 }, { "epoch": 0.9270227779605864, "grad_norm": 2.1456238875078424, "learning_rate": 3.841565105837075e-07, "loss": 0.6913, "step": 76065 }, { "epoch": 0.9270837141847342, "grad_norm": 2.8197933707753533, "learning_rate": 3.8383579217447085e-07, "loss": 0.7031, "step": 76070 }, { "epoch": 0.927144650408882, "grad_norm": 2.75602780408589, "learning_rate": 3.8351507376523413e-07, "loss": 0.7719, "step": 76075 }, { "epoch": 0.9272055866330299, "grad_norm": 3.2503485456903247, "learning_rate": 3.8319435535599747e-07, "loss": 0.7552, "step": 76080 }, { "epoch": 0.9272665228571777, "grad_norm": 2.6306196259372023, "learning_rate": 3.8287363694676075e-07, "loss": 0.6976, "step": 76085 }, { "epoch": 0.9273274590813255, "grad_norm": 2.267950381701, "learning_rate": 3.825529185375241e-07, "loss": 0.7343, "step": 76090 }, { "epoch": 0.9273883953054733, "grad_norm": 2.1207501970037375, "learning_rate": 3.8223220012828736e-07, "loss": 0.6787, "step": 76095 }, { "epoch": 0.9274493315296211, "grad_norm": 2.4380452142180467, "learning_rate": 3.819114817190507e-07, "loss": 0.7401, "step": 76100 }, { "epoch": 0.9275102677537689, "grad_norm": 2.664476085715227, "learning_rate": 3.81590763309814e-07, "loss": 0.7923, "step": 76105 }, { "epoch": 0.9275712039779167, "grad_norm": 2.607920265822972, "learning_rate": 3.812700449005773e-07, "loss": 0.6878, "step": 76110 }, { "epoch": 0.9276321402020645, "grad_norm": 2.7071710390689976, "learning_rate": 3.809493264913406e-07, "loss": 0.7321, "step": 76115 }, { "epoch": 0.9276930764262123, "grad_norm": 3.649062850394511, "learning_rate": 3.8062860808210393e-07, "loss": 0.7416, "step": 76120 }, { "epoch": 0.9277540126503602, "grad_norm": 3.152652093965401, "learning_rate": 3.803078896728672e-07, "loss": 0.7068, "step": 76125 }, { "epoch": 0.927814948874508, "grad_norm": 2.3489846884695544, "learning_rate": 3.7998717126363055e-07, "loss": 0.6662, "step": 76130 }, { "epoch": 0.9278758850986557, "grad_norm": 3.326984127038099, "learning_rate": 3.7966645285439393e-07, "loss": 0.7941, "step": 76135 }, { "epoch": 0.9279368213228035, "grad_norm": 2.07713401528663, "learning_rate": 3.7934573444515716e-07, "loss": 0.7136, "step": 76140 }, { "epoch": 0.9279977575469514, "grad_norm": 2.984931802543642, "learning_rate": 3.7902501603592055e-07, "loss": 0.6834, "step": 76145 }, { "epoch": 0.9280586937710992, "grad_norm": 2.30091834889918, "learning_rate": 3.787042976266838e-07, "loss": 0.7883, "step": 76150 }, { "epoch": 0.928119629995247, "grad_norm": 2.606714295544604, "learning_rate": 3.7838357921744716e-07, "loss": 0.6702, "step": 76155 }, { "epoch": 0.9281805662193948, "grad_norm": 2.4059459146600064, "learning_rate": 3.780628608082104e-07, "loss": 0.7268, "step": 76160 }, { "epoch": 0.9282415024435426, "grad_norm": 2.053620043078054, "learning_rate": 3.777421423989738e-07, "loss": 0.7088, "step": 76165 }, { "epoch": 0.9283024386676904, "grad_norm": 2.610382948392736, "learning_rate": 3.7742142398973706e-07, "loss": 0.68, "step": 76170 }, { "epoch": 0.9283633748918382, "grad_norm": 2.094487750096667, "learning_rate": 3.771007055805004e-07, "loss": 0.6808, "step": 76175 }, { "epoch": 0.928424311115986, "grad_norm": 2.512609630264881, "learning_rate": 3.767799871712637e-07, "loss": 0.7195, "step": 76180 }, { "epoch": 0.9284852473401338, "grad_norm": 2.5989619432465108, "learning_rate": 3.76459268762027e-07, "loss": 0.6688, "step": 76185 }, { "epoch": 0.9285461835642816, "grad_norm": 2.743915307199868, "learning_rate": 3.761385503527903e-07, "loss": 0.7268, "step": 76190 }, { "epoch": 0.9286071197884295, "grad_norm": 2.4598771591652273, "learning_rate": 3.7581783194355363e-07, "loss": 0.7567, "step": 76195 }, { "epoch": 0.9286680560125773, "grad_norm": 2.7180903604066926, "learning_rate": 3.754971135343169e-07, "loss": 0.752, "step": 76200 }, { "epoch": 0.928728992236725, "grad_norm": 2.64628606943886, "learning_rate": 3.7517639512508024e-07, "loss": 0.7736, "step": 76205 }, { "epoch": 0.9287899284608728, "grad_norm": 2.3069048212897787, "learning_rate": 3.748556767158435e-07, "loss": 0.6709, "step": 76210 }, { "epoch": 0.9288508646850206, "grad_norm": 3.4737085635716065, "learning_rate": 3.7453495830660686e-07, "loss": 0.6615, "step": 76215 }, { "epoch": 0.9289118009091685, "grad_norm": 2.409054933835362, "learning_rate": 3.7421423989737014e-07, "loss": 0.7154, "step": 76220 }, { "epoch": 0.9289727371333163, "grad_norm": 2.073539496020373, "learning_rate": 3.738935214881335e-07, "loss": 0.6502, "step": 76225 }, { "epoch": 0.9290336733574641, "grad_norm": 2.746247395159327, "learning_rate": 3.7357280307889676e-07, "loss": 0.7522, "step": 76230 }, { "epoch": 0.9290946095816118, "grad_norm": 3.9636725040444105, "learning_rate": 3.732520846696601e-07, "loss": 0.6793, "step": 76235 }, { "epoch": 0.9291555458057597, "grad_norm": 2.6841613111818363, "learning_rate": 3.729313662604234e-07, "loss": 0.7168, "step": 76240 }, { "epoch": 0.9292164820299075, "grad_norm": 2.9739896442311498, "learning_rate": 3.726106478511867e-07, "loss": 0.7343, "step": 76245 }, { "epoch": 0.9292774182540553, "grad_norm": 2.3929267649743187, "learning_rate": 3.7228992944195e-07, "loss": 0.707, "step": 76250 }, { "epoch": 0.9293383544782031, "grad_norm": 3.2702058231415485, "learning_rate": 3.719692110327133e-07, "loss": 0.6982, "step": 76255 }, { "epoch": 0.9293992907023509, "grad_norm": 2.43743707135732, "learning_rate": 3.716484926234766e-07, "loss": 0.7244, "step": 76260 }, { "epoch": 0.9294602269264988, "grad_norm": 2.2553866697498046, "learning_rate": 3.7132777421423994e-07, "loss": 0.7504, "step": 76265 }, { "epoch": 0.9295211631506465, "grad_norm": 2.287134782505428, "learning_rate": 3.710070558050032e-07, "loss": 0.7332, "step": 76270 }, { "epoch": 0.9295820993747943, "grad_norm": 2.667964777775553, "learning_rate": 3.7068633739576656e-07, "loss": 0.6731, "step": 76275 }, { "epoch": 0.9296430355989421, "grad_norm": 4.333192210123319, "learning_rate": 3.7036561898652984e-07, "loss": 0.7085, "step": 76280 }, { "epoch": 0.92970397182309, "grad_norm": 2.3596683877272757, "learning_rate": 3.7004490057729317e-07, "loss": 0.7806, "step": 76285 }, { "epoch": 0.9297649080472378, "grad_norm": 2.955341609878652, "learning_rate": 3.6972418216805645e-07, "loss": 0.7265, "step": 76290 }, { "epoch": 0.9298258442713856, "grad_norm": 3.144073786638054, "learning_rate": 3.694034637588198e-07, "loss": 0.7176, "step": 76295 }, { "epoch": 0.9298867804955334, "grad_norm": 2.932238085562955, "learning_rate": 3.6908274534958307e-07, "loss": 0.7012, "step": 76300 }, { "epoch": 0.9299477167196811, "grad_norm": 2.5425321501932725, "learning_rate": 3.687620269403464e-07, "loss": 0.7173, "step": 76305 }, { "epoch": 0.930008652943829, "grad_norm": 2.396214208819783, "learning_rate": 3.684413085311097e-07, "loss": 0.7146, "step": 76310 }, { "epoch": 0.9300695891679768, "grad_norm": 2.7111743714804604, "learning_rate": 3.68120590121873e-07, "loss": 0.6924, "step": 76315 }, { "epoch": 0.9301305253921246, "grad_norm": 3.1171757592144185, "learning_rate": 3.677998717126363e-07, "loss": 0.7645, "step": 76320 }, { "epoch": 0.9301914616162724, "grad_norm": 2.294343117346639, "learning_rate": 3.6747915330339964e-07, "loss": 0.7663, "step": 76325 }, { "epoch": 0.9302523978404202, "grad_norm": 2.763381617236352, "learning_rate": 3.671584348941629e-07, "loss": 0.7219, "step": 76330 }, { "epoch": 0.9303133340645681, "grad_norm": 3.1557339658237336, "learning_rate": 3.6683771648492625e-07, "loss": 0.6851, "step": 76335 }, { "epoch": 0.9303742702887158, "grad_norm": 2.5207409552828515, "learning_rate": 3.6651699807568953e-07, "loss": 0.7557, "step": 76340 }, { "epoch": 0.9304352065128636, "grad_norm": 2.4924732960212688, "learning_rate": 3.661962796664529e-07, "loss": 0.7186, "step": 76345 }, { "epoch": 0.9304961427370114, "grad_norm": 2.3112268837873864, "learning_rate": 3.6587556125721615e-07, "loss": 0.759, "step": 76350 }, { "epoch": 0.9305570789611592, "grad_norm": 2.4272654490695587, "learning_rate": 3.6555484284797954e-07, "loss": 0.6221, "step": 76355 }, { "epoch": 0.9306180151853071, "grad_norm": 3.513844942503601, "learning_rate": 3.6523412443874276e-07, "loss": 0.8037, "step": 76360 }, { "epoch": 0.9306789514094549, "grad_norm": 2.3513508334363897, "learning_rate": 3.6491340602950615e-07, "loss": 0.7505, "step": 76365 }, { "epoch": 0.9307398876336027, "grad_norm": 3.0364817673723694, "learning_rate": 3.645926876202694e-07, "loss": 0.6741, "step": 76370 }, { "epoch": 0.9308008238577504, "grad_norm": 4.73430790695655, "learning_rate": 3.6427196921103277e-07, "loss": 0.6803, "step": 76375 }, { "epoch": 0.9308617600818982, "grad_norm": 2.310839139789138, "learning_rate": 3.6395125080179605e-07, "loss": 0.6889, "step": 76380 }, { "epoch": 0.9309226963060461, "grad_norm": 2.0959914286924874, "learning_rate": 3.636305323925594e-07, "loss": 0.6865, "step": 76385 }, { "epoch": 0.9309836325301939, "grad_norm": 2.6476105934103136, "learning_rate": 3.6330981398332266e-07, "loss": 0.6886, "step": 76390 }, { "epoch": 0.9310445687543417, "grad_norm": 2.4601988631211555, "learning_rate": 3.62989095574086e-07, "loss": 0.7295, "step": 76395 }, { "epoch": 0.9311055049784895, "grad_norm": 2.635975502023277, "learning_rate": 3.626683771648493e-07, "loss": 0.6634, "step": 76400 }, { "epoch": 0.9311664412026374, "grad_norm": 2.57983115032485, "learning_rate": 3.623476587556126e-07, "loss": 0.678, "step": 76405 }, { "epoch": 0.9312273774267851, "grad_norm": 2.5452436391012947, "learning_rate": 3.620269403463759e-07, "loss": 0.7728, "step": 76410 }, { "epoch": 0.9312883136509329, "grad_norm": 2.482995740983812, "learning_rate": 3.6170622193713923e-07, "loss": 0.7447, "step": 76415 }, { "epoch": 0.9313492498750807, "grad_norm": 2.3785959029219472, "learning_rate": 3.613855035279025e-07, "loss": 0.7519, "step": 76420 }, { "epoch": 0.9314101860992285, "grad_norm": 1.9740238497829548, "learning_rate": 3.6106478511866585e-07, "loss": 0.7088, "step": 76425 }, { "epoch": 0.9314711223233764, "grad_norm": 2.7679441680589503, "learning_rate": 3.6074406670942913e-07, "loss": 0.7217, "step": 76430 }, { "epoch": 0.9315320585475242, "grad_norm": 2.9691640829150696, "learning_rate": 3.6042334830019246e-07, "loss": 0.7178, "step": 76435 }, { "epoch": 0.931592994771672, "grad_norm": 2.5282328832239016, "learning_rate": 3.601026298909558e-07, "loss": 0.6768, "step": 76440 }, { "epoch": 0.9316539309958197, "grad_norm": 2.683890271649376, "learning_rate": 3.597819114817191e-07, "loss": 0.7124, "step": 76445 }, { "epoch": 0.9317148672199675, "grad_norm": 2.4709831272701646, "learning_rate": 3.594611930724824e-07, "loss": 0.7619, "step": 76450 }, { "epoch": 0.9317758034441154, "grad_norm": 2.10615111689623, "learning_rate": 3.591404746632457e-07, "loss": 0.6252, "step": 76455 }, { "epoch": 0.9318367396682632, "grad_norm": 2.6054132523015103, "learning_rate": 3.5881975625400903e-07, "loss": 0.6349, "step": 76460 }, { "epoch": 0.931897675892411, "grad_norm": 2.3141905660376674, "learning_rate": 3.584990378447723e-07, "loss": 0.662, "step": 76465 }, { "epoch": 0.9319586121165588, "grad_norm": 3.0421053884162657, "learning_rate": 3.5817831943553564e-07, "loss": 0.7132, "step": 76470 }, { "epoch": 0.9320195483407067, "grad_norm": 2.649611744311231, "learning_rate": 3.578576010262989e-07, "loss": 0.6993, "step": 76475 }, { "epoch": 0.9320804845648544, "grad_norm": 2.4672659249982964, "learning_rate": 3.5753688261706226e-07, "loss": 0.7044, "step": 76480 }, { "epoch": 0.9321414207890022, "grad_norm": 2.390127906045598, "learning_rate": 3.5721616420782554e-07, "loss": 0.7337, "step": 76485 }, { "epoch": 0.93220235701315, "grad_norm": 2.4443091567013333, "learning_rate": 3.568954457985889e-07, "loss": 0.76, "step": 76490 }, { "epoch": 0.9322632932372978, "grad_norm": 2.0891748448445555, "learning_rate": 3.5657472738935216e-07, "loss": 0.8159, "step": 76495 }, { "epoch": 0.9323242294614457, "grad_norm": 2.46849873846156, "learning_rate": 3.562540089801155e-07, "loss": 0.7747, "step": 76500 }, { "epoch": 0.9323851656855935, "grad_norm": 2.314158644351937, "learning_rate": 3.559332905708788e-07, "loss": 0.6636, "step": 76505 }, { "epoch": 0.9324461019097413, "grad_norm": 2.61857882268769, "learning_rate": 3.556125721616421e-07, "loss": 0.7422, "step": 76510 }, { "epoch": 0.932507038133889, "grad_norm": 2.3353281639033274, "learning_rate": 3.552918537524054e-07, "loss": 0.7285, "step": 76515 }, { "epoch": 0.9325679743580368, "grad_norm": 2.712649187849534, "learning_rate": 3.549711353431688e-07, "loss": 0.712, "step": 76520 }, { "epoch": 0.9326289105821847, "grad_norm": 2.7106939213380956, "learning_rate": 3.54650416933932e-07, "loss": 0.6037, "step": 76525 }, { "epoch": 0.9326898468063325, "grad_norm": 2.4669284765755517, "learning_rate": 3.543296985246954e-07, "loss": 0.7351, "step": 76530 }, { "epoch": 0.9327507830304803, "grad_norm": 2.2657199585381256, "learning_rate": 3.540089801154586e-07, "loss": 0.7498, "step": 76535 }, { "epoch": 0.9328117192546281, "grad_norm": 2.9285465422626036, "learning_rate": 3.53688261706222e-07, "loss": 0.6968, "step": 76540 }, { "epoch": 0.932872655478776, "grad_norm": 3.741962754508761, "learning_rate": 3.5336754329698524e-07, "loss": 0.7098, "step": 76545 }, { "epoch": 0.9329335917029237, "grad_norm": 2.616497039149756, "learning_rate": 3.530468248877486e-07, "loss": 0.6889, "step": 76550 }, { "epoch": 0.9329945279270715, "grad_norm": 2.0533297654066844, "learning_rate": 3.527261064785119e-07, "loss": 0.6857, "step": 76555 }, { "epoch": 0.9330554641512193, "grad_norm": 2.1873758513513364, "learning_rate": 3.5240538806927524e-07, "loss": 0.7636, "step": 76560 }, { "epoch": 0.9331164003753671, "grad_norm": 2.7835466507054307, "learning_rate": 3.520846696600385e-07, "loss": 0.6755, "step": 76565 }, { "epoch": 0.933177336599515, "grad_norm": 2.136606065823681, "learning_rate": 3.5176395125080186e-07, "loss": 0.6105, "step": 76570 }, { "epoch": 0.9332382728236628, "grad_norm": 2.7340548141977634, "learning_rate": 3.5144323284156514e-07, "loss": 0.6582, "step": 76575 }, { "epoch": 0.9332992090478106, "grad_norm": 2.5894030176537632, "learning_rate": 3.5112251443232847e-07, "loss": 0.6857, "step": 76580 }, { "epoch": 0.9333601452719583, "grad_norm": 3.3650555424621067, "learning_rate": 3.5080179602309175e-07, "loss": 0.7371, "step": 76585 }, { "epoch": 0.9334210814961061, "grad_norm": 2.703162232861773, "learning_rate": 3.504810776138551e-07, "loss": 0.746, "step": 76590 }, { "epoch": 0.933482017720254, "grad_norm": 2.7714066136962283, "learning_rate": 3.5016035920461837e-07, "loss": 0.7239, "step": 76595 }, { "epoch": 0.9335429539444018, "grad_norm": 4.993196534415471, "learning_rate": 3.498396407953817e-07, "loss": 0.6864, "step": 76600 }, { "epoch": 0.9336038901685496, "grad_norm": 2.8495795515700384, "learning_rate": 3.49518922386145e-07, "loss": 0.7159, "step": 76605 }, { "epoch": 0.9336648263926974, "grad_norm": 2.818857581005424, "learning_rate": 3.491982039769083e-07, "loss": 0.6755, "step": 76610 }, { "epoch": 0.9337257626168453, "grad_norm": 2.7455719145190733, "learning_rate": 3.488774855676716e-07, "loss": 0.6523, "step": 76615 }, { "epoch": 0.933786698840993, "grad_norm": 2.4468879865919337, "learning_rate": 3.4855676715843494e-07, "loss": 0.6913, "step": 76620 }, { "epoch": 0.9338476350651408, "grad_norm": 2.7110424918558165, "learning_rate": 3.482360487491982e-07, "loss": 0.7224, "step": 76625 }, { "epoch": 0.9339085712892886, "grad_norm": 2.2431894223122844, "learning_rate": 3.4791533033996155e-07, "loss": 0.6612, "step": 76630 }, { "epoch": 0.9339695075134364, "grad_norm": 4.1148770347248, "learning_rate": 3.4759461193072483e-07, "loss": 0.7149, "step": 76635 }, { "epoch": 0.9340304437375843, "grad_norm": 2.4582405291245895, "learning_rate": 3.4727389352148817e-07, "loss": 0.6921, "step": 76640 }, { "epoch": 0.9340913799617321, "grad_norm": 3.1419658320095447, "learning_rate": 3.4695317511225145e-07, "loss": 0.7099, "step": 76645 }, { "epoch": 0.9341523161858799, "grad_norm": 2.2394633518879736, "learning_rate": 3.466324567030148e-07, "loss": 0.7221, "step": 76650 }, { "epoch": 0.9342132524100276, "grad_norm": 2.5648115890806618, "learning_rate": 3.4631173829377807e-07, "loss": 0.7549, "step": 76655 }, { "epoch": 0.9342741886341754, "grad_norm": 2.3741836064319983, "learning_rate": 3.459910198845414e-07, "loss": 0.6965, "step": 76660 }, { "epoch": 0.9343351248583233, "grad_norm": 4.603425318730039, "learning_rate": 3.456703014753047e-07, "loss": 0.7397, "step": 76665 }, { "epoch": 0.9343960610824711, "grad_norm": 2.2964837598966703, "learning_rate": 3.45349583066068e-07, "loss": 0.6582, "step": 76670 }, { "epoch": 0.9344569973066189, "grad_norm": 2.7340736171450692, "learning_rate": 3.450288646568313e-07, "loss": 0.6641, "step": 76675 }, { "epoch": 0.9345179335307667, "grad_norm": 2.1627176121040526, "learning_rate": 3.4470814624759463e-07, "loss": 0.6681, "step": 76680 }, { "epoch": 0.9345788697549146, "grad_norm": 2.1361698416699437, "learning_rate": 3.443874278383579e-07, "loss": 0.7557, "step": 76685 }, { "epoch": 0.9346398059790623, "grad_norm": 2.5698265970220713, "learning_rate": 3.4406670942912125e-07, "loss": 0.7386, "step": 76690 }, { "epoch": 0.9347007422032101, "grad_norm": 2.983371520305561, "learning_rate": 3.4374599101988453e-07, "loss": 0.7284, "step": 76695 }, { "epoch": 0.9347616784273579, "grad_norm": 2.1863590600637783, "learning_rate": 3.4342527261064786e-07, "loss": 0.6735, "step": 76700 }, { "epoch": 0.9348226146515057, "grad_norm": 2.5290622431492307, "learning_rate": 3.4310455420141114e-07, "loss": 0.699, "step": 76705 }, { "epoch": 0.9348835508756536, "grad_norm": 2.1644819054661744, "learning_rate": 3.427838357921745e-07, "loss": 0.699, "step": 76710 }, { "epoch": 0.9349444870998014, "grad_norm": 2.814447924533142, "learning_rate": 3.4246311738293776e-07, "loss": 0.6486, "step": 76715 }, { "epoch": 0.9350054233239492, "grad_norm": 2.5125550141902093, "learning_rate": 3.421423989737011e-07, "loss": 0.7119, "step": 76720 }, { "epoch": 0.9350663595480969, "grad_norm": 2.708097203821121, "learning_rate": 3.418216805644644e-07, "loss": 0.6869, "step": 76725 }, { "epoch": 0.9351272957722447, "grad_norm": 2.278373903727722, "learning_rate": 3.4150096215522776e-07, "loss": 0.7171, "step": 76730 }, { "epoch": 0.9351882319963926, "grad_norm": 2.8949478915226416, "learning_rate": 3.41180243745991e-07, "loss": 0.6243, "step": 76735 }, { "epoch": 0.9352491682205404, "grad_norm": 2.317323923597274, "learning_rate": 3.408595253367544e-07, "loss": 0.6544, "step": 76740 }, { "epoch": 0.9353101044446882, "grad_norm": 2.7971003197617277, "learning_rate": 3.405388069275177e-07, "loss": 0.6988, "step": 76745 }, { "epoch": 0.935371040668836, "grad_norm": 2.2601465542173864, "learning_rate": 3.40218088518281e-07, "loss": 0.7812, "step": 76750 }, { "epoch": 0.9354319768929839, "grad_norm": 2.1718855205640417, "learning_rate": 3.3989737010904433e-07, "loss": 0.7562, "step": 76755 }, { "epoch": 0.9354929131171316, "grad_norm": 2.574439694710292, "learning_rate": 3.395766516998076e-07, "loss": 0.7022, "step": 76760 }, { "epoch": 0.9355538493412794, "grad_norm": 3.2892204462709254, "learning_rate": 3.3925593329057095e-07, "loss": 0.7557, "step": 76765 }, { "epoch": 0.9356147855654272, "grad_norm": 2.2162128328756583, "learning_rate": 3.3893521488133423e-07, "loss": 0.6962, "step": 76770 }, { "epoch": 0.935675721789575, "grad_norm": 3.11126045901115, "learning_rate": 3.3861449647209756e-07, "loss": 0.7327, "step": 76775 }, { "epoch": 0.9357366580137229, "grad_norm": 2.611020697024137, "learning_rate": 3.3829377806286084e-07, "loss": 0.7096, "step": 76780 }, { "epoch": 0.9357975942378707, "grad_norm": 2.2665230155927114, "learning_rate": 3.379730596536242e-07, "loss": 0.7048, "step": 76785 }, { "epoch": 0.9358585304620185, "grad_norm": 2.9411728164332005, "learning_rate": 3.3765234124438746e-07, "loss": 0.7181, "step": 76790 }, { "epoch": 0.9359194666861662, "grad_norm": 2.086148057567354, "learning_rate": 3.373316228351508e-07, "loss": 0.7363, "step": 76795 }, { "epoch": 0.935980402910314, "grad_norm": 2.338406686484917, "learning_rate": 3.370109044259141e-07, "loss": 0.741, "step": 76800 }, { "epoch": 0.9360413391344619, "grad_norm": 2.3204262284678596, "learning_rate": 3.366901860166774e-07, "loss": 0.6849, "step": 76805 }, { "epoch": 0.9361022753586097, "grad_norm": 3.107457085463635, "learning_rate": 3.363694676074407e-07, "loss": 0.7679, "step": 76810 }, { "epoch": 0.9361632115827575, "grad_norm": 2.8433172984289734, "learning_rate": 3.36048749198204e-07, "loss": 0.8123, "step": 76815 }, { "epoch": 0.9362241478069053, "grad_norm": 2.361929764442949, "learning_rate": 3.357280307889673e-07, "loss": 0.6139, "step": 76820 }, { "epoch": 0.9362850840310531, "grad_norm": 2.672971588662378, "learning_rate": 3.3540731237973064e-07, "loss": 0.7164, "step": 76825 }, { "epoch": 0.9363460202552009, "grad_norm": 3.1487582032362536, "learning_rate": 3.350865939704939e-07, "loss": 0.6809, "step": 76830 }, { "epoch": 0.9364069564793487, "grad_norm": 2.4975207769661525, "learning_rate": 3.3476587556125726e-07, "loss": 0.6738, "step": 76835 }, { "epoch": 0.9364678927034965, "grad_norm": 2.0109326534936725, "learning_rate": 3.3444515715202054e-07, "loss": 0.7339, "step": 76840 }, { "epoch": 0.9365288289276443, "grad_norm": 2.717253254760342, "learning_rate": 3.3412443874278387e-07, "loss": 0.7343, "step": 76845 }, { "epoch": 0.9365897651517922, "grad_norm": 2.788586243829547, "learning_rate": 3.3380372033354715e-07, "loss": 0.7617, "step": 76850 }, { "epoch": 0.93665070137594, "grad_norm": 2.403899056896022, "learning_rate": 3.334830019243105e-07, "loss": 0.7634, "step": 76855 }, { "epoch": 0.9367116376000878, "grad_norm": 2.8928655360957514, "learning_rate": 3.3316228351507377e-07, "loss": 0.6881, "step": 76860 }, { "epoch": 0.9367725738242355, "grad_norm": 2.6679288462392052, "learning_rate": 3.328415651058371e-07, "loss": 0.7492, "step": 76865 }, { "epoch": 0.9368335100483833, "grad_norm": 2.9147451677096416, "learning_rate": 3.325208466966004e-07, "loss": 0.6747, "step": 76870 }, { "epoch": 0.9368944462725312, "grad_norm": 2.842705236748688, "learning_rate": 3.322001282873637e-07, "loss": 0.6386, "step": 76875 }, { "epoch": 0.936955382496679, "grad_norm": 2.390299015595561, "learning_rate": 3.31879409878127e-07, "loss": 0.6987, "step": 76880 }, { "epoch": 0.9370163187208268, "grad_norm": 2.600887170155124, "learning_rate": 3.3155869146889034e-07, "loss": 0.6398, "step": 76885 }, { "epoch": 0.9370772549449746, "grad_norm": 2.23375399697281, "learning_rate": 3.312379730596536e-07, "loss": 0.6275, "step": 76890 }, { "epoch": 0.9371381911691224, "grad_norm": 2.4564865863614482, "learning_rate": 3.3091725465041695e-07, "loss": 0.7594, "step": 76895 }, { "epoch": 0.9371991273932702, "grad_norm": 2.490050980934175, "learning_rate": 3.3059653624118023e-07, "loss": 0.7513, "step": 76900 }, { "epoch": 0.937260063617418, "grad_norm": 2.369542678058699, "learning_rate": 3.302758178319436e-07, "loss": 0.7298, "step": 76905 }, { "epoch": 0.9373209998415658, "grad_norm": 2.748217239766035, "learning_rate": 3.2995509942270685e-07, "loss": 0.7512, "step": 76910 }, { "epoch": 0.9373819360657136, "grad_norm": 2.4747999175235167, "learning_rate": 3.2963438101347024e-07, "loss": 0.7033, "step": 76915 }, { "epoch": 0.9374428722898615, "grad_norm": 2.697871001086919, "learning_rate": 3.2931366260423347e-07, "loss": 0.7319, "step": 76920 }, { "epoch": 0.9375038085140093, "grad_norm": 2.750965748413149, "learning_rate": 3.2899294419499685e-07, "loss": 0.7251, "step": 76925 }, { "epoch": 0.9375647447381571, "grad_norm": 2.2932947568973594, "learning_rate": 3.286722257857601e-07, "loss": 0.7046, "step": 76930 }, { "epoch": 0.9376256809623048, "grad_norm": 2.86912968872302, "learning_rate": 3.2835150737652347e-07, "loss": 0.7185, "step": 76935 }, { "epoch": 0.9376866171864526, "grad_norm": 2.7018289528434143, "learning_rate": 3.2803078896728675e-07, "loss": 0.7264, "step": 76940 }, { "epoch": 0.9377475534106005, "grad_norm": 2.8504467341451574, "learning_rate": 3.277100705580501e-07, "loss": 0.7038, "step": 76945 }, { "epoch": 0.9378084896347483, "grad_norm": 3.444759383307953, "learning_rate": 3.2738935214881337e-07, "loss": 0.7127, "step": 76950 }, { "epoch": 0.9378694258588961, "grad_norm": 2.0366128244446924, "learning_rate": 3.270686337395767e-07, "loss": 0.7035, "step": 76955 }, { "epoch": 0.9379303620830439, "grad_norm": 3.1593596409665072, "learning_rate": 3.2674791533034e-07, "loss": 0.7859, "step": 76960 }, { "epoch": 0.9379912983071917, "grad_norm": 2.3950655866971826, "learning_rate": 3.264271969211033e-07, "loss": 0.7155, "step": 76965 }, { "epoch": 0.9380522345313395, "grad_norm": 2.2510532077515415, "learning_rate": 3.261064785118666e-07, "loss": 0.7024, "step": 76970 }, { "epoch": 0.9381131707554873, "grad_norm": 2.931160392646631, "learning_rate": 3.2578576010262993e-07, "loss": 0.7346, "step": 76975 }, { "epoch": 0.9381741069796351, "grad_norm": 3.2947763814465625, "learning_rate": 3.254650416933932e-07, "loss": 0.6423, "step": 76980 }, { "epoch": 0.9382350432037829, "grad_norm": 2.7716063417072183, "learning_rate": 3.2514432328415655e-07, "loss": 0.805, "step": 76985 }, { "epoch": 0.9382959794279307, "grad_norm": 2.3513808795369133, "learning_rate": 3.2482360487491983e-07, "loss": 0.7808, "step": 76990 }, { "epoch": 0.9383569156520786, "grad_norm": 2.5970093128666156, "learning_rate": 3.2450288646568316e-07, "loss": 0.6996, "step": 76995 }, { "epoch": 0.9384178518762264, "grad_norm": 2.1845083098575393, "learning_rate": 3.2418216805644645e-07, "loss": 0.6403, "step": 77000 }, { "epoch": 0.9384787881003741, "grad_norm": 2.1671412845411053, "learning_rate": 3.238614496472098e-07, "loss": 0.6802, "step": 77005 }, { "epoch": 0.9385397243245219, "grad_norm": 2.2902183181862847, "learning_rate": 3.2354073123797306e-07, "loss": 0.7035, "step": 77010 }, { "epoch": 0.9386006605486698, "grad_norm": 2.2446520798200362, "learning_rate": 3.232200128287364e-07, "loss": 0.7078, "step": 77015 }, { "epoch": 0.9386615967728176, "grad_norm": 3.4752583906587384, "learning_rate": 3.228992944194997e-07, "loss": 0.6064, "step": 77020 }, { "epoch": 0.9387225329969654, "grad_norm": 2.695286516294426, "learning_rate": 3.22578576010263e-07, "loss": 0.64, "step": 77025 }, { "epoch": 0.9387834692211132, "grad_norm": 2.3653478245295, "learning_rate": 3.222578576010263e-07, "loss": 0.7352, "step": 77030 }, { "epoch": 0.938844405445261, "grad_norm": 2.5802156729488472, "learning_rate": 3.2193713919178963e-07, "loss": 0.6946, "step": 77035 }, { "epoch": 0.9389053416694088, "grad_norm": 2.36966762981368, "learning_rate": 3.2161642078255296e-07, "loss": 0.7094, "step": 77040 }, { "epoch": 0.9389662778935566, "grad_norm": 2.987744884326965, "learning_rate": 3.2129570237331624e-07, "loss": 0.6699, "step": 77045 }, { "epoch": 0.9390272141177044, "grad_norm": 2.3401867624483628, "learning_rate": 3.209749839640796e-07, "loss": 0.7535, "step": 77050 }, { "epoch": 0.9390881503418522, "grad_norm": 2.618730736333867, "learning_rate": 3.2065426555484286e-07, "loss": 0.7053, "step": 77055 }, { "epoch": 0.939149086566, "grad_norm": 2.208358433070889, "learning_rate": 3.203335471456062e-07, "loss": 0.6667, "step": 77060 }, { "epoch": 0.9392100227901479, "grad_norm": 3.3452192303430333, "learning_rate": 3.200128287363695e-07, "loss": 0.7466, "step": 77065 }, { "epoch": 0.9392709590142957, "grad_norm": 2.394975058343216, "learning_rate": 3.196921103271328e-07, "loss": 0.7293, "step": 77070 }, { "epoch": 0.9393318952384434, "grad_norm": 2.4557660299226876, "learning_rate": 3.193713919178961e-07, "loss": 0.7373, "step": 77075 }, { "epoch": 0.9393928314625912, "grad_norm": 2.7056889002719613, "learning_rate": 3.190506735086595e-07, "loss": 0.6623, "step": 77080 }, { "epoch": 0.939453767686739, "grad_norm": 2.1488095584668674, "learning_rate": 3.187299550994227e-07, "loss": 0.6743, "step": 77085 }, { "epoch": 0.9395147039108869, "grad_norm": 2.1725792005406084, "learning_rate": 3.184092366901861e-07, "loss": 0.6913, "step": 77090 }, { "epoch": 0.9395756401350347, "grad_norm": 2.484157539564692, "learning_rate": 3.180885182809493e-07, "loss": 0.7696, "step": 77095 }, { "epoch": 0.9396365763591825, "grad_norm": 2.624731743424842, "learning_rate": 3.177677998717127e-07, "loss": 0.7297, "step": 77100 }, { "epoch": 0.9396975125833303, "grad_norm": 2.528392178844456, "learning_rate": 3.1744708146247594e-07, "loss": 0.6893, "step": 77105 }, { "epoch": 0.939758448807478, "grad_norm": 2.178152015732075, "learning_rate": 3.171263630532393e-07, "loss": 0.6057, "step": 77110 }, { "epoch": 0.9398193850316259, "grad_norm": 4.137992104161738, "learning_rate": 3.168056446440026e-07, "loss": 0.7165, "step": 77115 }, { "epoch": 0.9398803212557737, "grad_norm": 1.9861011239200967, "learning_rate": 3.1648492623476594e-07, "loss": 0.6759, "step": 77120 }, { "epoch": 0.9399412574799215, "grad_norm": 2.3302218851367367, "learning_rate": 3.161642078255292e-07, "loss": 0.7125, "step": 77125 }, { "epoch": 0.9400021937040693, "grad_norm": 2.598983042684411, "learning_rate": 3.1584348941629256e-07, "loss": 0.652, "step": 77130 }, { "epoch": 0.9400631299282172, "grad_norm": 2.8342482613879216, "learning_rate": 3.1552277100705584e-07, "loss": 0.7241, "step": 77135 }, { "epoch": 0.940124066152365, "grad_norm": 3.889840038020175, "learning_rate": 3.152020525978192e-07, "loss": 0.6713, "step": 77140 }, { "epoch": 0.9401850023765127, "grad_norm": 2.451874352876717, "learning_rate": 3.1488133418858246e-07, "loss": 0.7052, "step": 77145 }, { "epoch": 0.9402459386006605, "grad_norm": 2.638889665823373, "learning_rate": 3.145606157793458e-07, "loss": 0.6689, "step": 77150 }, { "epoch": 0.9403068748248083, "grad_norm": 2.1877877638796703, "learning_rate": 3.1423989737010907e-07, "loss": 0.6524, "step": 77155 }, { "epoch": 0.9403678110489562, "grad_norm": 2.21776466418634, "learning_rate": 3.139191789608724e-07, "loss": 0.742, "step": 77160 }, { "epoch": 0.940428747273104, "grad_norm": 2.684961708370127, "learning_rate": 3.135984605516357e-07, "loss": 0.685, "step": 77165 }, { "epoch": 0.9404896834972518, "grad_norm": 2.295708948006843, "learning_rate": 3.13277742142399e-07, "loss": 0.6949, "step": 77170 }, { "epoch": 0.9405506197213995, "grad_norm": 2.4529292512190337, "learning_rate": 3.129570237331623e-07, "loss": 0.7008, "step": 77175 }, { "epoch": 0.9406115559455474, "grad_norm": 3.1396223416879083, "learning_rate": 3.1263630532392564e-07, "loss": 0.7538, "step": 77180 }, { "epoch": 0.9406724921696952, "grad_norm": 2.8822207493192322, "learning_rate": 3.123155869146889e-07, "loss": 0.7563, "step": 77185 }, { "epoch": 0.940733428393843, "grad_norm": 2.441014322527808, "learning_rate": 3.1199486850545225e-07, "loss": 0.6218, "step": 77190 }, { "epoch": 0.9407943646179908, "grad_norm": 2.551208421514284, "learning_rate": 3.116741500962156e-07, "loss": 0.7669, "step": 77195 }, { "epoch": 0.9408553008421386, "grad_norm": 2.3201059462009703, "learning_rate": 3.1135343168697887e-07, "loss": 0.738, "step": 77200 }, { "epoch": 0.9409162370662865, "grad_norm": 2.8316088725849684, "learning_rate": 3.110327132777422e-07, "loss": 0.7121, "step": 77205 }, { "epoch": 0.9409771732904342, "grad_norm": 2.973402662043502, "learning_rate": 3.107119948685055e-07, "loss": 0.7148, "step": 77210 }, { "epoch": 0.941038109514582, "grad_norm": 3.5975202033908085, "learning_rate": 3.103912764592688e-07, "loss": 0.7281, "step": 77215 }, { "epoch": 0.9410990457387298, "grad_norm": 3.152050351391977, "learning_rate": 3.100705580500321e-07, "loss": 0.7681, "step": 77220 }, { "epoch": 0.9411599819628776, "grad_norm": 2.3072388412992066, "learning_rate": 3.0974983964079544e-07, "loss": 0.7356, "step": 77225 }, { "epoch": 0.9412209181870255, "grad_norm": 2.9463449658363587, "learning_rate": 3.094291212315587e-07, "loss": 0.7332, "step": 77230 }, { "epoch": 0.9412818544111733, "grad_norm": 2.526696264479326, "learning_rate": 3.0910840282232205e-07, "loss": 0.7241, "step": 77235 }, { "epoch": 0.9413427906353211, "grad_norm": 2.1014080002432514, "learning_rate": 3.0878768441308533e-07, "loss": 0.7355, "step": 77240 }, { "epoch": 0.9414037268594688, "grad_norm": 2.1539840543499005, "learning_rate": 3.0846696600384867e-07, "loss": 0.7526, "step": 77245 }, { "epoch": 0.9414646630836166, "grad_norm": 3.1089630507737587, "learning_rate": 3.0814624759461195e-07, "loss": 0.6526, "step": 77250 }, { "epoch": 0.9415255993077645, "grad_norm": 3.1565481761373597, "learning_rate": 3.078255291853753e-07, "loss": 0.6904, "step": 77255 }, { "epoch": 0.9415865355319123, "grad_norm": 3.7687161698217526, "learning_rate": 3.0750481077613856e-07, "loss": 0.7135, "step": 77260 }, { "epoch": 0.9416474717560601, "grad_norm": 2.5112758971389386, "learning_rate": 3.071840923669019e-07, "loss": 0.6879, "step": 77265 }, { "epoch": 0.9417084079802079, "grad_norm": 2.795380271022812, "learning_rate": 3.068633739576652e-07, "loss": 0.7502, "step": 77270 }, { "epoch": 0.9417693442043558, "grad_norm": 2.264550650793381, "learning_rate": 3.065426555484285e-07, "loss": 0.6257, "step": 77275 }, { "epoch": 0.9418302804285035, "grad_norm": 2.6351533777825806, "learning_rate": 3.062219371391918e-07, "loss": 0.7086, "step": 77280 }, { "epoch": 0.9418912166526513, "grad_norm": 3.9968844896512183, "learning_rate": 3.0590121872995513e-07, "loss": 0.7189, "step": 77285 }, { "epoch": 0.9419521528767991, "grad_norm": 2.645631366875921, "learning_rate": 3.0558050032071847e-07, "loss": 0.7434, "step": 77290 }, { "epoch": 0.9420130891009469, "grad_norm": 2.1271144586574593, "learning_rate": 3.0525978191148175e-07, "loss": 0.7235, "step": 77295 }, { "epoch": 0.9420740253250948, "grad_norm": 2.5861388847550897, "learning_rate": 3.049390635022451e-07, "loss": 0.7379, "step": 77300 }, { "epoch": 0.9421349615492426, "grad_norm": 1.932358988274978, "learning_rate": 3.0461834509300836e-07, "loss": 0.6636, "step": 77305 }, { "epoch": 0.9421958977733904, "grad_norm": 2.548699428294081, "learning_rate": 3.042976266837717e-07, "loss": 0.6679, "step": 77310 }, { "epoch": 0.9422568339975381, "grad_norm": 3.1594968485979567, "learning_rate": 3.03976908274535e-07, "loss": 0.7697, "step": 77315 }, { "epoch": 0.942317770221686, "grad_norm": 2.3834579109204537, "learning_rate": 3.036561898652983e-07, "loss": 0.7302, "step": 77320 }, { "epoch": 0.9423787064458338, "grad_norm": 2.232970143427374, "learning_rate": 3.033354714560616e-07, "loss": 0.7035, "step": 77325 }, { "epoch": 0.9424396426699816, "grad_norm": 2.6504020853048225, "learning_rate": 3.0301475304682493e-07, "loss": 0.7295, "step": 77330 }, { "epoch": 0.9425005788941294, "grad_norm": 2.823576180160115, "learning_rate": 3.026940346375882e-07, "loss": 0.6519, "step": 77335 }, { "epoch": 0.9425615151182772, "grad_norm": 2.307005429892723, "learning_rate": 3.0237331622835154e-07, "loss": 0.7306, "step": 77340 }, { "epoch": 0.9426224513424251, "grad_norm": 2.780477050537473, "learning_rate": 3.020525978191148e-07, "loss": 0.6921, "step": 77345 }, { "epoch": 0.9426833875665728, "grad_norm": 2.5531566528763707, "learning_rate": 3.0173187940987816e-07, "loss": 0.6832, "step": 77350 }, { "epoch": 0.9427443237907206, "grad_norm": 2.63666524881343, "learning_rate": 3.0141116100064144e-07, "loss": 0.7479, "step": 77355 }, { "epoch": 0.9428052600148684, "grad_norm": 2.0587183803030595, "learning_rate": 3.010904425914048e-07, "loss": 0.6969, "step": 77360 }, { "epoch": 0.9428661962390162, "grad_norm": 3.3084293268814204, "learning_rate": 3.0076972418216806e-07, "loss": 0.7091, "step": 77365 }, { "epoch": 0.9429271324631641, "grad_norm": 2.3549517404796183, "learning_rate": 3.004490057729314e-07, "loss": 0.7397, "step": 77370 }, { "epoch": 0.9429880686873119, "grad_norm": 2.2472103736405993, "learning_rate": 3.001282873636947e-07, "loss": 0.6641, "step": 77375 }, { "epoch": 0.9430490049114597, "grad_norm": 2.456080394674296, "learning_rate": 2.99807568954458e-07, "loss": 0.6945, "step": 77380 }, { "epoch": 0.9431099411356074, "grad_norm": 2.114051658150797, "learning_rate": 2.994868505452213e-07, "loss": 0.6877, "step": 77385 }, { "epoch": 0.9431708773597552, "grad_norm": 3.184831373225314, "learning_rate": 2.991661321359846e-07, "loss": 0.6724, "step": 77390 }, { "epoch": 0.9432318135839031, "grad_norm": 2.303247956386005, "learning_rate": 2.9884541372674796e-07, "loss": 0.7028, "step": 77395 }, { "epoch": 0.9432927498080509, "grad_norm": 3.2536021403382094, "learning_rate": 2.9852469531751124e-07, "loss": 0.6938, "step": 77400 }, { "epoch": 0.9433536860321987, "grad_norm": 2.5367479908487995, "learning_rate": 2.982039769082746e-07, "loss": 0.7129, "step": 77405 }, { "epoch": 0.9434146222563465, "grad_norm": 2.3165246663336267, "learning_rate": 2.9788325849903786e-07, "loss": 0.7141, "step": 77410 }, { "epoch": 0.9434755584804944, "grad_norm": 2.4039141723534097, "learning_rate": 2.975625400898012e-07, "loss": 0.7158, "step": 77415 }, { "epoch": 0.9435364947046421, "grad_norm": 3.070927967237497, "learning_rate": 2.9724182168056447e-07, "loss": 0.6515, "step": 77420 }, { "epoch": 0.9435974309287899, "grad_norm": 2.4627811979534004, "learning_rate": 2.969211032713278e-07, "loss": 0.7074, "step": 77425 }, { "epoch": 0.9436583671529377, "grad_norm": 2.87408906523296, "learning_rate": 2.966003848620911e-07, "loss": 0.6824, "step": 77430 }, { "epoch": 0.9437193033770855, "grad_norm": 2.2631871151619176, "learning_rate": 2.962796664528544e-07, "loss": 0.7083, "step": 77435 }, { "epoch": 0.9437802396012334, "grad_norm": 2.614007328996326, "learning_rate": 2.959589480436177e-07, "loss": 0.7112, "step": 77440 }, { "epoch": 0.9438411758253812, "grad_norm": 3.2579561393648615, "learning_rate": 2.9563822963438104e-07, "loss": 0.7007, "step": 77445 }, { "epoch": 0.943902112049529, "grad_norm": 2.5046094495970586, "learning_rate": 2.953175112251443e-07, "loss": 0.764, "step": 77450 }, { "epoch": 0.9439630482736767, "grad_norm": 2.8999406351350094, "learning_rate": 2.9499679281590765e-07, "loss": 0.6735, "step": 77455 }, { "epoch": 0.9440239844978245, "grad_norm": 2.6881455821759306, "learning_rate": 2.9467607440667094e-07, "loss": 0.6997, "step": 77460 }, { "epoch": 0.9440849207219724, "grad_norm": 2.2126268308919452, "learning_rate": 2.9435535599743427e-07, "loss": 0.707, "step": 77465 }, { "epoch": 0.9441458569461202, "grad_norm": 2.6739587601744206, "learning_rate": 2.9403463758819755e-07, "loss": 0.7338, "step": 77470 }, { "epoch": 0.944206793170268, "grad_norm": 2.3057836557723963, "learning_rate": 2.937139191789609e-07, "loss": 0.6985, "step": 77475 }, { "epoch": 0.9442677293944158, "grad_norm": 2.4394722497883197, "learning_rate": 2.9339320076972417e-07, "loss": 0.7669, "step": 77480 }, { "epoch": 0.9443286656185637, "grad_norm": 2.6776067379628743, "learning_rate": 2.930724823604875e-07, "loss": 0.6847, "step": 77485 }, { "epoch": 0.9443896018427114, "grad_norm": 2.371367228695812, "learning_rate": 2.927517639512508e-07, "loss": 0.6936, "step": 77490 }, { "epoch": 0.9444505380668592, "grad_norm": 2.531160112122364, "learning_rate": 2.9243104554201417e-07, "loss": 0.7075, "step": 77495 }, { "epoch": 0.944511474291007, "grad_norm": 3.669669541082669, "learning_rate": 2.9211032713277745e-07, "loss": 0.6829, "step": 77500 }, { "epoch": 0.9445724105151548, "grad_norm": 2.806779587101077, "learning_rate": 2.917896087235408e-07, "loss": 0.7461, "step": 77505 }, { "epoch": 0.9446333467393027, "grad_norm": 2.7199873161209838, "learning_rate": 2.9146889031430407e-07, "loss": 0.6469, "step": 77510 }, { "epoch": 0.9446942829634505, "grad_norm": 2.869831165528753, "learning_rate": 2.911481719050674e-07, "loss": 0.7201, "step": 77515 }, { "epoch": 0.9447552191875983, "grad_norm": 2.401990090582093, "learning_rate": 2.908274534958307e-07, "loss": 0.6888, "step": 77520 }, { "epoch": 0.944816155411746, "grad_norm": 2.4209667328735014, "learning_rate": 2.90506735086594e-07, "loss": 0.8094, "step": 77525 }, { "epoch": 0.9448770916358938, "grad_norm": 2.5267701166095184, "learning_rate": 2.901860166773573e-07, "loss": 0.6988, "step": 77530 }, { "epoch": 0.9449380278600417, "grad_norm": 2.5902147948796928, "learning_rate": 2.8986529826812063e-07, "loss": 0.7519, "step": 77535 }, { "epoch": 0.9449989640841895, "grad_norm": 2.676639663418273, "learning_rate": 2.895445798588839e-07, "loss": 0.6886, "step": 77540 }, { "epoch": 0.9450599003083373, "grad_norm": 2.480762815335789, "learning_rate": 2.8922386144964725e-07, "loss": 0.7033, "step": 77545 }, { "epoch": 0.9451208365324851, "grad_norm": 2.39711586158507, "learning_rate": 2.8890314304041053e-07, "loss": 0.7199, "step": 77550 }, { "epoch": 0.945181772756633, "grad_norm": 2.603454948809559, "learning_rate": 2.8858242463117387e-07, "loss": 0.7743, "step": 77555 }, { "epoch": 0.9452427089807807, "grad_norm": 2.077900311697323, "learning_rate": 2.8826170622193715e-07, "loss": 0.7001, "step": 77560 }, { "epoch": 0.9453036452049285, "grad_norm": 2.237149683986019, "learning_rate": 2.879409878127005e-07, "loss": 0.7387, "step": 77565 }, { "epoch": 0.9453645814290763, "grad_norm": 2.777911280360535, "learning_rate": 2.876202694034638e-07, "loss": 0.736, "step": 77570 }, { "epoch": 0.9454255176532241, "grad_norm": 2.6434353183340837, "learning_rate": 2.872995509942271e-07, "loss": 0.8182, "step": 77575 }, { "epoch": 0.945486453877372, "grad_norm": 3.0717663036847225, "learning_rate": 2.8697883258499043e-07, "loss": 0.6784, "step": 77580 }, { "epoch": 0.9455473901015198, "grad_norm": 2.4739421847581795, "learning_rate": 2.866581141757537e-07, "loss": 0.7217, "step": 77585 }, { "epoch": 0.9456083263256676, "grad_norm": 3.050304853682301, "learning_rate": 2.8633739576651705e-07, "loss": 0.6832, "step": 77590 }, { "epoch": 0.9456692625498153, "grad_norm": 2.18686344041764, "learning_rate": 2.8601667735728033e-07, "loss": 0.6484, "step": 77595 }, { "epoch": 0.9457301987739631, "grad_norm": 4.012010298122347, "learning_rate": 2.8569595894804366e-07, "loss": 0.7597, "step": 77600 }, { "epoch": 0.945791134998111, "grad_norm": 3.1389281865260372, "learning_rate": 2.8537524053880695e-07, "loss": 0.7328, "step": 77605 }, { "epoch": 0.9458520712222588, "grad_norm": 2.4923177353560972, "learning_rate": 2.850545221295703e-07, "loss": 0.7526, "step": 77610 }, { "epoch": 0.9459130074464066, "grad_norm": 2.8973444831656563, "learning_rate": 2.8473380372033356e-07, "loss": 0.6964, "step": 77615 }, { "epoch": 0.9459739436705544, "grad_norm": 2.5083820487984476, "learning_rate": 2.844130853110969e-07, "loss": 0.7274, "step": 77620 }, { "epoch": 0.9460348798947023, "grad_norm": 2.607580276936873, "learning_rate": 2.840923669018602e-07, "loss": 0.7426, "step": 77625 }, { "epoch": 0.94609581611885, "grad_norm": 2.6349707247006777, "learning_rate": 2.837716484926235e-07, "loss": 0.769, "step": 77630 }, { "epoch": 0.9461567523429978, "grad_norm": 2.499548590595114, "learning_rate": 2.834509300833868e-07, "loss": 0.6984, "step": 77635 }, { "epoch": 0.9462176885671456, "grad_norm": 2.6325651223021165, "learning_rate": 2.8313021167415013e-07, "loss": 0.7932, "step": 77640 }, { "epoch": 0.9462786247912934, "grad_norm": 2.462631896201931, "learning_rate": 2.828094932649134e-07, "loss": 0.71, "step": 77645 }, { "epoch": 0.9463395610154413, "grad_norm": 3.1696875891136678, "learning_rate": 2.8248877485567674e-07, "loss": 0.7331, "step": 77650 }, { "epoch": 0.9464004972395891, "grad_norm": 2.667569036694177, "learning_rate": 2.8216805644644e-07, "loss": 0.6177, "step": 77655 }, { "epoch": 0.9464614334637369, "grad_norm": 2.7013448531265807, "learning_rate": 2.8184733803720336e-07, "loss": 0.6592, "step": 77660 }, { "epoch": 0.9465223696878846, "grad_norm": 2.2366456159797683, "learning_rate": 2.8152661962796664e-07, "loss": 0.6851, "step": 77665 }, { "epoch": 0.9465833059120324, "grad_norm": 3.3866547302877668, "learning_rate": 2.8120590121873e-07, "loss": 0.7935, "step": 77670 }, { "epoch": 0.9466442421361803, "grad_norm": 2.4695534272081803, "learning_rate": 2.808851828094933e-07, "loss": 0.7816, "step": 77675 }, { "epoch": 0.9467051783603281, "grad_norm": 2.42339032717056, "learning_rate": 2.805644644002566e-07, "loss": 0.6834, "step": 77680 }, { "epoch": 0.9467661145844759, "grad_norm": 2.3175439492988024, "learning_rate": 2.802437459910199e-07, "loss": 0.7017, "step": 77685 }, { "epoch": 0.9468270508086237, "grad_norm": 2.4107788038599773, "learning_rate": 2.799230275817832e-07, "loss": 0.6638, "step": 77690 }, { "epoch": 0.9468879870327715, "grad_norm": 2.7498538085393833, "learning_rate": 2.7960230917254654e-07, "loss": 0.7272, "step": 77695 }, { "epoch": 0.9469489232569193, "grad_norm": 2.8966556846182248, "learning_rate": 2.792815907633098e-07, "loss": 0.7147, "step": 77700 }, { "epoch": 0.9470098594810671, "grad_norm": 2.7331815323886386, "learning_rate": 2.7896087235407316e-07, "loss": 0.7778, "step": 77705 }, { "epoch": 0.9470707957052149, "grad_norm": 2.26747080136628, "learning_rate": 2.7864015394483644e-07, "loss": 0.7664, "step": 77710 }, { "epoch": 0.9471317319293627, "grad_norm": 2.320741723062206, "learning_rate": 2.7831943553559977e-07, "loss": 0.6563, "step": 77715 }, { "epoch": 0.9471926681535106, "grad_norm": 2.917940057613421, "learning_rate": 2.7799871712636305e-07, "loss": 0.6971, "step": 77720 }, { "epoch": 0.9472536043776584, "grad_norm": 2.709100692695516, "learning_rate": 2.776779987171264e-07, "loss": 0.6895, "step": 77725 }, { "epoch": 0.9473145406018062, "grad_norm": 2.5548192313328757, "learning_rate": 2.7735728030788967e-07, "loss": 0.6946, "step": 77730 }, { "epoch": 0.9473754768259539, "grad_norm": 2.0823159296071014, "learning_rate": 2.77036561898653e-07, "loss": 0.6702, "step": 77735 }, { "epoch": 0.9474364130501017, "grad_norm": 2.5835268753160294, "learning_rate": 2.767158434894163e-07, "loss": 0.7186, "step": 77740 }, { "epoch": 0.9474973492742496, "grad_norm": 2.582103582859799, "learning_rate": 2.763951250801796e-07, "loss": 0.6509, "step": 77745 }, { "epoch": 0.9475582854983974, "grad_norm": 2.4092885986421115, "learning_rate": 2.760744066709429e-07, "loss": 0.6876, "step": 77750 }, { "epoch": 0.9476192217225452, "grad_norm": 2.1497849499565365, "learning_rate": 2.7575368826170624e-07, "loss": 0.6999, "step": 77755 }, { "epoch": 0.947680157946693, "grad_norm": 2.5357949865133182, "learning_rate": 2.754329698524695e-07, "loss": 0.7093, "step": 77760 }, { "epoch": 0.9477410941708408, "grad_norm": 2.4920033679752254, "learning_rate": 2.7511225144323285e-07, "loss": 0.7754, "step": 77765 }, { "epoch": 0.9478020303949886, "grad_norm": 2.4163538530367794, "learning_rate": 2.7479153303399613e-07, "loss": 0.7443, "step": 77770 }, { "epoch": 0.9478629666191364, "grad_norm": 2.2911055063327086, "learning_rate": 2.7447081462475947e-07, "loss": 0.7605, "step": 77775 }, { "epoch": 0.9479239028432842, "grad_norm": 3.4981123634139157, "learning_rate": 2.741500962155228e-07, "loss": 0.7663, "step": 77780 }, { "epoch": 0.947984839067432, "grad_norm": 2.2614016672181942, "learning_rate": 2.738293778062861e-07, "loss": 0.693, "step": 77785 }, { "epoch": 0.9480457752915799, "grad_norm": 2.4891962356146884, "learning_rate": 2.735086593970494e-07, "loss": 0.7195, "step": 77790 }, { "epoch": 0.9481067115157277, "grad_norm": 2.5619898312264704, "learning_rate": 2.7318794098781275e-07, "loss": 0.7142, "step": 77795 }, { "epoch": 0.9481676477398755, "grad_norm": 2.2728609001274003, "learning_rate": 2.7286722257857603e-07, "loss": 0.7246, "step": 77800 }, { "epoch": 0.9482285839640232, "grad_norm": 2.3535719874337384, "learning_rate": 2.7254650416933937e-07, "loss": 0.6777, "step": 77805 }, { "epoch": 0.948289520188171, "grad_norm": 2.8034373245981525, "learning_rate": 2.7222578576010265e-07, "loss": 0.7461, "step": 77810 }, { "epoch": 0.9483504564123189, "grad_norm": 2.29362341743839, "learning_rate": 2.71905067350866e-07, "loss": 0.7282, "step": 77815 }, { "epoch": 0.9484113926364667, "grad_norm": 2.0749406908999726, "learning_rate": 2.7158434894162927e-07, "loss": 0.6855, "step": 77820 }, { "epoch": 0.9484723288606145, "grad_norm": 1.9691484086165045, "learning_rate": 2.712636305323926e-07, "loss": 0.7097, "step": 77825 }, { "epoch": 0.9485332650847623, "grad_norm": 3.1469916111510923, "learning_rate": 2.709429121231559e-07, "loss": 0.646, "step": 77830 }, { "epoch": 0.9485942013089101, "grad_norm": 2.4911782691097657, "learning_rate": 2.706221937139192e-07, "loss": 0.6669, "step": 77835 }, { "epoch": 0.9486551375330579, "grad_norm": 2.4003553125033847, "learning_rate": 2.703014753046825e-07, "loss": 0.6603, "step": 77840 }, { "epoch": 0.9487160737572057, "grad_norm": 2.4413976346552952, "learning_rate": 2.6998075689544583e-07, "loss": 0.7168, "step": 77845 }, { "epoch": 0.9487770099813535, "grad_norm": 2.268882705010962, "learning_rate": 2.6966003848620917e-07, "loss": 0.7044, "step": 77850 }, { "epoch": 0.9488379462055013, "grad_norm": 2.2977044487023437, "learning_rate": 2.6933932007697245e-07, "loss": 0.6842, "step": 77855 }, { "epoch": 0.9488988824296491, "grad_norm": 2.875704759428122, "learning_rate": 2.690186016677358e-07, "loss": 0.7733, "step": 77860 }, { "epoch": 0.948959818653797, "grad_norm": 2.5779451301656993, "learning_rate": 2.6869788325849906e-07, "loss": 0.6672, "step": 77865 }, { "epoch": 0.9490207548779448, "grad_norm": 2.26091288080629, "learning_rate": 2.683771648492624e-07, "loss": 0.8032, "step": 77870 }, { "epoch": 0.9490816911020925, "grad_norm": 2.821055391716681, "learning_rate": 2.680564464400257e-07, "loss": 0.7312, "step": 77875 }, { "epoch": 0.9491426273262403, "grad_norm": 2.1188448725657922, "learning_rate": 2.67735728030789e-07, "loss": 0.7505, "step": 77880 }, { "epoch": 0.9492035635503882, "grad_norm": 5.4647779160739445, "learning_rate": 2.674150096215523e-07, "loss": 0.6969, "step": 77885 }, { "epoch": 0.949264499774536, "grad_norm": 2.3664006082343647, "learning_rate": 2.6709429121231563e-07, "loss": 0.6972, "step": 77890 }, { "epoch": 0.9493254359986838, "grad_norm": 2.530603861064709, "learning_rate": 2.667735728030789e-07, "loss": 0.6636, "step": 77895 }, { "epoch": 0.9493863722228316, "grad_norm": 2.3480990394359353, "learning_rate": 2.6645285439384225e-07, "loss": 0.6438, "step": 77900 }, { "epoch": 0.9494473084469794, "grad_norm": 2.5830655984737874, "learning_rate": 2.6613213598460553e-07, "loss": 0.7049, "step": 77905 }, { "epoch": 0.9495082446711272, "grad_norm": 2.9431463172065673, "learning_rate": 2.6581141757536886e-07, "loss": 0.7352, "step": 77910 }, { "epoch": 0.949569180895275, "grad_norm": 2.606502920087153, "learning_rate": 2.6549069916613214e-07, "loss": 0.6841, "step": 77915 }, { "epoch": 0.9496301171194228, "grad_norm": 3.153012708743604, "learning_rate": 2.651699807568955e-07, "loss": 0.725, "step": 77920 }, { "epoch": 0.9496910533435706, "grad_norm": 2.1983030724384065, "learning_rate": 2.6484926234765876e-07, "loss": 0.7437, "step": 77925 }, { "epoch": 0.9497519895677184, "grad_norm": 2.443227201925259, "learning_rate": 2.645285439384221e-07, "loss": 0.7215, "step": 77930 }, { "epoch": 0.9498129257918663, "grad_norm": 2.58953303776092, "learning_rate": 2.642078255291854e-07, "loss": 0.6966, "step": 77935 }, { "epoch": 0.9498738620160141, "grad_norm": 2.8623734589401644, "learning_rate": 2.638871071199487e-07, "loss": 0.7644, "step": 77940 }, { "epoch": 0.9499347982401618, "grad_norm": 2.142739988846747, "learning_rate": 2.63566388710712e-07, "loss": 0.6918, "step": 77945 }, { "epoch": 0.9499957344643096, "grad_norm": 2.6088370961967486, "learning_rate": 2.632456703014753e-07, "loss": 0.6919, "step": 77950 }, { "epoch": 0.9500566706884574, "grad_norm": 2.9933309055123773, "learning_rate": 2.6292495189223866e-07, "loss": 0.6203, "step": 77955 }, { "epoch": 0.9501176069126053, "grad_norm": 3.8795954676534916, "learning_rate": 2.6260423348300194e-07, "loss": 0.7118, "step": 77960 }, { "epoch": 0.9501785431367531, "grad_norm": 1.9790659646409057, "learning_rate": 2.622835150737653e-07, "loss": 0.6983, "step": 77965 }, { "epoch": 0.9502394793609009, "grad_norm": 2.2608762644460776, "learning_rate": 2.6196279666452856e-07, "loss": 0.6636, "step": 77970 }, { "epoch": 0.9503004155850487, "grad_norm": 2.617535023814246, "learning_rate": 2.616420782552919e-07, "loss": 0.7369, "step": 77975 }, { "epoch": 0.9503613518091965, "grad_norm": 3.4481454069593087, "learning_rate": 2.6132135984605517e-07, "loss": 0.6798, "step": 77980 }, { "epoch": 0.9504222880333443, "grad_norm": 2.522906107530012, "learning_rate": 2.610006414368185e-07, "loss": 0.7871, "step": 77985 }, { "epoch": 0.9504832242574921, "grad_norm": 3.041418298713597, "learning_rate": 2.606799230275818e-07, "loss": 0.7204, "step": 77990 }, { "epoch": 0.9505441604816399, "grad_norm": 2.7562348604137483, "learning_rate": 2.603592046183451e-07, "loss": 0.7394, "step": 77995 }, { "epoch": 0.9506050967057877, "grad_norm": 3.376225692485336, "learning_rate": 2.600384862091084e-07, "loss": 0.6858, "step": 78000 }, { "epoch": 0.9506660329299356, "grad_norm": 2.335132533410096, "learning_rate": 2.5971776779987174e-07, "loss": 0.6905, "step": 78005 }, { "epoch": 0.9507269691540834, "grad_norm": 3.1395134716457744, "learning_rate": 2.59397049390635e-07, "loss": 0.7409, "step": 78010 }, { "epoch": 0.9507879053782311, "grad_norm": 2.5135555947598682, "learning_rate": 2.5907633098139836e-07, "loss": 0.645, "step": 78015 }, { "epoch": 0.9508488416023789, "grad_norm": 3.2916053671724472, "learning_rate": 2.5875561257216164e-07, "loss": 0.7, "step": 78020 }, { "epoch": 0.9509097778265267, "grad_norm": 2.621016278130952, "learning_rate": 2.5843489416292497e-07, "loss": 0.7279, "step": 78025 }, { "epoch": 0.9509707140506746, "grad_norm": 2.414045866719224, "learning_rate": 2.5811417575368825e-07, "loss": 0.747, "step": 78030 }, { "epoch": 0.9510316502748224, "grad_norm": 2.292851776850594, "learning_rate": 2.577934573444516e-07, "loss": 0.6533, "step": 78035 }, { "epoch": 0.9510925864989702, "grad_norm": 2.272248005914364, "learning_rate": 2.5747273893521487e-07, "loss": 0.6854, "step": 78040 }, { "epoch": 0.951153522723118, "grad_norm": 2.1408536951977113, "learning_rate": 2.571520205259782e-07, "loss": 0.6991, "step": 78045 }, { "epoch": 0.9512144589472658, "grad_norm": 3.49231916855418, "learning_rate": 2.568313021167415e-07, "loss": 0.6972, "step": 78050 }, { "epoch": 0.9512753951714136, "grad_norm": 2.6899641462887334, "learning_rate": 2.565105837075048e-07, "loss": 0.6415, "step": 78055 }, { "epoch": 0.9513363313955614, "grad_norm": 2.2358621463030577, "learning_rate": 2.5618986529826815e-07, "loss": 0.7472, "step": 78060 }, { "epoch": 0.9513972676197092, "grad_norm": 2.2622662823060486, "learning_rate": 2.5586914688903143e-07, "loss": 0.72, "step": 78065 }, { "epoch": 0.951458203843857, "grad_norm": 2.245060507654296, "learning_rate": 2.5554842847979477e-07, "loss": 0.6981, "step": 78070 }, { "epoch": 0.9515191400680049, "grad_norm": 2.7920230611706596, "learning_rate": 2.5522771007055805e-07, "loss": 0.7476, "step": 78075 }, { "epoch": 0.9515800762921527, "grad_norm": 2.1331834811243096, "learning_rate": 2.549069916613214e-07, "loss": 0.6636, "step": 78080 }, { "epoch": 0.9516410125163004, "grad_norm": 2.4808802825168295, "learning_rate": 2.5458627325208467e-07, "loss": 0.7521, "step": 78085 }, { "epoch": 0.9517019487404482, "grad_norm": 2.077412351602525, "learning_rate": 2.54265554842848e-07, "loss": 0.6582, "step": 78090 }, { "epoch": 0.951762884964596, "grad_norm": 2.236976715153453, "learning_rate": 2.539448364336113e-07, "loss": 0.7835, "step": 78095 }, { "epoch": 0.9518238211887439, "grad_norm": 2.536879500914084, "learning_rate": 2.536241180243746e-07, "loss": 0.672, "step": 78100 }, { "epoch": 0.9518847574128917, "grad_norm": 2.113417657917053, "learning_rate": 2.5330339961513795e-07, "loss": 0.6748, "step": 78105 }, { "epoch": 0.9519456936370395, "grad_norm": 1.9563581672716666, "learning_rate": 2.5298268120590123e-07, "loss": 0.6812, "step": 78110 }, { "epoch": 0.9520066298611873, "grad_norm": 3.139008584633983, "learning_rate": 2.5266196279666457e-07, "loss": 0.7295, "step": 78115 }, { "epoch": 0.952067566085335, "grad_norm": 2.4409414832123693, "learning_rate": 2.5234124438742785e-07, "loss": 0.7169, "step": 78120 }, { "epoch": 0.9521285023094829, "grad_norm": 2.259643829652961, "learning_rate": 2.520205259781912e-07, "loss": 0.7007, "step": 78125 }, { "epoch": 0.9521894385336307, "grad_norm": 4.220862991151335, "learning_rate": 2.516998075689545e-07, "loss": 0.7883, "step": 78130 }, { "epoch": 0.9522503747577785, "grad_norm": 2.2042166242612944, "learning_rate": 2.513790891597178e-07, "loss": 0.7475, "step": 78135 }, { "epoch": 0.9523113109819263, "grad_norm": 2.552286627220114, "learning_rate": 2.5105837075048113e-07, "loss": 0.7629, "step": 78140 }, { "epoch": 0.9523722472060742, "grad_norm": 2.218677016393224, "learning_rate": 2.507376523412444e-07, "loss": 0.6558, "step": 78145 }, { "epoch": 0.9524331834302219, "grad_norm": 2.283276297088849, "learning_rate": 2.5041693393200775e-07, "loss": 0.7701, "step": 78150 }, { "epoch": 0.9524941196543697, "grad_norm": 2.3390590527078796, "learning_rate": 2.5009621552277103e-07, "loss": 0.7148, "step": 78155 }, { "epoch": 0.9525550558785175, "grad_norm": 2.6429211440414013, "learning_rate": 2.4977549711353437e-07, "loss": 0.676, "step": 78160 }, { "epoch": 0.9526159921026653, "grad_norm": 2.396211240228705, "learning_rate": 2.4945477870429765e-07, "loss": 0.7679, "step": 78165 }, { "epoch": 0.9526769283268132, "grad_norm": 4.0710597095497745, "learning_rate": 2.49134060295061e-07, "loss": 0.6799, "step": 78170 }, { "epoch": 0.952737864550961, "grad_norm": 2.667731458321221, "learning_rate": 2.4881334188582426e-07, "loss": 0.5797, "step": 78175 }, { "epoch": 0.9527988007751088, "grad_norm": 3.0169043017533603, "learning_rate": 2.484926234765876e-07, "loss": 0.7235, "step": 78180 }, { "epoch": 0.9528597369992565, "grad_norm": 2.554483652801768, "learning_rate": 2.481719050673509e-07, "loss": 0.7255, "step": 78185 }, { "epoch": 0.9529206732234043, "grad_norm": 2.8552139768652016, "learning_rate": 2.478511866581142e-07, "loss": 0.6854, "step": 78190 }, { "epoch": 0.9529816094475522, "grad_norm": 2.3108589095168264, "learning_rate": 2.475304682488775e-07, "loss": 0.6772, "step": 78195 }, { "epoch": 0.9530425456717, "grad_norm": 2.2198352754251225, "learning_rate": 2.4720974983964083e-07, "loss": 0.6525, "step": 78200 }, { "epoch": 0.9531034818958478, "grad_norm": 2.1292315314182666, "learning_rate": 2.468890314304041e-07, "loss": 0.6971, "step": 78205 }, { "epoch": 0.9531644181199956, "grad_norm": 2.785403921415304, "learning_rate": 2.4656831302116744e-07, "loss": 0.688, "step": 78210 }, { "epoch": 0.9532253543441435, "grad_norm": 3.9690146655127516, "learning_rate": 2.462475946119307e-07, "loss": 0.6913, "step": 78215 }, { "epoch": 0.9532862905682912, "grad_norm": 2.6679377145218814, "learning_rate": 2.4592687620269406e-07, "loss": 0.6836, "step": 78220 }, { "epoch": 0.953347226792439, "grad_norm": 3.040302369127603, "learning_rate": 2.4560615779345734e-07, "loss": 0.7629, "step": 78225 }, { "epoch": 0.9534081630165868, "grad_norm": 2.7045096820793546, "learning_rate": 2.452854393842207e-07, "loss": 0.7644, "step": 78230 }, { "epoch": 0.9534690992407346, "grad_norm": 2.255467605451789, "learning_rate": 2.44964720974984e-07, "loss": 0.7081, "step": 78235 }, { "epoch": 0.9535300354648825, "grad_norm": 2.5591482285047586, "learning_rate": 2.446440025657473e-07, "loss": 0.7701, "step": 78240 }, { "epoch": 0.9535909716890303, "grad_norm": 2.3586891448692007, "learning_rate": 2.4432328415651063e-07, "loss": 0.7152, "step": 78245 }, { "epoch": 0.9536519079131781, "grad_norm": 3.0157395853209352, "learning_rate": 2.440025657472739e-07, "loss": 0.6409, "step": 78250 }, { "epoch": 0.9537128441373258, "grad_norm": 2.8141943338714324, "learning_rate": 2.4368184733803724e-07, "loss": 0.7483, "step": 78255 }, { "epoch": 0.9537737803614736, "grad_norm": 2.4000620326348145, "learning_rate": 2.433611289288005e-07, "loss": 0.6517, "step": 78260 }, { "epoch": 0.9538347165856215, "grad_norm": 2.302901999033486, "learning_rate": 2.4304041051956386e-07, "loss": 0.6751, "step": 78265 }, { "epoch": 0.9538956528097693, "grad_norm": 3.1078135348811964, "learning_rate": 2.4271969211032714e-07, "loss": 0.7108, "step": 78270 }, { "epoch": 0.9539565890339171, "grad_norm": 1.9732092637417915, "learning_rate": 2.423989737010905e-07, "loss": 0.6957, "step": 78275 }, { "epoch": 0.9540175252580649, "grad_norm": 2.933476970651906, "learning_rate": 2.4207825529185376e-07, "loss": 0.7934, "step": 78280 }, { "epoch": 0.9540784614822128, "grad_norm": 2.8987158516699, "learning_rate": 2.417575368826171e-07, "loss": 0.7244, "step": 78285 }, { "epoch": 0.9541393977063605, "grad_norm": 3.0260392260250812, "learning_rate": 2.4143681847338037e-07, "loss": 0.7157, "step": 78290 }, { "epoch": 0.9542003339305083, "grad_norm": 2.6847212629452044, "learning_rate": 2.411161000641437e-07, "loss": 0.7007, "step": 78295 }, { "epoch": 0.9542612701546561, "grad_norm": 3.6820898590576703, "learning_rate": 2.40795381654907e-07, "loss": 0.703, "step": 78300 }, { "epoch": 0.9543222063788039, "grad_norm": 3.191975316200466, "learning_rate": 2.404746632456703e-07, "loss": 0.7691, "step": 78305 }, { "epoch": 0.9543831426029518, "grad_norm": 2.746977831470905, "learning_rate": 2.401539448364336e-07, "loss": 0.8211, "step": 78310 }, { "epoch": 0.9544440788270996, "grad_norm": 4.4414298813297535, "learning_rate": 2.3983322642719694e-07, "loss": 0.7114, "step": 78315 }, { "epoch": 0.9545050150512474, "grad_norm": 3.469805981614335, "learning_rate": 2.395125080179602e-07, "loss": 0.7097, "step": 78320 }, { "epoch": 0.9545659512753951, "grad_norm": 2.235682008568365, "learning_rate": 2.3919178960872355e-07, "loss": 0.6556, "step": 78325 }, { "epoch": 0.9546268874995429, "grad_norm": 2.690437753094673, "learning_rate": 2.3887107119948684e-07, "loss": 0.7049, "step": 78330 }, { "epoch": 0.9546878237236908, "grad_norm": 2.446655361394614, "learning_rate": 2.3855035279025017e-07, "loss": 0.6444, "step": 78335 }, { "epoch": 0.9547487599478386, "grad_norm": 2.2277979358270663, "learning_rate": 2.3822963438101348e-07, "loss": 0.786, "step": 78340 }, { "epoch": 0.9548096961719864, "grad_norm": 2.7854804684271777, "learning_rate": 2.3790891597177679e-07, "loss": 0.704, "step": 78345 }, { "epoch": 0.9548706323961342, "grad_norm": 2.547885308040858, "learning_rate": 2.375881975625401e-07, "loss": 0.7255, "step": 78350 }, { "epoch": 0.9549315686202821, "grad_norm": 2.410826467999104, "learning_rate": 2.372674791533034e-07, "loss": 0.7389, "step": 78355 }, { "epoch": 0.9549925048444298, "grad_norm": 2.843392802023527, "learning_rate": 2.369467607440667e-07, "loss": 0.6746, "step": 78360 }, { "epoch": 0.9550534410685776, "grad_norm": 2.6463875660982374, "learning_rate": 2.3662604233483002e-07, "loss": 0.7053, "step": 78365 }, { "epoch": 0.9551143772927254, "grad_norm": 2.3213057534944546, "learning_rate": 2.3630532392559333e-07, "loss": 0.6154, "step": 78370 }, { "epoch": 0.9551753135168732, "grad_norm": 2.4994514842373787, "learning_rate": 2.3598460551635663e-07, "loss": 0.8035, "step": 78375 }, { "epoch": 0.9552362497410211, "grad_norm": 2.2850419153267483, "learning_rate": 2.3566388710711994e-07, "loss": 0.7323, "step": 78380 }, { "epoch": 0.9552971859651689, "grad_norm": 3.3290603139895705, "learning_rate": 2.3534316869788325e-07, "loss": 0.7323, "step": 78385 }, { "epoch": 0.9553581221893167, "grad_norm": 2.5124199777561746, "learning_rate": 2.3502245028864658e-07, "loss": 0.6953, "step": 78390 }, { "epoch": 0.9554190584134644, "grad_norm": 2.376483954463512, "learning_rate": 2.347017318794099e-07, "loss": 0.7116, "step": 78395 }, { "epoch": 0.9554799946376122, "grad_norm": 2.6862259923560283, "learning_rate": 2.3438101347017323e-07, "loss": 0.7107, "step": 78400 }, { "epoch": 0.9555409308617601, "grad_norm": 3.1438251374620823, "learning_rate": 2.3406029506093653e-07, "loss": 0.7243, "step": 78405 }, { "epoch": 0.9556018670859079, "grad_norm": 2.4925767894475115, "learning_rate": 2.3373957665169984e-07, "loss": 0.6993, "step": 78410 }, { "epoch": 0.9556628033100557, "grad_norm": 2.9624779113290316, "learning_rate": 2.3341885824246315e-07, "loss": 0.7845, "step": 78415 }, { "epoch": 0.9557237395342035, "grad_norm": 2.2864370438097263, "learning_rate": 2.3309813983322646e-07, "loss": 0.6423, "step": 78420 }, { "epoch": 0.9557846757583514, "grad_norm": 2.1022937582704513, "learning_rate": 2.3277742142398977e-07, "loss": 0.6547, "step": 78425 }, { "epoch": 0.9558456119824991, "grad_norm": 2.463321478252574, "learning_rate": 2.3245670301475307e-07, "loss": 0.6847, "step": 78430 }, { "epoch": 0.9559065482066469, "grad_norm": 2.705239113430178, "learning_rate": 2.3213598460551638e-07, "loss": 0.7164, "step": 78435 }, { "epoch": 0.9559674844307947, "grad_norm": 2.5619086462655876, "learning_rate": 2.318152661962797e-07, "loss": 0.7765, "step": 78440 }, { "epoch": 0.9560284206549425, "grad_norm": 2.3843103946831423, "learning_rate": 2.31494547787043e-07, "loss": 0.7291, "step": 78445 }, { "epoch": 0.9560893568790904, "grad_norm": 2.384569092645407, "learning_rate": 2.311738293778063e-07, "loss": 0.7074, "step": 78450 }, { "epoch": 0.9561502931032382, "grad_norm": 2.840523397245113, "learning_rate": 2.308531109685696e-07, "loss": 0.7583, "step": 78455 }, { "epoch": 0.956211229327386, "grad_norm": 2.2145209898243507, "learning_rate": 2.3053239255933295e-07, "loss": 0.7898, "step": 78460 }, { "epoch": 0.9562721655515337, "grad_norm": 2.7675629486895077, "learning_rate": 2.3021167415009626e-07, "loss": 0.7093, "step": 78465 }, { "epoch": 0.9563331017756815, "grad_norm": 2.327203084014467, "learning_rate": 2.2989095574085956e-07, "loss": 0.7057, "step": 78470 }, { "epoch": 0.9563940379998294, "grad_norm": 2.449626575408869, "learning_rate": 2.2957023733162287e-07, "loss": 0.6884, "step": 78475 }, { "epoch": 0.9564549742239772, "grad_norm": 3.5726112329969197, "learning_rate": 2.2924951892238618e-07, "loss": 0.7379, "step": 78480 }, { "epoch": 0.956515910448125, "grad_norm": 2.259734803990044, "learning_rate": 2.289288005131495e-07, "loss": 0.6946, "step": 78485 }, { "epoch": 0.9565768466722728, "grad_norm": 2.3293556692852486, "learning_rate": 2.286080821039128e-07, "loss": 0.7162, "step": 78490 }, { "epoch": 0.9566377828964207, "grad_norm": 3.448077162251225, "learning_rate": 2.282873636946761e-07, "loss": 0.6511, "step": 78495 }, { "epoch": 0.9566987191205684, "grad_norm": 2.353629109653985, "learning_rate": 2.279666452854394e-07, "loss": 0.6871, "step": 78500 }, { "epoch": 0.9567596553447162, "grad_norm": 2.3630089287434415, "learning_rate": 2.2764592687620272e-07, "loss": 0.7203, "step": 78505 }, { "epoch": 0.956820591568864, "grad_norm": 2.867601608086292, "learning_rate": 2.2732520846696603e-07, "loss": 0.6799, "step": 78510 }, { "epoch": 0.9568815277930118, "grad_norm": 2.292328824348252, "learning_rate": 2.2700449005772934e-07, "loss": 0.7665, "step": 78515 }, { "epoch": 0.9569424640171597, "grad_norm": 2.598343181994508, "learning_rate": 2.2668377164849264e-07, "loss": 0.6896, "step": 78520 }, { "epoch": 0.9570034002413075, "grad_norm": 2.721822860615541, "learning_rate": 2.2636305323925595e-07, "loss": 0.7016, "step": 78525 }, { "epoch": 0.9570643364654553, "grad_norm": 2.469222489090605, "learning_rate": 2.2604233483001926e-07, "loss": 0.7948, "step": 78530 }, { "epoch": 0.957125272689603, "grad_norm": 2.28320528015153, "learning_rate": 2.2572161642078257e-07, "loss": 0.7084, "step": 78535 }, { "epoch": 0.9571862089137508, "grad_norm": 2.638580312167148, "learning_rate": 2.2540089801154587e-07, "loss": 0.7309, "step": 78540 }, { "epoch": 0.9572471451378987, "grad_norm": 2.2090980742493054, "learning_rate": 2.2508017960230918e-07, "loss": 0.7195, "step": 78545 }, { "epoch": 0.9573080813620465, "grad_norm": 4.89873038577443, "learning_rate": 2.247594611930725e-07, "loss": 0.7407, "step": 78550 }, { "epoch": 0.9573690175861943, "grad_norm": 2.7367144157642276, "learning_rate": 2.244387427838358e-07, "loss": 0.7189, "step": 78555 }, { "epoch": 0.9574299538103421, "grad_norm": 2.6968774523215657, "learning_rate": 2.241180243745991e-07, "loss": 0.7069, "step": 78560 }, { "epoch": 0.95749089003449, "grad_norm": 2.3253334372006162, "learning_rate": 2.2379730596536244e-07, "loss": 0.6398, "step": 78565 }, { "epoch": 0.9575518262586377, "grad_norm": 2.168670970523453, "learning_rate": 2.2347658755612575e-07, "loss": 0.6553, "step": 78570 }, { "epoch": 0.9576127624827855, "grad_norm": 2.9105304645676173, "learning_rate": 2.2315586914688906e-07, "loss": 0.7626, "step": 78575 }, { "epoch": 0.9576736987069333, "grad_norm": 1.9546766065558585, "learning_rate": 2.2283515073765236e-07, "loss": 0.706, "step": 78580 }, { "epoch": 0.9577346349310811, "grad_norm": 3.0751718616247934, "learning_rate": 2.2251443232841567e-07, "loss": 0.7119, "step": 78585 }, { "epoch": 0.957795571155229, "grad_norm": 3.1881206179660593, "learning_rate": 2.2219371391917898e-07, "loss": 0.6554, "step": 78590 }, { "epoch": 0.9578565073793768, "grad_norm": 2.739133765810682, "learning_rate": 2.218729955099423e-07, "loss": 0.6846, "step": 78595 }, { "epoch": 0.9579174436035246, "grad_norm": 2.4420418588712733, "learning_rate": 2.215522771007056e-07, "loss": 0.7051, "step": 78600 }, { "epoch": 0.9579783798276723, "grad_norm": 3.151980760285507, "learning_rate": 2.212315586914689e-07, "loss": 0.7014, "step": 78605 }, { "epoch": 0.9580393160518201, "grad_norm": 2.5203455289831, "learning_rate": 2.209108402822322e-07, "loss": 0.7251, "step": 78610 }, { "epoch": 0.958100252275968, "grad_norm": 2.8639731011173453, "learning_rate": 2.2059012187299552e-07, "loss": 0.7201, "step": 78615 }, { "epoch": 0.9581611885001158, "grad_norm": 5.595850644847893, "learning_rate": 2.2026940346375883e-07, "loss": 0.7031, "step": 78620 }, { "epoch": 0.9582221247242636, "grad_norm": 2.042819725500577, "learning_rate": 2.1994868505452214e-07, "loss": 0.7664, "step": 78625 }, { "epoch": 0.9582830609484114, "grad_norm": 2.846371267900269, "learning_rate": 2.1962796664528544e-07, "loss": 0.6795, "step": 78630 }, { "epoch": 0.9583439971725592, "grad_norm": 2.9099405561413194, "learning_rate": 2.1930724823604875e-07, "loss": 0.7499, "step": 78635 }, { "epoch": 0.958404933396707, "grad_norm": 3.0878534592995033, "learning_rate": 2.1898652982681206e-07, "loss": 0.8261, "step": 78640 }, { "epoch": 0.9584658696208548, "grad_norm": 3.471341733953832, "learning_rate": 2.1866581141757537e-07, "loss": 0.6884, "step": 78645 }, { "epoch": 0.9585268058450026, "grad_norm": 2.3221051537188035, "learning_rate": 2.1834509300833868e-07, "loss": 0.7336, "step": 78650 }, { "epoch": 0.9585877420691504, "grad_norm": 2.1824621173610694, "learning_rate": 2.1802437459910198e-07, "loss": 0.6968, "step": 78655 }, { "epoch": 0.9586486782932983, "grad_norm": 2.681542589625425, "learning_rate": 2.177036561898653e-07, "loss": 0.6944, "step": 78660 }, { "epoch": 0.9587096145174461, "grad_norm": 2.1506764483005, "learning_rate": 2.173829377806286e-07, "loss": 0.7585, "step": 78665 }, { "epoch": 0.9587705507415939, "grad_norm": 2.5895513402198693, "learning_rate": 2.1706221937139193e-07, "loss": 0.7035, "step": 78670 }, { "epoch": 0.9588314869657416, "grad_norm": 2.420671690501162, "learning_rate": 2.1674150096215524e-07, "loss": 0.7909, "step": 78675 }, { "epoch": 0.9588924231898894, "grad_norm": 2.4760658434339855, "learning_rate": 2.1642078255291855e-07, "loss": 0.6477, "step": 78680 }, { "epoch": 0.9589533594140373, "grad_norm": 3.2848202212155027, "learning_rate": 2.1610006414368186e-07, "loss": 0.656, "step": 78685 }, { "epoch": 0.9590142956381851, "grad_norm": 2.7903974785690124, "learning_rate": 2.1577934573444517e-07, "loss": 0.7429, "step": 78690 }, { "epoch": 0.9590752318623329, "grad_norm": 3.491482850907302, "learning_rate": 2.1545862732520847e-07, "loss": 0.6742, "step": 78695 }, { "epoch": 0.9591361680864807, "grad_norm": 3.143236144478266, "learning_rate": 2.1513790891597178e-07, "loss": 0.7944, "step": 78700 }, { "epoch": 0.9591971043106285, "grad_norm": 2.1283630645032625, "learning_rate": 2.1481719050673512e-07, "loss": 0.6449, "step": 78705 }, { "epoch": 0.9592580405347763, "grad_norm": 3.3750217524275445, "learning_rate": 2.1449647209749842e-07, "loss": 0.6901, "step": 78710 }, { "epoch": 0.9593189767589241, "grad_norm": 2.4389343345149923, "learning_rate": 2.1417575368826173e-07, "loss": 0.7109, "step": 78715 }, { "epoch": 0.9593799129830719, "grad_norm": 2.639002637300817, "learning_rate": 2.1385503527902504e-07, "loss": 0.7312, "step": 78720 }, { "epoch": 0.9594408492072197, "grad_norm": 2.2906142231040088, "learning_rate": 2.1353431686978835e-07, "loss": 0.6795, "step": 78725 }, { "epoch": 0.9595017854313675, "grad_norm": 2.046152813092949, "learning_rate": 2.1321359846055166e-07, "loss": 0.6576, "step": 78730 }, { "epoch": 0.9595627216555154, "grad_norm": 2.2503449705971965, "learning_rate": 2.1289288005131496e-07, "loss": 0.6892, "step": 78735 }, { "epoch": 0.9596236578796632, "grad_norm": 2.6205504795332737, "learning_rate": 2.125721616420783e-07, "loss": 0.7229, "step": 78740 }, { "epoch": 0.9596845941038109, "grad_norm": 3.0244808101132556, "learning_rate": 2.122514432328416e-07, "loss": 0.68, "step": 78745 }, { "epoch": 0.9597455303279587, "grad_norm": 2.5736960861069753, "learning_rate": 2.1193072482360491e-07, "loss": 0.7421, "step": 78750 }, { "epoch": 0.9598064665521066, "grad_norm": 2.3174920799267293, "learning_rate": 2.1161000641436822e-07, "loss": 0.6959, "step": 78755 }, { "epoch": 0.9598674027762544, "grad_norm": 3.5402470375229065, "learning_rate": 2.1128928800513153e-07, "loss": 0.6687, "step": 78760 }, { "epoch": 0.9599283390004022, "grad_norm": 2.3963601682937514, "learning_rate": 2.1096856959589484e-07, "loss": 0.7479, "step": 78765 }, { "epoch": 0.95998927522455, "grad_norm": 3.109499880537245, "learning_rate": 2.1064785118665815e-07, "loss": 0.7403, "step": 78770 }, { "epoch": 0.9600502114486978, "grad_norm": 2.412432122829563, "learning_rate": 2.1032713277742145e-07, "loss": 0.6787, "step": 78775 }, { "epoch": 0.9601111476728456, "grad_norm": 2.2470834381484517, "learning_rate": 2.1000641436818476e-07, "loss": 0.7529, "step": 78780 }, { "epoch": 0.9601720838969934, "grad_norm": 2.1008890205830717, "learning_rate": 2.0968569595894807e-07, "loss": 0.7188, "step": 78785 }, { "epoch": 0.9602330201211412, "grad_norm": 2.3812329697397336, "learning_rate": 2.0936497754971138e-07, "loss": 0.6362, "step": 78790 }, { "epoch": 0.960293956345289, "grad_norm": 2.2574598190051534, "learning_rate": 2.0904425914047469e-07, "loss": 0.6749, "step": 78795 }, { "epoch": 0.9603548925694368, "grad_norm": 3.0806692560477815, "learning_rate": 2.08723540731238e-07, "loss": 0.725, "step": 78800 }, { "epoch": 0.9604158287935847, "grad_norm": 2.6970083256735977, "learning_rate": 2.084028223220013e-07, "loss": 0.7274, "step": 78805 }, { "epoch": 0.9604767650177325, "grad_norm": 2.7372812080497964, "learning_rate": 2.080821039127646e-07, "loss": 0.6628, "step": 78810 }, { "epoch": 0.9605377012418802, "grad_norm": 2.2305423995744236, "learning_rate": 2.0776138550352792e-07, "loss": 0.7161, "step": 78815 }, { "epoch": 0.960598637466028, "grad_norm": 2.0891016693894695, "learning_rate": 2.0744066709429123e-07, "loss": 0.7157, "step": 78820 }, { "epoch": 0.9606595736901758, "grad_norm": 2.0099294043773717, "learning_rate": 2.0711994868505453e-07, "loss": 0.7343, "step": 78825 }, { "epoch": 0.9607205099143237, "grad_norm": 2.242150942688369, "learning_rate": 2.0679923027581784e-07, "loss": 0.7301, "step": 78830 }, { "epoch": 0.9607814461384715, "grad_norm": 2.2732955522283804, "learning_rate": 2.0647851186658115e-07, "loss": 0.6066, "step": 78835 }, { "epoch": 0.9608423823626193, "grad_norm": 2.47046671521065, "learning_rate": 2.0615779345734446e-07, "loss": 0.7436, "step": 78840 }, { "epoch": 0.9609033185867671, "grad_norm": 2.4335074166410524, "learning_rate": 2.058370750481078e-07, "loss": 0.7097, "step": 78845 }, { "epoch": 0.9609642548109149, "grad_norm": 1.9985557506450844, "learning_rate": 2.055163566388711e-07, "loss": 0.6922, "step": 78850 }, { "epoch": 0.9610251910350627, "grad_norm": 3.085691982502298, "learning_rate": 2.051956382296344e-07, "loss": 0.7388, "step": 78855 }, { "epoch": 0.9610861272592105, "grad_norm": 2.455602539673449, "learning_rate": 2.0487491982039772e-07, "loss": 0.7417, "step": 78860 }, { "epoch": 0.9611470634833583, "grad_norm": 2.399033355300363, "learning_rate": 2.0455420141116102e-07, "loss": 0.7451, "step": 78865 }, { "epoch": 0.9612079997075061, "grad_norm": 2.331024797210489, "learning_rate": 2.0423348300192433e-07, "loss": 0.6528, "step": 78870 }, { "epoch": 0.961268935931654, "grad_norm": 2.596121833791918, "learning_rate": 2.0391276459268764e-07, "loss": 0.5993, "step": 78875 }, { "epoch": 0.9613298721558018, "grad_norm": 2.616201165965006, "learning_rate": 2.0359204618345095e-07, "loss": 0.733, "step": 78880 }, { "epoch": 0.9613908083799495, "grad_norm": 2.438067717927401, "learning_rate": 2.0327132777421426e-07, "loss": 0.7236, "step": 78885 }, { "epoch": 0.9614517446040973, "grad_norm": 2.8509588859239923, "learning_rate": 2.0295060936497756e-07, "loss": 0.7203, "step": 78890 }, { "epoch": 0.9615126808282451, "grad_norm": 2.891741298796359, "learning_rate": 2.0262989095574087e-07, "loss": 0.6449, "step": 78895 }, { "epoch": 0.961573617052393, "grad_norm": 2.284367364741834, "learning_rate": 2.0230917254650418e-07, "loss": 0.7315, "step": 78900 }, { "epoch": 0.9616345532765408, "grad_norm": 2.754790774798083, "learning_rate": 2.019884541372675e-07, "loss": 0.7612, "step": 78905 }, { "epoch": 0.9616954895006886, "grad_norm": 2.678973101312473, "learning_rate": 2.016677357280308e-07, "loss": 0.6894, "step": 78910 }, { "epoch": 0.9617564257248364, "grad_norm": 2.839816795633314, "learning_rate": 2.013470173187941e-07, "loss": 0.76, "step": 78915 }, { "epoch": 0.9618173619489842, "grad_norm": 3.2472670890012942, "learning_rate": 2.010262989095574e-07, "loss": 0.6895, "step": 78920 }, { "epoch": 0.961878298173132, "grad_norm": 2.4623857407634273, "learning_rate": 2.0070558050032072e-07, "loss": 0.7687, "step": 78925 }, { "epoch": 0.9619392343972798, "grad_norm": 2.5252170854717075, "learning_rate": 2.0038486209108403e-07, "loss": 0.7124, "step": 78930 }, { "epoch": 0.9620001706214276, "grad_norm": 3.0291561937981664, "learning_rate": 2.0006414368184733e-07, "loss": 0.7483, "step": 78935 }, { "epoch": 0.9620611068455754, "grad_norm": 2.1885253050634366, "learning_rate": 1.9974342527261064e-07, "loss": 0.73, "step": 78940 }, { "epoch": 0.9621220430697233, "grad_norm": 2.432931087092274, "learning_rate": 1.9942270686337395e-07, "loss": 0.6911, "step": 78945 }, { "epoch": 0.9621829792938711, "grad_norm": 2.450701097320337, "learning_rate": 1.9910198845413728e-07, "loss": 0.6866, "step": 78950 }, { "epoch": 0.9622439155180188, "grad_norm": 2.1156378735489194, "learning_rate": 1.987812700449006e-07, "loss": 0.7543, "step": 78955 }, { "epoch": 0.9623048517421666, "grad_norm": 2.374607683221746, "learning_rate": 1.984605516356639e-07, "loss": 0.668, "step": 78960 }, { "epoch": 0.9623657879663144, "grad_norm": 3.571132445902236, "learning_rate": 1.981398332264272e-07, "loss": 0.683, "step": 78965 }, { "epoch": 0.9624267241904623, "grad_norm": 2.4901982204227324, "learning_rate": 1.9781911481719052e-07, "loss": 0.7284, "step": 78970 }, { "epoch": 0.9624876604146101, "grad_norm": 2.4193614493534947, "learning_rate": 1.9749839640795382e-07, "loss": 0.71, "step": 78975 }, { "epoch": 0.9625485966387579, "grad_norm": 2.440858201565042, "learning_rate": 1.9717767799871713e-07, "loss": 0.7233, "step": 78980 }, { "epoch": 0.9626095328629057, "grad_norm": 2.330746017869037, "learning_rate": 1.9685695958948044e-07, "loss": 0.6721, "step": 78985 }, { "epoch": 0.9626704690870534, "grad_norm": 2.3784123895090477, "learning_rate": 1.9653624118024375e-07, "loss": 0.7031, "step": 78990 }, { "epoch": 0.9627314053112013, "grad_norm": 2.209760292340648, "learning_rate": 1.9621552277100706e-07, "loss": 0.634, "step": 78995 }, { "epoch": 0.9627923415353491, "grad_norm": 3.0391988290463634, "learning_rate": 1.9589480436177036e-07, "loss": 0.7574, "step": 79000 }, { "epoch": 0.9628532777594969, "grad_norm": 2.097619639888259, "learning_rate": 1.955740859525337e-07, "loss": 0.6822, "step": 79005 }, { "epoch": 0.9629142139836447, "grad_norm": 2.9171131521455327, "learning_rate": 1.95253367543297e-07, "loss": 0.7606, "step": 79010 }, { "epoch": 0.9629751502077926, "grad_norm": 2.2770634927199165, "learning_rate": 1.9493264913406031e-07, "loss": 0.734, "step": 79015 }, { "epoch": 0.9630360864319404, "grad_norm": 3.255877046358328, "learning_rate": 1.9461193072482365e-07, "loss": 0.7311, "step": 79020 }, { "epoch": 0.9630970226560881, "grad_norm": 2.3679673300501025, "learning_rate": 1.9429121231558696e-07, "loss": 0.7736, "step": 79025 }, { "epoch": 0.9631579588802359, "grad_norm": 2.5138479604501485, "learning_rate": 1.9397049390635026e-07, "loss": 0.7401, "step": 79030 }, { "epoch": 0.9632188951043837, "grad_norm": 2.5115190875569557, "learning_rate": 1.9364977549711357e-07, "loss": 0.7027, "step": 79035 }, { "epoch": 0.9632798313285316, "grad_norm": 2.4469378999797318, "learning_rate": 1.9332905708787688e-07, "loss": 0.6952, "step": 79040 }, { "epoch": 0.9633407675526794, "grad_norm": 2.503848347739468, "learning_rate": 1.930083386786402e-07, "loss": 0.6542, "step": 79045 }, { "epoch": 0.9634017037768272, "grad_norm": 2.4427460250419406, "learning_rate": 1.926876202694035e-07, "loss": 0.7117, "step": 79050 }, { "epoch": 0.963462640000975, "grad_norm": 2.27663022708962, "learning_rate": 1.923669018601668e-07, "loss": 0.6923, "step": 79055 }, { "epoch": 0.9635235762251227, "grad_norm": 2.439385312705559, "learning_rate": 1.920461834509301e-07, "loss": 0.7624, "step": 79060 }, { "epoch": 0.9635845124492706, "grad_norm": 2.9013451528569223, "learning_rate": 1.9172546504169342e-07, "loss": 0.6684, "step": 79065 }, { "epoch": 0.9636454486734184, "grad_norm": 2.558269240678919, "learning_rate": 1.9140474663245673e-07, "loss": 0.8149, "step": 79070 }, { "epoch": 0.9637063848975662, "grad_norm": 2.515195787324844, "learning_rate": 1.9108402822322004e-07, "loss": 0.7381, "step": 79075 }, { "epoch": 0.963767321121714, "grad_norm": 5.798385820997153, "learning_rate": 1.9076330981398334e-07, "loss": 0.7031, "step": 79080 }, { "epoch": 0.9638282573458619, "grad_norm": 2.006357538970502, "learning_rate": 1.9044259140474665e-07, "loss": 0.6917, "step": 79085 }, { "epoch": 0.9638891935700096, "grad_norm": 2.151815147189928, "learning_rate": 1.9012187299550996e-07, "loss": 0.7239, "step": 79090 }, { "epoch": 0.9639501297941574, "grad_norm": 2.5200649292039863, "learning_rate": 1.8980115458627327e-07, "loss": 0.7121, "step": 79095 }, { "epoch": 0.9640110660183052, "grad_norm": 3.8140418272777423, "learning_rate": 1.8948043617703658e-07, "loss": 0.7537, "step": 79100 }, { "epoch": 0.964072002242453, "grad_norm": 2.2668680594571895, "learning_rate": 1.8915971776779988e-07, "loss": 0.7185, "step": 79105 }, { "epoch": 0.9641329384666009, "grad_norm": 2.774683503932325, "learning_rate": 1.888389993585632e-07, "loss": 0.7569, "step": 79110 }, { "epoch": 0.9641938746907487, "grad_norm": 2.6996330488308544, "learning_rate": 1.885182809493265e-07, "loss": 0.7931, "step": 79115 }, { "epoch": 0.9642548109148965, "grad_norm": 2.798472086033227, "learning_rate": 1.881975625400898e-07, "loss": 0.7372, "step": 79120 }, { "epoch": 0.9643157471390442, "grad_norm": 3.6417594196936656, "learning_rate": 1.8787684413085314e-07, "loss": 0.7023, "step": 79125 }, { "epoch": 0.964376683363192, "grad_norm": 2.5897998584712782, "learning_rate": 1.8755612572161645e-07, "loss": 0.7015, "step": 79130 }, { "epoch": 0.9644376195873399, "grad_norm": 2.7427198697589055, "learning_rate": 1.8723540731237976e-07, "loss": 0.7436, "step": 79135 }, { "epoch": 0.9644985558114877, "grad_norm": 2.0332075254578394, "learning_rate": 1.8691468890314307e-07, "loss": 0.6438, "step": 79140 }, { "epoch": 0.9645594920356355, "grad_norm": 2.0273134132491597, "learning_rate": 1.8659397049390637e-07, "loss": 0.7185, "step": 79145 }, { "epoch": 0.9646204282597833, "grad_norm": 2.8394043920182526, "learning_rate": 1.8627325208466968e-07, "loss": 0.7352, "step": 79150 }, { "epoch": 0.9646813644839312, "grad_norm": 2.5108495163784608, "learning_rate": 1.85952533675433e-07, "loss": 0.7461, "step": 79155 }, { "epoch": 0.9647423007080789, "grad_norm": 2.6967624140976403, "learning_rate": 1.856318152661963e-07, "loss": 0.7525, "step": 79160 }, { "epoch": 0.9648032369322267, "grad_norm": 2.2442882453476356, "learning_rate": 1.853110968569596e-07, "loss": 0.6932, "step": 79165 }, { "epoch": 0.9648641731563745, "grad_norm": 2.3486149630385893, "learning_rate": 1.8499037844772291e-07, "loss": 0.758, "step": 79170 }, { "epoch": 0.9649251093805223, "grad_norm": 2.473765137834519, "learning_rate": 1.8466966003848622e-07, "loss": 0.7514, "step": 79175 }, { "epoch": 0.9649860456046702, "grad_norm": 2.529243816049154, "learning_rate": 1.8434894162924953e-07, "loss": 0.7118, "step": 79180 }, { "epoch": 0.965046981828818, "grad_norm": 2.3970539848397308, "learning_rate": 1.8402822322001284e-07, "loss": 0.6926, "step": 79185 }, { "epoch": 0.9651079180529658, "grad_norm": 2.3230096309406214, "learning_rate": 1.8370750481077615e-07, "loss": 0.7452, "step": 79190 }, { "epoch": 0.9651688542771135, "grad_norm": 1.963939616952966, "learning_rate": 1.8338678640153945e-07, "loss": 0.6858, "step": 79195 }, { "epoch": 0.9652297905012613, "grad_norm": 2.449254743890231, "learning_rate": 1.8306606799230276e-07, "loss": 0.7776, "step": 79200 }, { "epoch": 0.9652907267254092, "grad_norm": 2.4754204457097044, "learning_rate": 1.8274534958306607e-07, "loss": 0.7102, "step": 79205 }, { "epoch": 0.965351662949557, "grad_norm": 2.141341680465715, "learning_rate": 1.8242463117382938e-07, "loss": 0.6125, "step": 79210 }, { "epoch": 0.9654125991737048, "grad_norm": 2.7936586745222813, "learning_rate": 1.8210391276459269e-07, "loss": 0.7203, "step": 79215 }, { "epoch": 0.9654735353978526, "grad_norm": 2.7383438237337114, "learning_rate": 1.81783194355356e-07, "loss": 0.7564, "step": 79220 }, { "epoch": 0.9655344716220005, "grad_norm": 2.249348443757862, "learning_rate": 1.814624759461193e-07, "loss": 0.7373, "step": 79225 }, { "epoch": 0.9655954078461482, "grad_norm": 2.027032340821768, "learning_rate": 1.8114175753688264e-07, "loss": 0.7612, "step": 79230 }, { "epoch": 0.965656344070296, "grad_norm": 2.8545958829413642, "learning_rate": 1.8082103912764594e-07, "loss": 0.6906, "step": 79235 }, { "epoch": 0.9657172802944438, "grad_norm": 2.9222795965138566, "learning_rate": 1.8050032071840925e-07, "loss": 0.7246, "step": 79240 }, { "epoch": 0.9657782165185916, "grad_norm": 2.1235426324523754, "learning_rate": 1.8017960230917256e-07, "loss": 0.67, "step": 79245 }, { "epoch": 0.9658391527427395, "grad_norm": 2.3843234595297433, "learning_rate": 1.7985888389993587e-07, "loss": 0.7239, "step": 79250 }, { "epoch": 0.9659000889668873, "grad_norm": 2.362474029980951, "learning_rate": 1.7953816549069918e-07, "loss": 0.7735, "step": 79255 }, { "epoch": 0.9659610251910351, "grad_norm": 2.8278212662792894, "learning_rate": 1.7921744708146248e-07, "loss": 0.732, "step": 79260 }, { "epoch": 0.9660219614151828, "grad_norm": 2.3892782519516906, "learning_rate": 1.788967286722258e-07, "loss": 0.6518, "step": 79265 }, { "epoch": 0.9660828976393306, "grad_norm": 2.9912062519572022, "learning_rate": 1.785760102629891e-07, "loss": 0.6703, "step": 79270 }, { "epoch": 0.9661438338634785, "grad_norm": 2.572504562345122, "learning_rate": 1.782552918537524e-07, "loss": 0.7699, "step": 79275 }, { "epoch": 0.9662047700876263, "grad_norm": 2.5118572025245527, "learning_rate": 1.7793457344451572e-07, "loss": 0.7398, "step": 79280 }, { "epoch": 0.9662657063117741, "grad_norm": 2.513185605483322, "learning_rate": 1.7761385503527902e-07, "loss": 0.6582, "step": 79285 }, { "epoch": 0.9663266425359219, "grad_norm": 2.003014222935611, "learning_rate": 1.7729313662604233e-07, "loss": 0.6309, "step": 79290 }, { "epoch": 0.9663875787600698, "grad_norm": 2.564121726269227, "learning_rate": 1.7697241821680564e-07, "loss": 0.748, "step": 79295 }, { "epoch": 0.9664485149842175, "grad_norm": 2.7078896011054194, "learning_rate": 1.7665169980756895e-07, "loss": 0.6933, "step": 79300 }, { "epoch": 0.9665094512083653, "grad_norm": 2.3302572105697794, "learning_rate": 1.763309813983323e-07, "loss": 0.7824, "step": 79305 }, { "epoch": 0.9665703874325131, "grad_norm": 2.637322595043217, "learning_rate": 1.7601026298909562e-07, "loss": 0.7635, "step": 79310 }, { "epoch": 0.9666313236566609, "grad_norm": 2.381487654273186, "learning_rate": 1.7568954457985892e-07, "loss": 0.6893, "step": 79315 }, { "epoch": 0.9666922598808088, "grad_norm": 2.4707227267592224, "learning_rate": 1.7536882617062223e-07, "loss": 0.7282, "step": 79320 }, { "epoch": 0.9667531961049566, "grad_norm": 2.9056441037153524, "learning_rate": 1.7504810776138554e-07, "loss": 0.6834, "step": 79325 }, { "epoch": 0.9668141323291044, "grad_norm": 2.4110122602698074, "learning_rate": 1.7472738935214885e-07, "loss": 0.7568, "step": 79330 }, { "epoch": 0.9668750685532521, "grad_norm": 2.702876717667558, "learning_rate": 1.7440667094291216e-07, "loss": 0.7452, "step": 79335 }, { "epoch": 0.9669360047773999, "grad_norm": 2.115782189103661, "learning_rate": 1.7408595253367546e-07, "loss": 0.6835, "step": 79340 }, { "epoch": 0.9669969410015478, "grad_norm": 2.6723670963770725, "learning_rate": 1.7376523412443877e-07, "loss": 0.7334, "step": 79345 }, { "epoch": 0.9670578772256956, "grad_norm": 2.505508840017685, "learning_rate": 1.7344451571520208e-07, "loss": 0.7618, "step": 79350 }, { "epoch": 0.9671188134498434, "grad_norm": 2.1665670171324662, "learning_rate": 1.731237973059654e-07, "loss": 0.6659, "step": 79355 }, { "epoch": 0.9671797496739912, "grad_norm": 2.7339936899857475, "learning_rate": 1.728030788967287e-07, "loss": 0.7746, "step": 79360 }, { "epoch": 0.967240685898139, "grad_norm": 2.8880901151955176, "learning_rate": 1.72482360487492e-07, "loss": 0.7248, "step": 79365 }, { "epoch": 0.9673016221222868, "grad_norm": 2.4485037905236116, "learning_rate": 1.721616420782553e-07, "loss": 0.7788, "step": 79370 }, { "epoch": 0.9673625583464346, "grad_norm": 2.567978724971516, "learning_rate": 1.7184092366901862e-07, "loss": 0.7026, "step": 79375 }, { "epoch": 0.9674234945705824, "grad_norm": 1.9174399555724133, "learning_rate": 1.7152020525978193e-07, "loss": 0.6643, "step": 79380 }, { "epoch": 0.9674844307947302, "grad_norm": 2.9335961457424, "learning_rate": 1.7119948685054523e-07, "loss": 0.6393, "step": 79385 }, { "epoch": 0.967545367018878, "grad_norm": 2.4570462254699317, "learning_rate": 1.7087876844130854e-07, "loss": 0.7502, "step": 79390 }, { "epoch": 0.9676063032430259, "grad_norm": 1.931895121694432, "learning_rate": 1.7055805003207185e-07, "loss": 0.6894, "step": 79395 }, { "epoch": 0.9676672394671737, "grad_norm": 2.5211437230737404, "learning_rate": 1.7023733162283516e-07, "loss": 0.7065, "step": 79400 }, { "epoch": 0.9677281756913214, "grad_norm": 2.228692906526887, "learning_rate": 1.699166132135985e-07, "loss": 0.7507, "step": 79405 }, { "epoch": 0.9677891119154692, "grad_norm": 2.9459312377297446, "learning_rate": 1.695958948043618e-07, "loss": 0.7656, "step": 79410 }, { "epoch": 0.9678500481396171, "grad_norm": 2.4563637543693937, "learning_rate": 1.692751763951251e-07, "loss": 0.7229, "step": 79415 }, { "epoch": 0.9679109843637649, "grad_norm": 2.689690611402401, "learning_rate": 1.6895445798588842e-07, "loss": 0.6822, "step": 79420 }, { "epoch": 0.9679719205879127, "grad_norm": 2.95925674428381, "learning_rate": 1.6863373957665172e-07, "loss": 0.6411, "step": 79425 }, { "epoch": 0.9680328568120605, "grad_norm": 2.321658838973259, "learning_rate": 1.6831302116741503e-07, "loss": 0.7325, "step": 79430 }, { "epoch": 0.9680937930362083, "grad_norm": 2.6354700002501423, "learning_rate": 1.6799230275817834e-07, "loss": 0.6282, "step": 79435 }, { "epoch": 0.9681547292603561, "grad_norm": 2.91264812536496, "learning_rate": 1.6767158434894165e-07, "loss": 0.7286, "step": 79440 }, { "epoch": 0.9682156654845039, "grad_norm": 2.2847652945560757, "learning_rate": 1.6735086593970496e-07, "loss": 0.8368, "step": 79445 }, { "epoch": 0.9682766017086517, "grad_norm": 2.3231663395116375, "learning_rate": 1.6703014753046826e-07, "loss": 0.732, "step": 79450 }, { "epoch": 0.9683375379327995, "grad_norm": 2.760603194722034, "learning_rate": 1.6670942912123157e-07, "loss": 0.6993, "step": 79455 }, { "epoch": 0.9683984741569474, "grad_norm": 2.710936019280668, "learning_rate": 1.6638871071199488e-07, "loss": 0.7712, "step": 79460 }, { "epoch": 0.9684594103810952, "grad_norm": 2.3678734916887447, "learning_rate": 1.660679923027582e-07, "loss": 0.7281, "step": 79465 }, { "epoch": 0.968520346605243, "grad_norm": 2.4244242679760606, "learning_rate": 1.657472738935215e-07, "loss": 0.7324, "step": 79470 }, { "epoch": 0.9685812828293907, "grad_norm": 2.171689327753146, "learning_rate": 1.654265554842848e-07, "loss": 0.6906, "step": 79475 }, { "epoch": 0.9686422190535385, "grad_norm": 2.4464911542877283, "learning_rate": 1.651058370750481e-07, "loss": 0.6737, "step": 79480 }, { "epoch": 0.9687031552776864, "grad_norm": 3.397167306474302, "learning_rate": 1.6478511866581142e-07, "loss": 0.67, "step": 79485 }, { "epoch": 0.9687640915018342, "grad_norm": 2.5353653122032047, "learning_rate": 1.6446440025657473e-07, "loss": 0.7353, "step": 79490 }, { "epoch": 0.968825027725982, "grad_norm": 2.8287449836236944, "learning_rate": 1.6414368184733804e-07, "loss": 0.6817, "step": 79495 }, { "epoch": 0.9688859639501298, "grad_norm": 2.3227735383329504, "learning_rate": 1.6382296343810134e-07, "loss": 0.7766, "step": 79500 }, { "epoch": 0.9689469001742776, "grad_norm": 2.468000368245022, "learning_rate": 1.6350224502886465e-07, "loss": 0.7861, "step": 79505 }, { "epoch": 0.9690078363984254, "grad_norm": 2.347626214727445, "learning_rate": 1.6318152661962799e-07, "loss": 0.719, "step": 79510 }, { "epoch": 0.9690687726225732, "grad_norm": 2.6845951279410345, "learning_rate": 1.628608082103913e-07, "loss": 0.665, "step": 79515 }, { "epoch": 0.969129708846721, "grad_norm": 2.3690537009719566, "learning_rate": 1.625400898011546e-07, "loss": 0.7227, "step": 79520 }, { "epoch": 0.9691906450708688, "grad_norm": 2.2337414528132653, "learning_rate": 1.622193713919179e-07, "loss": 0.6617, "step": 79525 }, { "epoch": 0.9692515812950167, "grad_norm": 2.869714149600625, "learning_rate": 1.6189865298268122e-07, "loss": 0.6691, "step": 79530 }, { "epoch": 0.9693125175191645, "grad_norm": 2.6995370143998834, "learning_rate": 1.6157793457344453e-07, "loss": 0.6861, "step": 79535 }, { "epoch": 0.9693734537433123, "grad_norm": 2.6398095319179764, "learning_rate": 1.6125721616420783e-07, "loss": 0.7355, "step": 79540 }, { "epoch": 0.96943438996746, "grad_norm": 2.3310358208196487, "learning_rate": 1.6093649775497114e-07, "loss": 0.7552, "step": 79545 }, { "epoch": 0.9694953261916078, "grad_norm": 3.6802663601646546, "learning_rate": 1.6061577934573445e-07, "loss": 0.6518, "step": 79550 }, { "epoch": 0.9695562624157557, "grad_norm": 2.2382335004683496, "learning_rate": 1.6029506093649776e-07, "loss": 0.6821, "step": 79555 }, { "epoch": 0.9696171986399035, "grad_norm": 3.2581369945082024, "learning_rate": 1.5997434252726107e-07, "loss": 0.7054, "step": 79560 }, { "epoch": 0.9696781348640513, "grad_norm": 2.390774342336454, "learning_rate": 1.5965362411802437e-07, "loss": 0.6758, "step": 79565 }, { "epoch": 0.9697390710881991, "grad_norm": 2.6722837879624244, "learning_rate": 1.5933290570878768e-07, "loss": 0.6754, "step": 79570 }, { "epoch": 0.969800007312347, "grad_norm": 2.2882336828077117, "learning_rate": 1.59012187299551e-07, "loss": 0.7048, "step": 79575 }, { "epoch": 0.9698609435364947, "grad_norm": 2.3846575312269587, "learning_rate": 1.586914688903143e-07, "loss": 0.7057, "step": 79580 }, { "epoch": 0.9699218797606425, "grad_norm": 2.920666343082699, "learning_rate": 1.583707504810776e-07, "loss": 0.7037, "step": 79585 }, { "epoch": 0.9699828159847903, "grad_norm": 2.411685816398206, "learning_rate": 1.5805003207184091e-07, "loss": 0.7772, "step": 79590 }, { "epoch": 0.9700437522089381, "grad_norm": 2.825254752180577, "learning_rate": 1.5772931366260422e-07, "loss": 0.7067, "step": 79595 }, { "epoch": 0.970104688433086, "grad_norm": 2.4974793783031832, "learning_rate": 1.5740859525336753e-07, "loss": 0.7099, "step": 79600 }, { "epoch": 0.9701656246572338, "grad_norm": 2.1382027546567435, "learning_rate": 1.5708787684413084e-07, "loss": 0.652, "step": 79605 }, { "epoch": 0.9702265608813816, "grad_norm": 2.2178023122907176, "learning_rate": 1.567671584348942e-07, "loss": 0.7059, "step": 79610 }, { "epoch": 0.9702874971055293, "grad_norm": 2.203351017884091, "learning_rate": 1.564464400256575e-07, "loss": 0.7134, "step": 79615 }, { "epoch": 0.9703484333296771, "grad_norm": 2.4253283013910667, "learning_rate": 1.561257216164208e-07, "loss": 0.721, "step": 79620 }, { "epoch": 0.970409369553825, "grad_norm": 2.7239368511904463, "learning_rate": 1.558050032071841e-07, "loss": 0.7805, "step": 79625 }, { "epoch": 0.9704703057779728, "grad_norm": 2.8129556564771323, "learning_rate": 1.554842847979474e-07, "loss": 0.752, "step": 79630 }, { "epoch": 0.9705312420021206, "grad_norm": 2.2282714365605605, "learning_rate": 1.551635663887107e-07, "loss": 0.7004, "step": 79635 }, { "epoch": 0.9705921782262684, "grad_norm": 2.1303054707684024, "learning_rate": 1.5484284797947402e-07, "loss": 0.6744, "step": 79640 }, { "epoch": 0.9706531144504162, "grad_norm": 2.8864521948753254, "learning_rate": 1.5452212957023733e-07, "loss": 0.7494, "step": 79645 }, { "epoch": 0.970714050674564, "grad_norm": 2.506406136207266, "learning_rate": 1.5420141116100066e-07, "loss": 0.7422, "step": 79650 }, { "epoch": 0.9707749868987118, "grad_norm": 2.438149713296227, "learning_rate": 1.5388069275176397e-07, "loss": 0.6451, "step": 79655 }, { "epoch": 0.9708359231228596, "grad_norm": 2.4368888342327124, "learning_rate": 1.5355997434252728e-07, "loss": 0.6983, "step": 79660 }, { "epoch": 0.9708968593470074, "grad_norm": 2.4736686732409736, "learning_rate": 1.5323925593329059e-07, "loss": 0.6944, "step": 79665 }, { "epoch": 0.9709577955711552, "grad_norm": 2.392501576324179, "learning_rate": 1.529185375240539e-07, "loss": 0.7223, "step": 79670 }, { "epoch": 0.9710187317953031, "grad_norm": 2.72345608490914, "learning_rate": 1.525978191148172e-07, "loss": 0.6992, "step": 79675 }, { "epoch": 0.9710796680194509, "grad_norm": 2.3899183486841737, "learning_rate": 1.522771007055805e-07, "loss": 0.7651, "step": 79680 }, { "epoch": 0.9711406042435986, "grad_norm": 2.2702126822892423, "learning_rate": 1.5195638229634384e-07, "loss": 0.695, "step": 79685 }, { "epoch": 0.9712015404677464, "grad_norm": 3.365142000117008, "learning_rate": 1.5163566388710715e-07, "loss": 0.6838, "step": 79690 }, { "epoch": 0.9712624766918942, "grad_norm": 3.516426980974721, "learning_rate": 1.5131494547787046e-07, "loss": 0.6878, "step": 79695 }, { "epoch": 0.9713234129160421, "grad_norm": 2.3015351583186843, "learning_rate": 1.5099422706863377e-07, "loss": 0.6157, "step": 79700 }, { "epoch": 0.9713843491401899, "grad_norm": 2.4855239284515025, "learning_rate": 1.5067350865939708e-07, "loss": 0.8019, "step": 79705 }, { "epoch": 0.9714452853643377, "grad_norm": 2.728185826872347, "learning_rate": 1.5035279025016038e-07, "loss": 0.7554, "step": 79710 }, { "epoch": 0.9715062215884855, "grad_norm": 2.328390900414891, "learning_rate": 1.500320718409237e-07, "loss": 0.6918, "step": 79715 }, { "epoch": 0.9715671578126333, "grad_norm": 3.106976533683743, "learning_rate": 1.49711353431687e-07, "loss": 0.6366, "step": 79720 }, { "epoch": 0.9716280940367811, "grad_norm": 3.03874929187203, "learning_rate": 1.493906350224503e-07, "loss": 0.6858, "step": 79725 }, { "epoch": 0.9716890302609289, "grad_norm": 2.8367741452917867, "learning_rate": 1.4906991661321362e-07, "loss": 0.7039, "step": 79730 }, { "epoch": 0.9717499664850767, "grad_norm": 2.507932450957858, "learning_rate": 1.4874919820397692e-07, "loss": 0.7007, "step": 79735 }, { "epoch": 0.9718109027092245, "grad_norm": 2.402354772326163, "learning_rate": 1.4842847979474023e-07, "loss": 0.7458, "step": 79740 }, { "epoch": 0.9718718389333724, "grad_norm": 2.6721535467884645, "learning_rate": 1.4810776138550354e-07, "loss": 0.6737, "step": 79745 }, { "epoch": 0.9719327751575202, "grad_norm": 4.48032485906148, "learning_rate": 1.4778704297626685e-07, "loss": 0.6625, "step": 79750 }, { "epoch": 0.9719937113816679, "grad_norm": 3.0098526979558424, "learning_rate": 1.4746632456703016e-07, "loss": 0.6633, "step": 79755 }, { "epoch": 0.9720546476058157, "grad_norm": 2.461424007176272, "learning_rate": 1.4714560615779346e-07, "loss": 0.6362, "step": 79760 }, { "epoch": 0.9721155838299635, "grad_norm": 2.361937831204743, "learning_rate": 1.4682488774855677e-07, "loss": 0.7048, "step": 79765 }, { "epoch": 0.9721765200541114, "grad_norm": 2.369930019809479, "learning_rate": 1.4650416933932008e-07, "loss": 0.6621, "step": 79770 }, { "epoch": 0.9722374562782592, "grad_norm": 2.4981288433545545, "learning_rate": 1.461834509300834e-07, "loss": 0.7229, "step": 79775 }, { "epoch": 0.972298392502407, "grad_norm": 2.363504746465321, "learning_rate": 1.458627325208467e-07, "loss": 0.7463, "step": 79780 }, { "epoch": 0.9723593287265548, "grad_norm": 2.5231262989507006, "learning_rate": 1.4554201411161e-07, "loss": 0.7427, "step": 79785 }, { "epoch": 0.9724202649507026, "grad_norm": 2.115951705352585, "learning_rate": 1.4522129570237334e-07, "loss": 0.7002, "step": 79790 }, { "epoch": 0.9724812011748504, "grad_norm": 2.599681094938765, "learning_rate": 1.4490057729313665e-07, "loss": 0.6923, "step": 79795 }, { "epoch": 0.9725421373989982, "grad_norm": 1.9529816836645146, "learning_rate": 1.4457985888389995e-07, "loss": 0.7057, "step": 79800 }, { "epoch": 0.972603073623146, "grad_norm": 2.807523910241046, "learning_rate": 1.4425914047466326e-07, "loss": 0.7653, "step": 79805 }, { "epoch": 0.9726640098472938, "grad_norm": 2.4469941911110418, "learning_rate": 1.4393842206542657e-07, "loss": 0.7241, "step": 79810 }, { "epoch": 0.9727249460714417, "grad_norm": 2.5004577192445923, "learning_rate": 1.4361770365618988e-07, "loss": 0.6829, "step": 79815 }, { "epoch": 0.9727858822955895, "grad_norm": 2.4250834006903275, "learning_rate": 1.4329698524695318e-07, "loss": 0.6401, "step": 79820 }, { "epoch": 0.9728468185197372, "grad_norm": 2.5929076830704094, "learning_rate": 1.429762668377165e-07, "loss": 0.7257, "step": 79825 }, { "epoch": 0.972907754743885, "grad_norm": 1.9664939920611924, "learning_rate": 1.426555484284798e-07, "loss": 0.6883, "step": 79830 }, { "epoch": 0.9729686909680328, "grad_norm": 2.8342467379787304, "learning_rate": 1.4233483001924314e-07, "loss": 0.7042, "step": 79835 }, { "epoch": 0.9730296271921807, "grad_norm": 3.1116205100913494, "learning_rate": 1.4201411161000644e-07, "loss": 0.7314, "step": 79840 }, { "epoch": 0.9730905634163285, "grad_norm": 2.6178612222669173, "learning_rate": 1.4169339320076975e-07, "loss": 0.7697, "step": 79845 }, { "epoch": 0.9731514996404763, "grad_norm": 3.050512047248277, "learning_rate": 1.4137267479153306e-07, "loss": 0.6958, "step": 79850 }, { "epoch": 0.9732124358646241, "grad_norm": 2.883646433390485, "learning_rate": 1.4105195638229637e-07, "loss": 0.7448, "step": 79855 }, { "epoch": 0.9732733720887718, "grad_norm": 2.7322325253082296, "learning_rate": 1.4073123797305967e-07, "loss": 0.6818, "step": 79860 }, { "epoch": 0.9733343083129197, "grad_norm": 2.5992214670359584, "learning_rate": 1.4041051956382298e-07, "loss": 0.6663, "step": 79865 }, { "epoch": 0.9733952445370675, "grad_norm": 2.3757934070905433, "learning_rate": 1.400898011545863e-07, "loss": 0.6687, "step": 79870 }, { "epoch": 0.9734561807612153, "grad_norm": 2.304572030463508, "learning_rate": 1.397690827453496e-07, "loss": 0.7174, "step": 79875 }, { "epoch": 0.9735171169853631, "grad_norm": 2.399629035345546, "learning_rate": 1.394483643361129e-07, "loss": 0.6878, "step": 79880 }, { "epoch": 0.973578053209511, "grad_norm": 2.653136341508827, "learning_rate": 1.3912764592687621e-07, "loss": 0.7794, "step": 79885 }, { "epoch": 0.9736389894336588, "grad_norm": 2.2235141351771572, "learning_rate": 1.3880692751763952e-07, "loss": 0.733, "step": 79890 }, { "epoch": 0.9736999256578065, "grad_norm": 2.7485705265981473, "learning_rate": 1.3848620910840283e-07, "loss": 0.6557, "step": 79895 }, { "epoch": 0.9737608618819543, "grad_norm": 2.9832150048426525, "learning_rate": 1.3816549069916614e-07, "loss": 0.7482, "step": 79900 }, { "epoch": 0.9738217981061021, "grad_norm": 2.1186202928946383, "learning_rate": 1.3784477228992945e-07, "loss": 0.7349, "step": 79905 }, { "epoch": 0.97388273433025, "grad_norm": 2.625891583261049, "learning_rate": 1.3752405388069275e-07, "loss": 0.6555, "step": 79910 }, { "epoch": 0.9739436705543978, "grad_norm": 2.195392566560842, "learning_rate": 1.3720333547145606e-07, "loss": 0.6656, "step": 79915 }, { "epoch": 0.9740046067785456, "grad_norm": 3.1465735584534036, "learning_rate": 1.3688261706221937e-07, "loss": 0.7079, "step": 79920 }, { "epoch": 0.9740655430026934, "grad_norm": 3.545998274601747, "learning_rate": 1.3656189865298268e-07, "loss": 0.6955, "step": 79925 }, { "epoch": 0.9741264792268411, "grad_norm": 2.703888008919821, "learning_rate": 1.36241180243746e-07, "loss": 0.7175, "step": 79930 }, { "epoch": 0.974187415450989, "grad_norm": 2.9789928353529143, "learning_rate": 1.3592046183450932e-07, "loss": 0.7504, "step": 79935 }, { "epoch": 0.9742483516751368, "grad_norm": 3.941929132190353, "learning_rate": 1.3559974342527263e-07, "loss": 0.6929, "step": 79940 }, { "epoch": 0.9743092878992846, "grad_norm": 3.7954442384140363, "learning_rate": 1.3527902501603594e-07, "loss": 0.7462, "step": 79945 }, { "epoch": 0.9743702241234324, "grad_norm": 2.81375981642278, "learning_rate": 1.3495830660679924e-07, "loss": 0.6948, "step": 79950 }, { "epoch": 0.9744311603475803, "grad_norm": 2.44016038301953, "learning_rate": 1.3463758819756255e-07, "loss": 0.7698, "step": 79955 }, { "epoch": 0.9744920965717281, "grad_norm": 2.3132555218483644, "learning_rate": 1.3431686978832586e-07, "loss": 0.7234, "step": 79960 }, { "epoch": 0.9745530327958758, "grad_norm": 2.8634944716366753, "learning_rate": 1.3399615137908917e-07, "loss": 0.7373, "step": 79965 }, { "epoch": 0.9746139690200236, "grad_norm": 2.091629652206521, "learning_rate": 1.3367543296985248e-07, "loss": 0.7411, "step": 79970 }, { "epoch": 0.9746749052441714, "grad_norm": 2.5535640357525735, "learning_rate": 1.3335471456061578e-07, "loss": 0.6759, "step": 79975 }, { "epoch": 0.9747358414683193, "grad_norm": 2.4784033379292207, "learning_rate": 1.330339961513791e-07, "loss": 0.748, "step": 79980 }, { "epoch": 0.9747967776924671, "grad_norm": 2.206910443879213, "learning_rate": 1.3271327774214243e-07, "loss": 0.7654, "step": 79985 }, { "epoch": 0.9748577139166149, "grad_norm": 2.7150100840309848, "learning_rate": 1.3239255933290573e-07, "loss": 0.7645, "step": 79990 }, { "epoch": 0.9749186501407627, "grad_norm": 2.212829958145881, "learning_rate": 1.3207184092366904e-07, "loss": 0.6491, "step": 79995 }, { "epoch": 0.9749795863649104, "grad_norm": 3.480423203604795, "learning_rate": 1.3175112251443235e-07, "loss": 0.7592, "step": 80000 }, { "epoch": 0.9750405225890583, "grad_norm": 3.8777859095961538, "learning_rate": 1.3143040410519566e-07, "loss": 0.7136, "step": 80005 }, { "epoch": 0.9751014588132061, "grad_norm": 2.3439823107750892, "learning_rate": 1.3110968569595897e-07, "loss": 0.6926, "step": 80010 }, { "epoch": 0.9751623950373539, "grad_norm": 2.42919241444328, "learning_rate": 1.3078896728672227e-07, "loss": 0.6665, "step": 80015 }, { "epoch": 0.9752233312615017, "grad_norm": 3.321593544886975, "learning_rate": 1.3046824887748558e-07, "loss": 0.7065, "step": 80020 }, { "epoch": 0.9752842674856496, "grad_norm": 2.7984409798779177, "learning_rate": 1.301475304682489e-07, "loss": 0.7102, "step": 80025 }, { "epoch": 0.9753452037097974, "grad_norm": 2.424953880822192, "learning_rate": 1.298268120590122e-07, "loss": 0.7666, "step": 80030 }, { "epoch": 0.9754061399339451, "grad_norm": 2.3600759821286474, "learning_rate": 1.295060936497755e-07, "loss": 0.7361, "step": 80035 }, { "epoch": 0.9754670761580929, "grad_norm": 2.1658458009775265, "learning_rate": 1.2918537524053881e-07, "loss": 0.6952, "step": 80040 }, { "epoch": 0.9755280123822407, "grad_norm": 2.5652449650647196, "learning_rate": 1.2886465683130212e-07, "loss": 0.6803, "step": 80045 }, { "epoch": 0.9755889486063886, "grad_norm": 2.697499090884346, "learning_rate": 1.2854393842206543e-07, "loss": 0.7066, "step": 80050 }, { "epoch": 0.9756498848305364, "grad_norm": 2.3519019307444724, "learning_rate": 1.2822322001282874e-07, "loss": 0.7209, "step": 80055 }, { "epoch": 0.9757108210546842, "grad_norm": 2.1071216504461, "learning_rate": 1.2790250160359205e-07, "loss": 0.7098, "step": 80060 }, { "epoch": 0.9757717572788319, "grad_norm": 2.405896542895476, "learning_rate": 1.2758178319435535e-07, "loss": 0.5779, "step": 80065 }, { "epoch": 0.9758326935029797, "grad_norm": 2.677300535999789, "learning_rate": 1.272610647851187e-07, "loss": 0.7372, "step": 80070 }, { "epoch": 0.9758936297271276, "grad_norm": 2.0878253281130945, "learning_rate": 1.26940346375882e-07, "loss": 0.6613, "step": 80075 }, { "epoch": 0.9759545659512754, "grad_norm": 2.7310051779092683, "learning_rate": 1.266196279666453e-07, "loss": 0.7181, "step": 80080 }, { "epoch": 0.9760155021754232, "grad_norm": 2.0829896192839152, "learning_rate": 1.262989095574086e-07, "loss": 0.6696, "step": 80085 }, { "epoch": 0.976076438399571, "grad_norm": 3.4653188482669437, "learning_rate": 1.2597819114817192e-07, "loss": 0.7092, "step": 80090 }, { "epoch": 0.9761373746237189, "grad_norm": 2.227598387007115, "learning_rate": 1.2565747273893523e-07, "loss": 0.711, "step": 80095 }, { "epoch": 0.9761983108478666, "grad_norm": 3.0251170245928107, "learning_rate": 1.2533675432969854e-07, "loss": 0.688, "step": 80100 }, { "epoch": 0.9762592470720144, "grad_norm": 2.087195582247233, "learning_rate": 1.2501603592046184e-07, "loss": 0.6507, "step": 80105 }, { "epoch": 0.9763201832961622, "grad_norm": 2.365514136252083, "learning_rate": 1.2469531751122515e-07, "loss": 0.645, "step": 80110 }, { "epoch": 0.97638111952031, "grad_norm": 2.5855113396695697, "learning_rate": 1.2437459910198846e-07, "loss": 0.7596, "step": 80115 }, { "epoch": 0.9764420557444579, "grad_norm": 2.25487742619731, "learning_rate": 1.2405388069275177e-07, "loss": 0.6492, "step": 80120 }, { "epoch": 0.9765029919686057, "grad_norm": 3.6298297080649946, "learning_rate": 1.2373316228351508e-07, "loss": 0.667, "step": 80125 }, { "epoch": 0.9765639281927535, "grad_norm": 2.464468463647117, "learning_rate": 1.2341244387427838e-07, "loss": 0.7106, "step": 80130 }, { "epoch": 0.9766248644169012, "grad_norm": 2.9732936760282955, "learning_rate": 1.230917254650417e-07, "loss": 0.6103, "step": 80135 }, { "epoch": 0.976685800641049, "grad_norm": 2.4813332604117404, "learning_rate": 1.2277100705580503e-07, "loss": 0.7437, "step": 80140 }, { "epoch": 0.9767467368651969, "grad_norm": 3.2237857521974, "learning_rate": 1.2245028864656833e-07, "loss": 0.7967, "step": 80145 }, { "epoch": 0.9768076730893447, "grad_norm": 2.991413482374214, "learning_rate": 1.2212957023733164e-07, "loss": 0.8044, "step": 80150 }, { "epoch": 0.9768686093134925, "grad_norm": 2.673278991617057, "learning_rate": 1.2180885182809495e-07, "loss": 0.7181, "step": 80155 }, { "epoch": 0.9769295455376403, "grad_norm": 2.480419337841867, "learning_rate": 1.2148813341885826e-07, "loss": 0.7431, "step": 80160 }, { "epoch": 0.9769904817617882, "grad_norm": 2.435198875100443, "learning_rate": 1.2116741500962157e-07, "loss": 0.7827, "step": 80165 }, { "epoch": 0.9770514179859359, "grad_norm": 2.8525198720346734, "learning_rate": 1.2084669660038487e-07, "loss": 0.7135, "step": 80170 }, { "epoch": 0.9771123542100837, "grad_norm": 2.4276876638250227, "learning_rate": 1.2052597819114818e-07, "loss": 0.6569, "step": 80175 }, { "epoch": 0.9771732904342315, "grad_norm": 2.9146204889418232, "learning_rate": 1.202052597819115e-07, "loss": 0.6663, "step": 80180 }, { "epoch": 0.9772342266583793, "grad_norm": 2.3470407302901135, "learning_rate": 1.198845413726748e-07, "loss": 0.6625, "step": 80185 }, { "epoch": 0.9772951628825272, "grad_norm": 2.565463152500646, "learning_rate": 1.195638229634381e-07, "loss": 0.6959, "step": 80190 }, { "epoch": 0.977356099106675, "grad_norm": 2.372471910889423, "learning_rate": 1.192431045542014e-07, "loss": 0.7615, "step": 80195 }, { "epoch": 0.9774170353308228, "grad_norm": 2.7584799526636536, "learning_rate": 1.1892238614496473e-07, "loss": 0.6597, "step": 80200 }, { "epoch": 0.9774779715549705, "grad_norm": 2.6833646980948718, "learning_rate": 1.1860166773572804e-07, "loss": 0.6632, "step": 80205 }, { "epoch": 0.9775389077791183, "grad_norm": 2.771869303514449, "learning_rate": 1.1828094932649135e-07, "loss": 0.7869, "step": 80210 }, { "epoch": 0.9775998440032662, "grad_norm": 2.5833478626461606, "learning_rate": 1.1796023091725466e-07, "loss": 0.6605, "step": 80215 }, { "epoch": 0.977660780227414, "grad_norm": 3.1284833447661002, "learning_rate": 1.1763951250801797e-07, "loss": 0.7215, "step": 80220 }, { "epoch": 0.9777217164515618, "grad_norm": 2.537952756457777, "learning_rate": 1.1731879409878127e-07, "loss": 0.7142, "step": 80225 }, { "epoch": 0.9777826526757096, "grad_norm": 2.515338405279036, "learning_rate": 1.1699807568954458e-07, "loss": 0.7313, "step": 80230 }, { "epoch": 0.9778435888998575, "grad_norm": 2.5474476505068626, "learning_rate": 1.166773572803079e-07, "loss": 0.7147, "step": 80235 }, { "epoch": 0.9779045251240052, "grad_norm": 3.1815844210825417, "learning_rate": 1.1635663887107121e-07, "loss": 0.799, "step": 80240 }, { "epoch": 0.977965461348153, "grad_norm": 2.149819559889884, "learning_rate": 1.1603592046183452e-07, "loss": 0.672, "step": 80245 }, { "epoch": 0.9780263975723008, "grad_norm": 3.2063960100970483, "learning_rate": 1.1571520205259783e-07, "loss": 0.7384, "step": 80250 }, { "epoch": 0.9780873337964486, "grad_norm": 2.7419943157456945, "learning_rate": 1.1539448364336113e-07, "loss": 0.7568, "step": 80255 }, { "epoch": 0.9781482700205965, "grad_norm": 2.919590484234684, "learning_rate": 1.1507376523412444e-07, "loss": 0.7448, "step": 80260 }, { "epoch": 0.9782092062447443, "grad_norm": 4.363068402234689, "learning_rate": 1.1475304682488775e-07, "loss": 0.7448, "step": 80265 }, { "epoch": 0.9782701424688921, "grad_norm": 2.408982766102489, "learning_rate": 1.1443232841565106e-07, "loss": 0.7137, "step": 80270 }, { "epoch": 0.9783310786930398, "grad_norm": 2.296013689913045, "learning_rate": 1.1411161000641437e-07, "loss": 0.6703, "step": 80275 }, { "epoch": 0.9783920149171876, "grad_norm": 3.173588441367948, "learning_rate": 1.1379089159717767e-07, "loss": 0.7813, "step": 80280 }, { "epoch": 0.9784529511413355, "grad_norm": 2.3441965794069732, "learning_rate": 1.1347017318794098e-07, "loss": 0.6532, "step": 80285 }, { "epoch": 0.9785138873654833, "grad_norm": 3.434023178349852, "learning_rate": 1.1314945477870432e-07, "loss": 0.7141, "step": 80290 }, { "epoch": 0.9785748235896311, "grad_norm": 2.4215243401195328, "learning_rate": 1.1282873636946762e-07, "loss": 0.7199, "step": 80295 }, { "epoch": 0.9786357598137789, "grad_norm": 2.533157496135611, "learning_rate": 1.1250801796023093e-07, "loss": 0.7207, "step": 80300 }, { "epoch": 0.9786966960379267, "grad_norm": 2.28936310974708, "learning_rate": 1.1218729955099424e-07, "loss": 0.7526, "step": 80305 }, { "epoch": 0.9787576322620745, "grad_norm": 2.9173596030411297, "learning_rate": 1.1186658114175755e-07, "loss": 0.7003, "step": 80310 }, { "epoch": 0.9788185684862223, "grad_norm": 2.0006188864254253, "learning_rate": 1.1154586273252086e-07, "loss": 0.6805, "step": 80315 }, { "epoch": 0.9788795047103701, "grad_norm": 2.399481903092374, "learning_rate": 1.1122514432328416e-07, "loss": 0.6532, "step": 80320 }, { "epoch": 0.9789404409345179, "grad_norm": 3.099061740115043, "learning_rate": 1.1090442591404749e-07, "loss": 0.7013, "step": 80325 }, { "epoch": 0.9790013771586658, "grad_norm": 2.2714085276804274, "learning_rate": 1.105837075048108e-07, "loss": 0.7181, "step": 80330 }, { "epoch": 0.9790623133828136, "grad_norm": 2.715459276053279, "learning_rate": 1.102629890955741e-07, "loss": 0.7813, "step": 80335 }, { "epoch": 0.9791232496069614, "grad_norm": 2.8175528526721267, "learning_rate": 1.0994227068633741e-07, "loss": 0.7416, "step": 80340 }, { "epoch": 0.9791841858311091, "grad_norm": 2.752845013702769, "learning_rate": 1.0962155227710072e-07, "loss": 0.674, "step": 80345 }, { "epoch": 0.9792451220552569, "grad_norm": 2.54711794880768, "learning_rate": 1.0930083386786403e-07, "loss": 0.7444, "step": 80350 }, { "epoch": 0.9793060582794048, "grad_norm": 2.7948699919123974, "learning_rate": 1.0898011545862733e-07, "loss": 0.6612, "step": 80355 }, { "epoch": 0.9793669945035526, "grad_norm": 2.3483186469889064, "learning_rate": 1.0865939704939064e-07, "loss": 0.6682, "step": 80360 }, { "epoch": 0.9794279307277004, "grad_norm": 2.7014066327022896, "learning_rate": 1.0833867864015395e-07, "loss": 0.6721, "step": 80365 }, { "epoch": 0.9794888669518482, "grad_norm": 2.3063571454331457, "learning_rate": 1.0801796023091726e-07, "loss": 0.6841, "step": 80370 }, { "epoch": 0.979549803175996, "grad_norm": 2.775034772266801, "learning_rate": 1.0769724182168058e-07, "loss": 0.7266, "step": 80375 }, { "epoch": 0.9796107394001438, "grad_norm": 2.52202421010099, "learning_rate": 1.0737652341244389e-07, "loss": 0.6901, "step": 80380 }, { "epoch": 0.9796716756242916, "grad_norm": 2.6470358572273534, "learning_rate": 1.070558050032072e-07, "loss": 0.6392, "step": 80385 }, { "epoch": 0.9797326118484394, "grad_norm": 3.039238202106876, "learning_rate": 1.067350865939705e-07, "loss": 0.6357, "step": 80390 }, { "epoch": 0.9797935480725872, "grad_norm": 1.9177456751059296, "learning_rate": 1.0641436818473381e-07, "loss": 0.7533, "step": 80395 }, { "epoch": 0.979854484296735, "grad_norm": 2.343329782571377, "learning_rate": 1.0609364977549712e-07, "loss": 0.7857, "step": 80400 }, { "epoch": 0.9799154205208829, "grad_norm": 2.35485953388055, "learning_rate": 1.0577293136626043e-07, "loss": 0.6657, "step": 80405 }, { "epoch": 0.9799763567450307, "grad_norm": 2.3979184735567487, "learning_rate": 1.0545221295702373e-07, "loss": 0.6755, "step": 80410 }, { "epoch": 0.9800372929691784, "grad_norm": 2.9825086826707583, "learning_rate": 1.0513149454778704e-07, "loss": 0.6991, "step": 80415 }, { "epoch": 0.9800982291933262, "grad_norm": 2.2356850279052223, "learning_rate": 1.0481077613855035e-07, "loss": 0.7331, "step": 80420 }, { "epoch": 0.980159165417474, "grad_norm": 2.353315145546378, "learning_rate": 1.0449005772931366e-07, "loss": 0.6757, "step": 80425 }, { "epoch": 0.9802201016416219, "grad_norm": 2.237389509503716, "learning_rate": 1.0416933932007698e-07, "loss": 0.6773, "step": 80430 }, { "epoch": 0.9802810378657697, "grad_norm": 1.9783224717546206, "learning_rate": 1.0384862091084029e-07, "loss": 0.7228, "step": 80435 }, { "epoch": 0.9803419740899175, "grad_norm": 5.186264353717816, "learning_rate": 1.0352790250160361e-07, "loss": 0.7869, "step": 80440 }, { "epoch": 0.9804029103140653, "grad_norm": 2.42019132833484, "learning_rate": 1.0320718409236692e-07, "loss": 0.6796, "step": 80445 }, { "epoch": 0.9804638465382131, "grad_norm": 2.5770450994451966, "learning_rate": 1.0288646568313022e-07, "loss": 0.7697, "step": 80450 }, { "epoch": 0.9805247827623609, "grad_norm": 3.685963933394925, "learning_rate": 1.0256574727389353e-07, "loss": 0.719, "step": 80455 }, { "epoch": 0.9805857189865087, "grad_norm": 2.437252420642249, "learning_rate": 1.0224502886465684e-07, "loss": 0.7371, "step": 80460 }, { "epoch": 0.9806466552106565, "grad_norm": 2.300890406338427, "learning_rate": 1.0192431045542016e-07, "loss": 0.7314, "step": 80465 }, { "epoch": 0.9807075914348043, "grad_norm": 2.661081211276311, "learning_rate": 1.0160359204618347e-07, "loss": 0.669, "step": 80470 }, { "epoch": 0.9807685276589522, "grad_norm": 2.3298814866427535, "learning_rate": 1.0128287363694678e-07, "loss": 0.6772, "step": 80475 }, { "epoch": 0.9808294638831, "grad_norm": 3.353600920186339, "learning_rate": 1.0096215522771008e-07, "loss": 0.6545, "step": 80480 }, { "epoch": 0.9808904001072477, "grad_norm": 2.251711382037373, "learning_rate": 1.0064143681847339e-07, "loss": 0.7465, "step": 80485 }, { "epoch": 0.9809513363313955, "grad_norm": 3.1842343840753826, "learning_rate": 1.003207184092367e-07, "loss": 0.745, "step": 80490 }, { "epoch": 0.9810122725555434, "grad_norm": 2.2742605588486184, "learning_rate": 1.0000000000000001e-07, "loss": 0.6776, "step": 80495 }, { "epoch": 0.9810732087796912, "grad_norm": 2.2560104729337365, "learning_rate": 9.967928159076332e-08, "loss": 0.6998, "step": 80500 }, { "epoch": 0.981134145003839, "grad_norm": 2.6366635731246393, "learning_rate": 9.935856318152662e-08, "loss": 0.6776, "step": 80505 }, { "epoch": 0.9811950812279868, "grad_norm": 2.6768053394011884, "learning_rate": 9.903784477228993e-08, "loss": 0.6876, "step": 80510 }, { "epoch": 0.9812560174521346, "grad_norm": 2.3482175330081807, "learning_rate": 9.871712636305325e-08, "loss": 0.7161, "step": 80515 }, { "epoch": 0.9813169536762824, "grad_norm": 2.287867585448301, "learning_rate": 9.839640795381656e-08, "loss": 0.7014, "step": 80520 }, { "epoch": 0.9813778899004302, "grad_norm": 2.1918701202011857, "learning_rate": 9.807568954457987e-08, "loss": 0.6975, "step": 80525 }, { "epoch": 0.981438826124578, "grad_norm": 3.496410880124882, "learning_rate": 9.775497113534318e-08, "loss": 0.7345, "step": 80530 }, { "epoch": 0.9814997623487258, "grad_norm": 2.486391571058108, "learning_rate": 9.743425272610649e-08, "loss": 0.7209, "step": 80535 }, { "epoch": 0.9815606985728736, "grad_norm": 2.1620515571117713, "learning_rate": 9.71135343168698e-08, "loss": 0.6958, "step": 80540 }, { "epoch": 0.9816216347970215, "grad_norm": 2.556136385683305, "learning_rate": 9.67928159076331e-08, "loss": 0.718, "step": 80545 }, { "epoch": 0.9816825710211693, "grad_norm": 3.07599054920643, "learning_rate": 9.647209749839641e-08, "loss": 0.7206, "step": 80550 }, { "epoch": 0.981743507245317, "grad_norm": 2.35921744181619, "learning_rate": 9.615137908915972e-08, "loss": 0.6814, "step": 80555 }, { "epoch": 0.9818044434694648, "grad_norm": 2.2985108744363307, "learning_rate": 9.583066067992303e-08, "loss": 0.7172, "step": 80560 }, { "epoch": 0.9818653796936126, "grad_norm": 2.914299501631383, "learning_rate": 9.550994227068633e-08, "loss": 0.7921, "step": 80565 }, { "epoch": 0.9819263159177605, "grad_norm": 2.4333780618145697, "learning_rate": 9.518922386144965e-08, "loss": 0.64, "step": 80570 }, { "epoch": 0.9819872521419083, "grad_norm": 2.2537290346076015, "learning_rate": 9.486850545221296e-08, "loss": 0.6744, "step": 80575 }, { "epoch": 0.9820481883660561, "grad_norm": 2.00561675764396, "learning_rate": 9.454778704297627e-08, "loss": 0.7041, "step": 80580 }, { "epoch": 0.9821091245902039, "grad_norm": 2.7431519368964117, "learning_rate": 9.422706863373958e-08, "loss": 0.6732, "step": 80585 }, { "epoch": 0.9821700608143517, "grad_norm": 2.2568038999280353, "learning_rate": 9.39063502245029e-08, "loss": 0.6549, "step": 80590 }, { "epoch": 0.9822309970384995, "grad_norm": 2.4026985679524286, "learning_rate": 9.358563181526621e-08, "loss": 0.7803, "step": 80595 }, { "epoch": 0.9822919332626473, "grad_norm": 1.8195696447620768, "learning_rate": 9.326491340602952e-08, "loss": 0.7228, "step": 80600 }, { "epoch": 0.9823528694867951, "grad_norm": 2.7567417948252384, "learning_rate": 9.294419499679284e-08, "loss": 0.6815, "step": 80605 }, { "epoch": 0.982413805710943, "grad_norm": 3.415209460121021, "learning_rate": 9.262347658755614e-08, "loss": 0.7094, "step": 80610 }, { "epoch": 0.9824747419350908, "grad_norm": 2.3007830602024324, "learning_rate": 9.230275817831945e-08, "loss": 0.7047, "step": 80615 }, { "epoch": 0.9825356781592386, "grad_norm": 2.2596816078965056, "learning_rate": 9.198203976908276e-08, "loss": 0.6916, "step": 80620 }, { "epoch": 0.9825966143833863, "grad_norm": 2.9305658775569876, "learning_rate": 9.166132135984607e-08, "loss": 0.7388, "step": 80625 }, { "epoch": 0.9826575506075341, "grad_norm": 2.1533805348334187, "learning_rate": 9.134060295060938e-08, "loss": 0.702, "step": 80630 }, { "epoch": 0.982718486831682, "grad_norm": 2.759956170274782, "learning_rate": 9.101988454137268e-08, "loss": 0.7527, "step": 80635 }, { "epoch": 0.9827794230558298, "grad_norm": 2.220465223571161, "learning_rate": 9.069916613213599e-08, "loss": 0.7683, "step": 80640 }, { "epoch": 0.9828403592799776, "grad_norm": 2.394796903279226, "learning_rate": 9.03784477228993e-08, "loss": 0.7375, "step": 80645 }, { "epoch": 0.9829012955041254, "grad_norm": 2.4860328173965955, "learning_rate": 9.005772931366261e-08, "loss": 0.7257, "step": 80650 }, { "epoch": 0.9829622317282732, "grad_norm": 2.072133814291604, "learning_rate": 8.973701090442592e-08, "loss": 0.6868, "step": 80655 }, { "epoch": 0.983023167952421, "grad_norm": 2.3223764689783093, "learning_rate": 8.941629249518924e-08, "loss": 0.7331, "step": 80660 }, { "epoch": 0.9830841041765688, "grad_norm": 2.5655355850108834, "learning_rate": 8.909557408595254e-08, "loss": 0.7223, "step": 80665 }, { "epoch": 0.9831450404007166, "grad_norm": 2.8034882998650605, "learning_rate": 8.877485567671585e-08, "loss": 0.6911, "step": 80670 }, { "epoch": 0.9832059766248644, "grad_norm": 2.6827545367806125, "learning_rate": 8.845413726747916e-08, "loss": 0.6741, "step": 80675 }, { "epoch": 0.9832669128490122, "grad_norm": 2.9048379491626615, "learning_rate": 8.813341885824247e-08, "loss": 0.7301, "step": 80680 }, { "epoch": 0.9833278490731601, "grad_norm": 2.6975193927330494, "learning_rate": 8.781270044900578e-08, "loss": 0.7013, "step": 80685 }, { "epoch": 0.9833887852973079, "grad_norm": 2.8274670289247346, "learning_rate": 8.749198203976908e-08, "loss": 0.6791, "step": 80690 }, { "epoch": 0.9834497215214556, "grad_norm": 2.764581568773466, "learning_rate": 8.717126363053239e-08, "loss": 0.7627, "step": 80695 }, { "epoch": 0.9835106577456034, "grad_norm": 2.7932732236201576, "learning_rate": 8.68505452212957e-08, "loss": 0.7351, "step": 80700 }, { "epoch": 0.9835715939697512, "grad_norm": 2.2565360687003384, "learning_rate": 8.652982681205901e-08, "loss": 0.7132, "step": 80705 }, { "epoch": 0.9836325301938991, "grad_norm": 2.3797437553978793, "learning_rate": 8.620910840282233e-08, "loss": 0.7593, "step": 80710 }, { "epoch": 0.9836934664180469, "grad_norm": 2.1822522176658206, "learning_rate": 8.588838999358564e-08, "loss": 0.7156, "step": 80715 }, { "epoch": 0.9837544026421947, "grad_norm": 2.4770722957460545, "learning_rate": 8.556767158434895e-08, "loss": 0.7184, "step": 80720 }, { "epoch": 0.9838153388663425, "grad_norm": 3.273993186265265, "learning_rate": 8.524695317511225e-08, "loss": 0.8132, "step": 80725 }, { "epoch": 0.9838762750904902, "grad_norm": 4.120168105959297, "learning_rate": 8.492623476587556e-08, "loss": 0.7344, "step": 80730 }, { "epoch": 0.9839372113146381, "grad_norm": 3.50858298083492, "learning_rate": 8.460551635663887e-08, "loss": 0.709, "step": 80735 }, { "epoch": 0.9839981475387859, "grad_norm": 2.100073453336328, "learning_rate": 8.428479794740219e-08, "loss": 0.6971, "step": 80740 }, { "epoch": 0.9840590837629337, "grad_norm": 4.479487666309653, "learning_rate": 8.396407953816551e-08, "loss": 0.7285, "step": 80745 }, { "epoch": 0.9841200199870815, "grad_norm": 2.5945615271164777, "learning_rate": 8.364336112892882e-08, "loss": 0.6956, "step": 80750 }, { "epoch": 0.9841809562112294, "grad_norm": 2.653270014865654, "learning_rate": 8.332264271969213e-08, "loss": 0.7401, "step": 80755 }, { "epoch": 0.9842418924353772, "grad_norm": 2.7786254497840863, "learning_rate": 8.300192431045544e-08, "loss": 0.7134, "step": 80760 }, { "epoch": 0.9843028286595249, "grad_norm": 3.480744758468327, "learning_rate": 8.268120590121874e-08, "loss": 0.7809, "step": 80765 }, { "epoch": 0.9843637648836727, "grad_norm": 2.104982854297891, "learning_rate": 8.236048749198205e-08, "loss": 0.6388, "step": 80770 }, { "epoch": 0.9844247011078205, "grad_norm": 2.7319789404192045, "learning_rate": 8.203976908274536e-08, "loss": 0.6813, "step": 80775 }, { "epoch": 0.9844856373319684, "grad_norm": 2.332993265619136, "learning_rate": 8.171905067350867e-08, "loss": 0.7212, "step": 80780 }, { "epoch": 0.9845465735561162, "grad_norm": 2.5554213504641012, "learning_rate": 8.139833226427198e-08, "loss": 0.6895, "step": 80785 }, { "epoch": 0.984607509780264, "grad_norm": 2.875320249192024, "learning_rate": 8.107761385503528e-08, "loss": 0.6878, "step": 80790 }, { "epoch": 0.9846684460044118, "grad_norm": 2.1901203909525404, "learning_rate": 8.075689544579859e-08, "loss": 0.6579, "step": 80795 }, { "epoch": 0.9847293822285595, "grad_norm": 2.6309622355714173, "learning_rate": 8.043617703656191e-08, "loss": 0.7027, "step": 80800 }, { "epoch": 0.9847903184527074, "grad_norm": 2.3644459467200183, "learning_rate": 8.011545862732522e-08, "loss": 0.6496, "step": 80805 }, { "epoch": 0.9848512546768552, "grad_norm": 2.3646543649269813, "learning_rate": 7.979474021808853e-08, "loss": 0.7186, "step": 80810 }, { "epoch": 0.984912190901003, "grad_norm": 3.4098610025523066, "learning_rate": 7.947402180885184e-08, "loss": 0.7253, "step": 80815 }, { "epoch": 0.9849731271251508, "grad_norm": 2.618137131046574, "learning_rate": 7.915330339961514e-08, "loss": 0.7698, "step": 80820 }, { "epoch": 0.9850340633492987, "grad_norm": 2.755746403718643, "learning_rate": 7.883258499037845e-08, "loss": 0.7294, "step": 80825 }, { "epoch": 0.9850949995734465, "grad_norm": 2.8683901228301534, "learning_rate": 7.851186658114176e-08, "loss": 0.7042, "step": 80830 }, { "epoch": 0.9851559357975942, "grad_norm": 2.4623411962024706, "learning_rate": 7.819114817190507e-08, "loss": 0.6656, "step": 80835 }, { "epoch": 0.985216872021742, "grad_norm": 2.8459880208615638, "learning_rate": 7.787042976266838e-08, "loss": 0.7388, "step": 80840 }, { "epoch": 0.9852778082458898, "grad_norm": 2.646816683990292, "learning_rate": 7.754971135343168e-08, "loss": 0.7449, "step": 80845 }, { "epoch": 0.9853387444700377, "grad_norm": 2.241425630739252, "learning_rate": 7.7228992944195e-08, "loss": 0.7221, "step": 80850 }, { "epoch": 0.9853996806941855, "grad_norm": 2.215155067769655, "learning_rate": 7.690827453495831e-08, "loss": 0.7167, "step": 80855 }, { "epoch": 0.9854606169183333, "grad_norm": 2.3352857900377764, "learning_rate": 7.658755612572162e-08, "loss": 0.7071, "step": 80860 }, { "epoch": 0.9855215531424811, "grad_norm": 2.7895936167886655, "learning_rate": 7.626683771648493e-08, "loss": 0.7469, "step": 80865 }, { "epoch": 0.9855824893666288, "grad_norm": 2.631129518416139, "learning_rate": 7.594611930724825e-08, "loss": 0.7543, "step": 80870 }, { "epoch": 0.9856434255907767, "grad_norm": 2.4567779960221485, "learning_rate": 7.562540089801156e-08, "loss": 0.7377, "step": 80875 }, { "epoch": 0.9857043618149245, "grad_norm": 2.5290755353828276, "learning_rate": 7.530468248877487e-08, "loss": 0.6606, "step": 80880 }, { "epoch": 0.9857652980390723, "grad_norm": 2.175680970812715, "learning_rate": 7.498396407953817e-08, "loss": 0.7147, "step": 80885 }, { "epoch": 0.9858262342632201, "grad_norm": 2.212629307674809, "learning_rate": 7.466324567030148e-08, "loss": 0.6911, "step": 80890 }, { "epoch": 0.985887170487368, "grad_norm": 2.3010019918225804, "learning_rate": 7.434252726106479e-08, "loss": 0.6149, "step": 80895 }, { "epoch": 0.9859481067115158, "grad_norm": 2.0853665340440455, "learning_rate": 7.40218088518281e-08, "loss": 0.6512, "step": 80900 }, { "epoch": 0.9860090429356635, "grad_norm": 2.397691995469832, "learning_rate": 7.37010904425914e-08, "loss": 0.817, "step": 80905 }, { "epoch": 0.9860699791598113, "grad_norm": 2.7659207788101474, "learning_rate": 7.338037203335471e-08, "loss": 0.7202, "step": 80910 }, { "epoch": 0.9861309153839591, "grad_norm": 2.9567298545961105, "learning_rate": 7.305965362411802e-08, "loss": 0.6754, "step": 80915 }, { "epoch": 0.986191851608107, "grad_norm": 2.2935703125395257, "learning_rate": 7.273893521488134e-08, "loss": 0.7302, "step": 80920 }, { "epoch": 0.9862527878322548, "grad_norm": 3.1409637973535123, "learning_rate": 7.241821680564465e-08, "loss": 0.8438, "step": 80925 }, { "epoch": 0.9863137240564026, "grad_norm": 2.1792003048194006, "learning_rate": 7.209749839640796e-08, "loss": 0.6818, "step": 80930 }, { "epoch": 0.9863746602805504, "grad_norm": 3.6667701123955494, "learning_rate": 7.177677998717127e-08, "loss": 0.6683, "step": 80935 }, { "epoch": 0.9864355965046981, "grad_norm": 3.899515234435039, "learning_rate": 7.145606157793459e-08, "loss": 0.7147, "step": 80940 }, { "epoch": 0.986496532728846, "grad_norm": 3.049635028461996, "learning_rate": 7.11353431686979e-08, "loss": 0.7028, "step": 80945 }, { "epoch": 0.9865574689529938, "grad_norm": 2.7930677123812564, "learning_rate": 7.08146247594612e-08, "loss": 0.7819, "step": 80950 }, { "epoch": 0.9866184051771416, "grad_norm": 2.545613090218418, "learning_rate": 7.049390635022451e-08, "loss": 0.6398, "step": 80955 }, { "epoch": 0.9866793414012894, "grad_norm": 3.310259077676296, "learning_rate": 7.017318794098782e-08, "loss": 0.6733, "step": 80960 }, { "epoch": 0.9867402776254373, "grad_norm": 2.213731463241539, "learning_rate": 6.985246953175113e-08, "loss": 0.7021, "step": 80965 }, { "epoch": 0.9868012138495851, "grad_norm": 2.6882803373934503, "learning_rate": 6.953175112251444e-08, "loss": 0.6191, "step": 80970 }, { "epoch": 0.9868621500737328, "grad_norm": 1.9733417765041954, "learning_rate": 6.921103271327774e-08, "loss": 0.6748, "step": 80975 }, { "epoch": 0.9869230862978806, "grad_norm": 2.8578930541802356, "learning_rate": 6.889031430404105e-08, "loss": 0.706, "step": 80980 }, { "epoch": 0.9869840225220284, "grad_norm": 2.7349746609646246, "learning_rate": 6.856959589480436e-08, "loss": 0.7467, "step": 80985 }, { "epoch": 0.9870449587461763, "grad_norm": 2.2364925176795976, "learning_rate": 6.824887748556768e-08, "loss": 0.7667, "step": 80990 }, { "epoch": 0.9871058949703241, "grad_norm": 2.7223200522215594, "learning_rate": 6.792815907633099e-08, "loss": 0.7646, "step": 80995 }, { "epoch": 0.9871668311944719, "grad_norm": 2.243924884959642, "learning_rate": 6.76074406670943e-08, "loss": 0.6881, "step": 81000 }, { "epoch": 0.9872277674186196, "grad_norm": 2.2441876335311486, "learning_rate": 6.72867222578576e-08, "loss": 0.651, "step": 81005 }, { "epoch": 0.9872887036427674, "grad_norm": 2.8390406347841592, "learning_rate": 6.696600384862093e-08, "loss": 0.6974, "step": 81010 }, { "epoch": 0.9873496398669153, "grad_norm": 2.613199606045195, "learning_rate": 6.664528543938423e-08, "loss": 0.7182, "step": 81015 }, { "epoch": 0.9874105760910631, "grad_norm": 2.4988718846927993, "learning_rate": 6.632456703014754e-08, "loss": 0.684, "step": 81020 }, { "epoch": 0.9874715123152109, "grad_norm": 2.308452318254134, "learning_rate": 6.600384862091085e-08, "loss": 0.6879, "step": 81025 }, { "epoch": 0.9875324485393587, "grad_norm": 2.5565165754175547, "learning_rate": 6.568313021167416e-08, "loss": 0.7381, "step": 81030 }, { "epoch": 0.9875933847635066, "grad_norm": 2.1909748004142875, "learning_rate": 6.536241180243747e-08, "loss": 0.7032, "step": 81035 }, { "epoch": 0.9876543209876543, "grad_norm": 3.2554833708906825, "learning_rate": 6.504169339320077e-08, "loss": 0.779, "step": 81040 }, { "epoch": 0.9877152572118021, "grad_norm": 2.1204095099698597, "learning_rate": 6.472097498396408e-08, "loss": 0.7138, "step": 81045 }, { "epoch": 0.9877761934359499, "grad_norm": 2.4811246871473576, "learning_rate": 6.440025657472739e-08, "loss": 0.7483, "step": 81050 }, { "epoch": 0.9878371296600977, "grad_norm": 2.3196628997748396, "learning_rate": 6.40795381654907e-08, "loss": 0.7283, "step": 81055 }, { "epoch": 0.9878980658842456, "grad_norm": 2.832662293019304, "learning_rate": 6.375881975625402e-08, "loss": 0.6338, "step": 81060 }, { "epoch": 0.9879590021083934, "grad_norm": 2.0869124489737043, "learning_rate": 6.343810134701733e-08, "loss": 0.6643, "step": 81065 }, { "epoch": 0.9880199383325412, "grad_norm": 1.9824765681660206, "learning_rate": 6.311738293778063e-08, "loss": 0.7565, "step": 81070 }, { "epoch": 0.9880808745566889, "grad_norm": 2.3530462740149396, "learning_rate": 6.279666452854394e-08, "loss": 0.6891, "step": 81075 }, { "epoch": 0.9881418107808367, "grad_norm": 2.6743664731306898, "learning_rate": 6.247594611930726e-08, "loss": 0.759, "step": 81080 }, { "epoch": 0.9882027470049846, "grad_norm": 2.602434550785927, "learning_rate": 6.215522771007057e-08, "loss": 0.7763, "step": 81085 }, { "epoch": 0.9882636832291324, "grad_norm": 2.5494170914762084, "learning_rate": 6.183450930083388e-08, "loss": 0.7993, "step": 81090 }, { "epoch": 0.9883246194532802, "grad_norm": 2.261853265137418, "learning_rate": 6.151379089159719e-08, "loss": 0.6564, "step": 81095 }, { "epoch": 0.988385555677428, "grad_norm": 3.359480990887781, "learning_rate": 6.11930724823605e-08, "loss": 0.6503, "step": 81100 }, { "epoch": 0.9884464919015759, "grad_norm": 2.5257174608908217, "learning_rate": 6.08723540731238e-08, "loss": 0.6286, "step": 81105 }, { "epoch": 0.9885074281257236, "grad_norm": 3.210123616710812, "learning_rate": 6.055163566388711e-08, "loss": 0.7326, "step": 81110 }, { "epoch": 0.9885683643498714, "grad_norm": 2.1446268195751665, "learning_rate": 6.023091725465042e-08, "loss": 0.7182, "step": 81115 }, { "epoch": 0.9886293005740192, "grad_norm": 2.3242604022626363, "learning_rate": 5.991019884541373e-08, "loss": 0.76, "step": 81120 }, { "epoch": 0.988690236798167, "grad_norm": 2.6300949795985336, "learning_rate": 5.958948043617704e-08, "loss": 0.7236, "step": 81125 }, { "epoch": 0.9887511730223149, "grad_norm": 2.2716627460098486, "learning_rate": 5.926876202694035e-08, "loss": 0.6584, "step": 81130 }, { "epoch": 0.9888121092464627, "grad_norm": 2.9880347740670854, "learning_rate": 5.894804361770366e-08, "loss": 0.7229, "step": 81135 }, { "epoch": 0.9888730454706105, "grad_norm": 2.506121658666199, "learning_rate": 5.8627325208466965e-08, "loss": 0.6882, "step": 81140 }, { "epoch": 0.9889339816947582, "grad_norm": 3.054379145908553, "learning_rate": 5.830660679923028e-08, "loss": 0.8067, "step": 81145 }, { "epoch": 0.988994917918906, "grad_norm": 2.6610241273766064, "learning_rate": 5.798588838999359e-08, "loss": 0.7479, "step": 81150 }, { "epoch": 0.9890558541430539, "grad_norm": 2.151474618525433, "learning_rate": 5.7665169980756896e-08, "loss": 0.6757, "step": 81155 }, { "epoch": 0.9891167903672017, "grad_norm": 2.7817365000586696, "learning_rate": 5.734445157152022e-08, "loss": 0.7296, "step": 81160 }, { "epoch": 0.9891777265913495, "grad_norm": 2.1532305971622208, "learning_rate": 5.7023733162283525e-08, "loss": 0.6745, "step": 81165 }, { "epoch": 0.9892386628154973, "grad_norm": 3.6086011499946986, "learning_rate": 5.670301475304683e-08, "loss": 0.7848, "step": 81170 }, { "epoch": 0.9892995990396452, "grad_norm": 2.6260366188886044, "learning_rate": 5.638229634381014e-08, "loss": 0.7357, "step": 81175 }, { "epoch": 0.9893605352637929, "grad_norm": 2.287797764657664, "learning_rate": 5.606157793457345e-08, "loss": 0.7187, "step": 81180 }, { "epoch": 0.9894214714879407, "grad_norm": 2.2808938142645045, "learning_rate": 5.574085952533676e-08, "loss": 0.7142, "step": 81185 }, { "epoch": 0.9894824077120885, "grad_norm": 2.595379253670876, "learning_rate": 5.542014111610007e-08, "loss": 0.6975, "step": 81190 }, { "epoch": 0.9895433439362363, "grad_norm": 2.5491482790894175, "learning_rate": 5.509942270686338e-08, "loss": 0.7073, "step": 81195 }, { "epoch": 0.9896042801603842, "grad_norm": 2.1472573458492548, "learning_rate": 5.477870429762669e-08, "loss": 0.6645, "step": 81200 }, { "epoch": 0.989665216384532, "grad_norm": 2.569634566047842, "learning_rate": 5.4457985888389995e-08, "loss": 0.6989, "step": 81205 }, { "epoch": 0.9897261526086798, "grad_norm": 2.6020819129059802, "learning_rate": 5.41372674791533e-08, "loss": 0.7578, "step": 81210 }, { "epoch": 0.9897870888328275, "grad_norm": 2.4332279454181074, "learning_rate": 5.381654906991662e-08, "loss": 0.6626, "step": 81215 }, { "epoch": 0.9898480250569753, "grad_norm": 4.5718788187154535, "learning_rate": 5.3495830660679925e-08, "loss": 0.7014, "step": 81220 }, { "epoch": 0.9899089612811232, "grad_norm": 2.7629467389358435, "learning_rate": 5.317511225144323e-08, "loss": 0.6982, "step": 81225 }, { "epoch": 0.989969897505271, "grad_norm": 2.182931994160988, "learning_rate": 5.285439384220654e-08, "loss": 0.6852, "step": 81230 }, { "epoch": 0.9900308337294188, "grad_norm": 2.227588714823817, "learning_rate": 5.253367543296986e-08, "loss": 0.6717, "step": 81235 }, { "epoch": 0.9900917699535666, "grad_norm": 2.3903336998962486, "learning_rate": 5.221295702373317e-08, "loss": 0.7211, "step": 81240 }, { "epoch": 0.9901527061777144, "grad_norm": 2.256908507770672, "learning_rate": 5.189223861449648e-08, "loss": 0.6713, "step": 81245 }, { "epoch": 0.9902136424018622, "grad_norm": 2.7518984994832993, "learning_rate": 5.1571520205259786e-08, "loss": 0.7159, "step": 81250 }, { "epoch": 0.99027457862601, "grad_norm": 2.8872861724648167, "learning_rate": 5.12508017960231e-08, "loss": 0.6829, "step": 81255 }, { "epoch": 0.9903355148501578, "grad_norm": 3.228820393281955, "learning_rate": 5.093008338678641e-08, "loss": 0.6888, "step": 81260 }, { "epoch": 0.9903964510743056, "grad_norm": 2.296225419616118, "learning_rate": 5.0609364977549717e-08, "loss": 0.7355, "step": 81265 }, { "epoch": 0.9904573872984535, "grad_norm": 2.142182758594269, "learning_rate": 5.0288646568313025e-08, "loss": 0.7323, "step": 81270 }, { "epoch": 0.9905183235226013, "grad_norm": 2.644762340267657, "learning_rate": 4.996792815907633e-08, "loss": 0.671, "step": 81275 }, { "epoch": 0.9905792597467491, "grad_norm": 2.4284493553595903, "learning_rate": 4.964720974983964e-08, "loss": 0.6449, "step": 81280 }, { "epoch": 0.9906401959708968, "grad_norm": 3.4265573778627663, "learning_rate": 4.9326491340602955e-08, "loss": 0.6936, "step": 81285 }, { "epoch": 0.9907011321950446, "grad_norm": 2.4691551022680858, "learning_rate": 4.900577293136626e-08, "loss": 0.7213, "step": 81290 }, { "epoch": 0.9907620684191925, "grad_norm": 2.5758922252869842, "learning_rate": 4.868505452212957e-08, "loss": 0.7844, "step": 81295 }, { "epoch": 0.9908230046433403, "grad_norm": 2.4230669125636437, "learning_rate": 4.836433611289288e-08, "loss": 0.6787, "step": 81300 }, { "epoch": 0.9908839408674881, "grad_norm": 3.34594632704632, "learning_rate": 4.804361770365619e-08, "loss": 0.7275, "step": 81305 }, { "epoch": 0.9909448770916359, "grad_norm": 2.420830551606557, "learning_rate": 4.772289929441951e-08, "loss": 0.7053, "step": 81310 }, { "epoch": 0.9910058133157837, "grad_norm": 2.733267550960442, "learning_rate": 4.7402180885182816e-08, "loss": 0.7826, "step": 81315 }, { "epoch": 0.9910667495399315, "grad_norm": 3.7232330012314008, "learning_rate": 4.7081462475946124e-08, "loss": 0.7395, "step": 81320 }, { "epoch": 0.9911276857640793, "grad_norm": 2.5798949528374155, "learning_rate": 4.676074406670944e-08, "loss": 0.7552, "step": 81325 }, { "epoch": 0.9911886219882271, "grad_norm": 2.8592647751504345, "learning_rate": 4.6440025657472746e-08, "loss": 0.7395, "step": 81330 }, { "epoch": 0.9912495582123749, "grad_norm": 2.5995613272894573, "learning_rate": 4.6119307248236054e-08, "loss": 0.6596, "step": 81335 }, { "epoch": 0.9913104944365227, "grad_norm": 2.356158413938297, "learning_rate": 4.579858883899936e-08, "loss": 0.6896, "step": 81340 }, { "epoch": 0.9913714306606706, "grad_norm": 3.456157894158641, "learning_rate": 4.547787042976267e-08, "loss": 0.7332, "step": 81345 }, { "epoch": 0.9914323668848184, "grad_norm": 2.445789364768607, "learning_rate": 4.515715202052598e-08, "loss": 0.6904, "step": 81350 }, { "epoch": 0.9914933031089661, "grad_norm": 2.876616863565703, "learning_rate": 4.483643361128929e-08, "loss": 0.6818, "step": 81355 }, { "epoch": 0.9915542393331139, "grad_norm": 2.8162821858378932, "learning_rate": 4.45157152020526e-08, "loss": 0.7566, "step": 81360 }, { "epoch": 0.9916151755572618, "grad_norm": 2.4011811048822187, "learning_rate": 4.419499679281591e-08, "loss": 0.73, "step": 81365 }, { "epoch": 0.9916761117814096, "grad_norm": 2.8047513489814073, "learning_rate": 4.3874278383579217e-08, "loss": 0.7317, "step": 81370 }, { "epoch": 0.9917370480055574, "grad_norm": 3.0371899170850734, "learning_rate": 4.3553559974342524e-08, "loss": 0.7407, "step": 81375 }, { "epoch": 0.9917979842297052, "grad_norm": 2.222661746242371, "learning_rate": 4.323284156510584e-08, "loss": 0.6914, "step": 81380 }, { "epoch": 0.991858920453853, "grad_norm": 2.356622475274728, "learning_rate": 4.2912123155869154e-08, "loss": 0.6158, "step": 81385 }, { "epoch": 0.9919198566780008, "grad_norm": 2.6774080718065476, "learning_rate": 4.259140474663246e-08, "loss": 0.7175, "step": 81390 }, { "epoch": 0.9919807929021486, "grad_norm": 2.2615800717141235, "learning_rate": 4.2270686337395776e-08, "loss": 0.637, "step": 81395 }, { "epoch": 0.9920417291262964, "grad_norm": 2.4657274147184975, "learning_rate": 4.1949967928159084e-08, "loss": 0.6979, "step": 81400 }, { "epoch": 0.9921026653504442, "grad_norm": 2.4828798946478705, "learning_rate": 4.162924951892239e-08, "loss": 0.6413, "step": 81405 }, { "epoch": 0.992163601574592, "grad_norm": 3.0758126220101767, "learning_rate": 4.13085311096857e-08, "loss": 0.7093, "step": 81410 }, { "epoch": 0.9922245377987399, "grad_norm": 2.7274778635870365, "learning_rate": 4.098781270044901e-08, "loss": 0.7592, "step": 81415 }, { "epoch": 0.9922854740228877, "grad_norm": 2.6967169343814454, "learning_rate": 4.0667094291212316e-08, "loss": 0.7914, "step": 81420 }, { "epoch": 0.9923464102470354, "grad_norm": 3.0184334548680902, "learning_rate": 4.034637588197563e-08, "loss": 0.6906, "step": 81425 }, { "epoch": 0.9924073464711832, "grad_norm": 3.9232284104358013, "learning_rate": 4.002565747273894e-08, "loss": 0.6585, "step": 81430 }, { "epoch": 0.992468282695331, "grad_norm": 4.5792853550417165, "learning_rate": 3.9704939063502246e-08, "loss": 0.7478, "step": 81435 }, { "epoch": 0.9925292189194789, "grad_norm": 2.575740491412348, "learning_rate": 3.9384220654265554e-08, "loss": 0.7209, "step": 81440 }, { "epoch": 0.9925901551436267, "grad_norm": 2.506366327531049, "learning_rate": 3.906350224502886e-08, "loss": 0.7228, "step": 81445 }, { "epoch": 0.9926510913677745, "grad_norm": 2.8940388617143107, "learning_rate": 3.874278383579218e-08, "loss": 0.7436, "step": 81450 }, { "epoch": 0.9927120275919223, "grad_norm": 2.591368960512075, "learning_rate": 3.8422065426555485e-08, "loss": 0.6548, "step": 81455 }, { "epoch": 0.99277296381607, "grad_norm": 2.587941698202718, "learning_rate": 3.81013470173188e-08, "loss": 0.7479, "step": 81460 }, { "epoch": 0.9928339000402179, "grad_norm": 2.1888068569210675, "learning_rate": 3.778062860808211e-08, "loss": 0.6861, "step": 81465 }, { "epoch": 0.9928948362643657, "grad_norm": 2.331143286717252, "learning_rate": 3.7459910198845415e-08, "loss": 0.7238, "step": 81470 }, { "epoch": 0.9929557724885135, "grad_norm": 2.3898441354073516, "learning_rate": 3.713919178960872e-08, "loss": 0.7322, "step": 81475 }, { "epoch": 0.9930167087126613, "grad_norm": 2.8841199267426236, "learning_rate": 3.681847338037204e-08, "loss": 0.7298, "step": 81480 }, { "epoch": 0.9930776449368092, "grad_norm": 2.4029140035368326, "learning_rate": 3.6497754971135346e-08, "loss": 0.6848, "step": 81485 }, { "epoch": 0.993138581160957, "grad_norm": 2.4861612092236194, "learning_rate": 3.6177036561898653e-08, "loss": 0.6915, "step": 81490 }, { "epoch": 0.9931995173851047, "grad_norm": 2.5630030420732886, "learning_rate": 3.585631815266197e-08, "loss": 0.6272, "step": 81495 }, { "epoch": 0.9932604536092525, "grad_norm": 2.265777702640602, "learning_rate": 3.5535599743425276e-08, "loss": 0.6517, "step": 81500 }, { "epoch": 0.9933213898334003, "grad_norm": 2.1595631162745907, "learning_rate": 3.5214881334188584e-08, "loss": 0.7379, "step": 81505 }, { "epoch": 0.9933823260575482, "grad_norm": 2.021992439378046, "learning_rate": 3.489416292495189e-08, "loss": 0.733, "step": 81510 }, { "epoch": 0.993443262281696, "grad_norm": 2.3747328726209176, "learning_rate": 3.4573444515715206e-08, "loss": 0.7276, "step": 81515 }, { "epoch": 0.9935041985058438, "grad_norm": 2.4375639281878385, "learning_rate": 3.4252726106478514e-08, "loss": 0.6612, "step": 81520 }, { "epoch": 0.9935651347299916, "grad_norm": 4.818480772190139, "learning_rate": 3.393200769724182e-08, "loss": 0.7438, "step": 81525 }, { "epoch": 0.9936260709541394, "grad_norm": 2.463511797416268, "learning_rate": 3.361128928800514e-08, "loss": 0.6546, "step": 81530 }, { "epoch": 0.9936870071782872, "grad_norm": 2.5944277432190557, "learning_rate": 3.3290570878768445e-08, "loss": 0.7967, "step": 81535 }, { "epoch": 0.993747943402435, "grad_norm": 2.568400556581114, "learning_rate": 3.296985246953175e-08, "loss": 0.7765, "step": 81540 }, { "epoch": 0.9938088796265828, "grad_norm": 2.4115426287516573, "learning_rate": 3.264913406029506e-08, "loss": 0.68, "step": 81545 }, { "epoch": 0.9938698158507306, "grad_norm": 3.1787238300135545, "learning_rate": 3.232841565105837e-08, "loss": 0.7861, "step": 81550 }, { "epoch": 0.9939307520748785, "grad_norm": 2.932007184072249, "learning_rate": 3.200769724182168e-08, "loss": 0.6345, "step": 81555 }, { "epoch": 0.9939916882990263, "grad_norm": 2.8887151621187983, "learning_rate": 3.168697883258499e-08, "loss": 0.7397, "step": 81560 }, { "epoch": 0.994052624523174, "grad_norm": 2.3824575595773205, "learning_rate": 3.1366260423348306e-08, "loss": 0.7267, "step": 81565 }, { "epoch": 0.9941135607473218, "grad_norm": 2.2970525764969665, "learning_rate": 3.1045542014111614e-08, "loss": 0.6865, "step": 81570 }, { "epoch": 0.9941744969714696, "grad_norm": 2.870188518563279, "learning_rate": 3.072482360487492e-08, "loss": 0.7584, "step": 81575 }, { "epoch": 0.9942354331956175, "grad_norm": 2.6558130434263396, "learning_rate": 3.040410519563823e-08, "loss": 0.7294, "step": 81580 }, { "epoch": 0.9942963694197653, "grad_norm": 3.2987653646394386, "learning_rate": 3.008338678640154e-08, "loss": 0.6974, "step": 81585 }, { "epoch": 0.9943573056439131, "grad_norm": 2.365400285917797, "learning_rate": 2.9762668377164855e-08, "loss": 0.7222, "step": 81590 }, { "epoch": 0.9944182418680609, "grad_norm": 4.640492737063605, "learning_rate": 2.9441949967928163e-08, "loss": 0.6823, "step": 81595 }, { "epoch": 0.9944791780922086, "grad_norm": 2.6097909928102645, "learning_rate": 2.912123155869147e-08, "loss": 0.7829, "step": 81600 }, { "epoch": 0.9945401143163565, "grad_norm": 2.511529494891467, "learning_rate": 2.8800513149454783e-08, "loss": 0.7322, "step": 81605 }, { "epoch": 0.9946010505405043, "grad_norm": 2.289429708609549, "learning_rate": 2.847979474021809e-08, "loss": 0.7149, "step": 81610 }, { "epoch": 0.9946619867646521, "grad_norm": 2.44912690289963, "learning_rate": 2.81590763309814e-08, "loss": 0.6823, "step": 81615 }, { "epoch": 0.9947229229887999, "grad_norm": 4.282917250908175, "learning_rate": 2.783835792174471e-08, "loss": 0.7687, "step": 81620 }, { "epoch": 0.9947838592129478, "grad_norm": 2.4501882648924416, "learning_rate": 2.7517639512508018e-08, "loss": 0.7573, "step": 81625 }, { "epoch": 0.9948447954370956, "grad_norm": 2.2175508894063007, "learning_rate": 2.7196921103271332e-08, "loss": 0.711, "step": 81630 }, { "epoch": 0.9949057316612433, "grad_norm": 2.413566764706148, "learning_rate": 2.687620269403464e-08, "loss": 0.659, "step": 81635 }, { "epoch": 0.9949666678853911, "grad_norm": 2.5952958765820147, "learning_rate": 2.655548428479795e-08, "loss": 0.6411, "step": 81640 }, { "epoch": 0.9950276041095389, "grad_norm": 2.677033139811305, "learning_rate": 2.623476587556126e-08, "loss": 0.7361, "step": 81645 }, { "epoch": 0.9950885403336868, "grad_norm": 2.334834682219534, "learning_rate": 2.5914047466324567e-08, "loss": 0.7113, "step": 81650 }, { "epoch": 0.9951494765578346, "grad_norm": 2.445255002661764, "learning_rate": 2.559332905708788e-08, "loss": 0.7294, "step": 81655 }, { "epoch": 0.9952104127819824, "grad_norm": 2.153976776984813, "learning_rate": 2.5272610647851186e-08, "loss": 0.6331, "step": 81660 }, { "epoch": 0.9952713490061302, "grad_norm": 4.992724977247187, "learning_rate": 2.4951892238614498e-08, "loss": 0.7061, "step": 81665 }, { "epoch": 0.995332285230278, "grad_norm": 2.6510153947955577, "learning_rate": 2.463117382937781e-08, "loss": 0.722, "step": 81670 }, { "epoch": 0.9953932214544258, "grad_norm": 2.302230931294423, "learning_rate": 2.431045542014112e-08, "loss": 0.7798, "step": 81675 }, { "epoch": 0.9954541576785736, "grad_norm": 2.480511983704652, "learning_rate": 2.3989737010904428e-08, "loss": 0.7604, "step": 81680 }, { "epoch": 0.9955150939027214, "grad_norm": 2.6570271015910008, "learning_rate": 2.3669018601667736e-08, "loss": 0.717, "step": 81685 }, { "epoch": 0.9955760301268692, "grad_norm": 3.409596838235887, "learning_rate": 2.3348300192431047e-08, "loss": 0.7305, "step": 81690 }, { "epoch": 0.9956369663510171, "grad_norm": 3.1485925018939436, "learning_rate": 2.3027581783194355e-08, "loss": 0.7258, "step": 81695 }, { "epoch": 0.9956979025751649, "grad_norm": 2.196959409627147, "learning_rate": 2.2706863373957667e-08, "loss": 0.708, "step": 81700 }, { "epoch": 0.9957588387993126, "grad_norm": 2.0849632575990356, "learning_rate": 2.2386144964720978e-08, "loss": 0.6818, "step": 81705 }, { "epoch": 0.9958197750234604, "grad_norm": 2.1441696730624074, "learning_rate": 2.206542655548429e-08, "loss": 0.6144, "step": 81710 }, { "epoch": 0.9958807112476082, "grad_norm": 2.7334222571144, "learning_rate": 2.1744708146247597e-08, "loss": 0.713, "step": 81715 }, { "epoch": 0.9959416474717561, "grad_norm": 2.9000030778893353, "learning_rate": 2.1423989737010905e-08, "loss": 0.7364, "step": 81720 }, { "epoch": 0.9960025836959039, "grad_norm": 2.1918102986819363, "learning_rate": 2.1103271327774216e-08, "loss": 0.7302, "step": 81725 }, { "epoch": 0.9960635199200517, "grad_norm": 4.4505906047499035, "learning_rate": 2.0782552918537524e-08, "loss": 0.7422, "step": 81730 }, { "epoch": 0.9961244561441995, "grad_norm": 3.6643042230376626, "learning_rate": 2.0461834509300835e-08, "loss": 0.711, "step": 81735 }, { "epoch": 0.9961853923683472, "grad_norm": 2.4880943541828766, "learning_rate": 2.0141116100064143e-08, "loss": 0.8208, "step": 81740 }, { "epoch": 0.9962463285924951, "grad_norm": 2.7933630514420447, "learning_rate": 1.9820397690827458e-08, "loss": 0.7636, "step": 81745 }, { "epoch": 0.9963072648166429, "grad_norm": 3.071273465985278, "learning_rate": 1.9499679281590766e-08, "loss": 0.6256, "step": 81750 }, { "epoch": 0.9963682010407907, "grad_norm": 3.8331087471289966, "learning_rate": 1.9178960872354074e-08, "loss": 0.6988, "step": 81755 }, { "epoch": 0.9964291372649385, "grad_norm": 2.8546345274110316, "learning_rate": 1.8858242463117385e-08, "loss": 0.7326, "step": 81760 }, { "epoch": 0.9964900734890864, "grad_norm": 2.5764439847457368, "learning_rate": 1.8537524053880693e-08, "loss": 0.7251, "step": 81765 }, { "epoch": 0.9965510097132342, "grad_norm": 3.014565178288496, "learning_rate": 1.8216805644644004e-08, "loss": 0.7775, "step": 81770 }, { "epoch": 0.9966119459373819, "grad_norm": 2.9013219198143463, "learning_rate": 1.7896087235407315e-08, "loss": 0.7066, "step": 81775 }, { "epoch": 0.9966728821615297, "grad_norm": 2.5203086200298763, "learning_rate": 1.7575368826170623e-08, "loss": 0.7305, "step": 81780 }, { "epoch": 0.9967338183856775, "grad_norm": 3.154909096417816, "learning_rate": 1.725465041693393e-08, "loss": 0.671, "step": 81785 }, { "epoch": 0.9967947546098254, "grad_norm": 3.3249335768914072, "learning_rate": 1.6933932007697243e-08, "loss": 0.7816, "step": 81790 }, { "epoch": 0.9968556908339732, "grad_norm": 2.7104650701843336, "learning_rate": 1.6613213598460554e-08, "loss": 0.7522, "step": 81795 }, { "epoch": 0.996916627058121, "grad_norm": 2.660643218158807, "learning_rate": 1.6292495189223862e-08, "loss": 0.7524, "step": 81800 }, { "epoch": 0.9969775632822688, "grad_norm": 2.299004972954362, "learning_rate": 1.5971776779987173e-08, "loss": 0.6596, "step": 81805 }, { "epoch": 0.9970384995064165, "grad_norm": 3.0484025582503733, "learning_rate": 1.5651058370750484e-08, "loss": 0.7704, "step": 81810 }, { "epoch": 0.9970994357305644, "grad_norm": 2.7404852808604616, "learning_rate": 1.5330339961513792e-08, "loss": 0.6463, "step": 81815 }, { "epoch": 0.9971603719547122, "grad_norm": 2.6873276667734, "learning_rate": 1.50096215522771e-08, "loss": 0.713, "step": 81820 }, { "epoch": 0.99722130817886, "grad_norm": 2.3965345514194363, "learning_rate": 1.4688903143040411e-08, "loss": 0.6325, "step": 81825 }, { "epoch": 0.9972822444030078, "grad_norm": 2.345218559028408, "learning_rate": 1.4368184733803723e-08, "loss": 0.6959, "step": 81830 }, { "epoch": 0.9973431806271557, "grad_norm": 2.828715783082132, "learning_rate": 1.404746632456703e-08, "loss": 0.6478, "step": 81835 }, { "epoch": 0.9974041168513035, "grad_norm": 4.011556528420113, "learning_rate": 1.372674791533034e-08, "loss": 0.6449, "step": 81840 }, { "epoch": 0.9974650530754512, "grad_norm": 2.0449869933119165, "learning_rate": 1.3406029506093652e-08, "loss": 0.7153, "step": 81845 }, { "epoch": 0.997525989299599, "grad_norm": 2.8628875212853315, "learning_rate": 1.3085311096856961e-08, "loss": 0.7444, "step": 81850 }, { "epoch": 0.9975869255237468, "grad_norm": 2.5968830727167385, "learning_rate": 1.276459268762027e-08, "loss": 0.6715, "step": 81855 }, { "epoch": 0.9976478617478947, "grad_norm": 2.377533788523552, "learning_rate": 1.244387427838358e-08, "loss": 0.7125, "step": 81860 }, { "epoch": 0.9977087979720425, "grad_norm": 4.0669987331390045, "learning_rate": 1.2123155869146892e-08, "loss": 0.7377, "step": 81865 }, { "epoch": 0.9977697341961903, "grad_norm": 2.414656977957162, "learning_rate": 1.18024374599102e-08, "loss": 0.6802, "step": 81870 }, { "epoch": 0.9978306704203381, "grad_norm": 2.473698614820629, "learning_rate": 1.1481719050673509e-08, "loss": 0.7085, "step": 81875 }, { "epoch": 0.9978916066444858, "grad_norm": 2.589879343582922, "learning_rate": 1.1161000641436819e-08, "loss": 0.7682, "step": 81880 }, { "epoch": 0.9979525428686337, "grad_norm": 2.8554656475915814, "learning_rate": 1.084028223220013e-08, "loss": 0.7605, "step": 81885 }, { "epoch": 0.9980134790927815, "grad_norm": 3.68351881363235, "learning_rate": 1.051956382296344e-08, "loss": 0.6903, "step": 81890 }, { "epoch": 0.9980744153169293, "grad_norm": 2.697071763268781, "learning_rate": 1.0198845413726749e-08, "loss": 0.7112, "step": 81895 }, { "epoch": 0.9981353515410771, "grad_norm": 2.5567229643626566, "learning_rate": 9.878127004490057e-09, "loss": 0.6685, "step": 81900 }, { "epoch": 0.998196287765225, "grad_norm": 2.2701280588848274, "learning_rate": 9.557408595253368e-09, "loss": 0.7132, "step": 81905 }, { "epoch": 0.9982572239893728, "grad_norm": 2.162240279381935, "learning_rate": 9.236690186016678e-09, "loss": 0.6637, "step": 81910 }, { "epoch": 0.9983181602135205, "grad_norm": 2.5075125654032733, "learning_rate": 8.915971776779988e-09, "loss": 0.7212, "step": 81915 }, { "epoch": 0.9983790964376683, "grad_norm": 2.3717007565340658, "learning_rate": 8.595253367543297e-09, "loss": 0.6967, "step": 81920 }, { "epoch": 0.9984400326618161, "grad_norm": 3.103587312585308, "learning_rate": 8.274534958306608e-09, "loss": 0.7015, "step": 81925 }, { "epoch": 0.998500968885964, "grad_norm": 2.7177010642724495, "learning_rate": 7.953816549069918e-09, "loss": 0.704, "step": 81930 }, { "epoch": 0.9985619051101118, "grad_norm": 2.5478390987121617, "learning_rate": 7.633098139833228e-09, "loss": 0.7317, "step": 81935 }, { "epoch": 0.9986228413342596, "grad_norm": 2.3202134524604845, "learning_rate": 7.312379730596536e-09, "loss": 0.734, "step": 81940 }, { "epoch": 0.9986837775584074, "grad_norm": 2.553292673791427, "learning_rate": 6.991661321359847e-09, "loss": 0.713, "step": 81945 }, { "epoch": 0.9987447137825551, "grad_norm": 2.611628005835106, "learning_rate": 6.670942912123156e-09, "loss": 0.6675, "step": 81950 }, { "epoch": 0.998805650006703, "grad_norm": 2.381930821544165, "learning_rate": 6.350224502886467e-09, "loss": 0.737, "step": 81955 }, { "epoch": 0.9988665862308508, "grad_norm": 2.46956521972289, "learning_rate": 6.0295060936497756e-09, "loss": 0.6484, "step": 81960 }, { "epoch": 0.9989275224549986, "grad_norm": 2.8528631771326443, "learning_rate": 5.708787684413086e-09, "loss": 0.7249, "step": 81965 }, { "epoch": 0.9989884586791464, "grad_norm": 2.0462528280428254, "learning_rate": 5.388069275176396e-09, "loss": 0.6716, "step": 81970 }, { "epoch": 0.9990493949032943, "grad_norm": 2.0694106689732252, "learning_rate": 5.067350865939705e-09, "loss": 0.6952, "step": 81975 }, { "epoch": 0.999110331127442, "grad_norm": 2.2129768910803995, "learning_rate": 4.746632456703016e-09, "loss": 0.7193, "step": 81980 }, { "epoch": 0.9991712673515898, "grad_norm": 2.774981802241698, "learning_rate": 4.425914047466325e-09, "loss": 0.7014, "step": 81985 }, { "epoch": 0.9992322035757376, "grad_norm": 2.6626335365781646, "learning_rate": 4.105195638229635e-09, "loss": 0.677, "step": 81990 }, { "epoch": 0.9992931397998854, "grad_norm": 2.535333634120103, "learning_rate": 3.7844772289929444e-09, "loss": 0.7555, "step": 81995 }, { "epoch": 0.9993540760240333, "grad_norm": 2.204680447007214, "learning_rate": 3.4637588197562544e-09, "loss": 0.7091, "step": 82000 }, { "epoch": 0.9994150122481811, "grad_norm": 2.3502739356558964, "learning_rate": 3.143040410519564e-09, "loss": 0.69, "step": 82005 }, { "epoch": 0.9994759484723289, "grad_norm": 2.9028111607072447, "learning_rate": 2.8223220012828736e-09, "loss": 0.6561, "step": 82010 }, { "epoch": 0.9995368846964766, "grad_norm": 2.6976592745806416, "learning_rate": 2.5016035920461836e-09, "loss": 0.6937, "step": 82015 }, { "epoch": 0.9995978209206244, "grad_norm": 2.467902899312366, "learning_rate": 2.1808851828094932e-09, "loss": 0.7765, "step": 82020 }, { "epoch": 0.9996587571447723, "grad_norm": 2.4286830106244848, "learning_rate": 1.8601667735728035e-09, "loss": 0.7372, "step": 82025 }, { "epoch": 0.9997196933689201, "grad_norm": 2.0072648829323136, "learning_rate": 1.539448364336113e-09, "loss": 0.6838, "step": 82030 }, { "epoch": 0.9997806295930679, "grad_norm": 2.482165112640844, "learning_rate": 1.2187299550994229e-09, "loss": 0.6411, "step": 82035 }, { "epoch": 0.9998415658172157, "grad_norm": 2.660157040217858, "learning_rate": 8.980115458627327e-10, "loss": 0.6563, "step": 82040 }, { "epoch": 0.9999025020413636, "grad_norm": 2.306769079373391, "learning_rate": 5.772931366260424e-10, "loss": 0.6694, "step": 82045 }, { "epoch": 0.9999634382655113, "grad_norm": 2.186880897092222, "learning_rate": 2.565747273893522e-10, "loss": 0.7267, "step": 82050 }, { "epoch": 1.0, "step": 82053, "total_flos": 1.4286619960868864e+16, "train_loss": 0.7703872848547609, "train_runtime": 178945.2338, "train_samples_per_second": 14.673, "train_steps_per_second": 0.459 } ], "logging_steps": 5, "max_steps": 82053, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 6000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4286619960868864e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }